mirror of https://github.com/astral-sh/uv
Cache source reads during resolution (#16888)
## Summary If you have requirements files that are included multiple times, we can avoid going back to disk. This also guards against accidental repeated reads on standard input streams.
This commit is contained in:
parent
2cdbf9e547
commit
c8996d24a1
|
|
@ -40,7 +40,7 @@ use std::io;
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
|
||||
use rustc_hash::FxHashSet;
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use tracing::instrument;
|
||||
use unscanny::{Pattern, Scanner};
|
||||
use url::Url;
|
||||
|
|
@ -66,6 +66,9 @@ use crate::shquote::unquote;
|
|||
mod requirement;
|
||||
mod shquote;
|
||||
|
||||
/// A cache of file contents, keyed by path, to avoid re-reading files from disk.
|
||||
pub type SourceCache = FxHashMap<PathBuf, String>;
|
||||
|
||||
/// We emit one of those for each `requirements.txt` entry.
|
||||
enum RequirementsTxtStatement {
|
||||
/// `-r` inclusion filename
|
||||
|
|
@ -171,12 +174,39 @@ impl RequirementsTxt {
|
|||
requirements_txt: impl AsRef<Path>,
|
||||
working_dir: impl AsRef<Path>,
|
||||
client_builder: &BaseClientBuilder<'_>,
|
||||
) -> Result<Self, RequirementsTxtFileError> {
|
||||
Self::parse_with_cache(
|
||||
requirements_txt,
|
||||
working_dir,
|
||||
client_builder,
|
||||
&mut SourceCache::default(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Parse a `requirements.txt` file, using the given cache to avoid re-reading files from disk.
|
||||
#[instrument(
|
||||
skip_all,
|
||||
fields(requirements_txt = requirements_txt.as_ref().as_os_str().to_str())
|
||||
)]
|
||||
pub async fn parse_with_cache(
|
||||
requirements_txt: impl AsRef<Path>,
|
||||
working_dir: impl AsRef<Path>,
|
||||
client_builder: &BaseClientBuilder<'_>,
|
||||
cache: &mut SourceCache,
|
||||
) -> Result<Self, RequirementsTxtFileError> {
|
||||
let mut visited = VisitedFiles::Requirements {
|
||||
requirements: &mut FxHashSet::default(),
|
||||
constraints: &mut FxHashSet::default(),
|
||||
};
|
||||
Self::parse_impl(requirements_txt, working_dir, client_builder, &mut visited).await
|
||||
Self::parse_impl(
|
||||
requirements_txt,
|
||||
working_dir,
|
||||
client_builder,
|
||||
&mut visited,
|
||||
cache,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// See module level documentation
|
||||
|
|
@ -189,49 +219,64 @@ impl RequirementsTxt {
|
|||
working_dir: impl AsRef<Path>,
|
||||
client_builder: &BaseClientBuilder<'_>,
|
||||
visited: &mut VisitedFiles<'_>,
|
||||
cache: &mut SourceCache,
|
||||
) -> Result<Self, RequirementsTxtFileError> {
|
||||
let requirements_txt = requirements_txt.as_ref();
|
||||
let working_dir = working_dir.as_ref();
|
||||
|
||||
let content =
|
||||
if requirements_txt.starts_with("http://") | requirements_txt.starts_with("https://") {
|
||||
#[cfg(not(feature = "http"))]
|
||||
{
|
||||
let content = if let Some(content) = cache.get(requirements_txt) {
|
||||
// Use cached content if available.
|
||||
content.clone()
|
||||
} else if requirements_txt.starts_with("http://") | requirements_txt.starts_with("https://")
|
||||
{
|
||||
#[cfg(not(feature = "http"))]
|
||||
{
|
||||
return Err(RequirementsTxtFileError {
|
||||
file: requirements_txt.to_path_buf(),
|
||||
error: RequirementsTxtParserError::Io(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"Remote file not supported without `http` feature",
|
||||
)),
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(feature = "http")]
|
||||
{
|
||||
// Avoid constructing a client if network is disabled already
|
||||
if client_builder.is_offline() {
|
||||
return Err(RequirementsTxtFileError {
|
||||
file: requirements_txt.to_path_buf(),
|
||||
error: RequirementsTxtParserError::Io(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"Remote file not supported without `http` feature",
|
||||
format!(
|
||||
"Network connectivity is disabled, but a remote requirements file was requested: {}",
|
||||
requirements_txt.display()
|
||||
),
|
||||
)),
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(feature = "http")]
|
||||
{
|
||||
// Avoid constructing a client if network is disabled already
|
||||
if client_builder.is_offline() {
|
||||
return Err(RequirementsTxtFileError {
|
||||
file: requirements_txt.to_path_buf(),
|
||||
error: RequirementsTxtParserError::Io(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
format!("Network connectivity is disabled, but a remote requirements file was requested: {}", requirements_txt.display()),
|
||||
)),
|
||||
});
|
||||
}
|
||||
|
||||
let client = client_builder.build();
|
||||
read_url_to_string(&requirements_txt, client).await
|
||||
}
|
||||
} else {
|
||||
// Ex) `file:///home/ferris/project/requirements.txt`
|
||||
uv_fs::read_to_string_transcode(&requirements_txt)
|
||||
let client = client_builder.build();
|
||||
let content = read_url_to_string(&requirements_txt, client)
|
||||
.await
|
||||
.map_err(RequirementsTxtParserError::Io)
|
||||
.map_err(|err| RequirementsTxtFileError {
|
||||
file: requirements_txt.to_path_buf(),
|
||||
error: err,
|
||||
})?;
|
||||
cache.insert(requirements_txt.to_path_buf(), content.clone());
|
||||
content
|
||||
}
|
||||
.map_err(|err| RequirementsTxtFileError {
|
||||
file: requirements_txt.to_path_buf(),
|
||||
error: err,
|
||||
})?;
|
||||
} else {
|
||||
// Ex) `file:///home/ferris/project/requirements.txt`
|
||||
let content = uv_fs::read_to_string_transcode(&requirements_txt)
|
||||
.await
|
||||
.map_err(|err| RequirementsTxtFileError {
|
||||
file: requirements_txt.to_path_buf(),
|
||||
error: RequirementsTxtParserError::Io(err),
|
||||
})?;
|
||||
cache.insert(requirements_txt.to_path_buf(), content.clone());
|
||||
content
|
||||
};
|
||||
|
||||
let requirements_dir = requirements_txt.parent().unwrap_or(working_dir);
|
||||
let data = Self::parse_inner(
|
||||
|
|
@ -241,6 +286,7 @@ impl RequirementsTxt {
|
|||
client_builder,
|
||||
requirements_txt,
|
||||
visited,
|
||||
cache,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| RequirementsTxtFileError {
|
||||
|
|
@ -264,6 +310,7 @@ impl RequirementsTxt {
|
|||
client_builder: &BaseClientBuilder<'_>,
|
||||
requirements_txt: &Path,
|
||||
visited: &mut VisitedFiles<'_>,
|
||||
cache: &mut SourceCache,
|
||||
) -> Result<Self, RequirementsTxtParserError> {
|
||||
let mut s = Scanner::new(content);
|
||||
|
||||
|
|
@ -318,6 +365,7 @@ impl RequirementsTxt {
|
|||
working_dir,
|
||||
client_builder,
|
||||
visited,
|
||||
cache,
|
||||
))
|
||||
.await
|
||||
.map_err(|err| RequirementsTxtParserError::Subfile {
|
||||
|
|
@ -394,6 +442,7 @@ impl RequirementsTxt {
|
|||
working_dir,
|
||||
client_builder,
|
||||
&mut visited,
|
||||
cache,
|
||||
))
|
||||
.await
|
||||
.map_err(|err| RequirementsTxtParserError::Subfile {
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ use uv_distribution_types::{
|
|||
use uv_fs::{CWD, Simplified};
|
||||
use uv_normalize::{ExtraName, PackageName, PipGroupName};
|
||||
use uv_pypi_types::PyProjectToml;
|
||||
use uv_requirements_txt::{RequirementsTxt, RequirementsTxtRequirement};
|
||||
use uv_requirements_txt::{RequirementsTxt, RequirementsTxtRequirement, SourceCache};
|
||||
use uv_scripts::{Pep723Error, Pep723Item, Pep723Script};
|
||||
use uv_warnings::warn_user;
|
||||
|
||||
|
|
@ -91,6 +91,16 @@ impl RequirementsSpecification {
|
|||
pub async fn from_source(
|
||||
source: &RequirementsSource,
|
||||
client_builder: &BaseClientBuilder<'_>,
|
||||
) -> Result<Self> {
|
||||
Self::from_source_with_cache(source, client_builder, &mut SourceCache::default()).await
|
||||
}
|
||||
|
||||
/// Read the requirements and constraints from a source, using a cache for file contents.
|
||||
#[instrument(skip_all, level = tracing::Level::DEBUG, fields(source = % source))]
|
||||
pub async fn from_source_with_cache(
|
||||
source: &RequirementsSource,
|
||||
client_builder: &BaseClientBuilder<'_>,
|
||||
cache: &mut SourceCache,
|
||||
) -> Result<Self> {
|
||||
Ok(match source {
|
||||
RequirementsSource::Package(requirement) => Self {
|
||||
|
|
@ -114,7 +124,8 @@ impl RequirementsSpecification {
|
|||
return Err(anyhow::anyhow!("File not found: `{}`", path.user_display()));
|
||||
}
|
||||
|
||||
let requirements_txt = RequirementsTxt::parse(path, &*CWD, client_builder).await?;
|
||||
let requirements_txt =
|
||||
RequirementsTxt::parse_with_cache(path, &*CWD, client_builder, cache).await?;
|
||||
|
||||
if requirements_txt == RequirementsTxt::default() {
|
||||
if path == Path::new("-") {
|
||||
|
|
@ -352,6 +363,7 @@ impl RequirementsSpecification {
|
|||
client_builder: &BaseClientBuilder<'_>,
|
||||
) -> Result<Self> {
|
||||
let mut spec = Self::default();
|
||||
let mut cache = SourceCache::default();
|
||||
|
||||
// Disallow `pylock.toml` files as constraints.
|
||||
if let Some(pylock_toml) = constraints.iter().find_map(|source| {
|
||||
|
|
@ -489,7 +501,7 @@ impl RequirementsSpecification {
|
|||
// Resolve sources into specifications so we know their `source_tree`.
|
||||
let mut requirement_sources = Vec::new();
|
||||
for source in requirements {
|
||||
let source = Self::from_source(source, client_builder).await?;
|
||||
let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
|
||||
requirement_sources.push(source);
|
||||
}
|
||||
|
||||
|
|
@ -540,7 +552,7 @@ impl RequirementsSpecification {
|
|||
// Read all constraints, treating both requirements _and_ constraints as constraints.
|
||||
// Overrides are ignored.
|
||||
for source in constraints {
|
||||
let source = Self::from_source(source, client_builder).await?;
|
||||
let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
|
||||
for entry in source.requirements {
|
||||
match entry.requirement {
|
||||
UnresolvedRequirement::Named(requirement) => {
|
||||
|
|
@ -578,7 +590,7 @@ impl RequirementsSpecification {
|
|||
// Read all overrides, treating both requirements _and_ overrides as overrides.
|
||||
// Constraints are ignored.
|
||||
for source in overrides {
|
||||
let source = Self::from_source(source, client_builder).await?;
|
||||
let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
|
||||
spec.overrides.extend(source.requirements);
|
||||
spec.overrides.extend(source.overrides);
|
||||
|
||||
|
|
@ -601,7 +613,7 @@ impl RequirementsSpecification {
|
|||
|
||||
// Collect excludes.
|
||||
for source in excludes {
|
||||
let source = Self::from_source(source, client_builder).await?;
|
||||
let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
|
||||
for req_spec in source.requirements {
|
||||
match req_spec.requirement {
|
||||
UnresolvedRequirement::Named(requirement) => {
|
||||
|
|
|
|||
Loading…
Reference in New Issue