mirror of https://github.com/astral-sh/uv
Cache source reads during resolution (#16888)
## Summary

If you have requirements files that are included multiple times, we can avoid going back to disk for each inclusion. This also guards against accidentally reading a standard input stream more than once.
This commit is contained in: commit c8996d24a1 (parent 2cdbf9e547)

@@ -40,7 +40,7 @@ use std::io;
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
 
-use rustc_hash::FxHashSet;
+use rustc_hash::{FxHashMap, FxHashSet};
 use tracing::instrument;
 use unscanny::{Pattern, Scanner};
 use url::Url;

@@ -66,6 +66,9 @@ use crate::shquote::unquote;
 mod requirement;
 mod shquote;
 
+/// A cache of file contents, keyed by path, to avoid re-reading files from disk.
+pub type SourceCache = FxHashMap<PathBuf, String>;
+
 /// We emit one of those for each `requirements.txt` entry.
 enum RequirementsTxtStatement {
     /// `-r` inclusion filename
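The new `SourceCache` alias above is just a map from path to file contents: the parser checks it before reading a source and stores whatever it reads. A minimal, self-contained sketch of that read-through pattern, using `std::collections::HashMap` in place of `rustc_hash::FxHashMap` (same map API); the temp-file setup and file contents are illustrative only:

```rust
use std::collections::HashMap; // stand-in for rustc_hash::FxHashMap
use std::io;
use std::path::{Path, PathBuf};

/// Illustrative counterpart of the `SourceCache` alias added in the hunk above.
type SourceCache = HashMap<PathBuf, String>;

/// Return a file's contents, hitting the disk only on the first request for a given path.
fn read_cached(cache: &mut SourceCache, path: &Path) -> io::Result<String> {
    if let Some(content) = cache.get(path) {
        // Cache hit: hand back a copy of the previously read contents.
        return Ok(content.clone());
    }
    let content = std::fs::read_to_string(path)?;
    cache.insert(path.to_path_buf(), content.clone());
    Ok(content)
}

fn main() -> io::Result<()> {
    // Illustrative setup: write a small file so the example runs anywhere.
    let path = std::env::temp_dir().join("uv-source-cache-demo.txt");
    std::fs::write(&path, "requests==2.32.3\n")?;

    let mut cache = SourceCache::default();
    // The second request for the same path is served from memory, not from disk.
    let first = read_cached(&mut cache, &path)?;
    let second = read_cached(&mut cache, &path)?;
    assert_eq!(first, second);
    Ok(())
}
```

Because the cache owns the strings, a hit hands back a clone; that is the same `content.clone()` that shows up in the hunks below.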
@@ -171,12 +174,39 @@ impl RequirementsTxt {
         requirements_txt: impl AsRef<Path>,
         working_dir: impl AsRef<Path>,
         client_builder: &BaseClientBuilder<'_>,
+    ) -> Result<Self, RequirementsTxtFileError> {
+        Self::parse_with_cache(
+            requirements_txt,
+            working_dir,
+            client_builder,
+            &mut SourceCache::default(),
+        )
+        .await
+    }
+
+    /// Parse a `requirements.txt` file, using the given cache to avoid re-reading files from disk.
+    #[instrument(
+        skip_all,
+        fields(requirements_txt = requirements_txt.as_ref().as_os_str().to_str())
+    )]
+    pub async fn parse_with_cache(
+        requirements_txt: impl AsRef<Path>,
+        working_dir: impl AsRef<Path>,
+        client_builder: &BaseClientBuilder<'_>,
+        cache: &mut SourceCache,
     ) -> Result<Self, RequirementsTxtFileError> {
         let mut visited = VisitedFiles::Requirements {
             requirements: &mut FxHashSet::default(),
             constraints: &mut FxHashSet::default(),
         };
-        Self::parse_impl(requirements_txt, working_dir, client_builder, &mut visited).await
+        Self::parse_impl(
+            requirements_txt,
+            working_dir,
+            client_builder,
+            &mut visited,
+            cache,
+        )
+        .await
     }
 
     /// See module level documentation
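The existing `parse` keeps its public signature and simply delegates to the new `parse_with_cache` with a throwaway `SourceCache::default()`, while `parse_with_cache` threads the caller's cache down into `parse_impl` (and, per the later hunks, into recursive `-r`/`-c` includes). A rough standalone sketch of that wrapper-plus-`_with_cache` split; the types and bodies here are toy stand-ins, not uv's:

```rust
use std::collections::HashMap;
use std::path::{Path, PathBuf};

type SourceCache = HashMap<PathBuf, String>;

/// Toy stand-in for the parsed result; not uv's `RequirementsTxt`.
#[derive(Debug, Default)]
struct Parsed;

// The existing entry point keeps its signature: it just supplies a fresh, private cache.
fn parse(path: &Path) -> Parsed {
    parse_with_cache(path, &mut SourceCache::default())
}

// The new entry point lets callers reuse one cache across many parses.
fn parse_with_cache(path: &Path, cache: &mut SourceCache) -> Parsed {
    // The real implementation consults the cache before reading the file;
    // here we only record that the path was requested.
    cache.entry(path.to_path_buf()).or_default();
    Parsed
}

fn main() {
    let mut cache = SourceCache::default();
    let _ = parse_with_cache(Path::new("requirements.txt"), &mut cache);
    let _ = parse_with_cache(Path::new("requirements.txt"), &mut cache); // reuses the entry
    let _ = parse(Path::new("one-off.txt")); // one-off callers use a private cache
    assert_eq!(cache.len(), 1); // the shared cache saw a single path
}
```

Keeping `parse` around means one-off callers are untouched; only code paths that parse several sources need to opt into sharing a cache.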
@@ -189,49 +219,64 @@ impl RequirementsTxt {
         working_dir: impl AsRef<Path>,
         client_builder: &BaseClientBuilder<'_>,
         visited: &mut VisitedFiles<'_>,
+        cache: &mut SourceCache,
     ) -> Result<Self, RequirementsTxtFileError> {
         let requirements_txt = requirements_txt.as_ref();
         let working_dir = working_dir.as_ref();
 
-        let content =
-            if requirements_txt.starts_with("http://") | requirements_txt.starts_with("https://") {
-                #[cfg(not(feature = "http"))]
-                {
-                    return Err(RequirementsTxtFileError {
-                        file: requirements_txt.to_path_buf(),
-                        error: RequirementsTxtParserError::Io(io::Error::new(
-                            io::ErrorKind::InvalidInput,
-                            "Remote file not supported without `http` feature",
-                        )),
-                    });
-                }
-
-                #[cfg(feature = "http")]
-                {
-                    // Avoid constructing a client if network is disabled already
-                    if client_builder.is_offline() {
-                        return Err(RequirementsTxtFileError {
-                            file: requirements_txt.to_path_buf(),
-                            error: RequirementsTxtParserError::Io(io::Error::new(
-                                io::ErrorKind::InvalidInput,
-                                format!("Network connectivity is disabled, but a remote requirements file was requested: {}", requirements_txt.display()),
-                            )),
-                        });
-                    }
-
-                    let client = client_builder.build();
-                    read_url_to_string(&requirements_txt, client).await
-                }
-            } else {
-                // Ex) `file:///home/ferris/project/requirements.txt`
-                uv_fs::read_to_string_transcode(&requirements_txt)
-                    .await
-                    .map_err(RequirementsTxtParserError::Io)
-            }
-            .map_err(|err| RequirementsTxtFileError {
-                file: requirements_txt.to_path_buf(),
-                error: err,
-            })?;
+        let content = if let Some(content) = cache.get(requirements_txt) {
+            // Use cached content if available.
+            content.clone()
+        } else if requirements_txt.starts_with("http://") | requirements_txt.starts_with("https://")
+        {
+            #[cfg(not(feature = "http"))]
+            {
+                return Err(RequirementsTxtFileError {
+                    file: requirements_txt.to_path_buf(),
+                    error: RequirementsTxtParserError::Io(io::Error::new(
+                        io::ErrorKind::InvalidInput,
+                        "Remote file not supported without `http` feature",
+                    )),
+                });
+            }
+
+            #[cfg(feature = "http")]
+            {
+                // Avoid constructing a client if network is disabled already
+                if client_builder.is_offline() {
+                    return Err(RequirementsTxtFileError {
+                        file: requirements_txt.to_path_buf(),
+                        error: RequirementsTxtParserError::Io(io::Error::new(
+                            io::ErrorKind::InvalidInput,
+                            format!(
+                                "Network connectivity is disabled, but a remote requirements file was requested: {}",
+                                requirements_txt.display()
+                            ),
+                        )),
+                    });
+                }
+
+                let client = client_builder.build();
+                let content = read_url_to_string(&requirements_txt, client)
+                    .await
+                    .map_err(|err| RequirementsTxtFileError {
+                        file: requirements_txt.to_path_buf(),
+                        error: err,
+                    })?;
+                cache.insert(requirements_txt.to_path_buf(), content.clone());
+                content
+            }
+        } else {
+            // Ex) `file:///home/ferris/project/requirements.txt`
+            let content = uv_fs::read_to_string_transcode(&requirements_txt)
+                .await
+                .map_err(|err| RequirementsTxtFileError {
+                    file: requirements_txt.to_path_buf(),
+                    error: RequirementsTxtParserError::Io(err),
+                })?;
+            cache.insert(requirements_txt.to_path_buf(), content.clone());
+            content
+        };
 
         let requirements_dir = requirements_txt.parent().unwrap_or(working_dir);
         let data = Self::parse_inner(
@@ -241,6 +286,7 @@ impl RequirementsTxt {
             client_builder,
             requirements_txt,
             visited,
+            cache,
         )
         .await
         .map_err(|err| RequirementsTxtFileError {

@@ -264,6 +310,7 @@ impl RequirementsTxt {
         client_builder: &BaseClientBuilder<'_>,
         requirements_txt: &Path,
         visited: &mut VisitedFiles<'_>,
+        cache: &mut SourceCache,
     ) -> Result<Self, RequirementsTxtParserError> {
         let mut s = Scanner::new(content);
 

@@ -318,6 +365,7 @@ impl RequirementsTxt {
                     working_dir,
                     client_builder,
                     visited,
+                    cache,
                 ))
                 .await
                 .map_err(|err| RequirementsTxtParserError::Subfile {

@@ -394,6 +442,7 @@ impl RequirementsTxt {
                     working_dir,
                     client_builder,
                     &mut visited,
+                    cache,
                 ))
                 .await
                 .map_err(|err| RequirementsTxtParserError::Subfile {
@@ -45,7 +45,7 @@ use uv_distribution_types::{
 use uv_fs::{CWD, Simplified};
 use uv_normalize::{ExtraName, PackageName, PipGroupName};
 use uv_pypi_types::PyProjectToml;
-use uv_requirements_txt::{RequirementsTxt, RequirementsTxtRequirement};
+use uv_requirements_txt::{RequirementsTxt, RequirementsTxtRequirement, SourceCache};
 use uv_scripts::{Pep723Error, Pep723Item, Pep723Script};
 use uv_warnings::warn_user;
 

@@ -91,6 +91,16 @@ impl RequirementsSpecification {
     pub async fn from_source(
         source: &RequirementsSource,
         client_builder: &BaseClientBuilder<'_>,
+    ) -> Result<Self> {
+        Self::from_source_with_cache(source, client_builder, &mut SourceCache::default()).await
+    }
+
+    /// Read the requirements and constraints from a source, using a cache for file contents.
+    #[instrument(skip_all, level = tracing::Level::DEBUG, fields(source = %source))]
+    pub async fn from_source_with_cache(
+        source: &RequirementsSource,
+        client_builder: &BaseClientBuilder<'_>,
+        cache: &mut SourceCache,
     ) -> Result<Self> {
         Ok(match source {
             RequirementsSource::Package(requirement) => Self {
@@ -114,7 +124,8 @@ impl RequirementsSpecification {
                     return Err(anyhow::anyhow!("File not found: `{}`", path.user_display()));
                 }
 
-                let requirements_txt = RequirementsTxt::parse(path, &*CWD, client_builder).await?;
+                let requirements_txt =
+                    RequirementsTxt::parse_with_cache(path, &*CWD, client_builder, cache).await?;
 
                 if requirements_txt == RequirementsTxt::default() {
                     if path == Path::new("-") {

@@ -352,6 +363,7 @@ impl RequirementsSpecification {
         client_builder: &BaseClientBuilder<'_>,
     ) -> Result<Self> {
         let mut spec = Self::default();
+        let mut cache = SourceCache::default();
 
         // Disallow `pylock.toml` files as constraints.
         if let Some(pylock_toml) = constraints.iter().find_map(|source| {

@@ -489,7 +501,7 @@
         // Resolve sources into specifications so we know their `source_tree`.
         let mut requirement_sources = Vec::new();
         for source in requirements {
-            let source = Self::from_source(source, client_builder).await?;
+            let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
             requirement_sources.push(source);
         }
 

@@ -540,7 +552,7 @@
         // Read all constraints, treating both requirements _and_ constraints as constraints.
         // Overrides are ignored.
         for source in constraints {
-            let source = Self::from_source(source, client_builder).await?;
+            let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
             for entry in source.requirements {
                 match entry.requirement {
                     UnresolvedRequirement::Named(requirement) => {

@@ -578,7 +590,7 @@
         // Read all overrides, treating both requirements _and_ overrides as overrides.
         // Constraints are ignored.
         for source in overrides {
-            let source = Self::from_source(source, client_builder).await?;
+            let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
             spec.overrides.extend(source.requirements);
             spec.overrides.extend(source.overrides);
 

@@ -601,7 +613,7 @@
 
         // Collect excludes.
         for source in excludes {
-            let source = Self::from_source(source, client_builder).await?;
+            let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
             for req_spec in source.requirements {
                 match req_spec.requirement {
                     UnresolvedRequirement::Named(requirement) => {
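The hunks in this second file (the `RequirementsSpecification` module, which now imports `SourceCache` from `uv_requirements_txt`) apply the same split one level up: `from_source` becomes a wrapper over `from_source_with_cache`, and the aggregating function creates a single `SourceCache` and passes `&mut cache` into the requirements, constraints, overrides, and excludes loops. A hedged end-to-end sketch of the effect, with a read counter standing in for actual parsing (all names here are illustrative, not uv's API):

```rust
use std::collections::HashMap;
use std::path::{Path, PathBuf};

type SourceCache = HashMap<PathBuf, String>;

/// Pretend "read from disk", counting how often we actually do it.
fn read_with_cache(path: &Path, cache: &mut SourceCache, disk_reads: &mut usize) -> String {
    if let Some(content) = cache.get(path) {
        return content.clone();
    }
    *disk_reads += 1;
    let content = format!("# contents of {}", path.display());
    cache.insert(path.to_path_buf(), content.clone());
    content
}

fn main() {
    // The same file is passed both as requirements and as constraints
    // (e.g. `-r requirements.txt -c requirements.txt`).
    let requirements = [PathBuf::from("requirements.txt")];
    let constraints = [PathBuf::from("requirements.txt"), PathBuf::from("constraints.txt")];

    let mut cache = SourceCache::default();
    let mut disk_reads = 0;

    // Both loops consult the same cache, mirroring the loops in the hunks above.
    for source in requirements.iter().chain(constraints.iter()) {
        let _ = read_with_cache(source, &mut cache, &mut disk_reads);
    }

    // Two distinct files, three mentions: only two reads hit the "disk".
    assert_eq!(disk_reads, 2);
}
```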