From c8996d24a1f19f7b44429e8b4266e74d92392e69 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 2 Dec 2025 04:38:56 -0500 Subject: [PATCH] Cache source reads during resolution (#16888) ## Summary If you have requirements files that are included multiple times, we can avoid going back to disk. This also guards against accidental repeated reads on standard input streams. --- crates/uv-requirements-txt/src/lib.rs | 111 ++++++++++++++------ crates/uv-requirements/src/specification.rs | 24 +++-- 2 files changed, 98 insertions(+), 37 deletions(-) diff --git a/crates/uv-requirements-txt/src/lib.rs b/crates/uv-requirements-txt/src/lib.rs index 5cca0fba4..a0a4fc2a3 100644 --- a/crates/uv-requirements-txt/src/lib.rs +++ b/crates/uv-requirements-txt/src/lib.rs @@ -40,7 +40,7 @@ use std::io; use std::path::{Path, PathBuf}; use std::str::FromStr; -use rustc_hash::FxHashSet; +use rustc_hash::{FxHashMap, FxHashSet}; use tracing::instrument; use unscanny::{Pattern, Scanner}; use url::Url; @@ -66,6 +66,9 @@ use crate::shquote::unquote; mod requirement; mod shquote; +/// A cache of file contents, keyed by path, to avoid re-reading files from disk. +pub type SourceCache = FxHashMap; + /// We emit one of those for each `requirements.txt` entry. enum RequirementsTxtStatement { /// `-r` inclusion filename @@ -171,12 +174,39 @@ impl RequirementsTxt { requirements_txt: impl AsRef, working_dir: impl AsRef, client_builder: &BaseClientBuilder<'_>, + ) -> Result { + Self::parse_with_cache( + requirements_txt, + working_dir, + client_builder, + &mut SourceCache::default(), + ) + .await + } + + /// Parse a `requirements.txt` file, using the given cache to avoid re-reading files from disk. + #[instrument( + skip_all, + fields(requirements_txt = requirements_txt.as_ref().as_os_str().to_str()) + )] + pub async fn parse_with_cache( + requirements_txt: impl AsRef, + working_dir: impl AsRef, + client_builder: &BaseClientBuilder<'_>, + cache: &mut SourceCache, ) -> Result { let mut visited = VisitedFiles::Requirements { requirements: &mut FxHashSet::default(), constraints: &mut FxHashSet::default(), }; - Self::parse_impl(requirements_txt, working_dir, client_builder, &mut visited).await + Self::parse_impl( + requirements_txt, + working_dir, + client_builder, + &mut visited, + cache, + ) + .await } /// See module level documentation @@ -189,49 +219,64 @@ impl RequirementsTxt { working_dir: impl AsRef, client_builder: &BaseClientBuilder<'_>, visited: &mut VisitedFiles<'_>, + cache: &mut SourceCache, ) -> Result { let requirements_txt = requirements_txt.as_ref(); let working_dir = working_dir.as_ref(); - let content = - if requirements_txt.starts_with("http://") | requirements_txt.starts_with("https://") { - #[cfg(not(feature = "http"))] - { + let content = if let Some(content) = cache.get(requirements_txt) { + // Use cached content if available. + content.clone() + } else if requirements_txt.starts_with("http://") | requirements_txt.starts_with("https://") + { + #[cfg(not(feature = "http"))] + { + return Err(RequirementsTxtFileError { + file: requirements_txt.to_path_buf(), + error: RequirementsTxtParserError::Io(io::Error::new( + io::ErrorKind::InvalidInput, + "Remote file not supported without `http` feature", + )), + }); + } + + #[cfg(feature = "http")] + { + // Avoid constructing a client if network is disabled already + if client_builder.is_offline() { return Err(RequirementsTxtFileError { file: requirements_txt.to_path_buf(), error: RequirementsTxtParserError::Io(io::Error::new( io::ErrorKind::InvalidInput, - "Remote file not supported without `http` feature", + format!( + "Network connectivity is disabled, but a remote requirements file was requested: {}", + requirements_txt.display() + ), )), }); } - #[cfg(feature = "http")] - { - // Avoid constructing a client if network is disabled already - if client_builder.is_offline() { - return Err(RequirementsTxtFileError { - file: requirements_txt.to_path_buf(), - error: RequirementsTxtParserError::Io(io::Error::new( - io::ErrorKind::InvalidInput, - format!("Network connectivity is disabled, but a remote requirements file was requested: {}", requirements_txt.display()), - )), - }); - } - - let client = client_builder.build(); - read_url_to_string(&requirements_txt, client).await - } - } else { - // Ex) `file:///home/ferris/project/requirements.txt` - uv_fs::read_to_string_transcode(&requirements_txt) + let client = client_builder.build(); + let content = read_url_to_string(&requirements_txt, client) .await - .map_err(RequirementsTxtParserError::Io) + .map_err(|err| RequirementsTxtFileError { + file: requirements_txt.to_path_buf(), + error: err, + })?; + cache.insert(requirements_txt.to_path_buf(), content.clone()); + content } - .map_err(|err| RequirementsTxtFileError { - file: requirements_txt.to_path_buf(), - error: err, - })?; + } else { + // Ex) `file:///home/ferris/project/requirements.txt` + let content = uv_fs::read_to_string_transcode(&requirements_txt) + .await + .map_err(|err| RequirementsTxtFileError { + file: requirements_txt.to_path_buf(), + error: RequirementsTxtParserError::Io(err), + })?; + cache.insert(requirements_txt.to_path_buf(), content.clone()); + content + }; let requirements_dir = requirements_txt.parent().unwrap_or(working_dir); let data = Self::parse_inner( @@ -241,6 +286,7 @@ impl RequirementsTxt { client_builder, requirements_txt, visited, + cache, ) .await .map_err(|err| RequirementsTxtFileError { @@ -264,6 +310,7 @@ impl RequirementsTxt { client_builder: &BaseClientBuilder<'_>, requirements_txt: &Path, visited: &mut VisitedFiles<'_>, + cache: &mut SourceCache, ) -> Result { let mut s = Scanner::new(content); @@ -318,6 +365,7 @@ impl RequirementsTxt { working_dir, client_builder, visited, + cache, )) .await .map_err(|err| RequirementsTxtParserError::Subfile { @@ -394,6 +442,7 @@ impl RequirementsTxt { working_dir, client_builder, &mut visited, + cache, )) .await .map_err(|err| RequirementsTxtParserError::Subfile { diff --git a/crates/uv-requirements/src/specification.rs b/crates/uv-requirements/src/specification.rs index bb8d8adbf..e1e3fd1ec 100644 --- a/crates/uv-requirements/src/specification.rs +++ b/crates/uv-requirements/src/specification.rs @@ -45,7 +45,7 @@ use uv_distribution_types::{ use uv_fs::{CWD, Simplified}; use uv_normalize::{ExtraName, PackageName, PipGroupName}; use uv_pypi_types::PyProjectToml; -use uv_requirements_txt::{RequirementsTxt, RequirementsTxtRequirement}; +use uv_requirements_txt::{RequirementsTxt, RequirementsTxtRequirement, SourceCache}; use uv_scripts::{Pep723Error, Pep723Item, Pep723Script}; use uv_warnings::warn_user; @@ -91,6 +91,16 @@ impl RequirementsSpecification { pub async fn from_source( source: &RequirementsSource, client_builder: &BaseClientBuilder<'_>, + ) -> Result { + Self::from_source_with_cache(source, client_builder, &mut SourceCache::default()).await + } + + /// Read the requirements and constraints from a source, using a cache for file contents. + #[instrument(skip_all, level = tracing::Level::DEBUG, fields(source = % source))] + pub async fn from_source_with_cache( + source: &RequirementsSource, + client_builder: &BaseClientBuilder<'_>, + cache: &mut SourceCache, ) -> Result { Ok(match source { RequirementsSource::Package(requirement) => Self { @@ -114,7 +124,8 @@ impl RequirementsSpecification { return Err(anyhow::anyhow!("File not found: `{}`", path.user_display())); } - let requirements_txt = RequirementsTxt::parse(path, &*CWD, client_builder).await?; + let requirements_txt = + RequirementsTxt::parse_with_cache(path, &*CWD, client_builder, cache).await?; if requirements_txt == RequirementsTxt::default() { if path == Path::new("-") { @@ -352,6 +363,7 @@ impl RequirementsSpecification { client_builder: &BaseClientBuilder<'_>, ) -> Result { let mut spec = Self::default(); + let mut cache = SourceCache::default(); // Disallow `pylock.toml` files as constraints. if let Some(pylock_toml) = constraints.iter().find_map(|source| { @@ -489,7 +501,7 @@ impl RequirementsSpecification { // Resolve sources into specifications so we know their `source_tree`. let mut requirement_sources = Vec::new(); for source in requirements { - let source = Self::from_source(source, client_builder).await?; + let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?; requirement_sources.push(source); } @@ -540,7 +552,7 @@ impl RequirementsSpecification { // Read all constraints, treating both requirements _and_ constraints as constraints. // Overrides are ignored. for source in constraints { - let source = Self::from_source(source, client_builder).await?; + let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?; for entry in source.requirements { match entry.requirement { UnresolvedRequirement::Named(requirement) => { @@ -578,7 +590,7 @@ impl RequirementsSpecification { // Read all overrides, treating both requirements _and_ overrides as overrides. // Constraints are ignored. for source in overrides { - let source = Self::from_source(source, client_builder).await?; + let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?; spec.overrides.extend(source.requirements); spec.overrides.extend(source.overrides); @@ -601,7 +613,7 @@ impl RequirementsSpecification { // Collect excludes. for source in excludes { - let source = Self::from_source(source, client_builder).await?; + let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?; for req_spec in source.requirements { match req_spec.requirement { UnresolvedRequirement::Named(requirement) => {