diff --git a/crates/puffin-cache/src/lib.rs b/crates/puffin-cache/src/lib.rs
index ef9fa9469..d3aa0f285 100644
--- a/crates/puffin-cache/src/lib.rs
+++ b/crates/puffin-cache/src/lib.rs
@@ -99,6 +99,8 @@ impl Deref for CacheShard {
 pub struct Cache {
     /// The cache directory.
     root: PathBuf,
+    /// The refresh strategy to use when reading from the cache.
+    refresh: Refresh,
     /// A temporary cache directory, if the user requested `--no-cache`.
     ///
     /// Included to ensure that the temporary directory exists for the length of the operation, but
@@ -111,6 +113,7 @@ impl Cache {
     pub fn from_path(root: impl Into<PathBuf>) -> Result<Self, io::Error> {
         Ok(Self {
             root: Self::init(root)?,
+            refresh: Refresh::None,
             _temp_dir_drop: None,
         })
     }
@@ -120,10 +123,17 @@ impl Cache {
         let temp_dir = tempdir()?;
         Ok(Self {
             root: Self::init(temp_dir.path())?,
+            refresh: Refresh::None,
             _temp_dir_drop: Some(Arc::new(temp_dir)),
         })
     }
 
+    /// Set the [`Refresh`] policy for the cache.
+    #[must_use]
+    pub fn with_refresh(self, refresh: Refresh) -> Self {
+        Self { refresh, ..self }
+    }
+
     /// Return the root of the cache.
     pub fn root(&self) -> &Path {
         &self.root
@@ -149,13 +159,42 @@ impl Cache {
         CacheEntry::new(self.bucket(cache_bucket).join(dir), file)
     }
 
-    /// Persist a temporary directory to the artifact store.
-    pub fn persist(
+    /// Returns the [`Freshness`] of a cache entry, as determined by the [`Refresh`] policy.
+    pub fn freshness(
         &self,
-        temp_dir: impl AsRef<Path>,
-        path: impl AsRef<Path>,
-    ) -> Result<(), io::Error> {
+        entry: &CacheEntry,
+        package: Option<&PackageName>,
+    ) -> io::Result<Freshness> {
+        // Grab the cutoff timestamp, if it's relevant.
+        let timestamp = match &self.refresh {
+            Refresh::None => return Ok(Freshness::Fresh),
+            Refresh::All(timestamp) => timestamp,
+            Refresh::Packages(packages, timestamp) => {
+                if package.map_or(true, |package| packages.contains(package)) {
+                    timestamp
+                } else {
+                    return Ok(Freshness::Fresh);
+                }
+            }
+        };
+
+        match fs::metadata(entry.path()) {
+            Ok(metadata) => {
+                if metadata.modified()? >= *timestamp {
+                    Ok(Freshness::Fresh)
+                } else {
+                    Ok(Freshness::Stale)
+                }
+            }
+            Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(Freshness::Missing),
+            Err(err) => Err(err),
+        }
+    }
+
+    /// Persist a temporary directory to the artifact store.
+    pub fn persist(&self, temp_dir: impl AsRef<Path>, path: impl AsRef<Path>) -> io::Result<()> {
         // Create a unique ID for the artifact.
+        // TODO(charlie): Support content-addressed persistence via SHAs.
         let id = uuid::Uuid::new_v4();
 
         // Move the temporary directory into the directory store.
@@ -589,33 +628,103 @@ impl Display for CacheBucket {
     }
 }
 
-/// Return the modification timestamp for an archive, which could be a file (like a wheel or a zip
-/// archive) or a directory containing a Python package.
-///
-/// If the path is to a directory with no entrypoint (i.e., no `pyproject.toml` or `setup.py`),
-/// returns `None`.
-pub fn archive_mtime(path: &Path) -> Result<Option<SystemTime>, io::Error> {
-    let metadata = fs_err::metadata(path)?;
-    if metadata.is_file() {
-        // `modified()` is infallible on Windows and Unix (i.e., all platforms we support).
-        Ok(Some(metadata.modified()?))
-    } else {
-        if let Some(metadata) = path
-            .join("pyproject.toml")
-            .metadata()
-            .ok()
-            .filter(std::fs::Metadata::is_file)
-        {
-            Ok(Some(metadata.modified()?))
-        } else if let Some(metadata) = path
-            .join("setup.py")
-            .metadata()
-            .ok()
-            .filter(std::fs::Metadata::is_file)
-        {
-            Ok(Some(metadata.modified()?))
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ArchiveTimestamp {
+    /// The archive consists of a single file with the given modification time.
+    Exact(SystemTime),
+    /// The archive consists of a directory. The modification time is the latest modification time
+    /// of the `pyproject.toml` or `setup.py` file in the directory.
+    Approximate(SystemTime),
+}
+
+impl ArchiveTimestamp {
+    /// Return the modification timestamp for an archive, which could be a file (like a wheel or a
+    /// zip archive) or a directory containing a Python package.
+    ///
+    /// If the path is to a directory with no entrypoint (i.e., no `pyproject.toml` or `setup.py`),
+    /// returns `None`.
+    pub fn from_path(path: impl AsRef<Path>) -> Result<Option<Self>, io::Error> {
+        let metadata = fs_err::metadata(path.as_ref())?;
+        if metadata.is_file() {
+            // `modified()` is infallible on Windows and Unix (i.e., all platforms we support).
+            Ok(Some(Self::Exact(metadata.modified()?)))
         } else {
-            Ok(None)
+            if let Some(metadata) = path
+                .as_ref()
+                .join("pyproject.toml")
+                .metadata()
+                .ok()
+                .filter(std::fs::Metadata::is_file)
+            {
+                Ok(Some(Self::Approximate(metadata.modified()?)))
+            } else if let Some(metadata) = path
+                .as_ref()
+                .join("setup.py")
+                .metadata()
+                .ok()
+                .filter(std::fs::Metadata::is_file)
+            {
+                Ok(Some(Self::Approximate(metadata.modified()?)))
+            } else {
+                Ok(None)
+            }
+        }
+    }
+
+    /// Return the modification timestamp for an archive.
+    pub fn timestamp(&self) -> SystemTime {
+        match self {
+            Self::Exact(timestamp) => *timestamp,
+            Self::Approximate(timestamp) => *timestamp,
         }
     }
 }
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Freshness {
+    /// The cache entry is fresh according to the [`Refresh`] policy.
+    Fresh,
+    /// The cache entry is stale according to the [`Refresh`] policy.
+    Stale,
+    /// The cache entry does not exist.
+    Missing,
+}
+
+impl Freshness {
+    pub const fn is_fresh(self) -> bool {
+        matches!(self, Self::Fresh)
+    }
+
+    pub const fn is_stale(self) -> bool {
+        matches!(self, Self::Stale)
+    }
+}
+
+/// A refresh policy for cache entries.
+#[derive(Debug, Clone)]
+pub enum Refresh {
+    /// Don't refresh any entries.
+    None,
+    /// Refresh entries linked to the given packages, if created before the given timestamp.
+    Packages(Vec<PackageName>, SystemTime),
+    /// Refresh all entries created before the given timestamp.
+    All(SystemTime),
+}
+
+impl Refresh {
+    /// Determine the refresh strategy to use based on the command-line arguments.
+    pub fn from_args(refresh: bool, refresh_package: Vec<PackageName>) -> Self {
+        if refresh {
+            Self::All(SystemTime::now())
+        } else if !refresh_package.is_empty() {
+            Self::Packages(refresh_package, SystemTime::now())
+        } else {
+            Self::None
+        }
+    }
+
+    /// Returns `true` if no cached data should be refreshed.
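+    ///
+    /// A minimal sketch of the intended behavior (illustrative, not part of this change):
+    ///
+    /// ```rust,ignore
+    /// // No `--refresh` or `--refresh-package` flags: nothing is refreshed.
+    /// assert!(Refresh::from_args(false, vec![]).is_none());
+    /// // `--refresh` takes precedence and refreshes everything.
+    /// assert!(!Refresh::from_args(true, vec![]).is_none());
+    /// ```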
+    pub fn is_none(&self) -> bool {
+        matches!(self, Self::None)
+    }
+}
diff --git a/crates/puffin-client/src/cached_client.rs b/crates/puffin-client/src/cached_client.rs
index 56cf7463d..2bb77b15a 100644
--- a/crates/puffin-client/src/cached_client.rs
+++ b/crates/puffin-client/src/cached_client.rs
@@ -9,7 +9,7 @@ use serde::de::DeserializeOwned;
 use serde::{Deserialize, Serialize};
 use tracing::{debug, info_span, instrument, trace, warn, Instrument};
 
-use puffin_cache::CacheEntry;
+use puffin_cache::{CacheEntry, Freshness};
 use puffin_fs::write_atomic;
 
 use crate::cache_headers::CacheHeaders;
@@ -104,6 +104,7 @@ impl CachedClient {
         &self,
         req: Request,
         cache_entry: &CacheEntry,
+        cache_control: CacheControl,
         response_callback: Callback,
     ) -> Result<Payload, CachedClientError<CallbackError>>
     where
@@ -136,7 +137,7 @@ impl CachedClient {
             None
         };
 
-        let cached_response = self.send_cached(req, cached).boxed().await?;
+        let cached_response = self.send_cached(req, cache_control, cached).boxed().await?;
 
         let write_cache = info_span!("write_cache", file = %cache_entry.path().display());
         match cached_response {
@@ -190,6 +191,7 @@ impl CachedClient {
     async fn send_cached(
         &self,
         mut req: Request,
+        cache_control: CacheControl,
         cached: Option<DataWithCachePolicy<Payload>>,
     ) -> Result<CachedResponse<Payload>, crate::Error> {
         // The converted types are from the specific `reqwest` types to the more generic `http`
@@ -198,6 +200,7 @@ impl CachedClient {
             req.try_clone()
                 .expect("You can't use streaming request bodies with this function"),
         )?;
+        let url = req.url().clone();
 
         let cached_response = if let Some(cached) = cached {
             // Avoid sending revalidation requests for immutable responses.
@@ -206,6 +209,17 @@ impl CachedClient {
                 return Ok(CachedResponse::FreshCache(cached.data));
             }
 
+            // Apply the cache control header, if necessary.
+            match cache_control {
+                CacheControl::None => {}
+                CacheControl::MustRevalidate => {
+                    converted_req.headers_mut().insert(
+                        http::header::CACHE_CONTROL,
+                        http::HeaderValue::from_static("max-age=0, must-revalidate"),
+                    );
+                }
+            }
+
             match cached
                 .cache_policy
                 .before_request(&converted_req, SystemTime::now())
@@ -300,3 +314,21 @@ impl CachedClient {
         ))
     }
 }
+
+#[derive(Debug, Clone, Copy)]
+pub enum CacheControl {
+    /// Respect the `cache-control` header from the response.
+    None,
+    /// Apply `max-age=0, must-revalidate` to the request.
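+    ///
+    /// Forces revalidation with the origin server before a cached response may be reused. A
+    /// request sent under this policy carries the header:
+    ///
+    /// ```text
+    /// cache-control: max-age=0, must-revalidate
+    /// ```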
+    MustRevalidate,
+}
+
+impl From<Freshness> for CacheControl {
+    fn from(value: Freshness) -> Self {
+        match value {
+            Freshness::Fresh => CacheControl::None,
+            Freshness::Stale => CacheControl::MustRevalidate,
+            Freshness::Missing => CacheControl::None,
+        }
+    }
+}
diff --git a/crates/puffin-client/src/flat_index.rs b/crates/puffin-client/src/flat_index.rs
index 25a6de2a2..30b603181 100644
--- a/crates/puffin-client/src/flat_index.rs
+++ b/crates/puffin-client/src/flat_index.rs
@@ -19,6 +19,7 @@ use puffin_cache::{Cache, CacheBucket};
 use puffin_normalize::PackageName;
 use pypi_types::Hashes;
 
+use crate::cached_client::CacheControl;
 use crate::html::SimpleHtml;
 use crate::{Error, RegistryClient};
 
@@ -91,6 +92,8 @@ impl<'a> FlatIndexClient<'a> {
             "html",
             format!("{}.msgpack", cache_key::digest(&url.to_string())),
         );
+        let cache_control = CacheControl::from(self.cache.freshness(&cache_entry, None)?);
+
         let cached_client = self.client.cached_client();
 
         let flat_index_request = cached_client
@@ -124,7 +127,12 @@ impl<'a> FlatIndexClient<'a> {
             .instrument(info_span!("parse_flat_index_html", url = % url))
         };
         let files = cached_client
-            .get_cached_with_callback(flat_index_request, &cache_entry, parse_simple_response)
+            .get_cached_with_callback(
+                flat_index_request,
+                &cache_entry,
+                cache_control,
+                parse_simple_response,
+            )
             .await?;
         Ok(files
             .into_iter()
diff --git a/crates/puffin-client/src/lib.rs b/crates/puffin-client/src/lib.rs
index 70e5fa2c6..336caf00b 100644
--- a/crates/puffin-client/src/lib.rs
+++ b/crates/puffin-client/src/lib.rs
@@ -1,4 +1,4 @@
-pub use cached_client::{CachedClient, CachedClientError, DataWithCachePolicy};
+pub use cached_client::{CacheControl, CachedClient, CachedClientError, DataWithCachePolicy};
 pub use error::Error;
 pub use flat_index::{FlatDistributions, FlatIndex, FlatIndexClient, FlatIndexError};
 pub use registry_client::{
diff --git a/crates/puffin-client/src/registry_client.rs b/crates/puffin-client/src/registry_client.rs
index 3b56c24a7..5410004f2 100644
--- a/crates/puffin-client/src/registry_client.rs
+++ b/crates/puffin-client/src/registry_client.rs
@@ -24,6 +24,7 @@ use puffin_cache::{Cache, CacheBucket, WheelCache};
 use puffin_normalize::PackageName;
 use pypi_types::{BaseUrl, Metadata21, SimpleJson};
 
+use crate::cached_client::CacheControl;
 use crate::html::SimpleHtml;
 use crate::remote_metadata::wheel_metadata_from_remote_zip;
 use crate::{CachedClient, CachedClientError, Error};
@@ -166,6 +167,8 @@ impl RegistryClient {
             }),
             format!("{package_name}.msgpack"),
         );
+        let cache_control =
+            CacheControl::from(self.cache.freshness(&cache_entry, Some(package_name))?);
 
         let simple_request = self
             .client
@@ -211,7 +214,12 @@ impl RegistryClient {
         };
         let result = self
             .client
-            .get_cached_with_callback(simple_request, &cache_entry, parse_simple_response)
+            .get_cached_with_callback(
+                simple_request,
+                &cache_entry,
+                cache_control,
+                parse_simple_response,
+            )
             .await;
         Ok(result)
     }
@@ -286,6 +294,8 @@ impl RegistryClient {
                 WheelCache::Index(index).remote_wheel_dir(filename.name.as_ref()),
                 format!("{}.msgpack", filename.stem()),
             );
+            let cache_control =
+                CacheControl::from(self.cache.freshness(&cache_entry, Some(&filename.name))?);
 
             let response_callback = |response: Response| async {
                 let bytes = response.bytes().await?;
@@ -299,7 +309,7 @@ impl RegistryClient {
             let req = self.client.uncached().get(url.clone()).build()?;
             Ok(self
                 .client
-                .get_cached_with_callback(req, &cache_entry, response_callback)
+                .get_cached_with_callback(req, &cache_entry, cache_control, response_callback)
                .await?)
         } else {
             // If we lack PEP 658 support, try using HTTP range requests to read only the
@@ -322,6 +332,8 @@ impl RegistryClient {
                 cache_shard.remote_wheel_dir(filename.name.as_ref()),
                 format!("{}.msgpack", filename.stem()),
             );
+            let cache_control =
+                CacheControl::from(self.cache.freshness(&cache_entry, Some(&filename.name))?);
 
             // This response callback is special, we actually make a number of subsequent requests to
             // fetch the file from the remote zip.
@@ -343,7 +355,12 @@ impl RegistryClient {
             let req = self.client.uncached().head(url.clone()).build()?;
             let result = self
                 .client
-                .get_cached_with_callback(req, &cache_entry, read_metadata_range_request)
+                .get_cached_with_callback(
+                    req,
+                    &cache_entry,
+                    cache_control,
+                    read_metadata_range_request,
+                )
                 .await
                 .map_err(crate::Error::from);
diff --git a/crates/puffin-distribution/src/distribution_database.rs b/crates/puffin-distribution/src/distribution_database.rs
index 9ff6e902b..f8f6aefae 100644
--- a/crates/puffin-distribution/src/distribution_database.rs
+++ b/crates/puffin-distribution/src/distribution_database.rs
@@ -15,7 +15,7 @@ use distribution_types::{
 };
 use platform_tags::Tags;
 use puffin_cache::{Cache, CacheBucket, WheelCache};
-use puffin_client::{CachedClientError, RegistryClient};
+use puffin_client::{CacheControl, CachedClientError, RegistryClient};
 use puffin_extract::unzip_no_seek;
 use puffin_git::GitSource;
 use puffin_traits::{BuildContext, NoBinary};
@@ -35,6 +35,8 @@ pub enum DistributionDatabaseError {
     #[error(transparent)]
     Request(#[from] reqwest::Error),
     #[error(transparent)]
+    Io(#[from] io::Error),
+    #[error(transparent)]
     SourceBuild(#[from] SourceDistError),
     #[error("Git operation failed")]
     Git(#[source] anyhow::Error),
@@ -129,6 +131,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
                         wheel.filename.stem(),
                     );
 
+                    // TODO(charlie): There's no need to unzip if the wheel is unchanged.
                     return Ok(LocalWheel::Disk(DiskWheel {
                         dist: dist.clone(),
                         path: path.clone(),
@@ -167,9 +170,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
                 };
 
                 let req = self.client.cached_client().uncached().get(url).build()?;
+                let cache_control =
+                    CacheControl::from(self.cache.freshness(&http_entry, Some(wheel.name()))?);
                 self.client
                     .cached_client()
-                    .get_cached_with_callback(req, &http_entry, download)
+                    .get_cached_with_callback(req, &http_entry, cache_control, download)
                    .await
                    .map_err(|err| match err {
                        CachedClientError::Callback(err) => err,
@@ -222,9 +227,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
                     .uncached()
                     .get(wheel.url.raw().clone())
                     .build()?;
+                let cache_control =
+                    CacheControl::from(self.cache.freshness(&http_entry, Some(wheel.name()))?);
                 self.client
                     .cached_client()
-                    .get_cached_with_callback(req, &http_entry, download)
+                    .get_cached_with_callback(req, &http_entry, cache_control, download)
                     .await
                     .map_err(|err| match err {
                         CachedClientError::Callback(err) => err,
@@ -249,6 +256,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
                     wheel.filename.stem(),
                 );
 
+                // TODO(charlie): There's no need to unzip if the wheel is unchanged.
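+                // Until that lands, an unchanged cached wheel still takes the `DiskWheel` path
+                // below and is re-unzipped on install.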
                Ok(LocalWheel::Disk(DiskWheel {
                    dist: dist.clone(),
                    path: wheel.path.clone(),
@@ -262,12 +270,23 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
                let _guard = lock.lock().await;
 
                let built_wheel = self.builder.download_and_build(source_dist).boxed().await?;
-                Ok(LocalWheel::Built(BuiltWheel {
-                    dist: dist.clone(),
-                    path: built_wheel.path,
-                    target: built_wheel.target,
-                    filename: built_wheel.filename,
-                }))
+
+                // If the wheel was unzipped previously, respect it. Source distributions are
+                // cached under a unique build ID, so unzipped directories are never stale.
+                if built_wheel.target.exists() {
+                    Ok(LocalWheel::Unzipped(UnzippedWheel {
+                        dist: dist.clone(),
+                        target: built_wheel.target,
+                        filename: built_wheel.filename,
+                    }))
+                } else {
+                    Ok(LocalWheel::Built(BuiltWheel {
+                        dist: dist.clone(),
+                        path: built_wheel.path,
+                        target: built_wheel.target,
+                        filename: built_wheel.filename,
+                    }))
+                }
             }
         }
     }
diff --git a/crates/puffin-distribution/src/index/built_wheel_index.rs b/crates/puffin-distribution/src/index/built_wheel_index.rs
index 36120c188..1310b0e77 100644
--- a/crates/puffin-distribution/src/index/built_wheel_index.rs
+++ b/crates/puffin-distribution/src/index/built_wheel_index.rs
@@ -1,7 +1,8 @@
 use distribution_types::{git_reference, DirectUrlSourceDist, GitSourceDist, Name, PathSourceDist};
 use platform_tags::Tags;
-use puffin_cache::{Cache, CacheBucket, CacheShard, WheelCache};
+use puffin_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheShard, Freshness, WheelCache};
 use puffin_fs::symlinks;
+use puffin_normalize::PackageName;
 
 use crate::index::cached_wheel::CachedWheel;
 use crate::source::{read_http_manifest, read_timestamp_manifest, MANIFEST};
@@ -26,13 +27,19 @@ impl BuiltWheelIndex {
             WheelCache::Url(source_dist.url.raw()).remote_wheel_dir(source_dist.name().as_ref()),
         );
 
-        // Read the existing metadata from the cache, if it exists.
+        // Read the manifest from the cache. There's no need to enforce freshness, since we
+        // enforce freshness on the entries.
         let manifest_entry = cache_shard.entry(MANIFEST);
         let Some(manifest) = read_http_manifest(&manifest_entry)? else {
             return Ok(None);
         };
 
-        Ok(Self::find(&cache_shard.shard(manifest.digest()), tags))
+        Ok(Self::find(
+            &cache_shard.shard(manifest.digest()),
+            source_dist.name(),
+            cache,
+            tags,
+        ))
     }
 
     /// Return the most compatible [`CachedWheel`] for a given source distribution at a local path.
@@ -47,17 +54,23 @@ impl BuiltWheelIndex {
         );
 
         // Determine the last-modified time of the source distribution.
-        let Some(modified) = puffin_cache::archive_mtime(&source_dist.path)? else {
+        let Some(modified) = ArchiveTimestamp::from_path(&source_dist.path)? else {
             return Err(SourceDistError::DirWithoutEntrypoint);
         };
 
-        // Read the existing metadata from the cache, if it's up-to-date.
+        // Read the manifest from the cache. There's no need to enforce freshness, since we
+        // enforce freshness on the entries.
         let manifest_entry = cache_shard.entry(MANIFEST);
         let Some(manifest) = read_timestamp_manifest(&manifest_entry, modified)? else {
             return Ok(None);
         };
 
-        Ok(Self::find(&cache_shard.shard(manifest.digest()), tags))
+        Ok(Self::find(
+            &cache_shard.shard(manifest.digest()),
+            source_dist.name(),
+            cache,
+            tags,
+        ))
     }
 
     /// Return the most compatible [`CachedWheel`] for a given source distribution at a git URL.
@@ -72,7 +85,7 @@ impl BuiltWheelIndex {
                 .remote_wheel_dir(source_dist.name().as_ref()),
         );
 
-        Self::find(&cache_shard, tags)
+        Self::find(&cache_shard, source_dist.name(), cache, tags)
     }
 
     /// Find the "best" distribution in the index for a given source distribution.
@@ -91,7 +104,12 @@ impl BuiltWheelIndex {
     /// ```
     ///
     /// The `shard` should be `built-wheels-v0/pypi/django-allauth-0.51.0.tar.gz`.
-    pub fn find(shard: &CacheShard, tags: &Tags) -> Option<CachedWheel> {
+    fn find(
+        shard: &CacheShard,
+        package: &PackageName,
+        cache: &Cache,
+        tags: &Tags,
+    ) -> Option<CachedWheel> {
         let mut candidate: Option<CachedWheel> = None;
 
         // Unzipped wheels are stored as symlinks into the archive directory.
@@ -99,6 +117,15 @@ impl BuiltWheelIndex {
             match CachedWheel::from_path(&subdir) {
                 None => {}
                 Some(dist_info) => {
+                    // If the [`Refresh`] policy is set, ignore entries that were created before
+                    // the cutoff.
+                    if cache
+                        .freshness(&dist_info.entry, Some(package))
+                        .is_ok_and(Freshness::is_stale)
+                    {
+                        continue;
+                    }
+
                     // Pick the wheel with the highest priority
                     let compatibility = dist_info.filename.compatibility(tags);
 
diff --git a/crates/puffin-distribution/src/index/registry_wheel_index.rs b/crates/puffin-distribution/src/index/registry_wheel_index.rs
index 61d45b273..410c2566b 100644
--- a/crates/puffin-distribution/src/index/registry_wheel_index.rs
+++ b/crates/puffin-distribution/src/index/registry_wheel_index.rs
@@ -7,7 +7,7 @@ use rustc_hash::FxHashMap;
 use distribution_types::{CachedRegistryDist, FlatIndexLocation, IndexLocations, IndexUrl};
 use pep440_rs::Version;
 use platform_tags::Tags;
-use puffin_cache::{Cache, CacheBucket, WheelCache};
+use puffin_cache::{Cache, CacheBucket, Freshness, WheelCache};
 use puffin_fs::{directories, symlinks};
 use puffin_normalize::PackageName;
 
@@ -94,7 +94,7 @@ impl<'a> RegistryWheelIndex<'a> {
             WheelCache::Index(index_url).remote_wheel_dir(package.to_string()),
         );
 
-        Self::add_directory(&*wheel_dir, tags, &mut versions);
+        Self::add_directory(&wheel_dir, package, cache, tags, &mut versions);
 
         // Index all the built wheels, created by downloading and building source distributions
         // from the registry.
@@ -109,7 +109,13 @@ impl<'a> RegistryWheelIndex<'a> {
                 let cache_shard = cache_shard.shard(shard);
                 let manifest_entry = cache_shard.entry(MANIFEST);
                 if let Ok(Some(manifest)) = read_http_manifest(&manifest_entry) {
-                    Self::add_directory(cache_shard.join(manifest.digest()), tags, &mut versions);
+                    Self::add_directory(
+                        cache_shard.join(manifest.digest()),
+                        package,
+                        cache,
+                        tags,
+                        &mut versions,
+                    );
                 };
             }
         }
@@ -122,6 +128,8 @@ impl<'a> RegistryWheelIndex<'a> {
     /// Each subdirectory in the given path is expected to be that of an unzipped wheel.
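+    ///
+    /// For example (an illustrative entry, not verbatim from the cache): a subdirectory named
+    /// `markupsafe-2.1.3-cp312-cp312-macosx_11_0_arm64.whl`, containing the unzipped contents
+    /// of that wheel.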
     fn add_directory(
         path: impl AsRef<Path>,
+        package: &PackageName,
+        cache: &Cache,
         tags: &Tags,
         versions: &mut BTreeMap<Version, CachedRegistryDist>,
     ) {
@@ -130,6 +138,13 @@ impl<'a> RegistryWheelIndex<'a> {
             match CachedWheel::from_path(&wheel_dir) {
                 None => {}
                 Some(dist_info) => {
+                    if cache
+                        .freshness(&dist_info.entry, Some(package))
+                        .is_ok_and(Freshness::is_stale)
+                    {
+                        continue;
+                    }
+
                     let dist_info = dist_info.into_registry_dist();
 
                     // Pick the wheel with the highest priority
diff --git a/crates/puffin-distribution/src/source/built_wheel_metadata.rs b/crates/puffin-distribution/src/source/built_wheel_metadata.rs
index 16a51df77..1e6879a51 100644
--- a/crates/puffin-distribution/src/source/built_wheel_metadata.rs
+++ b/crates/puffin-distribution/src/source/built_wheel_metadata.rs
@@ -4,7 +4,7 @@ use std::str::FromStr;
 use distribution_filename::WheelFilename;
 use platform_tags::Tags;
 use puffin_cache::CacheShard;
-use puffin_fs::directories;
+use puffin_fs::files;
 
 /// The information about the wheel we either just built or got from the cache.
 #[derive(Debug, Clone)]
@@ -20,8 +20,8 @@ pub struct BuiltWheelMetadata {
 impl BuiltWheelMetadata {
     /// Find a compatible wheel in the cache based on the given manifest.
     pub(crate) fn find_in_cache(tags: &Tags, cache_shard: &CacheShard) -> Option<Self> {
-        for directory in directories(cache_shard) {
-            if let Some(metadata) = Self::from_path(directory) {
+        for directory in files(cache_shard) {
+            if let Some(metadata) = Self::from_path(directory, cache_shard) {
                 // Validate that the wheel is compatible with the target platform.
                 if metadata.filename.is_compatible(tags) {
                     return Some(metadata);
@@ -32,11 +32,11 @@ impl BuiltWheelMetadata {
     }
 
     /// Try to parse a distribution from a cached directory name (like `typing-extensions-4.8.0-py3-none-any.whl`).
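+    ///
+    /// Note that the returned `target` now resolves directly against the cache shard (i.e.,
+    /// `<shard>/<stem>`) rather than being nested under the wheel path itself.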
-    fn from_path(path: PathBuf) -> Option<Self> {
+    fn from_path(path: PathBuf, cache_shard: &CacheShard) -> Option<Self> {
         let filename = path.file_name()?.to_str()?;
         let filename = WheelFilename::from_str(filename).ok()?;
         Some(Self {
-            target: path.join(filename.stem()),
+            target: cache_shard.join(filename.stem()),
             path,
             filename,
         })
diff --git a/crates/puffin-distribution/src/source/mod.rs b/crates/puffin-distribution/src/source/mod.rs
index 3b1cedc20..68c447a47 100644
--- a/crates/puffin-distribution/src/source/mod.rs
+++ b/crates/puffin-distribution/src/source/mod.rs
@@ -23,8 +23,10 @@ use distribution_types::{
 use install_wheel_rs::read_dist_info;
 use pep508_rs::VerbatimUrl;
 use platform_tags::Tags;
-use puffin_cache::{CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, WheelCache};
-use puffin_client::{CachedClient, CachedClientError, DataWithCachePolicy};
+use puffin_cache::{
+    ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness, WheelCache,
+};
+use puffin_client::{CacheControl, CachedClient, CachedClientError, DataWithCachePolicy};
 use puffin_fs::{write_atomic, LockedFile};
 use puffin_git::{Fetch, GitSource};
 use puffin_traits::{BuildContext, BuildKind, SourceBuildTrait};
@@ -247,6 +249,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         subdirectory: Option<&'data Path>,
     ) -> Result<BuiltWheelMetadata, SourceDistError> {
         let cache_entry = cache_shard.entry(MANIFEST);
+        let cache_control = CacheControl::from(
+            self.build_context
+                .cache()
+                .freshness(&cache_entry, Some(source_dist.name()))?,
+        );
 
         let download = |response| {
             async {
@@ -267,14 +274,16 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         let req = self.cached_client.uncached().get(url.clone()).build()?;
         let manifest = self
             .cached_client
-            .get_cached_with_callback(req, &cache_entry, download)
+            .get_cached_with_callback(req, &cache_entry, cache_control, download)
             .await
             .map_err(|err| match err {
                 CachedClientError::Callback(err) => err,
                 CachedClientError::Client(err) => SourceDistError::Client(err),
             })?;
 
-        // From here on, scope all operations to the current build.
+        // From here on, scope all operations to the current build. Within the manifest shard,
+        // there's no need to check for freshness, since entries have to be fresher than the
+        // manifest itself.
         let cache_shard = cache_shard.shard(manifest.digest());
 
         // If the cache contains a compatible wheel, return it.
@@ -282,8 +291,6 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
             return Ok(built_wheel);
         }
 
-        // At this point, we're seeing cached metadata (as in, we have an up-to-date source
-        // distribution), but the wheel(s) we built previously are incompatible.
         let task = self
             .reporter
             .as_ref()
             .map(|reporter| reporter.on_build_start(source_dist));
@@ -310,12 +317,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         let metadata_entry = cache_shard.entry(METADATA);
         write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?).await?;
 
-        let path = cache_shard.join(&disk_filename);
-        let target = cache_shard.join(wheel_filename.stem());
-
         Ok(BuiltWheelMetadata {
-            path,
-            target,
+            path: cache_shard.join(&disk_filename),
+            target: cache_shard.join(wheel_filename.stem()),
             filename: wheel_filename,
         })
     }
@@ -334,6 +338,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         subdirectory: Option<&'data Path>,
     ) -> Result<Metadata21, SourceDistError> {
         let cache_entry = cache_shard.entry(MANIFEST);
+        let cache_control = CacheControl::from(
+            self.build_context
+                .cache()
+                .freshness(&cache_entry, Some(source_dist.name()))?,
+        );
 
         let download = |response| {
             async {
@@ -354,18 +363,22 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         let req = self.cached_client.uncached().get(url.clone()).build()?;
         let manifest = self
             .cached_client
-            .get_cached_with_callback(req, &cache_entry, download)
+            .get_cached_with_callback(req, &cache_entry, cache_control, download)
             .await
             .map_err(|err| match err {
                 CachedClientError::Callback(err) => err,
                 CachedClientError::Client(err) => SourceDistError::Client(err),
             })?;
 
-        // From here on, scope all operations to the current build.
+        // From here on, scope all operations to the current build. Within the manifest shard,
+        // there's no need to check for freshness, since entries have to be fresher than the
+        // manifest itself.
         let cache_shard = cache_shard.shard(manifest.digest());
 
         // If the cache contains compatible metadata, return it.
-        if let Some(metadata) = read_cached_metadata(&cache_shard.entry(METADATA)).await? {
+        let metadata_entry = cache_shard.entry(METADATA);
+        if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
+            debug!("Using cached metadata for {source_dist}");
             return Ok(metadata.clone());
         }
 
@@ -386,8 +399,6 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
             return Ok(metadata);
         }
 
-        // At this point, we're seeing cached metadata (as in, we have an up-to-date source
-        // distribution), but the wheel(s) we built previously are incompatible.
         let task = self
             .reporter
             .as_ref()
@@ -429,15 +440,22 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         );
 
         // Determine the last-modified time of the source distribution.
-        let Some(modified) = puffin_cache::archive_mtime(&path_source_dist.path)? else {
+        let Some(modified) = ArchiveTimestamp::from_path(&path_source_dist.path)? else {
             return Err(SourceDistError::DirWithoutEntrypoint);
         };
 
-        // Read the existing metadata from the cache, to clear stale wheels.
+        // Read the existing metadata from the cache.
         let manifest_entry = cache_shard.entry(MANIFEST);
-        let manifest = refresh_timestamp_manifest(&manifest_entry, modified).await?;
+        let manifest_freshness = self
+            .build_context
+            .cache()
+            .freshness(&manifest_entry, Some(source_dist.name()))?;
+        let manifest =
+            refresh_timestamp_manifest(&manifest_entry, manifest_freshness, modified).await?;
 
-        // From here on, scope all operations to the current build.
+        // From here on, scope all operations to the current build. Within the manifest shard,
+        // there's no need to check for freshness, since entries have to be fresher than the
+        // manifest itself.
         let cache_shard = cache_shard.shard(manifest.digest());
 
         // If the cache contains a compatible wheel, return it.
@@ -488,20 +506,36 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         );
 
         // Determine the last-modified time of the source distribution.
-        let Some(modified) = puffin_cache::archive_mtime(&path_source_dist.path)? else {
+        let Some(modified) = ArchiveTimestamp::from_path(&path_source_dist.path)? else {
             return Err(SourceDistError::DirWithoutEntrypoint);
         };
 
         // Read the existing metadata from the cache, to clear stale entries.
         let manifest_entry = cache_shard.entry(MANIFEST);
-        let manifest = refresh_timestamp_manifest(&manifest_entry, modified).await?;
+        let manifest_freshness = self
+            .build_context
+            .cache()
+            .freshness(&manifest_entry, Some(source_dist.name()))?;
+        let manifest =
+            refresh_timestamp_manifest(&manifest_entry, manifest_freshness, modified).await?;
 
-        // From here on, scope all operations to the current build.
+        // From here on, scope all operations to the current build. Within the manifest shard,
+        // there's no need to check for freshness, since entries have to be fresher than the
+        // manifest itself.
         let cache_shard = cache_shard.shard(manifest.digest());
 
         // If the cache contains compatible metadata, return it.
-        if let Some(metadata) = read_cached_metadata(&cache_shard.entry(METADATA)).await? {
-            return Ok(metadata.clone());
+        let metadata_entry = cache_shard.entry(METADATA);
+        if self
+            .build_context
+            .cache()
+            .freshness(&metadata_entry, Some(source_dist.name()))
+            .is_ok_and(Freshness::is_fresh)
+        {
+            if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
+                debug!("Using cached metadata for {source_dist}");
+                return Ok(metadata.clone());
+            }
         }
 
         // If the backend supports `prepare_metadata_for_build_wheel`, use it.
@@ -611,8 +645,17 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         );
 
         // If the cache contains compatible metadata, return it.
-        if let Some(metadata) = read_cached_metadata(&cache_shard.entry(METADATA)).await? {
-            return Ok(metadata.clone());
+        let metadata_entry = cache_shard.entry(METADATA);
+        if self
+            .build_context
+            .cache()
+            .freshness(&metadata_entry, Some(source_dist.name()))
+            .is_ok_and(Freshness::is_fresh)
+        {
+            if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
+                debug!("Using cached metadata for {source_dist}");
+                return Ok(metadata.clone());
+            }
         }
 
         // If the backend supports `prepare_metadata_for_build_wheel`, use it.
@@ -902,13 +945,13 @@ pub(crate) fn read_http_manifest(
 /// If the cache entry is stale, a new entry will be created.
 pub(crate) fn read_timestamp_manifest(
     cache_entry: &CacheEntry,
-    modified: SystemTime,
+    modified: ArchiveTimestamp,
 ) -> Result<Option<Manifest>, SourceDistError> {
     // If the cache entry is up-to-date, return it.
     match std::fs::read(cache_entry.path()) {
         Ok(cached) => {
             let cached = rmp_serde::from_slice::<CachedByTimestamp<Manifest>>(&cached)?;
-            if cached.timestamp == modified {
+            if cached.timestamp == modified.timestamp() {
                 return Ok(Some(cached.data));
             }
         }
@@ -923,11 +966,14 @@ pub(crate) fn read_timestamp_manifest(
 /// If the cache entry is stale, a new entry will be created.
 pub(crate) async fn refresh_timestamp_manifest(
     cache_entry: &CacheEntry,
-    modified: SystemTime,
+    freshness: Freshness,
+    modified: ArchiveTimestamp,
 ) -> Result<Manifest, SourceDistError> {
-    // If the cache entry is up-to-date, return it.
-    if let Some(manifest) = read_timestamp_manifest(cache_entry, modified)? {
-        return Ok(manifest);
+    // If we know the exact modification time, we don't need to force a revalidate.
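+    // An `Exact` timestamp comes from a single archive file, so the recorded mtime alone
+    // determines staleness; only `Approximate` (directory-based) timestamps defer to the
+    // `Refresh` policy.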
+    if matches!(modified, ArchiveTimestamp::Exact(_)) || freshness.is_fresh() {
+        if let Some(manifest) = read_timestamp_manifest(cache_entry, modified)? {
+            return Ok(manifest);
+        }
     }
 
     // Otherwise, create a new manifest.
@@ -936,7 +982,7 @@ pub(crate) async fn refresh_timestamp_manifest(
     write_atomic(
         cache_entry.path(),
         rmp_serde::to_vec(&CachedByTimestamp {
-            timestamp: modified,
+            timestamp: modified.timestamp(),
             data: manifest,
         })?,
     )
diff --git a/crates/puffin-fs/src/lib.rs b/crates/puffin-fs/src/lib.rs
index ded5172aa..9bf205d9d 100644
--- a/crates/puffin-fs/src/lib.rs
+++ b/crates/puffin-fs/src/lib.rs
@@ -131,6 +131,30 @@ pub fn symlinks(path: impl AsRef<Path>) -> impl Iterator<Item = PathBuf> {
         .map(|entry| entry.path())
 }
 
+/// Iterate over the files in a directory.
+///
+/// If the directory does not exist, returns an empty iterator.
+pub fn files(path: impl AsRef<Path>) -> impl Iterator<Item = PathBuf> {
+    path.as_ref()
+        .read_dir()
+        .ok()
+        .into_iter()
+        .flatten()
+        .filter_map(|entry| match entry {
+            Ok(entry) => Some(entry),
+            Err(err) => {
+                warn!("Failed to read entry: {}", err);
+                None
+            }
+        })
+        .filter(|entry| {
+            entry
+                .file_type()
+                .map_or(false, |file_type| file_type.is_file())
+        })
+        .map(|entry| entry.path())
+}
+
 /// A file lock that is automatically released when dropped.
 #[derive(Debug)]
 pub struct LockedFile(fs_err::File);
diff --git a/crates/puffin-installer/src/plan.rs b/crates/puffin-installer/src/plan.rs
index 43aa2e512..ab15fa025 100644
--- a/crates/puffin-installer/src/plan.rs
+++ b/crates/puffin-installer/src/plan.rs
@@ -12,7 +12,7 @@ use distribution_types::{
 };
 use pep508_rs::{Requirement, VersionOrUrl};
 use platform_tags::Tags;
-use puffin_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
+use puffin_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheEntry, Freshness, WheelCache};
 use puffin_distribution::{BuiltWheelIndex, RegistryWheelIndex};
 use puffin_interpreter::Virtualenv;
 use puffin_normalize::PackageName;
@@ -48,6 +48,11 @@ impl<'a> Planner<'a> {
     /// Partition a set of requirements into those that should be linked from the cache, those that
     /// need to be downloaded, and those that should be removed.
+    ///
+    /// The install plan will respect cache [`Freshness`]. Specifically, if refresh is enabled, the
+    /// plan will only respect cache entries created after the cutoff (as per the [`Refresh`]
+    /// policy); stale entries will be ignored. The downstream distribution database may still
+    /// read those entries from the cache after revalidating them.
     #[allow(clippy::too_many_arguments)]
     pub fn build(
         self,
@@ -140,9 +145,6 @@ impl<'a> Planner<'a> {
             };
 
             if reinstall {
-                // If necessary, purge the cached distributions.
-                debug!("Purging cached distributions for: {requirement}");
-                cache.purge(&requirement.name)?;
                 if let Some(distribution) = site_packages.remove(&requirement.name) {
                     reinstalls.push(distribution);
                 }
@@ -164,7 +166,7 @@ impl<'a> Planner<'a> {
                     if &distribution.url == url.raw() {
                         // If the requirement came from a local path, check freshness.
                         if let Ok(archive) = url.to_file_path() {
-                            if is_fresh_install(distribution, &archive)? {
+                            if not_modified_install(distribution, &archive)? {
{ debug!("Requirement already satisfied (and up-to-date): {distribution}"); continue; } @@ -243,16 +245,21 @@ impl<'a> Planner<'a> { ) .entry(wheel.filename.stem()); - if cache_entry.path().exists() { - let cached_dist = CachedDirectUrlDist::from_url( - wheel.filename, - wheel.url, - cache_entry.into_path_buf(), - ); + if cache + .freshness(&cache_entry, Some(wheel.name())) + .is_ok_and(Freshness::is_fresh) + { + if cache_entry.path().exists() { + let cached_dist = CachedDirectUrlDist::from_url( + wheel.filename, + wheel.url, + cache_entry.into_path_buf(), + ); - debug!("URL wheel requirement already cached: {cached_dist}"); - local.push(CachedDist::Url(cached_dist)); - continue; + debug!("URL wheel requirement already cached: {cached_dist}"); + local.push(CachedDist::Url(cached_dist)); + continue; + } } } Dist::Built(BuiltDist::Path(wheel)) => { @@ -280,16 +287,21 @@ impl<'a> Planner<'a> { ) .entry(wheel.filename.stem()); - if is_fresh_cache(&cache_entry, &wheel.path)? { - let cached_dist = CachedDirectUrlDist::from_url( - wheel.filename, - wheel.url, - cache_entry.into_path_buf(), - ); + if cache + .freshness(&cache_entry, Some(wheel.name())) + .is_ok_and(Freshness::is_fresh) + { + if not_modified_cache(&cache_entry, &wheel.path)? { + let cached_dist = CachedDirectUrlDist::from_url( + wheel.filename, + wheel.url, + cache_entry.into_path_buf(), + ); - debug!("Path wheel requirement already cached: {cached_dist}"); - local.push(CachedDist::Url(cached_dist)); - continue; + debug!("Path wheel requirement already cached: {cached_dist}"); + local.push(CachedDist::Url(cached_dist)); + continue; + } } } Dist::Source(SourceDist::DirectUrl(sdist)) => { @@ -357,32 +369,21 @@ impl<'a> Planner<'a> { } } -/// Returns `true` if the cache entry linked to the file at the given [`Path`] is fresh. +/// Returns `true` if the cache entry linked to the file at the given [`Path`] is not-modified. /// -/// A cache entry is considered fresh if it exists and is newer than the file at the given path. -/// If the cache entry is stale, it will be removed from the cache. -fn is_fresh_cache(cache_entry: &CacheEntry, artifact: &Path) -> Result { +/// A cache entry is not modified if it exists and is newer than the file at the given path. +fn not_modified_cache(cache_entry: &CacheEntry, artifact: &Path) -> Result { match fs_err::metadata(cache_entry.path()).and_then(|metadata| metadata.modified()) { Ok(cache_mtime) => { // Determine the modification time of the wheel. - let Some(artifact_mtime) = puffin_cache::archive_mtime(artifact)? else { - // The artifact doesn't exist, so it's not fresh. - return Ok(false); - }; - if cache_mtime >= artifact_mtime { - Ok(true) + if let Some(artifact_mtime) = ArchiveTimestamp::from_path(artifact)? { + Ok(cache_mtime >= artifact_mtime.timestamp()) } else { - debug!( - "Removing stale built wheels for: {}", - cache_entry.path().display() - ); - if let Err(err) = fs_err::remove_dir_all(cache_entry.dir()) { - warn!("Failed to remove stale built wheel cache directory: {err}"); - } + // The artifact doesn't exist, so it's not fresh. Ok(false) } } - Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + Err(err) if err.kind() == io::ErrorKind::NotFound => { // The cache entry doesn't exist, so it's not fresh. 
             Ok(false)
         }
@@ -390,20 +391,20 @@ fn is_fresh_cache(cache_entry: &CacheEntry, artifact: &Path) -> Result<bool, io::Error>
     }
 }
 
-/// Returns `true` if the installed distribution linked to the file at the given [`Path`] is fresh
-/// based on the modification time of the installed distribution.
-fn is_fresh_install(dist: &InstalledDirectUrlDist, artifact: &Path) -> Result<bool, io::Error> {
+/// Returns `true` if the installed distribution linked to the file at the given [`Path`] is
+/// not-modified based on the modification time of the installed distribution.
+fn not_modified_install(dist: &InstalledDirectUrlDist, artifact: &Path) -> Result<bool, io::Error> {
     // Determine the modification time of the installed distribution.
     let dist_metadata = fs_err::metadata(&dist.path)?;
     let dist_mtime = dist_metadata.modified()?;
 
     // Determine the modification time of the wheel.
-    let Some(artifact_mtime) = puffin_cache::archive_mtime(artifact)? else {
+    if let Some(artifact_mtime) = ArchiveTimestamp::from_path(artifact)? {
+        Ok(dist_mtime >= artifact_mtime.timestamp())
+    } else {
         // The artifact doesn't exist, so it's not fresh.
-        return Ok(false);
-    };
-
-    Ok(dist_mtime >= artifact_mtime)
+        Ok(false)
+    }
 }
 
 #[derive(Debug, Default)]
@@ -425,7 +426,7 @@ pub struct Plan {
     pub extraneous: Vec<InstalledDist>,
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub enum Reinstall {
     /// Don't reinstall any packages; respect the existing installation.
     None,
diff --git a/crates/puffin-interpreter/src/interpreter.rs b/crates/puffin-interpreter/src/interpreter.rs
index c4d5abc8d..c55747acd 100644
--- a/crates/puffin-interpreter/src/interpreter.rs
+++ b/crates/puffin-interpreter/src/interpreter.rs
@@ -11,7 +11,7 @@ use pep440_rs::Version;
 use pep508_rs::MarkerEnvironment;
 use platform_host::Platform;
 use platform_tags::{Tags, TagsError};
-use puffin_cache::{Cache, CacheBucket, CachedByTimestamp};
+use puffin_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness};
 use puffin_fs::write_atomic_sync;
 
 use crate::python_platform::PythonPlatform;
@@ -272,6 +272,7 @@ impl InterpreterQueryResult {
     /// time as a cache key.
     pub(crate) fn query_cached(executable: &Path, cache: &Cache) -> Result<Self, Error> {
         let executable_bytes = executable.as_os_str().as_encoded_bytes();
+
         let cache_entry = cache.entry(
             CacheBucket::Interpreter,
             "",
             format!("{}.msgpack", digest(&executable_bytes)),
         );
 
         let modified = Timestamp::from_path(fs_err::canonicalize(executable)?.as_ref())?;
 
         // Read from the cache.
-        if let Ok(data) = fs::read(cache_entry.path()) {
-            match rmp_serde::from_slice::<CachedByTimestamp<Self>>(&data) {
-                Ok(cached) => {
-                    if cached.timestamp == modified {
-                        debug!("Using cached markers for: {}", executable.display());
-                        return Ok(cached.data);
-                    }
+        if cache
+            .freshness(&cache_entry, None)
+            .is_ok_and(Freshness::is_fresh)
+        {
+            if let Ok(data) = fs::read(cache_entry.path()) {
+                match rmp_serde::from_slice::<CachedByTimestamp<Self>>(&data) {
+                    Ok(cached) => {
+                        if cached.timestamp == modified {
+                            debug!("Using cached markers for: {}", executable.display());
+                            return Ok(cached.data);
+                        }
 
-                    debug!(
-                        "Ignoring stale cached markers for: {}",
-                        executable.display()
-                    );
-                }
-                Err(err) => {
-                    warn!(
-                        "Broken cache entry at {}, removing: {err}",
-                        cache_entry.path().display()
-                    );
-                    let _ = fs_err::remove_file(cache_entry.path());
+                        debug!(
+                            "Ignoring stale cached markers for: {}",
+                            executable.display()
+                        );
+                    }
+                    Err(err) => {
+                        warn!(
+                            "Broken cache entry at {}, removing: {err}",
+                            cache_entry.path().display()
+                        );
+                        let _ = fs_err::remove_file(cache_entry.path());
+                    }
                 }
             }
         }
diff --git a/crates/puffin/src/main.rs b/crates/puffin/src/main.rs
index d0a0d0291..293aa7af3 100644
--- a/crates/puffin/src/main.rs
+++ b/crates/puffin/src/main.rs
@@ -10,7 +10,7 @@ use clap::{Args, Parser, Subcommand};
 use owo_colors::OwoColorize;
 
 use distribution_types::{FlatIndexLocation, IndexLocations, IndexUrl};
-use puffin_cache::{Cache, CacheArgs};
+use puffin_cache::{Cache, CacheArgs, Refresh};
 use puffin_installer::{NoBinary, Reinstall};
 use puffin_interpreter::PythonVersion;
 use puffin_normalize::{ExtraName, PackageName};
@@ -200,6 +200,14 @@ struct PipCompileArgs {
     #[clap(short, long)]
     output_file: Option<PathBuf>,
 
+    /// Refresh all cached data.
+    #[clap(long)]
+    refresh: bool,
+
+    /// Refresh cached data for a specific package.
+    #[clap(long)]
+    refresh_package: Vec<PackageName>,
+
     /// The URL of the Python Package Index.
     #[clap(long, short, default_value = IndexUrl::Pypi.as_str(), env = "PUFFIN_INDEX_URL")]
     index_url: IndexUrl,
@@ -264,7 +272,7 @@ struct PipCompileArgs {
     /// Timestamps are given either as RFC 3339 timestamps such as `2006-12-02T02:07:43Z` or as
     /// UTC dates in the same format such as `2006-12-02`. Dates are interpreted as including this
     /// day, i.e. until midnight UTC that day.
-    #[arg(long, value_parser = date_or_datetime, hide = true)]
+    #[arg(long, value_parser = date_or_datetime)]
     exclude_newer: Option<DateTime<Utc>>,
 }
 
@@ -275,16 +283,22 @@ struct PipSyncArgs {
     #[clap(required(true))]
     src_file: Vec<PathBuf>,
 
-    /// Reinstall all packages, overwriting any entries in the cache and replacing any existing
-    /// packages in the environment.
+    /// Reinstall all packages, regardless of whether they're already installed.
     #[clap(long, alias = "force-reinstall")]
     reinstall: bool,
 
-    /// Reinstall a specific package, overwriting any entries in the cache and replacing any
-    /// existing versions in the environment.
+    /// Reinstall a specific package, regardless of whether it's already installed.
     #[clap(long)]
     reinstall_package: Vec<PackageName>,
 
+    /// Refresh all cached data.
+    #[clap(long)]
+    refresh: bool,
+
+    /// Refresh cached data for a specific package.
+    #[clap(long)]
+    refresh_package: Vec<PackageName>,
+
     /// The method to use when installing packages from the global cache.
     #[clap(long, value_enum, default_value_t = install_wheel_rs::linker::LinkMode::default())]
     link_mode: install_wheel_rs::linker::LinkMode,
 
@@ -390,16 +404,22 @@ struct PipInstallArgs {
     #[clap(long, conflicts_with = "extra")]
     all_extras: bool,
 
-    /// Reinstall all packages, overwriting any entries in the cache and replacing any existing
-    /// packages in the environment.
+    /// Reinstall all packages, regardless of whether they're already installed.
     #[clap(long, alias = "force-reinstall")]
     reinstall: bool,
 
-    /// Reinstall a specific package, overwriting any entries in the cache and replacing any
-    /// existing versions in the environment.
+    /// Reinstall a specific package, regardless of whether it's already installed.
     #[clap(long)]
     reinstall_package: Vec<PackageName>,
 
+    /// Refresh all cached data.
+    #[clap(long)]
+    refresh: bool,
+
+    /// Refresh cached data for a specific package.
+    #[clap(long)]
+    refresh_package: Vec<PackageName>,
+
     /// The method to use when installing packages from the global cache.
     #[clap(long, value_enum, default_value_t = install_wheel_rs::linker::LinkMode::default())]
     link_mode: install_wheel_rs::linker::LinkMode,
@@ -619,6 +639,7 @@ async fn inner() -> Result<ExitStatus> {
         Commands::Pip(PipArgs {
             command: PipCommand::Compile(args),
         }) => {
+            let cache = cache.with_refresh(Refresh::from_args(args.refresh, args.refresh_package));
             let requirements = args
                 .src_file
                 .into_iter()
@@ -675,6 +696,7 @@ async fn inner() -> Result<ExitStatus> {
         Commands::Pip(PipArgs {
             command: PipCommand::Sync(args),
         }) => {
+            let cache = cache.with_refresh(Refresh::from_args(args.refresh, args.refresh_package));
             let index_urls = IndexLocations::from_args(
                 args.index_url,
                 args.extra_index_url,
@@ -709,6 +731,7 @@ async fn inner() -> Result<ExitStatus> {
         Commands::Pip(PipArgs {
             command: PipCommand::Install(args),
         }) => {
+            let cache = cache.with_refresh(Refresh::from_args(args.refresh, args.refresh_package));
             let requirements = args
                 .package
                 .into_iter()
diff --git a/crates/puffin/tests/pip_install.rs b/crates/puffin/tests/pip_install.rs
index 09c49d208..a23053d4e 100644
--- a/crates/puffin/tests/pip_install.rs
+++ b/crates/puffin/tests/pip_install.rs
@@ -728,7 +728,7 @@ fn reinstall_build_system() -> Result<()> {
     ----- stderr -----
     Resolved 8 packages in [TIME]
-    Downloaded 8 packages in [TIME]
+    Downloaded 7 packages in [TIME]
     Installed 8 packages in [TIME]
      + blinker==1.7.0
      + click==8.1.7
@@ -930,7 +930,6 @@ fn reinstall_no_binary() -> Result<()> {
     ----- stderr -----
     Resolved 7 packages in [TIME]
-    Downloaded 1 package in [TIME]
     Installed 1 package in [TIME]
      - flask==3.0.0
      + flask==3.0.0
diff --git a/crates/puffin/tests/pip_sync.rs b/crates/puffin/tests/pip_sync.rs
index b84ebfc16..a7e39bede 100644
--- a/crates/puffin/tests/pip_sync.rs
+++ b/crates/puffin/tests/pip_sync.rs
@@ -2366,8 +2366,6 @@ fn reinstall() -> Result<()> {
     ----- stdout -----
 
     ----- stderr -----
-    Resolved 2 packages in [TIME]
-    Downloaded 2 packages in [TIME]
     Uninstalled 2 packages in [TIME]
     Installed 2 packages in [TIME]
      - markupsafe==2.1.3
@@ -2442,8 +2440,6 @@ fn reinstall_package() -> Result<()> {
     ----- stdout -----
 
     ----- stderr -----
-    Resolved 1 package in [TIME]
-    Downloaded 1 package in [TIME]
     Uninstalled 1 package in [TIME]
     Installed 1 package in [TIME]
      - tomli==2.0.1
@@ -2515,8 +2511,6 @@ fn reinstall_git() -> Result<()> {
     ----- stdout -----
 
     ----- stderr -----
-    Resolved 1 package in [TIME]
-    Downloaded 1 package in [TIME]
     Uninstalled 1 package in [TIME]
     Installed 1 package in [TIME]
      - werkzeug==2.0.0 (from git+https://github.com/pallets/werkzeug.git@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74)
@@ -2529,6 +2523,159 @@ fn reinstall_git() -> Result<()> {
     Ok(())
 }
 
+/// Verify that we can force refresh of cached data.
+#[test]
+fn refresh() -> Result<()> {
+    let temp_dir = assert_fs::TempDir::new()?;
+    let cache_dir = assert_fs::TempDir::new()?;
+    let venv = create_venv_py312(&temp_dir, &cache_dir);
+
+    let requirements_txt = temp_dir.child("requirements.txt");
+    requirements_txt.touch()?;
+    requirements_txt.write_str("MarkupSafe==2.1.3\ntomli==2.0.1")?;
+
+    insta::with_settings!({
+        filters => INSTA_FILTERS.to_vec()
+    }, {
+        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
+            .arg("pip")
+            .arg("sync")
+            .arg("requirements.txt")
+            .arg("--strict")
+            .arg("--cache-dir")
+            .arg(cache_dir.path())
+            .env("VIRTUAL_ENV", venv.as_os_str())
+            .current_dir(&temp_dir), @r###"
+        success: true
+        exit_code: 0
+        ----- stdout -----
+
+        ----- stderr -----
+        Resolved 2 packages in [TIME]
+        Downloaded 2 packages in [TIME]
+        Installed 2 packages in [TIME]
+         + markupsafe==2.1.3
+         + tomli==2.0.1
+        "###);
+    });
+
+    check_command(&venv, "import markupsafe", &temp_dir);
+    check_command(&venv, "import tomli", &temp_dir);
+
+    // Re-run the installation with `--refresh`. Ensure that we resolve and download the latest
+    // versions of the packages.
+    let parent = assert_fs::TempDir::new()?;
+    let venv = create_venv_py312(&parent, &cache_dir);
+
+    insta::with_settings!({
+        filters => INSTA_FILTERS.to_vec()
+    }, {
+        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
+            .arg("pip")
+            .arg("sync")
+            .arg("requirements.txt")
+            .arg("--refresh")
+            .arg("--strict")
+            .arg("--cache-dir")
+            .arg(cache_dir.path())
+            .env("VIRTUAL_ENV", venv.as_os_str())
+            .current_dir(&temp_dir), @r###"
+        success: true
+        exit_code: 0
+        ----- stdout -----
+
+        ----- stderr -----
+        Resolved 2 packages in [TIME]
+        Downloaded 2 packages in [TIME]
+        Installed 2 packages in [TIME]
+         + markupsafe==2.1.3
+         + tomli==2.0.1
+        "###);
+    });
+
+    check_command(&venv, "import markupsafe", &temp_dir);
+    check_command(&venv, "import tomli", &temp_dir);
+
+    Ok(())
+}
+
+/// Verify that we can force refresh of selected packages.
+#[test]
+fn refresh_package() -> Result<()> {
+    let temp_dir = assert_fs::TempDir::new()?;
+    let cache_dir = assert_fs::TempDir::new()?;
+    let venv = create_venv_py312(&temp_dir, &cache_dir);
+
+    let requirements_txt = temp_dir.child("requirements.txt");
+    requirements_txt.touch()?;
+    requirements_txt.write_str("MarkupSafe==2.1.3\ntomli==2.0.1")?;
+
+    insta::with_settings!({
+        filters => INSTA_FILTERS.to_vec()
+    }, {
+        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
+            .arg("pip")
+            .arg("sync")
+            .arg("requirements.txt")
+            .arg("--strict")
+            .arg("--cache-dir")
+            .arg(cache_dir.path())
+            .env("VIRTUAL_ENV", venv.as_os_str())
+            .current_dir(&temp_dir), @r###"
+        success: true
+        exit_code: 0
+        ----- stdout -----
+
+        ----- stderr -----
+        Resolved 2 packages in [TIME]
+        Downloaded 2 packages in [TIME]
+        Installed 2 packages in [TIME]
+         + markupsafe==2.1.3
+         + tomli==2.0.1
+        "###);
+    });
+
+    check_command(&venv, "import markupsafe", &temp_dir);
+    check_command(&venv, "import tomli", &temp_dir);
+
+    // Re-run the installation with `--refresh-package` scoped to `tomli`. Ensure that we
+    // re-resolve and re-download the latest version of `tomli`.
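+    // Only `tomli` should be re-resolved and re-downloaded; `markupsafe` should be served from
+    // the cache (hence "Resolved 1 package" and "Downloaded 1 package" in the snapshot below).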
+    let parent = assert_fs::TempDir::new()?;
+    let venv = create_venv_py312(&parent, &cache_dir);
+
+    insta::with_settings!({
+        filters => INSTA_FILTERS.to_vec()
+    }, {
+        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
+            .arg("pip")
+            .arg("sync")
+            .arg("requirements.txt")
+            .arg("--refresh-package")
+            .arg("tomli")
+            .arg("--strict")
+            .arg("--cache-dir")
+            .arg(cache_dir.path())
+            .env("VIRTUAL_ENV", venv.as_os_str())
+            .current_dir(&temp_dir), @r###"
+        success: true
+        exit_code: 0
+        ----- stdout -----
+
+        ----- stderr -----
+        Resolved 1 package in [TIME]
+        Downloaded 1 package in [TIME]
+        Installed 2 packages in [TIME]
+         + markupsafe==2.1.3
+         + tomli==2.0.1
+        "###);
+    });
+
+    check_command(&venv, "import markupsafe", &temp_dir);
+    check_command(&venv, "import tomli", &temp_dir);
+
+    Ok(())
+}
+
 #[test]
 #[cfg(feature = "maturin")]
 fn sync_editable() -> Result<()> {