From bbe0246205a2349b88e5882b1f565d5ae7de06c0 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 25 Dec 2023 21:10:30 -0500 Subject: [PATCH] Change internal representation of `CacheEntry` to avoid allocations (#730) Removes a TODO. --- crates/puffin-cache/src/lib.rs | 53 +++++++++++-------- crates/puffin-client/src/cached_client.rs | 2 +- .../src/distribution_database.rs | 22 +++++--- crates/puffin-distribution/src/source_dist.rs | 50 ++++++++--------- crates/puffin-installer/src/plan.rs | 14 ++--- crates/puffin-interpreter/src/interpreter.rs | 2 +- 6 files changed, 79 insertions(+), 64 deletions(-) diff --git a/crates/puffin-cache/src/lib.rs b/crates/puffin-cache/src/lib.rs index 3fa3a94a6..12a9b2e89 100644 --- a/crates/puffin-cache/src/lib.rs +++ b/crates/puffin-cache/src/lib.rs @@ -22,25 +22,38 @@ mod by_timestamp; mod cli; mod wheel; -/// A cache entry which may or may not exist yet. +/// A [`CacheEntry`] which may or may not exist yet. #[derive(Debug, Clone)] -pub struct CacheEntry { - pub dir: PathBuf, - pub file: String, -} +pub struct CacheEntry(PathBuf); impl CacheEntry { - pub fn path(&self) -> PathBuf { - // TODO(konstin): Cache this to avoid allocations? - self.dir.join(&self.file) + /// Create a new [`CacheEntry`] from a directory and a file name. + pub fn new(dir: impl Into, file: impl AsRef) -> Self { + Self(dir.into().join(file)) } + /// Convert the [`CacheEntry`] into a [`PathBuf`]. + #[inline] + pub fn into_path_buf(self) -> PathBuf { + self.0 + } + + /// Return the path to the [`CacheEntry`]. + #[inline] + pub fn path(&self) -> &Path { + &self.0 + } + + /// Return the cache entry's parent directory. + #[inline] + pub fn dir(&self) -> &Path { + self.0.parent().expect("Cache entry has no parent") + } + + /// Create a new [`CacheEntry`] with the given file name. #[must_use] - pub fn with_file(self, file: impl Into) -> Self { - Self { - file: file.into(), - ..self - } + pub fn with_file(&self, file: impl AsRef) -> Self { + Self(self.dir().join(file)) } } @@ -49,11 +62,8 @@ impl CacheEntry { pub struct CacheShard(PathBuf); impl CacheShard { - pub fn entry(&self, file: impl Into) -> CacheEntry { - CacheEntry { - dir: self.0.clone(), - file: file.into(), - } + pub fn entry(&self, file: impl AsRef) -> CacheEntry { + CacheEntry::new(&self.0, file) } } @@ -115,12 +125,9 @@ impl Cache { &self, cache_bucket: CacheBucket, dir: impl AsRef, - file: String, + file: impl AsRef, ) -> CacheEntry { - CacheEntry { - dir: self.bucket(cache_bucket).join(dir.as_ref()), - file, - } + CacheEntry::new(self.bucket(cache_bucket).join(dir), file) } /// Initialize a directory for use as a cache. diff --git a/crates/puffin-client/src/cached_client.rs b/crates/puffin-client/src/cached_client.rs index 1d782763d..334ceb4fd 100644 --- a/crates/puffin-client/src/cached_client.rs +++ b/crates/puffin-client/src/cached_client.rs @@ -136,7 +136,7 @@ impl CachedClient { .map_err(|err| CachedClientError::Callback(err))?; if let Some(cache_policy) = cache_policy { let data_with_cache_policy = DataWithCachePolicy { data, cache_policy }; - fs_err::tokio::create_dir_all(&cache_entry.dir) + fs_err::tokio::create_dir_all(cache_entry.dir()) .await .map_err(crate::Error::CacheWrite)?; let data = diff --git a/crates/puffin-distribution/src/distribution_database.rs b/crates/puffin-distribution/src/distribution_database.rs index 9fd3c0105..26c5b62a1 100644 --- a/crates/puffin-distribution/src/distribution_database.rs +++ b/crates/puffin-distribution/src/distribution_database.rs @@ -136,7 +136,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> LocalWheel::InMemory(InMemoryWheel { dist: dist.clone(), - target: cache_entry.path(), + target: cache_entry.into_path_buf(), buffer, filename: wheel_filename, }) @@ -162,13 +162,16 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .remote_wheel_dir(wheel_filename.name.as_ref()), filename, ); - fs::create_dir_all(&cache_entry.dir).await?; + fs::create_dir_all(&cache_entry.dir()).await?; tokio::fs::rename(temp_file, &cache_entry.path()).await?; LocalWheel::Disk(DiskWheel { dist: dist.clone(), - path: cache_entry.path(), - target: cache_entry.with_file(wheel_filename.stem()).path(), + target: cache_entry + .with_file(wheel_filename.stem()) + .path() + .to_path_buf(), + path: cache_entry.into_path_buf(), filename: wheel_filename, }) }; @@ -195,13 +198,16 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> WheelCache::Url(&wheel.url).remote_wheel_dir(wheel.name().as_ref()), filename, ); - fs::create_dir_all(&cache_entry.dir).await?; + fs::create_dir_all(&cache_entry.dir()).await?; tokio::fs::rename(temp_file, &cache_entry.path()).await?; let local_wheel = LocalWheel::Disk(DiskWheel { dist: dist.clone(), - path: cache_entry.path(), - target: cache_entry.with_file(wheel.filename.stem()).path(), + target: cache_entry + .with_file(wheel.filename.stem()) + .path() + .to_path_buf(), + path: cache_entry.into_path_buf(), filename: wheel.filename.clone(), }); @@ -218,7 +224,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> Ok(LocalWheel::Disk(DiskWheel { dist: dist.clone(), path: wheel.path.clone(), - target: cache_entry.path(), + target: cache_entry.into_path_buf(), filename: wheel.filename.clone(), })) } diff --git a/crates/puffin-distribution/src/source_dist.rs b/crates/puffin-distribution/src/source_dist.rs index 0705d97c2..ef82452df 100644 --- a/crates/puffin-distribution/src/source_dist.rs +++ b/crates/puffin-distribution/src/source_dist.rs @@ -133,8 +133,8 @@ impl BuiltWheelMetadata { cache_entry: &CacheEntry, ) -> Self { Self { - path: cache_entry.dir.join(&cached_dist.disk_filename), - target: cache_entry.dir.join(filename.stem()), + path: cache_entry.dir().join(&cached_dist.disk_filename), + target: cache_entry.dir().join(filename.stem()), filename, metadata: cached_dist.metadata, } @@ -270,12 +270,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { cache_shard: &CacheShard, subdirectory: Option<&'data Path>, ) -> Result { - let cache_entry = cache_shard.entry(METADATA.to_string()); + let cache_entry = cache_shard.entry(METADATA); let response_callback = |response| async { // At this point, we're seeing a new or updated source distribution; delete all // wheels, and rebuild. - match fs::remove_dir_all(&cache_entry.dir).await { + match fs::remove_dir_all(&cache_entry.dir()).await { Ok(()) => debug!("Cleared built wheels and metadata for {source_dist}"), Err(err) if err.kind() == std::io::ErrorKind::NotFound => (), Err(err) => return Err(err.into()), @@ -288,13 +288,14 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { .map(|reporter| reporter.on_build_start(source_dist)); // Download the source distribution. + let source_dist_entry = cache_shard.entry(filename); let cache_dir = self - .persist_source_dist_url(response, source_dist, filename, cache_shard) + .persist_source_dist_url(response, source_dist, filename, &source_dist_entry) .await?; // Build the source distribution. let (disk_filename, wheel_filename, metadata) = self - .build_source_dist(source_dist, &cache_dir, subdirectory, &cache_entry) + .build_source_dist(source_dist, cache_dir, subdirectory, &cache_entry) .await?; if let Some(task) = task { @@ -343,13 +344,15 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { .send() .await .map_err(puffin_client::Error::RequestMiddlewareError)?; + + let source_dist_entry = cache_shard.entry(filename); let cache_dir = self - .persist_source_dist_url(response, source_dist, filename, cache_shard) + .persist_source_dist_url(response, source_dist, filename, &source_dist_entry) .await?; // Build the source distribution. let (disk_filename, wheel_filename, metadata) = self - .build_source_dist(source_dist, &cache_dir, subdirectory, &cache_entry) + .build_source_dist(source_dist, cache_dir, subdirectory, &cache_entry) .await?; if let Some(task) = task { @@ -395,7 +398,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { CacheBucket::BuiltWheels, WheelCache::Path(&path_source_dist.url) .remote_wheel_dir(path_source_dist.name().as_ref()), - METADATA.to_string(), + METADATA, ); // Determine the last-modified time of the source distribution. @@ -475,8 +478,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } } - let path = cache_entry.dir.join(&disk_filename); - let target = cache_entry.dir.join(filename.stem()); + let path = cache_entry.dir().join(&disk_filename); + let target = cache_entry.dir().join(filename.stem()); Ok(BuiltWheelMetadata { path, @@ -500,7 +503,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { CacheBucket::BuiltWheels, WheelCache::Git(&git_source_dist.url, &git_sha.to_short_string()) .remote_wheel_dir(git_source_dist.name().as_ref()), - METADATA.to_string(), + METADATA, ); // Read the existing metadata from the cache. @@ -551,8 +554,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } } - let path = cache_entry.dir.join(&disk_filename); - let target = cache_entry.dir.join(filename.stem()); + let path = cache_entry.dir().join(&disk_filename); + let target = cache_entry.dir().join(filename.stem()); Ok(BuiltWheelMetadata { path, @@ -563,14 +566,13 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } /// Download and unzip a source distribution into the cache from an HTTP response. - async fn persist_source_dist_url( + async fn persist_source_dist_url<'data>( &self, response: Response, source_dist: &SourceDist, filename: &str, - cache_shard: &CacheShard, - ) -> Result { - let cache_entry = cache_shard.entry(filename); + cache_entry: &'data CacheEntry, + ) -> Result<&'data Path, SourceDistError> { let cache_path = cache_entry.path(); if cache_path.is_dir() { debug!("Distribution is already cached: {source_dist}"); @@ -594,8 +596,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { drop(span); // Persist the unzipped distribution to the cache. - fs::create_dir_all(&cache_entry.dir).await?; - if let Err(err) = fs_err::rename(&source_dist_dir, &cache_path) { + fs::create_dir_all(&cache_entry.dir()).await?; + if let Err(err) = fs_err::rename(&source_dist_dir, cache_path) { // If another thread already cached the distribution, we can ignore the error. if cache_path.is_dir() { warn!("Downloaded already-cached distribution: {source_dist}"); @@ -688,7 +690,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } // Build the wheel. - fs::create_dir_all(&cache_entry.dir).await?; + fs::create_dir_all(&cache_entry.dir()).await?; let disk_filename = self .build_context .setup_build( @@ -699,13 +701,13 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { ) .await .map_err(|err| SourceDistError::Build(dist.to_string(), err))? - .wheel(&cache_entry.dir) + .wheel(cache_entry.dir()) .await .map_err(|err| SourceDistError::Build(dist.to_string(), err))?; // Read the metadata from the wheel. let filename = WheelFilename::from_str(&disk_filename)?; - let metadata = read_metadata(&filename, cache_entry.dir.join(&disk_filename))?; + let metadata = read_metadata(&filename, cache_entry.dir().join(&disk_filename))?; debug!("Finished building: {dist}"); Ok((disk_filename, filename, metadata)) @@ -760,7 +762,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { "Removing stale built wheels for: {}", cache_entry.path().display() ); - if let Err(err) = fs::remove_dir_all(&cache_entry.dir).await { + if let Err(err) = fs::remove_dir_all(&cache_entry.dir()).await { warn!("Failed to remove stale built wheel cache directory: {err}"); } Ok(None) diff --git a/crates/puffin-installer/src/plan.rs b/crates/puffin-installer/src/plan.rs index 9e0462315..11ef27495 100644 --- a/crates/puffin-installer/src/plan.rs +++ b/crates/puffin-installer/src/plan.rs @@ -204,11 +204,11 @@ impl InstallPlan { let cached_dist = CachedDirectUrlDist::from_url( wheel.filename, wheel.url, - cache_entry.path(), + cache_entry.into_path_buf(), ); debug!("URL wheel requirement already cached: {cached_dist}"); - local.push(CachedDist::Url(cached_dist.clone())); + local.push(CachedDist::Url(cached_dist)); continue; } } @@ -225,11 +225,11 @@ impl InstallPlan { let cached_dist = CachedDirectUrlDist::from_url( wheel.filename, wheel.url, - cache_entry.path(), + cache_entry.into_path_buf(), ); debug!("Path wheel requirement already cached: {cached_dist}"); - local.push(CachedDist::Url(cached_dist.clone())); + local.push(CachedDist::Url(cached_dist)); continue; } } @@ -244,7 +244,7 @@ impl InstallPlan { if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) { let cached_dist = wheel.into_url_dist(url.clone()); debug!("URL source requirement already cached: {cached_dist}"); - local.push(CachedDist::Url(cached_dist.clone())); + local.push(CachedDist::Url(cached_dist)); continue; } } @@ -260,7 +260,7 @@ impl InstallPlan { if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) { let cached_dist = wheel.into_url_dist(url.clone()); debug!("Path source requirement already cached: {cached_dist}"); - local.push(CachedDist::Url(cached_dist.clone())); + local.push(CachedDist::Url(cached_dist)); continue; } } @@ -277,7 +277,7 @@ impl InstallPlan { if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) { let cached_dist = wheel.into_url_dist(url.clone()); debug!("Git source requirement already cached: {cached_dist}"); - local.push(CachedDist::Url(cached_dist.clone())); + local.push(CachedDist::Url(cached_dist)); continue; } } diff --git a/crates/puffin-interpreter/src/interpreter.rs b/crates/puffin-interpreter/src/interpreter.rs index 660b6161e..c6152c561 100644 --- a/crates/puffin-interpreter/src/interpreter.rs +++ b/crates/puffin-interpreter/src/interpreter.rs @@ -209,7 +209,7 @@ impl InterpreterQueryResult { // If `executable` is a pyenv shim, a bash script that redirects to the activated // python executable at another path, we're not allowed to cache the interpreter info if executable == info.sys_executable { - fs::create_dir_all(&cache_entry.dir)?; + fs::create_dir_all(cache_entry.dir())?; // Write to the cache. write_atomic_sync( cache_entry.path(),