Change internal representation of `CacheEntry` to avoid allocations (#730)

Removes a TODO.
This commit is contained in:
Charlie Marsh 2023-12-25 21:10:30 -05:00 committed by GitHub
parent 188ab75769
commit bbe0246205
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 79 additions and 64 deletions

View File

@ -22,25 +22,38 @@ mod by_timestamp;
mod cli;
mod wheel;
/// A cache entry which may or may not exist yet.
/// A [`CacheEntry`] which may or may not exist yet.
#[derive(Debug, Clone)]
pub struct CacheEntry {
pub dir: PathBuf,
pub file: String,
}
pub struct CacheEntry(PathBuf);
impl CacheEntry {
pub fn path(&self) -> PathBuf {
// TODO(konstin): Cache this to avoid allocations?
self.dir.join(&self.file)
/// Create a new [`CacheEntry`] from a directory and a file name.
pub fn new(dir: impl Into<PathBuf>, file: impl AsRef<Path>) -> Self {
Self(dir.into().join(file))
}
/// Convert the [`CacheEntry`] into a [`PathBuf`].
#[inline]
pub fn into_path_buf(self) -> PathBuf {
self.0
}
/// Return the path to the [`CacheEntry`].
#[inline]
pub fn path(&self) -> &Path {
&self.0
}
/// Return the cache entry's parent directory.
#[inline]
pub fn dir(&self) -> &Path {
self.0.parent().expect("Cache entry has no parent")
}
/// Create a new [`CacheEntry`] with the given file name.
#[must_use]
pub fn with_file(self, file: impl Into<String>) -> Self {
Self {
file: file.into(),
..self
}
pub fn with_file(&self, file: impl AsRef<Path>) -> Self {
Self(self.dir().join(file))
}
}
@ -49,11 +62,8 @@ impl CacheEntry {
pub struct CacheShard(PathBuf);
impl CacheShard {
pub fn entry(&self, file: impl Into<String>) -> CacheEntry {
CacheEntry {
dir: self.0.clone(),
file: file.into(),
}
pub fn entry(&self, file: impl AsRef<Path>) -> CacheEntry {
CacheEntry::new(&self.0, file)
}
}
@ -115,12 +125,9 @@ impl Cache {
&self,
cache_bucket: CacheBucket,
dir: impl AsRef<Path>,
file: String,
file: impl AsRef<Path>,
) -> CacheEntry {
CacheEntry {
dir: self.bucket(cache_bucket).join(dir.as_ref()),
file,
}
CacheEntry::new(self.bucket(cache_bucket).join(dir), file)
}
/// Initialize a directory for use as a cache.

View File

@ -136,7 +136,7 @@ impl CachedClient {
.map_err(|err| CachedClientError::Callback(err))?;
if let Some(cache_policy) = cache_policy {
let data_with_cache_policy = DataWithCachePolicy { data, cache_policy };
fs_err::tokio::create_dir_all(&cache_entry.dir)
fs_err::tokio::create_dir_all(cache_entry.dir())
.await
.map_err(crate::Error::CacheWrite)?;
let data =

View File

@ -136,7 +136,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
LocalWheel::InMemory(InMemoryWheel {
dist: dist.clone(),
target: cache_entry.path(),
target: cache_entry.into_path_buf(),
buffer,
filename: wheel_filename,
})
@ -162,13 +162,16 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
.remote_wheel_dir(wheel_filename.name.as_ref()),
filename,
);
fs::create_dir_all(&cache_entry.dir).await?;
fs::create_dir_all(&cache_entry.dir()).await?;
tokio::fs::rename(temp_file, &cache_entry.path()).await?;
LocalWheel::Disk(DiskWheel {
dist: dist.clone(),
path: cache_entry.path(),
target: cache_entry.with_file(wheel_filename.stem()).path(),
target: cache_entry
.with_file(wheel_filename.stem())
.path()
.to_path_buf(),
path: cache_entry.into_path_buf(),
filename: wheel_filename,
})
};
@ -195,13 +198,16 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
WheelCache::Url(&wheel.url).remote_wheel_dir(wheel.name().as_ref()),
filename,
);
fs::create_dir_all(&cache_entry.dir).await?;
fs::create_dir_all(&cache_entry.dir()).await?;
tokio::fs::rename(temp_file, &cache_entry.path()).await?;
let local_wheel = LocalWheel::Disk(DiskWheel {
dist: dist.clone(),
path: cache_entry.path(),
target: cache_entry.with_file(wheel.filename.stem()).path(),
target: cache_entry
.with_file(wheel.filename.stem())
.path()
.to_path_buf(),
path: cache_entry.into_path_buf(),
filename: wheel.filename.clone(),
});
@ -218,7 +224,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok(LocalWheel::Disk(DiskWheel {
dist: dist.clone(),
path: wheel.path.clone(),
target: cache_entry.path(),
target: cache_entry.into_path_buf(),
filename: wheel.filename.clone(),
}))
}

View File

@ -133,8 +133,8 @@ impl BuiltWheelMetadata {
cache_entry: &CacheEntry,
) -> Self {
Self {
path: cache_entry.dir.join(&cached_dist.disk_filename),
target: cache_entry.dir.join(filename.stem()),
path: cache_entry.dir().join(&cached_dist.disk_filename),
target: cache_entry.dir().join(filename.stem()),
filename,
metadata: cached_dist.metadata,
}
@ -270,12 +270,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
cache_shard: &CacheShard,
subdirectory: Option<&'data Path>,
) -> Result<BuiltWheelMetadata, SourceDistError> {
let cache_entry = cache_shard.entry(METADATA.to_string());
let cache_entry = cache_shard.entry(METADATA);
let response_callback = |response| async {
// At this point, we're seeing a new or updated source distribution; delete all
// wheels, and rebuild.
match fs::remove_dir_all(&cache_entry.dir).await {
match fs::remove_dir_all(&cache_entry.dir()).await {
Ok(()) => debug!("Cleared built wheels and metadata for {source_dist}"),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => (),
Err(err) => return Err(err.into()),
@ -288,13 +288,14 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
.map(|reporter| reporter.on_build_start(source_dist));
// Download the source distribution.
let source_dist_entry = cache_shard.entry(filename);
let cache_dir = self
.persist_source_dist_url(response, source_dist, filename, cache_shard)
.persist_source_dist_url(response, source_dist, filename, &source_dist_entry)
.await?;
// Build the source distribution.
let (disk_filename, wheel_filename, metadata) = self
.build_source_dist(source_dist, &cache_dir, subdirectory, &cache_entry)
.build_source_dist(source_dist, cache_dir, subdirectory, &cache_entry)
.await?;
if let Some(task) = task {
@ -343,13 +344,15 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
.send()
.await
.map_err(puffin_client::Error::RequestMiddlewareError)?;
let source_dist_entry = cache_shard.entry(filename);
let cache_dir = self
.persist_source_dist_url(response, source_dist, filename, cache_shard)
.persist_source_dist_url(response, source_dist, filename, &source_dist_entry)
.await?;
// Build the source distribution.
let (disk_filename, wheel_filename, metadata) = self
.build_source_dist(source_dist, &cache_dir, subdirectory, &cache_entry)
.build_source_dist(source_dist, cache_dir, subdirectory, &cache_entry)
.await?;
if let Some(task) = task {
@ -395,7 +398,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
CacheBucket::BuiltWheels,
WheelCache::Path(&path_source_dist.url)
.remote_wheel_dir(path_source_dist.name().as_ref()),
METADATA.to_string(),
METADATA,
);
// Determine the last-modified time of the source distribution.
@ -475,8 +478,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
}
}
let path = cache_entry.dir.join(&disk_filename);
let target = cache_entry.dir.join(filename.stem());
let path = cache_entry.dir().join(&disk_filename);
let target = cache_entry.dir().join(filename.stem());
Ok(BuiltWheelMetadata {
path,
@ -500,7 +503,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
CacheBucket::BuiltWheels,
WheelCache::Git(&git_source_dist.url, &git_sha.to_short_string())
.remote_wheel_dir(git_source_dist.name().as_ref()),
METADATA.to_string(),
METADATA,
);
// Read the existing metadata from the cache.
@ -551,8 +554,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
}
}
let path = cache_entry.dir.join(&disk_filename);
let target = cache_entry.dir.join(filename.stem());
let path = cache_entry.dir().join(&disk_filename);
let target = cache_entry.dir().join(filename.stem());
Ok(BuiltWheelMetadata {
path,
@ -563,14 +566,13 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
}
/// Download and unzip a source distribution into the cache from an HTTP response.
async fn persist_source_dist_url(
async fn persist_source_dist_url<'data>(
&self,
response: Response,
source_dist: &SourceDist,
filename: &str,
cache_shard: &CacheShard,
) -> Result<PathBuf, SourceDistError> {
let cache_entry = cache_shard.entry(filename);
cache_entry: &'data CacheEntry,
) -> Result<&'data Path, SourceDistError> {
let cache_path = cache_entry.path();
if cache_path.is_dir() {
debug!("Distribution is already cached: {source_dist}");
@ -594,8 +596,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
drop(span);
// Persist the unzipped distribution to the cache.
fs::create_dir_all(&cache_entry.dir).await?;
if let Err(err) = fs_err::rename(&source_dist_dir, &cache_path) {
fs::create_dir_all(&cache_entry.dir()).await?;
if let Err(err) = fs_err::rename(&source_dist_dir, cache_path) {
// If another thread already cached the distribution, we can ignore the error.
if cache_path.is_dir() {
warn!("Downloaded already-cached distribution: {source_dist}");
@ -688,7 +690,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
}
// Build the wheel.
fs::create_dir_all(&cache_entry.dir).await?;
fs::create_dir_all(&cache_entry.dir()).await?;
let disk_filename = self
.build_context
.setup_build(
@ -699,13 +701,13 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
)
.await
.map_err(|err| SourceDistError::Build(dist.to_string(), err))?
.wheel(&cache_entry.dir)
.wheel(cache_entry.dir())
.await
.map_err(|err| SourceDistError::Build(dist.to_string(), err))?;
// Read the metadata from the wheel.
let filename = WheelFilename::from_str(&disk_filename)?;
let metadata = read_metadata(&filename, cache_entry.dir.join(&disk_filename))?;
let metadata = read_metadata(&filename, cache_entry.dir().join(&disk_filename))?;
debug!("Finished building: {dist}");
Ok((disk_filename, filename, metadata))
@ -760,7 +762,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
"Removing stale built wheels for: {}",
cache_entry.path().display()
);
if let Err(err) = fs::remove_dir_all(&cache_entry.dir).await {
if let Err(err) = fs::remove_dir_all(&cache_entry.dir()).await {
warn!("Failed to remove stale built wheel cache directory: {err}");
}
Ok(None)

View File

@ -204,11 +204,11 @@ impl InstallPlan {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
cache_entry.path(),
cache_entry.into_path_buf(),
);
debug!("URL wheel requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist.clone()));
local.push(CachedDist::Url(cached_dist));
continue;
}
}
@ -225,11 +225,11 @@ impl InstallPlan {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
cache_entry.path(),
cache_entry.into_path_buf(),
);
debug!("Path wheel requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist.clone()));
local.push(CachedDist::Url(cached_dist));
continue;
}
}
@ -244,7 +244,7 @@ impl InstallPlan {
if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) {
let cached_dist = wheel.into_url_dist(url.clone());
debug!("URL source requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist.clone()));
local.push(CachedDist::Url(cached_dist));
continue;
}
}
@ -260,7 +260,7 @@ impl InstallPlan {
if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) {
let cached_dist = wheel.into_url_dist(url.clone());
debug!("Path source requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist.clone()));
local.push(CachedDist::Url(cached_dist));
continue;
}
}
@ -277,7 +277,7 @@ impl InstallPlan {
if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) {
let cached_dist = wheel.into_url_dist(url.clone());
debug!("Git source requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist.clone()));
local.push(CachedDist::Url(cached_dist));
continue;
}
}

View File

@ -209,7 +209,7 @@ impl InterpreterQueryResult {
// If `executable` is a pyenv shim, a bash script that redirects to the activated
// python executable at another path, we're not allowed to cache the interpreter info
if executable == info.sys_executable {
fs::create_dir_all(&cache_entry.dir)?;
fs::create_dir_all(cache_entry.dir())?;
// Write to the cache.
write_atomic_sync(
cache_entry.path(),