From 3bf79e2adab015b9fd81b148b03d0c72064f9576 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 21 Nov 2025 23:15:09 -0500 Subject: [PATCH] Use a slash-delimited path --- Cargo.lock | 1 - crates/uv-cache/Cargo.toml | 1 - crates/uv-cache/src/archive.rs | 79 ++++++++++++++----- crates/uv-cache/src/lib.rs | 52 ++++++------ crates/uv-distribution/src/archive.rs | 10 +-- .../src/distribution_database.rs | 46 ++++------- .../uv-distribution/src/index/cached_wheel.rs | 30 ++++--- crates/uv-installer/src/plan.rs | 8 +- crates/uv/src/commands/project/environment.rs | 34 +++++--- crates/uv/tests/it/sync.rs | 1 - 10 files changed, 160 insertions(+), 102 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1d701c0af..9b708053b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5683,7 +5683,6 @@ version = "0.0.1" dependencies = [ "clap", "fs-err", - "nanoid", "rmp-serde", "rustc-hash", "same-file", diff --git a/crates/uv-cache/Cargo.toml b/crates/uv-cache/Cargo.toml index c3cc960d3..79af0235a 100644 --- a/crates/uv-cache/Cargo.toml +++ b/crates/uv-cache/Cargo.toml @@ -29,7 +29,6 @@ uv-static = { workspace = true } clap = { workspace = true, features = ["derive", "env"], optional = true } fs-err = { workspace = true, features = ["tokio"] } -nanoid = { workspace = true } rmp-serde = { workspace = true } rustc-hash = { workspace = true } same-file = { workspace = true } diff --git a/crates/uv-cache/src/archive.rs b/crates/uv-cache/src/archive.rs index 9bc3f44bc..e0d905c22 100644 --- a/crates/uv-cache/src/archive.rs +++ b/crates/uv-cache/src/archive.rs @@ -1,26 +1,58 @@ -use std::path::Path; +use std::path::PathBuf; use std::str::FromStr; +use uv_pypi_types::{HashAlgorithm, HashDigest}; +use uv_small_str::SmallString; + +/// The latest version of the archive bucket. 
+pub static LATEST: ArchiveVersion = ArchiveVersion::V1; + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)] +pub enum ArchiveVersion { + V0 = 0, + V1 = 1, +} + +impl std::fmt::Display for ArchiveVersion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::V0 => write!(f, "0"), + Self::V1 => write!(f, "1"), + } + } +} + +impl FromStr for ArchiveVersion { + type Err = (); + + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + "0" => Ok(Self::V0), + "1" => Ok(Self::V1), + _ => Err(()), + } + } +} + /// A unique identifier for an archive (unzipped wheel) in the cache. #[derive(Debug, Clone, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)] -pub struct ArchiveId(String); +pub struct ArchiveId(SmallString); impl ArchiveId { - /// Create a content-addressed identifier for an archive from a SHA256 digest. - pub fn from_sha256(digest: &str) -> Self { - Self(digest.to_string()) - } - - /// Create a random content-addressed identifier for an archive. - pub fn nanoid() -> Self { - Self(nanoid::nanoid!()) - } - -} - -impl AsRef<Path> for ArchiveId { - fn as_ref(&self) -> &Path { - self.0.as_ref() + /// Return the content-addressed path for the [`ArchiveId`]. + pub fn to_path_buf(&self, version: ArchiveVersion) -> PathBuf { + match version { + // Version 0: A 21-digit NanoID. + ArchiveVersion::V0 => PathBuf::from(self.0.as_ref()), + // Version 1: A SHA256 hex digest, split into three segments. 
+ ArchiveVersion::V1 => { + let mut path = PathBuf::new(); + path.push(&self.0[0..2]); + path.push(&self.0[2..4]); + path.push(&self.0[4..]); + path + } + } } } @@ -34,6 +66,17 @@ impl FromStr for ArchiveId { type Err = <String as FromStr>::Err; fn from_str(s: &str) -> Result<Self, Self::Err> { - Ok(Self(s.to_string())) + Ok(Self(SmallString::from(s))) + } +} + +impl From<HashDigest> for ArchiveId { + fn from(value: HashDigest) -> Self { + assert_eq!( + value.algorithm, + HashAlgorithm::Sha256, + "Archive IDs must be created from SHA256 digests" + ); + Self(value.digest) } } diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs index d7c87e0a9..c8ff62374 100644 --- a/crates/uv-cache/src/lib.rs +++ b/crates/uv-cache/src/lib.rs @@ -12,7 +12,7 @@ use tracing::{debug, trace, warn}; use uv_cache_info::Timestamp; use uv_fs::{LockedFile, Simplified, cachedir, directories}; use uv_normalize::PackageName; -use uv_pypi_types::ResolutionMetadata; +use uv_pypi_types::{HashDigest, ResolutionMetadata}; pub use crate::by_timestamp::CachedByTimestamp; #[cfg(feature = "clap")] @@ -21,7 +21,7 @@ use crate::removal::Remover; pub use crate::removal::{Removal, rm_rf}; pub use crate::wheel::WheelCache; use crate::wheel::WheelCacheKind; -pub use archive::ArchiveId; +pub use archive::{ArchiveId, ArchiveVersion, LATEST}; mod archive; mod by_timestamp; @@ -30,11 +30,6 @@ mod cli; mod removal; mod wheel; -/// The version of the archive bucket. -/// -/// Must be kept in-sync with the version in [`CacheBucket::to_str`]. -pub const ARCHIVE_VERSION: u8 = 1; - /// A [`CacheEntry`] which may or may not exist yet. #[derive(Debug, Clone)] pub struct CacheEntry(PathBuf); @@ -267,8 +262,11 @@ impl Cache { } /// Return the path to an archive in the cache. - pub fn archive(&self, id: &ArchiveId) -> PathBuf { - self.bucket(CacheBucket::Archive).join(id) + pub fn archive(&self, id: &ArchiveId, version: ArchiveVersion) -> PathBuf { + // TODO(charlie): Reuse `CacheBucket::Archive`. 
+ self.root + .join(format!("archive-v{version}")) + .join(id.to_path_buf(version)) } /// Create a temporary directory to be used as a Python virtual environment. @@ -353,12 +351,17 @@ impl Cache { &self, temp_dir: impl AsRef<Path>, path: impl AsRef<Path>, - id: ArchiveId, + hash: HashDigest, ) -> io::Result<ArchiveId> { // Move the temporary directory into the directory store. - let archive_entry = self.entry(CacheBucket::Archive, "", &id); - fs_err::create_dir_all(archive_entry.dir())?; - match uv_fs::rename_with_retry(temp_dir.as_ref(), archive_entry.path()).await { + let id = ArchiveId::from(hash); + let archive_entry = self + .bucket(CacheBucket::Archive) + .join(id.to_path_buf(LATEST)); + if let Some(parent) = archive_entry.parent() { + fs_err::create_dir_all(parent)?; + } + match uv_fs::rename_with_retry(temp_dir.as_ref(), &archive_entry).await { Ok(()) => {} Err(err) if err.kind() == io::ErrorKind::AlreadyExists @@ -366,7 +369,7 @@ impl Cache { { debug!( "Archive already exists at {}; skipping extraction", - archive_entry.path().display() + archive_entry.display() ); fs_err::tokio::remove_dir_all(temp_dir.as_ref()).await?; } @@ -760,7 +763,7 @@ impl Cache { #[cfg(unix)] pub fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> { // Construct the link target. - let src = self.archive(id); + let src = self.archive(id, ArchiveVersion::V1); let dst = dst.as_ref(); // Attempt to create the symlink directly. @@ -797,7 +800,7 @@ struct Link { /// The unique ID of the entry in the archive bucket. id: ArchiveId, /// The version of the archive bucket. 
- version: u8, + version: ArchiveVersion, } #[allow(unused)] @@ -806,7 +809,7 @@ impl Link { fn new(id: ArchiveId) -> Self { Self { id, - version: ARCHIVE_VERSION, + version: ArchiveVersion::V1, } } } @@ -835,10 +838,10 @@ impl FromStr for Link { let version = version .strip_prefix("archive-v") .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version prefix"))?; - let version = u8::from_str(version).map_err(|err| { + let version = ArchiveVersion::from_str(version).map_err(|()| { io::Error::new( io::ErrorKind::InvalidData, - format!("failed to parse version: {err}"), + format!("failed to parse version: {version}"), ) })?; @@ -1368,15 +1371,20 @@ impl Refresh { #[cfg(test)] mod tests { - use std::str::FromStr; - use crate::ArchiveId; + use std::str::FromStr; + use uv_pypi_types::{HashAlgorithm, HashDigest}; + use uv_small_str::SmallString; use super::Link; #[test] fn test_link_round_trip() { - let id = ArchiveId::from_sha256("a".repeat(64).as_str()); + let digest = HashDigest { + algorithm: HashAlgorithm::Sha256, + digest: SmallString::from("a".repeat(64)), + }; + let id = ArchiveId::from(digest); let link = Link::new(id); let s = link.to_string(); let parsed = Link::from_str(&s).unwrap(); diff --git a/crates/uv-distribution/src/archive.rs b/crates/uv-distribution/src/archive.rs index a7c23c0d0..795079260 100644 --- a/crates/uv-distribution/src/archive.rs +++ b/crates/uv-distribution/src/archive.rs @@ -1,4 +1,4 @@ -use uv_cache::{ARCHIVE_VERSION, ArchiveId, Cache}; +use uv_cache::{ArchiveId, ArchiveVersion, Cache, LATEST}; use uv_distribution_filename::WheelFilename; use uv_distribution_types::Hashed; use uv_pypi_types::{HashAlgorithm, HashDigest, HashDigests}; @@ -13,7 +13,7 @@ pub struct Archive { /// The filename of the wheel. pub filename: WheelFilename, /// The version of the archive bucket. 
- pub version: u8, + pub version: ArchiveVersion, } impl Archive { @@ -26,18 +26,18 @@ impl Archive { .iter() .find(|digest| digest.algorithm == HashAlgorithm::Sha256) .expect("SHA256 hash must be present"); - let id = ArchiveId::from_sha256(&sha256.digest); + let id = ArchiveId::from(sha256.clone()); Self { id, hashes, filename, - version: ARCHIVE_VERSION, + version: LATEST, } } /// Returns `true` if the archive exists in the cache. pub(crate) fn exists(&self, cache: &Cache) -> bool { - self.version == ARCHIVE_VERSION && cache.archive(&self.id).exists() + cache.archive(&self.id, self.version).exists() } } diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index b4e84a4e3..a3c55ebe1 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -12,7 +12,7 @@ use tokio_util::compat::FuturesAsyncReadCompatExt; use tracing::{Instrument, info_span, instrument, warn}; use url::Url; -use uv_cache::{ArchiveId, CacheBucket, CacheEntry, WheelCache}; +use uv_cache::{ArchiveId, CacheBucket, CacheEntry, LATEST, WheelCache}; use uv_cache_info::{CacheInfo, Timestamp}; use uv_client::{ CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient, @@ -227,7 +227,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self .build_context .cache() - .archive(&archive.id) + .archive(&archive.id, archive.version) .into_boxed_path(), hashes: archive.hashes, filename: wheel.filename.clone(), @@ -265,7 +265,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self .build_context .cache() - .archive(&archive.id) + .archive(&archive.id, archive.version) .into_boxed_path(), hashes: archive.hashes, filename: wheel.filename.clone(), @@ -304,7 +304,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self .build_context .cache() - .archive(&archive.id) + 
.archive(&archive.id, archive.version) .into_boxed_path(), hashes: archive.hashes, filename: wheel.filename.clone(), @@ -335,7 +335,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self .build_context .cache() - .archive(&archive.id) + .archive(&archive.id, archive.version) .into_boxed_path(), hashes: archive.hashes, filename: wheel.filename.clone(), @@ -421,7 +421,11 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { Ok(LocalWheel { dist: Dist::Source(dist.clone()), - archive: self.build_context.cache().archive(&id).into_boxed_path(), + archive: self + .build_context + .cache() + .archive(&id, LATEST) + .into_boxed_path(), hashes: built_wheel.hashes, filename: built_wheel.filename, cache: built_wheel.cache_info, @@ -648,11 +652,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { // Persist the temporary directory to the directory store. self.build_context .cache() - .persist( - temp_dir.keep(), - wheel_entry.path(), - ArchiveId::from_sha256(&sha256.digest), - ) + .persist(temp_dir.keep(), wheel_entry.path(), sha256.clone()) .await .map_err(Error::CacheRead)?; @@ -838,11 +838,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { // Persist the temporary directory to the directory store. self.build_context .cache() - .persist( - temp_dir.keep(), - wheel_entry.path(), - ArchiveId::from_sha256(&sha256.digest), - ) + .persist(temp_dir.keep(), wheel_entry.path(), sha256.clone()) .await .map_err(Error::CacheRead)?; @@ -961,7 +957,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self .build_context .cache() - .archive(&archive.id) + .archive(&archive.id, archive.version) .into_boxed_path(), hashes: archive.hashes, filename: filename.clone(), @@ -1009,11 +1005,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { // Persist the temporary directory to the directory store. 
self.build_context .cache() - .persist( - temp_dir.keep(), - wheel_entry.path(), - ArchiveId::from_sha256(&sha256.digest), - ) + .persist(temp_dir.keep(), wheel_entry.path(), sha256.clone()) .await .map_err(Error::CacheWrite)?; @@ -1032,7 +1024,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self .build_context .cache() - .archive(&archive.id) + .archive(&archive.id, archive.version) .into_boxed_path(), hashes: archive.hashes, filename: filename.clone(), @@ -1066,17 +1058,13 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { hasher.finish().await.map_err(Error::HashExhaustion)?; // Extract the digest. - let hash_digest = HashDigest::from(hashers.into_iter().next().expect("SHA256 hasher")); + let sha256 = HashDigest::from(hashers.into_iter().next().expect("SHA256 hasher")); // Persist the temporary directory to the directory store. let id = self .build_context .cache() - .persist( - temp_dir.keep(), - target, - ArchiveId::from_sha256(&hash_digest.digest), - ) + .persist(temp_dir.keep(), target, sha256.clone()) .await .map_err(Error::CacheWrite)?; diff --git a/crates/uv-distribution/src/index/cached_wheel.rs b/crates/uv-distribution/src/index/cached_wheel.rs index 1f91ef13a..768dc22bd 100644 --- a/crates/uv-distribution/src/index/cached_wheel.rs +++ b/crates/uv-distribution/src/index/cached_wheel.rs @@ -1,6 +1,6 @@ use std::path::Path; -use uv_cache::{Cache, CacheBucket, CacheEntry}; +use uv_cache::{Cache, CacheEntry}; use uv_cache_info::CacheInfo; use uv_distribution_filename::WheelFilename; use uv_distribution_types::{ @@ -76,19 +76,23 @@ impl CachedWheel { let cache_info = pointer.to_cache_info(); let build_info = pointer.to_build_info(); let archive = pointer.into_archive(); + let Archive { + id, + version, + hashes, + .. + } = archive; + let path = cache.archive(&id, version); // Ignore stale pointers. - if !archive.exists(cache) { + if !path.exists() { return None; } - let Archive { id, hashes, .. 
} = archive; - let entry = cache.entry(CacheBucket::Archive, "", id); - // Convert to a cached wheel. Some(Self { filename: archive.filename, - entry, + entry: CacheEntry::from_path(path), hashes, cache_info, build_info, @@ -104,19 +108,23 @@ impl CachedWheel { let cache_info = pointer.to_cache_info(); let build_info = pointer.to_build_info(); let archive = pointer.into_archive(); + let Archive { + id, + version, + hashes, + .. + } = archive; + let path = cache.archive(&id, version); // Ignore stale pointers. - if !archive.exists(cache) { + if !path.exists() { return None; } - let Archive { id, hashes, .. } = archive; - let entry = cache.entry(CacheBucket::Archive, "", id); - // Convert to a cached wheel. Some(Self { filename: archive.filename, - entry, + entry: CacheEntry::from_path(path), hashes, cache_info, build_info, diff --git a/crates/uv-installer/src/plan.rs b/crates/uv-installer/src/plan.rs index 1941a8371..359ed0464 100644 --- a/crates/uv-installer/src/plan.rs +++ b/crates/uv-installer/src/plan.rs @@ -263,7 +263,9 @@ impl<'a> Planner<'a> { hashes: archive.hashes, cache_info, build_info, - path: cache.archive(&archive.id).into_boxed_path(), + path: cache + .archive(&archive.id, archive.version) + .into_boxed_path(), }; debug!("URL wheel requirement already cached: {cached_dist}"); @@ -338,7 +340,9 @@ impl<'a> Planner<'a> { hashes: archive.hashes, cache_info, build_info, - path: cache.archive(&archive.id).into_boxed_path(), + path: cache + .archive(&archive.id, archive.version) + .into_boxed_path(), }; debug!( diff --git a/crates/uv/src/commands/project/environment.rs b/crates/uv/src/commands/project/environment.rs index 56f028021..2c6759020 100644 --- a/crates/uv/src/commands/project/environment.rs +++ b/crates/uv/src/commands/project/environment.rs @@ -2,6 +2,17 @@ use std::path::Path; use tracing::debug; +use uv_cache::{Cache, CacheBucket, LATEST}; +use uv_cache_key::{cache_digest, hash_digest}; +use uv_client::BaseClientBuilder; +use 
uv_configuration::{Concurrency, Constraints, TargetTriple}; +use uv_distribution_types::{Name, Resolution}; +use uv_extract::hash::Hasher; +use uv_fs::PythonExt; +use uv_preview::Preview; +use uv_pypi_types::{HashAlgorithm, HashDigest}; +use uv_python::{Interpreter, PythonEnvironment, canonicalize_executable}; + use crate::commands::pip::loggers::{InstallLogger, ResolveLogger}; use crate::commands::pip::operations::Modifications; use crate::commands::project::{ @@ -10,15 +21,6 @@ use crate::commands::project::{ use crate::printer::Printer; use crate::settings::ResolverInstallerSettings; -use uv_cache::{ArchiveId, Cache, CacheBucket}; -use uv_cache_key::{cache_digest, hash_digest}; -use uv_client::BaseClientBuilder; -use uv_configuration::{Concurrency, Constraints, TargetTriple}; -use uv_distribution_types::{Name, Resolution}; -use uv_fs::PythonExt; -use uv_preview::Preview; -use uv_python::{Interpreter, PythonEnvironment, canonicalize_executable}; - /// An ephemeral [`PythonEnvironment`] for running an individual command. #[derive(Debug)] pub(crate) struct EphemeralEnvironment(PythonEnvironment); @@ -172,7 +174,11 @@ impl CachedEnvironment { cache_digest(&canonicalize_executable(interpreter.sys_executable())?); // Search in the content-addressed cache. - let cache_entry = cache.entry(CacheBucket::Environments, interpreter_hash, resolution_hash); + let cache_entry = cache.entry( + CacheBucket::Environments, + &interpreter_hash, + &resolution_hash, + ); if let Ok(root) = cache.resolve_link(cache_entry.path()) { if let Ok(environment) = PythonEnvironment::from_root(root, cache) { @@ -212,10 +218,14 @@ impl CachedEnvironment { .await?; // Now that the environment is complete, sync it to its content-addressed location. 
+ let mut hasher = Hasher::from(HashAlgorithm::Sha256); + hasher.update(interpreter_hash.as_bytes()); + hasher.update(resolution_hash.as_bytes()); + let sha256 = HashDigest::from(hasher); let id = cache - .persist(temp_dir.keep(), cache_entry.path(), ArchiveId::nanoid()) + .persist(temp_dir.keep(), cache_entry.path(), sha256) .await?; - let root = cache.archive(&id); + let root = cache.archive(&id, LATEST); Ok(Self(PythonEnvironment::from_root(root, cache)?)) } diff --git a/crates/uv/tests/it/sync.rs b/crates/uv/tests/it/sync.rs index fad338270..42a61751a 100644 --- a/crates/uv/tests/it/sync.rs +++ b/crates/uv/tests/it/sync.rs @@ -9377,7 +9377,6 @@ fn sync_all_groups() -> Result<()> { ----- stderr ----- Resolved 8 packages in [TIME] - Prepared 1 package in [TIME] Uninstalled 2 packages in [TIME] Installed 1 package in [TIME] + packaging==24.0