Use a slash-delimited path

This commit is contained in:
Charlie Marsh 2025-11-21 23:15:09 -05:00
parent 6dbfe80ed7
commit 3bf79e2ada
10 changed files with 160 additions and 102 deletions

1
Cargo.lock generated
View File

@ -5683,7 +5683,6 @@ version = "0.0.1"
dependencies = [
"clap",
"fs-err",
"nanoid",
"rmp-serde",
"rustc-hash",
"same-file",

View File

@ -29,7 +29,6 @@ uv-static = { workspace = true }
clap = { workspace = true, features = ["derive", "env"], optional = true }
fs-err = { workspace = true, features = ["tokio"] }
nanoid = { workspace = true }
rmp-serde = { workspace = true }
rustc-hash = { workspace = true }
same-file = { workspace = true }

View File

@ -1,26 +1,58 @@
use std::path::Path;
use std::path::PathBuf;
use std::str::FromStr;
use uv_pypi_types::{HashAlgorithm, HashDigest};
use uv_small_str::SmallString;
/// The latest version of the archive bucket.
pub static LATEST: ArchiveVersion = ArchiveVersion::V1;
/// A version of the on-disk layout for the archive (unzipped wheel) cache bucket.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)]
pub enum ArchiveVersion {
/// Version 0: archives keyed by a 21-character NanoID, stored as a single path segment.
V0 = 0,
/// Version 1: archives keyed by a SHA256 hex digest, split into nested path segments.
V1 = 1,
}
impl std::fmt::Display for ArchiveVersion {
    /// Render the version as its numeric string form (`"0"` or `"1"`),
    /// matching the representation accepted by [`FromStr`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let repr = match self {
            Self::V0 => "0",
            Self::V1 => "1",
        };
        f.write_str(repr)
    }
}
impl FromStr for ArchiveVersion {
    type Err = ();

    /// Parse a numeric version string (`"0"` or `"1"`) into an [`ArchiveVersion`];
    /// any other input yields `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "0" {
            Ok(Self::V0)
        } else if s == "1" {
            Ok(Self::V1)
        } else {
            Err(())
        }
    }
}
/// A unique identifier for an archive (unzipped wheel) in the cache.
#[derive(Debug, Clone, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)]
pub struct ArchiveId(String);
pub struct ArchiveId(SmallString);
impl ArchiveId {
/// Create a content-addressed identifier for an archive from a SHA256 digest.
pub fn from_sha256(digest: &str) -> Self {
Self(digest.to_string())
}
/// Create a random content-addressed identifier for an archive.
pub fn nanoid() -> Self {
Self(nanoid::nanoid!())
}
}
impl AsRef<Path> for ArchiveId {
fn as_ref(&self) -> &Path {
self.0.as_ref()
/// Return the content-addressed path for the [`ArchiveId`].
pub fn to_path_buf(&self, version: ArchiveVersion) -> PathBuf {
match version {
// Version 0: A 21-digit NanoID.
ArchiveVersion::V0 => PathBuf::from(self.0.as_ref()),
// Version 1: A SHA256 hex digest, split into three segments.
ArchiveVersion::V1 => {
let mut path = PathBuf::new();
path.push(&self.0[0..2]);
path.push(&self.0[2..4]);
path.push(&self.0[4..]);
path
}
}
}
}
@ -34,6 +66,17 @@ impl FromStr for ArchiveId {
type Err = <String as FromStr>::Err;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self(s.to_string()))
Ok(Self(SmallString::from(s)))
}
}
/// Convert a SHA256 [`HashDigest`] into a content-addressed [`ArchiveId`].
///
/// # Panics
///
/// Panics if the digest was produced by any algorithm other than SHA256,
/// since archive IDs are content-addressed by SHA256 only.
impl From<HashDigest> for ArchiveId {
fn from(value: HashDigest) -> Self {
assert_eq!(
value.algorithm,
HashAlgorithm::Sha256,
"Archive IDs must be created from SHA256 digests"
);
Self(value.digest)
}
}

View File

@ -12,7 +12,7 @@ use tracing::{debug, trace, warn};
use uv_cache_info::Timestamp;
use uv_fs::{LockedFile, Simplified, cachedir, directories};
use uv_normalize::PackageName;
use uv_pypi_types::ResolutionMetadata;
use uv_pypi_types::{HashDigest, ResolutionMetadata};
pub use crate::by_timestamp::CachedByTimestamp;
#[cfg(feature = "clap")]
@ -21,7 +21,7 @@ use crate::removal::Remover;
pub use crate::removal::{Removal, rm_rf};
pub use crate::wheel::WheelCache;
use crate::wheel::WheelCacheKind;
pub use archive::ArchiveId;
pub use archive::{ArchiveId, ArchiveVersion, LATEST};
mod archive;
mod by_timestamp;
@ -30,11 +30,6 @@ mod cli;
mod removal;
mod wheel;
/// The version of the archive bucket.
///
/// Must be kept in-sync with the version in [`CacheBucket::to_str`].
pub const ARCHIVE_VERSION: u8 = 1;
/// A [`CacheEntry`] which may or may not exist yet.
#[derive(Debug, Clone)]
pub struct CacheEntry(PathBuf);
@ -267,8 +262,11 @@ impl Cache {
}
/// Return the path to an archive in the cache.
pub fn archive(&self, id: &ArchiveId) -> PathBuf {
self.bucket(CacheBucket::Archive).join(id)
pub fn archive(&self, id: &ArchiveId, version: ArchiveVersion) -> PathBuf {
// TODO(charlie): Reuse `CacheBucket::Archive`.
self.root
.join(format!("archive-v{version}"))
.join(id.to_path_buf(version))
}
/// Create a temporary directory to be used as a Python virtual environment.
@ -353,12 +351,17 @@ impl Cache {
&self,
temp_dir: impl AsRef<Path>,
path: impl AsRef<Path>,
id: ArchiveId,
hash: HashDigest,
) -> io::Result<ArchiveId> {
// Move the temporary directory into the directory store.
let archive_entry = self.entry(CacheBucket::Archive, "", &id);
fs_err::create_dir_all(archive_entry.dir())?;
match uv_fs::rename_with_retry(temp_dir.as_ref(), archive_entry.path()).await {
let id = ArchiveId::from(hash);
let archive_entry = self
.bucket(CacheBucket::Archive)
.join(id.to_path_buf(LATEST));
if let Some(parent) = archive_entry.parent() {
fs_err::create_dir_all(parent)?;
}
match uv_fs::rename_with_retry(temp_dir.as_ref(), &archive_entry).await {
Ok(()) => {}
Err(err)
if err.kind() == io::ErrorKind::AlreadyExists
@ -366,7 +369,7 @@ impl Cache {
{
debug!(
"Archive already exists at {}; skipping extraction",
archive_entry.path().display()
archive_entry.display()
);
fs_err::tokio::remove_dir_all(temp_dir.as_ref()).await?;
}
@ -760,7 +763,7 @@ impl Cache {
#[cfg(unix)]
pub fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
// Construct the link target.
let src = self.archive(id);
let src = self.archive(id, ArchiveVersion::V1);
let dst = dst.as_ref();
// Attempt to create the symlink directly.
@ -797,7 +800,7 @@ struct Link {
/// The unique ID of the entry in the archive bucket.
id: ArchiveId,
/// The version of the archive bucket.
version: u8,
version: ArchiveVersion,
}
#[allow(unused)]
@ -806,7 +809,7 @@ impl Link {
fn new(id: ArchiveId) -> Self {
Self {
id,
version: ARCHIVE_VERSION,
version: ArchiveVersion::V1,
}
}
}
@ -835,10 +838,10 @@ impl FromStr for Link {
let version = version
.strip_prefix("archive-v")
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version prefix"))?;
let version = u8::from_str(version).map_err(|err| {
let version = ArchiveVersion::from_str(version).map_err(|()| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("failed to parse version: {err}"),
format!("failed to parse version: {version}"),
)
})?;
@ -1368,15 +1371,20 @@ impl Refresh {
#[cfg(test)]
mod tests {
use std::str::FromStr;
use crate::ArchiveId;
use std::str::FromStr;
use uv_pypi_types::{HashAlgorithm, HashDigest};
use uv_small_str::SmallString;
use super::Link;
#[test]
fn test_link_round_trip() {
let id = ArchiveId::from_sha256("a".repeat(64).as_str());
let digest = HashDigest {
algorithm: HashAlgorithm::Sha256,
digest: SmallString::from("a".repeat(64)),
};
let id = ArchiveId::from(digest);
let link = Link::new(id);
let s = link.to_string();
let parsed = Link::from_str(&s).unwrap();

View File

@ -1,4 +1,4 @@
use uv_cache::{ARCHIVE_VERSION, ArchiveId, Cache};
use uv_cache::{ArchiveId, ArchiveVersion, Cache, LATEST};
use uv_distribution_filename::WheelFilename;
use uv_distribution_types::Hashed;
use uv_pypi_types::{HashAlgorithm, HashDigest, HashDigests};
@ -13,7 +13,7 @@ pub struct Archive {
/// The filename of the wheel.
pub filename: WheelFilename,
/// The version of the archive bucket.
pub version: u8,
pub version: ArchiveVersion,
}
impl Archive {
@ -26,18 +26,18 @@ impl Archive {
.iter()
.find(|digest| digest.algorithm == HashAlgorithm::Sha256)
.expect("SHA256 hash must be present");
let id = ArchiveId::from_sha256(&sha256.digest);
let id = ArchiveId::from(sha256.clone());
Self {
id,
hashes,
filename,
version: ARCHIVE_VERSION,
version: LATEST,
}
}
/// Returns `true` if the archive exists in the cache.
pub(crate) fn exists(&self, cache: &Cache) -> bool {
self.version == ARCHIVE_VERSION && cache.archive(&self.id).exists()
cache.archive(&self.id, self.version).exists()
}
}

View File

@ -12,7 +12,7 @@ use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::{Instrument, info_span, instrument, warn};
use url::Url;
use uv_cache::{ArchiveId, CacheBucket, CacheEntry, WheelCache};
use uv_cache::{ArchiveId, CacheBucket, CacheEntry, LATEST, WheelCache};
use uv_cache_info::{CacheInfo, Timestamp};
use uv_client::{
CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
@ -227,7 +227,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
archive: self
.build_context
.cache()
.archive(&archive.id)
.archive(&archive.id, archive.version)
.into_boxed_path(),
hashes: archive.hashes,
filename: wheel.filename.clone(),
@ -265,7 +265,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
archive: self
.build_context
.cache()
.archive(&archive.id)
.archive(&archive.id, archive.version)
.into_boxed_path(),
hashes: archive.hashes,
filename: wheel.filename.clone(),
@ -304,7 +304,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
archive: self
.build_context
.cache()
.archive(&archive.id)
.archive(&archive.id, archive.version)
.into_boxed_path(),
hashes: archive.hashes,
filename: wheel.filename.clone(),
@ -335,7 +335,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
archive: self
.build_context
.cache()
.archive(&archive.id)
.archive(&archive.id, archive.version)
.into_boxed_path(),
hashes: archive.hashes,
filename: wheel.filename.clone(),
@ -421,7 +421,11 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
Ok(LocalWheel {
dist: Dist::Source(dist.clone()),
archive: self.build_context.cache().archive(&id).into_boxed_path(),
archive: self
.build_context
.cache()
.archive(&id, LATEST)
.into_boxed_path(),
hashes: built_wheel.hashes,
filename: built_wheel.filename,
cache: built_wheel.cache_info,
@ -648,11 +652,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
// Persist the temporary directory to the directory store.
self.build_context
.cache()
.persist(
temp_dir.keep(),
wheel_entry.path(),
ArchiveId::from_sha256(&sha256.digest),
)
.persist(temp_dir.keep(), wheel_entry.path(), sha256.clone())
.await
.map_err(Error::CacheRead)?;
@ -838,11 +838,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
// Persist the temporary directory to the directory store.
self.build_context
.cache()
.persist(
temp_dir.keep(),
wheel_entry.path(),
ArchiveId::from_sha256(&sha256.digest),
)
.persist(temp_dir.keep(), wheel_entry.path(), sha256.clone())
.await
.map_err(Error::CacheRead)?;
@ -961,7 +957,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
archive: self
.build_context
.cache()
.archive(&archive.id)
.archive(&archive.id, archive.version)
.into_boxed_path(),
hashes: archive.hashes,
filename: filename.clone(),
@ -1009,11 +1005,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
// Persist the temporary directory to the directory store.
self.build_context
.cache()
.persist(
temp_dir.keep(),
wheel_entry.path(),
ArchiveId::from_sha256(&sha256.digest),
)
.persist(temp_dir.keep(), wheel_entry.path(), sha256.clone())
.await
.map_err(Error::CacheWrite)?;
@ -1032,7 +1024,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
archive: self
.build_context
.cache()
.archive(&archive.id)
.archive(&archive.id, archive.version)
.into_boxed_path(),
hashes: archive.hashes,
filename: filename.clone(),
@ -1066,17 +1058,13 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
hasher.finish().await.map_err(Error::HashExhaustion)?;
// Extract the digest.
let hash_digest = HashDigest::from(hashers.into_iter().next().expect("SHA256 hasher"));
let sha256 = HashDigest::from(hashers.into_iter().next().expect("SHA256 hasher"));
// Persist the temporary directory to the directory store.
let id = self
.build_context
.cache()
.persist(
temp_dir.keep(),
target,
ArchiveId::from_sha256(&hash_digest.digest),
)
.persist(temp_dir.keep(), target, sha256.clone())
.await
.map_err(Error::CacheWrite)?;

View File

@ -1,6 +1,6 @@
use std::path::Path;
use uv_cache::{Cache, CacheBucket, CacheEntry};
use uv_cache::{Cache, CacheEntry};
use uv_cache_info::CacheInfo;
use uv_distribution_filename::WheelFilename;
use uv_distribution_types::{
@ -76,19 +76,23 @@ impl CachedWheel {
let cache_info = pointer.to_cache_info();
let build_info = pointer.to_build_info();
let archive = pointer.into_archive();
let Archive {
id,
version,
hashes,
..
} = archive;
let path = cache.archive(&id, version);
// Ignore stale pointers.
if !archive.exists(cache) {
if !path.exists() {
return None;
}
let Archive { id, hashes, .. } = archive;
let entry = cache.entry(CacheBucket::Archive, "", id);
// Convert to a cached wheel.
Some(Self {
filename: archive.filename,
entry,
entry: CacheEntry::from_path(path),
hashes,
cache_info,
build_info,
@ -104,19 +108,23 @@ impl CachedWheel {
let cache_info = pointer.to_cache_info();
let build_info = pointer.to_build_info();
let archive = pointer.into_archive();
let Archive {
id,
version,
hashes,
..
} = archive;
let path = cache.archive(&id, version);
// Ignore stale pointers.
if !archive.exists(cache) {
if !path.exists() {
return None;
}
let Archive { id, hashes, .. } = archive;
let entry = cache.entry(CacheBucket::Archive, "", id);
// Convert to a cached wheel.
Some(Self {
filename: archive.filename,
entry,
entry: CacheEntry::from_path(path),
hashes,
cache_info,
build_info,

View File

@ -263,7 +263,9 @@ impl<'a> Planner<'a> {
hashes: archive.hashes,
cache_info,
build_info,
path: cache.archive(&archive.id).into_boxed_path(),
path: cache
.archive(&archive.id, archive.version)
.into_boxed_path(),
};
debug!("URL wheel requirement already cached: {cached_dist}");
@ -338,7 +340,9 @@ impl<'a> Planner<'a> {
hashes: archive.hashes,
cache_info,
build_info,
path: cache.archive(&archive.id).into_boxed_path(),
path: cache
.archive(&archive.id, archive.version)
.into_boxed_path(),
};
debug!(

View File

@ -2,6 +2,17 @@ use std::path::Path;
use tracing::debug;
use uv_cache::{Cache, CacheBucket, LATEST};
use uv_cache_key::{cache_digest, hash_digest};
use uv_client::BaseClientBuilder;
use uv_configuration::{Concurrency, Constraints, TargetTriple};
use uv_distribution_types::{Name, Resolution};
use uv_extract::hash::Hasher;
use uv_fs::PythonExt;
use uv_preview::Preview;
use uv_pypi_types::{HashAlgorithm, HashDigest};
use uv_python::{Interpreter, PythonEnvironment, canonicalize_executable};
use crate::commands::pip::loggers::{InstallLogger, ResolveLogger};
use crate::commands::pip::operations::Modifications;
use crate::commands::project::{
@ -10,15 +21,6 @@ use crate::commands::project::{
use crate::printer::Printer;
use crate::settings::ResolverInstallerSettings;
use uv_cache::{ArchiveId, Cache, CacheBucket};
use uv_cache_key::{cache_digest, hash_digest};
use uv_client::BaseClientBuilder;
use uv_configuration::{Concurrency, Constraints, TargetTriple};
use uv_distribution_types::{Name, Resolution};
use uv_fs::PythonExt;
use uv_preview::Preview;
use uv_python::{Interpreter, PythonEnvironment, canonicalize_executable};
/// An ephemeral [`PythonEnvironment`] for running an individual command.
#[derive(Debug)]
pub(crate) struct EphemeralEnvironment(PythonEnvironment);
@ -172,7 +174,11 @@ impl CachedEnvironment {
cache_digest(&canonicalize_executable(interpreter.sys_executable())?);
// Search in the content-addressed cache.
let cache_entry = cache.entry(CacheBucket::Environments, interpreter_hash, resolution_hash);
let cache_entry = cache.entry(
CacheBucket::Environments,
&interpreter_hash,
&resolution_hash,
);
if let Ok(root) = cache.resolve_link(cache_entry.path()) {
if let Ok(environment) = PythonEnvironment::from_root(root, cache) {
@ -212,10 +218,14 @@ impl CachedEnvironment {
.await?;
// Now that the environment is complete, sync it to its content-addressed location.
let mut hasher = Hasher::from(HashAlgorithm::Sha256);
hasher.update(interpreter_hash.as_bytes());
hasher.update(resolution_hash.as_bytes());
let sha256 = HashDigest::from(hasher);
let id = cache
.persist(temp_dir.keep(), cache_entry.path(), ArchiveId::nanoid())
.persist(temp_dir.keep(), cache_entry.path(), sha256)
.await?;
let root = cache.archive(&id);
let root = cache.archive(&id, LATEST);
Ok(Self(PythonEnvironment::from_root(root, cache)?))
}

View File

@ -9377,7 +9377,6 @@ fn sync_all_groups() -> Result<()> {
----- stderr -----
Resolved 8 packages in [TIME]
Prepared 1 package in [TIME]
Uninstalled 2 packages in [TIME]
Installed 1 package in [TIME]
+ packaging==24.0