mirror of https://github.com/astral-sh/uv
Use a stable cache key
This commit is contained in:
parent
203594d482
commit
ed48a81fa7
|
|
@ -5833,9 +5833,11 @@ name = "uv-distribution"
|
|||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake2",
|
||||
"either",
|
||||
"fs-err",
|
||||
"futures",
|
||||
"hex",
|
||||
"indoc",
|
||||
"insta",
|
||||
"nanoid",
|
||||
|
|
@ -5856,6 +5858,7 @@ dependencies = [
|
|||
"uv-auth",
|
||||
"uv-cache",
|
||||
"uv-cache-info",
|
||||
"uv-cache-key",
|
||||
"uv-client",
|
||||
"uv-configuration",
|
||||
"uv-distribution-filename",
|
||||
|
|
|
|||
|
|
@ -704,7 +704,10 @@ impl RegistryClient {
|
|||
pub async fn fetch_simple_index(
|
||||
&self,
|
||||
index_url: &IndexUrl,
|
||||
download_concurrency: &Semaphore,
|
||||
) -> Result<SimpleIndexMetadata, Error> {
|
||||
let _permit = download_concurrency.acquire().await;
|
||||
|
||||
// Format the URL for PyPI.
|
||||
let mut url = index_url.url().clone();
|
||||
url.path_segments_mut()
|
||||
|
|
@ -1306,15 +1309,10 @@ pub struct VersionSourceDist {
|
|||
#[rkyv(derive(Debug))]
|
||||
pub struct SimpleIndexMetadata {
|
||||
/// The list of project names available in the index.
|
||||
projects: Vec<PackageName>,
|
||||
pub projects: Vec<PackageName>,
|
||||
}
|
||||
|
||||
impl SimpleIndexMetadata {
|
||||
/// Iterate over the projects in the index.
|
||||
pub fn iter(&self) -> impl Iterator<Item = &PackageName> {
|
||||
self.projects.iter()
|
||||
}
|
||||
|
||||
/// Create a [`SimpleIndexMetadata`] from a [`PypiSimpleIndex`].
|
||||
fn from_pypi_index(index: PypiSimpleIndex) -> Self {
|
||||
Self {
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ workspace = true
|
|||
uv-auth = { workspace = true }
|
||||
uv-cache = { workspace = true }
|
||||
uv-cache-info = { workspace = true }
|
||||
uv-cache-key = { workspace = true }
|
||||
uv-client = { workspace = true }
|
||||
uv-configuration = { workspace = true }
|
||||
uv-distribution-filename = { workspace = true }
|
||||
|
|
@ -39,9 +40,11 @@ uv-types = { workspace = true }
|
|||
uv-workspace = { workspace = true }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
blake2 = { workspace = true }
|
||||
either = { workspace = true }
|
||||
fs-err = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
hex = { workspace = true }
|
||||
nanoid = { workspace = true }
|
||||
owo-colors = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ use tempfile::TempDir;
|
|||
use tokio::io::{AsyncRead, AsyncSeekExt, ReadBuf};
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tracing::{Instrument, info_span, instrument, warn, debug};
|
||||
use tracing::{Instrument, debug, info_span, instrument, warn};
|
||||
use url::Url;
|
||||
|
||||
use uv_cache::{ArchiveId, CacheBucket, CacheEntry, WheelCache};
|
||||
|
|
@ -383,7 +383,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
|
|||
hashes: HashPolicy<'_>,
|
||||
) -> Result<LocalWheel, Error> {
|
||||
// If the metadata is available in a remote cache, fetch it.
|
||||
if let Some(wheel) = self.get_remote_wheel(dist, tags, hashes).await? {
|
||||
if let Ok(Some(wheel)) = self.get_remote_wheel(dist, tags, hashes).await {
|
||||
return Ok(wheel);
|
||||
}
|
||||
|
||||
|
|
@ -558,7 +558,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
|
|||
}
|
||||
|
||||
// If the metadata is available in a remote cache, fetch it.
|
||||
if let Some(metadata) = self.get_remote_metadata(source, hashes).await? {
|
||||
if let Ok(Some(metadata)) = self.get_remote_metadata(source, hashes).await {
|
||||
return Ok(metadata);
|
||||
}
|
||||
|
||||
|
|
@ -584,34 +584,31 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
|
|||
/// Fetch a wheel from a remote cache, if available.
|
||||
async fn get_remote_wheel(
|
||||
&self,
|
||||
dist: &SourceDist,
|
||||
source: &SourceDist,
|
||||
tags: &Tags,
|
||||
hashes: HashPolicy<'_>,
|
||||
) -> Result<Option<LocalWheel>, Error> {
|
||||
let Some(index) = self
|
||||
.resolver
|
||||
.get_cached_distribution(dist, Some(tags), &self.client)
|
||||
.get_cached_distribution(&BuildableSource::Dist(source), Some(tags), &self.client)
|
||||
.await?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
let Some(entries) = index.get(dist.name()) else {
|
||||
return Ok(None);
|
||||
};
|
||||
for (.., prioritized_dist) in entries.iter() {
|
||||
for prioritized_dist in index.iter() {
|
||||
let Some(compatible_dist) = prioritized_dist.get() else {
|
||||
continue;
|
||||
};
|
||||
match compatible_dist {
|
||||
CompatibleDist::InstalledDist(..) => {}
|
||||
CompatibleDist::SourceDist { sdist, .. } => {
|
||||
debug!("Found cached remote source distribution for: {dist}");
|
||||
debug!("Found cached remote source distribution for: {source}");
|
||||
let dist = SourceDist::Registry(sdist.clone());
|
||||
return self.build_wheel_inner(&dist, tags, hashes).await.map(Some);
|
||||
}
|
||||
CompatibleDist::CompatibleWheel { wheel, .. }
|
||||
| CompatibleDist::IncompatibleWheel { wheel, .. } => {
|
||||
debug!("Found cached remote built distribution for: {dist}");
|
||||
debug!("Found cached remote built distribution for: {source}");
|
||||
let dist = BuiltDist::Registry(RegistryBuiltDist {
|
||||
wheels: vec![wheel.clone()],
|
||||
best_wheel_index: 0,
|
||||
|
|
@ -630,30 +627,21 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
|
|||
source: &BuildableSource<'_>,
|
||||
hashes: HashPolicy<'_>,
|
||||
) -> Result<Option<ArchiveMetadata>, Error> {
|
||||
// TODO(charlie): If the distribution is unnamed, we should be able to infer the name
|
||||
// from the list of available distributions in the index, since we expect exactly one
|
||||
// package name per cache entry.
|
||||
let BuildableSource::Dist(dist) = source else {
|
||||
return Ok(None);
|
||||
};
|
||||
let Some(index) = self
|
||||
.resolver
|
||||
.get_cached_distribution(dist, None, &self.client)
|
||||
.get_cached_distribution(source, None, &self.client)
|
||||
.await?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
let Some(entries) = index.get(dist.name()) else {
|
||||
return Ok(None);
|
||||
};
|
||||
for (.., prioritized_dist) in entries.iter() {
|
||||
for prioritized_dist in index.iter() {
|
||||
let Some(compatible_dist) = prioritized_dist.get() else {
|
||||
continue;
|
||||
};
|
||||
match compatible_dist {
|
||||
CompatibleDist::InstalledDist(..) => {}
|
||||
CompatibleDist::SourceDist { sdist, .. } => {
|
||||
debug!("Found cached remote source distribution for: {dist}");
|
||||
debug!("Found cached remote source distribution for: {source}");
|
||||
let dist = SourceDist::Registry(sdist.clone());
|
||||
return self
|
||||
.build_wheel_metadata_inner(&BuildableSource::Dist(&dist), hashes)
|
||||
|
|
@ -662,7 +650,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
|
|||
}
|
||||
CompatibleDist::CompatibleWheel { wheel, .. }
|
||||
| CompatibleDist::IncompatibleWheel { wheel, .. } => {
|
||||
debug!("Found cached remote built distribution for: {dist}");
|
||||
debug!("Found cached remote built distribution for: {source}");
|
||||
let dist = BuiltDist::Registry(RegistryBuiltDist {
|
||||
wheels: vec![wheel.clone()],
|
||||
best_wheel_index: 0,
|
||||
|
|
|
|||
|
|
@ -1,21 +1,25 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use blake2::Digest;
|
||||
use rustc_hash::FxHashMap;
|
||||
use tokio::sync::Mutex;
|
||||
use tracing::instrument;
|
||||
use tracing::{debug, instrument, warn};
|
||||
|
||||
use uv_auth::PyxTokenStore;
|
||||
use uv_client::{MetadataFormat, VersionFiles};
|
||||
use uv_cache_key::RepositoryUrl;
|
||||
use uv_client::{MetadataFormat, SimpleIndexMetadata, VersionFiles};
|
||||
use uv_configuration::BuildOptions;
|
||||
use uv_distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
|
||||
use uv_distribution_types::{
|
||||
File, HashComparison, HashPolicy, IncompatibleSource, IncompatibleWheel, IndexFormat,
|
||||
IndexMetadata, IndexUrl, Name, PrioritizedDist, RegistryBuiltWheel, RegistrySourceDist,
|
||||
SourceDist, SourceDistCompatibility, WheelCompatibility,
|
||||
BuildableSource, File, HashComparison, HashPolicy, IncompatibleSource, IncompatibleWheel,
|
||||
IndexFormat, IndexMetadata, IndexUrl, PrioritizedDist, RegistryBuiltWheel, RegistrySourceDist,
|
||||
SourceDist, SourceDistCompatibility, SourceUrl, WheelCompatibility,
|
||||
};
|
||||
use uv_git_types::GitHubRepository;
|
||||
use uv_git_types::{GitOid, GitUrl};
|
||||
use uv_normalize::PackageName;
|
||||
use uv_pep440::Version;
|
||||
use uv_pep508::VerbatimUrl;
|
||||
|
|
@ -49,12 +53,12 @@ impl<'a, T: BuildContext> RemoteCacheResolver<'a, T> {
|
|||
/// Return the cached Git index for the given distribution, if any.
|
||||
pub(crate) async fn get_cached_distribution(
|
||||
&self,
|
||||
dist: &SourceDist,
|
||||
source: &BuildableSource<'_>,
|
||||
tags: Option<&Tags>,
|
||||
client: &ManagedClient<'a>,
|
||||
) -> Result<Option<GitIndex>, Error> {
|
||||
// Fetch the entries for the given distribution.
|
||||
let entries = self.get_or_fetch_index(dist, client).await?;
|
||||
let entries = self.get_or_fetch_index(source, client).await?;
|
||||
if entries.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
|
@ -72,38 +76,81 @@ impl<'a, T: BuildContext> RemoteCacheResolver<'a, T> {
|
|||
/// Fetch the remote Git index for the given distribution.
|
||||
async fn get_or_fetch_index(
|
||||
&self,
|
||||
dist: &SourceDist,
|
||||
source: &BuildableSource<'_>,
|
||||
client: &ManagedClient<'a>,
|
||||
) -> Result<Vec<GitIndexEntry>, Error> {
|
||||
#[derive(Debug)]
|
||||
struct BuildableGitSource<'a> {
|
||||
git: &'a GitUrl,
|
||||
subdirectory: Option<&'a Path>,
|
||||
name: Option<&'a PackageName>,
|
||||
}
|
||||
|
||||
let Some(workspace) = &self.workspace else {
|
||||
return Ok(Vec::default());
|
||||
};
|
||||
|
||||
let SourceDist::Git(dist) = dist else {
|
||||
return Ok(Vec::default());
|
||||
};
|
||||
|
||||
// TODO(charlie): Handle subdirectories.
|
||||
if dist.subdirectory.is_some() {
|
||||
let source = match source {
|
||||
BuildableSource::Dist(SourceDist::Git(dist)) => BuildableGitSource {
|
||||
git: &dist.git,
|
||||
subdirectory: dist.subdirectory.as_deref(),
|
||||
name: Some(&dist.name),
|
||||
},
|
||||
BuildableSource::Url(SourceUrl::Git(url)) => BuildableGitSource {
|
||||
git: url.git,
|
||||
subdirectory: url.subdirectory,
|
||||
name: None,
|
||||
},
|
||||
_ => {
|
||||
return Ok(Vec::default());
|
||||
}
|
||||
};
|
||||
|
||||
let Some(repo) = GitHubRepository::parse(dist.git.repository()) else {
|
||||
let Some(precise) = self.build_context.git().get_precise(source.git) else {
|
||||
return Ok(Vec::default());
|
||||
};
|
||||
|
||||
let Some(precise) = self.build_context.git().get_precise(&dist.git) else {
|
||||
return Ok(Vec::default());
|
||||
// Determine the cache key for the Git source.
|
||||
let cache_key = GitCacheKey {
|
||||
repository: RepositoryUrl::new(source.git.repository()),
|
||||
precise,
|
||||
subdirectory: source.subdirectory,
|
||||
};
|
||||
|
||||
// Store the index entries in a cache, to avoid redundant fetches.
|
||||
let digest = cache_key.digest();
|
||||
let index = IndexUrl::from(
|
||||
VerbatimUrl::parse_url(format!(
|
||||
"http://localhost:8000/v1/git/{workspace}/{}/{}/{precise}",
|
||||
repo.owner, repo.repo
|
||||
"http://localhost:8000/v1/git/{workspace}/{}/{}/{}",
|
||||
&digest[..2],
|
||||
&digest[2..4],
|
||||
&digest[4..],
|
||||
))
|
||||
.unwrap(),
|
||||
);
|
||||
debug!("Using remote Git index URL: {}", index);
|
||||
|
||||
// Determine the package name.
|
||||
let name = if let Some(name) = source.name {
|
||||
Cow::Borrowed(name)
|
||||
} else {
|
||||
// Fetch the list of packages from the Simple API.
|
||||
let SimpleIndexMetadata { projects } = client
|
||||
.manual(|client, semaphore| client.fetch_simple_index(&index, semaphore))
|
||||
.await?;
|
||||
|
||||
// Ensure that the index contains exactly one package.
|
||||
let mut packages = projects.into_iter();
|
||||
let Some(name) = packages.next() else {
|
||||
debug!("Remote Git index at `{index}` contains no packages");
|
||||
return Ok(Vec::default());
|
||||
};
|
||||
if packages.next().is_some() {
|
||||
debug!("Remote Git index at `{index}` contains multiple packages");
|
||||
return Ok(Vec::default());
|
||||
}
|
||||
Cow::Owned(name)
|
||||
};
|
||||
|
||||
// Store the index entries in a cache, to avoid redundant fetches.
|
||||
{
|
||||
let cache = self.cache.lock().await;
|
||||
if let Some(entries) = cache.get(&index) {
|
||||
|
|
@ -118,8 +165,8 @@ impl<'a, T: BuildContext> RemoteCacheResolver<'a, T> {
|
|||
};
|
||||
let archives = client
|
||||
.manual(|client, semaphore| {
|
||||
client.package_metadata(
|
||||
dist.name(),
|
||||
client.simple_detail(
|
||||
name.as_ref(),
|
||||
Some(metadata.as_ref()),
|
||||
self.build_context.capabilities(),
|
||||
semaphore,
|
||||
|
|
@ -137,6 +184,13 @@ impl<'a, T: BuildContext> RemoteCacheResolver<'a, T> {
|
|||
let files = rkyv::deserialize::<VersionFiles, rkyv::rancor::Error>(&datum.files)
|
||||
.expect("archived version files always deserializes");
|
||||
for (filename, file) in files.all() {
|
||||
if *filename.name() != *name {
|
||||
warn!(
|
||||
"Skipping file `{filename}` from remote Git index at `{index}` due to name mismatch (expected: `{name}`)"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
entries.push(GitIndexEntry {
|
||||
filename,
|
||||
file,
|
||||
|
|
@ -165,6 +219,9 @@ struct GitIndexEntry {
|
|||
}
|
||||
|
||||
/// A set of [`PrioritizedDist`] from a Git index.
|
||||
///
|
||||
/// In practice, it's assumed that the [`GitIndex`] will only contain distributions for a single
|
||||
/// package.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct GitIndex(FxHashMap<PackageName, GitIndexDistributions>);
|
||||
|
||||
|
|
@ -177,7 +234,6 @@ impl GitIndex {
|
|||
hasher: &HashStrategy,
|
||||
build_options: &BuildOptions,
|
||||
) -> Self {
|
||||
// Collect compatible distributions.
|
||||
let mut index = FxHashMap::<PackageName, GitIndexDistributions>::default();
|
||||
for entry in entries {
|
||||
let distributions = index.entry(entry.filename.name().clone()).or_default();
|
||||
|
|
@ -193,9 +249,11 @@ impl GitIndex {
|
|||
Self(index)
|
||||
}
|
||||
|
||||
/// Get the [`GitIndexDistributions`] for the given package name.
|
||||
pub(crate) fn get(&self, package_name: &PackageName) -> Option<&GitIndexDistributions> {
|
||||
self.0.get(package_name)
|
||||
/// Returns an [`Iterator`] over the distributions.
|
||||
pub(crate) fn iter(&self) -> impl Iterator<Item = &PrioritizedDist> {
|
||||
self.0
|
||||
.iter()
|
||||
.flat_map(|(.., distributions)| distributions.0.iter().map(|(.., dist)| dist))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -204,11 +262,6 @@ impl GitIndex {
|
|||
pub(crate) struct GitIndexDistributions(BTreeMap<Version, PrioritizedDist>);
|
||||
|
||||
impl GitIndexDistributions {
|
||||
/// Returns an [`Iterator`] over the distributions.
|
||||
pub(crate) fn iter(&self) -> impl Iterator<Item = (&Version, &PrioritizedDist)> {
|
||||
self.0.iter()
|
||||
}
|
||||
|
||||
/// Add the given [`File`] to the [`GitIndexDistributions`] for the given package.
|
||||
fn add_file(
|
||||
&mut self,
|
||||
|
|
@ -219,8 +272,7 @@ impl GitIndexDistributions {
|
|||
build_options: &BuildOptions,
|
||||
index: IndexUrl,
|
||||
) {
|
||||
// No `requires-python` here: for source distributions, we don't have that information;
|
||||
// for wheels, we read it lazily only when selected.
|
||||
// TODO(charlie): Incorporate `Requires-Python`, yanked status, etc.
|
||||
match filename {
|
||||
DistFilename::WheelFilename(filename) => {
|
||||
let version = filename.version.clone();
|
||||
|
|
@ -366,3 +418,39 @@ impl GitIndexCache {
|
|||
self.0.insert(index, entries)
|
||||
}
|
||||
}
|
||||
|
||||
/// A cache key for a Git repository at a precise commit.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
struct GitCacheKey<'a> {
|
||||
repository: RepositoryUrl,
|
||||
precise: GitOid,
|
||||
subdirectory: Option<&'a Path>,
|
||||
}
|
||||
|
||||
impl GitCacheKey<'_> {
|
||||
/// Compute the digest for the Git cache key.
|
||||
fn digest(&self) -> String {
|
||||
let mut hasher = blake2::Blake2b::<blake2::digest::consts::U32>::new();
|
||||
hasher.update(self.repository.as_str().as_bytes());
|
||||
hasher.update(b"/");
|
||||
hasher.update(self.precise.as_str().as_bytes());
|
||||
if let Some(subdirectory) = self
|
||||
.subdirectory
|
||||
.and_then(|subdirectory| subdirectory.to_str())
|
||||
{
|
||||
hasher.update(b"?subdirectory=");
|
||||
hasher.update(subdirectory.as_bytes());
|
||||
}
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for GitCacheKey<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}/{}", self.repository, self.precise.as_str())?;
|
||||
if let Some(subdirectory) = &self.subdirectory {
|
||||
write!(f, "?subdirectory={}", subdirectory.display())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
from hashlib import blake2b
|
||||
from os import fspath
|
||||
from typing import Optional, Union
|
||||
|
||||
Pathish = Union[str, bytes, "os.PathLike[str]", "os.PathLike[bytes]"]
|
||||
|
||||
def git_cache_digest(repository: str, precise: str, subdirectory: Optional[Pathish] = None) -> str:
|
||||
"""
|
||||
Reproduces the Rust digest() exactly:
|
||||
|
||||
- blake2b with 32-byte (256-bit) digest
|
||||
- bytes fed in this order:
|
||||
repository + "/" + precise [+ "?subdirectory=" + subdirectory]
|
||||
- subdirectory is included only if it is representable as UTF-8
|
||||
(mirrors Rust Path::to_str() -> Option<&str>)
|
||||
- hex output is lowercase
|
||||
"""
|
||||
h = blake2b(digest_size=32)
|
||||
|
||||
# repository and precise are Rust &str equivalents: encode as UTF-8
|
||||
h.update(repository.encode("utf-8"))
|
||||
h.update(b"/")
|
||||
h.update(precise.encode("utf-8"))
|
||||
|
||||
if subdirectory is not None:
|
||||
# Normalize to either str or bytes using fspath (handles PathLike)
|
||||
p = fspath(subdirectory)
|
||||
|
||||
# Try to get a UTF-8 string like Path::to_str()
|
||||
if isinstance(p, bytes):
|
||||
try:
|
||||
p_str = p.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
p_str = None
|
||||
else:
|
||||
# Already a str
|
||||
p_str = p
|
||||
|
||||
if p_str is not None:
|
||||
h.update(b"?subdirectory=")
|
||||
h.update(p_str.encode("utf-8"))
|
||||
|
||||
return h.hexdigest()
|
||||
|
||||
digest = git_cache_digest(
|
||||
repository="https://github.com/agronholm/anyio",
|
||||
precise="64b753b19c9a49e3ae395cde457cf82d51f7e999",
|
||||
subdirectory=None
|
||||
)
|
||||
print(digest) # lowercase hex, identical to the Rust version
|
||||
Loading…
Reference in New Issue