Retain dot-separated wheel tags during cache prune (#13379)

## Summary

If a set of wheel tags includes a dot, this code treats the part
_after_ the dot as an extension, and so fails to detect that the
entry is a symlink to an archive (which leads to the archive being removed).

This is all an optimization, so this code just makes it a little more
targeted: we skip specific known extensions, rather than anything with
any extension.

Closes https://github.com/astral-sh/uv/issues/13270.
This commit is contained in:
Charlie Marsh 2025-05-10 14:39:11 -04:00 committed by GitHub
parent 62692b4e1b
commit d3fc814ff9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 14 additions and 36 deletions

1
Cargo.lock generated
View File

@ -4829,7 +4829,6 @@ dependencies = [
"uv-cache-info",
"uv-cache-key",
"uv-dirs",
"uv-distribution-filename",
"uv-distribution-types",
"uv-fs",
"uv-normalize",

View File

@ -20,7 +20,6 @@ workspace = true
uv-cache-info = { workspace = true }
uv-cache-key = { workspace = true }
uv-dirs = { workspace = true }
uv-distribution-filename = { workspace = true }
uv-distribution-types = { workspace = true }
uv-fs = { workspace = true, features = ["tokio"] }
uv-normalize = { workspace = true }

View File

@ -11,7 +11,6 @@ use tracing::debug;
pub use archive::ArchiveId;
use uv_cache_info::Timestamp;
use uv_distribution_filename::WheelFilename;
use uv_fs::{cachedir, directories, LockedFile};
use uv_normalize::PackageName;
use uv_pypi_types::ResolutionMetadata;
@ -534,47 +533,28 @@ impl Cache {
fn find_archive_references(&self) -> Result<FxHashSet<PathBuf>, io::Error> {
let mut references = FxHashSet::default();
for bucket in CacheBucket::iter() {
// As an optimization, skip the archive bucket itself.
if matches!(bucket, CacheBucket::Archive) {
continue;
}
let bucket_path = self.bucket(bucket);
if bucket_path.is_dir() {
for entry in walkdir::WalkDir::new(bucket_path) {
let entry = entry?;
// Ignore any `.lock` files.
if entry
.path()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
{
// As an optimization, ignore any `.lock`, `.whl`, `.msgpack`, `.rev`, or
// `.http` files.
if entry.path().extension().is_some_and(|ext| {
ext.eq_ignore_ascii_case("lock")
|| ext.eq_ignore_ascii_case("whl")
|| ext.eq_ignore_ascii_case("http")
|| ext.eq_ignore_ascii_case("rev")
|| ext.eq_ignore_ascii_case("msgpack")
}) {
continue;
}
let Some(filename) = entry
.path()
.file_name()
.and_then(|file_name| file_name.to_str())
else {
continue;
};
if bucket == CacheBucket::Wheels {
// In the `wheels` bucket, we often use a hash of the filename as the
// directory name, so we can't rely on the stem.
//
// Instead, we skip if it contains an extension (e.g., `.whl`, `.http`,
// `.rev`, and `.msgpack` files).
if filename
.rsplit_once('-') // strip version/tags, might contain a dot ('.')
.is_none_or(|(_, suffix)| suffix.contains('.'))
{
continue;
}
} else {
// For other buckets only include entries that match the wheel stem pattern (e.g., `typing-extensions-4.8.0-py3-none-any`).
if WheelFilename::from_stem(filename).is_err() {
continue;
}
}
if let Ok(target) = self.resolve_link(entry.path()) {
references.insert(target);
}