Enable selective cache purging in `puffin clean` (#589)

## Summary

This PR enables `puffin clean` to accept package names as command-line
arguments, and selectively purge any cache entries tied to the given
packages.

Relates to #572.

## Test Plan

Modified all of the caching tests to run two additional steps: (1) purge
the cache for the relevant package, and (2) re-install that package.
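For a concrete picture of that flow, here is a minimal sketch driving the CLI the same way the tests do. It is illustrative only: it assumes a `puffin` binary on `PATH`, and the cache directory, package name, and requirements file are placeholders.

```rust
use std::process::Command;

fn main() -> std::io::Result<()> {
    // (1) Purge a single package from the cache.
    let status = Command::new("puffin")
        .args(["clean", "tqdm", "--cache-dir", "/tmp/puffin-cache"])
        .status()?;
    assert!(status.success());

    // (2) Re-install it; anything purged above must be re-downloaded and re-cached.
    let status = Command::new("puffin")
        .args(["pip-sync", "requirements.txt", "--cache-dir", "/tmp/puffin-cache"])
        .status()?;
    assert!(status.success());
    Ok(())
}
```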
Commit 4b8642c6f7 (parent cbe1cb4229) by Charlie Marsh, 2023-12-08 14:51:32 -05:00.
12 changed files with 580 additions and 69 deletions.

Cargo.lock (generated)

@@ -2288,11 +2288,14 @@ dependencies = [
"directories",
"fs-err",
"hex",
"puffin-fs",
"puffin-normalize",
"pypi-types",
"seahash",
"serde",
"serde_json",
"tempfile",
"tracing",
"url",
]
@@ -2491,6 +2494,7 @@ version = "0.0.1"
dependencies = [
"fs-err",
"tempfile",
"tracing",
]
[[package]]


@@ -14,6 +14,8 @@ license = { workspace = true }
workspace = true
[dependencies]
puffin-fs = { path = "../puffin-fs" }
puffin-normalize = { path = "../puffin-normalize" }
pypi-types = { path = "../pypi-types" }
cachedir = { workspace = true }
@@ -25,4 +27,5 @@ seahash = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
tempfile = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }


@@ -7,12 +7,16 @@ use std::sync::Arc;
use fs_err as fs;
use tempfile::{tempdir, TempDir};
use tracing::debug;
use crate::wheel::WheelCacheKind;
pub use by_timestamp::CachedByTimestamp;
pub use canonical_url::{CanonicalUrl, RepositoryUrl};
#[cfg(feature = "clap")]
pub use cli::CacheArgs;
pub use digest::digest;
use puffin_fs::{directories, force_remove_all};
use puffin_normalize::PackageName;
pub use stable_hash::{StableHash, StableHasher};
pub use wheel::WheelCache;
@@ -144,6 +148,23 @@ impl Cache {
        fs::canonicalize(root)
    }

    /// Remove a package from the cache.
    ///
    /// Returns the number of entries removed from the cache.
    pub fn purge(&self, name: &PackageName) -> Result<usize, io::Error> {
        let mut count = 0;
        for bucket in [
            CacheBucket::Wheels,
            CacheBucket::BuiltWheels,
            CacheBucket::Git,
            CacheBucket::Interpreter,
            CacheBucket::Simple,
        ] {
            count += bucket.purge(self, name)?;
        }
        Ok(count)
    }
}
/// The different kinds of data in the cache are stored in different buckets, which in our case
@@ -269,7 +290,7 @@ pub enum CacheBucket {
/// * `built-wheels-v0/pypi/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
/// * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
/// * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
/// * `built-wheels-v0/git/<digest(url)>/<git sha>/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
/// * `built-wheels-v0/git/<digest(url)>/<git sha>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
///
/// But the url filename does not need to be a valid source dist filename
/// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
@@ -384,6 +405,117 @@ impl CacheBucket {
            CacheBucket::Wheels => "wheels-v0",
        }
    }

    /// Purge a package from the cache bucket.
    ///
    /// Returns the number of entries removed from the cache.
    fn purge(self, cache: &Cache, name: &PackageName) -> Result<usize, io::Error> {
        fn remove(path: impl AsRef<Path>) -> Result<bool, io::Error> {
            Ok(if force_remove_all(path.as_ref())? {
                debug!("Removed cache entry: {}", path.as_ref().display());
                true
            } else {
                false
            })
        }

        let mut count = 0;
        match self {
            CacheBucket::Wheels => {
                // For `pypi` wheels, we expect a directory per package (indexed by name).
                let root = cache.bucket(self).join(WheelCacheKind::Pypi);
                if remove(root.join(name.to_string()))? {
                    count += 1;
                }

                // For alternate indices, we expect a directory for every index, followed by a
                // directory per package (indexed by name).
                let root = cache.bucket(self).join(WheelCacheKind::Index);
                for directory in directories(root) {
                    if remove(directory.join(name.to_string()))? {
                        count += 1;
                    }
                }

                // For direct URLs, we expect a directory for every URL, followed by a
                // directory per package (indexed by name).
                let root = cache.bucket(self).join(WheelCacheKind::Url);
                for directory in directories(root) {
                    if remove(directory.join(name.to_string()))? {
                        count += 1;
                    }
                }
            }
            CacheBucket::BuiltWheels => {
                // For `pypi` wheels, we expect a directory per package (indexed by name).
                let root = cache.bucket(self).join(WheelCacheKind::Pypi);
                if remove(root.join(name.to_string()))? {
                    count += 1;
                }

                // For alternate indices, we expect a directory for every index, followed by a
                // directory per package (indexed by name).
                let root = cache.bucket(self).join(WheelCacheKind::Index);
                for directory in directories(root) {
                    if remove(directory.join(name.to_string()))? {
                        count += 1;
                    }
                }

                // For direct URLs, we expect a directory for every URL, followed by a
                // directory per package (indexed by name).
                let root = cache.bucket(self).join(WheelCacheKind::Url);
                for directory in directories(root) {
                    if remove(directory.join(name.to_string()))? {
                        count += 1;
                    }
                }

                // For local dependencies, we expect a directory for every path, followed by a
                // directory per package (indexed by name).
                let root = cache.bucket(self).join(WheelCacheKind::Path);
                for directory in directories(root) {
                    if remove(directory.join(name.to_string()))? {
                        count += 1;
                    }
                }

                // For Git dependencies, we expect a directory for every repository, followed by a
                // directory for every SHA, followed by a directory per package (indexed by name).
                let root = cache.bucket(self).join(WheelCacheKind::Git);
                for directory in directories(root) {
                    for directory in directories(directory) {
                        if remove(directory.join(name.to_string()))? {
                            count += 1;
                        }
                    }
                }
            }
            CacheBucket::Simple => {
                // For `pypi` wheels, we expect a JSON file per package, indexed by name.
                let root = cache.bucket(self).join(WheelCacheKind::Pypi);
                if remove(root.join(format!("{name}.json")))? {
                    count += 1;
                }

                // For alternate indices, we expect a directory for every index, followed by a
                // JSON file per package, indexed by name.
                let root = cache.bucket(self).join(WheelCacheKind::Url);
                for directory in directories(root) {
                    if remove(directory.join(format!("{name}.json")))? {
                        count += 1;
                    }
                }
            }
            CacheBucket::Git => {
                // Nothing to do.
            }
            CacheBucket::Interpreter => {
                // Nothing to do.
            }
        }
        Ok(count)
    }
}
impl Display for CacheBucket {
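As a usage sketch for the new API: `Cache::purge` sums the per-bucket counts, so a caller can report a single total. The snippet below is illustrative rather than PR code; it assumes `puffin-cache` and `puffin-normalize` as path dependencies, and the `Cache::from_path` constructor is an assumption.

```rust
use std::str::FromStr;

use puffin_cache::Cache;
use puffin_normalize::PackageName;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Open a cache at an explicit root (constructor name and signature assumed).
    let cache = Cache::from_path("/tmp/puffin-cache")?;

    // Purge every entry tied to the package, across all buckets.
    let name = PackageName::from_str("tqdm")?;
    let count = cache.purge(&name)?;
    println!("Cleared {count} entries for package: {name}");

    Ok(())
}
```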


@@ -12,8 +12,9 @@ use crate::{digest, CanonicalUrl};
///
/// Use [`WheelCache::remote_wheel_dir`] for remote wheel metadata caching and
/// [`WheelCache::built_wheel_dir`] for built source distributions metadata caching.
#[derive(Debug, Clone)]
pub enum WheelCache<'a> {
    /// Either pypi or an alternative index, which we key by index URL.
    /// Either PyPI or an alternative index, which we key by index URL.
    Index(&'a IndexUrl),
    /// A direct URL dependency, which we key by URL.
    Url(&'a Url),
@@ -29,11 +30,18 @@ pub enum WheelCache<'a> {
impl<'a> WheelCache<'a> {
    fn bucket(&self) -> PathBuf {
        match self {
            WheelCache::Index(IndexUrl::Pypi) => PathBuf::from("pypi"),
            WheelCache::Index(url) => PathBuf::from("index").join(digest(&CanonicalUrl::new(url))),
            WheelCache::Url(url) => PathBuf::from("url").join(digest(&CanonicalUrl::new(url))),
            WheelCache::Path(url) => PathBuf::from("path").join(digest(&CanonicalUrl::new(url))),
            WheelCache::Git(url, sha) => PathBuf::from("git")
            WheelCache::Index(IndexUrl::Pypi) => WheelCacheKind::Pypi.root(),
            WheelCache::Index(url) => WheelCacheKind::Index
                .root()
                .join(digest(&CanonicalUrl::new(url))),
            WheelCache::Url(url) => WheelCacheKind::Url
                .root()
                .join(digest(&CanonicalUrl::new(url))),
            WheelCache::Path(url) => WheelCacheKind::Path
                .root()
                .join(digest(&CanonicalUrl::new(url))),
            WheelCache::Git(url, sha) => WheelCacheKind::Git
                .root()
                .join(digest(&CanonicalUrl::new(url)))
                .join(sha),
        }
@@ -49,3 +57,39 @@ impl<'a> WheelCache<'a> {
        self.bucket().join(filename)
    }
}
#[derive(Debug, Clone, Copy)]
pub(crate) enum WheelCacheKind {
    /// A cache of data from PyPI.
    Pypi,
    /// A cache of data from an alternative index.
    Index,
    /// A cache of data from an arbitrary URL.
    Url,
    /// A cache of data from a local path.
    Path,
    /// A cache of data from a Git repository.
    Git,
}

impl WheelCacheKind {
    pub(crate) fn to_str(self) -> &'static str {
        match self {
            WheelCacheKind::Pypi => "pypi",
            WheelCacheKind::Index => "index",
            WheelCacheKind::Url => "url",
            WheelCacheKind::Path => "path",
            WheelCacheKind::Git => "git",
        }
    }

    pub(crate) fn root(self) -> PathBuf {
        Path::new(self.to_str()).to_path_buf()
    }
}

impl AsRef<Path> for WheelCacheKind {
    fn as_ref(&self) -> &Path {
        self.to_str().as_ref()
    }
}
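To make the resulting bucket layout concrete, here is a standalone illustration (std-only, not PR code) that mirrors the kind-to-root mapping and prints where entries for a package would live. The digest components are placeholders rather than real `seahash` output.

```rust
use std::path::PathBuf;

// A std-only mirror of `WheelCacheKind`, for illustration.
#[derive(Clone, Copy)]
enum Kind {
    Pypi,
    Index,
    Url,
    Path,
    Git,
}

impl Kind {
    fn root(self) -> PathBuf {
        PathBuf::from(match self {
            Kind::Pypi => "pypi",
            Kind::Index => "index",
            Kind::Url => "url",
            Kind::Path => "path",
            Kind::Git => "git",
        })
    }
}

fn main() {
    // PyPI entries sit directly under the kind root, keyed by package name.
    println!("{}", Kind::Pypi.root().join("tqdm").display());

    // URL entries interpose a digest of the canonical URL.
    println!("{}", Kind::Url.root().join("<digest(url)>").join("tqdm").display());

    // Git entries interpose a digest of the repository URL plus a commit SHA.
    let git = Kind::Git.root().join("<digest(url)>").join("<sha>").join("tqdm");
    println!("{}", git.display());
}
```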


@@ -1,40 +1,58 @@
use std::fmt::Write;

use anyhow::{Context, Result};
use colored::Colorize;
use fs_err as fs;
use tracing::debug;

use puffin_cache::Cache;
use puffin_normalize::PackageName;

use crate::commands::ExitStatus;
use crate::printer::Printer;

/// Clear the cache.
pub(crate) fn clean(cache: &Cache, mut printer: Printer) -> Result<ExitStatus> {
pub(crate) fn clean(
    cache: &Cache,
    packages: &[PackageName],
    mut printer: Printer,
) -> Result<ExitStatus> {
    if !cache.root().exists() {
        writeln!(printer, "No cache found at: {}", cache.root().display())?;
        writeln!(
            printer,
            "No cache found at: {}",
            format!("{}", cache.root().display()).cyan()
        )?;
        return Ok(ExitStatus::Success);
    }

    debug!("Clearing cache at: {}", cache.root().display());

    for entry in cache
        .root()
        .read_dir()
        .with_context(|| {
            format!(
                "Failed to read directory contents while clearing {}",
                cache.root().display()
            )
        })?
        .flatten()
    {
        if entry.file_type()?.is_dir() {
            fs::remove_dir_all(entry.path())
                .with_context(|| format!("Failed to clear cache at: {}", cache.root().display()))?;
        } else {
            fs::remove_file(entry.path())
                .with_context(|| format!("Failed to clear cache at: {}", cache.root().display()))?;
    if packages.is_empty() {
        writeln!(
            printer,
            "Clearing cache at: {}",
            format!("{}", cache.root().display()).cyan()
        )?;
        fs::remove_dir_all(cache.root())
            .with_context(|| format!("Failed to clear cache at: {}", cache.root().display()))?;
    } else {
        for package in packages {
            let count = cache.purge(package)?;
            match count {
                0 => writeln!(
                    printer,
                    "No entries found for package: {}",
                    format!("{package}").cyan()
                )?,
                1 => writeln!(
                    printer,
                    "Cleared 1 entry for package: {}",
                    format!("{package}").cyan()
                )?,
                count => writeln!(
                    printer,
                    "Cleared {count} entries for package: {}",
                    format!("{package}").cyan()
                )?,
            }
        }
    }
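The three-way match above exists purely to keep the summary message grammatical. Extracted as a standalone sketch (names illustrative, colorization omitted):

```rust
fn summary(count: usize, package: &str) -> String {
    match count {
        0 => format!("No entries found for package: {package}"),
        1 => format!("Cleared 1 entry for package: {package}"),
        count => format!("Cleared {count} entries for package: {package}"),
    }
}

fn main() {
    assert_eq!(summary(0, "tqdm"), "No entries found for package: tqdm");
    assert_eq!(summary(1, "wheel"), "Cleared 1 entry for package: wheel");
    assert_eq!(summary(2, "future"), "Cleared 2 entries for package: future");
}
```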


@@ -68,7 +68,7 @@ enum Commands {
    /// Uninstall packages from the current environment.
    PipUninstall(PipUninstallArgs),
    /// Clear the cache.
    Clean,
    Clean(CleanArgs),
    /// Enumerate the installed packages in the current environment.
    Freeze,
    /// Create a virtual environment.
@@ -210,6 +210,12 @@ struct PipUninstallArgs {
    requirement: Vec<PathBuf>,
}

#[derive(Args)]
struct CleanArgs {
    /// The packages to remove from the cache.
    package: Vec<PackageName>,
}

#[derive(Args)]
struct VenvArgs {
    /// The Python interpreter to use for the virtual environment.
@@ -321,7 +327,7 @@ async fn inner() -> Result<ExitStatus> {
                .collect::<Vec<_>>();
            commands::pip_uninstall(&sources, cache, printer).await
        }
        Commands::Clean => commands::clean(&cache, printer),
        Commands::Clean(args) => commands::clean(&cache, &args.package, printer),
        Commands::Freeze => commands::freeze(&cache, printer),
        Commands::Venv(args) => commands::venv(&args.name, args.python.as_deref(), &cache, printer),
        Commands::Add(args) => commands::add(&args.name, printer),
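The `CleanArgs` struct relies on clap's variadic positional arguments: zero names means "clear everything", one or more names means "purge selectively". A minimal self-contained sketch of the same pattern, using `clap::Parser` directly and `String` in place of `PackageName` so it compiles without the workspace crates:

```rust
use clap::Parser;

#[derive(Parser)]
struct CleanArgs {
    /// The packages to remove from the cache; an empty list clears the whole cache.
    package: Vec<String>,
}

fn main() {
    // `clean tqdm future` yields ["tqdm", "future"]; bare `clean` yields [].
    let args = CleanArgs::parse();
    if args.package.is_empty() {
        println!("would clear the entire cache");
    } else {
        println!("would purge: {:?}", args.package);
    }
}
```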


@@ -994,6 +994,54 @@ fn install_url_source_dist_cached() -> Result<()> {
    check_command(&venv, "import tqdm", &temp_dir);

    // Clear the cache, then re-run the installation in a new virtual environment.
    let parent = assert_fs::TempDir::new()?;
    let venv = create_venv_py312(&parent, &cache_dir);

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("clean")
            .arg("tqdm")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Cleared 1 entry for package: tqdm
        "###);
    });

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-sync")
            .arg("requirements.txt")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Resolved 1 package in [TIME]
        Downloaded 1 package in [TIME]
        Unzipped 1 package in [TIME]
        Installed 1 package in [TIME]
         + tqdm @ https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz
        "###);
    });

    check_command(&venv, "import tqdm", &temp_dir);

    Ok(())
}
@@ -1060,6 +1108,54 @@ fn install_git_source_dist_cached() -> Result<()> {
    check_command(&venv, "import werkzeug", &temp_dir);

    // Clear the cache, then re-run the installation in a new virtual environment.
    let parent = assert_fs::TempDir::new()?;
    let venv = create_venv_py312(&parent, &cache_dir);

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("clean")
            .arg("werkzeug")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Cleared 1 entry for package: werkzeug
        "###);
    });

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-sync")
            .arg("requirements.txt")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Resolved 1 package in [TIME]
        Downloaded 1 package in [TIME]
        Unzipped 1 package in [TIME]
        Installed 1 package in [TIME]
         + werkzeug @ git+https://github.com/pallets/werkzeug.git@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74
        "###);
    });

    check_command(&venv, "import werkzeug", &temp_dir);

    Ok(())
}
@@ -1125,6 +1221,54 @@ fn install_registry_source_dist_cached() -> Result<()> {
    check_command(&venv, "import future", &temp_dir);

    // Clear the cache, then re-run the installation in a new virtual environment.
    let parent = assert_fs::TempDir::new()?;
    let venv = create_venv_py312(&parent, &cache_dir);

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("clean")
            .arg("future")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Cleared 2 entries for package: future
        "###);
    });

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-sync")
            .arg("requirements.txt")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Resolved 1 package in [TIME]
        Downloaded 1 package in [TIME]
        Unzipped 1 package in [TIME]
        Installed 1 package in [TIME]
         + future==0.18.3
        "###);
    });

    check_command(&venv, "import future", &temp_dir);

    Ok(())
}
@@ -1199,6 +1343,54 @@ fn install_path_source_dist_cached() -> Result<()> {
    check_command(&venv, "import wheel", &temp_dir);

    // Clear the cache, then re-run the installation in a new virtual environment.
    let parent = assert_fs::TempDir::new()?;
    let venv = create_venv_py312(&parent, &cache_dir);

    insta::with_settings!({
        filters => filters.clone()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("clean")
            .arg("wheel")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Cleared 1 entry for package: wheel
        "###);
    });

    insta::with_settings!({
        filters => filters.clone()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-sync")
            .arg("requirements.txt")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Resolved 1 package in [TIME]
        Downloaded 1 package in [TIME]
        Unzipped 1 package in [TIME]
        Installed 1 package in [TIME]
         + wheel @ file://[TEMP_DIR]/wheel-0.42.0.tar.gz
        "###);
    });

    check_command(&venv, "import wheel", &temp_dir);

    Ok(())
}
@@ -1273,6 +1465,54 @@ fn install_path_built_dist_cached() -> Result<()> {
    check_command(&venv, "import tomli", &parent);

    // Clear the cache, then re-run the installation in a new virtual environment.
    let parent = assert_fs::TempDir::new()?;
    let venv = create_venv_py312(&parent, &cache_dir);

    insta::with_settings!({
        filters => filters.clone()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("clean")
            .arg("tomli")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Cleared 1 entry for package: tomli
        "###);
    });

    insta::with_settings!({
        filters => filters.clone()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-sync")
            .arg("requirements.txt")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Resolved 1 package in [TIME]
        Downloaded 1 package in [TIME]
        Unzipped 1 package in [TIME]
        Installed 1 package in [TIME]
         + tomli @ file://[TEMP_DIR]/tomli-3.0.1-py3-none-any.whl
        "###);
    });

    check_command(&venv, "import tomli", &temp_dir);

    Ok(())
}
@@ -1338,6 +1578,54 @@ fn install_url_built_dist_cached() -> Result<()> {
    check_command(&venv, "import tqdm", &temp_dir);

    // Clear the cache, then re-run the installation in a new virtual environment.
    let parent = assert_fs::TempDir::new()?;
    let venv = create_venv_py312(&parent, &cache_dir);

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("clean")
            .arg("tqdm")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Cleared 1 entry for package: tqdm
        "###);
    });

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-sync")
            .arg("requirements.txt")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        ----- stderr -----
        Resolved 1 package in [TIME]
        Downloaded 1 package in [TIME]
        Unzipped 1 package in [TIME]
        Installed 1 package in [TIME]
         + tqdm @ https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl
        "###);
    });

    check_command(&venv, "import tqdm", &temp_dir);

    Ok(())
}


@@ -4,8 +4,7 @@ use tracing::warn;
use distribution_types::CachedWheel;
use platform_tags::Tags;
use puffin_cache::CacheShard;
use crate::index::iter_directories;
use puffin_fs::directories;
/// A local index of built distributions for a specific source distribution.
pub struct BuiltWheelIndex;
@@ -30,7 +29,7 @@ impl BuiltWheelIndex {
    pub fn find(shard: &CacheShard, tags: &Tags) -> Option<CachedWheel> {
        let mut candidate: Option<CachedWheel> = None;

        for subdir in iter_directories(shard.read_dir().ok()?) {
        for subdir in directories(&**shard) {
            match CachedWheel::from_path(&subdir) {
                Ok(None) => {}
                Ok(Some(dist_info)) => {


@@ -1,27 +1,5 @@
use std::path::PathBuf;

use tracing::warn;

pub use built_wheel_index::BuiltWheelIndex;
pub use registry_wheel_index::RegistryWheelIndex;

mod built_wheel_index;
mod registry_wheel_index;

/// Iterate over the subdirectories of a directory.
fn iter_directories(read_dir: std::fs::ReadDir) -> impl Iterator<Item = PathBuf> {
    read_dir
        .filter_map(|entry| match entry {
            Ok(entry) => Some(entry),
            Err(err) => {
                warn!("Failed to read entry of cache: {}", err);
                None
            }
        })
        .filter(|entry| {
            entry
                .file_type()
                .map_or(false, |file_type| file_type.is_dir())
        })
        .map(|entry| entry.path())
}


@@ -1,6 +1,5 @@
use std::collections::hash_map::Entry;
use std::collections::BTreeMap;
use std::path::Path;
use fs_err as fs;
@@ -11,11 +10,10 @@ use distribution_types::{CachedRegistryDist, CachedWheel};
use pep440_rs::Version;
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket, WheelCache};
use puffin_fs::directories;
use puffin_normalize::PackageName;
use pypi_types::IndexUrls;
use crate::index::iter_directories;
/// A local index of distributions that originate from a registry, like `PyPI`.
#[derive(Debug)]
pub struct RegistryWheelIndex<'a> {
@@ -79,10 +77,7 @@ impl<'a> RegistryWheelIndex<'a> {
            // Built wheels have one more level of indirection, as they are keyed by the source
            // distribution filename.
            let Ok(read_dir) = built_wheel_dir.read_dir() else {
                continue;
            };
            for subdir in iter_directories(read_dir) {
            for subdir in directories(&*built_wheel_dir) {
                Self::add_directory(subdir, tags, &mut versions);
            }
        }
@@ -98,11 +93,7 @@ impl<'a> RegistryWheelIndex<'a> {
        tags: &Tags,
        versions: &mut BTreeMap<Version, CachedRegistryDist>,
    ) {
        let Ok(read_dir) = path.as_ref().read_dir() else {
            return;
        };
        for wheel_dir in iter_directories(read_dir) {
        for wheel_dir in directories(path.as_ref()) {
            match CachedWheel::from_path(&wheel_dir) {
                Ok(None) => {}
                Ok(Some(dist_info)) => {


@@ -15,3 +15,4 @@ workspace = true
[dependencies]
fs-err = { workspace = true, features = ["tokio"] }
tempfile = { workspace = true }
tracing = { workspace = true }


@@ -1,6 +1,8 @@
use std::path::Path;
use std::path::{Path, PathBuf};
use fs_err as fs;
use tempfile::NamedTempFile;
use tracing::warn;
/// Write `data` to `path` atomically using a temporary file and atomic rename.
pub async fn write_atomic(path: impl AsRef<Path>, data: impl AsRef<[u8]>) -> std::io::Result<()> {
@@ -43,3 +45,48 @@ pub fn write_atomic_sync(path: impl AsRef<Path>, data: impl AsRef<[u8]>) -> std:
    })?;
    Ok(())
}
/// Remove the file or directory at `path`, if it exists.
///
/// Returns `true` if the file or directory was removed, and `false` if the path did not exist.
pub fn force_remove_all(path: impl AsRef<Path>) -> Result<bool, std::io::Error> {
    let path = path.as_ref();

    let metadata = match fs::metadata(path) {
        Ok(metadata) => metadata,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(false),
        Err(err) => return Err(err),
    };

    if metadata.is_dir() {
        fs::remove_dir_all(path)?;
    } else {
        fs::remove_file(path)?;
    }

    Ok(true)
}

/// Iterate over the subdirectories of a directory.
///
/// If the directory does not exist, returns an empty iterator.
pub fn directories(path: impl AsRef<Path>) -> impl Iterator<Item = PathBuf> {
    path.as_ref()
        .read_dir()
        .ok()
        .into_iter()
        .flatten()
        .filter_map(|entry| match entry {
            Ok(entry) => Some(entry),
            Err(err) => {
                warn!("Failed to read entry: {}", err);
                None
            }
        })
        .filter(|entry| {
            entry
                .file_type()
                .map_or(false, |file_type| file_type.is_dir())
        })
        .map(|entry| entry.path())
}