diff --git a/Cargo.lock b/Cargo.lock index 33f31b1b6..351f06cd5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2288,11 +2288,14 @@ dependencies = [ "directories", "fs-err", "hex", + "puffin-fs", + "puffin-normalize", "pypi-types", "seahash", "serde", "serde_json", "tempfile", + "tracing", "url", ] @@ -2491,6 +2494,7 @@ version = "0.0.1" dependencies = [ "fs-err", "tempfile", + "tracing", ] [[package]] diff --git a/crates/puffin-cache/Cargo.toml b/crates/puffin-cache/Cargo.toml index 541dc3657..222ecdc2c 100644 --- a/crates/puffin-cache/Cargo.toml +++ b/crates/puffin-cache/Cargo.toml @@ -14,6 +14,8 @@ license = { workspace = true } workspace = true [dependencies] +puffin-fs = { path = "../puffin-fs" } +puffin-normalize = { path = "../puffin-normalize" } pypi-types = { path = "../pypi-types" } cachedir = { workspace = true } @@ -25,4 +27,5 @@ seahash = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } tempfile = { workspace = true } +tracing = { workspace = true } url = { workspace = true } diff --git a/crates/puffin-cache/src/lib.rs b/crates/puffin-cache/src/lib.rs index 4028163b9..d0b4a6ffb 100644 --- a/crates/puffin-cache/src/lib.rs +++ b/crates/puffin-cache/src/lib.rs @@ -7,12 +7,16 @@ use std::sync::Arc; use fs_err as fs; use tempfile::{tempdir, TempDir}; +use tracing::debug; +use crate::wheel::WheelCacheKind; pub use by_timestamp::CachedByTimestamp; pub use canonical_url::{CanonicalUrl, RepositoryUrl}; #[cfg(feature = "clap")] pub use cli::CacheArgs; pub use digest::digest; +use puffin_fs::{directories, force_remove_all}; +use puffin_normalize::PackageName; pub use stable_hash::{StableHash, StableHasher}; pub use wheel::WheelCache; @@ -144,6 +148,23 @@ impl Cache { fs::canonicalize(root) } + + /// Remove a package from the cache. + /// + /// Returns the number of entries removed from the cache. + pub fn purge(&self, name: &PackageName) -> Result { + let mut count = 0; + for bucket in [ + CacheBucket::Wheels, + CacheBucket::BuiltWheels, + CacheBucket::Git, + CacheBucket::Interpreter, + CacheBucket::Simple, + ] { + count += bucket.purge(self, name)?; + } + Ok(count) + } } /// The different kinds of data in the cache are stored in different bucket, which in our case @@ -269,7 +290,7 @@ pub enum CacheBucket { /// * `built-wheels-v0/pypi/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}` /// * `built-wheels-v0//foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}` /// * `built-wheels-v0/url//foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}` - /// * `built-wheels-v0/git///foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}` + /// * `built-wheels-v0/git///foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}` /// /// But the url filename does not need to be a valid source dist filename /// (), @@ -384,6 +405,117 @@ impl CacheBucket { CacheBucket::Wheels => "wheels-v0", } } + + /// Purge a package from the cache bucket. + /// + /// Returns the number of entries removed from the cache. + fn purge(self, cache: &Cache, name: &PackageName) -> Result { + fn remove(path: impl AsRef) -> Result { + Ok(if force_remove_all(path.as_ref())? { + debug!("Removed cache entry: {}", path.as_ref().display()); + true + } else { + false + }) + } + + let mut count = 0; + match self { + CacheBucket::Wheels => { + // For `pypi` wheels, we expect a directory per package (indexed by name). + let root = cache.bucket(self).join(WheelCacheKind::Pypi); + if remove(root.join(name.to_string()))? { + count += 1; + } + + // For alternate indices, we expect a directory for every index, followed by a + // directory per package (indexed by name). + let root = cache.bucket(self).join(WheelCacheKind::Index); + for directory in directories(root) { + if remove(directory.join(name.to_string()))? { + count += 1; + } + } + + // For direct URLs, we expect a directory for every URL, followed by a + // directory per package (indexed by name). + let root = cache.bucket(self).join(WheelCacheKind::Url); + for directory in directories(root) { + if remove(directory.join(name.to_string()))? { + count += 1; + } + } + } + CacheBucket::BuiltWheels => { + // For `pypi` wheels, we expect a directory per package (indexed by name). + let root = cache.bucket(self).join(WheelCacheKind::Pypi); + if remove(root.join(name.to_string()))? { + count += 1; + } + + // For alternate indices, we expect a directory for every index, followed by a + // directory per package (indexed by name). + let root = cache.bucket(self).join(WheelCacheKind::Index); + for directory in directories(root) { + if remove(directory.join(name.to_string()))? { + count += 1; + } + } + + // For direct URLs, we expect a directory for every index, followed by a + // directory per package (indexed by name). + let root = cache.bucket(self).join(WheelCacheKind::Url); + for directory in directories(root) { + if remove(directory.join(name.to_string()))? { + count += 1; + } + } + + // For local dependencies, we expect a directory for every path, followed by a + // directory per package (indexed by name). + let root = cache.bucket(self).join(WheelCacheKind::Path); + for directory in directories(root) { + if remove(directory.join(name.to_string()))? { + count += 1; + } + } + + // For Git dependencies, we expect a directory for every repository, followed by a + // directory for every SHA, followed by a directory per package (indexed by name). + let root = cache.bucket(self).join(WheelCacheKind::Git); + for directory in directories(root) { + for directory in directories(directory) { + if remove(directory.join(name.to_string()))? { + count += 1; + } + } + } + } + CacheBucket::Simple => { + // For `pypi` wheels, we expect a JSON file per package, indexed by name. + let root = cache.bucket(self).join(WheelCacheKind::Pypi); + if remove(root.join(format!("{name}.json")))? { + count += 1; + } + + // For alternate indices, we expect a directory for every index, followed by a + // JSON file per package, indexed by name. + let root = cache.bucket(self).join(WheelCacheKind::Url); + for directory in directories(root) { + if remove(directory.join(format!("{name}.json")))? { + count += 1; + } + } + } + CacheBucket::Git => { + // Nothing to do. + } + CacheBucket::Interpreter => { + // Nothing to do. + } + } + Ok(count) + } } impl Display for CacheBucket { diff --git a/crates/puffin-cache/src/wheel.rs b/crates/puffin-cache/src/wheel.rs index 1ac864e1f..e560ce4b9 100644 --- a/crates/puffin-cache/src/wheel.rs +++ b/crates/puffin-cache/src/wheel.rs @@ -12,8 +12,9 @@ use crate::{digest, CanonicalUrl}; /// /// Use [`WheelCache::remote_wheel_dir`] for remote wheel metadata caching and /// [`WheelCache::built_wheel_dir`] for built source distributions metadata caching. +#[derive(Debug, Clone)] pub enum WheelCache<'a> { - /// Either pypi or an alternative index, which we key by index URL. + /// Either PyPI or an alternative index, which we key by index URL. Index(&'a IndexUrl), /// A direct URL dependency, which we key by URL. Url(&'a Url), @@ -29,11 +30,18 @@ pub enum WheelCache<'a> { impl<'a> WheelCache<'a> { fn bucket(&self) -> PathBuf { match self { - WheelCache::Index(IndexUrl::Pypi) => PathBuf::from("pypi"), - WheelCache::Index(url) => PathBuf::from("index").join(digest(&CanonicalUrl::new(url))), - WheelCache::Url(url) => PathBuf::from("url").join(digest(&CanonicalUrl::new(url))), - WheelCache::Path(url) => PathBuf::from("path").join(digest(&CanonicalUrl::new(url))), - WheelCache::Git(url, sha) => PathBuf::from("git") + WheelCache::Index(IndexUrl::Pypi) => WheelCacheKind::Pypi.root(), + WheelCache::Index(url) => WheelCacheKind::Index + .root() + .join(digest(&CanonicalUrl::new(url))), + WheelCache::Url(url) => WheelCacheKind::Url + .root() + .join(digest(&CanonicalUrl::new(url))), + WheelCache::Path(url) => WheelCacheKind::Path + .root() + .join(digest(&CanonicalUrl::new(url))), + WheelCache::Git(url, sha) => WheelCacheKind::Git + .root() .join(digest(&CanonicalUrl::new(url))) .join(sha), } @@ -49,3 +57,39 @@ impl<'a> WheelCache<'a> { self.bucket().join(filename) } } + +#[derive(Debug, Clone, Copy)] +pub(crate) enum WheelCacheKind { + /// A cache of data from PyPI. + Pypi, + /// A cache of data from an alternative index. + Index, + /// A cache of data from an arbitrary URL. + Url, + /// A cache of data from a local path. + Path, + /// A cache of data from a Git repository. + Git, +} + +impl WheelCacheKind { + pub(crate) fn to_str(self) -> &'static str { + match self { + WheelCacheKind::Pypi => "pypi", + WheelCacheKind::Index => "index", + WheelCacheKind::Url => "url", + WheelCacheKind::Path => "path", + WheelCacheKind::Git => "git", + } + } + + pub(crate) fn root(self) -> PathBuf { + Path::new(self.to_str()).to_path_buf() + } +} + +impl AsRef for WheelCacheKind { + fn as_ref(&self) -> &Path { + self.to_str().as_ref() + } +} diff --git a/crates/puffin-cli/src/commands/clean.rs b/crates/puffin-cli/src/commands/clean.rs index 0b13001c9..2ebbf6226 100644 --- a/crates/puffin-cli/src/commands/clean.rs +++ b/crates/puffin-cli/src/commands/clean.rs @@ -1,40 +1,58 @@ use std::fmt::Write; use anyhow::{Context, Result}; +use colored::Colorize; use fs_err as fs; -use tracing::debug; use puffin_cache::Cache; +use puffin_normalize::PackageName; use crate::commands::ExitStatus; use crate::printer::Printer; /// Clear the cache. -pub(crate) fn clean(cache: &Cache, mut printer: Printer) -> Result { +pub(crate) fn clean( + cache: &Cache, + packages: &[PackageName], + mut printer: Printer, +) -> Result { if !cache.root().exists() { - writeln!(printer, "No cache found at: {}", cache.root().display())?; + writeln!( + printer, + "No cache found at: {}", + format!("{}", cache.root().display()).cyan() + )?; return Ok(ExitStatus::Success); } - debug!("Clearing cache at: {}", cache.root().display()); - - for entry in cache - .root() - .read_dir() - .with_context(|| { - format!( - "Failed to read directory contents while clearing {}", - cache.root().display() - ) - })? - .flatten() - { - if entry.file_type()?.is_dir() { - fs::remove_dir_all(entry.path()) - .with_context(|| format!("Failed to clear cache at: {}", cache.root().display()))?; - } else { - fs::remove_file(entry.path()) - .with_context(|| format!("Failed to clear cache at: {}", cache.root().display()))?; + if packages.is_empty() { + writeln!( + printer, + "Clearing cache at: {}", + format!("{}", cache.root().display()).cyan() + )?; + fs::remove_dir_all(cache.root()) + .with_context(|| format!("Failed to clear cache at: {}", cache.root().display()))?; + } else { + for package in packages { + let count = cache.purge(package)?; + match count { + 0 => writeln!( + printer, + "No entries found for package: {}", + format!("{package}").cyan() + )?, + 1 => writeln!( + printer, + "Cleared 1 entry for package: {}", + format!("{package}").cyan() + )?, + count => writeln!( + printer, + "Cleared {count} entries for package: {}", + format!("{package}").cyan() + )?, + } } } diff --git a/crates/puffin-cli/src/main.rs b/crates/puffin-cli/src/main.rs index d7d431abe..20f4c9985 100644 --- a/crates/puffin-cli/src/main.rs +++ b/crates/puffin-cli/src/main.rs @@ -68,7 +68,7 @@ enum Commands { /// Uninstall packages from the current environment. PipUninstall(PipUninstallArgs), /// Clear the cache. - Clean, + Clean(CleanArgs), /// Enumerate the installed packages in the current environment. Freeze, /// Create a virtual environment. @@ -210,6 +210,12 @@ struct PipUninstallArgs { requirement: Vec, } +#[derive(Args)] +struct CleanArgs { + /// The packages to remove from the cache. + package: Vec, +} + #[derive(Args)] struct VenvArgs { /// The Python interpreter to use for the virtual environment. @@ -321,7 +327,7 @@ async fn inner() -> Result { .collect::>(); commands::pip_uninstall(&sources, cache, printer).await } - Commands::Clean => commands::clean(&cache, printer), + Commands::Clean(args) => commands::clean(&cache, &args.package, printer), Commands::Freeze => commands::freeze(&cache, printer), Commands::Venv(args) => commands::venv(&args.name, args.python.as_deref(), &cache, printer), Commands::Add(args) => commands::add(&args.name, printer), diff --git a/crates/puffin-cli/tests/pip_sync.rs b/crates/puffin-cli/tests/pip_sync.rs index b6a27dba5..7b3a75660 100644 --- a/crates/puffin-cli/tests/pip_sync.rs +++ b/crates/puffin-cli/tests/pip_sync.rs @@ -994,6 +994,54 @@ fn install_url_source_dist_cached() -> Result<()> { check_command(&venv, "import tqdm", &temp_dir); + // Clear the cache, then re-run the installation in a new virtual environment. + let parent = assert_fs::TempDir::new()?; + let venv = create_venv_py312(&parent, &cache_dir); + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("clean") + .arg("tqdm") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Cleared 1 entry for package: tqdm + "###); + }); + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-sync") + .arg("requirements.txt") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Unzipped 1 package in [TIME] + Installed 1 package in [TIME] + + tqdm @ https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz + "###); + }); + + check_command(&venv, "import tqdm", &temp_dir); + Ok(()) } @@ -1060,6 +1108,54 @@ fn install_git_source_dist_cached() -> Result<()> { check_command(&venv, "import werkzeug", &temp_dir); + // Clear the cache, then re-run the installation in a new virtual environment. + let parent = assert_fs::TempDir::new()?; + let venv = create_venv_py312(&parent, &cache_dir); + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("clean") + .arg("werkzeug") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Cleared 1 entry for package: werkzeug + "###); + }); + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-sync") + .arg("requirements.txt") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Unzipped 1 package in [TIME] + Installed 1 package in [TIME] + + werkzeug @ git+https://github.com/pallets/werkzeug.git@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74 + "###); + }); + + check_command(&venv, "import werkzeug", &temp_dir); + Ok(()) } @@ -1125,6 +1221,54 @@ fn install_registry_source_dist_cached() -> Result<()> { check_command(&venv, "import future", &temp_dir); + // Clear the cache, then re-run the installation in a new virtual environment. + let parent = assert_fs::TempDir::new()?; + let venv = create_venv_py312(&parent, &cache_dir); + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("clean") + .arg("future") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Cleared 2 entries for package: future + "###); + }); + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-sync") + .arg("requirements.txt") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Unzipped 1 package in [TIME] + Installed 1 package in [TIME] + + future==0.18.3 + "###); + }); + + check_command(&venv, "import future", &temp_dir); + Ok(()) } @@ -1199,6 +1343,54 @@ fn install_path_source_dist_cached() -> Result<()> { check_command(&venv, "import wheel", &temp_dir); + // Clear the cache, then re-run the installation in a new virtual environment. + let parent = assert_fs::TempDir::new()?; + let venv = create_venv_py312(&parent, &cache_dir); + + insta::with_settings!({ + filters => filters.clone() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("clean") + .arg("wheel") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Cleared 1 entry for package: wheel + "###); + }); + + insta::with_settings!({ + filters => filters.clone() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-sync") + .arg("requirements.txt") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Unzipped 1 package in [TIME] + Installed 1 package in [TIME] + + wheel @ file://[TEMP_DIR]/wheel-0.42.0.tar.gz + "###); + }); + + check_command(&venv, "import wheel", &temp_dir); + Ok(()) } @@ -1273,6 +1465,54 @@ fn install_path_built_dist_cached() -> Result<()> { check_command(&venv, "import tomli", &parent); + // Clear the cache, then re-run the installation in a new virtual environment. + let parent = assert_fs::TempDir::new()?; + let venv = create_venv_py312(&parent, &cache_dir); + + insta::with_settings!({ + filters => filters.clone() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("clean") + .arg("tomli") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Cleared 1 entry for package: tomli + "###); + }); + + insta::with_settings!({ + filters => filters.clone() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-sync") + .arg("requirements.txt") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Unzipped 1 package in [TIME] + Installed 1 package in [TIME] + + tomli @ file://[TEMP_DIR]/tomli-3.0.1-py3-none-any.whl + "###); + }); + + check_command(&venv, "import tomli", &temp_dir); + Ok(()) } @@ -1338,6 +1578,54 @@ fn install_url_built_dist_cached() -> Result<()> { check_command(&venv, "import tqdm", &temp_dir); + // Clear the cache, then re-run the installation in a new virtual environment. + let parent = assert_fs::TempDir::new()?; + let venv = create_venv_py312(&parent, &cache_dir); + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("clean") + .arg("tqdm") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Cleared 1 entry for package: tqdm + "###); + }); + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-sync") + .arg("requirements.txt") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Unzipped 1 package in [TIME] + Installed 1 package in [TIME] + + tqdm @ https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl + "###); + }); + + check_command(&venv, "import tqdm", &temp_dir); + Ok(()) } diff --git a/crates/puffin-distribution/src/index/built_wheel_index.rs b/crates/puffin-distribution/src/index/built_wheel_index.rs index 3b52c8340..e457fe440 100644 --- a/crates/puffin-distribution/src/index/built_wheel_index.rs +++ b/crates/puffin-distribution/src/index/built_wheel_index.rs @@ -4,8 +4,7 @@ use tracing::warn; use distribution_types::CachedWheel; use platform_tags::Tags; use puffin_cache::CacheShard; - -use crate::index::iter_directories; +use puffin_fs::directories; /// A local index of built distributions for a specific source distribution. pub struct BuiltWheelIndex; @@ -30,7 +29,7 @@ impl BuiltWheelIndex { pub fn find(shard: &CacheShard, tags: &Tags) -> Option { let mut candidate: Option = None; - for subdir in iter_directories(shard.read_dir().ok()?) { + for subdir in directories(&**shard) { match CachedWheel::from_path(&subdir) { Ok(None) => {} Ok(Some(dist_info)) => { diff --git a/crates/puffin-distribution/src/index/mod.rs b/crates/puffin-distribution/src/index/mod.rs index e219b3de8..aebeef891 100644 --- a/crates/puffin-distribution/src/index/mod.rs +++ b/crates/puffin-distribution/src/index/mod.rs @@ -1,27 +1,5 @@ -use std::path::PathBuf; - -use tracing::warn; - pub use built_wheel_index::BuiltWheelIndex; pub use registry_wheel_index::RegistryWheelIndex; mod built_wheel_index; mod registry_wheel_index; - -/// Iterate over the subdirectories of a directory. -fn iter_directories(read_dir: std::fs::ReadDir) -> impl Iterator { - read_dir - .filter_map(|entry| match entry { - Ok(entry) => Some(entry), - Err(err) => { - warn!("Failed to read entry of cache: {}", err); - None - } - }) - .filter(|entry| { - entry - .file_type() - .map_or(false, |file_type| file_type.is_dir()) - }) - .map(|entry| entry.path()) -} diff --git a/crates/puffin-distribution/src/index/registry_wheel_index.rs b/crates/puffin-distribution/src/index/registry_wheel_index.rs index 31af9489b..a2d76b587 100644 --- a/crates/puffin-distribution/src/index/registry_wheel_index.rs +++ b/crates/puffin-distribution/src/index/registry_wheel_index.rs @@ -1,6 +1,5 @@ use std::collections::hash_map::Entry; use std::collections::BTreeMap; - use std::path::Path; use fs_err as fs; @@ -11,11 +10,10 @@ use distribution_types::{CachedRegistryDist, CachedWheel}; use pep440_rs::Version; use platform_tags::Tags; use puffin_cache::{Cache, CacheBucket, WheelCache}; +use puffin_fs::directories; use puffin_normalize::PackageName; use pypi_types::IndexUrls; -use crate::index::iter_directories; - /// A local index of distributions that originate from a registry, like `PyPI`. #[derive(Debug)] pub struct RegistryWheelIndex<'a> { @@ -79,10 +77,7 @@ impl<'a> RegistryWheelIndex<'a> { // Built wheels have one more level of indirection, as they are keyed by the source // distribution filename. - let Ok(read_dir) = built_wheel_dir.read_dir() else { - continue; - }; - for subdir in iter_directories(read_dir) { + for subdir in directories(&*built_wheel_dir) { Self::add_directory(subdir, tags, &mut versions); } } @@ -98,11 +93,7 @@ impl<'a> RegistryWheelIndex<'a> { tags: &Tags, versions: &mut BTreeMap, ) { - let Ok(read_dir) = path.as_ref().read_dir() else { - return; - }; - - for wheel_dir in iter_directories(read_dir) { + for wheel_dir in directories(path.as_ref()) { match CachedWheel::from_path(&wheel_dir) { Ok(None) => {} Ok(Some(dist_info)) => { diff --git a/crates/puffin-fs/Cargo.toml b/crates/puffin-fs/Cargo.toml index 18f23f8ec..71176064a 100644 --- a/crates/puffin-fs/Cargo.toml +++ b/crates/puffin-fs/Cargo.toml @@ -15,3 +15,4 @@ workspace = true [dependencies] fs-err = { workspace = true, features = ["tokio"] } tempfile = { workspace = true } +tracing = { workspace = true } diff --git a/crates/puffin-fs/src/lib.rs b/crates/puffin-fs/src/lib.rs index e524df666..8f44c210b 100644 --- a/crates/puffin-fs/src/lib.rs +++ b/crates/puffin-fs/src/lib.rs @@ -1,6 +1,8 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; +use fs_err as fs; use tempfile::NamedTempFile; +use tracing::warn; /// Write `data` to `path` atomically using a temporary file and atomic rename. pub async fn write_atomic(path: impl AsRef, data: impl AsRef<[u8]>) -> std::io::Result<()> { @@ -43,3 +45,48 @@ pub fn write_atomic_sync(path: impl AsRef, data: impl AsRef<[u8]>) -> std: })?; Ok(()) } + +/// Remove the file or directory at `path`, if it exists. +/// +/// Returns `true` if the file or directory was removed, and `false` if the path did not exist. +pub fn force_remove_all(path: impl AsRef) -> Result { + let path = path.as_ref(); + + let metadata = match fs::metadata(path) { + Ok(metadata) => metadata, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(false), + Err(err) => return Err(err), + }; + + if metadata.is_dir() { + fs::remove_dir_all(path)?; + } else { + fs::remove_file(path)?; + } + + Ok(true) +} + +/// Iterate over the subdirectories of a directory. +/// +/// If the directory does not exist, returns an empty iterator. +pub fn directories(path: impl AsRef) -> impl Iterator { + path.as_ref() + .read_dir() + .ok() + .into_iter() + .flatten() + .filter_map(|entry| match entry { + Ok(entry) => Some(entry), + Err(err) => { + warn!("Failed to read entry: {}", err); + None + } + }) + .filter(|entry| { + entry + .file_type() + .map_or(false, |file_type| file_type.is_dir()) + }) + .map(|entry| entry.path()) +}