Avoid removing local wheels when unzipping (#560)

## Summary

When installing a local wheel, we need to avoid removing the zipped
wheel (since it lives outside of the cache), _and_ need to ensure that
we unzip the wheel into the cache (rather than replacing the zipped
wheel, which may even live outside of the project).

Closes https://github.com/astral-sh/puffin/issues/553.
This commit is contained in:
Charlie Marsh 2023-12-05 12:50:08 -05:00 committed by GitHub
parent 6f055ecf3b
commit a15da36d74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 97 additions and 53 deletions

1
Cargo.lock generated
View File

@ -2525,6 +2525,7 @@ dependencies = [
"puffin-cache", "puffin-cache",
"puffin-client", "puffin-client",
"puffin-distribution", "puffin-distribution",
"puffin-fs",
"puffin-git", "puffin-git",
"puffin-interpreter", "puffin-interpreter",
"puffin-normalize", "puffin-normalize",

View File

@ -427,7 +427,7 @@ impl SourceBuild {
let from = tmp_dir.path().join(&filename); let from = tmp_dir.path().join(&filename);
let to = wheel_dir.join(&filename); let to = wheel_dir.join(&filename);
if !rename_atomic_sync(from, &to)? { if rename_atomic_sync(from, &to)?.is_some() {
warn!("Overwriting existing wheel at: {}", to.display()); warn!("Overwriting existing wheel at: {}", to.display());
} }
@ -461,7 +461,7 @@ impl SourceBuild {
let from = dist_wheel.path(); let from = dist_wheel.path();
let to = wheel_dir.join(dist_wheel.file_name()); let to = wheel_dir.join(dist_wheel.file_name());
if !copy_atomic_sync(from, &to)? { if copy_atomic_sync(from, &to)?.is_some() {
warn!("Overwriting existing wheel at: {}", to.display()); warn!("Overwriting existing wheel at: {}", to.display());
} }

View File

@ -184,7 +184,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
dist: dist.clone(), dist: dist.clone(),
filename, filename,
buffer, buffer,
path: cache_entry.path(), target: cache_entry.path(),
}) })
} else { } else {
let size = let size =
@ -201,6 +201,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
dist: dist.clone(), dist: dist.clone(),
filename, filename,
path: cache_entry.path(), path: cache_entry.path(),
target: cache_entry.path(),
}) })
}; };
@ -212,7 +213,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
} }
Dist::Built(BuiltDist::DirectUrl(wheel)) => { Dist::Built(BuiltDist::DirectUrl(wheel)) => {
debug!("Fetching disk-based wheel from URL: {}", &wheel.url); debug!("Fetching disk-based wheel from URL: {}", wheel.url);
// Create a directory for the wheel. // Create a directory for the wheel.
let wheel_filename = wheel.filename()?; let wheel_filename = wheel.filename()?;
@ -235,6 +236,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
dist: dist.clone(), dist: dist.clone(),
filename: wheel.filename.clone(), filename: wheel.filename.clone(),
path: cache_entry.path(), path: cache_entry.path(),
target: cache_entry.path(),
}); });
if let Some(reporter) = self.reporter.as_ref() { if let Some(reporter) = self.reporter.as_ref() {
@ -244,11 +246,20 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok(local_wheel) Ok(local_wheel)
} }
Dist::Built(BuiltDist::Path(wheel)) => Ok(LocalWheel::Disk(DiskWheel { Dist::Built(BuiltDist::Path(wheel)) => {
dist: dist.clone(), let cache_entry = self.cache.entry(
path: wheel.path.clone(), CacheBucket::Wheels,
filename: wheel.filename.clone(), WheelCache::Url(&wheel.url).wheel_dir(),
})), wheel.filename.to_string(),
);
Ok(LocalWheel::Disk(DiskWheel {
dist: dist.clone(),
filename: wheel.filename.clone(),
path: wheel.path.clone(),
target: cache_entry.path(),
}))
}
Dist::Source(source_dist) => { Dist::Source(source_dist) => {
let lock = self.locks.acquire(&dist).await; let lock = self.locks.acquire(&dist).await;
@ -258,7 +269,8 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok(LocalWheel::Built(BuiltWheel { Ok(LocalWheel::Built(BuiltWheel {
dist: dist.clone(), dist: dist.clone(),
filename: built_wheel.filename, filename: built_wheel.filename,
path: built_wheel.path, path: built_wheel.path.clone(),
target: built_wheel.path,
})) }))
} }
} }

View File

@ -19,8 +19,8 @@ pub struct InMemoryWheel {
pub(crate) filename: WheelFilename, pub(crate) filename: WheelFilename,
/// The contents of the wheel. /// The contents of the wheel.
pub(crate) buffer: Vec<u8>, pub(crate) buffer: Vec<u8>,
/// The path where the downloaded wheel would have been stored, if it wasn't an in-memory wheel. /// The expected path to the downloaded wheel's entry in the cache.
pub(crate) path: PathBuf, pub(crate) target: PathBuf,
} }
/// A downloaded wheel that's stored on-disk. /// A downloaded wheel that's stored on-disk.
@ -32,6 +32,8 @@ pub struct DiskWheel {
pub(crate) filename: WheelFilename, pub(crate) filename: WheelFilename,
/// The path to the downloaded wheel. /// The path to the downloaded wheel.
pub(crate) path: PathBuf, pub(crate) path: PathBuf,
/// The expected path to the downloaded wheel's entry in the cache.
pub(crate) target: PathBuf,
} }
/// A wheel built from a source distribution that's stored on-disk. /// A wheel built from a source distribution that's stored on-disk.
@ -43,6 +45,8 @@ pub struct BuiltWheel {
pub(crate) filename: WheelFilename, pub(crate) filename: WheelFilename,
/// The path to the built wheel. /// The path to the built wheel.
pub(crate) path: PathBuf, pub(crate) path: PathBuf,
/// The expected path to the downloaded wheel's entry in the cache.
pub(crate) target: PathBuf,
} }
/// A downloaded or built wheel. /// A downloaded or built wheel.
@ -54,11 +58,12 @@ pub enum LocalWheel {
} }
impl LocalWheel { impl LocalWheel {
pub fn path(&self) -> &Path { /// Return the path to the downloaded wheel's entry in the cache.
pub fn target(&self) -> &Path {
match self { match self {
LocalWheel::InMemory(wheel) => &wheel.path, LocalWheel::InMemory(wheel) => &wheel.target,
LocalWheel::Disk(wheel) => &wheel.path, LocalWheel::Disk(wheel) => &wheel.target,
LocalWheel::Built(wheel) => &wheel.path, LocalWheel::Built(wheel) => &wheel.target,
} }
} }
} }

View File

@ -46,47 +46,55 @@ pub fn write_atomic_sync(path: impl AsRef<Path>, data: impl AsRef<[u8]>) -> std:
/// Rename `from` to `to` atomically using a temporary file and atomic rename. /// Rename `from` to `to` atomically using a temporary file and atomic rename.
/// ///
/// Returns `false` if the `to` path already existed and thus was removed before performing the /// Returns a [`Target`] if the `to` path already existed and thus was removed before performing the
/// rename. /// rename.
pub fn rename_atomic_sync(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io::Result<bool> { pub fn rename_atomic_sync(
// Remove the destination if it exists. from: impl AsRef<Path>,
let safe = if let Ok(metadata) = fs_err::metadata(&to) { to: impl AsRef<Path>,
if metadata.is_dir() { ) -> std::io::Result<Option<Target>> {
// Remove the destination, if it exists.
let target = if let Ok(metadata) = fs_err::metadata(&to) {
Some(if metadata.is_dir() {
fs_err::remove_dir_all(&to)?; fs_err::remove_dir_all(&to)?;
Target::Directory
} else { } else {
fs_err::remove_file(&to)?; fs_err::remove_file(&to)?;
} Target::File
false })
} else { } else {
true None
}; };
// Move the source file to the destination. // Move the source file to the destination.
fs_err::rename(from, to)?; fs_err::rename(from, to)?;
Ok(safe) Ok(target)
} }
/// Copy `from` to `to` atomically using a temporary file and atomic rename. /// Copy `from` to `to` atomically using a temporary file and atomic rename.
/// ///
/// Returns `false` if the `to` path already existed and thus was removed before performing the /// Returns a [`Target`] if the `to` path already existed and thus was removed before performing the
/// rename. /// copy.
pub fn copy_atomic_sync(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io::Result<bool> { pub fn copy_atomic_sync(
from: impl AsRef<Path>,
to: impl AsRef<Path>,
) -> std::io::Result<Option<Target>> {
// Copy to a temporary file. // Copy to a temporary file.
let temp_file = let temp_file =
NamedTempFile::new_in(to.as_ref().parent().expect("Write path must have a parent"))?; NamedTempFile::new_in(to.as_ref().parent().expect("Write path must have a parent"))?;
fs_err::copy(from, &temp_file)?; fs_err::copy(from, &temp_file)?;
// Remove the destination if it exists. // Remove the destination, if it exists.
let safe = if let Ok(metadata) = fs_err::metadata(&to) { let target = if let Ok(metadata) = fs_err::metadata(&to) {
if metadata.is_dir() { Some(if metadata.is_dir() {
fs_err::remove_dir_all(&to)?; fs_err::remove_dir_all(&to)?;
Target::Directory
} else { } else {
fs_err::remove_file(&to)?; fs_err::remove_file(&to)?;
} Target::File
false })
} else { } else {
true None
}; };
// Move the temporary file to the destination. // Move the temporary file to the destination.
@ -101,5 +109,25 @@ pub fn copy_atomic_sync(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io
) )
})?; })?;
Ok(safe) Ok(target)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Target {
/// The target path was an existing file.
File,
/// The target path was an existing directory.
Directory,
/// The target path did not exist.
NotFound,
}
impl Target {
pub fn is_file(self) -> bool {
matches!(self, Self::File)
}
pub fn is_directory(self) -> bool {
matches!(self, Self::Directory)
}
} }

View File

@ -22,6 +22,7 @@ puffin-client = { path = "../puffin-client" }
distribution-types = { path = "../distribution-types" } distribution-types = { path = "../distribution-types" }
platform-tags = { path = "../platform-tags" } platform-tags = { path = "../platform-tags" }
puffin-distribution = { path = "../puffin-distribution" } puffin-distribution = { path = "../puffin-distribution" }
puffin-fs = { path = "../puffin-fs" }
puffin-git = { path = "../puffin-git" } puffin-git = { path = "../puffin-git" }
puffin-interpreter = { path = "../puffin-interpreter" } puffin-interpreter = { path = "../puffin-interpreter" }
puffin-normalize = { path = "../puffin-normalize" } puffin-normalize = { path = "../puffin-normalize" }

View File

@ -6,6 +6,7 @@ use tracing::{debug, instrument, warn};
use distribution_types::{CachedDist, Dist, RemoteSource}; use distribution_types::{CachedDist, Dist, RemoteSource};
use puffin_distribution::{LocalWheel, Unzip}; use puffin_distribution::{LocalWheel, Unzip};
use puffin_fs::{rename_atomic_sync, Target};
#[derive(Default)] #[derive(Default)]
pub struct Unzipper { pub struct Unzipper {
@ -40,32 +41,28 @@ impl Unzipper {
// Unzip the wheel. // Unzip the wheel.
let normalized_path = tokio::task::spawn_blocking({ let normalized_path = tokio::task::spawn_blocking({
move || -> Result<PathBuf> { move || -> Result<PathBuf> {
let parent = download.path().parent().expect("Cache paths can't be root"); // Unzip the wheel into a temporary directory.
let parent = download
.target()
.parent()
.expect("Cache paths can't be root");
fs_err::create_dir_all(parent)?; fs_err::create_dir_all(parent)?;
let staging = tempfile::tempdir_in(parent)?; let staging = tempfile::tempdir_in(parent)?;
download.unzip(staging.path())?; download.unzip(staging.path())?;
// Remove the file we just unzipped and replace it with the unzipped directory. // Move the unzipped wheel into the cache, removing any existing files or
// If we abort before renaming the directory that's not a problem, we just lose // directories. This will often include the zipped wheel itself, which we
// the cache. // replace in the cache with the unzipped directory.
if !matches!(download, LocalWheel::InMemory(_)) { if rename_atomic_sync(staging.into_path(), download.target())?
fs_err::remove_file(download.path())?; .is_some_and(Target::is_directory)
} {
let normalized_path = parent.join(download.filename().to_string());
if fs_err::remove_dir_all(&normalized_path).is_ok() {
// If we're replacing an existing directory, warn. If a wheel already exists
// in the cache, we should avoid re-downloading it, so reaching this
// condition represents a bug in the install plan.
warn!( warn!(
"Removed existing directory at: {}", "Removing existing directory at: {}",
normalized_path.display() download.target().display()
); );
} }
fs_err::rename(staging.into_path(), &normalized_path)?;
Ok(normalized_path) Ok(download.target().to_path_buf())
} }
}) })
.await? .await?