Avoid removing local wheels when unzipping (#560)

## Summary

When installing a local wheel, we need to avoid removing the zipped
wheel (since it lives outside of the cache), _and_ need to ensure that
we unzip the wheel into the cache (rather than replacing the zipped
wheel, which may even live outside of the project).

Closes https://github.com/astral-sh/puffin/issues/553.
This commit is contained in:
Charlie Marsh 2023-12-05 12:50:08 -05:00 committed by GitHub
parent 6f055ecf3b
commit a15da36d74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 97 additions and 53 deletions

1
Cargo.lock generated
View File

@ -2525,6 +2525,7 @@ dependencies = [
"puffin-cache",
"puffin-client",
"puffin-distribution",
"puffin-fs",
"puffin-git",
"puffin-interpreter",
"puffin-normalize",

View File

@ -427,7 +427,7 @@ impl SourceBuild {
let from = tmp_dir.path().join(&filename);
let to = wheel_dir.join(&filename);
if !rename_atomic_sync(from, &to)? {
if rename_atomic_sync(from, &to)?.is_some() {
warn!("Overwriting existing wheel at: {}", to.display());
}
@ -461,7 +461,7 @@ impl SourceBuild {
let from = dist_wheel.path();
let to = wheel_dir.join(dist_wheel.file_name());
if !copy_atomic_sync(from, &to)? {
if copy_atomic_sync(from, &to)?.is_some() {
warn!("Overwriting existing wheel at: {}", to.display());
}

View File

@ -184,7 +184,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
dist: dist.clone(),
filename,
buffer,
path: cache_entry.path(),
target: cache_entry.path(),
})
} else {
let size =
@ -201,6 +201,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
dist: dist.clone(),
filename,
path: cache_entry.path(),
target: cache_entry.path(),
})
};
@ -212,7 +213,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
}
Dist::Built(BuiltDist::DirectUrl(wheel)) => {
debug!("Fetching disk-based wheel from URL: {}", &wheel.url);
debug!("Fetching disk-based wheel from URL: {}", wheel.url);
// Create a directory for the wheel.
let wheel_filename = wheel.filename()?;
@ -235,6 +236,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
dist: dist.clone(),
filename: wheel.filename.clone(),
path: cache_entry.path(),
target: cache_entry.path(),
});
if let Some(reporter) = self.reporter.as_ref() {
@ -244,11 +246,20 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok(local_wheel)
}
Dist::Built(BuiltDist::Path(wheel)) => Ok(LocalWheel::Disk(DiskWheel {
dist: dist.clone(),
path: wheel.path.clone(),
filename: wheel.filename.clone(),
})),
Dist::Built(BuiltDist::Path(wheel)) => {
let cache_entry = self.cache.entry(
CacheBucket::Wheels,
WheelCache::Url(&wheel.url).wheel_dir(),
wheel.filename.to_string(),
);
Ok(LocalWheel::Disk(DiskWheel {
dist: dist.clone(),
filename: wheel.filename.clone(),
path: wheel.path.clone(),
target: cache_entry.path(),
}))
}
Dist::Source(source_dist) => {
let lock = self.locks.acquire(&dist).await;
@ -258,7 +269,8 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok(LocalWheel::Built(BuiltWheel {
dist: dist.clone(),
filename: built_wheel.filename,
path: built_wheel.path,
path: built_wheel.path.clone(),
target: built_wheel.path,
}))
}
}

View File

@ -19,8 +19,8 @@ pub struct InMemoryWheel {
pub(crate) filename: WheelFilename,
/// The contents of the wheel.
pub(crate) buffer: Vec<u8>,
/// The path where the downloaded wheel would have been stored, if it wasn't an in-memory wheel.
pub(crate) path: PathBuf,
/// The expected path to the downloaded wheel's entry in the cache.
pub(crate) target: PathBuf,
}
/// A downloaded wheel that's stored on-disk.
@ -32,6 +32,8 @@ pub struct DiskWheel {
pub(crate) filename: WheelFilename,
/// The path to the downloaded wheel.
pub(crate) path: PathBuf,
/// The expected path to the downloaded wheel's entry in the cache.
pub(crate) target: PathBuf,
}
/// A wheel built from a source distribution that's stored on-disk.
@ -43,6 +45,8 @@ pub struct BuiltWheel {
pub(crate) filename: WheelFilename,
/// The path to the built wheel.
pub(crate) path: PathBuf,
/// The expected path to the downloaded wheel's entry in the cache.
pub(crate) target: PathBuf,
}
/// A downloaded or built wheel.
@ -54,11 +58,12 @@ pub enum LocalWheel {
}
impl LocalWheel {
pub fn path(&self) -> &Path {
/// Return the path to the downloaded wheel's entry in the cache.
pub fn target(&self) -> &Path {
match self {
LocalWheel::InMemory(wheel) => &wheel.path,
LocalWheel::Disk(wheel) => &wheel.path,
LocalWheel::Built(wheel) => &wheel.path,
LocalWheel::InMemory(wheel) => &wheel.target,
LocalWheel::Disk(wheel) => &wheel.target,
LocalWheel::Built(wheel) => &wheel.target,
}
}
}

View File

@ -46,47 +46,55 @@ pub fn write_atomic_sync(path: impl AsRef<Path>, data: impl AsRef<[u8]>) -> std:
/// Rename `from` to `to` atomically using a temporary file and atomic rename.
///
/// Returns `false` if the `to` path already existed and thus was removed before performing the
/// Returns a [`Target`] if the `to` path already existed and thus was removed before performing the
/// rename.
pub fn rename_atomic_sync(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io::Result<bool> {
// Remove the destination if it exists.
let safe = if let Ok(metadata) = fs_err::metadata(&to) {
if metadata.is_dir() {
pub fn rename_atomic_sync(
from: impl AsRef<Path>,
to: impl AsRef<Path>,
) -> std::io::Result<Option<Target>> {
// Remove the destination, if it exists.
let target = if let Ok(metadata) = fs_err::metadata(&to) {
Some(if metadata.is_dir() {
fs_err::remove_dir_all(&to)?;
Target::Directory
} else {
fs_err::remove_file(&to)?;
}
false
Target::File
})
} else {
true
None
};
// Move the source file to the destination.
fs_err::rename(from, to)?;
Ok(safe)
Ok(target)
}
/// Copy `from` to `to` atomically using a temporary file and atomic rename.
///
/// Returns `false` if the `to` path already existed and thus was removed before performing the
/// rename.
pub fn copy_atomic_sync(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io::Result<bool> {
/// Returns a [`Target`] if the `to` path already existed and thus was removed before performing the
/// copy.
pub fn copy_atomic_sync(
from: impl AsRef<Path>,
to: impl AsRef<Path>,
) -> std::io::Result<Option<Target>> {
// Copy to a temporary file.
let temp_file =
NamedTempFile::new_in(to.as_ref().parent().expect("Write path must have a parent"))?;
fs_err::copy(from, &temp_file)?;
// Remove the destination if it exists.
let safe = if let Ok(metadata) = fs_err::metadata(&to) {
if metadata.is_dir() {
// Remove the destination, if it exists.
let target = if let Ok(metadata) = fs_err::metadata(&to) {
Some(if metadata.is_dir() {
fs_err::remove_dir_all(&to)?;
Target::Directory
} else {
fs_err::remove_file(&to)?;
}
false
Target::File
})
} else {
true
None
};
// Move the temporary file to the destination.
@ -101,5 +109,25 @@ pub fn copy_atomic_sync(from: impl AsRef<Path>, to: impl AsRef<Path>) -> std::io
)
})?;
Ok(safe)
Ok(target)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Target {
/// The target path was an existing file.
File,
/// The target path was an existing directory.
Directory,
/// The target path did not exist.
NotFound,
}
impl Target {
pub fn is_file(self) -> bool {
matches!(self, Self::File)
}
pub fn is_directory(self) -> bool {
matches!(self, Self::Directory)
}
}

View File

@ -22,6 +22,7 @@ puffin-client = { path = "../puffin-client" }
distribution-types = { path = "../distribution-types" }
platform-tags = { path = "../platform-tags" }
puffin-distribution = { path = "../puffin-distribution" }
puffin-fs = { path = "../puffin-fs" }
puffin-git = { path = "../puffin-git" }
puffin-interpreter = { path = "../puffin-interpreter" }
puffin-normalize = { path = "../puffin-normalize" }

View File

@ -6,6 +6,7 @@ use tracing::{debug, instrument, warn};
use distribution_types::{CachedDist, Dist, RemoteSource};
use puffin_distribution::{LocalWheel, Unzip};
use puffin_fs::{rename_atomic_sync, Target};
#[derive(Default)]
pub struct Unzipper {
@ -40,32 +41,28 @@ impl Unzipper {
// Unzip the wheel.
let normalized_path = tokio::task::spawn_blocking({
move || -> Result<PathBuf> {
let parent = download.path().parent().expect("Cache paths can't be root");
// Unzip the wheel into a temporary directory.
let parent = download
.target()
.parent()
.expect("Cache paths can't be root");
fs_err::create_dir_all(parent)?;
let staging = tempfile::tempdir_in(parent)?;
download.unzip(staging.path())?;
// Remove the file we just unzipped and replace it with the unzipped directory.
// If we abort before renaming the directory that's not a problem, we just lose
// the cache.
if !matches!(download, LocalWheel::InMemory(_)) {
fs_err::remove_file(download.path())?;
}
let normalized_path = parent.join(download.filename().to_string());
if fs_err::remove_dir_all(&normalized_path).is_ok() {
// If we're replacing an existing directory, warn. If a wheel already exists
// in the cache, we should avoid re-downloading it, so reaching this
// condition represents a bug in the install plan.
// Move the unzipped wheel into the cache, removing any existing files or
// directories. This will often include the zipped wheel itself, which we
// replace in the cache with the unzipped directory.
if rename_atomic_sync(staging.into_path(), download.target())?
.is_some_and(Target::is_directory)
{
warn!(
"Removed existing directory at: {}",
normalized_path.display()
"Removing existing directory at: {}",
download.target().display()
);
}
fs_err::rename(staging.into_path(), &normalized_path)?;
Ok(normalized_path)
Ok(download.target().to_path_buf())
}
})
.await?