diff --git a/Cargo.lock b/Cargo.lock index 73c922daf..3ec6c91d0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6569,6 +6569,7 @@ dependencies = [ "uv-static", "uv-trampoline-builder", "uv-warnings", + "walkdir", "which", "windows 0.59.0", "windows-registry", diff --git a/crates/uv-python/Cargo.toml b/crates/uv-python/Cargo.toml index 0e88362fc..37802babc 100644 --- a/crates/uv-python/Cargo.toml +++ b/crates/uv-python/Cargo.toml @@ -65,6 +65,7 @@ tokio = { workspace = true } tokio-util = { workspace = true, features = ["compat"] } tracing = { workspace = true } url = { workspace = true } +walkdir = { workspace = true } which = { workspace = true } [target.'cfg(target_os = "windows")'.dependencies] diff --git a/crates/uv-python/src/downloads.rs b/crates/uv-python/src/downloads.rs index f500d3e1d..102074732 100644 --- a/crates/uv-python/src/downloads.rs +++ b/crates/uv-python/src/downloads.rs @@ -8,6 +8,8 @@ use std::task::{Context, Poll}; use std::time::{Duration, SystemTime}; use std::{env, io}; +use walkdir::WalkDir; + use futures::TryStreamExt; use itertools::Itertools; use owo_colors::OwoColorize; @@ -1072,6 +1074,141 @@ async fn fetch_bytes_from_url(client: &BaseClient, url: &DisplaySafeUrl) -> Resu Ok(buf) } +/// Tracks the state of hard link/copy fallback attempts. +/// +/// Hard linking might not be supported, but we can't detect this ahead of time, +/// so we'll try hard linking the first file - if this succeeds we'll know later +/// errors are not due to lack of OS/filesystem support. If it fails, we'll switch +/// to copying for the rest of the operation. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +enum Attempt { + #[default] + Initial, + Subsequent, + UseCopyFallback, +} + +/// Check if a file should be copied instead of hard-linked. +/// +/// These files are modified after installation and must be copied to avoid +/// corrupting the cache: +/// - `_sysconfigdata_*.py` - patched by `ensure_sysconfig_patched()` +/// - `*.pc` files in `pkgconfig/` directories - patched by sysconfig +/// - `libpython*.dylib` on macOS - patched by `ensure_dylib_patched()` +fn should_copy_python_distribution_file(path: &Path) -> bool { + let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else { + return false; + }; + + let extension = path.extension().and_then(|e| e.to_str()); + + // _sysconfigdata_*.py files + if file_name.starts_with("_sysconfigdata_") + && extension.is_some_and(|ext| ext.eq_ignore_ascii_case("py")) + { + return true; + } + + // *.pc files in pkgconfig directories + if extension.is_some_and(|ext| ext.eq_ignore_ascii_case("pc")) { + if let Some(parent) = path.parent() { + if parent.file_name().and_then(|n| n.to_str()) == Some("pkgconfig") { + return true; + } + } + return true; + } + + // libpython*.dylib on macOS + #[cfg(target_os = "macos")] + if file_name.starts_with("libpython") + && extension.is_some_and(|ext| ext.eq_ignore_ascii_case("dylib")) + { + return true; + } + + false +} + +/// Recursively hard link or copy a directory tree from `src` to `dst`. +/// +/// Tries hard linking first for efficiency, falling back to copying if hard links +/// are not supported (e.g., cross-filesystem operations). +/// +/// Files that will be patched after installation (sysconfig, pkgconfig, dylib) are +/// always copied to avoid modifying the cached source. +fn hardlink_or_copy_dir(src: &Path, dst: &Path) -> Result<(), Error> { + let mut attempt = Attempt::Initial; + + for entry in WalkDir::new(src) { + let entry = entry.map_err(|e| Error::ReadError { + dir: src.to_path_buf(), + err: io::Error::other(e), + })?; + + let path = entry.path(); + let relative = path.strip_prefix(src).expect("walkdir starts with root"); + let target = dst.join(relative); + + if entry.file_type().is_dir() { + fs_err::create_dir_all(&target)?; + continue; + } + + // Always copy files that will be patched to avoid modifying the cache + if should_copy_python_distribution_file(path) { + fs_err::copy(path, &target).map_err(|err| Error::CopyError { + to: target.clone(), + err, + })?; + continue; + } + + match attempt { + Attempt::Initial => { + if let Err(err) = fs_err::hard_link(path, &target) { + debug!( + "Failed to hard link `{}` to `{}`: {}; falling back to copy", + path.display(), + target.display(), + err + ); + attempt = Attempt::UseCopyFallback; + fs_err::copy(path, &target).map_err(|err| Error::CopyError { + to: target.clone(), + err, + })?; + } else { + attempt = Attempt::Subsequent; + } + } + Attempt::Subsequent => { + if let Err(err) = fs_err::hard_link(path, &target) { + // Unexpected failure after initial success - still fall back to copy + debug!( + "Unexpected hard link failure for `{}`: {}; falling back to copy", + path.display(), + err + ); + attempt = Attempt::UseCopyFallback; + fs_err::copy(path, &target).map_err(|err| Error::CopyError { + to: target.clone(), + err, + })?; + } + } + Attempt::UseCopyFallback => { + fs_err::copy(path, &target).map_err(|err| Error::CopyError { + to: target.clone(), + err, + })?; + } + } + } + + Ok(()) +} + impl ManagedPythonDownload { /// Return a display type that includes the build information. pub fn to_display_with_build(&self) -> ManagedPythonDownloadWithBuild<'_> { @@ -1203,13 +1340,13 @@ impl ManagedPythonDownload { let ext = SourceDistExtension::from_path(&filename) .map_err(|err| Error::MissingExtension(url.to_string(), err))?; - let temp_dir = tempfile::tempdir_in(scratch_dir).map_err(Error::DownloadDirError)?; - - if let Some(python_builds_dir) = + // Track the unpacked cache path if caching is enabled + let (target_unpacked, temp_dir) = if let Some(python_builds_dir) = env::var_os(EnvVars::UV_PYTHON_CACHE_DIR).filter(|s| !s.is_empty()) { let python_builds_dir = PathBuf::from(python_builds_dir); fs_err::create_dir_all(&python_builds_dir)?; + let hash_prefix = match self.sha256.as_deref() { Some(sha) => { // Shorten the hash to avoid too-long-filename errors @@ -1218,7 +1355,36 @@ impl ManagedPythonDownload { None => "none", }; let target_cache_file = python_builds_dir.join(format!("{hash_prefix}-{filename}")); + // Strip the archive extension for the unpacked directory name + let basename = filename + .strip_suffix(&format!(".{}", ext.name())) + .expect("filename was parsed with this extension"); + let target_unpacked = python_builds_dir.join(format!("{hash_prefix}-{basename}")); + // Check if unpacked cache exists first - if so, hard link from it directly + if target_unpacked.is_dir() { + debug!( + "Using unpacked cache at `{}`", + target_unpacked.simplified_display() + ); + + // Remove the target if it already exists. + if path.is_dir() { + debug!("Removing existing directory: {}", path.user_display()); + fs_err::tokio::remove_dir_all(&path).await?; + } + + // Hard link (or copy) from unpacked cache to installation directory + hardlink_or_copy_dir(&target_unpacked, &path)?; + + return Ok(DownloadResult::Fetched(path)); + } + + // Create temp dir in the cache directory to ensure same-filesystem renames + let temp_dir = + tempfile::tempdir_in(&python_builds_dir).map_err(Error::DownloadDirError)?; + + // No unpacked cache - download and extract the archive // Download the archive to the cache, or return a reader if we have it in cache. // TODO(konsti): We should "tee" the write so we can do the download-to-cache and unpacking // in one step. @@ -1272,7 +1438,11 @@ impl ManagedPythonDownload { Direction::Extract, ) .await?; + + (Some(target_unpacked), temp_dir) } else { + let temp_dir = tempfile::tempdir_in(scratch_dir).map_err(Error::DownloadDirError)?; + // Avoid overlong log lines debug!("Downloading {url}"); debug!( @@ -1291,7 +1461,9 @@ impl ManagedPythonDownload { Direction::Download, ) .await?; - } + + (None, temp_dir) + }; // Extract the top-level directory. let mut extracted = match uv_extract::strip_component(temp_dir.path()) { @@ -1345,14 +1517,65 @@ impl ManagedPythonDownload { fs_err::tokio::remove_dir_all(&path).await?; } - // Persist it to the target. - debug!("Moving {} to {}", extracted.display(), path.user_display()); - rename_with_retry(extracted, &path) - .await - .map_err(|err| Error::CopyError { - to: path.clone(), - err, - })?; + // If caching is enabled, save to unpacked cache and hard link to target + if let Some(target_unpacked) = target_unpacked { + // Move extracted files to unpacked cache using atomic rename + debug!( + "Saving to unpacked cache at `{}`", + target_unpacked.simplified_display() + ); + + // Use a temporary name for atomic creation + // Note: Don't use `with_extension` as the path contains version dots (e.g., "3.10.19") + let temp_unpacked = PathBuf::from(format!( + "{}.tmp.{}", + target_unpacked.display(), + std::process::id() + )); + + // Move extracted to temp cache location + rename_with_retry(&extracted, &temp_unpacked) + .await + .map_err(|err| Error::CopyError { + to: temp_unpacked.clone(), + err, + })?; + + // Atomic rename to final cache location + match fs_err::rename(&temp_unpacked, &target_unpacked) { + Ok(()) => { + debug!( + "Created unpacked cache at `{}`", + target_unpacked.simplified_display() + ); + } + Err(err) + if err.kind() == io::ErrorKind::AlreadyExists + || err.kind() == io::ErrorKind::DirectoryNotEmpty => + { + // Another process won the race - use theirs and clean up ours + debug!("Unpacked cache already exists (concurrent creation)"); + let _ = fs_err::remove_dir_all(&temp_unpacked); + } + Err(err) => { + // Clean up temp directory on error + let _ = fs_err::remove_dir_all(&temp_unpacked); + return Err(err.into()); + } + } + + // Hard link (or copy) from unpacked cache to installation directory + hardlink_or_copy_dir(&target_unpacked, &path)?; + } else { + // No caching - just move to target + debug!("Moving {} to {}", extracted.display(), path.user_display()); + rename_with_retry(extracted, &path) + .await + .map_err(|err| Error::CopyError { + to: path.clone(), + err, + })?; + } Ok(DownloadResult::Fetched(path)) }