Store unpacked Python installations in the cache

This commit is contained in:
Zanie Blue 2025-12-10 09:14:49 -06:00
parent 5a6f2ea319
commit 7be8a7e65c
3 changed files with 237 additions and 12 deletions

1
Cargo.lock generated
View File

@ -6569,6 +6569,7 @@ dependencies = [
"uv-static",
"uv-trampoline-builder",
"uv-warnings",
"walkdir",
"which",
"windows 0.59.0",
"windows-registry",

View File

@ -65,6 +65,7 @@ tokio = { workspace = true }
tokio-util = { workspace = true, features = ["compat"] }
tracing = { workspace = true }
url = { workspace = true }
walkdir = { workspace = true }
which = { workspace = true }
[target.'cfg(target_os = "windows")'.dependencies]

View File

@ -8,6 +8,8 @@ use std::task::{Context, Poll};
use std::time::{Duration, SystemTime};
use std::{env, io};
use walkdir::WalkDir;
use futures::TryStreamExt;
use itertools::Itertools;
use owo_colors::OwoColorize;
@ -1072,6 +1074,141 @@ async fn fetch_bytes_from_url(client: &BaseClient, url: &DisplaySafeUrl) -> Resu
Ok(buf)
}
/// State machine for the "hard link first, copy if that fails" strategy.
///
/// Whether the OS/filesystem supports hard links cannot be determined up
/// front, so the first linkable file acts as a probe: if linking it works,
/// later failures are known not to stem from missing hard link support; if
/// it fails, the remainder of the operation copies instead.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
enum Attempt {
    /// No hard link has been attempted yet.
    #[default]
    Initial,
    /// At least one hard link succeeded.
    Subsequent,
    /// A hard link failed; copy every remaining file.
    UseCopyFallback,
}
/// Check if a file should be copied instead of hard-linked.
///
/// These files are modified after installation and must be copied to avoid
/// corrupting the cache:
/// - `_sysconfigdata_*.py` - patched by `ensure_sysconfig_patched()`
/// - `*.pc` files in `pkgconfig/` directories - patched by sysconfig
/// - `libpython*.dylib` on macOS - patched by `ensure_dylib_patched()`
///
/// Returns `false` for paths without a file name or whose file name is not
/// valid UTF-8. Extension comparisons are ASCII case-insensitive.
fn should_copy_python_distribution_file(path: &Path) -> bool {
    let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    let extension = path.extension().and_then(|e| e.to_str());
    let has_extension =
        |expected: &str| extension.is_some_and(|ext| ext.eq_ignore_ascii_case(expected));

    // _sysconfigdata_*.py files
    if file_name.starts_with("_sysconfigdata_") && has_extension("py") {
        return true;
    }

    // *.pc files, but only when they live directly inside a `pkgconfig` directory.
    // A `.pc` file anywhere else is not patched and can safely be hard-linked.
    if has_extension("pc") {
        let in_pkgconfig_dir = path
            .parent()
            .and_then(Path::file_name)
            .and_then(|n| n.to_str())
            == Some("pkgconfig");
        if in_pkgconfig_dir {
            return true;
        }
    }

    // libpython*.dylib on macOS
    #[cfg(target_os = "macos")]
    if file_name.starts_with("libpython") && has_extension("dylib") {
        return true;
    }

    false
}
/// Recursively hard link or copy a directory tree from `src` to `dst`.
///
/// Tries hard linking first for efficiency, falling back to copying if hard links
/// are not supported (e.g., cross-filesystem operations).
///
/// Files that will be patched after installation (sysconfig, pkgconfig, dylib) are
/// always copied to avoid modifying the cached source.
fn hardlink_or_copy_dir(src: &Path, dst: &Path) -> Result<(), Error> {
let mut attempt = Attempt::Initial;
for entry in WalkDir::new(src) {
let entry = entry.map_err(|e| Error::ReadError {
dir: src.to_path_buf(),
err: io::Error::other(e),
})?;
let path = entry.path();
let relative = path.strip_prefix(src).expect("walkdir starts with root");
let target = dst.join(relative);
if entry.file_type().is_dir() {
fs_err::create_dir_all(&target)?;
continue;
}
// Always copy files that will be patched to avoid modifying the cache
if should_copy_python_distribution_file(path) {
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
to: target.clone(),
err,
})?;
continue;
}
match attempt {
Attempt::Initial => {
if let Err(err) = fs_err::hard_link(path, &target) {
debug!(
"Failed to hard link `{}` to `{}`: {}; falling back to copy",
path.display(),
target.display(),
err
);
attempt = Attempt::UseCopyFallback;
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
to: target.clone(),
err,
})?;
} else {
attempt = Attempt::Subsequent;
}
}
Attempt::Subsequent => {
if let Err(err) = fs_err::hard_link(path, &target) {
// Unexpected failure after initial success - still fall back to copy
debug!(
"Unexpected hard link failure for `{}`: {}; falling back to copy",
path.display(),
err
);
attempt = Attempt::UseCopyFallback;
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
to: target.clone(),
err,
})?;
}
}
Attempt::UseCopyFallback => {
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
to: target.clone(),
err,
})?;
}
}
}
Ok(())
}
impl ManagedPythonDownload {
/// Return a display type that includes the build information.
pub fn to_display_with_build(&self) -> ManagedPythonDownloadWithBuild<'_> {
@ -1203,13 +1340,13 @@ impl ManagedPythonDownload {
let ext = SourceDistExtension::from_path(&filename)
.map_err(|err| Error::MissingExtension(url.to_string(), err))?;
let temp_dir = tempfile::tempdir_in(scratch_dir).map_err(Error::DownloadDirError)?;
if let Some(python_builds_dir) =
// Track the unpacked cache path if caching is enabled
let (target_unpacked, temp_dir) = if let Some(python_builds_dir) =
env::var_os(EnvVars::UV_PYTHON_CACHE_DIR).filter(|s| !s.is_empty())
{
let python_builds_dir = PathBuf::from(python_builds_dir);
fs_err::create_dir_all(&python_builds_dir)?;
let hash_prefix = match self.sha256.as_deref() {
Some(sha) => {
// Shorten the hash to avoid too-long-filename errors
@ -1218,7 +1355,36 @@ impl ManagedPythonDownload {
None => "none",
};
let target_cache_file = python_builds_dir.join(format!("{hash_prefix}-{filename}"));
// Strip the archive extension for the unpacked directory name
let basename = filename
.strip_suffix(&format!(".{}", ext.name()))
.expect("filename was parsed with this extension");
let target_unpacked = python_builds_dir.join(format!("{hash_prefix}-{basename}"));
// Check if unpacked cache exists first - if so, hard link from it directly
if target_unpacked.is_dir() {
debug!(
"Using unpacked cache at `{}`",
target_unpacked.simplified_display()
);
// Remove the target if it already exists.
if path.is_dir() {
debug!("Removing existing directory: {}", path.user_display());
fs_err::tokio::remove_dir_all(&path).await?;
}
// Hard link (or copy) from unpacked cache to installation directory
hardlink_or_copy_dir(&target_unpacked, &path)?;
return Ok(DownloadResult::Fetched(path));
}
// Create temp dir in the cache directory to ensure same-filesystem renames
let temp_dir =
tempfile::tempdir_in(&python_builds_dir).map_err(Error::DownloadDirError)?;
// No unpacked cache - download and extract the archive
// Download the archive to the cache, or return a reader if we have it in cache.
// TODO(konsti): We should "tee" the write so we can do the download-to-cache and unpacking
// in one step.
@ -1272,7 +1438,11 @@ impl ManagedPythonDownload {
Direction::Extract,
)
.await?;
(Some(target_unpacked), temp_dir)
} else {
let temp_dir = tempfile::tempdir_in(scratch_dir).map_err(Error::DownloadDirError)?;
// Avoid overlong log lines
debug!("Downloading {url}");
debug!(
@ -1291,7 +1461,9 @@ impl ManagedPythonDownload {
Direction::Download,
)
.await?;
}
(None, temp_dir)
};
// Extract the top-level directory.
let mut extracted = match uv_extract::strip_component(temp_dir.path()) {
@ -1345,7 +1517,57 @@ impl ManagedPythonDownload {
fs_err::tokio::remove_dir_all(&path).await?;
}
// Persist it to the target.
// If caching is enabled, save to unpacked cache and hard link to target
if let Some(target_unpacked) = target_unpacked {
// Move extracted files to unpacked cache using atomic rename
debug!(
"Saving to unpacked cache at `{}`",
target_unpacked.simplified_display()
);
// Use a temporary name for atomic creation
// Note: Don't use `with_extension` as the path contains version dots (e.g., "3.10.19")
let temp_unpacked = PathBuf::from(format!(
"{}.tmp.{}",
target_unpacked.display(),
std::process::id()
));
// Move extracted to temp cache location
rename_with_retry(&extracted, &temp_unpacked)
.await
.map_err(|err| Error::CopyError {
to: temp_unpacked.clone(),
err,
})?;
// Atomic rename to final cache location
match fs_err::rename(&temp_unpacked, &target_unpacked) {
Ok(()) => {
debug!(
"Created unpacked cache at `{}`",
target_unpacked.simplified_display()
);
}
Err(err)
if err.kind() == io::ErrorKind::AlreadyExists
|| err.kind() == io::ErrorKind::DirectoryNotEmpty =>
{
// Another process won the race - use theirs and clean up ours
debug!("Unpacked cache already exists (concurrent creation)");
let _ = fs_err::remove_dir_all(&temp_unpacked);
}
Err(err) => {
// Clean up temp directory on error
let _ = fs_err::remove_dir_all(&temp_unpacked);
return Err(err.into());
}
}
// Hard link (or copy) from unpacked cache to installation directory
hardlink_or_copy_dir(&target_unpacked, &path)?;
} else {
// No caching - just move to target
debug!("Moving {} to {}", extracted.display(), path.user_display());
rename_with_retry(extracted, &path)
.await
@ -1353,6 +1575,7 @@ impl ManagedPythonDownload {
to: path.clone(),
err,
})?;
}
Ok(DownloadResult::Fetched(path))
}