mirror of https://github.com/astral-sh/uv
Store unpacked Python installations in the cache
This commit is contained in:
parent
5a6f2ea319
commit
7be8a7e65c
|
|
@ -6569,6 +6569,7 @@ dependencies = [
|
|||
"uv-static",
|
||||
"uv-trampoline-builder",
|
||||
"uv-warnings",
|
||||
"walkdir",
|
||||
"which",
|
||||
"windows 0.59.0",
|
||||
"windows-registry",
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ tokio = { workspace = true }
|
|||
tokio-util = { workspace = true, features = ["compat"] }
|
||||
tracing = { workspace = true }
|
||||
url = { workspace = true }
|
||||
walkdir = { workspace = true }
|
||||
which = { workspace = true }
|
||||
|
||||
[target.'cfg(target_os = "windows")'.dependencies]
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ use std::task::{Context, Poll};
|
|||
use std::time::{Duration, SystemTime};
|
||||
use std::{env, io};
|
||||
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use futures::TryStreamExt;
|
||||
use itertools::Itertools;
|
||||
use owo_colors::OwoColorize;
|
||||
|
|
@ -1072,6 +1074,141 @@ async fn fetch_bytes_from_url(client: &BaseClient, url: &DisplaySafeUrl) -> Resu
|
|||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Tracks the state of hard link/copy fallback attempts.
|
||||
///
|
||||
/// Hard linking might not be supported, but we can't detect this ahead of time,
|
||||
/// so we'll try hard linking the first file - if this succeeds we'll know later
|
||||
/// errors are not due to lack of OS/filesystem support. If it fails, we'll switch
|
||||
/// to copying for the rest of the operation.
|
||||
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
|
||||
enum Attempt {
|
||||
#[default]
|
||||
Initial,
|
||||
Subsequent,
|
||||
UseCopyFallback,
|
||||
}
|
||||
|
||||
/// Check if a file should be copied instead of hard-linked.
|
||||
///
|
||||
/// These files are modified after installation and must be copied to avoid
|
||||
/// corrupting the cache:
|
||||
/// - `_sysconfigdata_*.py` - patched by `ensure_sysconfig_patched()`
|
||||
/// - `*.pc` files in `pkgconfig/` directories - patched by sysconfig
|
||||
/// - `libpython*.dylib` on macOS - patched by `ensure_dylib_patched()`
|
||||
fn should_copy_python_distribution_file(path: &Path) -> bool {
|
||||
let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let extension = path.extension().and_then(|e| e.to_str());
|
||||
|
||||
// _sysconfigdata_*.py files
|
||||
if file_name.starts_with("_sysconfigdata_")
|
||||
&& extension.is_some_and(|ext| ext.eq_ignore_ascii_case("py"))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// *.pc files in pkgconfig directories
|
||||
if extension.is_some_and(|ext| ext.eq_ignore_ascii_case("pc")) {
|
||||
if let Some(parent) = path.parent() {
|
||||
if parent.file_name().and_then(|n| n.to_str()) == Some("pkgconfig") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// libpython*.dylib on macOS
|
||||
#[cfg(target_os = "macos")]
|
||||
if file_name.starts_with("libpython")
|
||||
&& extension.is_some_and(|ext| ext.eq_ignore_ascii_case("dylib"))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Recursively hard link or copy a directory tree from `src` to `dst`.
|
||||
///
|
||||
/// Tries hard linking first for efficiency, falling back to copying if hard links
|
||||
/// are not supported (e.g., cross-filesystem operations).
|
||||
///
|
||||
/// Files that will be patched after installation (sysconfig, pkgconfig, dylib) are
|
||||
/// always copied to avoid modifying the cached source.
|
||||
fn hardlink_or_copy_dir(src: &Path, dst: &Path) -> Result<(), Error> {
|
||||
let mut attempt = Attempt::Initial;
|
||||
|
||||
for entry in WalkDir::new(src) {
|
||||
let entry = entry.map_err(|e| Error::ReadError {
|
||||
dir: src.to_path_buf(),
|
||||
err: io::Error::other(e),
|
||||
})?;
|
||||
|
||||
let path = entry.path();
|
||||
let relative = path.strip_prefix(src).expect("walkdir starts with root");
|
||||
let target = dst.join(relative);
|
||||
|
||||
if entry.file_type().is_dir() {
|
||||
fs_err::create_dir_all(&target)?;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Always copy files that will be patched to avoid modifying the cache
|
||||
if should_copy_python_distribution_file(path) {
|
||||
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
|
||||
to: target.clone(),
|
||||
err,
|
||||
})?;
|
||||
continue;
|
||||
}
|
||||
|
||||
match attempt {
|
||||
Attempt::Initial => {
|
||||
if let Err(err) = fs_err::hard_link(path, &target) {
|
||||
debug!(
|
||||
"Failed to hard link `{}` to `{}`: {}; falling back to copy",
|
||||
path.display(),
|
||||
target.display(),
|
||||
err
|
||||
);
|
||||
attempt = Attempt::UseCopyFallback;
|
||||
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
|
||||
to: target.clone(),
|
||||
err,
|
||||
})?;
|
||||
} else {
|
||||
attempt = Attempt::Subsequent;
|
||||
}
|
||||
}
|
||||
Attempt::Subsequent => {
|
||||
if let Err(err) = fs_err::hard_link(path, &target) {
|
||||
// Unexpected failure after initial success - still fall back to copy
|
||||
debug!(
|
||||
"Unexpected hard link failure for `{}`: {}; falling back to copy",
|
||||
path.display(),
|
||||
err
|
||||
);
|
||||
attempt = Attempt::UseCopyFallback;
|
||||
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
|
||||
to: target.clone(),
|
||||
err,
|
||||
})?;
|
||||
}
|
||||
}
|
||||
Attempt::UseCopyFallback => {
|
||||
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
|
||||
to: target.clone(),
|
||||
err,
|
||||
})?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl ManagedPythonDownload {
|
||||
/// Return a display type that includes the build information.
|
||||
pub fn to_display_with_build(&self) -> ManagedPythonDownloadWithBuild<'_> {
|
||||
|
|
@ -1203,13 +1340,13 @@ impl ManagedPythonDownload {
|
|||
let ext = SourceDistExtension::from_path(&filename)
|
||||
.map_err(|err| Error::MissingExtension(url.to_string(), err))?;
|
||||
|
||||
let temp_dir = tempfile::tempdir_in(scratch_dir).map_err(Error::DownloadDirError)?;
|
||||
|
||||
if let Some(python_builds_dir) =
|
||||
// Track the unpacked cache path if caching is enabled
|
||||
let (target_unpacked, temp_dir) = if let Some(python_builds_dir) =
|
||||
env::var_os(EnvVars::UV_PYTHON_CACHE_DIR).filter(|s| !s.is_empty())
|
||||
{
|
||||
let python_builds_dir = PathBuf::from(python_builds_dir);
|
||||
fs_err::create_dir_all(&python_builds_dir)?;
|
||||
|
||||
let hash_prefix = match self.sha256.as_deref() {
|
||||
Some(sha) => {
|
||||
// Shorten the hash to avoid too-long-filename errors
|
||||
|
|
@ -1218,7 +1355,36 @@ impl ManagedPythonDownload {
|
|||
None => "none",
|
||||
};
|
||||
let target_cache_file = python_builds_dir.join(format!("{hash_prefix}-{filename}"));
|
||||
// Strip the archive extension for the unpacked directory name
|
||||
let basename = filename
|
||||
.strip_suffix(&format!(".{}", ext.name()))
|
||||
.expect("filename was parsed with this extension");
|
||||
let target_unpacked = python_builds_dir.join(format!("{hash_prefix}-{basename}"));
|
||||
|
||||
// Check if unpacked cache exists first - if so, hard link from it directly
|
||||
if target_unpacked.is_dir() {
|
||||
debug!(
|
||||
"Using unpacked cache at `{}`",
|
||||
target_unpacked.simplified_display()
|
||||
);
|
||||
|
||||
// Remove the target if it already exists.
|
||||
if path.is_dir() {
|
||||
debug!("Removing existing directory: {}", path.user_display());
|
||||
fs_err::tokio::remove_dir_all(&path).await?;
|
||||
}
|
||||
|
||||
// Hard link (or copy) from unpacked cache to installation directory
|
||||
hardlink_or_copy_dir(&target_unpacked, &path)?;
|
||||
|
||||
return Ok(DownloadResult::Fetched(path));
|
||||
}
|
||||
|
||||
// Create temp dir in the cache directory to ensure same-filesystem renames
|
||||
let temp_dir =
|
||||
tempfile::tempdir_in(&python_builds_dir).map_err(Error::DownloadDirError)?;
|
||||
|
||||
// No unpacked cache - download and extract the archive
|
||||
// Download the archive to the cache, or return a reader if we have it in cache.
|
||||
// TODO(konsti): We should "tee" the write so we can do the download-to-cache and unpacking
|
||||
// in one step.
|
||||
|
|
@ -1272,7 +1438,11 @@ impl ManagedPythonDownload {
|
|||
Direction::Extract,
|
||||
)
|
||||
.await?;
|
||||
|
||||
(Some(target_unpacked), temp_dir)
|
||||
} else {
|
||||
let temp_dir = tempfile::tempdir_in(scratch_dir).map_err(Error::DownloadDirError)?;
|
||||
|
||||
// Avoid overlong log lines
|
||||
debug!("Downloading {url}");
|
||||
debug!(
|
||||
|
|
@ -1291,7 +1461,9 @@ impl ManagedPythonDownload {
|
|||
Direction::Download,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
(None, temp_dir)
|
||||
};
|
||||
|
||||
// Extract the top-level directory.
|
||||
let mut extracted = match uv_extract::strip_component(temp_dir.path()) {
|
||||
|
|
@ -1345,14 +1517,65 @@ impl ManagedPythonDownload {
|
|||
fs_err::tokio::remove_dir_all(&path).await?;
|
||||
}
|
||||
|
||||
// Persist it to the target.
|
||||
debug!("Moving {} to {}", extracted.display(), path.user_display());
|
||||
rename_with_retry(extracted, &path)
|
||||
.await
|
||||
.map_err(|err| Error::CopyError {
|
||||
to: path.clone(),
|
||||
err,
|
||||
})?;
|
||||
// If caching is enabled, save to unpacked cache and hard link to target
|
||||
if let Some(target_unpacked) = target_unpacked {
|
||||
// Move extracted files to unpacked cache using atomic rename
|
||||
debug!(
|
||||
"Saving to unpacked cache at `{}`",
|
||||
target_unpacked.simplified_display()
|
||||
);
|
||||
|
||||
// Use a temporary name for atomic creation
|
||||
// Note: Don't use `with_extension` as the path contains version dots (e.g., "3.10.19")
|
||||
let temp_unpacked = PathBuf::from(format!(
|
||||
"{}.tmp.{}",
|
||||
target_unpacked.display(),
|
||||
std::process::id()
|
||||
));
|
||||
|
||||
// Move extracted to temp cache location
|
||||
rename_with_retry(&extracted, &temp_unpacked)
|
||||
.await
|
||||
.map_err(|err| Error::CopyError {
|
||||
to: temp_unpacked.clone(),
|
||||
err,
|
||||
})?;
|
||||
|
||||
// Atomic rename to final cache location
|
||||
match fs_err::rename(&temp_unpacked, &target_unpacked) {
|
||||
Ok(()) => {
|
||||
debug!(
|
||||
"Created unpacked cache at `{}`",
|
||||
target_unpacked.simplified_display()
|
||||
);
|
||||
}
|
||||
Err(err)
|
||||
if err.kind() == io::ErrorKind::AlreadyExists
|
||||
|| err.kind() == io::ErrorKind::DirectoryNotEmpty =>
|
||||
{
|
||||
// Another process won the race - use theirs and clean up ours
|
||||
debug!("Unpacked cache already exists (concurrent creation)");
|
||||
let _ = fs_err::remove_dir_all(&temp_unpacked);
|
||||
}
|
||||
Err(err) => {
|
||||
// Clean up temp directory on error
|
||||
let _ = fs_err::remove_dir_all(&temp_unpacked);
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
|
||||
// Hard link (or copy) from unpacked cache to installation directory
|
||||
hardlink_or_copy_dir(&target_unpacked, &path)?;
|
||||
} else {
|
||||
// No caching - just move to target
|
||||
debug!("Moving {} to {}", extracted.display(), path.user_display());
|
||||
rename_with_retry(extracted, &path)
|
||||
.await
|
||||
.map_err(|err| Error::CopyError {
|
||||
to: path.clone(),
|
||||
err,
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(DownloadResult::Fetched(path))
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue