mirror of https://github.com/astral-sh/uv
Store unpacked Python installations in the cache
This commit is contained in:
parent
5a6f2ea319
commit
7be8a7e65c
|
|
@ -6569,6 +6569,7 @@ dependencies = [
|
||||||
"uv-static",
|
"uv-static",
|
||||||
"uv-trampoline-builder",
|
"uv-trampoline-builder",
|
||||||
"uv-warnings",
|
"uv-warnings",
|
||||||
|
"walkdir",
|
||||||
"which",
|
"which",
|
||||||
"windows 0.59.0",
|
"windows 0.59.0",
|
||||||
"windows-registry",
|
"windows-registry",
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,7 @@ tokio = { workspace = true }
|
||||||
tokio-util = { workspace = true, features = ["compat"] }
|
tokio-util = { workspace = true, features = ["compat"] }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
url = { workspace = true }
|
url = { workspace = true }
|
||||||
|
walkdir = { workspace = true }
|
||||||
which = { workspace = true }
|
which = { workspace = true }
|
||||||
|
|
||||||
[target.'cfg(target_os = "windows")'.dependencies]
|
[target.'cfg(target_os = "windows")'.dependencies]
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,8 @@ use std::task::{Context, Poll};
|
||||||
use std::time::{Duration, SystemTime};
|
use std::time::{Duration, SystemTime};
|
||||||
use std::{env, io};
|
use std::{env, io};
|
||||||
|
|
||||||
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use owo_colors::OwoColorize;
|
use owo_colors::OwoColorize;
|
||||||
|
|
@ -1072,6 +1074,141 @@ async fn fetch_bytes_from_url(client: &BaseClient, url: &DisplaySafeUrl) -> Resu
|
||||||
Ok(buf)
|
Ok(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Progress marker for the "hard link first, copy otherwise" strategy.
///
/// Support for hard links cannot be probed ahead of time, so the first file that
/// gets linked doubles as the probe: if it succeeds, later link errors are known
/// not to stem from missing OS/filesystem support; if it fails, the rest of the
/// operation switches to copying.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum Attempt {
    /// No hard link has been attempted yet.
    #[default]
    Initial,
    /// A previous hard link attempt succeeded.
    Subsequent,
    /// A hard link attempt failed; remaining files are copied instead.
    UseCopyFallback,
}
|
||||||
|
|
||||||
|
/// Check if a file should be copied instead of hard-linked.
///
/// These files are modified after installation and must be copied to avoid
/// corrupting the cache:
/// - `_sysconfigdata_*.py` - patched by `ensure_sysconfig_patched()`
/// - `*.pc` files in `pkgconfig/` directories - patched by sysconfig
/// - `libpython*.dylib` on macOS - patched by `ensure_dylib_patched()`
///
/// Extensions are compared ASCII case-insensitively; the file name prefix and the
/// `pkgconfig` directory name are compared exactly. Returns `false` when the path
/// has no file name or the file name is not valid UTF-8.
fn should_copy_python_distribution_file(path: &Path) -> bool {
    let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
        return false;
    };

    // Case-insensitive extension test shared by all rules below.
    let has_extension = |expected: &str| {
        path.extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| ext.eq_ignore_ascii_case(expected))
    };

    // _sysconfigdata_*.py files
    if file_name.starts_with("_sysconfigdata_") && has_extension("py") {
        return true;
    }

    // *.pc files, but only when they live directly inside a `pkgconfig` directory.
    // A `.pc` file anywhere else is not patched and can safely be hard-linked.
    if has_extension("pc") {
        let parent_name = path
            .parent()
            .and_then(Path::file_name)
            .and_then(|n| n.to_str());
        if parent_name == Some("pkgconfig") {
            return true;
        }
    }

    // libpython*.dylib on macOS
    #[cfg(target_os = "macos")]
    if file_name.starts_with("libpython") && has_extension("dylib") {
        return true;
    }

    false
}
|
||||||
|
|
||||||
|
/// Recursively hard link or copy a directory tree from `src` to `dst`.
|
||||||
|
///
|
||||||
|
/// Tries hard linking first for efficiency, falling back to copying if hard links
|
||||||
|
/// are not supported (e.g., cross-filesystem operations).
|
||||||
|
///
|
||||||
|
/// Files that will be patched after installation (sysconfig, pkgconfig, dylib) are
|
||||||
|
/// always copied to avoid modifying the cached source.
|
||||||
|
fn hardlink_or_copy_dir(src: &Path, dst: &Path) -> Result<(), Error> {
|
||||||
|
let mut attempt = Attempt::Initial;
|
||||||
|
|
||||||
|
for entry in WalkDir::new(src) {
|
||||||
|
let entry = entry.map_err(|e| Error::ReadError {
|
||||||
|
dir: src.to_path_buf(),
|
||||||
|
err: io::Error::other(e),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let path = entry.path();
|
||||||
|
let relative = path.strip_prefix(src).expect("walkdir starts with root");
|
||||||
|
let target = dst.join(relative);
|
||||||
|
|
||||||
|
if entry.file_type().is_dir() {
|
||||||
|
fs_err::create_dir_all(&target)?;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always copy files that will be patched to avoid modifying the cache
|
||||||
|
if should_copy_python_distribution_file(path) {
|
||||||
|
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
|
||||||
|
to: target.clone(),
|
||||||
|
err,
|
||||||
|
})?;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match attempt {
|
||||||
|
Attempt::Initial => {
|
||||||
|
if let Err(err) = fs_err::hard_link(path, &target) {
|
||||||
|
debug!(
|
||||||
|
"Failed to hard link `{}` to `{}`: {}; falling back to copy",
|
||||||
|
path.display(),
|
||||||
|
target.display(),
|
||||||
|
err
|
||||||
|
);
|
||||||
|
attempt = Attempt::UseCopyFallback;
|
||||||
|
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
|
||||||
|
to: target.clone(),
|
||||||
|
err,
|
||||||
|
})?;
|
||||||
|
} else {
|
||||||
|
attempt = Attempt::Subsequent;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Attempt::Subsequent => {
|
||||||
|
if let Err(err) = fs_err::hard_link(path, &target) {
|
||||||
|
// Unexpected failure after initial success - still fall back to copy
|
||||||
|
debug!(
|
||||||
|
"Unexpected hard link failure for `{}`: {}; falling back to copy",
|
||||||
|
path.display(),
|
||||||
|
err
|
||||||
|
);
|
||||||
|
attempt = Attempt::UseCopyFallback;
|
||||||
|
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
|
||||||
|
to: target.clone(),
|
||||||
|
err,
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Attempt::UseCopyFallback => {
|
||||||
|
fs_err::copy(path, &target).map_err(|err| Error::CopyError {
|
||||||
|
to: target.clone(),
|
||||||
|
err,
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
impl ManagedPythonDownload {
|
impl ManagedPythonDownload {
|
||||||
/// Return a display type that includes the build information.
|
/// Return a display type that includes the build information.
|
||||||
pub fn to_display_with_build(&self) -> ManagedPythonDownloadWithBuild<'_> {
|
pub fn to_display_with_build(&self) -> ManagedPythonDownloadWithBuild<'_> {
|
||||||
|
|
@ -1203,13 +1340,13 @@ impl ManagedPythonDownload {
|
||||||
let ext = SourceDistExtension::from_path(&filename)
|
let ext = SourceDistExtension::from_path(&filename)
|
||||||
.map_err(|err| Error::MissingExtension(url.to_string(), err))?;
|
.map_err(|err| Error::MissingExtension(url.to_string(), err))?;
|
||||||
|
|
||||||
let temp_dir = tempfile::tempdir_in(scratch_dir).map_err(Error::DownloadDirError)?;
|
// Track the unpacked cache path if caching is enabled
|
||||||
|
let (target_unpacked, temp_dir) = if let Some(python_builds_dir) =
|
||||||
if let Some(python_builds_dir) =
|
|
||||||
env::var_os(EnvVars::UV_PYTHON_CACHE_DIR).filter(|s| !s.is_empty())
|
env::var_os(EnvVars::UV_PYTHON_CACHE_DIR).filter(|s| !s.is_empty())
|
||||||
{
|
{
|
||||||
let python_builds_dir = PathBuf::from(python_builds_dir);
|
let python_builds_dir = PathBuf::from(python_builds_dir);
|
||||||
fs_err::create_dir_all(&python_builds_dir)?;
|
fs_err::create_dir_all(&python_builds_dir)?;
|
||||||
|
|
||||||
let hash_prefix = match self.sha256.as_deref() {
|
let hash_prefix = match self.sha256.as_deref() {
|
||||||
Some(sha) => {
|
Some(sha) => {
|
||||||
// Shorten the hash to avoid too-long-filename errors
|
// Shorten the hash to avoid too-long-filename errors
|
||||||
|
|
@ -1218,7 +1355,36 @@ impl ManagedPythonDownload {
|
||||||
None => "none",
|
None => "none",
|
||||||
};
|
};
|
||||||
let target_cache_file = python_builds_dir.join(format!("{hash_prefix}-{filename}"));
|
let target_cache_file = python_builds_dir.join(format!("{hash_prefix}-{filename}"));
|
||||||
|
// Strip the archive extension for the unpacked directory name
|
||||||
|
let basename = filename
|
||||||
|
.strip_suffix(&format!(".{}", ext.name()))
|
||||||
|
.expect("filename was parsed with this extension");
|
||||||
|
let target_unpacked = python_builds_dir.join(format!("{hash_prefix}-{basename}"));
|
||||||
|
|
||||||
|
// Check if unpacked cache exists first - if so, hard link from it directly
|
||||||
|
if target_unpacked.is_dir() {
|
||||||
|
debug!(
|
||||||
|
"Using unpacked cache at `{}`",
|
||||||
|
target_unpacked.simplified_display()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Remove the target if it already exists.
|
||||||
|
if path.is_dir() {
|
||||||
|
debug!("Removing existing directory: {}", path.user_display());
|
||||||
|
fs_err::tokio::remove_dir_all(&path).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hard link (or copy) from unpacked cache to installation directory
|
||||||
|
hardlink_or_copy_dir(&target_unpacked, &path)?;
|
||||||
|
|
||||||
|
return Ok(DownloadResult::Fetched(path));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create temp dir in the cache directory to ensure same-filesystem renames
|
||||||
|
let temp_dir =
|
||||||
|
tempfile::tempdir_in(&python_builds_dir).map_err(Error::DownloadDirError)?;
|
||||||
|
|
||||||
|
// No unpacked cache - download and extract the archive
|
||||||
// Download the archive to the cache, or return a reader if we have it in cache.
|
// Download the archive to the cache, or return a reader if we have it in cache.
|
||||||
// TODO(konsti): We should "tee" the write so we can do the download-to-cache and unpacking
|
// TODO(konsti): We should "tee" the write so we can do the download-to-cache and unpacking
|
||||||
// in one step.
|
// in one step.
|
||||||
|
|
@ -1272,7 +1438,11 @@ impl ManagedPythonDownload {
|
||||||
Direction::Extract,
|
Direction::Extract,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
(Some(target_unpacked), temp_dir)
|
||||||
} else {
|
} else {
|
||||||
|
let temp_dir = tempfile::tempdir_in(scratch_dir).map_err(Error::DownloadDirError)?;
|
||||||
|
|
||||||
// Avoid overlong log lines
|
// Avoid overlong log lines
|
||||||
debug!("Downloading {url}");
|
debug!("Downloading {url}");
|
||||||
debug!(
|
debug!(
|
||||||
|
|
@ -1291,7 +1461,9 @@ impl ManagedPythonDownload {
|
||||||
Direction::Download,
|
Direction::Download,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
|
||||||
|
(None, temp_dir)
|
||||||
|
};
|
||||||
|
|
||||||
// Extract the top-level directory.
|
// Extract the top-level directory.
|
||||||
let mut extracted = match uv_extract::strip_component(temp_dir.path()) {
|
let mut extracted = match uv_extract::strip_component(temp_dir.path()) {
|
||||||
|
|
@ -1345,7 +1517,57 @@ impl ManagedPythonDownload {
|
||||||
fs_err::tokio::remove_dir_all(&path).await?;
|
fs_err::tokio::remove_dir_all(&path).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Persist it to the target.
|
// If caching is enabled, save to unpacked cache and hard link to target
|
||||||
|
if let Some(target_unpacked) = target_unpacked {
|
||||||
|
// Move extracted files to unpacked cache using atomic rename
|
||||||
|
debug!(
|
||||||
|
"Saving to unpacked cache at `{}`",
|
||||||
|
target_unpacked.simplified_display()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Use a temporary name for atomic creation
|
||||||
|
// Note: Don't use `with_extension` as the path contains version dots (e.g., "3.10.19")
|
||||||
|
let temp_unpacked = PathBuf::from(format!(
|
||||||
|
"{}.tmp.{}",
|
||||||
|
target_unpacked.display(),
|
||||||
|
std::process::id()
|
||||||
|
));
|
||||||
|
|
||||||
|
// Move extracted to temp cache location
|
||||||
|
rename_with_retry(&extracted, &temp_unpacked)
|
||||||
|
.await
|
||||||
|
.map_err(|err| Error::CopyError {
|
||||||
|
to: temp_unpacked.clone(),
|
||||||
|
err,
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Atomic rename to final cache location
|
||||||
|
match fs_err::rename(&temp_unpacked, &target_unpacked) {
|
||||||
|
Ok(()) => {
|
||||||
|
debug!(
|
||||||
|
"Created unpacked cache at `{}`",
|
||||||
|
target_unpacked.simplified_display()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(err)
|
||||||
|
if err.kind() == io::ErrorKind::AlreadyExists
|
||||||
|
|| err.kind() == io::ErrorKind::DirectoryNotEmpty =>
|
||||||
|
{
|
||||||
|
// Another process won the race - use theirs and clean up ours
|
||||||
|
debug!("Unpacked cache already exists (concurrent creation)");
|
||||||
|
let _ = fs_err::remove_dir_all(&temp_unpacked);
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
// Clean up temp directory on error
|
||||||
|
let _ = fs_err::remove_dir_all(&temp_unpacked);
|
||||||
|
return Err(err.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hard link (or copy) from unpacked cache to installation directory
|
||||||
|
hardlink_or_copy_dir(&target_unpacked, &path)?;
|
||||||
|
} else {
|
||||||
|
// No caching - just move to target
|
||||||
debug!("Moving {} to {}", extracted.display(), path.user_display());
|
debug!("Moving {} to {}", extracted.display(), path.user_display());
|
||||||
rename_with_retry(extracted, &path)
|
rename_with_retry(extracted, &path)
|
||||||
.await
|
.await
|
||||||
|
|
@ -1353,6 +1575,7 @@ impl ManagedPythonDownload {
|
||||||
to: path.clone(),
|
to: path.clone(),
|
||||||
err,
|
err,
|
||||||
})?;
|
})?;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(DownloadResult::Fetched(path))
|
Ok(DownloadResult::Fetched(path))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue