Use files instead of junctions on Windows (#11269)

Instead of using junctions, we can just write files that contain (as the
file contents) the target path. This requires a little more finesse in
that, as readers, we need to know where to expect these. But it also
means we get to avoid junctions, which have led to a variety of
confusing behaviors. Further, `replace_symlink` should now be atomic
on Windows.

Closes #11263.
This commit is contained in:
Charlie Marsh 2025-02-07 19:13:19 -05:00 committed by Zanie Blue
parent 59c65c3e77
commit 4d5041dc00
14 changed files with 351 additions and 59 deletions

5
Cargo.lock generated
View File

@ -695,7 +695,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
dependencies = [
"lazy_static",
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]
@ -4744,6 +4744,7 @@ dependencies = [
"uv-cache-info",
"uv-cache-key",
"uv-dirs",
"uv-distribution-filename",
"uv-distribution-types",
"uv-fs",
"uv-normalize",
@ -6051,7 +6052,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]

View File

@ -17,9 +17,10 @@ doctest = false
workspace = true
[dependencies]
uv-dirs = { workspace = true }
uv-cache-info = { workspace = true }
uv-cache-key = { workspace = true }
uv-dirs = { workspace = true }
uv-distribution-filename = { workspace = true }
uv-distribution-types = { workspace = true }
uv-fs = { workspace = true, features = ["tokio"] }
uv-normalize = { workspace = true }

View File

@ -1,7 +1,8 @@
use std::path::Path;
use std::str::FromStr;
/// A unique identifier for an archive (unzipped wheel) in the cache.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[derive(Debug, Clone, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)]
pub struct ArchiveId(String);
impl Default for ArchiveId {
@ -22,3 +23,17 @@ impl AsRef<Path> for ArchiveId {
self.0.as_ref()
}
}
impl std::fmt::Display for ArchiveId {
    /// Forward to the inner string's `Display` implementation, preserving any
    /// width/fill flags supplied by the caller.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}
impl FromStr for ArchiveId {
    type Err = <String as FromStr>::Err;

    /// Wrap the given string in an [`ArchiveId`]; parsing a `String` is infallible.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(Self(String::from(s)))
    }
}

View File

@ -4,6 +4,7 @@ use std::io;
use std::io::Write;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use rustc_hash::FxHashSet;
@ -11,6 +12,7 @@ use tracing::debug;
pub use archive::ArchiveId;
use uv_cache_info::Timestamp;
use uv_distribution_filename::WheelFilename;
use uv_distribution_types::InstalledDist;
use uv_fs::{cachedir, directories, LockedFile};
use uv_normalize::PackageName;
@ -31,6 +33,11 @@ mod cli;
mod removal;
mod wheel;
/// The version of the archive bucket.
///
/// Must be kept in-sync with the version in [`CacheBucket::to_str`].
pub const ARCHIVE_VERSION: u8 = 0;
/// A [`CacheEntry`] which may or may not exist yet.
#[derive(Debug, Clone)]
pub struct CacheEntry(PathBuf);
@ -278,7 +285,7 @@ impl Cache {
// Create a link (a symlink on Unix; a link file on Windows) to the directory store.
fs_err::create_dir_all(path.as_ref().parent().expect("Cache entry to have parent"))?;
uv_fs::replace_symlink(archive_entry.path(), path.as_ref())?;
self.create_link(&id, path.as_ref())?;
Ok(id)
}
@ -360,10 +367,30 @@ impl Cache {
if bucket.is_dir() {
for entry in walkdir::WalkDir::new(bucket) {
let entry = entry?;
if entry.file_type().is_symlink() {
if let Ok(target) = fs_err::canonicalize(entry.path()) {
references.insert(target);
}
// Ignore any `.lock` files.
if entry
.path()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
{
continue;
}
// Identify entries that match the wheel stem pattern (e.g., `typing-extensions-4.8.0-py3-none-any`).
let Some(filename) = entry
.path()
.file_name()
.and_then(|file_name| file_name.to_str())
else {
continue;
};
if WheelFilename::from_stem(filename).is_err() {
continue;
}
if let Ok(target) = self.resolve_link(entry.path()) {
references.insert(target);
}
}
}
@ -385,10 +412,29 @@ impl Cache {
if bucket.is_dir() {
for entry in walkdir::WalkDir::new(bucket) {
let entry = entry?;
if entry.file_type().is_symlink() {
if let Ok(target) = fs_err::canonicalize(entry.path()) {
references.insert(target);
}
// Ignore any `.lock` files.
if entry
.path()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
{
continue;
}
// Identify entries that match the wheel stem pattern (e.g., `typing-extensions-4.8.0-py3-none-any`).
let Some(filename) = entry
.path()
.file_name()
.and_then(|file_name| file_name.to_str())
else {
continue;
};
if WheelFilename::from_stem(filename).is_err() {
continue;
}
if let Ok(target) = self.resolve_link(entry.path()) {
references.insert(target);
}
}
}
@ -488,19 +534,29 @@ impl Cache {
continue;
}
// Remove any symlinks and directories in the revision. The symlinks represent
// unzipped wheels, and the directories represent the source distribution archives.
// Remove everything except the built wheel archive and the metadata.
for entry in fs_err::read_dir(entry.path())? {
let entry = entry?;
let path = entry.path();
if path.is_dir() {
debug!("Removing unzipped built wheel entry: {}", path.display());
summary += rm_rf(path)?;
} else if path.is_symlink() {
debug!("Removing unzipped built wheel entry: {}", path.display());
summary += rm_rf(path)?;
// Retain the resolved metadata (`metadata.msgpack`).
if path
.file_name()
.is_some_and(|file_name| file_name == "metadata.msgpack")
{
continue;
}
// Retain any built wheel archives.
if path
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
{
continue;
}
debug!("Removing unzipped built wheel entry: {}", path.display());
summary += rm_rf(path)?;
}
}
}
@ -513,10 +569,29 @@ impl Cache {
if bucket.is_dir() {
for entry in walkdir::WalkDir::new(bucket) {
let entry = entry?;
if entry.file_type().is_symlink() {
if let Ok(target) = fs_err::canonicalize(entry.path()) {
references.insert(target);
}
// Ignore any `.lock` files.
if entry
.path()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
{
continue;
}
// Identify entries that match the wheel stem pattern (e.g., `typing-extensions-4.8.0-py3-none-any`).
let Some(filename) = entry
.path()
.file_name()
.and_then(|file_name| file_name.to_str())
else {
continue;
};
if WheelFilename::from_stem(filename).is_err() {
continue;
}
if let Ok(target) = self.resolve_link(entry.path()) {
references.insert(target);
}
}
}
@ -539,6 +614,164 @@ impl Cache {
Ok(summary)
}
/// Create a link to a directory in the archive bucket.
///
/// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
/// version. On Unix, we create a symlink to the target directory.
#[cfg(windows)]
pub fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
    // Serialize the link (via `Link`'s `Display`, e.g., `archive-v0/{id}`).
    let link = Link::new(id.clone());
    let contents = link.to_string();

    // First, attempt to create a file at the location, but fail if it already exists.
    match fs_err::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(dst.as_ref())
    {
        Ok(mut file) => {
            // Write the serialized link to the freshly-created file.
            file.write_all(contents.as_bytes())?;
            Ok(())
        }
        Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
            // The link already exists: write to a temporary file, then move it into place,
            // so the replacement is atomic and a concurrent reader never observes a
            // partially-written link.
            // NOTE(review): assumes `dst` has a parent directory (it lives within the
            // cache) — the `unwrap` panics otherwise.
            let temp_dir = tempfile::tempdir_in(dst.as_ref().parent().unwrap())?;
            let temp_file = temp_dir.path().join("link");
            fs_err::write(&temp_file, contents.as_bytes())?;

            // Move the link file (not a symlink, on Windows) into the target location.
            fs_err::rename(&temp_file, dst.as_ref())?;

            Ok(())
        }
        Err(err) => Err(err),
    }
}
/// Resolve an archive link, returning the fully-resolved path.
///
/// Returns an error if the link target does not exist.
#[cfg(windows)]
pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
    // Deserialize the link: on Windows, the "link" is a file whose contents are the
    // serialized `Link` (e.g., `archive-v0/{id}`).
    let contents = fs_err::read_to_string(path.as_ref())?;
    let link = Link::from_str(&contents)?;

    // Ignore stale links: a version mismatch means the archive bucket layout has changed,
    // so the target is reported as `NotFound` rather than as a hard error.
    if link.version != ARCHIVE_VERSION {
        return Err(io::Error::new(
            io::ErrorKind::NotFound,
            "The link target does not exist.",
        ));
    }

    // Reconstruct the path within the archive bucket; `canonicalize` also verifies that
    // the target actually exists on disk.
    let path = self.archive(&link.id);
    path.canonicalize()
}
/// Create a link to a directory in the archive bucket.
///
/// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
/// version. On Unix, we create a symlink to the target directory.
#[cfg(unix)]
pub fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
    // The symlink target is the archive directory for this ID.
    let target = self.archive(id);
    let destination = dst.as_ref();

    // Optimistically attempt to create the symlink in place.
    match std::os::unix::fs::symlink(&target, destination) {
        Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
            // A link already exists: stage a fresh symlink in a temporary directory, then
            // rename it over the destination, which replaces it atomically.
            let staging = tempfile::tempdir_in(destination.parent().unwrap())?;
            let staged_link = staging.path().join("link");
            std::os::unix::fs::symlink(&target, &staged_link)?;

            fs_err::rename(&staged_link, destination)?;
            Ok(())
        }
        other => other,
    }
}
/// Resolve an archive link, returning the fully-resolved path.
///
/// Returns an error if the link target does not exist.
#[cfg(unix)]
pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
    // On Unix, links are plain symlinks, so resolution is just canonicalization (which
    // also fails if the target is missing).
    let link = path.as_ref();
    link.canonicalize()
}
}
/// An archive (unzipped wheel) that exists in the local cache.
///
/// Serialized as `archive-v{version}/{id}` (see the `Display` and `FromStr` impls).
#[derive(Debug, Clone)]
#[allow(unused)]
struct Link {
    /// The unique ID of the entry in the archive bucket.
    id: ArchiveId,
    /// The version of the archive bucket.
    ///
    /// Links whose version does not match [`ARCHIVE_VERSION`] are treated as stale when
    /// resolved.
    version: u8,
}
#[allow(unused)]
impl Link {
    /// Create a new [`Link`] with the given ID, stamped with the current
    /// [`ARCHIVE_VERSION`].
    fn new(id: ArchiveId) -> Self {
        Self {
            id,
            version: ARCHIVE_VERSION,
        }
    }
}
impl Display for Link {
    /// Render as `archive-v{version}/{id}`, the on-disk link-file format.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { id, version } = self;
        write!(f, "archive-v{version}/{id}")
    }
}
impl FromStr for Link {
type Err = io::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut parts = s.splitn(2, '/');
let version = parts
.next()
.filter(|s| !s.is_empty())
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version"))?;
let id = parts
.next()
.filter(|s| !s.is_empty())
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing ID"))?;
// Parse the archive version from `archive-v{version}/{id}`.
let version = version
.strip_prefix("archive-v")
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version prefix"))?;
let version = u8::from_str(version).map_err(|err| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("failed to parse version: {err}"),
)
})?;
// Parse the ID from `archive-v{version}/{id}`.
let id = ArchiveId::from_str(id).map_err(|err| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("failed to parse ID: {err}"),
)
})?;
Ok(Self { id, version })
}
}
pub trait CleanReporter: Send + Sync {
@ -696,7 +929,7 @@ pub enum CacheBucket {
///
/// ...may be cached as:
/// ```text
/// built-wheels-v3/
/// built-wheels-v4/
/// ├── git
/// │   └── 2122faf3e081fb7a
/// │      └── 7a2d650a4a7b4d04
@ -798,7 +1031,7 @@ impl CacheBucket {
match self {
// Note that when bumping this, you'll also need to bump it
// in `crates/uv/tests/it/cache_prune.rs`.
Self::SourceDistributions => "sdists-v7",
Self::SourceDistributions => "sdists-v8",
Self::FlatIndex => "flat-index-v2",
Self::Git => "git-v0",
Self::Interpreter => "interpreter-v4",
@ -808,11 +1041,11 @@ impl CacheBucket {
// Note that when bumping this, you'll also need to bump it
// in `crates/uv/tests/it/cache_prune.rs`.
Self::Wheels => "wheels-v4",
// Note that when bumping this, you'll also need to bump it
// in `crates/uv-distribution/src/archive.rs`.
// Note that when bumping this, you'll also need to bump
// `ARCHIVE_VERSION` in `crates/uv-cache/src/lib.rs`.
Self::Archive => "archive-v0",
Self::Builds => "builds-v0",
Self::Environments => "environments-v1",
Self::Environments => "environments-v2",
}
}
@ -1174,3 +1407,30 @@ impl Refresh {
}
}
}
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use crate::ArchiveId;

    use super::Link;

    /// A [`Link`] should survive a serialize-then-parse round trip unchanged.
    #[test]
    fn test_link_round_trip() {
        let id = ArchiveId::new();
        let link = Link::new(id);
        let s = link.to_string();
        let parsed = Link::from_str(&s).unwrap();
        assert_eq!(link.id, parsed.id);
        assert_eq!(link.version, parsed.version);
    }

    /// Parsing accepts `archive-v{version}/{id}` and rejects malformed inputs.
    #[test]
    fn test_link_deserialize() {
        assert!(Link::from_str("archive-v0/foo").is_ok());
        assert!(Link::from_str("archive/foo").is_err());
        assert!(Link::from_str("v1/foo").is_err());
        assert!(Link::from_str("archive-v0/").is_err());
        // Empty input and missing segments are rejected.
        assert!(Link::from_str("").is_err());
        assert!(Link::from_str("/foo").is_err());
        assert!(Link::from_str("archive-v0").is_err());
        // The version must be a valid `u8`.
        assert!(Link::from_str("archive-vx/foo").is_err());
        assert!(Link::from_str("archive-v256/foo").is_err());
    }
}

View File

@ -138,6 +138,13 @@ impl WheelFilename {
/// Parse a wheel filename from the stem (e.g., `foo-1.2.3-py3-none-any`).
pub fn from_stem(stem: &str) -> Result<Self, WheelFilenameError> {
    // A stem must not still carry the `.whl` extension; reject it up front so a full
    // filename is never silently misparsed as a stem.
    let has_whl_extension = std::path::Path::new(stem)
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"));
    if has_whl_extension {
        return Err(WheelFilenameError::UnexpectedExtension(stem.to_string()));
    }
    Self::parse(stem, stem)
}
@ -328,6 +335,8 @@ pub enum WheelFilenameError {
MissingAbiTag(String),
#[error("The wheel filename \"{0}\" is missing a platform tag")]
MissingPlatformTag(String),
#[error("The wheel stem \"{0}\" has an unexpected extension")]
UnexpectedExtension(String),
}
#[cfg(test)]

View File

@ -1,12 +1,7 @@
use uv_cache::{ArchiveId, Cache};
use uv_cache::{ArchiveId, Cache, ARCHIVE_VERSION};
use uv_distribution_types::Hashed;
use uv_pypi_types::HashDigest;
/// The version of the archive bucket.
///
/// Must be kept in-sync with the version in [`uv_cache::CacheBucket::to_str`].
const ARCHIVE_VERSION: u8 = 0;
/// An archive (unzipped wheel) that exists in the local cache.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Archive {

View File

@ -371,7 +371,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
// If the wheel was unzipped previously, respect it. Source distributions are
// cached under a unique revision ID, so unzipped directories are never stale.
match built_wheel.target.canonicalize() {
match self.build_context.cache().resolve_link(&built_wheel.target) {
Ok(archive) => {
return Ok(LocalWheel {
dist: Dist::Source(dist.clone()),

View File

@ -1,6 +1,3 @@
use crate::index::cached_wheel::CachedWheel;
use crate::source::{HttpRevisionPointer, LocalRevisionPointer, HTTP_REVISION, LOCAL_REVISION};
use crate::Error;
use uv_cache::{Cache, CacheBucket, CacheShard, WheelCache};
use uv_cache_info::CacheInfo;
use uv_cache_key::cache_digest;
@ -8,10 +5,13 @@ use uv_configuration::ConfigSettings;
use uv_distribution_types::{
DirectUrlSourceDist, DirectorySourceDist, GitSourceDist, Hashed, PathSourceDist,
};
use uv_fs::symlinks;
use uv_platform_tags::Tags;
use uv_types::HashStrategy;
use crate::index::cached_wheel::CachedWheel;
use crate::source::{HttpRevisionPointer, LocalRevisionPointer, HTTP_REVISION, LOCAL_REVISION};
use crate::Error;
/// A local index of built distributions for a specific source distribution.
#[derive(Debug)]
pub struct BuiltWheelIndex<'a> {
@ -203,8 +203,16 @@ impl<'a> BuiltWheelIndex<'a> {
let mut candidate: Option<CachedWheel> = None;
// Unzipped wheels are stored as links (symlinks on Unix; link files on Windows) into the archive directory.
for subdir in symlinks(shard) {
match CachedWheel::from_built_source(&subdir) {
for wheel_dir in uv_fs::entries(shard) {
// Ignore any `.lock` files.
if wheel_dir
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
{
continue;
}
match CachedWheel::from_built_source(&wheel_dir, self.cache) {
None => {}
Some(dist_info) => {
// Pick the wheel with the highest priority

View File

@ -26,7 +26,7 @@ pub struct CachedWheel {
impl CachedWheel {
/// Try to parse a distribution from a cached directory name (like `typing-extensions-4.8.0-py3-none-any`).
pub fn from_built_source(path: impl AsRef<Path>) -> Option<Self> {
pub fn from_built_source(path: impl AsRef<Path>, cache: &Cache) -> Option<Self> {
let path = path.as_ref();
// Determine the wheel filename.
@ -34,7 +34,7 @@ impl CachedWheel {
let filename = WheelFilename::from_stem(filename).ok()?;
// Convert to a cached wheel.
let archive = path.canonicalize().ok()?;
let archive = cache.resolve_link(path).ok()?;
let entry = CacheEntry::from_path(archive);
let hashes = Vec::new();
let cache_info = CacheInfo::default();

View File

@ -6,7 +6,7 @@ use uv_cache::{Cache, CacheBucket, WheelCache};
use uv_cache_key::cache_digest;
use uv_configuration::ConfigSettings;
use uv_distribution_types::{CachedRegistryDist, Hashed, Index, IndexLocations, IndexUrl};
use uv_fs::{directories, files, symlinks};
use uv_fs::{directories, files};
use uv_normalize::PackageName;
use uv_platform_tags::Tags;
use uv_types::HashStrategy;
@ -205,8 +205,16 @@ impl<'a> RegistryWheelIndex<'a> {
cache_shard.shard(cache_digest(build_configuration))
};
for wheel_dir in symlinks(cache_shard) {
if let Some(wheel) = CachedWheel::from_built_source(wheel_dir) {
for wheel_dir in uv_fs::entries(cache_shard) {
// Ignore any `.lock` files.
if wheel_dir
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
{
continue;
}
if let Some(wheel) = CachedWheel::from_built_source(wheel_dir, cache) {
if wheel.filename.compatibility(tags).is_compatible() {
// Enforce hash-checking based on the source distribution.
if revision.satisfies(

View File

@ -535,10 +535,10 @@ pub fn directories(path: impl AsRef<Path>) -> impl Iterator<Item = PathBuf> {
.map(|entry| entry.path())
}
/// Iterate over the symlinks in a directory.
/// Iterate over the entries in a directory.
///
/// If the directory does not exist, returns an empty iterator.
pub fn symlinks(path: impl AsRef<Path>) -> impl Iterator<Item = PathBuf> {
pub fn entries(path: impl AsRef<Path>) -> impl Iterator<Item = PathBuf> {
path.as_ref()
.read_dir()
.ok()
@ -551,11 +551,6 @@ pub fn symlinks(path: impl AsRef<Path>) -> impl Iterator<Item = PathBuf> {
None
}
})
.filter(|entry| {
entry
.file_type()
.is_ok_and(|file_type| file_type.is_symlink())
})
.map(|entry| entry.path())
}

View File

@ -81,7 +81,7 @@ impl CachedEnvironment {
let cache_entry = cache.entry(CacheBucket::Environments, interpreter_hash, resolution_hash);
if cache.refresh().is_none() {
if let Ok(root) = fs_err::read_link(cache_entry.path()) {
if let Ok(root) = cache.resolve_link(cache_entry.path()) {
if let Ok(environment) = PythonEnvironment::from_root(root, cache) {
return Ok(Self(environment));
}

View File

@ -42,7 +42,7 @@ pub(crate) async fn uninstall(
if let Some(top_level) = installations.root().parent() {
// Remove the `toolchains` symlink.
match uv_fs::remove_symlink(top_level.join("toolchains")) {
match fs_err::tokio::remove_file(top_level.join("toolchains")).await {
Ok(()) => {}
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
Err(err) => return Err(err.into()),

View File

@ -136,7 +136,7 @@ fn prune_cached_env() {
----- stderr -----
DEBUG uv [VERSION] ([COMMIT] DATE)
Pruning cache at: [CACHE_DIR]/
DEBUG Removing dangling cache environment: [CACHE_DIR]/environments-v1/[ENTRY]
DEBUG Removing dangling cache environment: [CACHE_DIR]/environments-v2/[ENTRY]
DEBUG Removing dangling cache archive: [CACHE_DIR]/archive-v0/[ENTRY]
Removed [N] files ([SIZE])
"###);
@ -348,7 +348,7 @@ fn prune_stale_revision() -> Result<()> {
----- stderr -----
DEBUG uv [VERSION] ([COMMIT] DATE)
Pruning cache at: [CACHE_DIR]/
DEBUG Removing dangling source revision: [CACHE_DIR]/sdists-v7/[ENTRY]
DEBUG Removing dangling source revision: [CACHE_DIR]/sdists-v8/[ENTRY]
DEBUG Removing dangling cache archive: [CACHE_DIR]/archive-v0/[ENTRY]
Removed [N] files ([SIZE])
"###);