Recursively merge existing package directories on installation (#516)

Previously, when installing a package we would delete the target
directory before copying (or linking) the contents of the package.
However, this means that we do not properly support namespace packages
which can share a target directory. Instead the last package to be
installed would be override existing packages. Since we install packages
in parallel, this could result in a race condition where the target
directory already exists which is not allowed when using `clonefile`.
See example error in #515.
c7e63d2dce
provides a regression test for this — it fails on `main`.

Here, we implement a recursive merge when the target directory already
exists. Both packages will be installed into the same directory. We no
longer delete the target directory, which seems okay since we uninstall
packages before installing now.

When files conflict, we will likely throw an error still. The correct
behavior to implement in this case is unclear, as if we just take "first
write wins" or "last write wins" we could end up with some files from
one package and some from another resulting in two broken packages. A
possible solution here is to lock the target directories while copying.
This commit is contained in:
Zanie Blue 2023-11-30 10:14:51 -06:00 committed by GitHub
parent 6841c06e2d
commit 5f1f207628
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 109 additions and 18 deletions

81
Cargo.lock generated
View File

@ -1353,7 +1353,7 @@ dependencies = [
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows-core",
"windows-core 0.51.1",
]
[[package]]
@ -2847,9 +2847,8 @@ dependencies = [
[[package]]
name = "reflink-copy"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f97f7665e51f23760e9e4949d454a4782c76ef954acaeec9d1b0f48a58e4529e"
version = "0.1.12"
source = "git+https://github.com/konstin/reflink-copy?rev=1fc25b9ca5087bd20f78546009fe98709e9c4343#1fc25b9ca5087bd20f78546009fe98709e9c4343"
dependencies = [
"cfg-if 1.0.0",
"rustix",
@ -4176,12 +4175,12 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.51.1"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca229916c5ee38c2f2bc1e9d8f04df975b4bd93f9955dc69fabb5d91270045c9"
checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
dependencies = [
"windows-core",
"windows-targets 0.48.5",
"windows-core 0.52.0",
"windows-targets 0.52.0",
]
[[package]]
@ -4193,6 +4192,15 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-core"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
dependencies = [
"windows-targets 0.52.0",
]
[[package]]
name = "windows-sys"
version = "0.45.0"
@ -4241,6 +4249,21 @@ dependencies = [
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm 0.52.0",
"windows_aarch64_msvc 0.52.0",
"windows_i686_gnu 0.52.0",
"windows_i686_msvc 0.52.0",
"windows_x86_64_gnu 0.52.0",
"windows_x86_64_gnullvm 0.52.0",
"windows_x86_64_msvc 0.52.0",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
@ -4253,6 +4276,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
@ -4265,6 +4294,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
@ -4277,6 +4312,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
@ -4289,6 +4330,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
@ -4301,6 +4348,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
@ -4313,6 +4366,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
@ -4325,6 +4384,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
[[package]]
name = "winnow"
version = "0.5.19"

View File

@ -56,7 +56,8 @@ pyo3-log = { version = "0.9.0"}
pyproject-toml = { version = "0.8.0" }
rand = { version = "0.8.5" }
rayon = { version = "1.8.0" }
reflink-copy = { version = "0.1.11" }
# For correct IO error handling: https://github.com/cargo-bins/reflink-copy/pull/51
reflink-copy = { git = "https://github.com/konstin/reflink-copy", rev = "1fc25b9ca5087bd20f78546009fe98709e9c4343" }
regex = { version = "1.10.2" }
reqwest = { version = "0.11.22", default-features = false, features = ["json", "gzip", "brotli", "stream", "rustls-tls"] }
reqwest-middleware = { version = "0.2.4" }

View File

@ -297,7 +297,8 @@ fn clone_wheel_files(
// On macOS, directly can be recursively copied with a single `clonefile` call.
// So we only need to iterate over the top-level of the directory, and copy each file or
// subdirectory.
// subdirectory unless the subdirectory exists already in which case we'll need to recursively
// merge its contents with the existing direcotry.
for entry in fs::read_dir(wheel.as_ref())? {
let entry = entry?;
let from = entry.path();
@ -305,20 +306,42 @@ fn clone_wheel_files(
.as_ref()
.join(from.strip_prefix(&wheel).unwrap());
// Delete the destination if it already exists.
fs::remove_dir_all(&to)
.or_else(|_| fs::remove_file(&to))
.ok();
// Copy the file.
reflink_copy::reflink(&from, &to).map_err(|err| Error::Reflink { from, to, err })?;
clone_recursive(&from, &to)?;
count += 1;
}
Ok(count)
}
fn clone_recursive(from: &Path, to: &Path) -> Result<(), Error> {
// Attempt to copy the file or directory
debug!("Cloning {} to {}", from.display(), to.display());
let reflink = reflink_copy::reflink(from, to);
// If copying fails and the directory exists already, it must be merged recursively
if from.is_dir()
&& reflink
.as_ref()
.is_err_and(|err| matches!(err.kind(), std::io::ErrorKind::AlreadyExists))
{
for entry in fs::read_dir(from)? {
let entry = entry?;
let child_from = entry.path();
let child_to = to.join(child_from.strip_prefix(from).unwrap());
clone_recursive(child_from.as_path(), child_to.as_path())?;
}
} else {
// Other errors should be tracked
reflink.map_err(|err| Error::Reflink {
from: from.to_path_buf(),
to: to.to_path_buf(),
err,
})?;
}
Ok(())
}
/// Extract a wheel by copying all of its files into site packages.
fn copy_wheel_files(
site_packages: impl AsRef<Path>,

View File

@ -521,6 +521,8 @@ fn install_git_subdirectories() -> Result<()> {
});
check_command(&venv, "import example_pkg", &temp_dir);
check_command(&venv, "import example_pkg.a", &temp_dir);
check_command(&venv, "import example_pkg.b", &temp_dir);
Ok(())
}