diff --git a/Cargo.lock b/Cargo.lock index 426c82ebf..2ec9a030c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -153,6 +153,22 @@ dependencies = [ "tempfile", ] +[[package]] +name = "astral-tokio-tar" +version = "0.4.2" +source = "git+https://github.com/astral-sh/tokio-tar?rev=c06006a2cf6a6ca42e11775ddf1502dee8a8c688#c06006a2cf6a6ca42e11775ddf1502dee8a8c688" +dependencies = [ + "filetime", + "futures-core", + "libc", + "portable-atomic", + "redox_syscall 0.3.5", + "rustc-hash", + "tokio", + "tokio-stream", + "xattr", +] + [[package]] name = "async-channel" version = "2.3.1" @@ -1979,22 +1995,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "krata-tokio-tar" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8bd5fee9b96acb5fc36b401896d601e6fdcce52b0e651ce24a3b21fb524e79f" -dependencies = [ - "filetime", - "futures-core", - "libc", - "portable-atomic", - "redox_syscall 0.3.5", - "tokio", - "tokio-stream", - "xattr", -] - [[package]] name = "kurbo" version = "0.8.3" @@ -5060,11 +5060,11 @@ dependencies = [ name = "uv-extract" version = "0.0.1" dependencies = [ + "astral-tokio-tar", "async-compression", "async_zip", "fs-err 3.1.0", "futures", - "krata-tokio-tar", "md-5", "rayon", "reqwest", @@ -5344,6 +5344,7 @@ dependencies = [ name = "uv-publish" version = "0.1.0" dependencies = [ + "astral-tokio-tar", "async-compression", "base64 0.22.1", "fs-err 3.1.0", @@ -5351,7 +5352,6 @@ dependencies = [ "glob", "insta", "itertools 0.14.0", - "krata-tokio-tar", "reqwest", "reqwest-middleware", "reqwest-retry", diff --git a/Cargo.toml b/Cargo.toml index 6604be721..08c718e29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ uv-workspace = { path = "crates/uv-workspace" } anstream = { version = "0.6.15" } anyhow = { version = "1.0.89" } arcstr = { version = "1.2.0" } +astral-tokio-tar = { git = "https://github.com/astral-sh/tokio-tar", rev = "c06006a2cf6a6ca42e11775ddf1502dee8a8c688" } async-channel = { version = "2.3.1" } async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] } async-trait = { version = "0.1.82" } @@ -118,7 +119,6 @@ indoc = { version = "2.0.5" } itertools = { version = "0.14.0" } jiff = { version = "0.1.14", features = ["serde"] } junction = { version = "1.2.0" } -krata-tokio-tar = { version = "0.4.2" } mailparse = { version = "0.15.0" } md-5 = { version = "0.10.6" } memchr = { version = "2.7.4" } diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index 7f3851f46..fc6c3343b 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -20,11 +20,11 @@ uv-configuration = { workspace = true } uv-distribution-filename = { workspace = true } uv-pypi-types = { workspace = true } +astral-tokio-tar = { workspace = true } async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] } async_zip = { workspace = true } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } -krata-tokio-tar = { workspace = true } md-5 = { workspace = true } rayon = { workspace = true } reqwest = { workspace = true } diff --git a/crates/uv-extract/src/lib.rs b/crates/uv-extract/src/lib.rs index 70929b210..f87059b3a 100644 --- a/crates/uv-extract/src/lib.rs +++ b/crates/uv-extract/src/lib.rs @@ -5,5 +5,4 @@ mod error; pub mod hash; pub mod stream; mod sync; -mod tar; mod vendor; diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 49ae138c4..f7a029e6d 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -3,6 +3,7 @@ use std::pin::Pin; use futures::StreamExt; use rustc_hash::FxHashSet; +use tokio_tar::EntryType; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; use tracing::warn; @@ -143,6 +144,16 @@ async fn untar_in( mut archive: tokio_tar::Archive<&'_ mut (dyn tokio::io::AsyncRead + Unpin)>, dst: &Path, ) -> std::io::Result<()> { + // Like `tokio-tar`, canonicalize the destination prior to unpacking. + let dst = fs_err::tokio::canonicalize(dst).await?; + + // Memoize filesystem calls to canonicalize paths. + let mut memo = FxHashSet::default(); + + // Delay any directory entries until the end, to ensure that directory permissions do not + // interfere with descendant extraction. + let mut directories = Vec::new(); + let mut entries = archive.entries()?; let mut pinned = Pin::new(&mut entries); while let Some(entry) = pinned.next().await { @@ -159,7 +170,15 @@ async fn untar_in( continue; } - file.unpack_in(dst).await?; + // Defer the creation of any directory entries. + if file.header().entry_type() == EntryType::Directory { + directories.push(file); + continue; + } + + // Unpack the file into the destination directory. + #[cfg_attr(not(unix), allow(unused_variables))] + let unpacked_at = file.unpack_in_memo(&dst, &mut memo).await?; // Preserve the executable bit. #[cfg(unix)] @@ -172,7 +191,7 @@ async fn untar_in( let mode = file.header().mode()?; let has_any_executable_bit = mode & 0o111; if has_any_executable_bit != 0 { - if let Some(path) = crate::tar::unpacked_at(dst, &file.path()?) { + if let Some(path) = unpacked_at.as_deref() { let permissions = fs_err::tokio::metadata(&path).await?.permissions(); if permissions.mode() & 0o111 != 0o111 { fs_err::tokio::set_permissions( @@ -186,6 +205,13 @@ async fn untar_in( } } } + + // Create any deferred directories in topological order. + directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes())); + for mut dir in directories { + dir.unpack_in_memo(&dst, &mut memo).await?; + } + Ok(()) } diff --git a/crates/uv-extract/src/tar.rs b/crates/uv-extract/src/tar.rs deleted file mode 100644 index 4791a7547..000000000 --- a/crates/uv-extract/src/tar.rs +++ /dev/null @@ -1,40 +0,0 @@ -use std::path::{Component, Path, PathBuf}; - -/// Determine the path at which the given tar entry will be unpacked, when unpacking into `dst`. -/// -/// See: -#[cfg_attr(not(unix), allow(dead_code))] -pub(crate) fn unpacked_at(dst: &Path, entry: &Path) -> Option { - let mut file_dst = dst.to_path_buf(); - { - for part in entry.components() { - match part { - // Leading '/' characters, root paths, and '.' - // components are just ignored and treated as "empty - // components" - Component::Prefix(..) | Component::RootDir | Component::CurDir => { - continue; - } - - // If any part of the filename is '..', then skip over - // unpacking the file to prevent directory traversal - // security issues. See, e.g.: CVE-2001-1267, - // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131 - Component::ParentDir => return None, - - Component::Normal(part) => file_dst.push(part), - } - } - } - - // Skip cases where only slashes or '.' parts were seen, because - // this is effectively an empty filename. - if *dst == *file_dst { - return None; - } - - // Skip entries without a parent (i.e. outside of FS root) - file_dst.parent()?; - - Some(file_dst) -} diff --git a/crates/uv-publish/Cargo.toml b/crates/uv-publish/Cargo.toml index 06f4fb940..be2a627b6 100644 --- a/crates/uv-publish/Cargo.toml +++ b/crates/uv-publish/Cargo.toml @@ -25,13 +25,13 @@ uv-pypi-types = { workspace = true } uv-static = { workspace = true } uv-warnings = { workspace = true } +astral-tokio-tar = { workspace = true } async-compression = { workspace = true } base64 = { workspace = true } fs-err = { workspace = true } futures = { workspace = true } glob = { workspace = true } itertools = { workspace = true } -krata-tokio-tar = { workspace = true } reqwest = { workspace = true } reqwest-middleware = { workspace = true } reqwest-retry = { workspace = true }