Use Astral-maintained `tokio-tar` fork (#11174)

## Summary

I shipped one security fix here along with several significant
performance improvements for large TAR files:

- https://github.com/astral-sh/tokio-tar/pull/2
- https://github.com/astral-sh/tokio-tar/pull/4
- https://github.com/astral-sh/tokio-tar/pull/5

I also PR'd the security fix to `edera-dev`
(https://github.com/edera-dev/tokio-tar/pull/4).
This commit is contained in:
Charlie Marsh 2025-02-03 12:51:35 -05:00 committed by GitHub
parent 56684e4c24
commit 7b43baf251
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 49 additions and 64 deletions

36
Cargo.lock generated
View File

@ -153,6 +153,22 @@ dependencies = [
"tempfile",
]
[[package]]
name = "astral-tokio-tar"
version = "0.4.2"
source = "git+https://github.com/astral-sh/tokio-tar?rev=c06006a2cf6a6ca42e11775ddf1502dee8a8c688#c06006a2cf6a6ca42e11775ddf1502dee8a8c688"
dependencies = [
"filetime",
"futures-core",
"libc",
"portable-atomic",
"redox_syscall 0.3.5",
"rustc-hash",
"tokio",
"tokio-stream",
"xattr",
]
[[package]]
name = "async-channel"
version = "2.3.1"
@ -1979,22 +1995,6 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "krata-tokio-tar"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8bd5fee9b96acb5fc36b401896d601e6fdcce52b0e651ce24a3b21fb524e79f"
dependencies = [
"filetime",
"futures-core",
"libc",
"portable-atomic",
"redox_syscall 0.3.5",
"tokio",
"tokio-stream",
"xattr",
]
[[package]]
name = "kurbo"
version = "0.8.3"
@ -5060,11 +5060,11 @@ dependencies = [
name = "uv-extract"
version = "0.0.1"
dependencies = [
"astral-tokio-tar",
"async-compression",
"async_zip",
"fs-err 3.1.0",
"futures",
"krata-tokio-tar",
"md-5",
"rayon",
"reqwest",
@ -5344,6 +5344,7 @@ dependencies = [
name = "uv-publish"
version = "0.1.0"
dependencies = [
"astral-tokio-tar",
"async-compression",
"base64 0.22.1",
"fs-err 3.1.0",
@ -5351,7 +5352,6 @@ dependencies = [
"glob",
"insta",
"itertools 0.14.0",
"krata-tokio-tar",
"reqwest",
"reqwest-middleware",
"reqwest-retry",

View File

@ -73,6 +73,7 @@ uv-workspace = { path = "crates/uv-workspace" }
anstream = { version = "0.6.15" }
anyhow = { version = "1.0.89" }
arcstr = { version = "1.2.0" }
astral-tokio-tar = { git = "https://github.com/astral-sh/tokio-tar", rev = "c06006a2cf6a6ca42e11775ddf1502dee8a8c688" }
async-channel = { version = "2.3.1" }
async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] }
async-trait = { version = "0.1.82" }
@ -118,7 +119,6 @@ indoc = { version = "2.0.5" }
itertools = { version = "0.14.0" }
jiff = { version = "0.1.14", features = ["serde"] }
junction = { version = "1.2.0" }
krata-tokio-tar = { version = "0.4.2" }
mailparse = { version = "0.15.0" }
md-5 = { version = "0.10.6" }
memchr = { version = "2.7.4" }

View File

@ -20,11 +20,11 @@ uv-configuration = { workspace = true }
uv-distribution-filename = { workspace = true }
uv-pypi-types = { workspace = true }
astral-tokio-tar = { workspace = true }
async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] }
async_zip = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }
krata-tokio-tar = { workspace = true }
md-5 = { workspace = true }
rayon = { workspace = true }
reqwest = { workspace = true }

View File

@ -5,5 +5,4 @@ mod error;
pub mod hash;
pub mod stream;
mod sync;
mod tar;
mod vendor;

View File

@ -3,6 +3,7 @@ use std::pin::Pin;
use futures::StreamExt;
use rustc_hash::FxHashSet;
use tokio_tar::EntryType;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::warn;
@ -143,6 +144,16 @@ async fn untar_in(
mut archive: tokio_tar::Archive<&'_ mut (dyn tokio::io::AsyncRead + Unpin)>,
dst: &Path,
) -> std::io::Result<()> {
// Like `tokio-tar`, canonicalize the destination prior to unpacking.
let dst = fs_err::tokio::canonicalize(dst).await?;
// Memoize filesystem calls to canonicalize paths.
let mut memo = FxHashSet::default();
// Delay any directory entries until the end, to ensure that directory permissions do not
// interfere with descendant extraction.
let mut directories = Vec::new();
let mut entries = archive.entries()?;
let mut pinned = Pin::new(&mut entries);
while let Some(entry) = pinned.next().await {
@ -159,7 +170,15 @@ async fn untar_in(
continue;
}
file.unpack_in(dst).await?;
// Defer the creation of any directory entries.
if file.header().entry_type() == EntryType::Directory {
directories.push(file);
continue;
}
// Unpack the file into the destination directory.
#[cfg_attr(not(unix), allow(unused_variables))]
let unpacked_at = file.unpack_in_memo(&dst, &mut memo).await?;
// Preserve the executable bit.
#[cfg(unix)]
@ -172,7 +191,7 @@ async fn untar_in(
let mode = file.header().mode()?;
let has_any_executable_bit = mode & 0o111;
if has_any_executable_bit != 0 {
if let Some(path) = crate::tar::unpacked_at(dst, &file.path()?) {
if let Some(path) = unpacked_at.as_deref() {
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
if permissions.mode() & 0o111 != 0o111 {
fs_err::tokio::set_permissions(
@ -186,6 +205,13 @@ async fn untar_in(
}
}
}
// Create any deferred directories in topological order.
directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes()));
for mut dir in directories {
dir.unpack_in_memo(&dst, &mut memo).await?;
}
Ok(())
}

View File

@ -1,40 +0,0 @@
use std::path::{Component, Path, PathBuf};
/// Compute where a tar entry lands on disk when the archive is unpacked into `dst`.
///
/// The entry path is appended to `dst` one component at a time. Returns `None` when the
/// entry must not be unpacked: it contains a `..` component, it contributes no normal
/// components at all (so the result would equal `dst`), or the resulting path has no parent.
///
/// See: <https://github.com/vorot93/tokio-tar/blob/87338a76092330bc6fe60de95d83eae5597332e1/src/entry.rs#L418>
#[cfg_attr(not(unix), allow(dead_code))]
pub(crate) fn unpacked_at(dst: &Path, entry: &Path) -> Option<PathBuf> {
    let mut target = PathBuf::from(dst);

    for component in entry.components() {
        match component {
            // Only ordinary names extend the destination path.
            Component::Normal(name) => target.push(name),
            // Refuse any entry containing '..' to prevent directory traversal
            // security issues. See, e.g.: CVE-2001-1267, CVE-2002-0399,
            // CVE-2005-1918, CVE-2007-4131
            Component::ParentDir => return None,
            // Prefixes, leading '/' characters, and '.' components are treated
            // as "empty components" and contribute nothing.
            Component::Prefix(..) | Component::RootDir | Component::CurDir => {}
        }
    }

    // An entry made up solely of slashes or '.' parts is effectively an empty
    // filename, and a path without a parent lies outside of the FS root; skip both.
    if target.as_path() == dst || target.parent().is_none() {
        return None;
    }

    Some(target)
}

View File

@ -25,13 +25,13 @@ uv-pypi-types = { workspace = true }
uv-static = { workspace = true }
uv-warnings = { workspace = true }
astral-tokio-tar = { workspace = true }
async-compression = { workspace = true }
base64 = { workspace = true }
fs-err = { workspace = true }
futures = { workspace = true }
glob = { workspace = true }
itertools = { workspace = true }
krata-tokio-tar = { workspace = true }
reqwest = { workspace = true }
reqwest-middleware = { workspace = true }
reqwest-retry = { workspace = true }