From 4b4128446da9ade3368e103dc6cd2a28fa324cf8 Mon Sep 17 00:00:00 2001 From: Krishnan Chandra <1229365+krishnan-chandra@users.noreply.github.com> Date: Sun, 28 Jul 2024 14:37:48 -0400 Subject: [PATCH] Support xz compressed packages (#5513) ## Summary Closes #2187. The [xz backdoor](https://gist.github.com/thesamesam/223949d5a074ebc3dce9ee78baad9e27) is still fairly recent, but luckily the [Rust `xz2` crate bundles version 5.2.5 of the C `xz` package](https://github.com/alexcrichton/xz2-rs/tree/main/lzma-sys), which is before the backdoor was introduced. It's worth noting that a security risk still exists if you have a compromised version of `xz` installed on your system, but that risk is not introduced by `uv` or the Rust packages in general. ## Test Plan Tried installing the package mentioned in the linked issue: `python-apt @ https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/python-apt/2.7.6/python-apt_2.7.6.tar.xz` (Note that this will only work on Ubuntu - I tried on a Mac and while the archive was extracted properly, the package did not install because of some missing files) --------- Co-authored-by: Charlie Marsh --- Cargo.lock | 23 +++++++++++++++++++++- crates/uv-extract/Cargo.toml | 2 +- crates/uv-extract/src/stream.rs | 35 ++++++++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d92610ebf..5a9dce6c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -212,6 +212,7 @@ dependencies = [ "memchr", "pin-project-lite", "tokio", + "xz2", "zstd", "zstd-safe", ] @@ -2039,6 +2040,17 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + 
[[package]] name = "mailparse" version = "0.15.0" @@ -2713,7 +2725,7 @@ dependencies = [ "indoc", "libc", "memoffset 0.9.1", - "parking_lot 0.11.2", + "parking_lot 0.12.3", "portable-atomic", "pyo3-build-config", "pyo3-ffi", @@ -5750,6 +5762,15 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yansi" version = "0.5.1" diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index 234eee8f6..d866cd435 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -15,7 +15,7 @@ workspace = true [dependencies] pypi-types = { workspace = true } -async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd"] } +async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] } async_zip = { workspace = true, features = ["tokio"] } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index f73839a29..b3a2ae824 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -198,7 +198,25 @@ pub async fn untar_zst( Ok(untar_in(&mut archive, target.as_ref()).await?) } -/// Unzip a `.zip`, `.tar.gz`, or `.tar.bz2` archive into the target directory, without requiring `Seek`. +/// Unzip a `.tar.xz` archive into the target directory, without requiring `Seek`. +/// +/// This is useful for unpacking files as they're being downloaded. 
+pub async fn untar_xz<R: tokio::io::AsyncRead + Unpin>( + reader: R, + target: impl AsRef<Path>, +) -> Result<(), Error> { + let reader = tokio::io::BufReader::new(reader); + let decompressed_bytes = async_compression::tokio::bufread::XzDecoder::new(reader); + + let mut archive = tokio_tar::ArchiveBuilder::new(decompressed_bytes) + .set_preserve_mtime(false) + .build(); + untar_in(&mut archive, target.as_ref()).await?; + Ok(()) +} + +/// Unzip a `.zip`, `.tar.gz`, `.tar.bz2`, `.tar.zst`, or `.tar.xz` archive into the target directory, +/// without requiring `Seek`. pub async fn archive<R: tokio::io::AsyncRead + Unpin>( reader: R, source: impl AsRef<Path>, @@ -258,5 +276,20 @@ pub async fn archive<R: tokio::io::AsyncRead + Unpin>( return Ok(()); } + // `.tar.xz` + if source + .as_ref() + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("xz")) + && source.as_ref().file_stem().is_some_and(|stem| { + Path::new(stem) + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) + }) + { + untar_xz(reader, target).await?; + return Ok(()); + } + Err(Error::UnsupportedArchive(source.as_ref().to_path_buf())) }