diff --git a/Cargo.lock b/Cargo.lock index ab8c7385f..187c54aa3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -251,7 +251,7 @@ dependencies = [ [[package]] name = "async_zip" version = "0.0.17" -source = "git+https://github.com/astral-sh/rs-async-zip?rev=c909fda63fcafe4af496a07bfda28a5aae97e58d#c909fda63fcafe4af496a07bfda28a5aae97e58d" +source = "git+https://github.com/astral-sh/rs-async-zip?rev=285e48742b74ab109887d62e1ae79e7c15fd4878#285e48742b74ab109887d62e1ae79e7c15fd4878" dependencies = [ "async-compression", "crc32fast", @@ -4701,6 +4701,7 @@ dependencies = [ "textwrap", "thiserror 2.0.12", "tokio", + "tokio-util", "toml", "toml_edit", "tracing", @@ -5092,6 +5093,7 @@ dependencies = [ "anyhow", "clap", "fs-err", + "futures", "itertools 0.14.0", "markdown", "owo-colors", @@ -5104,8 +5106,10 @@ dependencies = [ "serde_json", "serde_yaml", "tagu", + "tempfile", "textwrap", "tokio", + "tokio-util", "tracing", "tracing-durations-export", "tracing-subscriber", @@ -5298,6 +5302,7 @@ dependencies = [ "uv-configuration", "uv-distribution-filename", "uv-pypi-types", + "uv-static", "xz2", "zip", ] diff --git a/Cargo.toml b/Cargo.toml index 9cc7af336..0a83379aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,7 +82,7 @@ async-channel = { version = "2.3.1" } async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] } async-trait = { version = "0.1.82" } async_http_range_reader = { version = "0.9.1" } -async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "c909fda63fcafe4af496a07bfda28a5aae97e58d", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] } +async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "285e48742b74ab109887d62e1ae79e7c15fd4878", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] } axoupdater = { version = "0.9.0", default-features = false } backon = { version = "1.3.0" } base64 = { version = "0.22.1" } diff --git a/crates/uv-dev/Cargo.toml b/crates/uv-dev/Cargo.toml index 
c778d842b..37fa0140a 100644 --- a/crates/uv-dev/Cargo.toml +++ b/crates/uv-dev/Cargo.toml @@ -22,7 +22,7 @@ uv-client = { workspace = true } uv-configuration = { workspace = true } uv-distribution-filename = { workspace = true } uv-distribution-types = { workspace = true } -uv-extract = { workspace = true, optional = true } +uv-extract = { workspace = true } uv-installer = { workspace = true } uv-macros = { workspace = true } uv-options-metadata = { workspace = true } @@ -39,20 +39,23 @@ anstream = { workspace = true } anyhow = { workspace = true } clap = { workspace = true, features = ["derive", "wrap_help"] } fs-err = { workspace = true, features = ["tokio"] } +futures = { workspace = true } itertools = { workspace = true } markdown = { version = "1.0.0" } owo-colors = { workspace = true } poloto = { version = "19.1.2", optional = true } pretty_assertions = { version = "1.4.1" } -reqwest = { workspace = true } +reqwest = { workspace = true, features = ["stream"] } resvg = { version = "0.29.0", optional = true } schemars = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } serde_yaml = { version = "0.9.34" } tagu = { version = "0.1.6", optional = true } +tempfile = { workspace = true } textwrap = { workspace = true } tokio = { workspace = true } +tokio-util = { workspace = true } tracing = { workspace = true } tracing-durations-export = { workspace = true, features = ["plot"] } tracing-subscriber = { workspace = true } diff --git a/crates/uv-dev/src/lib.rs b/crates/uv-dev/src/lib.rs index c01cc62c4..f7b7fea53 100644 --- a/crates/uv-dev/src/lib.rs +++ b/crates/uv-dev/src/lib.rs @@ -14,6 +14,7 @@ use crate::generate_options_reference::Args as GenerateOptionsReferenceArgs; use crate::generate_sysconfig_mappings::Args as GenerateSysconfigMetadataArgs; #[cfg(feature = "render")] use crate::render_benchmarks::RenderBenchmarksArgs; +use crate::validate_zip::ValidateZipArgs; use crate::wheel_metadata::WheelMetadataArgs; mod clear_compile; 
@@ -25,6 +26,7 @@ mod generate_json_schema; mod generate_options_reference; mod generate_sysconfig_mappings; mod render_benchmarks; +mod validate_zip; mod wheel_metadata; const ROOT_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../"); @@ -33,6 +35,8 @@ const ROOT_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../"); enum Cli { /// Display the metadata for a `.whl` at a given URL. WheelMetadata(WheelMetadataArgs), + /// Validate that a `.whl` or `.zip` file at a given URL is a valid ZIP file. + ValidateZip(ValidateZipArgs), /// Compile all `.py` to `.pyc` files in the tree. Compile(CompileArgs), /// Remove all `.pyc` in the tree. @@ -59,6 +63,7 @@ pub async fn run() -> Result<()> { let cli = Cli::parse(); match cli { Cli::WheelMetadata(args) => wheel_metadata::wheel_metadata(args).await?, + Cli::ValidateZip(args) => validate_zip::validate_zip(args).await?, Cli::Compile(args) => compile::compile(args).await?, Cli::ClearCompile(args) => clear_compile::clear_compile(&args)?, Cli::GenerateAll(args) => generate_all::main(&args).await?, diff --git a/crates/uv-dev/src/validate_zip.rs b/crates/uv-dev/src/validate_zip.rs new file mode 100644 index 000000000..237c13911 --- /dev/null +++ b/crates/uv-dev/src/validate_zip.rs @@ -0,0 +1,43 @@ +use std::ops::Deref; + +use anyhow::{Result, bail}; +use clap::Parser; +use futures::TryStreamExt; +use tokio_util::compat::FuturesAsyncReadCompatExt; + +use uv_cache::{Cache, CacheArgs}; +use uv_client::RegistryClientBuilder; +use uv_pep508::VerbatimUrl; +use uv_pypi_types::ParsedUrl; + +#[derive(Parser)] +pub(crate) struct ValidateZipArgs { + url: VerbatimUrl, + #[command(flatten)] + cache_args: CacheArgs, +} + +pub(crate) async fn validate_zip(args: ValidateZipArgs) -> Result<()> { + let cache = Cache::try_from(args.cache_args)?.init()?; + let client = RegistryClientBuilder::new(cache).build(); + + let ParsedUrl::Archive(archive) = ParsedUrl::try_from(args.url.to_url())? 
else { + bail!("Only archive URLs are supported"); + }; + + let response = client + .uncached_client(&archive.url) + .get(archive.url.deref().clone()) + .send() + .await?; + let reader = response + .bytes_stream() + .map_err(std::io::Error::other) + .into_async_read(); + + let target = tempfile::TempDir::new()?; + + uv_extract::stream::unzip(reader.compat(), target.path()).await?; + + Ok(()) +} diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index c25c2677d..f75c540c9 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -19,6 +19,7 @@ workspace = true uv-configuration = { workspace = true } uv-distribution-filename = { workspace = true } uv-pypi-types = { workspace = true } +uv-static = { workspace = true } astral-tokio-tar = { workspace = true } async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] } diff --git a/crates/uv-extract/src/error.rs b/crates/uv-extract/src/error.rs index ae2fdff1a..de9584729 100644 --- a/crates/uv-extract/src/error.rs +++ b/crates/uv-extract/src/error.rs @@ -14,12 +14,79 @@ pub enum Error { NonSingularArchive(Vec), #[error("The top-level of the archive must only contain a list directory, but it's empty")] EmptyArchive, + #[error("ZIP local header filename at offset {offset} does not use UTF-8 encoding")] + LocalHeaderNotUtf8 { offset: u64 }, + #[error("ZIP central directory entry filename at index {index} does not use UTF-8 encoding")] + CentralDirectoryEntryNotUtf8 { index: u64 }, #[error("Bad CRC (got {computed:08x}, expected {expected:08x}) for file: {}", path.display())] BadCrc32 { path: PathBuf, computed: u32, expected: u32, }, + #[error("Bad uncompressed size (got {computed:08x}, expected {expected:08x}) for file: {}", path.display())] + BadUncompressedSize { + path: PathBuf, + computed: u64, + expected: u64, + }, + #[error("Bad compressed size (got {computed:08x}, expected {expected:08x}) for file: {}", path.display())] + BadCompressedSize { + path: 
PathBuf, + computed: u64, + expected: u64, + }, + #[error("ZIP file contains multiple entries with different contents for: {}", path.display())] + DuplicateLocalFileHeader { path: PathBuf }, + #[error("ZIP file contains a local file header without a corresponding central-directory record entry for: {} ({offset})", path.display())] + MissingCentralDirectoryEntry { path: PathBuf, offset: u64 }, + #[error("ZIP file contains an end-of-central-directory record entry, but no local file header for: {} ({offset})", path.display())] + MissingLocalFileHeader { path: PathBuf, offset: u64 }, + #[error("ZIP file uses conflicting paths for the local file header at {} (got {}, expected {})", offset, local_path.display(), central_directory_path.display())] + ConflictingPaths { + offset: u64, + local_path: PathBuf, + central_directory_path: PathBuf, + }, + #[error("ZIP file uses conflicting checksums for the local file header and central-directory record (got {local_crc32}, expected {central_directory_crc32}) for: {} ({offset})", path.display())] + ConflictingChecksums { + path: PathBuf, + offset: u64, + local_crc32: u32, + central_directory_crc32: u32, + }, + #[error("ZIP file uses conflicting compressed sizes for the local file header and central-directory record (got {local_compressed_size}, expected {central_directory_compressed_size}) for: {} ({offset})", path.display())] + ConflictingCompressedSizes { + path: PathBuf, + offset: u64, + local_compressed_size: u64, + central_directory_compressed_size: u64, + }, + #[error("ZIP file uses conflicting uncompressed sizes for the local file header and central-directory record (got {local_uncompressed_size}, expected {central_directory_uncompressed_size}) for: {} ({offset})", path.display())] + ConflictingUncompressedSizes { + path: PathBuf, + offset: u64, + local_uncompressed_size: u64, + central_directory_uncompressed_size: u64, + }, + #[error("ZIP file contains trailing contents after the end-of-central-directory record")] + 
TrailingContents, + #[error( + "ZIP file reports a number of entries in the central directory that conflicts with the actual number of entries (got {actual}, expected {expected})" + )] + ConflictingNumberOfEntries { actual: u64, expected: u64 }, + #[error("Data descriptor is missing for file: {}", path.display())] + MissingDataDescriptor { path: PathBuf }, + #[error("File contains an unexpected data descriptor: {}", path.display())] + UnexpectedDataDescriptor { path: PathBuf }, + #[error( + "ZIP file end-of-central-directory record contains a comment that appears to be an embedded ZIP file" + )] + ZipInZip, + #[error("ZIP64 end-of-central-directory record contains unsupported extensible data")] + ExtensibleData, + #[error("ZIP file end-of-central-directory record contains multiple entries with the same path, but conflicting modes: {}", path.display())] + DuplicateExecutableFileHeader { path: PathBuf }, } impl Error { diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index f7fc797d7..4845be21f 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -1,17 +1,59 @@ use std::path::{Component, Path, PathBuf}; use std::pin::Pin; -use futures::StreamExt; -use rustc_hash::FxHashSet; +use async_zip::base::read::cd::Entry; +use async_zip::error::ZipError; +use futures::{AsyncReadExt, StreamExt}; +use rustc_hash::{FxHashMap, FxHashSet}; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; -use tracing::warn; +use tracing::{debug, warn}; use uv_distribution_filename::SourceDistExtension; +use uv_static::EnvVars; use crate::Error; const DEFAULT_BUF_SIZE: usize = 128 * 1024; +#[derive(Debug, Clone, PartialEq, Eq)] +struct LocalHeaderEntry { + /// The relative path of the entry, as computed from the local file header. + relpath: PathBuf, + /// The computed CRC32 checksum of the entry. + crc32: u32, + /// The computed compressed size of the entry. 
+ compressed_size: u64, + /// The computed uncompressed size of the entry. + uncompressed_size: u64, + /// Whether the entry has a data descriptor. + data_descriptor: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct ComputedEntry { + /// The computed CRC32 checksum of the entry. + crc32: u32, + /// The computed uncompressed size of the entry. + uncompressed_size: u64, + /// The computed compressed size of the entry. + compressed_size: u64, +} + +/// Returns `true` if ZIP validation is disabled. +fn insecure_no_validate() -> bool { + // TODO(charlie) Parse this in `EnvironmentOptions`. + let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else { + return false; + }; + let Some(value) = value.to_str() else { + return false; + }; + matches!( + value.to_lowercase().as_str(), + "y" | "yes" | "t" | "true" | "on" | "1" + ) +} + /// Unpack a `.zip` archive into the target directory, without requiring `Seek`. /// /// This is useful for unzipping files as they're being downloaded. If the archive @@ -41,15 +83,24 @@ pub async fn unzip( Some(path) } + // Determine whether ZIP validation is disabled. + let skip_validation = insecure_no_validate(); + let target = target.as_ref(); let mut reader = futures::io::BufReader::with_capacity(DEFAULT_BUF_SIZE, reader.compat()); let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader); let mut directories = FxHashSet::default(); + let mut local_headers = FxHashMap::default(); + let mut offset = 0; while let Some(mut entry) = zip.next_with_entry().await? { // Construct the (expected) path to the file on-disk. - let path = entry.reader().entry().filename().as_str()?; + let path = match entry.reader().entry().filename().as_str() { + Ok(path) => path, + Err(ZipError::StringNotUtf8) => return Err(Error::LocalHeaderNotUtf8 { offset }), + Err(err) => return Err(err.into()), + }; // Sanitize the file name to prevent directory traversal attacks. 
let Some(relpath) = enclosed_name(path) else { @@ -57,17 +108,54 @@ pub async fn unzip( // Close current file prior to proceeding, as per: // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ - zip = entry.skip().await?; + (.., zip) = entry.skip().await?; + + // Store the current offset. + offset = zip.offset(); + continue; }; - let path = target.join(&relpath); - let is_dir = entry.reader().entry().dir()?; + + let file_offset = entry.reader().entry().file_offset(); + let expected_compressed_size = entry.reader().entry().compressed_size(); + let expected_uncompressed_size = entry.reader().entry().uncompressed_size(); + let expected_data_descriptor = entry.reader().entry().data_descriptor(); // Either create the directory or write the file to disk. - if is_dir { + let path = target.join(&relpath); + let is_dir = entry.reader().entry().dir()?; + let computed = if is_dir { if directories.insert(path.clone()) { fs_err::tokio::create_dir_all(path).await?; } + + // If this is a directory, we expect the CRC32 to be 0. + if entry.reader().entry().crc32() != 0 { + if !skip_validation { + return Err(Error::BadCrc32 { + path: relpath.clone(), + computed: 0, + expected: entry.reader().entry().crc32(), + }); + } + } + + // If this is a directory, we expect the uncompressed size to be 0. + if entry.reader().entry().uncompressed_size() != 0 { + if !skip_validation { + return Err(Error::BadUncompressedSize { + path: relpath.clone(), + computed: 0, + expected: entry.reader().entry().uncompressed_size(), + }); + } + } + + ComputedEntry { + crc32: 0, + uncompressed_size: 0, + compressed_size: 0, + } } else { if let Some(parent) = path.parent() { if directories.insert(parent.to_path_buf()) { @@ -76,82 +164,374 @@ pub async fn unzip( } // We don't know the file permissions here, because we haven't seen the central directory yet. 
- let file = fs_err::tokio::File::create(&path).await?; - let size = entry.reader().entry().uncompressed_size(); - let mut writer = if let Ok(size) = usize::try_from(size) { - tokio::io::BufWriter::with_capacity(std::cmp::min(size, 1024 * 1024), file) - } else { - tokio::io::BufWriter::new(file) + let (actual_uncompressed_size, reader) = match fs_err::tokio::File::create_new(&path) + .await + { + Ok(file) => { + // Write the file to disk. + let size = entry.reader().entry().uncompressed_size(); + let mut writer = if let Ok(size) = usize::try_from(size) { + tokio::io::BufWriter::with_capacity(std::cmp::min(size, 1024 * 1024), file) + } else { + tokio::io::BufWriter::new(file) + }; + let mut reader = entry.reader_mut().compat(); + let bytes_read = tokio::io::copy(&mut reader, &mut writer).await?; + let reader = reader.into_inner(); + + (bytes_read, reader) + } + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { + debug!( + "Found duplicate local file header for: {}", + relpath.display() + ); + + // Read the existing file into memory. + let existing_contents = fs_err::tokio::read(&path).await?; + + // Read the entry into memory. + let mut expected_contents = Vec::with_capacity(existing_contents.len()); + let entry_reader = entry.reader_mut(); + let bytes_read = entry_reader.read_to_end(&mut expected_contents).await?; + + // Verify that the existing file contents match the expected contents. + if existing_contents != expected_contents { + return Err(Error::DuplicateLocalFileHeader { + path: relpath.clone(), + }); + } + + (bytes_read as u64, entry_reader) + } + Err(err) => return Err(err.into()), }; - let mut reader = entry.reader_mut().compat(); - tokio::io::copy(&mut reader, &mut writer).await?; + + // Validate the uncompressed size. 
+ if actual_uncompressed_size != expected_uncompressed_size { + if !(expected_compressed_size == 0 && expected_data_descriptor) { + if !skip_validation { + return Err(Error::BadUncompressedSize { + path: relpath.clone(), + computed: actual_uncompressed_size, + expected: expected_uncompressed_size, + }); + } + } + } + + // Validate the compressed size. + let actual_compressed_size = reader.bytes_read(); + if actual_compressed_size != expected_compressed_size { + if !(expected_compressed_size == 0 && expected_data_descriptor) { + if !skip_validation { + return Err(Error::BadCompressedSize { + path: relpath.clone(), + computed: actual_compressed_size, + expected: expected_compressed_size, + }); + } + } + } // Validate the CRC of any file we unpack // (It would be nice if async_zip made it harder to Not do this...) - let reader = reader.into_inner(); - let computed = reader.compute_hash(); - let expected = reader.entry().crc32(); - if computed != expected { - let error = Error::BadCrc32 { - path: relpath, - computed, - expected, - }; - // There are some cases where we fail to get a proper CRC. - // This is probably connected to out-of-line data descriptors - // which are problematic to access in a streaming context. - // In those cases the CRC seems to reliably be stubbed inline as 0, - // so we downgrade this to a (hidden-by-default) warning. 
- if expected == 0 { - warn!("presumed missing CRC: {error}"); - } else { - return Err(error); + let actual_crc32 = reader.compute_hash(); + let expected_crc32 = reader.entry().crc32(); + if actual_crc32 != expected_crc32 { + if !(expected_crc32 == 0 && expected_data_descriptor) { + if !skip_validation { + return Err(Error::BadCrc32 { + path: relpath.clone(), + computed: actual_crc32, + expected: expected_crc32, + }); + } + } + } + + ComputedEntry { + crc32: actual_crc32, + uncompressed_size: actual_uncompressed_size, + compressed_size: actual_compressed_size, + } + }; + + // Close current file prior to proceeding, as per: + // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ + let (descriptor, next) = entry.skip().await?; + + // Verify that the data descriptor field is consistent with the presence (or absence) of a + // data descriptor in the local file header. + if expected_data_descriptor && descriptor.is_none() { + if !skip_validation { + return Err(Error::MissingDataDescriptor { + path: relpath.clone(), + }); + } + } + if !expected_data_descriptor && descriptor.is_some() { + if !skip_validation { + return Err(Error::UnexpectedDataDescriptor { + path: relpath.clone(), + }); + } + } + + // If we have a data descriptor, validate it. 
+ if let Some(descriptor) = descriptor { + if descriptor.crc != computed.crc32 { + if !skip_validation { + return Err(Error::BadCrc32 { + path: relpath.clone(), + computed: computed.crc32, + expected: descriptor.crc, + }); + } + } + if descriptor.uncompressed_size != computed.uncompressed_size { + if !skip_validation { + return Err(Error::BadUncompressedSize { + path: relpath.clone(), + computed: computed.uncompressed_size, + expected: descriptor.uncompressed_size, + }); + } + } + if descriptor.compressed_size != computed.compressed_size { + if !skip_validation { + return Err(Error::BadCompressedSize { + path: relpath.clone(), + computed: computed.compressed_size, + expected: descriptor.compressed_size, + }); } } } - // Close current file prior to proceeding, as per: - // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ - zip = entry.skip().await?; + // Store the offset, for validation, and error if we see a duplicate file. + match local_headers.entry(file_offset) { + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(LocalHeaderEntry { + relpath, + crc32: computed.crc32, + uncompressed_size: computed.uncompressed_size, + compressed_size: expected_compressed_size, + data_descriptor: expected_data_descriptor, + }); + } + std::collections::hash_map::Entry::Occupied(..) => { + if !skip_validation { + return Err(Error::DuplicateLocalFileHeader { + path: relpath.clone(), + }); + } + } + } + + // Advance the reader to the next entry. + zip = next; + + // Store the current offset. + offset = zip.offset(); } - // On Unix, we need to set file permissions, which are stored in the central directory, at the - // end of the archive. The `ZipFileReader` reads until it sees a central directory signature, - // which indicates the first entry in the central directory. So we continue reading from there. + // Record the actual number of entries in the central directory. 
+ let mut num_entries = 0; + + // Track the file modes on Unix, to ensure that they're consistent across duplicates. #[cfg(unix)] - { - use std::fs::Permissions; - use std::os::unix::fs::PermissionsExt; + let mut modes = + FxHashMap::with_capacity_and_hasher(local_headers.len(), rustc_hash::FxBuildHasher); - let mut directory = async_zip::base::read::cd::CentralDirectoryReader::new(&mut reader); - while let Some(entry) = directory.next().await? { - if entry.dir()? { - continue; - } + let mut directory = async_zip::base::read::cd::CentralDirectoryReader::new(&mut reader, offset); + loop { + match directory.next().await? { + Entry::CentralDirectoryEntry(entry) => { + // Count the number of entries in the central directory. + num_entries += 1; - let Some(mode) = entry.unix_permissions() else { - continue; - }; - - // The executable bit is the only permission we preserve, otherwise we use the OS defaults. - // https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100 - let has_any_executable_bit = mode & 0o111; - if has_any_executable_bit != 0 { // Construct the (expected) path to the file on-disk. - let path = entry.filename().as_str()?; - let Some(path) = enclosed_name(path) else { + let path = match entry.filename().as_str() { + Ok(path) => path, + Err(ZipError::StringNotUtf8) => { + return Err(Error::CentralDirectoryEntryNotUtf8 { + index: num_entries - 1, + }); + } + Err(err) => return Err(err.into()), + }; + + // Sanitize the file name to prevent directory traversal attacks. 
+ let Some(relpath) = enclosed_name(path) else { continue; }; - let path = target.join(path); - let permissions = fs_err::tokio::metadata(&path).await?.permissions(); - if permissions.mode() & 0o111 != 0o111 { - fs_err::tokio::set_permissions( - &path, - Permissions::from_mode(permissions.mode() | 0o111), - ) - .await?; + // Validate that various fields are consistent between the local file header and the + // central directory entry. + match local_headers.remove(&entry.file_offset()) { + Some(local_header) => { + if local_header.relpath != relpath { + if !skip_validation { + return Err(Error::ConflictingPaths { + offset: entry.file_offset(), + local_path: local_header.relpath.clone(), + central_directory_path: relpath.clone(), + }); + } + } + if local_header.crc32 != entry.crc32() { + if !skip_validation { + return Err(Error::ConflictingChecksums { + path: relpath.clone(), + offset: entry.file_offset(), + local_crc32: local_header.crc32, + central_directory_crc32: entry.crc32(), + }); + } + } + if local_header.uncompressed_size != entry.uncompressed_size() { + if !skip_validation { + return Err(Error::ConflictingUncompressedSizes { + path: relpath.clone(), + offset: entry.file_offset(), + local_uncompressed_size: local_header.uncompressed_size, + central_directory_uncompressed_size: entry.uncompressed_size(), + }); + } + } + if local_header.compressed_size != entry.compressed_size() { + if !local_header.data_descriptor { + if !skip_validation { + return Err(Error::ConflictingCompressedSizes { + path: relpath.clone(), + offset: entry.file_offset(), + local_compressed_size: local_header.compressed_size, + central_directory_compressed_size: entry.compressed_size(), + }); + } + } + } + } + None => { + if !skip_validation { + return Err(Error::MissingLocalFileHeader { + path: relpath.clone(), + offset: entry.file_offset(), + }); + } + } } + + // On Unix, we need to set file permissions, which are stored in the central directory, at the + // end of the archive. 
The `ZipFileReader` reads until it sees a central directory signature, + // which indicates the first entry in the central directory. So we continue reading from there. + #[cfg(unix)] + { + use std::fs::Permissions; + use std::os::unix::fs::PermissionsExt; + + if entry.dir()? { + continue; + } + + let Some(mode) = entry.unix_permissions() else { + continue; + }; + + // If the file is included multiple times, ensure that the mode is consistent. + match modes.entry(relpath.clone()) { + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(mode); + } + std::collections::hash_map::Entry::Occupied(entry) => { + if !skip_validation && mode != *entry.get() { + return Err(Error::DuplicateExecutableFileHeader { + path: relpath.clone(), + }); + } + } + } + + // The executable bit is the only permission we preserve, otherwise we use the OS defaults. + // https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100 + let has_any_executable_bit = mode & 0o111; + if has_any_executable_bit != 0 { + let path = target.join(relpath); + let permissions = fs_err::tokio::metadata(&path).await?.permissions(); + if permissions.mode() & 0o111 != 0o111 { + fs_err::tokio::set_permissions( + &path, + Permissions::from_mode(permissions.mode() | 0o111), + ) + .await?; + } + } + } + } + Entry::EndOfCentralDirectoryRecord { + record, + comment, + extensible, + } => { + // Reject ZIP64 end-of-central-directory records with extensible data, as the safety + // tradeoffs don't outweigh the usefulness. We don't ever expect to encounter wheels + // that leverage this feature anyway. + if extensible { + if !skip_validation { + return Err(Error::ExtensibleData); + } + } + + // Sanitize the comment by rejecting bytes `01` to `08`. If the comment contains an + // embedded ZIP file, it _must_ contain one of these bytes, which are otherwise + // very rare (non-printing) characters. 
+ if comment.as_bytes().iter().any(|&b| (1..=8).contains(&b)) { + if !skip_validation { + return Err(Error::ZipInZip); + } + } + + // Validate that the reported number of entries match what we experienced while + // reading the local file headers. + if record.num_entries() != num_entries { + if !skip_validation { + return Err(Error::ConflictingNumberOfEntries { + expected: num_entries, + actual: record.num_entries(), + }); + } + } + + break; + } + } + } + + // If we didn't see the file in the central directory, it means it was not present in the + // archive. + if !skip_validation { + if let Some((key, value)) = local_headers.iter().next() { + return Err(Error::MissingCentralDirectoryEntry { + offset: *key, + path: value.relpath.clone(), + }); + } + } + + // Determine whether the reader is exhausted. + if !skip_validation { + let mut buffer = [0; 1]; + if reader.read(&mut buffer).await? > 0 { + // If the buffer contains a single null byte, ignore it. + if buffer[0] == 0 { + if reader.read(&mut buffer).await? > 0 { + return Err(Error::TrailingContents); + } + + warn!("Ignoring trailing null byte in ZIP archive"); + } else { + return Err(Error::TrailingContents); } } } diff --git a/crates/uv-metadata/src/lib.rs b/crates/uv-metadata/src/lib.rs index 12164fe6b..8b9838f5e 100644 --- a/crates/uv-metadata/src/lib.rs +++ b/crates/uv-metadata/src/lib.rs @@ -282,7 +282,7 @@ pub async fn read_metadata_async_stream( // Close current file to get access to the next one. See docs: // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ - zip = entry.skip().await?; + (.., zip) = entry.skip().await?; } Err(Error::MissingDistInfo) diff --git a/crates/uv-static/src/env_vars.rs b/crates/uv-static/src/env_vars.rs index 4649b9f21..9bbb70275 100644 --- a/crates/uv-static/src/env_vars.rs +++ b/crates/uv-static/src/env_vars.rs @@ -238,6 +238,14 @@ impl EnvVars { /// Equivalent to the `--allow-insecure-host` argument. 
pub const UV_INSECURE_HOST: &'static str = "UV_INSECURE_HOST"; + /// Disable ZIP validation for streamed wheels and ZIP-based source distributions. + /// + /// WARNING: Disabling ZIP validation can expose your system to security risks by bypassing + /// integrity checks and allowing uv to install potentially malicious ZIP files. If uv rejects + /// a ZIP file due to failing validation, it is likely that the file is malformed; consider + /// filing an issue with the package maintainer. + pub const UV_INSECURE_NO_ZIP_VALIDATION: &'static str = "UV_INSECURE_NO_ZIP_VALIDATION"; + /// Sets the maximum number of in-flight concurrent downloads that uv will /// perform at any given time. pub const UV_CONCURRENT_DOWNLOADS: &'static str = "UV_CONCURRENT_DOWNLOADS"; diff --git a/crates/uv/Cargo.toml b/crates/uv/Cargo.toml index 28d2e2a30..9083d226a 100644 --- a/crates/uv/Cargo.toml +++ b/crates/uv/Cargo.toml @@ -79,13 +79,13 @@ indexmap = { workspace = true } indicatif = { workspace = true } indoc = { workspace = true } itertools = { workspace = true } +h2 = { workspace = true } jiff = { workspace = true } miette = { workspace = true, features = ["fancy-no-backtrace"] } owo-colors = { workspace = true } petgraph = { workspace = true } regex = { workspace = true } reqwest = { workspace = true } -h2 = { workspace = true } rkyv = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true } @@ -94,6 +94,7 @@ tempfile = { workspace = true } textwrap = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } +tokio-util = { workspace = true } toml = { workspace = true } toml_edit = { workspace = true } tracing = { workspace = true } diff --git a/crates/uv/tests/it/extract.rs b/crates/uv/tests/it/extract.rs new file mode 100644 index 000000000..2a06d89bf --- /dev/null +++ b/crates/uv/tests/it/extract.rs @@ -0,0 +1,382 @@ +use futures::TryStreamExt; +use tokio_util::compat::FuturesAsyncReadCompatExt; + +async fn unzip(url: &str) -> 
anyhow::Result<(), uv_extract::Error> { + let response = reqwest::get(url).await.unwrap(); + let reader = response + .bytes_stream() + .map_err(std::io::Error::other) + .into_async_read(); + + let target = tempfile::TempDir::new()?; + uv_extract::stream::unzip(reader.compat(), target.path()).await +} + +#[tokio::test] +async fn malo_accept_comment() { + unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/accept/comment.zip").await.unwrap(); + insta::assert_debug_snapshot!((), @"()"); +} + +#[tokio::test] +async fn malo_accept_data_descriptor_zip64() { + unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/accept/data_descriptor_zip64.zip").await.unwrap(); + insta::assert_debug_snapshot!((), @"()"); +} + +#[tokio::test] +async fn malo_accept_data_descriptor() { + unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/accept/data_descriptor.zip").await.unwrap(); + insta::assert_debug_snapshot!((), @"()"); +} + +#[tokio::test] +async fn malo_accept_deflate() { + unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/accept/deflate.zip").await.unwrap(); + insta::assert_debug_snapshot!((), @"()"); +} + +#[tokio::test] +async fn malo_accept_normal_deflate_zip64_extra() { + unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/accept/normal_deflate_zip64_extra.zip").await.unwrap(); + insta::assert_debug_snapshot!((), @"()"); +} + +#[tokio::test] +async fn malo_accept_normal_deflate() { + unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/accept/normal_deflate.zip").await.unwrap(); + insta::assert_debug_snapshot!((), @"()"); +} + +#[tokio::test] +async fn malo_accept_store() { + 
unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/accept/store.zip").await.unwrap(); + insta::assert_debug_snapshot!((), @"()"); +} + +#[tokio::test] +async fn malo_accept_subdir() { + unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/accept/subdir.zip").await.unwrap(); + insta::assert_debug_snapshot!((), @"()"); +} + +#[tokio::test] +async fn malo_accept_zip64_eocd() { + unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/accept/zip64_eocd.zip").await.unwrap(); + insta::assert_debug_snapshot!((), @"()"); +} + +#[tokio::test] +async fn malo_iffy_8bitcomment() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/iffy/8bitcomment.zip").await; + insta::assert_debug_snapshot!(result, @r" + Err( + ZipInZip, + ) + "); +} + +#[tokio::test] +async fn malo_iffy_extra3byte() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/iffy/extra3byte.zip").await; + insta::assert_debug_snapshot!(result, @r" + Ok( + (), + ) + "); +} + +#[tokio::test] +async fn malo_iffy_non_ascii_original_name() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/iffy/non_ascii_original_name.zip").await; + insta::assert_debug_snapshot!(result, @r" + Err( + LocalHeaderNotUtf8 { + offset: 0, + }, + ) + "); +} + +#[tokio::test] +async fn malo_iffy_nosubdir() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/iffy/nosubdir.zip").await; + insta::assert_debug_snapshot!(result, @r" + Ok( + (), + ) + "); +} + +#[tokio::test] +async fn malo_iffy_prefix() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/iffy/prefix.zip").await; + 
insta::assert_debug_snapshot!(result, @r" + Err( + AsyncZip( + UnexpectedHeaderError( + 1482184792, + 67324752, + ), + ), + ) + "); +} + +#[tokio::test] +async fn malo_iffy_suffix_not_comment() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/iffy/suffix_not_comment.zip").await; + insta::assert_debug_snapshot!(result, @r" + Err( + TrailingContents, + ) + "); +} + +#[tokio::test] +async fn malo_iffy_zip64_eocd_extensible_data() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/iffy/zip64_eocd_extensible_data.zip").await; + insta::assert_debug_snapshot!(result, @r" + Err( + ExtensibleData, + ) + "); +} + +#[tokio::test] +async fn malo_iffy_zip64_extra_too_long() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/iffy/zip64_extra_too_long.zip").await; + insta::assert_debug_snapshot!(result, @r" + Err( + AsyncZip( + Zip64ExtendedInformationFieldTooLong { + expected: 16, + actual: 8, + }, + ), + ) + "); +} + +#[tokio::test] +async fn malo_iffy_zip64_extra_too_short() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/iffy/zip64_extra_too_short.zip").await; + insta::assert_debug_snapshot!(result, @r#" + Err( + BadCompressedSize { + path: "fixme", + computed: 7, + expected: 4294967295, + }, + ) + "#); +} + +#[tokio::test] +async fn malo_reject_cd_extra_entry() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/cd_extra_entry.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + MissingLocalFileHeader { + path: "fixme", + offset: 0, + } + "#); +} + +#[tokio::test] +async fn malo_reject_cd_missing_entry() { + let result = 
unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/cd_missing_entry.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + MissingCentralDirectoryEntry { + path: "two", + offset: 42, + } + "#); +} + +#[tokio::test] +async fn malo_reject_data_descriptor_bad_crc_0() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/data_descriptor_bad_crc_0.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadCrc32 { + path: "fixme", + computed: 2183870971, + expected: 0, + } + "#); +} + +#[tokio::test] +async fn malo_reject_data_descriptor_bad_crc() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/data_descriptor_bad_crc.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadCrc32 { + path: "fixme", + computed: 907060870, + expected: 1, + } + "#); +} + +#[tokio::test] +async fn malo_reject_data_descriptor_bad_csize() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/data_descriptor_bad_csize.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadCompressedSize { + path: "fixme", + computed: 7, + expected: 8, + } + "#); +} + +#[tokio::test] +async fn malo_reject_data_descriptor_bad_usize_no_sig() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/data_descriptor_bad_usize_no_sig.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadUncompressedSize { + path: "fixme", + computed: 5, + expected: 6, + } + "#); +} + +#[tokio::test] +async fn malo_reject_data_descriptor_bad_usize() { + let result = 
unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/data_descriptor_bad_usize.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadUncompressedSize { + path: "fixme", + computed: 5, + expected: 6, + } + "#); +} + +#[tokio::test] +async fn malo_reject_data_descriptor_zip64_csize() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/data_descriptor_zip64_csize.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadCompressedSize { + path: "fixme", + computed: 7, + expected: 8, + } + "#); +} + +#[tokio::test] +async fn malo_reject_data_descriptor_zip64_usize() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/data_descriptor_zip64_usize.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadUncompressedSize { + path: "fixme", + computed: 5, + expected: 6, + } + "#); +} + +#[tokio::test] +async fn malo_reject_dupe_eocd() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/dupe_eocd.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @"TrailingContents"); +} + +#[tokio::test] +async fn malo_reject_shortextra() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/shortextra.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r" + AsyncZip( + InvalidExtraFieldHeader( + 9, + ), + ) + "); +} + +#[tokio::test] +async fn malo_reject_zip64_extra_csize() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/zip64_extra_csize.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadCompressedSize { + path: "fixme", + computed: 7, + expected: 8, + } + "#); +} + 
+#[tokio::test] +async fn malo_reject_zip64_extra_usize() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/reject/zip64_extra_usize.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadUncompressedSize { + path: "fixme", + computed: 5, + expected: 6, + } + "#); +} + +#[tokio::test] +async fn malo_malicious_second_unicode_extra() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/malicious/second_unicode_extra.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r" + AsyncZip( + DuplicateExtraFieldHeader( + 28789, + ), + ) + "); +} + +#[tokio::test] +async fn malo_malicious_short_usize_zip64() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/malicious/short_usize_zip64.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r" + AsyncZip( + Zip64ExtendedInformationFieldTooLong { + expected: 16, + actual: 0, + }, + ) + "); +} + +#[tokio::test] +async fn malo_malicious_short_usize() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/malicious/short_usize.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @r#" + BadUncompressedSize { + path: "file", + computed: 51, + expected: 9, + } + "#); +} + +#[tokio::test] +async fn malo_malicious_zip64_eocd_confusion() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/malicious/zip64_eocd_confusion.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @"ExtensibleData"); +} + +#[tokio::test] +async fn malo_malicious_unicode_extra_chain() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/malicious/unicode_extra_chain.zip").await.unwrap_err(); + 
insta::assert_debug_snapshot!(result, @r" + AsyncZip( + DuplicateExtraFieldHeader( + 28789, + ), + ) + "); +} + +#[tokio::test] +async fn malo_malicious_zipinzip() { + let result = unzip("https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/0723f54ceb33a4fdc7f2eddc19635cd704d61c84/malicious/zipinzip.zip").await.unwrap_err(); + insta::assert_debug_snapshot!(result, @"ZipInZip"); +} diff --git a/crates/uv/tests/it/main.rs b/crates/uv/tests/it/main.rs index ad067bc11..0a02b3c44 100644 --- a/crates/uv/tests/it/main.rs +++ b/crates/uv/tests/it/main.rs @@ -131,4 +131,5 @@ mod version; #[cfg(all(feature = "python", feature = "pypi"))] mod workflow; +mod extract; mod workspace; diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index 0d374d8c4..4dc571a4c 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -9247,7 +9247,7 @@ fn bad_crc32() -> Result<()> { Resolved 7 packages in [TIME] × Failed to download `osqp @ https://files.pythonhosted.org/packages/00/04/5959347582ab970e9b922f27585d34f7c794ed01125dac26fb4e7dd80205/osqp-1.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl` ├─▶ Failed to extract archive: osqp-1.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - ╰─▶ Bad CRC (got ca5f1131, expected d5c95dfa) for file: osqp/ext_builtin.cpython-311-x86_64-linux-gnu.so + ╰─▶ Bad uncompressed size (got 0007b829, expected 0007b828) for file: osqp/ext_builtin.cpython-311-x86_64-linux-gnu.so " ); @@ -12034,6 +12034,175 @@ fn config_settings_package() -> Result<()> { Ok(()) } +#[test] +fn reject_invalid_streaming_zip() { + let context = TestContext::new("3.12").with_exclude_newer("2025-07-10T00:00:00Z"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("cbwheelstreamtest==0.0.1"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + × Failed to download `cbwheelstreamtest==0.0.1` + ├─▶ Failed to extract archive: 
cbwheelstreamtest-0.0.1-py2.py3-none-any.whl + ╰─▶ ZIP file contains multiple entries with different contents for: cbwheelstreamtest/__init__.py + " + ); +} + +#[test] +fn reject_invalid_double_zip() { + let context = TestContext::new("3.12").with_exclude_newer("2025-07-10T00:00:00Z"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("cbwheelziptest==0.0.2"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + Resolved 2 packages in [TIME] + × Failed to download `cbwheelziptest==0.0.2` + ├─▶ Failed to extract archive: cbwheelziptest-0.0.2-py2.py3-none-any.whl + ╰─▶ ZIP file contains trailing contents after the end-of-central-directory record + " + ); +} + +#[test] +fn reject_invalid_central_directory_offset() { + let context = TestContext::new("3.12"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip1/attrs-25.3.0-py3-none-any.whl"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + × Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip1/attrs-25.3.0-py3-none-any.whl` + ├─▶ Failed to extract archive: attrs-25.3.0-py3-none-any.whl + ├─▶ Failed to read from zip file + ╰─▶ the end of central directory offset (0xf0d9) did not match the actual offset (0xf9ac) + " + ); +} + +#[test] +fn reject_invalid_crc32_mismatch() { + let context = TestContext::new("3.12"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip2/attrs-25.3.0-py3-none-any.whl"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + × Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip2/attrs-25.3.0-py3-none-any.whl` + ├─▶ Failed to extract archive: attrs-25.3.0-py3-none-any.whl + ╰─▶ Bad uncompressed size (got 0000001b, 
expected 0000000c) for file: sitecustomize.py + " + ); +} + +#[test] +fn reject_invalid_crc32_non_data_descriptor() { + let context = TestContext::new("3.12"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip3/attrs-25.3.0-py3-none-any.whl"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + × Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip3/attrs-25.3.0-py3-none-any.whl` + ├─▶ Failed to extract archive: attrs-25.3.0-py3-none-any.whl + ╰─▶ Bad uncompressed size (got 0000001b, expected 0000000c) for file: sitecustomize.py + " + ); +} + +#[test] +fn reject_invalid_duplicate_extra_field() { + let context = TestContext::new("3.12"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip4/attrs-25.3.0-py3-none-any.whl"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip4/attrs-25.3.0-py3-none-any.whl` + ├─▶ Failed to unzip wheel: attrs-25.3.0-py3-none-any.whl + ╰─▶ an extra field with id 0x7075 was duplicated in the header + " + ); +} + +#[test] +fn reject_invalid_short_usize() { + let context = TestContext::new("3.12"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip5/attrs-25.3.0-py3-none-any.whl"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + × Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip5/attrs-25.3.0-py3-none-any.whl` + ├─▶ Failed to extract archive: attrs-25.3.0-py3-none-any.whl + ╰─▶ Bad CRC (got 5100f20e, expected de0ffd6e) for file: attr/_make.py + " + ); +} + +#[test] +fn reject_invalid_chained_extra_field() 
{ + let context = TestContext::new("3.12"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip6/attrs-25.3.0-py3-none-any.whl"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip6/attrs-25.3.0-py3-none-any.whl` + ├─▶ Failed to unzip wheel: attrs-25.3.0-py3-none-any.whl + ╰─▶ an extra field with id 0x7075 was duplicated in the header + " + ); +} + +#[test] +fn reject_invalid_short_usize_zip64() { + let context = TestContext::new("3.12"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip7/attrs-25.3.0-py3-none-any.whl"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip7/attrs-25.3.0-py3-none-any.whl` + ├─▶ Failed to unzip wheel: attrs-25.3.0-py3-none-any.whl + ╰─▶ zip64 extended information field was too long: expected 16 bytes, but 0 bytes were provided + " + ); +} + /// Test that build dependencies respect locked versions from the resolution. #[test] fn pip_install_build_dependencies_respect_locked_versions() -> Result<()> { diff --git a/docs/reference/environment.md b/docs/reference/environment.md index 50083d99e..dddf09554 100644 --- a/docs/reference/environment.md +++ b/docs/reference/environment.md @@ -162,6 +162,15 @@ to use when creating a new project. Equivalent to the `--allow-insecure-host` argument. +### `UV_INSECURE_NO_ZIP_VALIDATION` + +Disable ZIP validation for streamed wheels and ZIP-based source distributions. + +WARNING: Disabling ZIP validation can expose your system to security risks by bypassing +integrity checks and allowing uv to install potentially malicious ZIP files. 
If uv rejects +a ZIP file due to failing validation, it is likely that the file is malformed; consider +filing an issue with the package maintainer. + ### `UV_INSTALLER_GHE_BASE_URL` The URL from which to download uv using the standalone installer and `self update` feature,