diff --git a/Cargo.lock b/Cargo.lock index 29afe8699..342455b3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -184,9 +184,8 @@ dependencies = [ [[package]] name = "astral-tokio-tar" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1abb2bfba199d9ec4759b797115ba6ae435bdd920ce99783bb53aeff57ba919b" +version = "0.5.3" +source = "git+https://github.com/astral-sh/tokio-tar?rev=f1488188f1d1b54a73eb0c42a8b8f4b9ee87d688#f1488188f1d1b54a73eb0c42a8b8f4b9ee87d688" dependencies = [ "filetime", "futures-core", diff --git a/Cargo.toml b/Cargo.toml index 5b3fcafd2..880dd67ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,7 +77,7 @@ anstream = { version = "0.6.15" } anyhow = { version = "1.0.89" } arcstr = { version = "1.2.0" } arrayvec = { version = "0.7.6" } -astral-tokio-tar = { version = "0.5.2" } +astral-tokio-tar = { git = "https://github.com/astral-sh/tokio-tar", rev = "f1488188f1d1b54a73eb0c42a8b8f4b9ee87d688" } async-channel = { version = "2.3.1" } async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] } async-trait = { version = "0.1.82" } diff --git a/crates/uv-extract/src/error.rs b/crates/uv-extract/src/error.rs index de9584729..4b67ff865 100644 --- a/crates/uv-extract/src/error.rs +++ b/crates/uv-extract/src/error.rs @@ -2,12 +2,14 @@ use std::{ffi::OsString, path::PathBuf}; #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Failed to read from zip file")] - Zip(#[from] zip::result::ZipError), - #[error("Failed to read from zip file")] - AsyncZip(#[from] async_zip::error::ZipError), #[error("I/O operation failed during extraction")] - Io(#[from] std::io::Error), + Io(std::io::Error), + #[error("Invalid zip file")] + Zip(#[from] zip::result::ZipError), + #[error("Invalid zip file structure")] + AsyncZip(#[from] async_zip::error::ZipError), + #[error("Invalid tar file")] + Tar(#[from] tokio_tar::TarError), #[error( "The top-level of the archive must only contain a list directory, 
but it contains: {0:?}" )] @@ -90,6 +92,31 @@ pub enum Error { } impl Error { + /// When reading from an archive, the error can either be an IO error from the underlying + /// operating system, or an error with the archive. Both get wrapped into an IO error through + /// e.g., `io::copy`. This method extracts zip and tar errors, to distinguish them from invalid + /// archives. + pub(crate) fn io_or_compression(err: std::io::Error) -> Self { + if err.kind() != std::io::ErrorKind::Other { + return Self::Io(err); + } + + let err = match err.downcast::<tokio_tar::TarError>() { + Ok(tar_err) => return Self::Tar(tar_err), + Err(err) => err, + }; + let err = match err.downcast::<async_zip::error::ZipError>() { + Ok(zip_err) => return Self::AsyncZip(zip_err), + Err(err) => err, + }; + let err = match err.downcast::<zip::result::ZipError>() { + Ok(zip_err) => return Self::Zip(zip_err), + Err(err) => err, + }; + + Self::Io(err) + } + /// Returns `true` if the error is due to the server not supporting HTTP streaming. Most /// commonly, this is due to serving ZIP files with features that are incompatible with /// streaming, like data descriptors. diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 4845be21f..495abaf00 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -126,7 +126,9 @@ pub async fn unzip( let is_dir = entry.reader().entry().dir()?; let computed = if is_dir { if directories.insert(path.clone()) { - fs_err::tokio::create_dir_all(path).await?; + fs_err::tokio::create_dir_all(path) + .await + .map_err(Error::Io)?; } // If this is a directory, we expect the CRC32 to be 0.
@@ -159,7 +161,9 @@ pub async fn unzip( } else { if let Some(parent) = path.parent() { if directories.insert(parent.to_path_buf()) { - fs_err::tokio::create_dir_all(parent).await?; + fs_err::tokio::create_dir_all(parent) + .await + .map_err(Error::Io)?; } } @@ -176,7 +180,9 @@ pub async fn unzip( tokio::io::BufWriter::new(file) }; let mut reader = entry.reader_mut().compat(); - let bytes_read = tokio::io::copy(&mut reader, &mut writer).await?; + let bytes_read = tokio::io::copy(&mut reader, &mut writer) + .await + .map_err(Error::io_or_compression)?; let reader = reader.into_inner(); (bytes_read, reader) @@ -188,12 +194,15 @@ pub async fn unzip( ); // Read the existing file into memory. - let existing_contents = fs_err::tokio::read(&path).await?; + let existing_contents = fs_err::tokio::read(&path).await.map_err(Error::Io)?; // Read the entry into memory. let mut expected_contents = Vec::with_capacity(existing_contents.len()); let entry_reader = entry.reader_mut(); - let bytes_read = entry_reader.read_to_end(&mut expected_contents).await?; + let bytes_read = entry_reader + .read_to_end(&mut expected_contents) + .await + .map_err(Error::io_or_compression)?; // Verify that the existing file contents match the expected contents. if existing_contents != expected_contents { @@ -204,7 +213,7 @@ pub async fn unzip( (bytes_read as u64, entry_reader) } - Err(err) => return Err(err.into()), + Err(err) => return Err(Error::Io(err)), }; // Validate the uncompressed size. @@ -458,13 +467,17 @@ pub async fn unzip( let has_any_executable_bit = mode & 0o111; if has_any_executable_bit != 0 { let path = target.join(relpath); - let permissions = fs_err::tokio::metadata(&path).await?.permissions(); + let permissions = fs_err::tokio::metadata(&path) + .await + .map_err(Error::Io)? 
+ .permissions(); if permissions.mode() & 0o111 != 0o111 { fs_err::tokio::set_permissions( &path, Permissions::from_mode(permissions.mode() | 0o111), ) - .await?; + .await + .map_err(Error::Io)?; } } } @@ -522,10 +535,10 @@ pub async fn unzip( // Determine whether the reader is exhausted. if !skip_validation { let mut buffer = [0; 1]; - if reader.read(&mut buffer).await? > 0 { + if reader.read(&mut buffer).await.map_err(Error::Io)? > 0 { // If the buffer contains a single null byte, ignore it. if buffer[0] == 0 { - if reader.read(&mut buffer).await? > 0 { + if reader.read(&mut buffer).await.map_err(Error::Io)? > 0 { return Err(Error::TrailingContents); } @@ -618,7 +631,9 @@ pub async fn untar_gz( .set_preserve_permissions(false) .set_allow_external_symlinks(false) .build(); - Ok(untar_in(archive, target.as_ref()).await?) + untar_in(archive, target.as_ref()) + .await + .map_err(Error::io_or_compression) } /// Unpack a `.tar.bz2` archive into the target directory, without requiring `Seek`. @@ -638,7 +653,9 @@ pub async fn untar_bz2( .set_preserve_permissions(false) .set_allow_external_symlinks(false) .build(); - Ok(untar_in(archive, target.as_ref()).await?) + untar_in(archive, target.as_ref()) + .await + .map_err(Error::io_or_compression) } /// Unpack a `.tar.zst` archive into the target directory, without requiring `Seek`. @@ -658,7 +675,9 @@ pub async fn untar_zst( .set_preserve_permissions(false) .set_allow_external_symlinks(false) .build(); - Ok(untar_in(archive, target.as_ref()).await?) + untar_in(archive, target.as_ref()) + .await + .map_err(Error::io_or_compression) } /// Unpack a `.tar.xz` archive into the target directory, without requiring `Seek`. 
@@ -678,7 +697,9 @@ pub async fn untar_xz( .set_preserve_permissions(false) .set_allow_external_symlinks(false) .build(); - untar_in(archive, target.as_ref()).await?; + untar_in(archive, target.as_ref()) + .await + .map_err(Error::io_or_compression)?; Ok(()) } @@ -697,7 +718,9 @@ pub async fn untar( .set_preserve_permissions(false) .set_allow_external_symlinks(false) .build(); - untar_in(archive, target.as_ref()).await?; + untar_in(archive, target.as_ref()) + .await + .map_err(Error::io_or_compression)?; Ok(()) } diff --git a/crates/uv-extract/src/sync.rs b/crates/uv-extract/src/sync.rs index b32add257..9674715b5 100644 --- a/crates/uv-extract/src/sync.rs +++ b/crates/uv-extract/src/sync.rs @@ -37,7 +37,7 @@ pub fn unzip( if file.is_dir() { let mut directories = directories.lock().unwrap(); if directories.insert(path.clone()) { - fs_err::create_dir_all(path)?; + fs_err::create_dir_all(path).map_err(Error::Io)?; } return Ok(()); } @@ -45,12 +45,12 @@ pub fn unzip( if let Some(parent) = path.parent() { let mut directories = directories.lock().unwrap(); if directories.insert(parent.to_path_buf()) { - fs_err::create_dir_all(parent)?; + fs_err::create_dir_all(parent).map_err(Error::Io)?; } } // Copy the file contents. - let outfile = fs_err::File::create(&path)?; + let outfile = fs_err::File::create(&path).map_err(Error::Io)?; let size = file.size(); if size > 0 { let mut writer = if let Ok(size) = usize::try_from(size) { @@ -58,7 +58,7 @@ pub fn unzip( } else { std::io::BufWriter::new(outfile) }; - std::io::copy(&mut file, &mut writer)?; + std::io::copy(&mut file, &mut writer).map_err(Error::io_or_compression)?; } // See `uv_extract::stream::unzip`. 
For simplicity, this is identical with the code there except for being @@ -72,12 +72,13 @@ pub fn unzip( // https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100 let has_any_executable_bit = mode & 0o111; if has_any_executable_bit != 0 { - let permissions = fs_err::metadata(&path)?.permissions(); + let permissions = fs_err::metadata(&path).map_err(Error::Io)?.permissions(); if permissions.mode() & 0o111 != 0o111 { fs_err::set_permissions( &path, Permissions::from_mode(permissions.mode() | 0o111), - )?; + ) + .map_err(Error::Io)?; } } } @@ -97,8 +98,10 @@ pub fn unzip( /// This function returns the path to that top-level directory. pub fn strip_component(source: impl AsRef<Path>) -> Result<PathBuf, Error> { // TODO(konstin): Verify the name of the directory. - let top_level = - fs_err::read_dir(source.as_ref())?.collect::<std::io::Result<Vec<fs_err::DirEntry>>>()?; + let top_level = fs_err::read_dir(source.as_ref()) + .map_err(Error::Io)? + .collect::<std::io::Result<Vec<fs_err::DirEntry>>>() + .map_err(Error::Io)?; match top_level.as_slice() { [root] => Ok(root.path()), [] => Err(Error::EmptyArchive), diff --git a/crates/uv/src/commands/build_frontend.rs b/crates/uv/src/commands/build_frontend.rs index 1f7a8ed7f..230a0eca9 100644 --- a/crates/uv/src/commands/build_frontend.rs +++ b/crates/uv/src/commands/build_frontend.rs @@ -390,11 +390,33 @@ async fn build_impl( source: String, #[source] cause: anyhow::Error, + #[help] + help: Option<String>, } + let help = if let Error::Extract(uv_extract::Error::Tar(err)) = &err { + // TODO(konsti): astral-tokio-tar should use a proper error instead of + // encoding everything in strings + if err.to_string().contains("/bin/python") + && std::error::Error::source(err).is_some_and(|err| { + err.to_string().ends_with("outside of the target directory") + }) + { + Some( + "This file seems to be part of a virtual environment. Virtual environments must be excluded from source distributions."
+ .to_string(), + ) + } else { + None + } + } else { + None + }; + let report = miette::Report::new(Diagnostic { source: source.to_string(), cause: err.into(), + help, }); anstream::eprint!("{report:?}"); diff --git a/crates/uv/tests/it/build.rs b/crates/uv/tests/it/build.rs index b4e860329..138a218bb 100644 --- a/crates/uv/tests/it/build.rs +++ b/crates/uv/tests/it/build.rs @@ -2025,3 +2025,62 @@ fn force_pep517() -> Result<()> { Ok(()) } + +/// Check that we show a hint when there's a venv in the source distribution. +/// +/// +// Windows uses trampolines instead of symlinks. You don't want those in your source distribution +// either, but that's for the build backend to catch, we're only checking for the unix error hint +// in uv here. +#[cfg(unix)] +#[test] +fn venv_included_in_sdist() -> Result<()> { + let context = TestContext::new("3.12"); + + context + .init() + .arg("--name") + .arg("project") + .arg("--build-backend") + .arg("hatchling") + .assert() + .success(); + + let pyproject_toml = indoc! {r#" + [project] + name = "project" + version = "0.1.0" + requires-python = ">=3.12.0" + + [tool.hatch.build.targets.sdist.force-include] + ".venv" = ".venv" + + [build-system] + requires = ["hatchling"] + build-backend = "hatchling.build" + "#}; + + context + .temp_dir + .child("pyproject.toml") + .write_str(pyproject_toml)?; + + context.venv().assert().success(); + + // context.filters() + uv_snapshot!(context.filters(), context.build(), @r" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Building source distribution... + × Failed to build `[TEMP_DIR]/` + ├─▶ Invalid tar file + ├─▶ failed to unpack `[CACHE_DIR]/sdists-v9/[TMP]/python` + ╰─▶ symlink destination for [PYTHON-3.12] is outside of the target directory + help: This file seems to be part of a virtual environment. Virtual environments must be excluded from source distributions. 
+ "); + + Ok(()) +} diff --git a/crates/uv/tests/it/extract.rs b/crates/uv/tests/it/extract.rs index a3defd552..4595d32ff 100644 --- a/crates/uv/tests/it/extract.rs +++ b/crates/uv/tests/it/extract.rs @@ -22,7 +22,7 @@ async fn unzip(url: &str) -> anyhow::Result<(), uv_extract::Error> { .map_err(std::io::Error::other) .into_async_read(); - let target = tempfile::TempDir::new()?; + let target = tempfile::TempDir::new().map_err(uv_extract::Error::Io)?; uv_extract::stream::unzip(reader.compat(), target.path()).await } diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index 935f931a2..fa3357406 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -12108,7 +12108,7 @@ fn reject_invalid_central_directory_offset() { Resolved 1 package in [TIME] × Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip1/attrs-25.3.0-py3-none-any.whl` ├─▶ Failed to extract archive: attrs-25.3.0-py3-none-any.whl - ├─▶ Failed to read from zip file + ├─▶ Invalid zip file structure ╰─▶ the end of central directory offset (0xf0d9) did not match the actual offset (0xf9ac) " );