From 340cb67a8b2a708996bd87369d2cadf48ef9df63 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 16 Feb 2024 23:17:36 -0500 Subject: [PATCH] Allow non-nested archives for `hexdump` and others (#1564) ## Summary#1562 It turns out that `hexdump` uses an invalid source distribution format whereby the contents aren't nested in a top-level directory -- instead, they're all just flattened at the top-level. In looking at pip's source (https://github.com/pypa/pip/blob/51de88ca6459fdd5213f86a54b021a80884572f9/src/pip/_internal/utils/unpacking.py#L62), it only strips the top-level directory if all entries have the same directory prefix (i.e., if it's the only thing in the directory). This PR accommodates these "invalid" distributions. I can't find any history on this method in `pip`. It looks like it dates back over 15 years ago, to before `pip` was even called `pip`. Closes https://github.com/astral-sh/uv/issues/1376. --- crates/uv-build/src/lib.rs | 11 +++++++---- crates/uv-distribution/src/source/mod.rs | 6 +++++- crates/uv-extract/src/error.rs | 6 ++++-- crates/uv-extract/src/sync.rs | 16 ++++++++++------ 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/crates/uv-build/src/lib.rs b/crates/uv-build/src/lib.rs index b7efc1ab9..5b4637abb 100644 --- a/crates/uv-build/src/lib.rs +++ b/crates/uv-build/src/lib.rs @@ -226,7 +226,7 @@ impl Pep517Backend { import sys sys.path = [{backend_path}] + sys.path - {import} + {import} "#, backend_path = backend_path_encoded} } } @@ -305,8 +305,11 @@ impl SourceBuild { .map_err(|err| Error::Extraction(extracted.clone(), err))?; // Extract the top-level directory from the archive. - uv_extract::strip_component(&extracted) - .map_err(|err| Error::Extraction(extracted.clone(), err))? + match uv_extract::strip_component(&extracted) { + Ok(top_level) => top_level, + Err(uv_extract::Error::NonSingularArchive(_)) => extracted, + Err(err) => return Err(Error::Extraction(extracted.clone(), err)), + } }; let source_tree = if let Some(subdir) = subdirectory { source_root.join(subdir) @@ -614,7 +617,7 @@ impl SourceBuild { let script = formatdoc! { r#"{} print(backend.build_{}("{}", metadata_directory={})) - "#, pep517_backend.backend_import(), self.build_kind, escaped_wheel_dir, metadata_directory + "#, pep517_backend.backend_import(), self.build_kind, escaped_wheel_dir, metadata_directory }; let span = info_span!( "run_python_script", diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index f271655ce..3c924e656 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -787,7 +787,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { drop(span); // Extract the top-level directory. - let extracted = uv_extract::strip_component(temp_dir.path())?; + let extracted = match uv_extract::strip_component(temp_dir.path()) { + Ok(top_level) => top_level, + Err(uv_extract::Error::NonSingularArchive(_)) => temp_dir.into_path(), + Err(err) => return Err(err.into()), + }; // Persist it to the cache. fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent")) diff --git a/crates/uv-extract/src/error.rs b/crates/uv-extract/src/error.rs index 520d35133..4f9578560 100644 --- a/crates/uv-extract/src/error.rs +++ b/crates/uv-extract/src/error.rs @@ -13,7 +13,9 @@ pub enum Error { #[error("Unsupported archive type: {0}")] UnsupportedArchive(PathBuf), #[error( - "The top level of the archive must only contain a list directory, but it contains: {0:?}" + "The top-level of the archive must only contain a list directory, but it contains: {0:?}" )] - InvalidArchive(Vec), + NonSingularArchive(Vec), + #[error("The top-level of the archive must only contain a list directory, but it's empty")] + EmptyArchive, } diff --git a/crates/uv-extract/src/sync.rs b/crates/uv-extract/src/sync.rs index d0ab1746e..7c5addd8f 100644 --- a/crates/uv-extract/src/sync.rs +++ b/crates/uv-extract/src/sync.rs @@ -115,10 +115,14 @@ pub fn strip_component(source: impl AsRef) -> Result { // TODO(konstin): Verify the name of the directory. let top_level = fs_err::read_dir(source.as_ref())?.collect::>>()?; - let [root] = top_level.as_slice() else { - return Err(Error::InvalidArchive( - top_level.into_iter().map(|e| e.file_name()).collect(), - )); - }; - Ok(root.path()) + match top_level.as_slice() { + [root] => Ok(root.path()), + [] => Err(Error::EmptyArchive), + _ => Err(Error::NonSingularArchive( + top_level + .into_iter() + .map(|entry| entry.file_name()) + .collect(), + )), + } }