From 3bb7f67c715268b6bd00250fd291cf8d13fad9e8 Mon Sep 17 00:00:00 2001 From: Eashwar Ranganathan Date: Thu, 11 Dec 2025 02:37:35 -0800 Subject: [PATCH] Explicitly set EntryType for file entries in tar (#17043) ## Summary This PR explicitly sets the entry type for files in an sdist. This changes the entry type from `AREGTYPE` (the 'legacy' regular file type) to `REGTYPE` (the 'normal' regular file type) in the generated tar. This change works around a bug in the Python `tarfile` module that causes all entries after a certain point in the tar to be silently ignored if any entry matches some very specific conditions. In `maturin`, this was very visible since the `PKG-INFO` was written at the very end, so `twine check` would loudly complain that the `PKG-INFO` was missing and that the sdist was invalid. In `uv`, the `PKG-INFO` is written at the beginning, so this issue is unlikely to be caught. Note that this change does mean that sdists created with newer versions of the uv build backend will not be byte-for-byte identical to sdists from an older version. See https://github.com/PyO3/maturin/issues/2855#issuecomment-3546501132 ## Test Plan This is the same as the change that was made in maturin to work around the same issue. --------- Co-authored-by: konstin --- crates/uv-build-backend/src/lib.rs | 2 +- crates/uv-build-backend/src/source_dist.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs index c9dc4e6d2..1bdb68a67 100644 --- a/crates/uv-build-backend/src/lib.rs +++ b/crates/uv-build-backend/src/lib.rs @@ -662,7 +662,7 @@ mod tests { // Check that the source dist is reproducible across platforms. 
assert_snapshot!( format!("{:x}", sha2::Sha256::digest(fs_err::read(&source_dist_path).unwrap())), - @"871d1f859140721b67cbeaca074e7a2740c88c38028d0509eba87d1285f1da9e" + @"bb74bff575b135bb39e5c9bce56349441fb0923bb8857e32a5eaf34ec1843967" ); // Check both the files we report and the actual files assert_snapshot!(format_file_list(build.source_dist_list_files, src.path()), @r" diff --git a/crates/uv-build-backend/src/source_dist.rs b/crates/uv-build-backend/src/source_dist.rs index 5a3e77b5d..2f0889fe7 100644 --- a/crates/uv-build-backend/src/source_dist.rs +++ b/crates/uv-build-backend/src/source_dist.rs @@ -299,6 +299,10 @@ impl TarGzWriter { impl DirectoryWriter for TarGzWriter { fn write_bytes(&mut self, path: &str, bytes: &[u8]) -> Result<(), Error> { let mut header = Header::new_gnu(); + // Work around bug in Python's std tar module + // https://github.com/python/cpython/issues/141707 + // https://github.com/astral-sh/uv/pull/17043#issuecomment-3636841022 + header.set_entry_type(EntryType::Regular); header.set_size(bytes.len() as u64); // Reasonable default to avoid 0o000 permissions, the user's umask will be applied on // unpacking. @@ -312,6 +316,10 @@ impl DirectoryWriter for TarGzWriter { fn write_file(&mut self, path: &str, file: &Path) -> Result<(), Error> { let metadata = fs_err::metadata(file)?; let mut header = Header::new_gnu(); + // Work around bug in Python's std tar module + // https://github.com/python/cpython/issues/141707 + // https://github.com/astral-sh/uv/pull/17043#issuecomment-3636841022 + header.set_entry_type(EntryType::Regular); // Preserve the executable bit, especially for scripts #[cfg(unix)] let executable_bit = {