Explicitly set EntryType for file entries in tar (#17043)

## Summary
This PR explicitly sets the entry type for files in an sdist. This
changes the entry type from `AREGTYPE` (the 'legacy' regular file type)
to `REGTYPE` (the 'normal' regular file type) in the generated tar.

This change works around a bug in the Python `tarfile` module that
causes all entries after a certain point in the tar to be silently
ignored if any entry matches some very specific conditions. In `maturin`,
this was very visible since the `PKG-INFO` was written at the very end,
so `twine check` would loudly complain that the `PKG-INFO` was missing
and that the sdist was invalid. In `uv`, the `PKG-INFO` is written at the
beginning, so this issue is unlikely to be caught.

Note that this change does mean that sdists created with newer versions
of the uv build backend will not be byte-for-byte identical with sdists
from an older version.

See https://github.com/PyO3/maturin/issues/2855#issuecomment-3546501132

## Test Plan
This is the same as the change that was made in `maturin` to work around
the same issue.

---------

Co-authored-by: konstin <konstin@mailbox.org>
This commit is contained in:
Eashwar Ranganathan 2025-12-11 02:37:35 -08:00 committed by GitHub
parent caac4814df
commit 3bb7f67c71
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 9 additions and 1 deletions

View File

@ -662,7 +662,7 @@ mod tests {
// Check that the source dist is reproducible across platforms.
assert_snapshot!(
format!("{:x}", sha2::Sha256::digest(fs_err::read(&source_dist_path).unwrap())),
@"871d1f859140721b67cbeaca074e7a2740c88c38028d0509eba87d1285f1da9e"
@"bb74bff575b135bb39e5c9bce56349441fb0923bb8857e32a5eaf34ec1843967"
);
// Check both the files we report and the actual files
assert_snapshot!(format_file_list(build.source_dist_list_files, src.path()), @r"

View File

@ -299,6 +299,10 @@ impl TarGzWriter {
impl DirectoryWriter for TarGzWriter {
fn write_bytes(&mut self, path: &str, bytes: &[u8]) -> Result<(), Error> {
let mut header = Header::new_gnu();
// Work around bug in Python's std tar module
// https://github.com/python/cpython/issues/141707
// https://github.com/astral-sh/uv/pull/17043#issuecomment-3636841022
header.set_entry_type(EntryType::Regular);
header.set_size(bytes.len() as u64);
// Reasonable default to avoid 0o000 permissions, the user's umask will be applied on
// unpacking.
@ -312,6 +316,10 @@ impl DirectoryWriter for TarGzWriter {
fn write_file(&mut self, path: &str, file: &Path) -> Result<(), Error> {
let metadata = fs_err::metadata(file)?;
let mut header = Header::new_gnu();
// Work around bug in Python's std tar module
// https://github.com/python/cpython/issues/141707
// https://github.com/astral-sh/uv/pull/17043#issuecomment-3636841022
header.set_entry_type(EntryType::Regular);
// Preserve the executable bit, especially for scripts
#[cfg(unix)]
let executable_bit = {