Use existing METADATA parser in wheel installer (#5508)

This commit is contained in:
Charlie Marsh 2024-07-27 14:38:16 -04:00 committed by GitHub
parent ae11317cc0
commit 1734c7ed50
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 77 additions and 98 deletions

1
Cargo.lock generated
View File

@ -1821,7 +1821,6 @@ dependencies = [
"distribution-filename", "distribution-filename",
"fs-err", "fs-err",
"indoc", "indoc",
"mailparse",
"pathdiff", "pathdiff",
"pep440_rs", "pep440_rs",
"platform-info", "platform-info",

View File

@ -33,7 +33,6 @@ configparser = { workspace = true }
csv = { workspace = true } csv = { workspace = true }
data-encoding = { workspace = true } data-encoding = { workspace = true }
fs-err = { workspace = true } fs-err = { workspace = true }
mailparse = { workspace = true }
pathdiff = { workspace = true } pathdiff = { workspace = true }
platform-info = { workspace = true } platform-info = { workspace = true }
reflink-copy = { workspace = true } reflink-copy = { workspace = true }

View File

@ -2,28 +2,25 @@
//! reading from a zip file. //! reading from a zip file.
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::time::SystemTime; use std::time::SystemTime;
use distribution_filename::WheelFilename; use distribution_filename::WheelFilename;
use fs_err as fs; use fs_err as fs;
use fs_err::{DirEntry, File}; use fs_err::{DirEntry, File};
use pep440_rs::Version; use pypi_types::{DirectUrl, Metadata12};
use pypi_types::DirectUrl;
use reflink_copy as reflink; use reflink_copy as reflink;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tempfile::tempdir_in; use tempfile::tempdir_in;
use tracing::{debug, instrument}; use tracing::{debug, instrument};
use uv_normalize::PackageName;
use uv_warnings::warn_user_once; use uv_warnings::warn_user_once;
use walkdir::WalkDir; use walkdir::WalkDir;
use crate::script::{scripts_from_ini, Script}; use crate::script::{scripts_from_ini, Script};
use crate::wheel::{ use crate::wheel::{
extra_dist_info, install_data, parse_metadata, parse_wheel_file, read_record_file, extra_dist_info, install_data, parse_wheel_file, read_record_file, write_script_entrypoints,
write_script_entrypoints, LibKind, LibKind,
}; };
use crate::{Error, Layout}; use crate::{Error, Layout};
@ -49,16 +46,15 @@ pub fn install_wheel(
) -> Result<(), Error> { ) -> Result<(), Error> {
let dist_info_prefix = find_dist_info(&wheel)?; let dist_info_prefix = find_dist_info(&wheel)?;
let metadata = dist_info_metadata(&dist_info_prefix, &wheel)?; let metadata = dist_info_metadata(&dist_info_prefix, &wheel)?;
let (name, version) = parse_metadata(&dist_info_prefix, &metadata)?; let Metadata12 { name, version, .. } = Metadata12::parse_metadata(&metadata)
.map_err(|err| Error::InvalidWheel(err.to_string()))?;
// Validate the wheel name and version. // Validate the wheel name and version.
{ {
let name = PackageName::from_str(&name)?;
if name != filename.name { if name != filename.name {
return Err(Error::MismatchedName(name, filename.name.clone())); return Err(Error::MismatchedName(name, filename.name.clone()));
} }
let version = Version::from_str(&version)?;
if version != filename.version && version != filename.version.clone().without_local() { if version != filename.version && version != filename.version.clone().without_local() {
return Err(Error::MismatchedVersion(version, filename.version.clone())); return Err(Error::MismatchedVersion(version, filename.version.clone()));
} }
@ -76,13 +72,13 @@ pub fn install_wheel(
// > 1.c If Root-Is-Purelib == true, unpack archive into purelib (site-packages). // > 1.c If Root-Is-Purelib == true, unpack archive into purelib (site-packages).
// > 1.d Else unpack archive into platlib (site-packages). // > 1.d Else unpack archive into platlib (site-packages).
debug!(name, "Extracting file"); debug!(?name, "Extracting file");
let site_packages = match lib_kind { let site_packages = match lib_kind {
LibKind::Pure => &layout.scheme.purelib, LibKind::Pure => &layout.scheme.purelib,
LibKind::Plat => &layout.scheme.platlib, LibKind::Plat => &layout.scheme.platlib,
}; };
let num_unpacked = link_mode.link_wheel_files(site_packages, &wheel, locks)?; let num_unpacked = link_mode.link_wheel_files(site_packages, &wheel, locks)?;
debug!(name, "Extracted {num_unpacked} files"); debug!(?name, "Extracted {num_unpacked} files");
// Read the RECORD file. // Read the RECORD file.
let mut record_file = File::open( let mut record_file = File::open(
@ -96,9 +92,9 @@ pub fn install_wheel(
parse_scripts(&wheel, &dist_info_prefix, None, layout.python_version.1)?; parse_scripts(&wheel, &dist_info_prefix, None, layout.python_version.1)?;
if console_scripts.is_empty() && gui_scripts.is_empty() { if console_scripts.is_empty() && gui_scripts.is_empty() {
debug!(name, "No entrypoints"); debug!(?name, "No entrypoints");
} else { } else {
debug!(name, "Writing entrypoints"); debug!(?name, "Writing entrypoints");
fs_err::create_dir_all(&layout.scheme.scripts)?; fs_err::create_dir_all(&layout.scheme.scripts)?;
write_script_entrypoints(layout, site_packages, &console_scripts, &mut record, false)?; write_script_entrypoints(layout, site_packages, &console_scripts, &mut record, false)?;
@ -109,7 +105,7 @@ pub fn install_wheel(
// 2.b Move each subtree of distribution-1.0.data/ onto its destination path. Each subdirectory of distribution-1.0.data/ is a key into a dict of destination directories, such as distribution-1.0.data/(purelib|platlib|headers|scripts|data). The initially supported paths are taken from distutils.command.install. // 2.b Move each subtree of distribution-1.0.data/ onto its destination path. Each subdirectory of distribution-1.0.data/ is a key into a dict of destination directories, such as distribution-1.0.data/(purelib|platlib|headers|scripts|data). The initially supported paths are taken from distutils.command.install.
let data_dir = site_packages.join(format!("{dist_info_prefix}.data")); let data_dir = site_packages.join(format!("{dist_info_prefix}.data"));
if data_dir.is_dir() { if data_dir.is_dir() {
debug!(name, "Installing data"); debug!(?name, "Installing data");
install_data( install_data(
layout, layout,
site_packages, site_packages,
@ -124,10 +120,10 @@ pub fn install_wheel(
// 2.e Remove empty distribution-1.0.data directory. // 2.e Remove empty distribution-1.0.data directory.
fs::remove_dir_all(data_dir)?; fs::remove_dir_all(data_dir)?;
} else { } else {
debug!(name, "No data"); debug!(?name, "No data");
} }
debug!(name, "Writing extra metadata"); debug!(?name, "Writing extra metadata");
extra_dist_info( extra_dist_info(
site_packages, site_packages,
&dist_info_prefix, &dist_info_prefix,
@ -137,7 +133,7 @@ pub fn install_wheel(
&mut record, &mut record,
)?; )?;
debug!(name, "Writing record"); debug!(?name, "Writing record");
let mut record_writer = csv::WriterBuilder::new() let mut record_writer = csv::WriterBuilder::new()
.has_headers(false) .has_headers(false)
.escape(b'"') .escape(b'"')

View File

@ -11,36 +11,6 @@ use uv_normalize::PackageName;
use crate::Error; use crate::Error;
/// Returns `true` if the file is a `METADATA` file in a `.dist-info` directory that matches the
/// wheel filename.
pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> bool {
let Some((dist_info_dir, file)) = path.split_once('/') else {
return false;
};
if file != "METADATA" {
return false;
}
let Some(dir_stem) = dist_info_dir.strip_suffix(".dist-info") else {
return false;
};
let Some((name, version)) = dir_stem.rsplit_once('-') else {
return false;
};
let Ok(name) = PackageName::from_str(name) else {
return false;
};
if name != filename.name {
return false;
}
let Ok(version) = Version::from_str(version) else {
return false;
};
if version != filename.version {
return false;
}
true
}
/// Find the `.dist-info` directory in a zipped wheel. /// Find the `.dist-info` directory in a zipped wheel.
/// ///
/// Returns the dist info dir prefix without the `.dist-info` extension. /// Returns the dist info dir prefix without the `.dist-info` extension.

View File

@ -6,7 +6,6 @@ use std::{env, io};
use data_encoding::BASE64URL_NOPAD; use data_encoding::BASE64URL_NOPAD;
use fs_err as fs; use fs_err as fs;
use fs_err::{DirEntry, File}; use fs_err::{DirEntry, File};
use mailparse::MailHeaderMap;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use tracing::{instrument, warn}; use tracing::{instrument, warn};
@ -16,6 +15,7 @@ use zip::ZipWriter;
use pypi_types::DirectUrl; use pypi_types::DirectUrl;
use uv_fs::{relative_to, Simplified}; use uv_fs::{relative_to, Simplified};
use uv_normalize::PackageName;
use crate::record::RecordEntry; use crate::record::RecordEntry;
use crate::script::Script; use crate::script::Script;
@ -557,7 +557,7 @@ pub(crate) fn install_data(
layout: &Layout, layout: &Layout,
site_packages: &Path, site_packages: &Path,
data_dir: &Path, data_dir: &Path,
dist_name: &str, dist_name: &PackageName,
console_scripts: &[Script], console_scripts: &[Script],
gui_scripts: &[Script], gui_scripts: &[Script],
record: &mut [RecordEntry], record: &mut [RecordEntry],
@ -602,7 +602,7 @@ pub(crate) fn install_data(
} }
} }
Some("headers") => { Some("headers") => {
let target_path = layout.scheme.include.join(dist_name); let target_path = layout.scheme.include.join(dist_name.as_str());
move_folder_recorded(&path, &target_path, site_packages, record)?; move_folder_recorded(&path, &target_path, site_packages, record)?;
} }
Some("purelib") => { Some("purelib") => {
@ -727,49 +727,6 @@ fn parse_key_value_file(
Ok(data) Ok(data)
} }
/// Parse the distribution name and version from a wheel's `dist-info` metadata.
///
/// See: <https://github.com/PyO3/python-pkginfo-rs>
pub(crate) fn parse_metadata(
dist_info_prefix: &str,
content: &[u8],
) -> Result<(String, String), Error> {
// HACK: trick mailparse to parse as UTF-8 instead of ASCII
let mut mail = b"Content-Type: text/plain; charset=utf-8\n".to_vec();
mail.extend_from_slice(content);
let msg = mailparse::parse_mail(&mail).map_err(|err| {
Error::InvalidWheel(format!(
"Invalid metadata in {dist_info_prefix}.dist-info/METADATA: {err}"
))
})?;
let headers = msg.get_headers();
let metadata_version =
headers
.get_first_value("Metadata-Version")
.ok_or(Error::InvalidWheel(format!(
"No `Metadata-Version` field in: {dist_info_prefix}.dist-info/METADATA"
)))?;
// Crude but it should do https://packaging.python.org/en/latest/specifications/core-metadata/#metadata-version
// At time of writing:
// > Version of the file format; legal values are “1.0”, “1.1”, “1.2”, “2.1”, “2.2”, and “2.3”.
if !(metadata_version.starts_with("1.") || metadata_version.starts_with("2.")) {
return Err(Error::InvalidWheel(format!(
"`Metadata-Version` field has unsupported value {metadata_version} in: {dist_info_prefix}.dist-info/METADATA"
)));
}
let name = headers
.get_first_value("Name")
.ok_or(Error::InvalidWheel(format!(
"No `Name` field in: {dist_info_prefix}.dist-info/METADATA"
)))?;
let version = headers
.get_first_value("Version")
.ok_or(Error::InvalidWheel(format!(
"No `Version` field in: {dist_info_prefix}.dist-info/METADATA"
)))?;
Ok((name, version))
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::io::Cursor; use std::io::Cursor;

View File

@ -338,6 +338,65 @@ impl Metadata10 {
} }
} }
/// Python Package Metadata 1.2 and later as specified in
/// <https://peps.python.org/pep-0345/>.
///
/// This is a subset of the full metadata specification, and only includes the
/// fields that have been consistent across all versions of the specification later than 1.2.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct Metadata12 {
pub name: PackageName,
pub version: Version,
pub requires_python: Option<VersionSpecifiers>,
}
impl Metadata12 {
/// Parse the [`Metadata12`] from a `.dist-info` `METADATA` file, as included in a built
/// distribution.
pub fn parse_metadata(content: &[u8]) -> Result<Self, MetadataError> {
let headers = Headers::parse(content)?;
// To rely on a source distribution's `PKG-INFO` file, the `Metadata-Version` field must be
// present and set to a value of at least `2.2`.
let metadata_version = headers
.get_first_value("Metadata-Version")
.ok_or(MetadataError::FieldNotFound("Metadata-Version"))?;
// Parse the version into (major, minor).
let (major, minor) = parse_version(&metadata_version)?;
// At time of writing:
// > Version of the file format; legal values are “1.0”, “1.1”, “1.2”, “2.1”, “2.2”, and “2.3”.
if (major, minor) < (1, 0) || (major, minor) >= (3, 0) {
return Err(MetadataError::InvalidMetadataVersion(metadata_version));
}
let name = PackageName::new(
headers
.get_first_value("Name")
.ok_or(MetadataError::FieldNotFound("Name"))?,
)?;
let version = Version::from_str(
&headers
.get_first_value("Version")
.ok_or(MetadataError::FieldNotFound("Version"))?,
)
.map_err(MetadataError::Pep440VersionError)?;
let requires_python = headers
.get_first_value("Requires-Python")
.map(|requires_python| LenientVersionSpecifiers::from_str(&requires_python))
.transpose()?
.map(VersionSpecifiers::from);
Ok(Self {
name,
version,
requires_python,
})
}
}
/// Parse a `Metadata-Version` field into a (major, minor) tuple. /// Parse a `Metadata-Version` field into a (major, minor) tuple.
fn parse_version(metadata_version: &str) -> Result<(u8, u8), MetadataError> { fn parse_version(metadata_version: &str) -> Result<(u8, u8), MetadataError> {
let (major, minor) = let (major, minor) =

View File

@ -2,7 +2,6 @@
use std::fmt::{self, Write}; use std::fmt::{self, Write};
use std::path::PathBuf; use std::path::PathBuf;
use std::time::Instant;
use anyhow::{anyhow, Context}; use anyhow::{anyhow, Context};
use itertools::Itertools; use itertools::Itertools;
@ -99,7 +98,7 @@ pub(crate) async fn resolve<InstalledPackages: InstalledPackagesProvider>(
preview: PreviewMode, preview: PreviewMode,
quiet: bool, quiet: bool,
) -> Result<ResolutionGraph, Error> { ) -> Result<ResolutionGraph, Error> {
let start = Instant::now(); let start = std::time::Instant::now();
// Resolve the requirements from the provided sources. // Resolve the requirements from the provided sources.
let requirements = { let requirements = {
@ -261,7 +260,7 @@ pub(crate) async fn resolve<InstalledPackages: InstalledPackagesProvider>(
// Prints a success message after completing resolution. // Prints a success message after completing resolution.
pub(crate) fn resolution_success( pub(crate) fn resolution_success(
resolution: &ResolutionGraph, resolution: &ResolutionGraph,
start: Instant, start: std::time::Instant,
printer: Printer, printer: Printer,
) -> fmt::Result { ) -> fmt::Result {
let s = if resolution.len() == 1 { "" } else { "s" }; let s = if resolution.len() == 1 { "" } else { "s" };