diff --git a/crates/install-wheel-rs/src/lib.rs b/crates/install-wheel-rs/src/lib.rs index 5ad7207c7..181fac067 100644 --- a/crates/install-wheel-rs/src/lib.rs +++ b/crates/install-wheel-rs/src/lib.rs @@ -1,14 +1,16 @@ //! Takes a wheel and installs it into a venv.. use std::io; +use std::io::{Read, Seek}; use std::path::PathBuf; use std::str::FromStr; -use distribution_filename::WheelFilename; use platform_info::PlatformInfoError; use thiserror::Error; use zip::result::ZipError; +use zip::ZipArchive; +use distribution_filename::WheelFilename; pub use install_location::{normalize_name, InstallLocation, LockedDir}; use pep440_rs::Version; use platform_host::{Arch, Os}; @@ -80,8 +82,12 @@ pub enum Error { MissingDistInfo, #[error("Multiple .dist-info directories found: {0}")] MultipleDistInfo(String), + #[error("Invalid wheel size")] + InvalidSize, } +/// Find the `dist-info` directory from a list of files. +/// /// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or /// the metadata name and the dist info name are lowercase, while the wheel name is uppercase. /// Either way, we just search the wheel for the name. @@ -124,12 +130,32 @@ pub fn find_dist_info<'a, T: Copy>( Ok((payload, dist_info_dir)) } +/// Given an archive, read the `dist-info` metadata into a buffer. +pub fn read_dist_info( + filename: &WheelFilename, + archive: &mut ZipArchive, +) -> Result, Error> { + let dist_info_dir = find_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1; + + let mut file = archive + .by_name(&format!("{dist_info_dir}/METADATA")) + .map_err(|err| Error::Zip(filename.to_string(), err))?; + + #[allow(clippy::cast_possible_truncation)] + let mut buffer = Vec::with_capacity(file.size() as usize); + file.read_to_end(&mut buffer)?; + + Ok(buffer) +} + #[cfg(test)] mod test { - use crate::find_dist_info; - use distribution_filename::WheelFilename; use std::str::FromStr; + use distribution_filename::WheelFilename; + + use crate::find_dist_info; + #[test] fn test_dot_in_name() { let files = [ diff --git a/crates/install-wheel-rs/src/linker.rs b/crates/install-wheel-rs/src/linker.rs index ad3ec0c60..7d68a2aed 100644 --- a/crates/install-wheel-rs/src/linker.rs +++ b/crates/install-wheel-rs/src/linker.rs @@ -6,14 +6,13 @@ use std::path::Path; use configparser::ini::Ini; use fs_err as fs; use fs_err::File; -use mailparse::MailHeaderMap; use tracing::{debug, span, Level}; use pypi_types::DirectUrl; use crate::install_location::InstallLocation; use crate::wheel::{ - extra_dist_info, install_data, parse_wheel_version, read_scripts_from_section, + extra_dist_info, install_data, parse_metadata, parse_wheel_version, read_scripts_from_section, write_script_entrypoints, }; use crate::{read_record_file, Error, Script}; @@ -49,7 +48,8 @@ pub fn install_wheel( }; let dist_info_prefix = find_dist_info(&wheel)?; - let (name, _version) = read_metadata(&dist_info_prefix, &wheel)?; + let metadata = dist_info_metadata(&dist_info_prefix, &wheel)?; + let (name, _version) = parse_metadata(&dist_info_prefix, &metadata)?; let _my_span = span!(Level::DEBUG, "install_wheel", name); @@ -128,11 +128,9 @@ pub fn install_wheel( Ok(()) } -/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or -/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase. -/// Either way, we just search the wheel for the name +/// Find the `dist-info` directory in an unzipped wheel. /// -/// +/// See: fn find_dist_info(path: impl AsRef) -> Result { // Iterate over `path` to find the `.dist-info` directory. It should be at the top-level. let Some(dist_info) = fs::read_dir(path.as_ref())?.find_map(|entry| { @@ -162,52 +160,12 @@ fn find_dist_info(path: impl AsRef) -> Result { Ok(dist_info_prefix.to_string_lossy().to_string()) } -/// -fn read_metadata( - dist_info_prefix: &str, - wheel: impl AsRef, -) -> Result<(String, String), Error> { +/// Read the `dist-info` metadata from a directory. +fn dist_info_metadata(dist_info_prefix: &str, wheel: impl AsRef) -> Result, Error> { let metadata_file = wheel .as_ref() .join(format!("{dist_info_prefix}.dist-info/METADATA")); - - let content = fs::read(&metadata_file)?; - - // HACK: trick mailparse to parse as UTF-8 instead of ASCII - let mut mail = b"Content-Type: text/plain; charset=utf-8\n".to_vec(); - mail.extend_from_slice(&content); - let msg = mailparse::parse_mail(&mail).map_err(|err| { - Error::InvalidWheel(format!("Invalid {}: {}", metadata_file.display(), err)) - })?; - let headers = msg.get_headers(); - let metadata_version = - headers - .get_first_value("Metadata-Version") - .ok_or(Error::InvalidWheel(format!( - "No Metadata-Version field in {}", - metadata_file.display() - )))?; - // Crude but it should do https://packaging.python.org/en/latest/specifications/core-metadata/#metadata-version - // At time of writing: - // > Version of the file format; legal values are “1.0”, “1.1”, “1.2”, “2.1”, “2.2”, and “2.3”. - if !(metadata_version.starts_with("1.") || metadata_version.starts_with("2.")) { - return Err(Error::InvalidWheel(format!( - "Metadata-Version field has unsupported value {metadata_version}" - ))); - } - let name = headers - .get_first_value("Name") - .ok_or(Error::InvalidWheel(format!( - "No Name field in {}", - metadata_file.display() - )))?; - let version = headers - .get_first_value("Version") - .ok_or(Error::InvalidWheel(format!( - "No Version field in {}", - metadata_file.display() - )))?; - Ok((name, version)) + Ok(fs::read(metadata_file)?) } /// Parses the `entry_points.txt` entry in the wheel for console scripts diff --git a/crates/install-wheel-rs/src/wheel.rs b/crates/install-wheel-rs/src/wheel.rs index 645fcced4..b09135220 100644 --- a/crates/install-wheel-rs/src/wheel.rs +++ b/crates/install-wheel-rs/src/wheel.rs @@ -901,7 +901,7 @@ pub fn install_wheel( compile: bool, check_hashes: bool, // initially used to the console scripts, currently unused. Keeping it because we likely need - // it for validation later + // it for validation later. _extras: &[String], sys_executable: impl AsRef, ) -> Result { @@ -936,8 +936,8 @@ pub fn install_wheel( let dist_info_prefix = find_dist_info(filename, archive.file_names().map(|name| (name, name)))? .1 .to_string(); - let (name, _version) = read_metadata(&dist_info_prefix, &mut archive)?; - // TODO: Check that name and version match + let metadata = dist_info_metadata(&dist_info_prefix, &mut archive)?; + let (name, _version) = parse_metadata(&dist_info_prefix, &metadata)?; let record_path = format!("{dist_info_prefix}.dist-info/RECORD"); let mut record = read_record_file(&mut archive.by_name(&record_path).map_err(|err| { @@ -1037,49 +1037,59 @@ pub fn install_wheel( Ok(filename.get_tag()) } -/// -fn read_metadata( +/// Read the `dist-info` metadata from a wheel archive. +fn dist_info_metadata( dist_info_prefix: &str, archive: &mut ZipArchive, -) -> Result<(String, String), Error> { +) -> Result, Error> { let mut content = Vec::new(); - let metadata_file = format!("{dist_info_prefix}.dist-info/METADATA"); + let dist_info_file = format!("{dist_info_prefix}.dist-info/DIST-INFO"); archive - .by_name(&metadata_file) - .map_err(|err| { - let file = metadata_file.to_string(); - Error::Zip(file, err) - })? + .by_name(&dist_info_file) + .map_err(|err| Error::Zip(dist_info_file.clone(), err))? .read_to_end(&mut content)?; + Ok(content) +} + +/// Parse the distribution name and version from a wheel's `dist-info` metadata. +/// +/// See: +pub(crate) fn parse_metadata( + dist_info_prefix: &str, + content: &[u8], +) -> Result<(String, String), Error> { // HACK: trick mailparse to parse as UTF-8 instead of ASCII let mut mail = b"Content-Type: text/plain; charset=utf-8\n".to_vec(); - mail.extend_from_slice(&content); - let msg = mailparse::parse_mail(&mail) - .map_err(|err| Error::InvalidWheel(format!("Invalid {metadata_file}: {err}")))?; + mail.extend_from_slice(content); + let msg = mailparse::parse_mail(&mail).map_err(|err| { + Error::InvalidWheel(format!( + "Invalid metadata in {dist_info_prefix}.dist-info/METADATA: {err}" + )) + })?; let headers = msg.get_headers(); let metadata_version = headers .get_first_value("Metadata-Version") .ok_or(Error::InvalidWheel(format!( - "No Metadata-Version field in {metadata_file}" + "No `Metadata-Version` field in: {dist_info_prefix}.dist-info/METADATA" )))?; // Crude but it should do https://packaging.python.org/en/latest/specifications/core-metadata/#metadata-version // At time of writing: // > Version of the file format; legal values are “1.0”, “1.1”, “1.2”, “2.1”, “2.2”, and “2.3”. if !(metadata_version.starts_with("1.") || metadata_version.starts_with("2.")) { return Err(Error::InvalidWheel(format!( - "Metadata-Version field has unsupported value {metadata_version}" + "`Metadata-Version` field has unsupported value {metadata_version} in: {dist_info_prefix}.dist-info/METADATA" ))); } let name = headers .get_first_value("Name") .ok_or(Error::InvalidWheel(format!( - "No Name field in {metadata_file}" + "No `Name` field in: {dist_info_prefix}.dist-info/METADATA" )))?; let version = headers .get_first_value("Version") .ok_or(Error::InvalidWheel(format!( - "No Version field in {metadata_file}" + "No `Version` field in: {dist_info_prefix}.dist-info/METADATA" )))?; Ok((name, version)) } diff --git a/crates/puffin-distribution/src/download.rs b/crates/puffin-distribution/src/download.rs index bd11a452c..b4000808c 100644 --- a/crates/puffin-distribution/src/download.rs +++ b/crates/puffin-distribution/src/download.rs @@ -5,7 +5,7 @@ use zip::ZipArchive; use distribution_filename::WheelFilename; use distribution_types::{Dist, SourceDist}; -use install_wheel_rs::find_dist_info; +use install_wheel_rs::read_dist_info; use pypi_types::Metadata21; use crate::error::Error; @@ -106,68 +106,14 @@ impl std::fmt::Display for SourceDistDownload { } } -impl InMemoryWheel { - /// Read the [`Metadata21`] from a wheel. - pub fn read_dist_info(&self) -> Result { - let mut archive = ZipArchive::new(std::io::Cursor::new(&self.buffer))?; - let dist_info_dir = find_dist_info( - &self.filename, - archive.file_names().map(|name| (name, name)), - ) - .map_err(|err| { - Error::DistInfo(Box::new(self.filename.clone()), self.dist.to_string(), err) - })? - .1; - let dist_info = - std::io::read_to_string(archive.by_name(&format!("{dist_info_dir}/METADATA"))?)?; - Ok(Metadata21::parse(dist_info.as_bytes())?) - } -} - -impl DiskWheel { - /// Read the [`Metadata21`] from a wheel. - pub fn read_dist_info(&self) -> Result { - let mut archive = ZipArchive::new(fs_err::File::open(&self.path)?)?; - let dist_info_dir = find_dist_info( - &self.filename, - archive.file_names().map(|name| (name, name)), - ) - .map_err(|err| { - Error::DistInfo(Box::new(self.filename.clone()), self.dist.to_string(), err) - })? - .1; - let dist_info = - std::io::read_to_string(archive.by_name(&format!("{dist_info_dir}/METADATA"))?)?; - Ok(Metadata21::parse(dist_info.as_bytes())?) - } -} - impl BuiltWheel { /// Read the [`Metadata21`] from a wheel. pub fn read_dist_info(&self) -> Result { let mut archive = ZipArchive::new(fs_err::File::open(&self.path)?)?; - let dist_info_dir = find_dist_info( - &self.filename, - archive.file_names().map(|name| (name, name)), - ) - .map_err(|err| { + let dist_info = read_dist_info(&self.filename, &mut archive).map_err(|err| { Error::DistInfo(Box::new(self.filename.clone()), self.dist.to_string(), err) - })? - .1; - let dist_info = - std::io::read_to_string(archive.by_name(&format!("{dist_info_dir}/METADATA"))?)?; - Ok(Metadata21::parse(dist_info.as_bytes())?) - } -} - -impl LocalWheel { - /// Read the [`Metadata21`] from a wheel. - pub fn read_dist_info(&self) -> Result { - match self { - LocalWheel::InMemory(wheel) => wheel.read_dist_info(), - LocalWheel::Disk(wheel) => wheel.read_dist_info(), - LocalWheel::Built(wheel) => wheel.read_dist_info(), - } + })?; + Ok(Metadata21::parse(&dist_info)?) } } diff --git a/crates/puffin-distribution/src/source_dist.rs b/crates/puffin-distribution/src/source_dist.rs index 395f5f486..3a17efa1d 100644 --- a/crates/puffin-distribution/src/source_dist.rs +++ b/crates/puffin-distribution/src/source_dist.rs @@ -4,7 +4,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; -use anyhow::bail; +use anyhow::Result; use fs_err::tokio as fs; use futures::TryStreamExt; use fxhash::FxHashMap; @@ -16,12 +16,13 @@ use tokio::task::JoinError; use tokio_util::compat::FuturesAsyncReadCompatExt; use tracing::{debug, info_span}; use url::Url; +use zip::result::ZipError; use zip::ZipArchive; use distribution_filename::{WheelFilename, WheelFilenameError}; use distribution_types::direct_url::{DirectArchiveUrl, DirectGitUrl}; -use distribution_types::{Dist, GitSourceDist, Identifier, RemoteSource, SourceDist}; -use install_wheel_rs::find_dist_info; +use distribution_types::{GitSourceDist, Identifier, RemoteSource, SourceDist}; +use install_wheel_rs::read_dist_info; use platform_tags::Tags; use puffin_cache::{digest, CacheBucket, CacheEntry, CanonicalUrl, WheelCache}; use puffin_client::{CachedClient, CachedClientError, DataWithCachePolicy}; @@ -30,13 +31,15 @@ use puffin_normalize::PackageName; use puffin_traits::BuildContext; use pypi_types::Metadata21; -use crate::download::BuiltWheel; use crate::locks::LockedFile; use crate::{Download, Reporter, SourceDistDownload}; /// The caller is responsible for adding the source dist information to the error chain #[derive(Debug, Error)] pub enum SourceDistError { + #[error("Building source distributions is disabled")] + NoBuild, + // Network error #[error("Failed to parse URL: `{0}`")] UrlParse(String, #[source] url::ParseError), @@ -54,7 +57,7 @@ pub enum SourceDistError { Serde(#[from] serde_json::Error), // Build error - #[error("Failed to build {0}")] + #[error("Failed to build: {0}")] Build(Box, #[source] anyhow::Error), #[error("Built wheel has an invalid filename")] WheelFilename(#[from] WheelFilenameError), @@ -64,9 +67,13 @@ pub enum SourceDistError { metadata: PackageName, }, #[error("Failed to parse metadata from built wheel")] - Metadata(#[from] crate::error::Error), + Metadata(#[from] pypi_types::Error), + #[error("Failed to read `dist-info` metadata from built wheel")] + DistInfo(#[from] install_wheel_rs::Error), + #[error("Failed to read zip archive from built wheel")] + Zip(#[from] ZipError), - /// Should not occur, i've only seen it when another task panicked + /// Should not occur; only seen when another task panicked. #[error("The task executor is broken, did some other task panic?")] Join(#[from] JoinError), } @@ -192,18 +199,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { // Read the metadata from the wheel. let filename = WheelFilename::from_str(&disk_filename)?; - - // TODO(konstin): Remove duplicated `.dist-info` read. - // See: https://github.com/astral-sh/puffin/issues/484 - let metadata = BuiltWheel { - dist: Dist::Source(source_dist.clone()), - filename: filename.clone(), - path: wheel_dir.join(&disk_filename), - } - .read_dist_info()?; + let path = wheel_dir.join(disk_filename); + let metadata = read_metadata(&filename, &path)?; BuiltWheelMetadata { - path: wheel_dir.join(disk_filename), + path, filename, metadata, } @@ -263,8 +263,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { download.subdirectory.as_deref(), &cache_entry, ) - .await - .map_err(|err| SourceDistError::Build(Box::new(source_dist.clone()), err))?; + .await?; if let Some(task) = task { if let Some(reporter) = self.reporter.as_ref() { @@ -328,8 +327,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { subdirectory, &cache_entry, ) - .await - .map_err(|err| SourceDistError::Build(Box::new(source_dist.clone()), err))?; + .await?; if let Some(task) = task { if let Some(reporter) = self.reporter.as_ref() { reporter.on_build_complete(source_dist, task); @@ -415,8 +413,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { subdirectory.as_deref(), &cache_entry, ) - .await - .map_err(|err| SourceDistError::Build(Box::new(source_dist.clone()), err))?; + .await?; if metadata.name != git_source_dist.name { return Err(SourceDistError::NameMismatch { @@ -507,11 +504,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { source_dist: &Path, subdirectory: Option<&Path>, cache_entry: &CacheEntry, - ) -> anyhow::Result<(String, WheelFilename, Metadata21)> { + ) -> Result<(String, WheelFilename, Metadata21), SourceDistError> { debug!("Building: {dist}"); if self.build_context.no_build() { - bail!("Building source distributions is disabled"); + return Err(SourceDistError::NoBuild); } // Build the wheel. @@ -524,7 +521,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { &cache_entry.dir, &dist.to_string(), ) - .await?; + .await + .map_err(|err| SourceDistError::Build(Box::new(dist.clone()), err))?; if let Some(temp_dir) = temp_dir { temp_dir.close()?; @@ -532,20 +530,23 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { // Read the metadata from the wheel. let filename = WheelFilename::from_str(&disk_filename)?; - - let mut archive = - ZipArchive::new(fs_err::File::open(cache_entry.dir.join(&disk_filename))?)?; - let dist_info_dir = - find_dist_info(&filename, archive.file_names().map(|name| (name, name)))?.1; - let dist_info = - std::io::read_to_string(archive.by_name(&format!("{dist_info_dir}/METADATA"))?)?; - let metadata = Metadata21::parse(dist_info.as_bytes())?; + let metadata = read_metadata(&filename, cache_entry.dir.join(&disk_filename))?; debug!("Finished building: {dist}"); Ok((disk_filename, filename, metadata)) } } +/// Read the [`Metadata21`] from a built wheel. +fn read_metadata( + filename: &WheelFilename, + wheel: impl Into, +) -> Result { + let mut archive = ZipArchive::new(fs_err::File::open(wheel)?)?; + let dist_info = read_dist_info(filename, &mut archive)?; + Ok(Metadata21::parse(&dist_info)?) +} + trait SourceDistReporter: Send + Sync { /// Callback to invoke when a repository checkout begins. fn on_checkout_start(&self, url: &Url, rev: &str) -> usize;