From 751f7fa9c6f81ce785cf79c11357fbc83f70315c Mon Sep 17 00:00:00 2001 From: konsti Date: Thu, 16 Nov 2023 19:03:44 +0100 Subject: [PATCH] Improve PEP 691 compatibility (#428) [PEP 691](https://peps.python.org/pep-0691/#project-detail) has slightly different, more relaxed rules around file metadata. These changes are now reflected in the `File` struct. This will make it easier to support alternative indices. I had expected that i need to introduce a separate type for that, so i'm happy it's two `Option`s more and an alias. Part of #412 --- Cargo.lock | 1 + crates/puffin-cli/src/commands/pip_sync.rs | 6 +-- crates/puffin-client/src/client.rs | 5 ++- crates/puffin-distribution/Cargo.toml | 1 + crates/puffin-distribution/src/lib.rs | 4 +- crates/puffin-installer/src/downloader.rs | 43 ++++++++++++++-------- crates/puffin-resolver/src/version_map.rs | 4 +- crates/pypi-types/src/simple_json.rs | 16 ++++++-- 8 files changed, 53 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a87cfeef..bc5237e1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2480,6 +2480,7 @@ dependencies = [ "puffin-git", "puffin-normalize", "pypi-types", + "serde", "serde_json", "url", ] diff --git a/crates/puffin-cli/src/commands/pip_sync.rs b/crates/puffin-cli/src/commands/pip_sync.rs index 5cc5d300f..63ebadbc5 100644 --- a/crates/puffin-cli/src/commands/pip_sync.rs +++ b/crates/puffin-cli/src/commands/pip_sync.rs @@ -156,8 +156,8 @@ pub(crate) async fn sync_requirements( continue; }; match &file.yanked { - Yanked::Bool(false) => {} - Yanked::Bool(true) => { + None | Some(Yanked::Bool(false)) => {} + Some(Yanked::Bool(true)) => { writeln!( printer, "{}{} {dist} is yanked. Refresh your lockfile to pin an un-yanked version.", @@ -165,7 +165,7 @@ pub(crate) async fn sync_requirements( ":".bold(), )?; } - Yanked::Reason(reason) => { + Some(Yanked::Reason(reason)) => { writeln!( printer, "{}{} {dist} is yanked (reason: \"{reason}\"). Refresh your lockfile to pin an un-yanked version.", diff --git a/crates/puffin-client/src/client.rs b/crates/puffin-client/src/client.rs index 1d71174d7..ecf5f20eb 100644 --- a/crates/puffin-client/src/client.rs +++ b/crates/puffin-client/src/client.rs @@ -206,7 +206,10 @@ impl RegistryClient { // If the metadata file is available at its own url (PEP 658), download it from there let url = Url::parse(&file.url)?; let filename = WheelFilename::from_str(&file.filename)?; - if file.data_dist_info_metadata.is_available() { + if file + .dist_info_metadata + .is_some_and(|dist_info_metadata| dist_info_metadata.is_available()) + { let url = Url::parse(&format!("{}.metadata", file.url))?; let cache_dir = self.cache.join(WHEEL_METADATA_FROM_INDEX).join("pypi"); diff --git a/crates/puffin-distribution/Cargo.toml b/crates/puffin-distribution/Cargo.toml index 161ab4500..5c30dc78d 100644 --- a/crates/puffin-distribution/Cargo.toml +++ b/crates/puffin-distribution/Cargo.toml @@ -18,5 +18,6 @@ pypi-types = { path = "../pypi-types" } anyhow = { workspace = true } fs-err = { workspace = true } +serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } url = { workspace = true } diff --git a/crates/puffin-distribution/src/lib.rs b/crates/puffin-distribution/src/lib.rs index 58aa7eaf6..99ef2ddf2 100644 --- a/crates/puffin-distribution/src/lib.rs +++ b/crates/puffin-distribution/src/lib.rs @@ -263,7 +263,7 @@ impl RemoteSource for RegistryBuiltDist { } fn size(&self) -> Option { - Some(self.file.size) + self.file.size } } @@ -273,7 +273,7 @@ impl RemoteSource for RegistrySourceDist { } fn size(&self) -> Option { - Some(self.file.size) + self.file.size } } diff --git a/crates/puffin-installer/src/downloader.rs b/crates/puffin-installer/src/downloader.rs index 79b590722..df5bdf20c 100644 --- a/crates/puffin-installer/src/downloader.rs +++ b/crates/puffin-installer/src/downloader.rs @@ -116,9 +116,34 @@ async fn fetch( let reader = client.stream_external(&url).await?; // If the file is greater than 5MB, write it to disk; otherwise, keep it in memory. - let file_size = ByteSize::b(wheel.file.size as u64); - if file_size >= ByteSize::mb(5) { - debug!("Fetching disk-based wheel from registry: {dist} ({file_size})"); + let small_size = if let Some(size) = wheel.file.size { + let byte_size = ByteSize::b(size as u64); + if byte_size < ByteSize::mb(5) { + Some(size) + } else { + None + } + } else { + None + }; + if let Some(small_size) = small_size { + debug!( + "Fetching in-memory wheel from registry: {dist} ({})", + ByteSize::b(small_size as u64) + ); + + // Read into a buffer. + let mut buffer = Vec::with_capacity(small_size); + let mut reader = tokio::io::BufReader::new(reader.compat()); + tokio::io::copy(&mut reader, &mut buffer).await?; + + Ok(Download::Wheel(WheelDownload::InMemory(InMemoryWheel { + dist, + buffer, + }))) + } else { + let size = small_size.map_or("unknown size".to_string(), |size| size.to_string()); + debug!("Fetching disk-based wheel from registry: {dist} ({size})"); // Download the wheel to a temporary file. let temp_dir = tempfile::tempdir_in(cache)?.into_path(); @@ -131,18 +156,6 @@ async fn fetch( dist, path: wheel_file, }))) - } else { - debug!("Fetching in-memory wheel from registry: {dist} ({file_size})"); - - // Read into a buffer. - let mut buffer = Vec::with_capacity(wheel.file.size); - let mut reader = tokio::io::BufReader::new(reader.compat()); - tokio::io::copy(&mut reader, &mut buffer).await?; - - Ok(Download::Wheel(WheelDownload::InMemory(InMemoryWheel { - dist, - buffer, - }))) } } diff --git a/crates/puffin-resolver/src/version_map.rs b/crates/puffin-resolver/src/version_map.rs index 650364774..86ff17181 100644 --- a/crates/puffin-resolver/src/version_map.rs +++ b/crates/puffin-resolver/src/version_map.rs @@ -6,7 +6,7 @@ use distribution_filename::{SourceDistFilename, WheelFilename}; use pep440_rs::Version; use platform_tags::{TagPriority, Tags}; use puffin_normalize::PackageName; -use pypi_types::SimpleJson; +use pypi_types::{SimpleJson, Yanked}; use crate::file::{DistFile, SdistFile, WheelFile}; use crate::pubgrub::PubGrubVersion; @@ -45,7 +45,7 @@ impl VersionMap { // When resolving, exclude yanked files. // TODO(konstin): When we fail resolving due to a dependency locked to yanked version, // we should tell the user. - if file.yanked.is_yanked() { + if file.yanked.as_ref().is_some_and(Yanked::is_yanked) { continue; } diff --git a/crates/pypi-types/src/simple_json.rs b/crates/pypi-types/src/simple_json.rs index f30cad16e..5abe25313 100644 --- a/crates/pypi-types/src/simple_json.rs +++ b/crates/pypi-types/src/simple_json.rs @@ -12,21 +12,25 @@ pub struct SimpleJson { pub versions: Vec, } +/// A single (remote) file belonging to a package, generally either a wheel or a source dist. +/// +/// #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub struct File { - pub core_metadata: Metadata, - pub data_dist_info_metadata: Metadata, + // Not PEP 691 compliant alias used by pypi + #[serde(alias = "data_dist_info_metadata")] + pub dist_info_metadata: Option, pub filename: String, pub hashes: Hashes, /// Note: Deserialized with [`LenientVersionSpecifiers`] since there are a number of invalid /// versions on pypi #[serde(deserialize_with = "deserialize_version_specifiers_lenient")] pub requires_python: Option, - pub size: usize, + pub size: Option, pub upload_time: String, pub url: String, - pub yanked: Yanked, + pub yanked: Option, } fn deserialize_version_specifiers_lenient<'de, D>( @@ -75,6 +79,10 @@ impl Yanked { } } +/// A dictionary mapping a hash name to a hex encoded digest of the file. +/// +/// PEP 691 says multiple hashes can be included and the interpretation is left to the client, we +/// only support SHA 256 atm. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Hashes { pub sha256: String,