diff --git a/Cargo.lock b/Cargo.lock index c71838587..4a3fafc82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2765,6 +2765,7 @@ dependencies = [ "test-case", "thiserror", "tracing", + "url", ] [[package]] diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 99ef2ddf2..db5c59c4a 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -5,7 +5,7 @@ use url::Url; use pep440_rs::Version; use puffin_normalize::PackageName; -use pypi_types::File; +use pypi_types::{File, IndexUrl}; pub use crate::any::*; pub use crate::cached::*; @@ -62,6 +62,7 @@ pub struct RegistryBuiltDist { pub name: PackageName, pub version: Version, pub file: File, + pub index: IndexUrl, } /// A built distribution (wheel) that exists at an arbitrary URL. @@ -77,6 +78,7 @@ pub struct RegistrySourceDist { pub name: PackageName, pub version: Version, pub file: File, + pub index: IndexUrl, } /// A source distribution that exists at an arbitrary URL. @@ -95,7 +97,7 @@ pub struct GitSourceDist { impl Dist { /// Create a [`Dist`] for a registry-based distribution. 
- pub fn from_registry(name: PackageName, version: Version, file: File) -> Self { + pub fn from_registry(name: PackageName, version: Version, file: File, index: IndexUrl) -> Self { if Path::new(&file.filename) .extension() .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) @@ -104,12 +106,14 @@ impl Dist { name, version, file, + index, })) } else { Self::Source(SourceDist::Registry(RegistrySourceDist { name, version, file, + index, })) } } diff --git a/crates/puffin-client/src/client.rs b/crates/puffin-client/src/client.rs index ecf5f20eb..fa368926d 100644 --- a/crates/puffin-client/src/client.rs +++ b/crates/puffin-client/src/client.rs @@ -19,7 +19,7 @@ use url::Url; use distribution_filename::WheelFilename; use install_wheel_rs::find_dist_info; use puffin_normalize::PackageName; -use pypi_types::{File, Metadata21, SimpleJson}; +use pypi_types::{File, IndexUrl, Metadata21, SimpleJson}; use crate::cached_client::CachedClient; use crate::error::Error; @@ -30,8 +30,8 @@ use crate::remote_metadata::{ /// A builder for an [`RegistryClient`]. 
#[derive(Debug, Clone)] pub struct RegistryClientBuilder { - index: Url, - extra_index: Vec, + index: IndexUrl, + extra_index: Vec, no_index: bool, proxy: Url, retries: u32, @@ -41,7 +41,7 @@ pub struct RegistryClientBuilder { impl RegistryClientBuilder { pub fn new(cache: impl Into) -> Self { Self { - index: Url::parse("https://pypi.org/simple").unwrap(), + index: IndexUrl::from(Url::parse("https://pypi.org/simple").unwrap()), extra_index: vec![], no_index: false, proxy: Url::parse("https://pypi-metadata.ruff.rs").unwrap(), @@ -54,13 +54,13 @@ impl RegistryClientBuilder { impl RegistryClientBuilder { #[must_use] pub fn index(mut self, index: Url) -> Self { - self.index = index; + self.index = IndexUrl::from(index); self } #[must_use] pub fn extra_index(mut self, extra_index: Vec) -> Self { - self.extra_index = extra_index; + self.extra_index = extra_index.into_iter().map(IndexUrl::from).collect(); self } @@ -135,8 +135,8 @@ impl RegistryClientBuilder { // TODO(konstin): Clean up the clients once we moved everything to common caching #[derive(Debug, Clone)] pub struct RegistryClient { - pub(crate) index: Url, - pub(crate) extra_index: Vec, + pub(crate) index: IndexUrl, + pub(crate) extra_index: Vec, /// Ignore the package index, instead relying on local archives and caches. pub(crate) no_index: bool, pub(crate) client: ClientWithMiddleware, @@ -149,14 +149,14 @@ pub struct RegistryClient { impl RegistryClient { /// Fetch a package from the `PyPI` simple API. - pub async fn simple(&self, package_name: PackageName) -> Result { + pub async fn simple(&self, package_name: PackageName) -> Result<(IndexUrl, SimpleJson), Error> { if self.no_index { return Err(Error::NoIndex(package_name.as_ref().to_string())); } for index in std::iter::once(&self.index).chain(self.extra_index.iter()) { // Format the URL for PyPI. 
- let mut url = index.clone(); + let mut url: Url = index.clone().into(); url.path_segments_mut().unwrap().push(package_name.as_ref()); url.path_segments_mut().unwrap().push(""); url.set_query(Some("format=application/vnd.pypi.simple.v1+json")); @@ -170,8 +170,9 @@ impl RegistryClient { // Fetch from the index. match self.simple_impl(&url).await { Ok(text) => { - return serde_json::from_str(&text) - .map_err(move |e| Error::from_json_err(e, url)); + let data = serde_json::from_str(&text) + .map_err(move |e| Error::from_json_err(e, url))?; + return Ok((index.clone(), data)); } Err(err) => { if err.status() == Some(StatusCode::NOT_FOUND) { diff --git a/crates/puffin-resolver/src/finder.rs b/crates/puffin-resolver/src/finder.rs index cb2850d4e..2eab301f4 100644 --- a/crates/puffin-resolver/src/finder.rs +++ b/crates/puffin-resolver/src/finder.rs @@ -17,7 +17,7 @@ use platform_tags::{TagPriority, Tags}; use puffin_client::RegistryClient; use puffin_interpreter::InterpreterInfo; use puffin_normalize::PackageName; -use pypi_types::{File, SimpleJson}; +use pypi_types::{File, IndexUrl, SimpleJson}; use crate::error::ResolveError; use crate::resolution::Resolution; @@ -68,7 +68,9 @@ impl<'a> DistFinder<'a> { Request::Package(requirement) => self .client .simple(requirement.name.clone()) - .map_ok(move |metadata| Response::Package(requirement, metadata)), + .map_ok(move |(index, metadata)| { + Response::Package(requirement, index, metadata) + }), }) .buffer_unordered(32) .ready_chunks(32); @@ -104,9 +106,10 @@ impl<'a> DistFinder<'a> { for result in chunk { let result: Response = result?; match result { - Response::Package(requirement, metadata) => { + Response::Package(requirement, index, metadata) => { // Pick a version that satisfies the requirement. 
- let Some(distribution) = self.select(&requirement, metadata.files) else { + let Some(distribution) = self.select(&requirement, &index, metadata.files) + else { return Err(ResolveError::NotFound(requirement)); }; @@ -134,7 +137,12 @@ impl<'a> DistFinder<'a> { } /// select a version that satisfies the requirement, preferring wheels to source distributions. - fn select(&self, requirement: &Requirement, files: Vec) -> Option { + fn select( + &self, + requirement: &Requirement, + index: &IndexUrl, + files: Vec, + ) -> Option { let mut best_version: Option = None; let mut best_wheel: Option<(Dist, TagPriority)> = None; let mut best_sdist: Option = None; @@ -173,7 +181,7 @@ impl<'a> DistFinder<'a> { .map_or(true, |(.., existing)| priority > *existing) { best_wheel = Some(( - Dist::from_registry(wheel.name, wheel.version, file), + Dist::from_registry(wheel.name, wheel.version, file, index.clone()), priority, )); } @@ -197,7 +205,12 @@ impl<'a> DistFinder<'a> { if requirement.is_satisfied_by(&sdist.version) { best_version = Some(sdist.version.clone()); - best_sdist = Some(Dist::from_registry(sdist.name, sdist.version, file)); + best_sdist = Some(Dist::from_registry( + sdist.name, + sdist.version, + file, + index.clone(), + )); } } } @@ -216,7 +229,7 @@ enum Request { #[derive(Debug)] enum Response { /// The returned metadata for a package. 
- Package(Requirement, SimpleJson), + Package(Requirement, IndexUrl, SimpleJson), } pub trait Reporter: Send + Sync { diff --git a/crates/puffin-resolver/src/resolution.rs b/crates/puffin-resolver/src/resolution.rs index 376593cc6..7fac38dc2 100644 --- a/crates/puffin-resolver/src/resolution.rs +++ b/crates/puffin-resolver/src/resolution.rs @@ -14,7 +14,7 @@ use distribution_types::{BuiltDist, Dist, Metadata, SourceDist}; use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers}; use pep508_rs::{Requirement, VersionOrUrl}; use puffin_normalize::PackageName; -use pypi_types::File; +use pypi_types::{File, IndexUrl}; use crate::pubgrub::{PubGrubPackage, PubGrubPriority, PubGrubVersion}; @@ -58,7 +58,7 @@ impl Graph { /// Create a new graph from the resolved `PubGrub` state. pub fn from_state( selection: &SelectedDependencies, - pins: &FxHashMap>, + pins: &FxHashMap>, redirects: &WaitMap, state: &State, PubGrubPriority>, ) -> Self { @@ -73,12 +73,13 @@ impl Graph { match package { PubGrubPackage::Package(package_name, None, None) => { let version = Version::from(version.clone()); - let file = pins + let (index, file) = pins .get(package_name) .and_then(|versions| versions.get(&version)) .unwrap() .clone(); - let pinned_package = Dist::from_registry(package_name.clone(), version, file); + let pinned_package = + Dist::from_registry(package_name.clone(), version, file, index); let index = graph.add_node(pinned_package); inverse.insert(package_name, index); diff --git a/crates/puffin-resolver/src/resolver.rs b/crates/puffin-resolver/src/resolver.rs index 8e993ad3d..df5addeb7 100644 --- a/crates/puffin-resolver/src/resolver.rs +++ b/crates/puffin-resolver/src/resolver.rs @@ -27,7 +27,7 @@ use puffin_cache::CanonicalUrl; use puffin_client::RegistryClient; use puffin_normalize::{ExtraName, PackageName}; use puffin_traits::BuildContext; -use pypi_types::{File, Metadata21, SimpleJson}; +use pypi_types::{File, IndexUrl, Metadata21, SimpleJson}; use 
crate::candidate_selector::CandidateSelector; use crate::distribution::{BuiltDistFetcher, SourceDistFetcher, SourceDistributionReporter}; @@ -330,7 +330,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { let Some(entry) = self.index.packages.get(package_name) else { continue; }; - let version_map = entry.value(); + let (index, version_map) = entry.value(); // Try to find a compatible version. If there aren't any compatible versions, // short-circuit and return `None`. @@ -345,6 +345,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { candidate.package_name, candidate.version.into(), candidate.file.into(), + index.clone(), ); request_sink.unbounded_send(Request::Dist(distribution))?; } @@ -358,7 +359,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { &self, package: &PubGrubPackage, range: &Range, - pins: &mut FxHashMap>, + pins: &mut FxHashMap>, in_flight: &mut InFlight, request_sink: &futures::channel::mpsc::UnboundedSender, ) -> Result, ResolveError> { @@ -405,7 +406,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { PubGrubPackage::Package(package_name, _extra, None) => { // Wait for the metadata to be available. 
let entry = self.index.packages.wait(package_name).await.unwrap(); - let version_map = entry.value(); + let (index, version_map) = entry.value(); debug!("Searching for a compatible version of {package_name} ({range})"); @@ -428,7 +429,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { .or_default() .insert( candidate.version.clone().into(), - candidate.file.clone().into(), + (index.clone(), candidate.file.clone().into()), ); let version = candidate.version.clone(); @@ -439,6 +440,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { candidate.package_name, candidate.version.into(), candidate.file.into(), + index.clone(), ); request_sink.unbounded_send(Request::Dist(distribution))?; } @@ -453,7 +455,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { &self, package: &PubGrubPackage, version: &PubGrubVersion, - pins: &mut FxHashMap>, + pins: &mut FxHashMap>, priorities: &mut PubGrubPriorities, in_flight: &mut InFlight, request_sink: &futures::channel::mpsc::UnboundedSender, @@ -498,7 +500,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { .unwrap(), None => { let versions = pins.get(package_name).unwrap(); - let file = versions.get(version.into()).unwrap(); + let (_index, file) = versions.get(version.into()).unwrap(); self.index .distributions .wait(&file.distribution_id()) @@ -550,7 +552,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { while let Some(response) = response_stream.next().await { match response? 
{ - Response::Package(package_name, metadata) => { + Response::Package(package_name, index, metadata) => { trace!("Received package metadata for: {package_name}"); let version_map = VersionMap::from_metadata( metadata, @@ -559,7 +561,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { self.build_context.interpreter_info().version(), self.exclude_newer.as_ref(), ); - self.index.packages.insert(package_name, version_map); + self.index + .packages + .insert(package_name, (index, version_map)); } Response::Dist(Dist::Built(distribution), metadata, ..) => { trace!("Received built distribution metadata for: {distribution}"); @@ -596,7 +600,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { Request::Package(package_name) => { self.client .simple(package_name.clone()) - .map_ok(move |metadata| Response::Package(package_name, metadata)) + .map_ok(move |(index, metadata)| { + Response::Package(package_name, index, metadata) + }) .map_err(ResolveError::Client) .await } @@ -803,7 +809,7 @@ enum Request { #[allow(clippy::large_enum_variant)] enum Response { /// The returned metadata for a package hosted on a registry. - Package(PackageName, SimpleJson), + Package(PackageName, IndexUrl, SimpleJson), /// The returned metadata for a distribution. Dist(Dist, Metadata21, Option), } @@ -839,8 +845,9 @@ impl InFlight { /// In-memory index of package metadata. struct Index { - /// A map from package name to the metadata for that package. - packages: WaitMap, + /// A map from package name to the metadata for that package and the index where the metadata + /// came from. + packages: WaitMap, /// A map from distribution SHA to metadata for that distribution. 
distributions: WaitMap, diff --git a/crates/pypi-types/Cargo.toml b/crates/pypi-types/Cargo.toml index 2fa065cbe..36dc5cec2 100644 --- a/crates/pypi-types/Cargo.toml +++ b/crates/pypi-types/Cargo.toml @@ -22,6 +22,7 @@ rfc2047-decoder = { workspace = true } serde = { workspace = true } thiserror = { workspace = true } tracing = { workspace = true } +url = { workspace = true } [dev-dependencies] indoc = { version = "2.0.4" } diff --git a/crates/pypi-types/src/index_url.rs b/crates/pypi-types/src/index_url.rs new file mode 100644 index 000000000..fa5c3f217 --- /dev/null +++ b/crates/pypi-types/src/index_url.rs @@ -0,0 +1,17 @@ +use url::Url; + +/// The URL of an index, newtype'd to avoid mixing it up with file URLs. +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +pub struct IndexUrl(Url); + +impl From<Url> for IndexUrl { + fn from(url: Url) -> Self { + Self(url) + } +} + +impl From<IndexUrl> for Url { + fn from(index: IndexUrl) -> Self { + index.0 + } +} diff --git a/crates/pypi-types/src/lib.rs b/crates/pypi-types/src/lib.rs index e5b7ffd1a..e8c5623c0 100644 --- a/crates/pypi-types/src/lib.rs +++ b/crates/pypi-types/src/lib.rs @@ -1,9 +1,11 @@ pub use direct_url::{ArchiveInfo, DirectUrl, VcsInfo, VcsKind}; +pub use index_url::IndexUrl; pub use lenient_requirement::LenientVersionSpecifiers; pub use metadata::{Error, Metadata21}; pub use simple_json::{File, SimpleJson, Yanked}; mod direct_url; +mod index_url; mod lenient_requirement; mod metadata; mod simple_json;