diff --git a/crates/cache-key/src/cache_key.rs b/crates/cache-key/src/cache_key.rs index f1880c5be..019c54eb5 100644 --- a/crates/cache-key/src/cache_key.rs +++ b/crates/cache-key/src/cache_key.rs @@ -8,6 +8,7 @@ use std::num::{ use std::path::{Path, PathBuf}; use seahash::SeaHasher; +use url::Url; /// A trait for types that can be hashed in a stable way across versions and platforms. Equivalent /// to Ruff's [`CacheKey`] trait. @@ -211,6 +212,13 @@ impl CacheKey for PathBuf { } } +impl CacheKey for Url { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + self.as_str().cache_key(state); + } +} + impl CacheKey for Option { #[inline] fn cache_key(&self, state: &mut CacheKeyHasher) { diff --git a/crates/distribution-types/src/file.rs b/crates/distribution-types/src/file.rs index 0a0fcb81c..0011af614 100644 --- a/crates/distribution-types/src/file.rs +++ b/crates/distribution-types/src/file.rs @@ -4,7 +4,6 @@ use std::path::PathBuf; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use thiserror::Error; -use url::Url; use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError}; use pep508_rs::VerbatimUrl; @@ -42,10 +41,11 @@ impl File { requires_python: file.requires_python.transpose()?, size: file.size, upload_time: file.upload_time, - url: FileLocation::Url( - base.join_relative(&file.url) - .map_err(|err| FileConversionError::Url(file.url.clone(), err))?, - ), + url: if file.url.contains("://") { + FileLocation::AbsoluteUrl(file.url) + } else { + FileLocation::RelativeUrl(base.clone(), file.url) + }, yanked: file.yanked, }) } @@ -55,7 +55,9 @@ impl File { #[derive(Debug, Clone, Serialize, Deserialize)] pub enum FileLocation { /// URL relative to the base URL. - Url(Url), + RelativeUrl(BaseUrl, String), + /// Absolute URL. + AbsoluteUrl(String), /// Absolute path to a file. Path(PathBuf, VerbatimUrl), } @@ -63,7 +65,8 @@ pub enum FileLocation { impl Display for FileLocation { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - FileLocation::Url(url) => Display::fmt(url, f), + FileLocation::RelativeUrl(_base, url) => Display::fmt(&url, f), + FileLocation::AbsoluteUrl(url) => Display::fmt(&url, f), FileLocation::Path(path, _url) => Display::fmt(&path.display(), f), } } diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 5a487807f..bba1cebb8 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -694,17 +694,49 @@ impl Identifier for Path { } } +impl Identifier for String { + fn distribution_id(&self) -> DistributionId { + DistributionId::new(cache_key::digest(&self)) + } + + fn resource_id(&self) -> ResourceId { + ResourceId::new(cache_key::digest(&self)) + } +} + +impl Identifier for &str { + fn distribution_id(&self) -> DistributionId { + DistributionId::new(cache_key::digest(&self)) + } + + fn resource_id(&self) -> ResourceId { + ResourceId::new(cache_key::digest(&self)) + } +} + +impl Identifier for (&Url, &str) { + fn distribution_id(&self) -> DistributionId { + DistributionId::new(cache_key::digest(&self)) + } + + fn resource_id(&self) -> ResourceId { + ResourceId::new(cache_key::digest(&self)) + } +} + impl Identifier for FileLocation { fn distribution_id(&self) -> DistributionId { match self { - FileLocation::Url(url) => url.distribution_id(), + FileLocation::RelativeUrl(base, url) => (base.as_url(), url.as_str()).distribution_id(), + FileLocation::AbsoluteUrl(url) => url.distribution_id(), FileLocation::Path(path, _) => path.distribution_id(), } } fn resource_id(&self) -> ResourceId { match self { - FileLocation::Url(url) => url.resource_id(), + FileLocation::RelativeUrl(base, url) => (base.as_url(), url.as_str()).resource_id(), + FileLocation::AbsoluteUrl(url) => url.resource_id(), FileLocation::Path(path, _) => path.resource_id(), } } diff --git a/crates/pep508-rs/src/verbatim_url.rs b/crates/pep508-rs/src/verbatim_url.rs index eb1736ffc..e5610a159 100644 --- a/crates/pep508-rs/src/verbatim_url.rs +++ b/crates/pep508-rs/src/verbatim_url.rs @@ -13,6 +13,10 @@ use url::Url; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct VerbatimUrl { /// The parsed URL. + #[serde( + serialize_with = "Url::serialize_internal", + deserialize_with = "Url::deserialize_internal" + )] url: Url, /// The URL as it was provided by the user. #[derivative(PartialEq = "ignore")] diff --git a/crates/puffin-client/src/registry_client.rs b/crates/puffin-client/src/registry_client.rs index ad0360376..837b5af0c 100644 --- a/crates/puffin-client/src/registry_client.rs +++ b/crates/puffin-client/src/registry_client.rs @@ -225,8 +225,14 @@ impl RegistryClient { pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result { let metadata = match &built_dist { BuiltDist::Registry(wheel) => match &wheel.file.url { - FileLocation::Url(url) => { - self.wheel_metadata_registry(&wheel.index, &wheel.file, url) + FileLocation::RelativeUrl(base, url) => { + let url = base.join_relative(url)?; + self.wheel_metadata_registry(&wheel.index, &wheel.file, &url) + .await? + } + FileLocation::AbsoluteUrl(url) => { + let url = Url::parse(url)?; + self.wheel_metadata_registry(&wheel.index, &wheel.file, &url) .await? } FileLocation::Path(path, _url) => { diff --git a/crates/puffin-distribution/src/distribution_database.rs b/crates/puffin-distribution/src/distribution_database.rs index 59fe29c5f..21530c6d5 100644 --- a/crates/puffin-distribution/src/distribution_database.rs +++ b/crates/puffin-distribution/src/distribution_database.rs @@ -111,7 +111,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> match &dist { Dist::Built(BuiltDist::Registry(wheel)) => { let url = match &wheel.file.url { - FileLocation::Url(url) => url, + FileLocation::RelativeUrl(base, url) => base + .join_relative(url) + .map_err(|err| DistributionDatabaseError::Url(url.clone(), err))?, + FileLocation::AbsoluteUrl(url) => Url::parse(url) + .map_err(|err| DistributionDatabaseError::Url(url.clone(), err))?, FileLocation::Path(path, url) => { let cache_entry = self.cache.entry( CacheBucket::Wheels, @@ -143,7 +147,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // for downloading and unzipping (with a buffer in between) and switch // to rayon if this buffer grows large by the time the file is fully // downloaded. - let reader = self.client.stream_external(url).await?; + let reader = self.client.stream_external(&url).await?; // Download and unzip the wheel to a temporary directory. let temp_dir = tempfile::tempdir_in(self.cache.root())?; diff --git a/crates/puffin-distribution/src/source/mod.rs b/crates/puffin-distribution/src/source/mod.rs index 4ba7b4c1a..733c18d65 100644 --- a/crates/puffin-distribution/src/source/mod.rs +++ b/crates/puffin-distribution/src/source/mod.rs @@ -99,7 +99,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } SourceDist::Registry(registry_source_dist) => { let url = match ®istry_source_dist.file.url { - FileLocation::Url(url) => url, + FileLocation::RelativeUrl(base, url) => base + .join_relative(url) + .map_err(|err| SourceDistError::UrlParse(url.clone(), err))?, + FileLocation::AbsoluteUrl(url) => Url::parse(url) + .map_err(|err| SourceDistError::UrlParse(url.clone(), err))?, FileLocation::Path(path, url) => { let path_source_dist = PathSourceDist { name: registry_source_dist.filename.name.clone(), @@ -123,7 +127,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { self.url( source_dist, ®istry_source_dist.file.filename, - url, + &url, &cache_shard, None, ) @@ -168,7 +172,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } SourceDist::Registry(registry_source_dist) => { let url = match ®istry_source_dist.file.url { - FileLocation::Url(url) => url, + FileLocation::RelativeUrl(base, url) => base + .join_relative(url) + .map_err(|err| SourceDistError::UrlParse(url.clone(), err))?, + FileLocation::AbsoluteUrl(url) => Url::parse(url) + .map_err(|err| SourceDistError::UrlParse(url.clone(), err))?, FileLocation::Path(path, url) => { let path_source_dist = PathSourceDist { name: registry_source_dist.filename.name.clone(), @@ -192,7 +200,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { self.url_metadata( source_dist, ®istry_source_dist.file.filename, - url, + &url, &cache_shard, None, ) diff --git a/crates/pypi-types/src/base_url.rs b/crates/pypi-types/src/base_url.rs index 4701bce26..15ea0a63d 100644 --- a/crates/pypi-types/src/base_url.rs +++ b/crates/pypi-types/src/base_url.rs @@ -2,7 +2,13 @@ use serde::{Deserialize, Serialize}; use url::Url; #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)] -pub struct BaseUrl(Url); +pub struct BaseUrl( + #[serde( + serialize_with = "Url::serialize_internal", + deserialize_with = "Url::deserialize_internal" + )] + Url, +); impl BaseUrl { /// Parse the given URL. If it's relative, join it to the current [`BaseUrl`]. Allows for @@ -19,6 +25,11 @@ impl BaseUrl { } } } + + /// Return the underlying [`Url`]. + pub fn as_url(&self) -> &Url { + &self.0 + } } impl From for BaseUrl {