diff --git a/Cargo.lock b/Cargo.lock index 3911695a0..d2dbfd450 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4855,6 +4855,7 @@ dependencies = [ "uv-pep508", "uv-platform-tags", "uv-pypi-types", + "uv-small-str", "uv-static", "uv-version", "uv-warnings", @@ -5075,6 +5076,7 @@ dependencies = [ "uv-pep508", "uv-platform-tags", "uv-pypi-types", + "uv-small-str", "version-ranges", ] @@ -5605,6 +5607,7 @@ dependencies = [ "uv-pypi-types", "uv-python", "uv-requirements-txt", + "uv-small-str", "uv-static", "uv-types", "uv-warnings", diff --git a/crates/uv-client/Cargo.toml b/crates/uv-client/Cargo.toml index 529e4afcc..c94687845 100644 --- a/crates/uv-client/Cargo.toml +++ b/crates/uv-client/Cargo.toml @@ -23,6 +23,7 @@ uv-pep440 = { workspace = true } uv-pep508 = { workspace = true } uv-platform-tags = { workspace = true } uv-pypi-types = { workspace = true } +uv-small-str = { workspace = true } uv-static = { workspace = true } uv-version = { workspace = true } uv-warnings = { workspace = true } diff --git a/crates/uv-client/src/flat_index.rs b/crates/uv-client/src/flat_index.rs index 8f7df28e9..bc923f354 100644 --- a/crates/uv-client/src/flat_index.rs +++ b/crates/uv-client/src/flat_index.rs @@ -10,6 +10,7 @@ use uv_cache_key::cache_digest; use uv_distribution_filename::DistFilename; use uv_distribution_types::{File, FileLocation, IndexUrl, UrlString}; use uv_pypi_types::HashDigests; +use uv_small_str::SmallString; use crate::cached_client::{CacheControl, CachedClientError}; use crate::html::SimpleHtml; @@ -186,10 +187,13 @@ impl<'a> FlatIndexClient<'a> { let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url) .map_err(|err| Error::from_html_err(err, url.clone()))?; + // Convert to a reference-counted string. + let base = SmallString::from(base.as_str()); + let unarchived: Vec = files .into_iter() .filter_map(|file| { - match File::try_from(file, base.as_url()) { + match File::try_from(file, &base) { Ok(file) => Some(file), Err(err) => { // Ignore files with unparsable version specifiers. @@ -270,10 +274,11 @@ impl<'a> FlatIndexClient<'a> { } } - let Ok(filename) = entry.file_name().into_string() else { + let filename = entry.file_name(); + let Some(filename) = filename.to_str() else { warn!( "Skipping non-UTF-8 filename in `--find-links` directory: {}", - entry.file_name().to_string_lossy() + filename.to_string_lossy() ); continue; }; @@ -283,16 +288,16 @@ impl<'a> FlatIndexClient<'a> { let file = File { dist_info_metadata: false, - filename: filename.to_string(), + filename: filename.into(), hashes: HashDigests::empty(), requires_python: None, size: None, upload_time_utc_ms: None, - url: FileLocation::AbsoluteUrl(UrlString::from(url)), + url: FileLocation::AbsoluteUrl(UrlString::from(&url)), yanked: None, }; - let Some(filename) = DistFilename::try_from_normalized_filename(&filename) else { + let Some(filename) = DistFilename::try_from_normalized_filename(filename) else { debug!( "Ignoring `--find-links` entry (expected a wheel or source distribution filename): {}", entry.path().display() diff --git a/crates/uv-client/src/html.rs b/crates/uv-client/src/html.rs index 4e1adb468..2e6fb730a 100644 --- a/crates/uv-client/src/html.rs +++ b/crates/uv-client/src/html.rs @@ -218,8 +218,8 @@ impl SimpleHtml { yanked, requires_python, hashes, - filename: filename.to_string(), - url: decoded.to_string(), + filename: filename.into(), + url: decoded.into(), size, upload_time, })) diff --git a/crates/uv-client/src/registry_client.rs b/crates/uv-client/src/registry_client.rs index 55eae1990..76f1d7134 100644 --- a/crates/uv-client/src/registry_client.rs +++ b/crates/uv-client/src/registry_client.rs @@ -14,6 +14,12 @@ use tokio::sync::Semaphore; use tracing::{info_span, instrument, trace, warn, Instrument}; use url::Url; +use crate::base_client::{BaseClientBuilder, ExtraMiddleware}; +use crate::cached_client::CacheControl; +use crate::html::SimpleHtml; +use crate::remote_metadata::wheel_metadata_from_remote_zip; +use crate::rkyvutil::OwnedArchive; +use crate::{BaseClient, CachedClient, CachedClientError, Error, ErrorKind}; use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache}; use uv_configuration::KeyringProviderType; use uv_configuration::{IndexStrategy, TrustedHost}; @@ -27,13 +33,7 @@ use uv_pep440::Version; use uv_pep508::MarkerEnvironment; use uv_platform_tags::Platform; use uv_pypi_types::{ResolutionMetadata, SimpleJson}; - -use crate::base_client::{BaseClientBuilder, ExtraMiddleware}; -use crate::cached_client::CacheControl; -use crate::html::SimpleHtml; -use crate::remote_metadata::wheel_metadata_from_remote_zip; -use crate::rkyvutil::OwnedArchive; -use crate::{BaseClient, CachedClient, CachedClientError, Error, ErrorKind}; +use uv_small_str::SmallString; /// A builder for an [`RegistryClient`]. #[derive(Debug, Clone)] @@ -894,10 +894,12 @@ impl SimpleMetadata { fn from_files(files: Vec, package_name: &PackageName, base: &Url) -> Self { let mut map: BTreeMap = BTreeMap::default(); + // Convert to a reference-counted string. + let base = SmallString::from(base.as_str()); + // Group the distributions by version and kind for file in files { - let Some(filename) = - DistFilename::try_from_filename(file.filename.as_str(), package_name) + let Some(filename) = DistFilename::try_from_filename(&file.filename, package_name) else { warn!("Skipping file for {package_name}: {}", file.filename); continue; @@ -906,7 +908,7 @@ impl SimpleMetadata { DistFilename::SourceDistFilename(ref inner) => &inner.version, DistFilename::WheelFilename(ref inner) => &inner.version, }; - let file = match File::try_from(file, base) { + let file = match File::try_from(file, &base) { Ok(file) => file, Err(err) => { // Ignore files with unparsable version specifiers. diff --git a/crates/uv-distribution-types/Cargo.toml b/crates/uv-distribution-types/Cargo.toml index 39618e2d7..b4be1aece 100644 --- a/crates/uv-distribution-types/Cargo.toml +++ b/crates/uv-distribution-types/Cargo.toml @@ -27,6 +27,7 @@ uv-pep440 = { workspace = true } uv-pep508 = { workspace = true } uv-platform-tags = { workspace = true } uv-pypi-types = { workspace = true } +uv-small-str = { workspace = true } arcstr = { workspace = true } bitflags = { workspace = true } diff --git a/crates/uv-distribution-types/src/file.rs b/crates/uv-distribution-types/src/file.rs index 52de0c5e9..c1b5feb72 100644 --- a/crates/uv-distribution-types/src/file.rs +++ b/crates/uv-distribution-types/src/file.rs @@ -8,6 +8,7 @@ use url::Url; use uv_pep440::{VersionSpecifiers, VersionSpecifiersParseError}; use uv_pep508::split_scheme; use uv_pypi_types::{CoreMetadata, HashDigests, Yanked}; +use uv_small_str::SmallString; /// Error converting [`uv_pypi_types::File`] to [`distribution_type::File`]. #[derive(Debug, thiserror::Error)] @@ -23,7 +24,7 @@ pub enum FileConversionError { #[rkyv(derive(Debug))] pub struct File { pub dist_info_metadata: bool, - pub filename: String, + pub filename: SmallString, pub hashes: HashDigests, pub requires_python: Option, pub size: Option, @@ -38,7 +39,10 @@ pub struct File { impl File { /// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers - pub fn try_from(file: uv_pypi_types::File, base: &Url) -> Result { + pub fn try_from( + file: uv_pypi_types::File, + base: &SmallString, + ) -> Result { Ok(Self { dist_info_metadata: file .core_metadata @@ -56,7 +60,7 @@ impl File { upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond), url: match split_scheme(&file.url) { Some(..) => FileLocation::AbsoluteUrl(UrlString::new(file.url)), - None => FileLocation::RelativeUrl(base.to_string(), file.url), + None => FileLocation::RelativeUrl(base.clone(), file.url), }, yanked: file.yanked, }) @@ -68,7 +72,7 @@ impl File { #[rkyv(derive(Debug))] pub enum FileLocation { /// URL relative to the base URL. - RelativeUrl(String, String), + RelativeUrl(SmallString, SmallString), /// Absolute URL. AbsoluteUrl(UrlString), } @@ -89,12 +93,12 @@ impl FileLocation { match *self { FileLocation::RelativeUrl(ref base, ref path) => { let base_url = Url::parse(base).map_err(|err| ToUrlError::InvalidBase { - base: base.clone(), + base: base.to_string(), err, })?; let joined = base_url.join(path).map_err(|err| ToUrlError::InvalidJoin { - base: base.clone(), - path: path.clone(), + base: base.to_string(), + path: path.to_string(), err, })?; Ok(joined) @@ -102,17 +106,6 @@ impl FileLocation { FileLocation::AbsoluteUrl(ref absolute) => absolute.to_url(), } } - - /// Convert this location to a URL. - /// - /// This method is identical to [`FileLocation::to_url`] except it avoids parsing absolute URLs - /// as they are already guaranteed to be valid. - pub fn to_url_string(&self) -> Result { - match *self { - FileLocation::AbsoluteUrl(ref absolute) => Ok(absolute.clone()), - FileLocation::RelativeUrl(_, _) => Ok(self.to_url()?.into()), - } - } } impl Display for FileLocation { @@ -143,18 +136,18 @@ impl Display for FileLocation { )] #[serde(transparent)] #[rkyv(derive(Debug))] -pub struct UrlString(String); +pub struct UrlString(SmallString); impl UrlString { /// Create a new [`UrlString`] from a [`String`]. - pub fn new(url: String) -> Self { + pub fn new(url: SmallString) -> Self { Self(url) } /// Converts a [`UrlString`] to a [`Url`]. pub fn to_url(&self) -> Result { Url::from_str(&self.0).map_err(|err| ToUrlError::InvalidAbsolute { - absolute: self.0.clone(), + absolute: self.0.to_string(), err, }) } @@ -175,8 +168,8 @@ impl UrlString { self.as_ref() .split_once('#') .map(|(path, _)| path) - .unwrap_or(self.as_ref()) - .to_string(), + .map(SmallString::from) + .unwrap_or_else(|| self.0.clone()), ) } } @@ -189,13 +182,13 @@ impl AsRef for UrlString { impl From for UrlString { fn from(value: Url) -> Self { - UrlString(value.to_string()) + Self(value.as_str().into()) } } impl From<&Url> for UrlString { fn from(value: &Url) -> Self { - UrlString(value.to_string()) + Self(value.as_str().into()) } } @@ -248,28 +241,28 @@ mod tests { #[test] fn base_str() { - let url = UrlString("https://example.com/path?query#fragment".to_string()); + let url = UrlString("https://example.com/path?query#fragment".into()); assert_eq!(url.base_str(), "https://example.com/path"); - let url = UrlString("https://example.com/path#fragment".to_string()); + let url = UrlString("https://example.com/path#fragment".into()); assert_eq!(url.base_str(), "https://example.com/path"); - let url = UrlString("https://example.com/path".to_string()); + let url = UrlString("https://example.com/path".into()); assert_eq!(url.base_str(), "https://example.com/path"); } #[test] fn without_fragment() { - let url = UrlString("https://example.com/path?query#fragment".to_string()); + let url = UrlString("https://example.com/path?query#fragment".into()); assert_eq!( url.without_fragment(), - UrlString("https://example.com/path?query".to_string()) + UrlString("https://example.com/path?query".into()) ); - let url = UrlString("https://example.com/path#fragment".to_string()); + let url = UrlString("https://example.com/path#fragment".into()); assert_eq!(url.base_str(), "https://example.com/path"); - let url = UrlString("https://example.com/path".to_string()); + let url = UrlString("https://example.com/path".into()); assert_eq!(url.base_str(), "https://example.com/path"); } } diff --git a/crates/uv-pypi-types/src/base_url.rs b/crates/uv-pypi-types/src/base_url.rs index e3249c069..535407cf6 100644 --- a/crates/uv-pypi-types/src/base_url.rs +++ b/crates/uv-pypi-types/src/base_url.rs @@ -43,6 +43,11 @@ impl BaseUrl { pub fn as_url(&self) -> &Url { &self.0 } + + /// Return the underlying [`Url`] as a serialized string. + pub fn as_str(&self) -> &str { + self.0.as_str() + } } impl From for BaseUrl { diff --git a/crates/uv-pypi-types/src/simple_json.rs b/crates/uv-pypi-types/src/simple_json.rs index 73422d9ed..ba8c0584a 100644 --- a/crates/uv-pypi-types/src/simple_json.rs +++ b/crates/uv-pypi-types/src/simple_json.rs @@ -40,10 +40,12 @@ fn sorted_simple_json_files<'de, D: Deserializer<'de>>(d: D) -> Result pub struct File { // PEP 714-renamed field, followed by PEP 691-compliant field, followed by non-PEP 691-compliant // alias used by PyPI. + // + // TODO(charlie): Use a single value here and move this into the deserializer, to save space. pub core_metadata: Option, pub dist_info_metadata: Option, pub data_dist_info_metadata: Option, - pub filename: String, + pub filename: SmallString, pub hashes: Hashes, /// There are a number of invalid specifiers on PyPI, so we first try to parse it into a /// [`VersionSpecifiers`] according to spec (PEP 440), then a [`LenientVersionSpecifiers`] with @@ -53,7 +55,7 @@ pub struct File { pub requires_python: Option>, pub size: Option, pub upload_time: Option, - pub url: String, + pub url: SmallString, pub yanked: Option>, } diff --git a/crates/uv-resolver/Cargo.toml b/crates/uv-resolver/Cargo.toml index 48c82b495..4af1c95d9 100644 --- a/crates/uv-resolver/Cargo.toml +++ b/crates/uv-resolver/Cargo.toml @@ -34,6 +34,7 @@ uv-platform-tags = { workspace = true } uv-pypi-types = { workspace = true } uv-python = { workspace = true } uv-requirements-txt = { workspace = true } +uv-small-str = { workspace = true } uv-static = { workspace = true } uv-types = { workspace = true } uv-warnings = { workspace = true } diff --git a/crates/uv-resolver/src/lock/mod.rs b/crates/uv-resolver/src/lock/mod.rs index ec77845be..a410e1f43 100644 --- a/crates/uv-resolver/src/lock/mod.rs +++ b/crates/uv-resolver/src/lock/mod.rs @@ -43,6 +43,7 @@ use uv_pypi_types::{ redact_credentials, ConflictPackage, Conflicts, HashDigest, HashDigests, ParsedArchiveUrl, ParsedGitUrl, Requirement, RequirementSource, }; +use uv_small_str::SmallString; use uv_types::{BuildContext, HashStrategy}; use uv_workspace::WorkspaceMember; @@ -2395,7 +2396,7 @@ impl Package { let ext = SourceDistExtension::from_path(filename.as_ref())?; let file = Box::new(uv_distribution_types::File { dist_info_metadata: false, - filename: filename.to_string(), + filename: SmallString::from(filename), hashes: sdist.hash().map_or(HashDigests::empty(), |hash| { HashDigests::from(hash.0.clone()) }), @@ -2446,7 +2447,7 @@ impl Package { let ext = SourceDistExtension::from_path(filename.as_ref())?; let file = Box::new(uv_distribution_types::File { dist_info_metadata: false, - filename: filename.to_string(), + filename: SmallString::from(filename), hashes: sdist.hash().map_or(HashDigests::empty(), |hash| { HashDigests::from(hash.0.clone()) }), @@ -4027,7 +4028,7 @@ impl Wheel { }; let file = Box::new(uv_distribution_types::File { dist_info_metadata: false, - filename: filename.to_string(), + filename: SmallString::from(filename.to_string()), hashes: self.hash.iter().map(|h| h.0.clone()).collect(), requires_python: None, size: self.size, @@ -4059,7 +4060,7 @@ impl Wheel { .map_err(|()| LockErrorKind::PathToUrl)?; let file = Box::new(uv_distribution_types::File { dist_info_metadata: false, - filename: filename.to_string(), + filename: SmallString::from(filename.to_string()), hashes: self.hash.iter().map(|h| h.0.clone()).collect(), requires_python: None, size: self.size,