Use `SmallString` for filenames and URLs (#11765)

## Summary

These are never mutated, so there's no need to store them as `String`.
This commit is contained in:
Charlie Marsh 2025-02-24 21:06:57 -10:00 committed by GitHub
parent 76c3caf24f
commit 275db0668d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 70 additions and 56 deletions

3
Cargo.lock generated
View File

@ -4855,6 +4855,7 @@ dependencies = [
"uv-pep508",
"uv-platform-tags",
"uv-pypi-types",
"uv-small-str",
"uv-static",
"uv-version",
"uv-warnings",
@ -5075,6 +5076,7 @@ dependencies = [
"uv-pep508",
"uv-platform-tags",
"uv-pypi-types",
"uv-small-str",
"version-ranges",
]
@ -5605,6 +5607,7 @@ dependencies = [
"uv-pypi-types",
"uv-python",
"uv-requirements-txt",
"uv-small-str",
"uv-static",
"uv-types",
"uv-warnings",

View File

@ -23,6 +23,7 @@ uv-pep440 = { workspace = true }
uv-pep508 = { workspace = true }
uv-platform-tags = { workspace = true }
uv-pypi-types = { workspace = true }
uv-small-str = { workspace = true }
uv-static = { workspace = true }
uv-version = { workspace = true }
uv-warnings = { workspace = true }

View File

@ -10,6 +10,7 @@ use uv_cache_key::cache_digest;
use uv_distribution_filename::DistFilename;
use uv_distribution_types::{File, FileLocation, IndexUrl, UrlString};
use uv_pypi_types::HashDigests;
use uv_small_str::SmallString;
use crate::cached_client::{CacheControl, CachedClientError};
use crate::html::SimpleHtml;
@ -186,10 +187,13 @@ impl<'a> FlatIndexClient<'a> {
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;
// Convert to a reference-counted string.
let base = SmallString::from(base.as_str());
let unarchived: Vec<File> = files
.into_iter()
.filter_map(|file| {
match File::try_from(file, base.as_url()) {
match File::try_from(file, &base) {
Ok(file) => Some(file),
Err(err) => {
// Ignore files with unparsable version specifiers.
@ -270,10 +274,11 @@ impl<'a> FlatIndexClient<'a> {
}
}
let Ok(filename) = entry.file_name().into_string() else {
let filename = entry.file_name();
let Some(filename) = filename.to_str() else {
warn!(
"Skipping non-UTF-8 filename in `--find-links` directory: {}",
entry.file_name().to_string_lossy()
filename.to_string_lossy()
);
continue;
};
@ -283,16 +288,16 @@ impl<'a> FlatIndexClient<'a> {
let file = File {
dist_info_metadata: false,
filename: filename.to_string(),
filename: filename.into(),
hashes: HashDigests::empty(),
requires_python: None,
size: None,
upload_time_utc_ms: None,
url: FileLocation::AbsoluteUrl(UrlString::from(url)),
url: FileLocation::AbsoluteUrl(UrlString::from(&url)),
yanked: None,
};
let Some(filename) = DistFilename::try_from_normalized_filename(&filename) else {
let Some(filename) = DistFilename::try_from_normalized_filename(filename) else {
debug!(
"Ignoring `--find-links` entry (expected a wheel or source distribution filename): {}",
entry.path().display()

View File

@ -218,8 +218,8 @@ impl SimpleHtml {
yanked,
requires_python,
hashes,
filename: filename.to_string(),
url: decoded.to_string(),
filename: filename.into(),
url: decoded.into(),
size,
upload_time,
}))

View File

@ -14,6 +14,12 @@ use tokio::sync::Semaphore;
use tracing::{info_span, instrument, trace, warn, Instrument};
use url::Url;
use crate::base_client::{BaseClientBuilder, ExtraMiddleware};
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::rkyvutil::OwnedArchive;
use crate::{BaseClient, CachedClient, CachedClientError, Error, ErrorKind};
use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
use uv_configuration::KeyringProviderType;
use uv_configuration::{IndexStrategy, TrustedHost};
@ -27,13 +33,7 @@ use uv_pep440::Version;
use uv_pep508::MarkerEnvironment;
use uv_platform_tags::Platform;
use uv_pypi_types::{ResolutionMetadata, SimpleJson};
use crate::base_client::{BaseClientBuilder, ExtraMiddleware};
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::rkyvutil::OwnedArchive;
use crate::{BaseClient, CachedClient, CachedClientError, Error, ErrorKind};
use uv_small_str::SmallString;
/// A builder for an [`RegistryClient`].
#[derive(Debug, Clone)]
@ -894,10 +894,12 @@ impl SimpleMetadata {
fn from_files(files: Vec<uv_pypi_types::File>, package_name: &PackageName, base: &Url) -> Self {
let mut map: BTreeMap<Version, VersionFiles> = BTreeMap::default();
// Convert to a reference-counted string.
let base = SmallString::from(base.as_str());
// Group the distributions by version and kind
for file in files {
let Some(filename) =
DistFilename::try_from_filename(file.filename.as_str(), package_name)
let Some(filename) = DistFilename::try_from_filename(&file.filename, package_name)
else {
warn!("Skipping file for {package_name}: {}", file.filename);
continue;
@ -906,7 +908,7 @@ impl SimpleMetadata {
DistFilename::SourceDistFilename(ref inner) => &inner.version,
DistFilename::WheelFilename(ref inner) => &inner.version,
};
let file = match File::try_from(file, base) {
let file = match File::try_from(file, &base) {
Ok(file) => file,
Err(err) => {
// Ignore files with unparsable version specifiers.

View File

@ -27,6 +27,7 @@ uv-pep440 = { workspace = true }
uv-pep508 = { workspace = true }
uv-platform-tags = { workspace = true }
uv-pypi-types = { workspace = true }
uv-small-str = { workspace = true }
arcstr = { workspace = true }
bitflags = { workspace = true }

View File

@ -8,6 +8,7 @@ use url::Url;
use uv_pep440::{VersionSpecifiers, VersionSpecifiersParseError};
use uv_pep508::split_scheme;
use uv_pypi_types::{CoreMetadata, HashDigests, Yanked};
use uv_small_str::SmallString;
/// Error converting [`uv_pypi_types::File`] to [`distribution_type::File`].
#[derive(Debug, thiserror::Error)]
@ -23,7 +24,7 @@ pub enum FileConversionError {
#[rkyv(derive(Debug))]
pub struct File {
pub dist_info_metadata: bool,
pub filename: String,
pub filename: SmallString,
pub hashes: HashDigests,
pub requires_python: Option<VersionSpecifiers>,
pub size: Option<u64>,
@ -38,7 +39,10 @@ pub struct File {
impl File {
/// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
pub fn try_from(file: uv_pypi_types::File, base: &Url) -> Result<Self, FileConversionError> {
pub fn try_from(
file: uv_pypi_types::File,
base: &SmallString,
) -> Result<Self, FileConversionError> {
Ok(Self {
dist_info_metadata: file
.core_metadata
@ -56,7 +60,7 @@ impl File {
upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond),
url: match split_scheme(&file.url) {
Some(..) => FileLocation::AbsoluteUrl(UrlString::new(file.url)),
None => FileLocation::RelativeUrl(base.to_string(), file.url),
None => FileLocation::RelativeUrl(base.clone(), file.url),
},
yanked: file.yanked,
})
@ -68,7 +72,7 @@ impl File {
#[rkyv(derive(Debug))]
pub enum FileLocation {
/// URL relative to the base URL.
RelativeUrl(String, String),
RelativeUrl(SmallString, SmallString),
/// Absolute URL.
AbsoluteUrl(UrlString),
}
@ -89,12 +93,12 @@ impl FileLocation {
match *self {
FileLocation::RelativeUrl(ref base, ref path) => {
let base_url = Url::parse(base).map_err(|err| ToUrlError::InvalidBase {
base: base.clone(),
base: base.to_string(),
err,
})?;
let joined = base_url.join(path).map_err(|err| ToUrlError::InvalidJoin {
base: base.clone(),
path: path.clone(),
base: base.to_string(),
path: path.to_string(),
err,
})?;
Ok(joined)
@ -102,17 +106,6 @@ impl FileLocation {
FileLocation::AbsoluteUrl(ref absolute) => absolute.to_url(),
}
}
/// Convert this location to a URL.
///
/// This method is identical to [`FileLocation::to_url`] except it avoids parsing absolute URLs
/// as they are already guaranteed to be valid.
pub fn to_url_string(&self) -> Result<UrlString, ToUrlError> {
match *self {
FileLocation::AbsoluteUrl(ref absolute) => Ok(absolute.clone()),
FileLocation::RelativeUrl(_, _) => Ok(self.to_url()?.into()),
}
}
}
impl Display for FileLocation {
@ -143,18 +136,18 @@ impl Display for FileLocation {
)]
#[serde(transparent)]
#[rkyv(derive(Debug))]
pub struct UrlString(String);
pub struct UrlString(SmallString);
impl UrlString {
/// Create a new [`UrlString`] from a [`String`].
pub fn new(url: String) -> Self {
pub fn new(url: SmallString) -> Self {
Self(url)
}
/// Converts a [`UrlString`] to a [`Url`].
pub fn to_url(&self) -> Result<Url, ToUrlError> {
Url::from_str(&self.0).map_err(|err| ToUrlError::InvalidAbsolute {
absolute: self.0.clone(),
absolute: self.0.to_string(),
err,
})
}
@ -175,8 +168,8 @@ impl UrlString {
self.as_ref()
.split_once('#')
.map(|(path, _)| path)
.unwrap_or(self.as_ref())
.to_string(),
.map(SmallString::from)
.unwrap_or_else(|| self.0.clone()),
)
}
}
@ -189,13 +182,13 @@ impl AsRef<str> for UrlString {
impl From<Url> for UrlString {
fn from(value: Url) -> Self {
UrlString(value.to_string())
Self(value.as_str().into())
}
}
impl From<&Url> for UrlString {
fn from(value: &Url) -> Self {
UrlString(value.to_string())
Self(value.as_str().into())
}
}
@ -248,28 +241,28 @@ mod tests {
#[test]
fn base_str() {
let url = UrlString("https://example.com/path?query#fragment".to_string());
let url = UrlString("https://example.com/path?query#fragment".into());
assert_eq!(url.base_str(), "https://example.com/path");
let url = UrlString("https://example.com/path#fragment".to_string());
let url = UrlString("https://example.com/path#fragment".into());
assert_eq!(url.base_str(), "https://example.com/path");
let url = UrlString("https://example.com/path".to_string());
let url = UrlString("https://example.com/path".into());
assert_eq!(url.base_str(), "https://example.com/path");
}
#[test]
fn without_fragment() {
let url = UrlString("https://example.com/path?query#fragment".to_string());
let url = UrlString("https://example.com/path?query#fragment".into());
assert_eq!(
url.without_fragment(),
UrlString("https://example.com/path?query".to_string())
UrlString("https://example.com/path?query".into())
);
let url = UrlString("https://example.com/path#fragment".to_string());
let url = UrlString("https://example.com/path#fragment".into());
assert_eq!(url.base_str(), "https://example.com/path");
let url = UrlString("https://example.com/path".to_string());
let url = UrlString("https://example.com/path".into());
assert_eq!(url.base_str(), "https://example.com/path");
}
}

View File

@ -43,6 +43,11 @@ impl BaseUrl {
pub fn as_url(&self) -> &Url {
&self.0
}
/// Return the underlying [`Url`] as a serialized string.
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
impl From<Url> for BaseUrl {

View File

@ -40,10 +40,12 @@ fn sorted_simple_json_files<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<File>
pub struct File {
// PEP 714-renamed field, followed by PEP 691-compliant field, followed by non-PEP 691-compliant
// alias used by PyPI.
//
// TODO(charlie): Use a single value here and move this into the deserializer, to save space.
pub core_metadata: Option<CoreMetadata>,
pub dist_info_metadata: Option<CoreMetadata>,
pub data_dist_info_metadata: Option<CoreMetadata>,
pub filename: String,
pub filename: SmallString,
pub hashes: Hashes,
/// There are a number of invalid specifiers on PyPI, so we first try to parse it into a
/// [`VersionSpecifiers`] according to spec (PEP 440), then a [`LenientVersionSpecifiers`] with
@ -53,7 +55,7 @@ pub struct File {
pub requires_python: Option<Result<VersionSpecifiers, VersionSpecifiersParseError>>,
pub size: Option<u64>,
pub upload_time: Option<Timestamp>,
pub url: String,
pub url: SmallString,
pub yanked: Option<Box<Yanked>>,
}

View File

@ -34,6 +34,7 @@ uv-platform-tags = { workspace = true }
uv-pypi-types = { workspace = true }
uv-python = { workspace = true }
uv-requirements-txt = { workspace = true }
uv-small-str = { workspace = true }
uv-static = { workspace = true }
uv-types = { workspace = true }
uv-warnings = { workspace = true }

View File

@ -43,6 +43,7 @@ use uv_pypi_types::{
redact_credentials, ConflictPackage, Conflicts, HashDigest, HashDigests, ParsedArchiveUrl,
ParsedGitUrl, Requirement, RequirementSource,
};
use uv_small_str::SmallString;
use uv_types::{BuildContext, HashStrategy};
use uv_workspace::WorkspaceMember;
@ -2395,7 +2396,7 @@ impl Package {
let ext = SourceDistExtension::from_path(filename.as_ref())?;
let file = Box::new(uv_distribution_types::File {
dist_info_metadata: false,
filename: filename.to_string(),
filename: SmallString::from(filename),
hashes: sdist.hash().map_or(HashDigests::empty(), |hash| {
HashDigests::from(hash.0.clone())
}),
@ -2446,7 +2447,7 @@ impl Package {
let ext = SourceDistExtension::from_path(filename.as_ref())?;
let file = Box::new(uv_distribution_types::File {
dist_info_metadata: false,
filename: filename.to_string(),
filename: SmallString::from(filename),
hashes: sdist.hash().map_or(HashDigests::empty(), |hash| {
HashDigests::from(hash.0.clone())
}),
@ -4027,7 +4028,7 @@ impl Wheel {
};
let file = Box::new(uv_distribution_types::File {
dist_info_metadata: false,
filename: filename.to_string(),
filename: SmallString::from(filename.to_string()),
hashes: self.hash.iter().map(|h| h.0.clone()).collect(),
requires_python: None,
size: self.size,
@ -4059,7 +4060,7 @@ impl Wheel {
.map_err(|()| LockErrorKind::PathToUrl)?;
let file = Box::new(uv_distribution_types::File {
dist_info_metadata: false,
filename: filename.to_string(),
filename: SmallString::from(filename.to_string()),
hashes: self.hash.iter().map(|h| h.0.clone()).collect(),
requires_python: None,
size: self.size,