diff --git a/Cargo.lock b/Cargo.lock index 5a9ed6550..936a54bc5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -909,6 +909,7 @@ dependencies = [ "tracing", "url", "urlencoding", + "uv-auth", "uv-fs", "uv-git", "uv-normalize", @@ -4193,6 +4194,14 @@ dependencies = [ "which", ] +[[package]] +name = "uv-auth" +version = "0.0.1" +dependencies = [ + "tracing", + "url", +] + [[package]] name = "uv-build" version = "0.0.1" @@ -4284,6 +4293,7 @@ dependencies = [ "tracing", "url", "urlencoding", + "uv-auth", "uv-cache", "uv-fs", "uv-normalize", diff --git a/crates/distribution-types/Cargo.toml b/crates/distribution-types/Cargo.toml index 00c127440..488410c04 100644 --- a/crates/distribution-types/Cargo.toml +++ b/crates/distribution-types/Cargo.toml @@ -18,6 +18,7 @@ distribution-filename = { path = "../distribution-filename", features = ["serde" pep440_rs = { path = "../pep440-rs" } pep508_rs = { path = "../pep508-rs" } platform-tags = { path = "../platform-tags" } +uv-auth = { path = "../uv-auth" } uv-fs = { path = "../uv-fs" } uv-git = { path = "../uv-git", features = ["vendored-openssl"] } uv-normalize = { path = "../uv-normalize" } diff --git a/crates/distribution-types/src/file.rs b/crates/distribution-types/src/file.rs index 765fa0dc4..920d144fe 100644 --- a/crates/distribution-types/src/file.rs +++ b/crates/distribution-types/src/file.rs @@ -6,6 +6,8 @@ use thiserror::Error; use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError}; use pypi_types::{DistInfoMetadata, Hashes, Yanked}; +use url::Url; +use uv_auth::safe_copy_url_auth_to_str; /// Error converting [`pypi_types::File`] to [`distribution_type::File`]. #[derive(Debug, Error)] @@ -39,7 +41,7 @@ pub struct File { impl File { /// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers - pub fn try_from(file: pypi_types::File, base: &str) -> Result { + pub fn try_from(file: pypi_types::File, base: &Url) -> Result { Ok(Self { dist_info_metadata: file.dist_info_metadata, filename: file.filename, @@ -51,7 +53,12 @@ impl File { size: file.size, upload_time_utc_ms: file.upload_time.map(|dt| dt.timestamp_millis()), url: if file.url.contains("://") { - FileLocation::AbsoluteUrl(file.url) + let url = safe_copy_url_auth_to_str(base, &file.url) + .map_err(|err| FileConversionError::Url(file.url.clone(), err))? + .map(|url| url.to_string()) + .unwrap_or(file.url); + + FileLocation::AbsoluteUrl(url) } else { FileLocation::RelativeUrl(base.to_string(), file.url) }, diff --git a/crates/pypi-types/src/base_url.rs b/crates/pypi-types/src/base_url.rs index ef77dc594..0afef4ebf 100644 --- a/crates/pypi-types/src/base_url.rs +++ b/crates/pypi-types/src/base_url.rs @@ -76,6 +76,12 @@ impl BaseUrl { pub fn as_url(&self) -> &Url { &self.0 } + + /// Convert to the underlying [`Url`]. + #[must_use] + pub fn into_url(self) -> Url { + self.0 + } } impl From for BaseUrl { diff --git a/crates/uv-auth/Cargo.toml b/crates/uv-auth/Cargo.toml new file mode 100644 index 000000000..c5bf668b9 --- /dev/null +++ b/crates/uv-auth/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "uv-auth" +version = "0.0.1" +edition = "2021" + +[dependencies] +url = { workspace = true } +tracing = { workspace = true } diff --git a/crates/uv-client/src/auth.rs b/crates/uv-auth/src/lib.rs similarity index 75% rename from crates/uv-client/src/auth.rs rename to crates/uv-auth/src/lib.rs index e2747d3f0..0f9635d51 100644 --- a/crates/uv-client/src/auth.rs +++ b/crates/uv-auth/src/lib.rs @@ -1,26 +1,39 @@ +/// HTTP authentication utilities. use tracing::warn; use url::Url; -/// HTTP authentication utilities. +/// Optimized version of [`safe_copy_url_auth`] which avoids parsing a string +/// into a URL unless the given URL has authentication to copy. Useful for patterns +/// where the returned URL would immediately be cast into a string. +/// +/// Returns [`Err`] if there is authentication to copy and `new_url` is not a valid URL. +/// Returns [`None`] if there is no authentication to copy. +pub fn safe_copy_url_auth_to_str( + trusted_url: &Url, + new_url: &str, +) -> Result, url::ParseError> { + if trusted_url.username().is_empty() && trusted_url.password().is_none() { + return Ok(None); + } + + let new_url = Url::parse(new_url)?; + Ok(Some(safe_copy_url_auth(trusted_url, new_url))) +} /// Copy authentication from one URL to another URL if applicable. /// /// See [`should_retain_auth`] for details on when authentication is retained. #[must_use] -pub(crate) fn safe_copy_auth(request_url: &Url, mut response_url: Url) -> Url { - if should_retain_auth(request_url, &response_url) { - response_url - .set_username(request_url.username()) - .unwrap_or_else(|_| { - warn!("Failed to transfer username to response URL: {response_url}") - }); - response_url - .set_password(request_url.password()) - .unwrap_or_else(|_| { - warn!("Failed to transfer password to response URL: {response_url}") - }); +pub fn safe_copy_url_auth(trusted_url: &Url, mut new_url: Url) -> Url { + if should_retain_auth(trusted_url, &new_url) { + new_url + .set_username(trusted_url.username()) + .unwrap_or_else(|_| warn!("Failed to transfer username to response URL: {new_url}")); + new_url + .set_password(trusted_url.password()) + .unwrap_or_else(|_| warn!("Failed to transfer password to response URL: {new_url}")); } - response_url + new_url } /// Determine if authentication information should be retained on a new URL. @@ -28,7 +41,7 @@ pub(crate) fn safe_copy_auth(request_url: &Url, mut response_url: Url) -> Url { /// /// /// -fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool { +fn should_retain_auth(trusted_url: &Url, new_url: &Url) -> bool { // The "scheme" and "authority" components must match to retain authentication // The "authority", is composed of the host and port. @@ -37,12 +50,12 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool { // Note some clients such as Python's `requests` library allow an upgrade // from `http` to `https` but this is not spec-compliant. // - if request_url.scheme() != response_url.scheme() { + if trusted_url.scheme() != new_url.scheme() { return false; } // The host must always be an exact match. - if request_url.host() != response_url.host() { + if trusted_url.host() != new_url.host() { return false; } @@ -50,7 +63,7 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool { // The port is only allowed to differ if it it matches the "default port" for the scheme. // However, `reqwest` sets the `port` to `None` if it matches the default port so we do // not need any special handling here. - if request_url.port() != response_url.port() { + if trusted_url.port() != new_url.port() { return false; } @@ -61,7 +74,7 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool { mod tests { use url::{ParseError, Url}; - use crate::auth::should_retain_auth; + use crate::should_retain_auth; #[test] fn test_should_retain_auth() -> Result<(), ParseError> { diff --git a/crates/uv-client/Cargo.toml b/crates/uv-client/Cargo.toml index cb3f1aa3e..3c6a36c33 100644 --- a/crates/uv-client/Cargo.toml +++ b/crates/uv-client/Cargo.toml @@ -11,6 +11,7 @@ install-wheel-rs = { path = "../install-wheel-rs" } pep440_rs = { path = "../pep440-rs" } pep508_rs = { path = "../pep508-rs" } platform-tags = { path = "../platform-tags" } +uv-auth = { path = "../uv-auth" } uv-cache = { path = "../uv-cache" } uv-fs = { path = "../uv-fs", features = ["tokio"] } uv-normalize = { path = "../uv-normalize" } diff --git a/crates/uv-client/src/flat_index.rs b/crates/uv-client/src/flat_index.rs index 28b66b501..02ec862c2 100644 --- a/crates/uv-client/src/flat_index.rs +++ b/crates/uv-client/src/flat_index.rs @@ -16,10 +16,10 @@ use distribution_types::{ use pep440_rs::Version; use platform_tags::Tags; use pypi_types::{Hashes, Yanked}; +use uv_auth::safe_copy_url_auth; use uv_cache::{Cache, CacheBucket}; use uv_normalize::PackageName; -use crate::auth::safe_copy_auth; use crate::cached_client::{CacheControl, CachedClientError}; use crate::html::SimpleHtml; use crate::{Connectivity, Error, ErrorKind, RegistryClient}; @@ -156,16 +156,17 @@ impl<'a> FlatIndexClient<'a> { async { // Use the response URL, rather than the request URL, as the base for relative URLs. // This ensures that we handle redirects and other URL transformations correctly. - let url = safe_copy_auth(url, response.url().clone()); + let url = safe_copy_url_auth(url, response.url().clone()); let text = response.text().await.map_err(ErrorKind::RequestError)?; let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url) .map_err(|err| Error::from_html_err(err, url.clone()))?; + let base = safe_copy_url_auth(&url, base.into_url()); let files: Vec = files .into_iter() .filter_map(|file| { - match File::try_from(file, base.as_url().as_str()) { + match File::try_from(file, &base) { Ok(file) => Some(file), Err(err) => { // Ignore files with unparsable version specifiers. diff --git a/crates/uv-client/src/lib.rs b/crates/uv-client/src/lib.rs index 4d1acdb27..1002f9d3f 100644 --- a/crates/uv-client/src/lib.rs +++ b/crates/uv-client/src/lib.rs @@ -7,7 +7,6 @@ pub use registry_client::{ }; pub use rkyvutil::OwnedArchive; -mod auth; mod cached_client; mod error; mod flat_index; diff --git a/crates/uv-client/src/registry_client.rs b/crates/uv-client/src/registry_client.rs index 416b104df..3416c3027 100644 --- a/crates/uv-client/src/registry_client.rs +++ b/crates/uv-client/src/registry_client.rs @@ -21,11 +21,11 @@ use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Nam use install_wheel_rs::{find_dist_info, is_metadata_entry}; use pep440_rs::Version; use pypi_types::{Metadata21, SimpleJson}; +use uv_auth::safe_copy_url_auth; use uv_cache::{Cache, CacheBucket, WheelCache}; use uv_normalize::PackageName; use uv_warnings::warn_user_once; -use crate::auth::safe_copy_auth; use crate::cached_client::CacheControl; use crate::html::SimpleHtml; use crate::middleware::OfflineMiddleware; @@ -249,7 +249,7 @@ impl RegistryClient { async { // Use the response URL, rather than the request URL, as the base for relative URLs. // This ensures that we handle redirects and other URL transformations correctly. - let url = safe_copy_auth(&url, response.url().clone()); + let url = safe_copy_url_auth(&url, response.url().clone()); let content_type = response .headers() @@ -271,17 +271,16 @@ impl RegistryClient { let bytes = response.bytes().await.map_err(ErrorKind::RequestError)?; let data: SimpleJson = serde_json::from_slice(bytes.as_ref()) .map_err(|err| Error::from_json_err(err, url.clone()))?; - let metadata = - SimpleMetadata::from_files(data.files, package_name, url.as_str()); - metadata + + SimpleMetadata::from_files(data.files, package_name, &url) } MediaType::Html => { let text = response.text().await.map_err(ErrorKind::RequestError)?; let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url) .map_err(|err| Error::from_html_err(err, url.clone()))?; - let metadata = - SimpleMetadata::from_files(files, package_name, base.as_url().as_str()); - metadata + let base = safe_copy_url_auth(&url, base.into_url()); + + SimpleMetadata::from_files(files, package_name, &base) } }; OwnedArchive::from_unarchived(&unarchived) @@ -670,7 +669,7 @@ impl SimpleMetadata { self.0.iter() } - fn from_files(files: Vec, package_name: &PackageName, base: &str) -> Self { + fn from_files(files: Vec, package_name: &PackageName, base: &Url) -> Self { let mut map: BTreeMap = BTreeMap::default(); // Group the distributions by version and kind @@ -810,11 +809,11 @@ mod tests { } "#; let data: SimpleJson = serde_json::from_str(response).unwrap(); - let base = "https://pypi.org/simple/pyflyby/"; + let base = Url::parse("https://pypi.org/simple/pyflyby/").unwrap(); let simple_metadata = SimpleMetadata::from_files( data.files, &PackageName::from_str("pyflyby").unwrap(), - base, + &base, ); let versions: Vec = simple_metadata .iter()