use std::collections::BTreeMap; use std::env; use std::fmt::Debug; use std::path::Path; use std::str::FromStr; use async_http_range_reader::AsyncHttpRangeReader; use futures::{FutureExt, TryStreamExt}; use http::HeaderMap; use reqwest::{Client, ClientBuilder, Response, StatusCode}; use reqwest_netrc::NetrcMiddleware; use reqwest_retry::policies::ExponentialBackoff; use reqwest_retry::RetryTransientMiddleware; use serde::{Deserialize, Serialize}; use tokio::io::AsyncReadExt; use tokio_util::compat::FuturesAsyncReadCompatExt; use tracing::{debug, info_span, instrument, trace, warn, Instrument}; use url::Url; use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Name}; use install_wheel_rs::{find_dist_info, is_metadata_entry}; use pep440_rs::Version; use pypi_types::{Metadata23, SimpleJson}; use uv_auth::safe_copy_url_auth; use uv_cache::{Cache, CacheBucket, WheelCache}; use uv_normalize::PackageName; use uv_version::version; use uv_warnings::warn_user_once; use crate::cached_client::CacheControl; use crate::html::SimpleHtml; use crate::middleware::OfflineMiddleware; use crate::remote_metadata::wheel_metadata_from_remote_zip; use crate::rkyvutil::OwnedArchive; use crate::{CachedClient, CachedClientError, Error, ErrorKind}; /// A builder for an [`RegistryClient`]. #[derive(Debug, Clone)] pub struct RegistryClientBuilder { index_urls: IndexUrls, retries: u32, connectivity: Connectivity, cache: Cache, client: Option, } impl RegistryClientBuilder { pub fn new(cache: Cache) -> Self { Self { index_urls: IndexUrls::default(), cache, connectivity: Connectivity::Online, retries: 3, client: None, } } } impl RegistryClientBuilder { #[must_use] pub fn index_urls(mut self, index_urls: IndexUrls) -> Self { self.index_urls = index_urls; self } #[must_use] pub fn connectivity(mut self, connectivity: Connectivity) -> Self { self.connectivity = connectivity; self } #[must_use] pub fn retries(mut self, retries: u32) -> Self { self.retries = retries; self } #[must_use] pub fn cache(mut self, cache: Cache) -> Self { self.cache = cache; self } #[must_use] pub fn client(mut self, client: Client) -> Self { self.client = Some(client); self } pub fn build(self) -> RegistryClient { // Create user agent. let user_agent_string = format!("uv/{}", version()); // Timeout options, matching https://doc.rust-lang.org/nightly/cargo/reference/config.html#httptimeout // `UV_REQUEST_TIMEOUT` is provided for backwards compatibility with v0.1.6 let default_timeout = 5 * 60; let timeout = env::var("UV_HTTP_TIMEOUT") .or_else(|_| env::var("UV_REQUEST_TIMEOUT")) .or_else(|_| env::var("HTTP_TIMEOUT")) .and_then(|value| { value.parse::() .or_else(|_| { // On parse error, warn and use the default timeout warn_user_once!("Ignoring invalid value from environment for UV_HTTP_TIMEOUT. Expected integer number of seconds, got \"{value}\"."); Ok(default_timeout) }) }) .unwrap_or(default_timeout); debug!("Using registry request timeout of {}s", timeout); // Initialize the base client. let client = self.client.unwrap_or_else(|| { // Disallow any connections. let client_core = ClientBuilder::new() .user_agent(user_agent_string) .pool_max_idle_per_host(20) .timeout(std::time::Duration::from_secs(timeout)); client_core.build().expect("Failed to build HTTP client.") }); // Wrap in any relevant middleware. let client = match self.connectivity { Connectivity::Online => { let client = reqwest_middleware::ClientBuilder::new(client.clone()); // Initialize the retry strategy. let retry_policy = ExponentialBackoff::builder().build_with_max_retries(self.retries); let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy); let client = client.with(retry_strategy); // Initialize the netrc middleware. let client = if let Ok(netrc) = NetrcMiddleware::new() { client.with_init(netrc) } else { client }; client.build() } Connectivity::Offline => reqwest_middleware::ClientBuilder::new(client.clone()) .with(OfflineMiddleware) .build(), }; // Wrap in the cache middleware. let client = CachedClient::new(client); RegistryClient { index_urls: self.index_urls, cache: self.cache, connectivity: self.connectivity, client, timeout, } } } /// A client for fetching packages from a `PyPI`-compatible index. #[derive(Debug, Clone)] pub struct RegistryClient { /// The index URLs to use for fetching packages. index_urls: IndexUrls, /// The underlying HTTP client. client: CachedClient, /// Used for the remote wheel METADATA cache. cache: Cache, /// The connectivity mode to use. connectivity: Connectivity, /// Configured client timeout, in seconds. timeout: u64, } impl RegistryClient { /// Return the [`CachedClient`] used by this client. pub fn cached_client(&self) -> &CachedClient { &self.client } /// Return the [`Connectivity`] mode used by this client. pub fn connectivity(&self) -> Connectivity { self.connectivity } /// Return the timeout this client is configured with, in seconds. pub fn timeout(&self) -> u64 { self.timeout } /// Set the index URLs to use for fetching packages. #[must_use] pub fn with_index_url(self, index_urls: IndexUrls) -> Self { Self { index_urls, ..self } } /// Fetch a package from the `PyPI` simple API. /// /// "simple" here refers to [PEP 503 – Simple Repository API](https://peps.python.org/pep-0503/) /// and [PEP 691 – JSON-based Simple API for Python Package Indexes](https://peps.python.org/pep-0691/), /// which the pypi json api approximately implements. #[instrument("simple_api", skip_all, fields(package = % package_name))] pub async fn simple( &self, package_name: &PackageName, ) -> Result<(IndexUrl, OwnedArchive), Error> { let mut it = self.index_urls.indexes().peekable(); if it.peek().is_none() { return Err(ErrorKind::NoIndex(package_name.as_ref().to_string()).into()); } for index in it { let result = self.simple_single_index(package_name, index).await?; return match result { Ok(metadata) => Ok((index.clone(), metadata)), Err(CachedClientError::Client(err)) => match err.into_kind() { ErrorKind::Offline(_) => continue, ErrorKind::ReqwestError(err) => { if err.status() == Some(StatusCode::NOT_FOUND) || err.status() == Some(StatusCode::FORBIDDEN) { continue; } Err(ErrorKind::from(err).into()) } other => Err(other.into()), }, Err(CachedClientError::Callback(err)) => Err(err), }; } match self.connectivity { Connectivity::Online => { Err(ErrorKind::PackageNotFound(package_name.to_string()).into()) } Connectivity::Offline => Err(ErrorKind::Offline(package_name.to_string()).into()), } } async fn simple_single_index( &self, package_name: &PackageName, index: &IndexUrl, ) -> Result, CachedClientError>, Error> { // Format the URL for PyPI. let mut url: Url = index.clone().into(); url.path_segments_mut() .unwrap() .pop_if_empty() .push(package_name.as_ref()) // The URL *must* end in a trailing slash for proper relative path behavior // ref https://github.com/servo/rust-url/issues/333 .push(""); trace!("Fetching metadata for {package_name} from {url}"); let cache_entry = self.cache.entry( CacheBucket::Simple, Path::new(&match index { IndexUrl::Pypi(_) => "pypi".to_string(), IndexUrl::Url(url) => cache_key::digest(&cache_key::CanonicalUrl::new(url)), }), format!("{package_name}.rkyv"), ); let cache_control = match self.connectivity { Connectivity::Online => CacheControl::from( self.cache .freshness(&cache_entry, Some(package_name)) .map_err(ErrorKind::Io)?, ), Connectivity::Offline => CacheControl::AllowStale, }; let simple_request = self .client .uncached() .get(url.clone()) .header("Accept-Encoding", "gzip") .header("Accept", MediaType::accepts()) .build() .map_err(ErrorKind::from)?; let parse_simple_response = |response: Response| { async { // Use the response URL, rather than the request URL, as the base for relative URLs. // This ensures that we handle redirects and other URL transformations correctly. let url = safe_copy_url_auth(&url, response.url().clone()); let content_type = response .headers() .get("content-type") .ok_or_else(|| Error::from(ErrorKind::MissingContentType(url.clone())))?; let content_type = content_type.to_str().map_err(|err| { Error::from(ErrorKind::InvalidContentTypeHeader(url.clone(), err)) })?; let media_type = content_type.split(';').next().unwrap_or(content_type); let media_type = MediaType::from_str(media_type).ok_or_else(|| { Error::from(ErrorKind::UnsupportedMediaType( url.clone(), media_type.to_string(), )) })?; let unarchived = match media_type { MediaType::Json => { let bytes = response.bytes().await.map_err(ErrorKind::from)?; let data: SimpleJson = serde_json::from_slice(bytes.as_ref()) .map_err(|err| Error::from_json_err(err, url.clone()))?; SimpleMetadata::from_files(data.files, package_name, &url) } MediaType::Html => { let text = response.text().await.map_err(ErrorKind::from)?; let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url) .map_err(|err| Error::from_html_err(err, url.clone()))?; let base = safe_copy_url_auth(&url, base.into_url()); SimpleMetadata::from_files(files, package_name, &base) } }; OwnedArchive::from_unarchived(&unarchived) } .boxed() .instrument(info_span!("parse_simple_api", package = %package_name)) }; let result = self .client .get_cacheable( simple_request, &cache_entry, cache_control, parse_simple_response, ) .await; Ok(result) } /// Fetch the metadata for a remote wheel file. /// /// For a remote wheel, we try the following ways to fetch the metadata: /// 1. From a [PEP 658](https://peps.python.org/pep-0658/) data-dist-info-metadata url /// 2. From a remote wheel by partial zip reading /// 3. From a (temp) download of a remote wheel (this is a fallback, the webserver should support range requests) #[instrument(skip_all, fields(% built_dist))] pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result { let metadata = match &built_dist { BuiltDist::Registry(wheel) => match &wheel.file.url { FileLocation::RelativeUrl(base, url) => { let url = pypi_types::base_url_join_relative(base, url) .map_err(ErrorKind::JoinRelativeError)?; self.wheel_metadata_registry(&wheel.index, &wheel.file, &url) .await? } FileLocation::AbsoluteUrl(url) => { let url = Url::parse(url).map_err(ErrorKind::UrlParseError)?; self.wheel_metadata_registry(&wheel.index, &wheel.file, &url) .await? } FileLocation::Path(path) => { let file = fs_err::tokio::File::open(&path) .await .map_err(ErrorKind::Io)?; let reader = tokio::io::BufReader::new(file); read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader) .await? } }, BuiltDist::DirectUrl(wheel) => { self.wheel_metadata_no_pep658( &wheel.filename, &wheel.url, WheelCache::Url(&wheel.url), ) .await? } BuiltDist::Path(wheel) => { let file = fs_err::tokio::File::open(&wheel.path) .await .map_err(ErrorKind::Io)?; let reader = tokio::io::BufReader::new(file); read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader).await? } }; if metadata.name != *built_dist.name() { return Err(Error::from(ErrorKind::NameMismatch { metadata: metadata.name, given: built_dist.name().clone(), })); } Ok(metadata) } /// Fetch the metadata from a wheel file. async fn wheel_metadata_registry( &self, index: &IndexUrl, file: &File, url: &Url, ) -> Result { // If the metadata file is available at its own url (PEP 658), download it from there. let filename = WheelFilename::from_str(&file.filename).map_err(ErrorKind::WheelFilename)?; if file .dist_info_metadata .as_ref() .is_some_and(pypi_types::DistInfoMetadata::is_available) { let mut url = url.clone(); url.set_path(&format!("{}.metadata", url.path())); let cache_entry = self.cache.entry( CacheBucket::Wheels, WheelCache::Index(index).remote_wheel_dir(filename.name.as_ref()), format!("{}.msgpack", filename.stem()), ); let cache_control = match self.connectivity { Connectivity::Online => CacheControl::from( self.cache .freshness(&cache_entry, Some(&filename.name)) .map_err(ErrorKind::Io)?, ), Connectivity::Offline => CacheControl::AllowStale, }; let response_callback = |response: Response| async { let bytes = response.bytes().await.map_err(ErrorKind::from)?; info_span!("parse_metadata21") .in_scope(|| Metadata23::parse_metadata(bytes.as_ref())) .map_err(|err| { Error::from(ErrorKind::MetadataParseError( filename, url.to_string(), Box::new(err), )) }) }; let req = self .client .uncached() .get(url.clone()) .build() .map_err(ErrorKind::from)?; Ok(self .client .get_serde(req, &cache_entry, cache_control, response_callback) .await?) } else { // If we lack PEP 658 support, try using HTTP range requests to read only the // `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel // into the cache and read from there self.wheel_metadata_no_pep658(&filename, url, WheelCache::Index(index)) .await } } /// Get the wheel metadata if it isn't available in an index through PEP 658 async fn wheel_metadata_no_pep658<'data>( &self, filename: &'data WheelFilename, url: &'data Url, cache_shard: WheelCache<'data>, ) -> Result { let cache_entry = self.cache.entry( CacheBucket::Wheels, cache_shard.remote_wheel_dir(filename.name.as_ref()), format!("{}.msgpack", filename.stem()), ); let cache_control = match self.connectivity { Connectivity::Online => CacheControl::from( self.cache .freshness(&cache_entry, Some(&filename.name)) .map_err(ErrorKind::Io)?, ), Connectivity::Offline => CacheControl::AllowStale, }; let req = self .client .uncached() .head(url.clone()) .header( "accept-encoding", http::HeaderValue::from_static("identity"), ) .build() .map_err(ErrorKind::from)?; // Copy authorization headers from the HEAD request to subsequent requests let mut headers = HeaderMap::default(); if let Some(authorization) = req.headers().get("authorization") { headers.append("authorization", authorization.clone()); } // This response callback is special, we actually make a number of subsequent requests to // fetch the file from the remote zip. let read_metadata_range_request = |response: Response| { async { let mut reader = AsyncHttpRangeReader::from_head_response( self.client.uncached(), response, headers, ) .await .map_err(ErrorKind::AsyncHttpRangeReader)?; trace!("Getting metadata for {filename} by range request"); let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?; let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| { Error::from(ErrorKind::MetadataParseError( filename.clone(), url.to_string(), Box::new(err), )) })?; Ok::>(metadata) } .boxed() .instrument(info_span!("read_metadata_range_request", wheel = %filename)) }; let result = self .client .get_serde( req, &cache_entry, cache_control, read_metadata_range_request, ) .await .map_err(crate::Error::from); match result { Ok(metadata) => return Ok(metadata), Err(err) => { if err.kind().is_http_range_requests_unsupported() { // The range request version failed. Fall back to streaming the file to search // for the METADATA file. warn!("Range requests not supported for {filename}; streaming wheel"); } else { return Err(err); } } }; // Create a request to stream the file. let req = self .client .uncached() .get(url.clone()) .build() .map_err(ErrorKind::from)?; // Stream the file, searching for the METADATA. let read_metadata_stream = |response: Response| { async { let reader = response .bytes_stream() .map_err(|err| self.handle_response_errors(err)) .into_async_read(); read_metadata_async_stream(filename, url.to_string(), reader).await } .instrument(info_span!("read_metadata_stream", wheel = %filename)) }; self.client .get_serde(req, &cache_entry, cache_control, read_metadata_stream) .await .map_err(crate::Error::from) } /// Handle a specific `reqwest` error, and convert it to [`io::Error`]. fn handle_response_errors(&self, err: reqwest::Error) -> std::io::Error { if err.is_timeout() { std::io::Error::new( std::io::ErrorKind::TimedOut, format!( "Failed to download distribution due to network timeout. Try increasing UV_HTTP_TIMEOUT (current value: {}s).", self.timeout() ), ) } else { std::io::Error::new(std::io::ErrorKind::Other, err) } } } /// Read a wheel's `METADATA` file from a zip file. async fn read_metadata_async_seek( filename: &WheelFilename, debug_source: String, reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin, ) -> Result { let mut zip_reader = async_zip::tokio::read::seek::ZipFileReader::with_tokio(reader) .await .map_err(|err| ErrorKind::Zip(filename.clone(), err))?; let (metadata_idx, _dist_info_prefix) = find_dist_info( filename, zip_reader .file() .entries() .iter() .enumerate() .filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))), ) .map_err(ErrorKind::InstallWheel)?; // Read the contents of the `METADATA` file. let mut contents = Vec::new(); zip_reader .reader_with_entry(metadata_idx) .await .map_err(|err| ErrorKind::Zip(filename.clone(), err))? .read_to_end_checked(&mut contents) .await .map_err(|err| ErrorKind::Zip(filename.clone(), err))?; let metadata = Metadata23::parse_metadata(&contents).map_err(|err| { ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err)) })?; Ok(metadata) } /// Like [`read_metadata_async_seek`], but doesn't use seek. async fn read_metadata_async_stream( filename: &WheelFilename, debug_source: String, reader: R, ) -> Result { let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader); while let Some(mut entry) = zip .next_with_entry() .await .map_err(|err| ErrorKind::Zip(filename.clone(), err))? { // Find the `METADATA` entry. let path = entry .reader() .entry() .filename() .as_str() .map_err(|err| ErrorKind::Zip(filename.clone(), err))?; if is_metadata_entry(path, filename) { let mut reader = entry.reader_mut().compat(); let mut contents = Vec::new(); reader.read_to_end(&mut contents).await.unwrap(); let metadata = Metadata23::parse_metadata(&contents).map_err(|err| { ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err)) })?; return Ok(metadata); } // Close current file to get access to the next one. See docs: // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ zip = entry .skip() .await .map_err(|err| ErrorKind::Zip(filename.clone(), err))?; } Err(ErrorKind::MetadataNotFound(filename.clone(), debug_source).into()) } #[derive( Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, )] #[archive(check_bytes)] #[archive_attr(derive(Debug))] pub struct VersionFiles { pub wheels: Vec, pub source_dists: Vec, } impl VersionFiles { fn push(&mut self, filename: DistFilename, file: File) { match filename { DistFilename::WheelFilename(name) => self.wheels.push(VersionWheel { name, file }), DistFilename::SourceDistFilename(name) => { self.source_dists.push(VersionSourceDist { name, file }) } } } pub fn all(self) -> impl Iterator { self.wheels .into_iter() .map(|VersionWheel { name, file }| (DistFilename::WheelFilename(name), file)) .chain( self.source_dists .into_iter() .map(|VersionSourceDist { name, file }| { (DistFilename::SourceDistFilename(name), file) }), ) } } #[derive(Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)] #[archive(check_bytes)] #[archive_attr(derive(Debug))] pub struct VersionWheel { pub name: WheelFilename, pub file: File, } #[derive(Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)] #[archive(check_bytes)] #[archive_attr(derive(Debug))] pub struct VersionSourceDist { pub name: SourceDistFilename, pub file: File, } #[derive( Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, )] #[archive(check_bytes)] #[archive_attr(derive(Debug))] pub struct SimpleMetadata(Vec); #[derive(Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)] #[archive(check_bytes)] #[archive_attr(derive(Debug))] pub struct SimpleMetadatum { pub version: Version, pub files: VersionFiles, } impl SimpleMetadata { pub fn iter(&self) -> impl DoubleEndedIterator { self.0.iter() } fn from_files(files: Vec, package_name: &PackageName, base: &Url) -> Self { let mut map: BTreeMap = BTreeMap::default(); // Group the distributions by version and kind for file in files { if let Some(filename) = DistFilename::try_from_filename(file.filename.as_str(), package_name) { let version = match filename { DistFilename::SourceDistFilename(ref inner) => &inner.version, DistFilename::WheelFilename(ref inner) => &inner.version, }; let file = match File::try_from(file, base) { Ok(file) => file, Err(err) => { // Ignore files with unparsable version specifiers. warn!("Skipping file for {package_name}: {err}"); continue; } }; match map.entry(version.clone()) { std::collections::btree_map::Entry::Occupied(mut entry) => { entry.get_mut().push(filename, file); } std::collections::btree_map::Entry::Vacant(entry) => { let mut files = VersionFiles::default(); files.push(filename, file); entry.insert(files); } } } } Self( map.into_iter() .map(|(version, files)| SimpleMetadatum { version, files }) .collect(), ) } } impl IntoIterator for SimpleMetadata { type Item = SimpleMetadatum; type IntoIter = std::vec::IntoIter; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } impl ArchivedSimpleMetadata { pub fn iter(&self) -> impl DoubleEndedIterator> { self.0.iter() } pub fn datum(&self, i: usize) -> Option<&rkyv::Archived> { self.0.get(i) } } #[derive(Debug)] enum MediaType { Json, Html, } impl MediaType { /// Parse a media type from a string, returning `None` if the media type is not supported. fn from_str(s: &str) -> Option { match s { "application/vnd.pypi.simple.v1+json" => Some(Self::Json), "application/vnd.pypi.simple.v1+html" | "text/html" => Some(Self::Html), _ => None, } } /// Return the `Accept` header value for all supported media types. #[inline] const fn accepts() -> &'static str { // See: https://peps.python.org/pep-0691/#version-format-selection "application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html;q=0.2, text/html;q=0.01" } } #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Connectivity { /// Allow access to the network. Online, /// Do not allow access to the network. Offline, } #[cfg(test)] mod tests { use std::str::FromStr; use url::Url; use pypi_types::{JoinRelativeError, SimpleJson}; use uv_normalize::PackageName; use crate::{html::SimpleHtml, SimpleMetadata, SimpleMetadatum}; #[test] fn ignore_failing_files() { // 1.7.7 has an invalid requires-python field (double comma), 1.7.8 is valid let response = r#" { "files": [ { "core-metadata": false, "data-dist-info-metadata": false, "filename": "pyflyby-1.7.7.tar.gz", "hashes": { "sha256": "0c4d953f405a7be1300b440dbdbc6917011a07d8401345a97e72cd410d5fb291" }, "requires-python": ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*, !=3.2.*, !=3.3.*, !=3.4.*,, !=3.5.*, !=3.6.*, <4", "size": 427200, "upload-time": "2022-05-19T09:14:36.591835Z", "url": "https://files.pythonhosted.org/packages/61/93/9fec62902d0b4fc2521333eba047bff4adbba41f1723a6382367f84ee522/pyflyby-1.7.7.tar.gz", "yanked": false }, { "core-metadata": false, "data-dist-info-metadata": false, "filename": "pyflyby-1.7.8.tar.gz", "hashes": { "sha256": "1ee37474f6da8f98653dbcc208793f50b7ace1d9066f49e2707750a5ba5d53c6" }, "requires-python": ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, <4", "size": 424460, "upload-time": "2022-08-04T10:42:02.190074Z", "url": "https://files.pythonhosted.org/packages/ad/39/17180d9806a1c50197bc63b25d0f1266f745fc3b23f11439fccb3d6baa50/pyflyby-1.7.8.tar.gz", "yanked": false } ] } "#; let data: SimpleJson = serde_json::from_str(response).unwrap(); let base = Url::parse("https://pypi.org/simple/pyflyby/").unwrap(); let simple_metadata = SimpleMetadata::from_files( data.files, &PackageName::from_str("pyflyby").unwrap(), &base, ); let versions: Vec = simple_metadata .iter() .map(|SimpleMetadatum { version, .. }| version.to_string()) .collect(); assert_eq!(versions, ["1.7.8".to_string()]); } /// Test for AWS Code Artifact registry /// Regression coverage of https://github.com/astral-sh/uv/issues/1388 #[test] fn relative_urls_code_artifact() -> Result<(), JoinRelativeError> { let text = r#" Links for flask

Links for flask

Flask-0.1.tar.gz
Flask-0.10.1.tar.gz
flask-3.0.1.tar.gz
"#; // Note the lack of a trailing `/` here is important for coverage of url-join behavior let base = Url::parse("https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/flask") .unwrap(); let SimpleHtml { base, files } = SimpleHtml::parse(text, &base).unwrap(); // Test parsing of the file urls let urls = files .iter() .map(|file| pypi_types::base_url_join_relative(base.as_url().as_str(), &file.url)) .collect::, JoinRelativeError>>()?; let urls = urls.iter().map(|url| url.as_str()).collect::>(); insta::assert_debug_snapshot!(urls, @r###" [ "https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/0.1/Flask-0.1.tar.gz#sha256=9da884457e910bf0847d396cb4b778ad9f3c3d17db1c5997cb861937bd284237", "https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/0.10.1/Flask-0.10.1.tar.gz#sha256=4c83829ff83d408b5e1d4995472265411d2c414112298f2eb4b359d9e4563373", "https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/3.0.1/flask-3.0.1.tar.gz#sha256=6489f51bb3666def6f314e15f19d50a1869a19ae0e8c9a3641ffe66c77d42403", ] "###); Ok(()) } }