uv/crates/uv-client/src/registry_client.rs

809 lines
30 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use std::collections::BTreeMap;
use std::fmt::Debug;
use std::path::Path;
use std::str::FromStr;
use async_http_range_reader::AsyncHttpRangeReader;
use async_zip::tokio::read::seek::ZipFileReader;
use futures::{FutureExt, TryStreamExt};
use reqwest::{Client, ClientBuilder, Response, StatusCode};
use reqwest_retry::policies::ExponentialBackoff;
use reqwest_retry::RetryTransientMiddleware;
use serde::{Deserialize, Serialize};
use tempfile::tempfile_in;
use tokio::io::BufWriter;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::{info_span, instrument, trace, warn, Instrument};
use url::Url;
use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Name};
use install_wheel_rs::find_dist_info;
use pep440_rs::Version;
use pypi_types::{Metadata21, SimpleJson};
use uv_cache::{Cache, CacheBucket, WheelCache};
use uv_normalize::PackageName;
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::middleware::OfflineMiddleware;
use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::rkyvutil::OwnedArchive;
use crate::{CachedClient, CachedClientError, Error, ErrorKind};
/// A builder for an [`RegistryClient`].
#[derive(Debug, Clone)]
pub struct RegistryClientBuilder {
index_urls: IndexUrls,
retries: u32,
connectivity: Connectivity,
cache: Cache,
}
impl RegistryClientBuilder {
pub fn new(cache: Cache) -> Self {
Self {
index_urls: IndexUrls::default(),
cache,
connectivity: Connectivity::Online,
retries: 3,
}
}
}
impl RegistryClientBuilder {
#[must_use]
pub fn index_urls(mut self, index_urls: IndexUrls) -> Self {
self.index_urls = index_urls;
self
}
#[must_use]
pub fn connectivity(mut self, connectivity: Connectivity) -> Self {
self.connectivity = connectivity;
self
}
#[must_use]
pub fn retries(mut self, retries: u32) -> Self {
self.retries = retries;
self
}
#[must_use]
pub fn cache<T>(mut self, cache: Cache) -> Self {
self.cache = cache;
self
}
pub fn build(self) -> RegistryClient {
let client_raw = {
// Disallow any connections.
let client_core = ClientBuilder::new()
.user_agent("uv")
.pool_max_idle_per_host(20)
.timeout(std::time::Duration::from_secs(60 * 5));
client_core.build().expect("Failed to build HTTP client.")
};
let uncached_client = match self.connectivity {
Connectivity::Online => {
let retry_policy =
ExponentialBackoff::builder().build_with_max_retries(self.retries);
let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy);
reqwest_middleware::ClientBuilder::new(client_raw.clone())
.with(retry_strategy)
.build()
}
Connectivity::Offline => reqwest_middleware::ClientBuilder::new(client_raw.clone())
.with(OfflineMiddleware)
.build(),
};
RegistryClient {
index_urls: self.index_urls,
cache: self.cache,
connectivity: self.connectivity,
client_raw: client_raw.clone(),
client: CachedClient::new(uncached_client.clone()),
}
}
}
/// A client for fetching packages from a `PyPI`-compatible index.
#[derive(Debug, Clone)]
pub struct RegistryClient {
/// The index URLs to use for fetching packages.
index_urls: IndexUrls,
/// The underlying HTTP client.
client: CachedClient,
/// Don't use this client, it only exists because `async_http_range_reader` needs
/// [`reqwest::Client] instead of [`reqwest_middleware::Client`]
client_raw: Client,
/// Used for the remote wheel METADATA cache
cache: Cache,
/// The connectivity mode to use.
connectivity: Connectivity,
}
impl RegistryClient {
/// Return the [`CachedClient`] used by this client.
pub fn cached_client(&self) -> &CachedClient {
&self.client
}
/// Return the [`Connectivity`] mode used by this client.
pub fn connectivity(&self) -> Connectivity {
self.connectivity
}
/// Fetch a package from the `PyPI` simple API.
///
/// "simple" here refers to [PEP 503 Simple Repository API](https://peps.python.org/pep-0503/)
/// and [PEP 691 JSON-based Simple API for Python Package Indexes](https://peps.python.org/pep-0691/),
/// which the pypi json api approximately implements.
#[instrument("simple_api", skip_all, fields(package = % package_name))]
pub async fn simple(
&self,
package_name: &PackageName,
) -> Result<(IndexUrl, OwnedArchive<SimpleMetadata>), Error> {
if self.index_urls.no_index() {
return Err(ErrorKind::NoIndex(package_name.as_ref().to_string()).into());
}
for index in self.index_urls.indexes() {
let result = self.simple_single_index(package_name, index).await?;
return match result {
Ok(metadata) => Ok((index.clone(), metadata)),
Err(CachedClientError::Client(err)) => match err.into_kind() {
ErrorKind::Offline(_) => continue,
ErrorKind::RequestError(err) => {
if err.status() == Some(StatusCode::NOT_FOUND) {
continue;
}
Err(ErrorKind::RequestError(err).into())
}
other => Err(other.into()),
},
Err(CachedClientError::Callback(err)) => Err(err),
};
}
match self.connectivity {
Connectivity::Online => {
Err(ErrorKind::PackageNotFound(package_name.to_string()).into())
}
Connectivity::Offline => Err(ErrorKind::Offline(package_name.to_string()).into()),
}
}
async fn simple_single_index(
&self,
package_name: &PackageName,
index: &IndexUrl,
) -> Result<Result<OwnedArchive<SimpleMetadata>, CachedClientError<Error>>, Error> {
// Format the URL for PyPI.
let mut url: Url = index.clone().into();
url.path_segments_mut()
.unwrap()
.pop_if_empty()
.push(package_name.as_ref())
// The URL *must* end in a trailing slash for proper relative path behavior
// ref https://github.com/servo/rust-url/issues/333
.push("");
trace!("Fetching metadata for {package_name} from {url}");
let cache_entry = self.cache.entry(
CacheBucket::Simple,
Path::new(&match index {
IndexUrl::Pypi => "pypi".to_string(),
IndexUrl::Url(url) => cache_key::digest(&cache_key::CanonicalUrl::new(url)),
}),
format!("{package_name}.rkyv"),
);
let cache_control = match self.connectivity {
Connectivity::Online => CacheControl::from(
self.cache
.freshness(&cache_entry, Some(package_name))
.map_err(ErrorKind::Io)?,
),
Connectivity::Offline => CacheControl::AllowStale,
};
let simple_request = self
.client
.uncached()
.get(url.clone())
.header("Accept-Encoding", "gzip")
.header("Accept", MediaType::accepts())
.build()
.map_err(ErrorKind::RequestError)?;
let parse_simple_response = |response: Response| {
async {
let content_type = response
.headers()
.get("content-type")
.ok_or_else(|| Error::from(ErrorKind::MissingContentType(url.clone())))?;
let content_type = content_type.to_str().map_err(|err| {
Error::from(ErrorKind::InvalidContentTypeHeader(url.clone(), err))
})?;
let media_type = content_type.split(';').next().unwrap_or(content_type);
let media_type = MediaType::from_str(media_type).ok_or_else(|| {
Error::from(ErrorKind::UnsupportedMediaType(
url.clone(),
media_type.to_string(),
))
})?;
let unarchived = match media_type {
MediaType::Json => {
let bytes = response.bytes().await.map_err(ErrorKind::RequestError)?;
let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
.map_err(|err| Error::from_json_err(err, url.clone()))?;
let metadata =
SimpleMetadata::from_files(data.files, package_name, url.as_str());
metadata
}
MediaType::Html => {
let text = response.text().await.map_err(ErrorKind::RequestError)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;
let metadata =
SimpleMetadata::from_files(files, package_name, base.as_url().as_str());
metadata
}
};
OwnedArchive::from_unarchived(&unarchived)
}
.boxed()
.instrument(info_span!("parse_simple_api", package = %package_name))
};
let result = self
.client
.get_cacheable(
simple_request,
&cache_entry,
cache_control,
parse_simple_response,
)
.await;
Ok(result)
}
/// Fetch the metadata for a remote wheel file.
///
/// For a remote wheel, we try the following ways to fetch the metadata:
/// 1. From a [PEP 658](https://peps.python.org/pep-0658/) data-dist-info-metadata url
/// 2. From a remote wheel by partial zip reading
/// 3. From a (temp) download of a remote wheel (this is a fallback, the webserver should support range requests)
#[instrument(skip_all, fields(% built_dist))]
pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result<Metadata21, Error> {
let metadata = match &built_dist {
BuiltDist::Registry(wheel) => match &wheel.file.url {
FileLocation::RelativeUrl(base, url) => {
let url = pypi_types::base_url_join_relative(base, url)
.map_err(ErrorKind::JoinRelativeError)?;
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
.await?
}
FileLocation::AbsoluteUrl(url) => {
let url = Url::parse(url).map_err(ErrorKind::UrlParseError)?;
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
.await?
}
FileLocation::Path(path) => {
let file = fs_err::tokio::File::open(&path)
.await
.map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await?
}
},
BuiltDist::DirectUrl(wheel) => {
self.wheel_metadata_no_pep658(
&wheel.filename,
&wheel.url,
WheelCache::Url(&wheel.url),
)
.await?
}
BuiltDist::Path(wheel) => {
let file = fs_err::tokio::File::open(&wheel.path)
.await
.map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await?
}
};
if metadata.name != *built_dist.name() {
return Err(Error::from(ErrorKind::NameMismatch {
metadata: metadata.name,
given: built_dist.name().clone(),
}));
}
Ok(metadata)
}
/// Fetch the metadata from a wheel file.
async fn wheel_metadata_registry(
&self,
index: &IndexUrl,
file: &File,
url: &Url,
) -> Result<Metadata21, Error> {
// If the metadata file is available at its own url (PEP 658), download it from there.
let filename = WheelFilename::from_str(&file.filename).map_err(ErrorKind::WheelFilename)?;
if file
.dist_info_metadata
.as_ref()
.is_some_and(pypi_types::DistInfoMetadata::is_available)
{
let url = Url::parse(&format!("{}.metadata", url)).map_err(ErrorKind::UrlParseError)?;
let cache_entry = self.cache.entry(
CacheBucket::Wheels,
WheelCache::Index(index).remote_wheel_dir(filename.name.as_ref()),
format!("{}.msgpack", filename.stem()),
);
let cache_control = match self.connectivity {
Connectivity::Online => CacheControl::from(
self.cache
.freshness(&cache_entry, Some(&filename.name))
.map_err(ErrorKind::Io)?,
),
Connectivity::Offline => CacheControl::AllowStale,
};
let response_callback = |response: Response| async {
let bytes = response.bytes().await.map_err(ErrorKind::RequestError)?;
info_span!("parse_metadata21")
.in_scope(|| Metadata21::parse(bytes.as_ref()))
.map_err(|err| {
Error::from(ErrorKind::MetadataParseError(
filename,
url.to_string(),
Box::new(err),
))
})
};
let req = self
.client
.uncached()
.get(url.clone())
.build()
.map_err(ErrorKind::RequestError)?;
Ok(self
.client
.get_serde(req, &cache_entry, cache_control, response_callback)
.await?)
} else {
// If we lack PEP 658 support, try using HTTP range requests to read only the
// `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
// into the cache and read from there
self.wheel_metadata_no_pep658(&filename, url, WheelCache::Index(index))
.await
}
}
/// Get the wheel metadata if it isn't available in an index through PEP 658
async fn wheel_metadata_no_pep658<'data>(
&self,
filename: &'data WheelFilename,
url: &'data Url,
cache_shard: WheelCache<'data>,
) -> Result<Metadata21, Error> {
let cache_entry = self.cache.entry(
CacheBucket::Wheels,
cache_shard.remote_wheel_dir(filename.name.as_ref()),
format!("{}.msgpack", filename.stem()),
);
let cache_control = match self.connectivity {
Connectivity::Online => CacheControl::from(
self.cache
.freshness(&cache_entry, Some(&filename.name))
.map_err(ErrorKind::Io)?,
),
Connectivity::Offline => CacheControl::AllowStale,
};
// This response callback is special, we actually make a number of subsequent requests to
// fetch the file from the remote zip.
let client = self.client_raw.clone();
let read_metadata_range_request = |response: Response| {
async {
let mut reader = AsyncHttpRangeReader::from_head_response(client, response)
.await
.map_err(ErrorKind::AsyncHttpRangeReader)?;
trace!("Getting metadata for {filename} by range request");
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
let metadata = Metadata21::parse(text.as_bytes()).map_err(|err| {
Error::from(ErrorKind::MetadataParseError(
filename.clone(),
url.to_string(),
Box::new(err),
))
})?;
Ok::<Metadata21, CachedClientError<Error>>(metadata)
}
.boxed()
.instrument(info_span!("read_metadata_range_request", wheel = %filename))
};
let req = self
.client
.uncached()
.head(url.clone())
.build()
.map_err(ErrorKind::RequestError)?;
let result = self
.client
.get_serde(
req,
&cache_entry,
cache_control,
read_metadata_range_request,
)
.await
.map_err(crate::Error::from);
match result {
Ok(metadata) => return Ok(metadata),
Err(err) => {
if err.kind().is_http_range_requests_unsupported() {
// The range request version failed. Fall back to downloading the entire file
// and the reading the file from the zip the regular way.
warn!("Range requests not supported for {filename}; downloading wheel");
} else {
return Err(err);
}
}
}
// TODO(konstin): Download the wheel into a cache shared with the installer instead
// Note that this branch is only hit when you're not using and the server where
// you host your wheels for some reasons doesn't support range requests
// (tbh we should probably warn here and tell users to get a better registry because
// their current one makes resolution unnecessary slow).
let temp_download = tempfile_in(self.cache.root()).map_err(ErrorKind::CacheWrite)?;
let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download));
let mut reader = self.stream_external(url).await?.compat();
tokio::io::copy(&mut reader, &mut writer)
.await
.map_err(ErrorKind::CacheWrite)?;
let reader = writer.into_inner();
read_metadata_async(filename, url.to_string(), reader).await
}
/// Stream a file from an external URL.
pub async fn stream_external(
&self,
url: &Url,
) -> Result<Box<dyn futures::AsyncRead + Unpin + Send + Sync>, Error> {
Ok(Box::new(
self.client
.uncached()
.get(url.to_string())
.send()
.await
.map_err(ErrorKind::RequestMiddlewareError)?
.error_for_status()
.map_err(ErrorKind::RequestError)?
.bytes_stream()
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
.into_async_read(),
))
}
}
/// It doesn't really fit into `uv_client`, but it avoids cyclical crate dependencies.
async fn read_metadata_async(
filename: &WheelFilename,
debug_source: String,
reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin,
) -> Result<Metadata21, Error> {
let mut zip_reader = ZipFileReader::with_tokio(reader)
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
let (metadata_idx, _dist_info_prefix) = find_dist_info(
filename,
zip_reader
.file()
.entries()
.iter()
.enumerate()
.filter_map(|(idx, e)| Some((idx, e.filename().as_str().ok()?))),
)
.map_err(ErrorKind::InstallWheel)?;
// Read the contents of the METADATA file
let mut contents = Vec::new();
zip_reader
.reader_with_entry(metadata_idx)
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?
.read_to_end_checked(&mut contents)
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
let metadata = Metadata21::parse(&contents).map_err(|err| {
ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err))
})?;
Ok(metadata)
}
#[derive(
Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
)]
#[archive(check_bytes)]
#[archive_attr(derive(Debug))]
pub struct VersionFiles {
pub wheels: Vec<VersionWheel>,
pub source_dists: Vec<VersionSourceDist>,
}
impl VersionFiles {
fn push(&mut self, filename: DistFilename, file: File) {
match filename {
DistFilename::WheelFilename(name) => self.wheels.push(VersionWheel { name, file }),
DistFilename::SourceDistFilename(name) => {
self.source_dists.push(VersionSourceDist { name, file })
}
}
}
pub fn all(self) -> impl Iterator<Item = (DistFilename, File)> {
self.wheels
.into_iter()
.map(|VersionWheel { name, file }| (DistFilename::WheelFilename(name), file))
.chain(
self.source_dists
.into_iter()
.map(|VersionSourceDist { name, file }| {
(DistFilename::SourceDistFilename(name), file)
}),
)
}
}
#[derive(Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
#[archive(check_bytes)]
#[archive_attr(derive(Debug))]
pub struct VersionWheel {
pub name: WheelFilename,
pub file: File,
}
#[derive(Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
#[archive(check_bytes)]
#[archive_attr(derive(Debug))]
pub struct VersionSourceDist {
pub name: SourceDistFilename,
pub file: File,
}
#[derive(
Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
)]
#[archive(check_bytes)]
#[archive_attr(derive(Debug))]
pub struct SimpleMetadata(Vec<SimpleMetadatum>);
#[derive(Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
#[archive(check_bytes)]
#[archive_attr(derive(Debug))]
pub struct SimpleMetadatum {
pub version: Version,
pub files: VersionFiles,
}
impl SimpleMetadata {
pub fn iter(&self) -> impl DoubleEndedIterator<Item = &SimpleMetadatum> {
self.0.iter()
}
fn from_files(files: Vec<pypi_types::File>, package_name: &PackageName, base: &str) -> Self {
let mut map: BTreeMap<Version, VersionFiles> = BTreeMap::default();
// Group the distributions by version and kind
for file in files {
if let Some(filename) =
DistFilename::try_from_filename(file.filename.as_str(), package_name)
{
let version = match filename {
DistFilename::SourceDistFilename(ref inner) => &inner.version,
DistFilename::WheelFilename(ref inner) => &inner.version,
};
let file = match File::try_from(file, base) {
Ok(file) => file,
Err(err) => {
// Ignore files with unparsable version specifiers.
warn!("Skipping file for {package_name}: {err}");
continue;
}
};
match map.entry(version.clone()) {
std::collections::btree_map::Entry::Occupied(mut entry) => {
entry.get_mut().push(filename, file);
}
std::collections::btree_map::Entry::Vacant(entry) => {
let mut files = VersionFiles::default();
files.push(filename, file);
entry.insert(files);
}
}
}
}
SimpleMetadata(
map.into_iter()
.map(|(version, files)| SimpleMetadatum { version, files })
.collect(),
)
}
}
impl IntoIterator for SimpleMetadata {
type Item = SimpleMetadatum;
type IntoIter = std::vec::IntoIter<SimpleMetadatum>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl ArchivedSimpleMetadata {
pub fn iter(&self) -> impl DoubleEndedIterator<Item = &rkyv::Archived<SimpleMetadatum>> {
self.0.iter()
}
pub fn datum(&self, i: usize) -> Option<&rkyv::Archived<SimpleMetadatum>> {
self.0.get(i)
}
}
#[derive(Debug)]
enum MediaType {
Json,
Html,
}
impl MediaType {
/// Parse a media type from a string, returning `None` if the media type is not supported.
fn from_str(s: &str) -> Option<Self> {
match s {
"application/vnd.pypi.simple.v1+json" => Some(Self::Json),
"application/vnd.pypi.simple.v1+html" | "text/html" => Some(Self::Html),
_ => None,
}
}
/// Return the `Accept` header value for all supported media types.
#[inline]
const fn accepts() -> &'static str {
// See: https://peps.python.org/pep-0691/#version-format-selection
"application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html;q=0.2, text/html;q=0.01"
}
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Connectivity {
/// Allow access to the network.
Online,
/// Do not allow access to the network.
Offline,
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
use url::Url;
use pypi_types::{JoinRelativeError, SimpleJson};
use uv_normalize::PackageName;
use crate::{html::SimpleHtml, SimpleMetadata, SimpleMetadatum};
#[test]
fn ignore_failing_files() {
// 1.7.7 has an invalid requires-python field (double comma), 1.7.8 is valid
let response = r#"
{
"files": [
{
"core-metadata": false,
"data-dist-info-metadata": false,
"filename": "pyflyby-1.7.7.tar.gz",
"hashes": {
"sha256": "0c4d953f405a7be1300b440dbdbc6917011a07d8401345a97e72cd410d5fb291"
},
"requires-python": ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*, !=3.2.*, !=3.3.*, !=3.4.*,, !=3.5.*, !=3.6.*, <4",
"size": 427200,
"upload-time": "2022-05-19T09:14:36.591835Z",
"url": "https://files.pythonhosted.org/packages/61/93/9fec62902d0b4fc2521333eba047bff4adbba41f1723a6382367f84ee522/pyflyby-1.7.7.tar.gz",
"yanked": false
},
{
"core-metadata": false,
"data-dist-info-metadata": false,
"filename": "pyflyby-1.7.8.tar.gz",
"hashes": {
"sha256": "1ee37474f6da8f98653dbcc208793f50b7ace1d9066f49e2707750a5ba5d53c6"
},
"requires-python": ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, <4",
"size": 424460,
"upload-time": "2022-08-04T10:42:02.190074Z",
"url": "https://files.pythonhosted.org/packages/ad/39/17180d9806a1c50197bc63b25d0f1266f745fc3b23f11439fccb3d6baa50/pyflyby-1.7.8.tar.gz",
"yanked": false
}
]
}
"#;
let data: SimpleJson = serde_json::from_str(response).unwrap();
let base = "https://pypi.org/simple/pyflyby/";
let simple_metadata = SimpleMetadata::from_files(
data.files,
&PackageName::from_str("pyflyby").unwrap(),
base,
);
let versions: Vec<String> = simple_metadata
.iter()
.map(|SimpleMetadatum { version, .. }| version.to_string())
.collect();
assert_eq!(versions, ["1.7.8".to_string()]);
}
/// Test for AWS Code Artifact registry
/// Regression coverage of https://github.com/astral-sh/uv/issues/1388
#[test]
fn relative_urls_code_artifact() -> Result<(), JoinRelativeError> {
let text = r#"
<!DOCTYPE html>
<html>
<head>
<title>Links for flask</title>
</head>
<body>
<h1>Links for flask</h1>
<a href="0.1/Flask-0.1.tar.gz#sha256=9da884457e910bf0847d396cb4b778ad9f3c3d17db1c5997cb861937bd284237" data-gpg-sig="false" >Flask-0.1.tar.gz</a>
<br/>
<a href="0.10.1/Flask-0.10.1.tar.gz#sha256=4c83829ff83d408b5e1d4995472265411d2c414112298f2eb4b359d9e4563373" data-gpg-sig="false" >Flask-0.10.1.tar.gz</a>
<br/>
<a href="3.0.1/flask-3.0.1.tar.gz#sha256=6489f51bb3666def6f314e15f19d50a1869a19ae0e8c9a3641ffe66c77d42403" data-requires-python="&gt;=3.8" data-gpg-sig="false" >flask-3.0.1.tar.gz</a>
<br/>
</body>
</html>
"#;
// Note the lack of a trailing `/` here is important for coverage of url-join behavior
let base = Url::parse("https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/flask")
.unwrap();
let SimpleHtml { base, files } = SimpleHtml::parse(text, &base).unwrap();
// Test parsing of the file urls
let urls = files
.iter()
.map(|file| pypi_types::base_url_join_relative(base.as_url().as_str(), &file.url))
.collect::<Result<Vec<_>, JoinRelativeError>>()?;
let urls = urls.iter().map(|url| url.as_str()).collect::<Vec<_>>();
insta::assert_debug_snapshot!(urls, @r###"
[
"https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/0.1/Flask-0.1.tar.gz#sha256=9da884457e910bf0847d396cb4b778ad9f3c3d17db1c5997cb861937bd284237",
"https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/0.10.1/Flask-0.10.1.tar.gz#sha256=4c83829ff83d408b5e1d4995472265411d2c414112298f2eb4b359d9e4563373",
"https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/3.0.1/flask-3.0.1.tar.gz#sha256=6489f51bb3666def6f314e15f19d50a1869a19ae0e8c9a3641ffe66c77d42403",
]
"###);
Ok(())
}
}