diff --git a/Cargo.lock b/Cargo.lock index 87873fec4..c8a59fbb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -225,8 +225,8 @@ dependencies = [ [[package]] name = "async_http_range_reader" -version = "0.3.0" -source = "git+https://github.com/baszalmstra/async_http_range_reader#4cafe5afda889d53060e0565c949d4ffd6ef3786" +version = "0.4.0" +source = "git+https://github.com/baszalmstra/async_http_range_reader?rev=8dab2c08ac864fec1df014465264f9a7c8eae905#8dab2c08ac864fec1df014465264f9a7c8eae905" dependencies = [ "bisection", "futures", @@ -2370,9 +2370,10 @@ dependencies = [ "distribution-filename", "fs-err", "futures", + "http", "http-cache-reqwest", + "http-cache-semantics", "install-wheel-rs", - "puffin-cache", "puffin-normalize", "pypi-types", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index f35a77d2d..b0b1eb6a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] anyhow = { version = "1.0.75" } -async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", ref = "4cafe5afda889d53060e0565c949d4ffd6ef3786" } +async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", rev = "8dab2c08ac864fec1df014465264f9a7c8eae905" } async_zip = { version = "0.0.15", features = ["tokio", "deflate"] } bitflags = { version = "2.4.1" } bytesize = { version = "1.3.0" } @@ -35,7 +35,9 @@ fxhash = { version = "0.2.1" } glob = { version = "0.3.1" } goblin = { version = "0.7.1" } hex = { version = "0.4.3" } +http = { version = "0.2.9" } http-cache-reqwest = { version = "0.12.0" } +http-cache-semantics = { version = "1.0.1" } indicatif = { version = "0.17.7" } indoc = { version = "2.0.4" } itertools = { version = "0.11.0" } diff --git a/crates/distribution-filename/src/wheel.rs b/crates/distribution-filename/src/wheel.rs index 09fb944d2..c323b5fd2 100644 --- a/crates/distribution-filename/src/wheel.rs +++ b/crates/distribution-filename/src/wheel.rs @@ -137,6 +137,11 @@ impl WheelFilename { self.platform_tag.join(".") ) } + + /// The wheel filename without the extension + pub fn stem(&self) -> String { + format!("{}-{}-{}", self.distribution, self.version, self.get_tag()) + } } impl TryFrom<&Url> for WheelFilename { diff --git a/crates/puffin-cache/src/digest.rs b/crates/puffin-cache/src/digest.rs index 975237b9a..54f7cb204 100644 --- a/crates/puffin-cache/src/digest.rs +++ b/crates/puffin-cache/src/digest.rs @@ -2,7 +2,7 @@ use std::hash::Hasher; use crate::cache_key::{CacheKey, CacheKeyHasher}; -/// Compute a hex string hash of a [`CacheKey`] object. +/// Compute a hex string hash of a `CacheKey` object. /// /// The value returned by [`digest`] should be stable across releases and platforms. pub fn digest(hashable: &H) -> String { diff --git a/crates/puffin-cli/src/commands/pip_compile.rs b/crates/puffin-cli/src/commands/pip_compile.rs index de674e261..04e9c41be 100644 --- a/crates/puffin-cli/src/commands/pip_compile.rs +++ b/crates/puffin-cli/src/commands/pip_compile.rs @@ -128,8 +128,7 @@ pub(crate) async fn pip_compile( // Instantiate a client. let client = { - let mut builder = RegistryClientBuilder::default(); - builder = builder.cache(Some(cache)); + let mut builder = RegistryClientBuilder::new(cache); if let Some(IndexUrls { index, extra_index }) = index_urls { if let Some(index) = index { builder = builder.index(index); @@ -142,7 +141,7 @@ pub(crate) async fn pip_compile( }; let build_dispatch = BuildDispatch::new( - RegistryClientBuilder::default().build(), + client.clone(), cache.to_path_buf(), venv.interpreter_info().clone(), fs::canonicalize(venv.python_executable())?, diff --git a/crates/puffin-cli/src/commands/pip_sync.rs b/crates/puffin-cli/src/commands/pip_sync.rs index e94633f89..32ce918e4 100644 --- a/crates/puffin-cli/src/commands/pip_sync.rs +++ b/crates/puffin-cli/src/commands/pip_sync.rs @@ -110,8 +110,7 @@ pub(crate) async fn sync_requirements( // Instantiate a client. let client = { - let mut builder = RegistryClientBuilder::default(); - builder = builder.cache(Some(cache)); + let mut builder = RegistryClientBuilder::new(cache); if let Some(IndexUrls { index, extra_index }) = index_urls { if let Some(index) = index { builder = builder.index(index); @@ -193,7 +192,7 @@ pub(crate) async fn sync_requirements( let start = std::time::Instant::now(); let build_dispatch = BuildDispatch::new( - RegistryClientBuilder::default().build(), + client, cache.to_path_buf(), venv.interpreter_info().clone(), fs::canonicalize(venv.python_executable())?, diff --git a/crates/puffin-client/Cargo.toml b/crates/puffin-client/Cargo.toml index 8bd364361..b44c6ebd0 100644 --- a/crates/puffin-client/Cargo.toml +++ b/crates/puffin-client/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] distribution-filename = { path = "../distribution-filename" } install-wheel-rs = { path = "../install-wheel-rs" } -puffin-cache = { path = "../puffin-cache" } puffin-normalize = { path = "../puffin-normalize" } pypi-types = { path = "../pypi-types" } @@ -14,7 +13,9 @@ async_http_range_reader = { workspace = true } async_zip = { workspace = true } futures = { workspace = true } fs-err = { workspace = true, features = ["tokio"] } +http = { workspace = true } http-cache-reqwest = { workspace = true } +http-cache-semantics = { workspace = true } reqwest = { workspace = true } reqwest-middleware = { workspace = true } reqwest-retry = { workspace = true } diff --git a/crates/puffin-client/src/cached_client.rs b/crates/puffin-client/src/cached_client.rs new file mode 100644 index 000000000..815696ea1 --- /dev/null +++ b/crates/puffin-client/src/cached_client.rs @@ -0,0 +1,217 @@ +use std::future::Future; +use std::path::Path; +use std::time::SystemTime; + +use http_cache_semantics::{AfterResponse, BeforeRequest, CachePolicy}; +use reqwest::{Client, Request, Response}; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; +use tempfile::NamedTempFile; +use tracing::{trace, warn}; + +use crate::error::Error; + +#[derive(Debug)] +enum CachedResponse { + /// The cached response is fresh without an HTTP request (e.g. immutable) + FreshCache(Payload), + /// The cached response is fresh after an HTTP request (e.g. 304 not modified) + NotModified(DataWithCachePolicy), + /// There was no prior cached response or the cache was outdated + /// + /// The cache policy is `None` if it isn't storable + ModifiedOrNew(Response, Option), +} + +/// Serialize the actual payload together with its caching information +#[derive(Debug, Deserialize, Serialize)] +struct DataWithCachePolicy { + data: Payload, + cache_policy: CachePolicy, +} + +/// Custom caching layer over [`reqwest::Client`] using `http-cache-semantics`. +/// +/// This effective middleware takes inspiration from the `http-cache` crate, but unlike this crate, +/// we allow running an async callback on the response before caching. We use this to e.g. store a +/// parsed version of the wheel metadata and for our remote zip reader. In the latter case, we want +/// to read a single file from a remote zip using range requests (so we don't have to download the +/// entire file). We send a HEAD request in the caching layer to check if the remote file has +/// changed (and if range requests are supported), and in the callback we make the actual range +/// requests if required. +/// +/// Unlike `http-cache`, all outputs must be serde-able. Currently everything is json, but we can +/// transparently switch to a faster/smaller format. +/// +/// Again unlike `http-cache`, the caller gets full control over the cache key with the assumption +/// that it's a file. TODO(konstin): Centralize the cache bucket management. +#[derive(Debug, Clone)] +pub(crate) struct CachedClient(Client); + +impl CachedClient { + pub(crate) fn new(client: Client) -> Self { + Self(client) + } + + /// Makes a cached request with a custom response transformation + /// + /// If a new response was received (no prior cached response or modified on the remote), the + /// response through `response_callback` and only the result is cached and returned + pub(crate) async fn get_cached_with_callback< + Payload: Serialize + DeserializeOwned, + Callback, + CallbackReturn, + >( + &self, + req: Request, + cache_dir: &Path, + filename: &str, + response_callback: Callback, + ) -> Result + where + Callback: FnOnce(Response) -> CallbackReturn, + CallbackReturn: Future>, + { + let cache_file = cache_dir.join(filename); + let cached = if let Ok(cached) = fs_err::tokio::read(&cache_file).await { + match serde_json::from_slice::>(&cached) { + Ok(data) => Some(data), + Err(err) => { + warn!( + "Broken cache entry at {}, removing: {err}", + cache_file.display() + ); + let _ = fs_err::tokio::remove_file(&cache_file).await; + None + } + } + } else { + None + }; + + let cached_response = self.send_cached(req, cached).await?; + + match cached_response { + CachedResponse::FreshCache(data) => Ok(data), + CachedResponse::NotModified(data_with_cache_policy) => { + let temp_file = NamedTempFile::new_in(cache_dir)?; + fs_err::tokio::write(&temp_file, &serde_json::to_vec(&data_with_cache_policy)?) + .await?; + temp_file.persist(cache_file)?; + Ok(data_with_cache_policy.data) + } + CachedResponse::ModifiedOrNew(res, cache_policy) => { + let data = response_callback(res).await?; + if let Some(cache_policy) = cache_policy { + let data_with_cache_policy = DataWithCachePolicy { data, cache_policy }; + fs_err::tokio::create_dir_all(cache_dir).await?; + let temp_file = NamedTempFile::new_in(cache_dir)?; + fs_err::tokio::write(&temp_file, &serde_json::to_vec(&data_with_cache_policy)?) + .await?; + temp_file.persist(cache_file)?; + Ok(data_with_cache_policy.data) + } else { + Ok(data) + } + } + } + } + + /// `http-cache-semantics` to `reqwest` wrapper + async fn send_cached( + &self, + mut req: Request, + cached: Option>, + ) -> Result, Error> { + // The converted types are from the specific `reqwest` types to the more generic `http` + // types + let mut converted_req = http::Request::try_from( + req.try_clone() + .expect("You can't use streaming request bodies with this function"), + )?; + let cached_response = if let Some(cached) = cached { + match cached + .cache_policy + .before_request(&converted_req, SystemTime::now()) + { + BeforeRequest::Fresh(_) => { + trace!("Fresh cache for {}", req.url()); + CachedResponse::FreshCache(cached.data) + } + BeforeRequest::Stale { request, matches } => { + if !matches { + // This should not happen + warn!( + "Cached request doesn't match current request for {}", + req.url(), + ); + // This will override the bogus cache + return self.fresh_request(req, converted_req).await; + } + trace!("Revalidation request for {}", req.url()); + for header in &request.headers { + req.headers_mut().insert(header.0.clone(), header.1.clone()); + converted_req + .headers_mut() + .insert(header.0.clone(), header.1.clone()); + } + let res = self.0.execute(req).await?.error_for_status()?; + let mut converted_res = http::Response::new(()); + *converted_res.status_mut() = res.status(); + for header in res.headers() { + converted_res.headers_mut().insert( + http::HeaderName::from(header.0), + http::HeaderValue::from(header.1), + ); + } + let after_response = cached.cache_policy.after_response( + &converted_req, + &converted_res, + SystemTime::now(), + ); + match after_response { + AfterResponse::NotModified(new_policy, _parts) => { + CachedResponse::NotModified(DataWithCachePolicy { + data: cached.data, + cache_policy: new_policy, + }) + } + AfterResponse::Modified(new_policy, _parts) => { + CachedResponse::ModifiedOrNew( + res, + new_policy.is_storable().then_some(new_policy), + ) + } + } + } + } + } else { + // No reusable cache + self.fresh_request(req, converted_req).await? + }; + Ok(cached_response) + } + + async fn fresh_request( + &self, + req: Request, + converted_req: http::Request, + ) -> Result, Error> { + trace!("{} {}", req.method(), req.url()); + let res = self.0.execute(req).await?.error_for_status()?; + let mut converted_res = http::Response::new(()); + *converted_res.status_mut() = res.status(); + for header in res.headers() { + converted_res.headers_mut().insert( + http::HeaderName::from(header.0), + http::HeaderValue::from(header.1), + ); + } + let cache_policy = + CachePolicy::new(&converted_req.into_parts().0, &converted_res.into_parts().0); + Ok(CachedResponse::ModifiedOrNew( + res, + cache_policy.is_storable().then_some(cache_policy), + )) + } +} diff --git a/crates/puffin-client/src/client.rs b/crates/puffin-client/src/client.rs index 1973cabf5..dc5f0fae5 100644 --- a/crates/puffin-client/src/client.rs +++ b/crates/puffin-client/src/client.rs @@ -2,14 +2,11 @@ use std::fmt::Debug; use std::path::PathBuf; use std::str::FromStr; -use async_http_range_reader::{ - AsyncHttpRangeReader, AsyncHttpRangeReaderError, CheckSupportMethod, -}; +use async_http_range_reader::{AsyncHttpRangeReader, AsyncHttpRangeReaderError}; use async_zip::tokio::read::seek::ZipFileReader; use futures::{AsyncRead, StreamExt, TryStreamExt}; use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions}; -use reqwest::header::HeaderMap; -use reqwest::{header, Client, ClientBuilder, StatusCode}; +use reqwest::{Client, ClientBuilder, Response, StatusCode}; use reqwest_middleware::ClientWithMiddleware; use reqwest_retry::policies::ExponentialBackoff; use reqwest_retry::RetryTransientMiddleware; @@ -24,9 +21,10 @@ use install_wheel_rs::find_dist_info; use puffin_normalize::PackageName; use pypi_types::{File, Metadata21, SimpleJson}; +use crate::cached_client::CachedClient; use crate::error::Error; use crate::remote_metadata::{ - wheel_metadata_from_remote_zip, wheel_metadata_get_cached, wheel_metadata_write_cache, + wheel_metadata_from_remote_zip, WHEEL_METADATA_FROM_INDEX, WHEEL_METADATA_FROM_ZIP_CACHE, }; /// A builder for an [`RegistryClient`]. @@ -37,17 +35,17 @@ pub struct RegistryClientBuilder { no_index: bool, proxy: Url, retries: u32, - cache: Option, + cache: PathBuf, } -impl Default for RegistryClientBuilder { - fn default() -> Self { +impl RegistryClientBuilder { + pub fn new(cache: impl Into) -> Self { Self { index: Url::parse("https://pypi.org/simple").unwrap(), extra_index: vec![], no_index: false, proxy: Url::parse("https://pypi-metadata.ruff.rs").unwrap(), - cache: None, + cache: cache.into(), retries: 0, } } @@ -85,11 +83,8 @@ impl RegistryClientBuilder { } #[must_use] - pub fn cache(mut self, cache: Option) -> Self - where - T: Into, - { - self.cache = cache.map(Into::into); + pub fn cache(mut self, cache: impl Into) -> Self { + self.cache = cache.into(); self } @@ -109,13 +104,13 @@ impl RegistryClientBuilder { let mut client_builder = reqwest_middleware::ClientBuilder::new(client_raw.clone()).with(retry_strategy); - if let Some(path) = &self.cache { - client_builder = client_builder.with(Cache(HttpCache { - mode: CacheMode::Default, - manager: CACacheManager { path: path.clone() }, - options: HttpCacheOptions::default(), - })); - } + client_builder = client_builder.with(Cache(HttpCache { + mode: CacheMode::Default, + manager: CACacheManager { + path: self.cache.clone(), + }, + options: HttpCacheOptions::default(), + })); let retry_policy = ExponentialBackoff::builder().build_with_max_retries(self.retries); let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy); @@ -128,14 +123,16 @@ impl RegistryClientBuilder { extra_index: self.extra_index, no_index: self.no_index, client: client_builder.build(), - client_raw, + client_raw: client_raw.clone(), uncached_client: uncached_client_builder.build(), cache: self.cache, + cached_client: CachedClient::new(client_raw), } } } /// A client for fetching packages from a `PyPI`-compatible index. +// TODO(konstin): Clean up the clients once we moved everything to common caching #[derive(Debug, Clone)] pub struct RegistryClient { pub(crate) index: Url, @@ -145,8 +142,9 @@ pub struct RegistryClient { pub(crate) client: ClientWithMiddleware, pub(crate) uncached_client: ClientWithMiddleware, pub(crate) client_raw: Client, + pub(crate) cached_client: CachedClient, /// Used for the remote wheel METADATA cache - pub(crate) cache: Option, + pub(crate) cache: PathBuf, } impl RegistryClient { @@ -207,23 +205,24 @@ impl RegistryClient { // If the metadata file is available at its own url (PEP 658), download it from there let url = Url::parse(&file.url)?; + let filename = WheelFilename::from_str(&file.filename)?; if file.data_dist_info_metadata.is_available() { let url = Url::parse(&format!("{}.metadata", file.url))?; - trace!("Fetching file {} from {}", file.filename, url); - let text = self.wheel_metadata_impl(&url).await.map_err(|err| { - if err.status() == Some(StatusCode::NOT_FOUND) { - Error::FileNotFound(file.filename, err) - } else { - err.into() - } - })?; - Ok(Metadata21::parse(text.as_bytes())?) + let cache_dir = self.cache.join(WHEEL_METADATA_FROM_INDEX).join("pypi"); + let cache_file = format!("{}.json", filename.stem()); + + let response_callback = |response: Response| async move { + Ok(Metadata21::parse(response.bytes().await?.as_ref())?) + }; + let req = self.client_raw.get(url.clone()).build()?; + self.cached_client + .get_cached_with_callback(req, &cache_dir, &cache_file, response_callback) + .await // If we lack PEP 658 support, try using HTTP range requests to read only the // `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel // into the cache and read from there } else { - let filename = WheelFilename::from_str(&file.filename)?; self.wheel_metadata_no_index(&filename, &url).await } } @@ -234,79 +233,82 @@ impl RegistryClient { filename: &WheelFilename, url: &Url, ) -> Result { - Ok( - if let Some(cached_metadata) = - wheel_metadata_get_cached(url, self.cache.as_deref()).await - { - debug!("Cache hit for wheel metadata for {url}"); - cached_metadata - } else if let Some((mut reader, headers)) = self.range_reader(url.clone()).await? { - debug!("Using remote zip reader for wheel metadata for {url}"); - let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?; - let metadata = Metadata21::parse(text.as_bytes())?; - let is_immutable = headers - .get(header::CACHE_CONTROL) - .and_then(|header| header.to_str().ok()) - .unwrap_or_default() - .split(',') - .any(|entry| entry.trim().to_lowercase() == "immutable"); - if is_immutable { - debug!("Immutable (cacheable) wheel metadata for {url}"); - wheel_metadata_write_cache(url, self.cache.as_deref(), &metadata).await?; - } - metadata - } else { - debug!("Downloading whole wheel to extract metadata from {url}"); - // TODO(konstin): Download the wheel into a cache shared with the installer instead - // Note that this branch is only hit when you're not using and the server where - // you host your wheels for some reasons doesn't support range requests - // (tbh we should probably warn here and tekk users to get a better registry because - // their current one makes resolution unnecessary slow) - let temp_download = tempfile()?; - let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download)); - let mut reader = self.stream_external(url).await?.compat(); - tokio::io::copy(&mut reader, &mut writer).await?; - let temp_download = writer.into_inner(); + let cache_dir = self.cache.join(WHEEL_METADATA_FROM_ZIP_CACHE).join("pypi"); + let cache_file = format!("{}.json", filename.stem()); - let mut reader = ZipFileReader::new(temp_download.compat()) - .await - .map_err(|err| Error::Zip(filename.clone(), err))?; + // This response callback is special, we actually make a number of subsequent requests to + // fetch the file from the remote zip. + let client = self.client_raw.clone(); + let read_metadata_from_initial_response = |response: Response| async { + let mut reader = AsyncHttpRangeReader::from_head_response(client, response).await?; + trace!("Getting metadata for {filename} by range request"); + let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?; + let metadata = Metadata21::parse(text.as_bytes())?; + Ok(metadata) + }; - let (metadata_idx, _dist_info_dir) = find_dist_info( - filename, - reader - .file() - .entries() - .iter() - .enumerate() - .filter_map(|(idx, e)| Some((idx, e.entry().filename().as_str().ok()?))), - ) - .map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?; + let req = self.client_raw.head(url.clone()).build()?; + let result = self + .cached_client + .get_cached_with_callback( + req, + &cache_dir, + &cache_file, + read_metadata_from_initial_response, + ) + .await; - // Read the contents of the METADATA file - let mut contents = Vec::new(); - reader - .reader_with_entry(metadata_idx) - .await - .map_err(|err| Error::Zip(filename.clone(), err))? - .read_to_end_checked(&mut contents) - .await - .map_err(|err| Error::Zip(filename.clone(), err))?; + match result { + Ok(metadata) => { + return Ok(metadata); + } + Err(Error::AsyncHttpRangeReader( + AsyncHttpRangeReaderError::HttpRangeRequestUnsupported, + )) => {} + Err(err) => return Err(err), + } - Metadata21::parse(&contents)? - }, + // The range request version failed (this is bad, the webserver should support this), fall + // back to downloading the entire file and the reading the file from the zip the regular way + + debug!("Range requests not supported for {filename}, downloading whole wheel"); + // TODO(konstin): Download the wheel into a cache shared with the installer instead + // Note that this branch is only hit when you're not using and the server where + // you host your wheels for some reasons doesn't support range requests + // (tbh we should probably warn here and tekk users to get a better registry because + // their current one makes resolution unnecessary slow) + let temp_download = tempfile()?; + let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download)); + let mut reader = self.stream_external(url).await?.compat(); + tokio::io::copy(&mut reader, &mut writer).await?; + let temp_download = writer.into_inner(); + + let mut reader = ZipFileReader::new(temp_download.compat()) + .await + .map_err(|err| Error::Zip(filename.clone(), err))?; + + let (metadata_idx, _dist_info_dir) = find_dist_info( + filename, + reader + .file() + .entries() + .iter() + .enumerate() + .filter_map(|(idx, e)| Some((idx, e.entry().filename().as_str().ok()?))), ) - } + .map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?; - async fn wheel_metadata_impl(&self, url: &Url) -> Result { - Ok(self - .client - .get(url.clone()) - .send() - .await? - .error_for_status()? - .text() - .await?) + // Read the contents of the METADATA file + let mut contents = Vec::new(); + reader + .reader_with_entry(metadata_idx) + .await + .map_err(|err| Error::Zip(filename.clone(), err))? + .read_to_end_checked(&mut contents) + .await + .map_err(|err| Error::Zip(filename.clone(), err))?; + + Ok(Metadata21::parse(&contents)?) } /// Stream a file from an external URL. @@ -332,23 +334,4 @@ impl RegistryClient { .into_async_read(), )) } - - /// An async for individual files inside a remote zip file, if the server supports it. Returns - /// the headers of the initial request for caching. - async fn range_reader( - &self, - url: Url, - ) -> Result, Error> { - let response = AsyncHttpRangeReader::new( - self.client_raw.clone(), - url.clone(), - CheckSupportMethod::Head, - ) - .await; - match response { - Ok((reader, headers)) => Ok(Some((reader, headers))), - Err(AsyncHttpRangeReaderError::HttpRangeRequestUnsupported) => Ok(None), - Err(err) => Err(err.into()), - } - } } diff --git a/crates/puffin-client/src/error.rs b/crates/puffin-client/src/error.rs index e913acb6f..e24d794d3 100644 --- a/crates/puffin-client/src/error.rs +++ b/crates/puffin-client/src/error.rs @@ -28,7 +28,7 @@ pub enum Error { /// The metadata file was not found in the registry. #[error("File `{0}` was not found in the registry at {1}.")] - FileNotFound(String, #[source] reqwest_middleware::Error), + FileNotFound(String, #[source] reqwest::Error), /// A generic request error happened while making a request. Refer to the /// error message for more details. @@ -60,6 +60,13 @@ pub enum Error { #[error(transparent)] IO(#[from] io::Error), + + /// An [`io::Error`] with a filename attached + #[error(transparent)] + Persist(#[from] tempfile::PersistError), + + #[error("Failed to serialize response to cache")] + SerdeJson(#[from] serde_json::Error), } impl Error { diff --git a/crates/puffin-client/src/lib.rs b/crates/puffin-client/src/lib.rs index 5c2abb19b..cf03227fb 100644 --- a/crates/puffin-client/src/lib.rs +++ b/crates/puffin-client/src/lib.rs @@ -1,6 +1,7 @@ pub use client::{RegistryClient, RegistryClientBuilder}; pub use error::Error; +mod cached_client; mod client; mod error; mod remote_metadata; diff --git a/crates/puffin-client/src/remote_metadata.rs b/crates/puffin-client/src/remote_metadata.rs index d2b60f0b4..2afd68e18 100644 --- a/crates/puffin-client/src/remote_metadata.rs +++ b/crates/puffin-client/src/remote_metadata.rs @@ -1,57 +1,19 @@ -use std::io; -use std::path::Path; - use async_http_range_reader::AsyncHttpRangeReader; use async_zip::tokio::read::seek::ZipFileReader; -use fs_err::tokio as fs; use tokio_util::compat::TokioAsyncReadCompatExt; -use url::Url; use distribution_filename::WheelFilename; use install_wheel_rs::find_dist_info; -use puffin_cache::CanonicalUrl; -use pypi_types::Metadata21; use crate::Error; -const WHEEL_METADATA_FROM_ZIP_CACHE: &str = "wheel-metadata-v0"; - -/// Try to read the cached METADATA previously extracted from a remote zip, if it exists -pub(crate) async fn wheel_metadata_get_cached( - url: &Url, - cache: Option<&Path>, -) -> Option { - // TODO(konstin): Actual good cache layout - let path = cache? - .join(WHEEL_METADATA_FROM_ZIP_CACHE) - .join(puffin_cache::digest(&CanonicalUrl::new(url))); - if !path.is_file() { - return None; - } - let data = fs::read(path).await.ok()?; - serde_json::from_slice(&data).ok() -} - -/// Write the cached METADATA extracted from a remote zip to the cache -pub(crate) async fn wheel_metadata_write_cache( - url: &Url, - cache: Option<&Path>, - metadata: &Metadata21, -) -> io::Result<()> { - let Some(cache) = cache else { - return Ok(()); - }; - // TODO(konstin): Actual good cache layout - let dir = cache.join(WHEEL_METADATA_FROM_ZIP_CACHE); - fs::create_dir_all(&dir).await?; - let path = dir.join(puffin_cache::digest(&CanonicalUrl::new(url))); - fs::write(path, serde_json::to_vec(metadata)?).await -} +pub(crate) const WHEEL_METADATA_FROM_INDEX: &str = "wheel-metadat-index-v0"; +pub(crate) const WHEEL_METADATA_FROM_ZIP_CACHE: &str = "wheel-metadata-remote-v0"; /// Read the `.dist-info/METADATA` file from a async remote zip reader, so we avoid downloading the /// entire wheel just for the one file. /// -/// This method is derived from `prefix-div/rip`, which is available under the following BSD-3 +/// This method is derived from `prefix-dev/rip`, which is available under the following BSD-3 /// Clause license: /// /// ```text diff --git a/crates/puffin-client/tests/remote_metadata.rs b/crates/puffin-client/tests/remote_metadata.rs index ca8c57aab..0e5061282 100644 --- a/crates/puffin-client/tests/remote_metadata.rs +++ b/crates/puffin-client/tests/remote_metadata.rs @@ -10,9 +10,7 @@ use puffin_client::RegistryClientBuilder; #[tokio::test] async fn remote_metadata_with_and_without_cache() -> Result<()> { let temp_cache = tempdir().unwrap(); - let client = RegistryClientBuilder::default() - .cache(Some(temp_cache.path().to_path_buf())) - .build(); + let client = RegistryClientBuilder::new(temp_cache.path().to_path_buf()).build(); // The first run is without cache (the tempdir is empty), the second has the cache from the // first run for _ in 0..2 { diff --git a/crates/puffin-dev/src/build.rs b/crates/puffin-dev/src/build.rs index 58b0b145f..90e1dafdd 100644 --- a/crates/puffin-dev/src/build.rs +++ b/crates/puffin-dev/src/build.rs @@ -47,7 +47,7 @@ pub(crate) async fn build(args: BuildArgs) -> Result { let venv = Virtualenv::from_env(platform, Some(&cache))?; let build_dispatch = BuildDispatch::new( - RegistryClientBuilder::default().build(), + RegistryClientBuilder::new(cache.clone()).build(), cache, venv.interpreter_info().clone(), fs::canonicalize(venv.python_executable())?, diff --git a/crates/puffin-dev/src/resolve_cli.rs b/crates/puffin-dev/src/resolve_cli.rs index 55cada00b..41289f6aa 100644 --- a/crates/puffin-dev/src/resolve_cli.rs +++ b/crates/puffin-dev/src/resolve_cli.rs @@ -37,7 +37,7 @@ pub(crate) async fn resolve_cli(args: ResolveCliArgs) -> anyhow::Result<()> { let platform = Platform::current()?; let venv = Virtualenv::from_env(platform, Some(&cache))?; let build_dispatch = BuildDispatch::new( - RegistryClientBuilder::default().cache(Some(&cache)).build(), + RegistryClientBuilder::new(cache.clone()).build(), cache.clone(), venv.interpreter_info().clone(), fs::canonicalize(venv.python_executable())?, diff --git a/crates/puffin-dev/src/resolve_many.rs b/crates/puffin-dev/src/resolve_many.rs index b77511a1f..463e7a27c 100644 --- a/crates/puffin-dev/src/resolve_many.rs +++ b/crates/puffin-dev/src/resolve_many.rs @@ -60,7 +60,7 @@ pub(crate) async fn resolve_many(args: ResolveManyArgs) -> anyhow::Result<()> { let platform = Platform::current()?; let venv = Virtualenv::from_env(platform, Some(&cache))?; let build_dispatch = BuildDispatch::new( - RegistryClientBuilder::default().cache(Some(&cache)).build(), + RegistryClientBuilder::new(cache.clone()).build(), cache.clone(), venv.interpreter_info().clone(), fs::canonicalize(venv.python_executable())?, diff --git a/crates/puffin-dev/src/wheel_metadata.rs b/crates/puffin-dev/src/wheel_metadata.rs index 7b998fb5e..811899251 100644 --- a/crates/puffin-dev/src/wheel_metadata.rs +++ b/crates/puffin-dev/src/wheel_metadata.rs @@ -1,8 +1,10 @@ -use std::path::PathBuf; +use std::borrow::Cow; +use std::path::{Path, PathBuf}; use std::str::FromStr; use clap::Parser; use directories::ProjectDirs; +use tempfile::tempdir; use url::Url; use distribution_filename::WheelFilename; @@ -21,14 +23,17 @@ pub(crate) struct WheelMetadataArgs { pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> anyhow::Result<()> { let project_dirs = ProjectDirs::from("", "", "puffin"); - let cache_dir = (!args.no_cache) - .then(|| { - args.cache_dir - .as_deref() - .or_else(|| project_dirs.as_ref().map(ProjectDirs::cache_dir)) - }) - .flatten(); - let client = RegistryClientBuilder::default().cache(cache_dir).build(); + // https://github.com/astral-sh/puffin/issues/366 + let cache_dir = if args.no_cache { + Cow::Owned(tempdir()?.into_path()) + } else if let Some(cache_dir) = args.cache_dir { + Cow::Owned(cache_dir) + } else if let Some(project_dirs) = project_dirs.as_ref() { + Cow::Borrowed(project_dirs.cache_dir()) + } else { + Cow::Borrowed(Path::new(".puffin_cache")) + }; + let client = RegistryClientBuilder::new(cache_dir).build(); let filename = WheelFilename::from_str( args.url diff --git a/crates/puffin-resolver/tests/resolver.rs b/crates/puffin-resolver/tests/resolver.rs index 2ec770c89..874516916 100644 --- a/crates/puffin-resolver/tests/resolver.rs +++ b/crates/puffin-resolver/tests/resolver.rs @@ -10,6 +10,7 @@ use std::str::FromStr; use anyhow::Result; use once_cell::sync::Lazy; +use tempfile::tempdir; use pep508_rs::{MarkerEnvironment, Requirement, StringVersion}; use platform_host::{Arch, Os, Platform}; @@ -64,7 +65,8 @@ impl BuildContext for DummyContext { async fn black() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black<=23.9.1").unwrap()], @@ -87,7 +89,8 @@ async fn black() -> Result<()> { async fn black_colorama() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black[colorama]<=23.9.1").unwrap()], @@ -110,7 +113,8 @@ async fn black_colorama() -> Result<()> { async fn black_python_310() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black<=23.9.1").unwrap()], @@ -135,7 +139,8 @@ async fn black_python_310() -> Result<()> { async fn black_mypy_extensions() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black<=23.9.1").unwrap()], @@ -160,7 +165,8 @@ async fn black_mypy_extensions() -> Result<()> { async fn black_mypy_extensions_extra() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black<=23.9.1").unwrap()], @@ -185,7 +191,8 @@ async fn black_mypy_extensions_extra() -> Result<()> { async fn black_flake8() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black<=23.9.1").unwrap()], @@ -208,7 +215,8 @@ async fn black_flake8() -> Result<()> { async fn black_lowest() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black>21").unwrap()], @@ -231,7 +239,8 @@ async fn black_lowest() -> Result<()> { async fn black_lowest_direct() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black>21").unwrap()], @@ -254,7 +263,8 @@ async fn black_lowest_direct() -> Result<()> { async fn black_respect_preference() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black<=23.9.1").unwrap()], @@ -277,7 +287,8 @@ async fn black_respect_preference() -> Result<()> { async fn black_ignore_preference() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black<=23.9.1").unwrap()], @@ -300,7 +311,8 @@ async fn black_ignore_preference() -> Result<()> { async fn black_disallow_prerelease() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black<=20.0").unwrap()], @@ -323,7 +335,8 @@ async fn black_disallow_prerelease() -> Result<()> { async fn black_allow_prerelease_if_necessary() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("black<=20.0").unwrap()], @@ -346,7 +359,8 @@ async fn black_allow_prerelease_if_necessary() -> Result<()> { async fn pylint_disallow_prerelease() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("pylint==2.3.0").unwrap()], @@ -369,7 +383,8 @@ async fn pylint_disallow_prerelease() -> Result<()> { async fn pylint_allow_prerelease() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![Requirement::from_str("pylint==2.3.0").unwrap()], @@ -392,7 +407,8 @@ async fn pylint_allow_prerelease() -> Result<()> { async fn pylint_allow_explicit_prerelease_without_marker() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![ @@ -418,7 +434,8 @@ async fn pylint_allow_explicit_prerelease_without_marker() -> Result<()> { async fn pylint_allow_explicit_prerelease_with_marker() -> Result<()> { colored::control::set_override(false); - let client = RegistryClientBuilder::default().build(); + let tempdir = tempdir()?; + let client = RegistryClientBuilder::new(tempdir.path()).build(); let manifest = Manifest::new( vec![ diff --git a/scripts/benchmarks/requirements/black.in b/scripts/benchmarks/requirements/black.in new file mode 100644 index 000000000..7e66a17d4 --- /dev/null +++ b/scripts/benchmarks/requirements/black.in @@ -0,0 +1 @@ +black diff --git a/scripts/benchmarks/requirements/meine_stadt_transparent.in b/scripts/benchmarks/requirements/meine_stadt_transparent.in new file mode 100644 index 000000000..61fa6fec1 --- /dev/null +++ b/scripts/benchmarks/requirements/meine_stadt_transparent.in @@ -0,0 +1 @@ +meine_stadt_transparent diff --git a/scripts/benchmarks/requirements/mst.in b/scripts/benchmarks/requirements/mst.in deleted file mode 100644 index b3ee1de8b..000000000 --- a/scripts/benchmarks/requirements/mst.in +++ /dev/null @@ -1 +0,0 @@ -meine_stadt_transparent \ No newline at end of file