Add proper caching for pypi metadata fetching kinds (#368)

I intend this to become the main form of caching for puffin: You can
make http requests, you tranform the data to what you really need, you
have control over the cache key, and the cache is always json (or
anything else much faster we want to replace it with as long as it's
serde!)
This commit is contained in:
konsti 2023-11-10 12:03:40 +01:00 committed by GitHub
parent d1b57acaa8
commit 5cef40d87a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 404 additions and 206 deletions

7
Cargo.lock generated
View File

@ -225,8 +225,8 @@ dependencies = [
[[package]]
name = "async_http_range_reader"
version = "0.3.0"
source = "git+https://github.com/baszalmstra/async_http_range_reader#4cafe5afda889d53060e0565c949d4ffd6ef3786"
version = "0.4.0"
source = "git+https://github.com/baszalmstra/async_http_range_reader?rev=8dab2c08ac864fec1df014465264f9a7c8eae905#8dab2c08ac864fec1df014465264f9a7c8eae905"
dependencies = [
"bisection",
"futures",
@ -2370,9 +2370,10 @@ dependencies = [
"distribution-filename",
"fs-err",
"futures",
"http",
"http-cache-reqwest",
"http-cache-semantics",
"install-wheel-rs",
"puffin-cache",
"puffin-normalize",
"pypi-types",
"reqwest",

View File

@ -14,7 +14,7 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
anyhow = { version = "1.0.75" }
async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", ref = "4cafe5afda889d53060e0565c949d4ffd6ef3786" }
async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", rev = "8dab2c08ac864fec1df014465264f9a7c8eae905" }
async_zip = { version = "0.0.15", features = ["tokio", "deflate"] }
bitflags = { version = "2.4.1" }
bytesize = { version = "1.3.0" }
@ -35,7 +35,9 @@ fxhash = { version = "0.2.1" }
glob = { version = "0.3.1" }
goblin = { version = "0.7.1" }
hex = { version = "0.4.3" }
http = { version = "0.2.9" }
http-cache-reqwest = { version = "0.12.0" }
http-cache-semantics = { version = "1.0.1" }
indicatif = { version = "0.17.7" }
indoc = { version = "2.0.4" }
itertools = { version = "0.11.0" }

View File

@ -137,6 +137,11 @@ impl WheelFilename {
self.platform_tag.join(".")
)
}
/// The wheel filename without the extension
pub fn stem(&self) -> String {
format!("{}-{}-{}", self.distribution, self.version, self.get_tag())
}
}
impl TryFrom<&Url> for WheelFilename {

View File

@ -2,7 +2,7 @@ use std::hash::Hasher;
use crate::cache_key::{CacheKey, CacheKeyHasher};
/// Compute a hex string hash of a [`CacheKey`] object.
/// Compute a hex string hash of a `CacheKey` object.
///
/// The value returned by [`digest`] should be stable across releases and platforms.
pub fn digest<H: CacheKey>(hashable: &H) -> String {

View File

@ -128,8 +128,7 @@ pub(crate) async fn pip_compile(
// Instantiate a client.
let client = {
let mut builder = RegistryClientBuilder::default();
builder = builder.cache(Some(cache));
let mut builder = RegistryClientBuilder::new(cache);
if let Some(IndexUrls { index, extra_index }) = index_urls {
if let Some(index) = index {
builder = builder.index(index);
@ -142,7 +141,7 @@ pub(crate) async fn pip_compile(
};
let build_dispatch = BuildDispatch::new(
RegistryClientBuilder::default().build(),
client.clone(),
cache.to_path_buf(),
venv.interpreter_info().clone(),
fs::canonicalize(venv.python_executable())?,

View File

@ -110,8 +110,7 @@ pub(crate) async fn sync_requirements(
// Instantiate a client.
let client = {
let mut builder = RegistryClientBuilder::default();
builder = builder.cache(Some(cache));
let mut builder = RegistryClientBuilder::new(cache);
if let Some(IndexUrls { index, extra_index }) = index_urls {
if let Some(index) = index {
builder = builder.index(index);
@ -193,7 +192,7 @@ pub(crate) async fn sync_requirements(
let start = std::time::Instant::now();
let build_dispatch = BuildDispatch::new(
RegistryClientBuilder::default().build(),
client,
cache.to_path_buf(),
venv.interpreter_info().clone(),
fs::canonicalize(venv.python_executable())?,

View File

@ -6,7 +6,6 @@ edition = "2021"
[dependencies]
distribution-filename = { path = "../distribution-filename" }
install-wheel-rs = { path = "../install-wheel-rs" }
puffin-cache = { path = "../puffin-cache" }
puffin-normalize = { path = "../puffin-normalize" }
pypi-types = { path = "../pypi-types" }
@ -14,7 +13,9 @@ async_http_range_reader = { workspace = true }
async_zip = { workspace = true }
futures = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
http = { workspace = true }
http-cache-reqwest = { workspace = true }
http-cache-semantics = { workspace = true }
reqwest = { workspace = true }
reqwest-middleware = { workspace = true }
reqwest-retry = { workspace = true }

View File

@ -0,0 +1,217 @@
use std::future::Future;
use std::path::Path;
use std::time::SystemTime;
use http_cache_semantics::{AfterResponse, BeforeRequest, CachePolicy};
use reqwest::{Client, Request, Response};
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use tempfile::NamedTempFile;
use tracing::{trace, warn};
use crate::error::Error;
#[derive(Debug)]
enum CachedResponse<Payload: Serialize> {
/// The cached response is fresh without an HTTP request (e.g. immutable)
FreshCache(Payload),
/// The cached response is fresh after an HTTP request (e.g. 304 not modified)
NotModified(DataWithCachePolicy<Payload>),
/// There was no prior cached response or the cache was outdated
///
/// The cache policy is `None` if it isn't storable
ModifiedOrNew(Response, Option<CachePolicy>),
}
/// Serialize the actual payload together with its caching information
#[derive(Debug, Deserialize, Serialize)]
struct DataWithCachePolicy<Payload: Serialize> {
data: Payload,
cache_policy: CachePolicy,
}
/// Custom caching layer over [`reqwest::Client`] using `http-cache-semantics`.
///
/// This effective middleware takes inspiration from the `http-cache` crate, but unlike this crate,
/// we allow running an async callback on the response before caching. We use this to e.g. store a
/// parsed version of the wheel metadata and for our remote zip reader. In the latter case, we want
/// to read a single file from a remote zip using range requests (so we don't have to download the
/// entire file). We send a HEAD request in the caching layer to check if the remote file has
/// changed (and if range requests are supported), and in the callback we make the actual range
/// requests if required.
///
/// Unlike `http-cache`, all outputs must be serde-able. Currently everything is json, but we can
/// transparently switch to a faster/smaller format.
///
/// Again unlike `http-cache`, the caller gets full control over the cache key with the assumption
/// that it's a file. TODO(konstin): Centralize the cache bucket management.
#[derive(Debug, Clone)]
pub(crate) struct CachedClient(Client);
impl CachedClient {
pub(crate) fn new(client: Client) -> Self {
Self(client)
}
/// Makes a cached request with a custom response transformation
///
/// If a new response was received (no prior cached response or modified on the remote), the
/// response through `response_callback` and only the result is cached and returned
pub(crate) async fn get_cached_with_callback<
Payload: Serialize + DeserializeOwned,
Callback,
CallbackReturn,
>(
&self,
req: Request,
cache_dir: &Path,
filename: &str,
response_callback: Callback,
) -> Result<Payload, Error>
where
Callback: FnOnce(Response) -> CallbackReturn,
CallbackReturn: Future<Output = Result<Payload, Error>>,
{
let cache_file = cache_dir.join(filename);
let cached = if let Ok(cached) = fs_err::tokio::read(&cache_file).await {
match serde_json::from_slice::<DataWithCachePolicy<Payload>>(&cached) {
Ok(data) => Some(data),
Err(err) => {
warn!(
"Broken cache entry at {}, removing: {err}",
cache_file.display()
);
let _ = fs_err::tokio::remove_file(&cache_file).await;
None
}
}
} else {
None
};
let cached_response = self.send_cached(req, cached).await?;
match cached_response {
CachedResponse::FreshCache(data) => Ok(data),
CachedResponse::NotModified(data_with_cache_policy) => {
let temp_file = NamedTempFile::new_in(cache_dir)?;
fs_err::tokio::write(&temp_file, &serde_json::to_vec(&data_with_cache_policy)?)
.await?;
temp_file.persist(cache_file)?;
Ok(data_with_cache_policy.data)
}
CachedResponse::ModifiedOrNew(res, cache_policy) => {
let data = response_callback(res).await?;
if let Some(cache_policy) = cache_policy {
let data_with_cache_policy = DataWithCachePolicy { data, cache_policy };
fs_err::tokio::create_dir_all(cache_dir).await?;
let temp_file = NamedTempFile::new_in(cache_dir)?;
fs_err::tokio::write(&temp_file, &serde_json::to_vec(&data_with_cache_policy)?)
.await?;
temp_file.persist(cache_file)?;
Ok(data_with_cache_policy.data)
} else {
Ok(data)
}
}
}
}
/// `http-cache-semantics` to `reqwest` wrapper
async fn send_cached<T: Serialize + DeserializeOwned>(
&self,
mut req: Request,
cached: Option<DataWithCachePolicy<T>>,
) -> Result<CachedResponse<T>, Error> {
// The converted types are from the specific `reqwest` types to the more generic `http`
// types
let mut converted_req = http::Request::try_from(
req.try_clone()
.expect("You can't use streaming request bodies with this function"),
)?;
let cached_response = if let Some(cached) = cached {
match cached
.cache_policy
.before_request(&converted_req, SystemTime::now())
{
BeforeRequest::Fresh(_) => {
trace!("Fresh cache for {}", req.url());
CachedResponse::FreshCache(cached.data)
}
BeforeRequest::Stale { request, matches } => {
if !matches {
// This should not happen
warn!(
"Cached request doesn't match current request for {}",
req.url(),
);
// This will override the bogus cache
return self.fresh_request(req, converted_req).await;
}
trace!("Revalidation request for {}", req.url());
for header in &request.headers {
req.headers_mut().insert(header.0.clone(), header.1.clone());
converted_req
.headers_mut()
.insert(header.0.clone(), header.1.clone());
}
let res = self.0.execute(req).await?.error_for_status()?;
let mut converted_res = http::Response::new(());
*converted_res.status_mut() = res.status();
for header in res.headers() {
converted_res.headers_mut().insert(
http::HeaderName::from(header.0),
http::HeaderValue::from(header.1),
);
}
let after_response = cached.cache_policy.after_response(
&converted_req,
&converted_res,
SystemTime::now(),
);
match after_response {
AfterResponse::NotModified(new_policy, _parts) => {
CachedResponse::NotModified(DataWithCachePolicy {
data: cached.data,
cache_policy: new_policy,
})
}
AfterResponse::Modified(new_policy, _parts) => {
CachedResponse::ModifiedOrNew(
res,
new_policy.is_storable().then_some(new_policy),
)
}
}
}
}
} else {
// No reusable cache
self.fresh_request(req, converted_req).await?
};
Ok(cached_response)
}
async fn fresh_request<T: Serialize>(
&self,
req: Request,
converted_req: http::Request<reqwest::Body>,
) -> Result<CachedResponse<T>, Error> {
trace!("{} {}", req.method(), req.url());
let res = self.0.execute(req).await?.error_for_status()?;
let mut converted_res = http::Response::new(());
*converted_res.status_mut() = res.status();
for header in res.headers() {
converted_res.headers_mut().insert(
http::HeaderName::from(header.0),
http::HeaderValue::from(header.1),
);
}
let cache_policy =
CachePolicy::new(&converted_req.into_parts().0, &converted_res.into_parts().0);
Ok(CachedResponse::ModifiedOrNew(
res,
cache_policy.is_storable().then_some(cache_policy),
))
}
}

View File

@ -2,14 +2,11 @@ use std::fmt::Debug;
use std::path::PathBuf;
use std::str::FromStr;
use async_http_range_reader::{
AsyncHttpRangeReader, AsyncHttpRangeReaderError, CheckSupportMethod,
};
use async_http_range_reader::{AsyncHttpRangeReader, AsyncHttpRangeReaderError};
use async_zip::tokio::read::seek::ZipFileReader;
use futures::{AsyncRead, StreamExt, TryStreamExt};
use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions};
use reqwest::header::HeaderMap;
use reqwest::{header, Client, ClientBuilder, StatusCode};
use reqwest::{Client, ClientBuilder, Response, StatusCode};
use reqwest_middleware::ClientWithMiddleware;
use reqwest_retry::policies::ExponentialBackoff;
use reqwest_retry::RetryTransientMiddleware;
@ -24,9 +21,10 @@ use install_wheel_rs::find_dist_info;
use puffin_normalize::PackageName;
use pypi_types::{File, Metadata21, SimpleJson};
use crate::cached_client::CachedClient;
use crate::error::Error;
use crate::remote_metadata::{
wheel_metadata_from_remote_zip, wheel_metadata_get_cached, wheel_metadata_write_cache,
wheel_metadata_from_remote_zip, WHEEL_METADATA_FROM_INDEX, WHEEL_METADATA_FROM_ZIP_CACHE,
};
/// A builder for an [`RegistryClient`].
@ -37,17 +35,17 @@ pub struct RegistryClientBuilder {
no_index: bool,
proxy: Url,
retries: u32,
cache: Option<PathBuf>,
cache: PathBuf,
}
impl Default for RegistryClientBuilder {
fn default() -> Self {
impl RegistryClientBuilder {
pub fn new(cache: impl Into<PathBuf>) -> Self {
Self {
index: Url::parse("https://pypi.org/simple").unwrap(),
extra_index: vec![],
no_index: false,
proxy: Url::parse("https://pypi-metadata.ruff.rs").unwrap(),
cache: None,
cache: cache.into(),
retries: 0,
}
}
@ -85,11 +83,8 @@ impl RegistryClientBuilder {
}
#[must_use]
pub fn cache<T>(mut self, cache: Option<T>) -> Self
where
T: Into<PathBuf>,
{
self.cache = cache.map(Into::into);
pub fn cache<T>(mut self, cache: impl Into<PathBuf>) -> Self {
self.cache = cache.into();
self
}
@ -109,13 +104,13 @@ impl RegistryClientBuilder {
let mut client_builder =
reqwest_middleware::ClientBuilder::new(client_raw.clone()).with(retry_strategy);
if let Some(path) = &self.cache {
client_builder = client_builder.with(Cache(HttpCache {
mode: CacheMode::Default,
manager: CACacheManager { path: path.clone() },
options: HttpCacheOptions::default(),
}));
}
client_builder = client_builder.with(Cache(HttpCache {
mode: CacheMode::Default,
manager: CACacheManager {
path: self.cache.clone(),
},
options: HttpCacheOptions::default(),
}));
let retry_policy = ExponentialBackoff::builder().build_with_max_retries(self.retries);
let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy);
@ -128,14 +123,16 @@ impl RegistryClientBuilder {
extra_index: self.extra_index,
no_index: self.no_index,
client: client_builder.build(),
client_raw,
client_raw: client_raw.clone(),
uncached_client: uncached_client_builder.build(),
cache: self.cache,
cached_client: CachedClient::new(client_raw),
}
}
}
/// A client for fetching packages from a `PyPI`-compatible index.
// TODO(konstin): Clean up the clients once we moved everything to common caching
#[derive(Debug, Clone)]
pub struct RegistryClient {
pub(crate) index: Url,
@ -145,8 +142,9 @@ pub struct RegistryClient {
pub(crate) client: ClientWithMiddleware,
pub(crate) uncached_client: ClientWithMiddleware,
pub(crate) client_raw: Client,
pub(crate) cached_client: CachedClient,
/// Used for the remote wheel METADATA cache
pub(crate) cache: Option<PathBuf>,
pub(crate) cache: PathBuf,
}
impl RegistryClient {
@ -207,23 +205,24 @@ impl RegistryClient {
// If the metadata file is available at its own url (PEP 658), download it from there
let url = Url::parse(&file.url)?;
let filename = WheelFilename::from_str(&file.filename)?;
if file.data_dist_info_metadata.is_available() {
let url = Url::parse(&format!("{}.metadata", file.url))?;
trace!("Fetching file {} from {}", file.filename, url);
let text = self.wheel_metadata_impl(&url).await.map_err(|err| {
if err.status() == Some(StatusCode::NOT_FOUND) {
Error::FileNotFound(file.filename, err)
} else {
err.into()
}
})?;
Ok(Metadata21::parse(text.as_bytes())?)
let cache_dir = self.cache.join(WHEEL_METADATA_FROM_INDEX).join("pypi");
let cache_file = format!("{}.json", filename.stem());
let response_callback = |response: Response| async move {
Ok(Metadata21::parse(response.bytes().await?.as_ref())?)
};
let req = self.client_raw.get(url.clone()).build()?;
self.cached_client
.get_cached_with_callback(req, &cache_dir, &cache_file, response_callback)
.await
// If we lack PEP 658 support, try using HTTP range requests to read only the
// `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
// into the cache and read from there
} else {
let filename = WheelFilename::from_str(&file.filename)?;
self.wheel_metadata_no_index(&filename, &url).await
}
}
@ -234,79 +233,82 @@ impl RegistryClient {
filename: &WheelFilename,
url: &Url,
) -> Result<Metadata21, Error> {
Ok(
if let Some(cached_metadata) =
wheel_metadata_get_cached(url, self.cache.as_deref()).await
{
debug!("Cache hit for wheel metadata for {url}");
cached_metadata
} else if let Some((mut reader, headers)) = self.range_reader(url.clone()).await? {
debug!("Using remote zip reader for wheel metadata for {url}");
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
let metadata = Metadata21::parse(text.as_bytes())?;
let is_immutable = headers
.get(header::CACHE_CONTROL)
.and_then(|header| header.to_str().ok())
.unwrap_or_default()
.split(',')
.any(|entry| entry.trim().to_lowercase() == "immutable");
if is_immutable {
debug!("Immutable (cacheable) wheel metadata for {url}");
wheel_metadata_write_cache(url, self.cache.as_deref(), &metadata).await?;
}
metadata
} else {
debug!("Downloading whole wheel to extract metadata from {url}");
// TODO(konstin): Download the wheel into a cache shared with the installer instead
// Note that this branch is only hit when you're not using and the server where
// you host your wheels for some reasons doesn't support range requests
// (tbh we should probably warn here and tekk users to get a better registry because
// their current one makes resolution unnecessary slow)
let temp_download = tempfile()?;
let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download));
let mut reader = self.stream_external(url).await?.compat();
tokio::io::copy(&mut reader, &mut writer).await?;
let temp_download = writer.into_inner();
let cache_dir = self.cache.join(WHEEL_METADATA_FROM_ZIP_CACHE).join("pypi");
let cache_file = format!("{}.json", filename.stem());
let mut reader = ZipFileReader::new(temp_download.compat())
.await
.map_err(|err| Error::Zip(filename.clone(), err))?;
// This response callback is special, we actually make a number of subsequent requests to
// fetch the file from the remote zip.
let client = self.client_raw.clone();
let read_metadata_from_initial_response = |response: Response| async {
let mut reader = AsyncHttpRangeReader::from_head_response(client, response).await?;
trace!("Getting metadata for {filename} by range request");
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
let metadata = Metadata21::parse(text.as_bytes())?;
Ok(metadata)
};
let (metadata_idx, _dist_info_dir) = find_dist_info(
filename,
reader
.file()
.entries()
.iter()
.enumerate()
.filter_map(|(idx, e)| Some((idx, e.entry().filename().as_str().ok()?))),
)
.map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?;
let req = self.client_raw.head(url.clone()).build()?;
let result = self
.cached_client
.get_cached_with_callback(
req,
&cache_dir,
&cache_file,
read_metadata_from_initial_response,
)
.await;
// Read the contents of the METADATA file
let mut contents = Vec::new();
reader
.reader_with_entry(metadata_idx)
.await
.map_err(|err| Error::Zip(filename.clone(), err))?
.read_to_end_checked(&mut contents)
.await
.map_err(|err| Error::Zip(filename.clone(), err))?;
match result {
Ok(metadata) => {
return Ok(metadata);
}
Err(Error::AsyncHttpRangeReader(
AsyncHttpRangeReaderError::HttpRangeRequestUnsupported,
)) => {}
Err(err) => return Err(err),
}
Metadata21::parse(&contents)?
},
// The range request version failed (this is bad, the webserver should support this), fall
// back to downloading the entire file and the reading the file from the zip the regular way
debug!("Range requests not supported for {filename}, downloading whole wheel");
// TODO(konstin): Download the wheel into a cache shared with the installer instead
// Note that this branch is only hit when you're not using and the server where
// you host your wheels for some reasons doesn't support range requests
// (tbh we should probably warn here and tekk users to get a better registry because
// their current one makes resolution unnecessary slow)
let temp_download = tempfile()?;
let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download));
let mut reader = self.stream_external(url).await?.compat();
tokio::io::copy(&mut reader, &mut writer).await?;
let temp_download = writer.into_inner();
let mut reader = ZipFileReader::new(temp_download.compat())
.await
.map_err(|err| Error::Zip(filename.clone(), err))?;
let (metadata_idx, _dist_info_dir) = find_dist_info(
filename,
reader
.file()
.entries()
.iter()
.enumerate()
.filter_map(|(idx, e)| Some((idx, e.entry().filename().as_str().ok()?))),
)
}
.map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?;
async fn wheel_metadata_impl(&self, url: &Url) -> Result<String, reqwest_middleware::Error> {
Ok(self
.client
.get(url.clone())
.send()
.await?
.error_for_status()?
.text()
.await?)
// Read the contents of the METADATA file
let mut contents = Vec::new();
reader
.reader_with_entry(metadata_idx)
.await
.map_err(|err| Error::Zip(filename.clone(), err))?
.read_to_end_checked(&mut contents)
.await
.map_err(|err| Error::Zip(filename.clone(), err))?;
Ok(Metadata21::parse(&contents)?)
}
/// Stream a file from an external URL.
@ -332,23 +334,4 @@ impl RegistryClient {
.into_async_read(),
))
}
/// An async for individual files inside a remote zip file, if the server supports it. Returns
/// the headers of the initial request for caching.
async fn range_reader(
&self,
url: Url,
) -> Result<Option<(AsyncHttpRangeReader, HeaderMap)>, Error> {
let response = AsyncHttpRangeReader::new(
self.client_raw.clone(),
url.clone(),
CheckSupportMethod::Head,
)
.await;
match response {
Ok((reader, headers)) => Ok(Some((reader, headers))),
Err(AsyncHttpRangeReaderError::HttpRangeRequestUnsupported) => Ok(None),
Err(err) => Err(err.into()),
}
}
}

View File

@ -28,7 +28,7 @@ pub enum Error {
/// The metadata file was not found in the registry.
#[error("File `{0}` was not found in the registry at {1}.")]
FileNotFound(String, #[source] reqwest_middleware::Error),
FileNotFound(String, #[source] reqwest::Error),
/// A generic request error happened while making a request. Refer to the
/// error message for more details.
@ -60,6 +60,13 @@ pub enum Error {
#[error(transparent)]
IO(#[from] io::Error),
/// An [`io::Error`] with a filename attached
#[error(transparent)]
Persist(#[from] tempfile::PersistError),
#[error("Failed to serialize response to cache")]
SerdeJson(#[from] serde_json::Error),
}
impl Error {

View File

@ -1,6 +1,7 @@
pub use client::{RegistryClient, RegistryClientBuilder};
pub use error::Error;
mod cached_client;
mod client;
mod error;
mod remote_metadata;

View File

@ -1,57 +1,19 @@
use std::io;
use std::path::Path;
use async_http_range_reader::AsyncHttpRangeReader;
use async_zip::tokio::read::seek::ZipFileReader;
use fs_err::tokio as fs;
use tokio_util::compat::TokioAsyncReadCompatExt;
use url::Url;
use distribution_filename::WheelFilename;
use install_wheel_rs::find_dist_info;
use puffin_cache::CanonicalUrl;
use pypi_types::Metadata21;
use crate::Error;
const WHEEL_METADATA_FROM_ZIP_CACHE: &str = "wheel-metadata-v0";
/// Try to read the cached METADATA previously extracted from a remote zip, if it exists
pub(crate) async fn wheel_metadata_get_cached(
url: &Url,
cache: Option<&Path>,
) -> Option<Metadata21> {
// TODO(konstin): Actual good cache layout
let path = cache?
.join(WHEEL_METADATA_FROM_ZIP_CACHE)
.join(puffin_cache::digest(&CanonicalUrl::new(url)));
if !path.is_file() {
return None;
}
let data = fs::read(path).await.ok()?;
serde_json::from_slice(&data).ok()
}
/// Write the cached METADATA extracted from a remote zip to the cache
pub(crate) async fn wheel_metadata_write_cache(
url: &Url,
cache: Option<&Path>,
metadata: &Metadata21,
) -> io::Result<()> {
let Some(cache) = cache else {
return Ok(());
};
// TODO(konstin): Actual good cache layout
let dir = cache.join(WHEEL_METADATA_FROM_ZIP_CACHE);
fs::create_dir_all(&dir).await?;
let path = dir.join(puffin_cache::digest(&CanonicalUrl::new(url)));
fs::write(path, serde_json::to_vec(metadata)?).await
}
pub(crate) const WHEEL_METADATA_FROM_INDEX: &str = "wheel-metadat-index-v0";
pub(crate) const WHEEL_METADATA_FROM_ZIP_CACHE: &str = "wheel-metadata-remote-v0";
/// Read the `.dist-info/METADATA` file from a async remote zip reader, so we avoid downloading the
/// entire wheel just for the one file.
///
/// This method is derived from `prefix-div/rip`, which is available under the following BSD-3
/// This method is derived from `prefix-dev/rip`, which is available under the following BSD-3
/// Clause license:
///
/// ```text

View File

@ -10,9 +10,7 @@ use puffin_client::RegistryClientBuilder;
#[tokio::test]
async fn remote_metadata_with_and_without_cache() -> Result<()> {
let temp_cache = tempdir().unwrap();
let client = RegistryClientBuilder::default()
.cache(Some(temp_cache.path().to_path_buf()))
.build();
let client = RegistryClientBuilder::new(temp_cache.path().to_path_buf()).build();
// The first run is without cache (the tempdir is empty), the second has the cache from the
// first run
for _ in 0..2 {

View File

@ -47,7 +47,7 @@ pub(crate) async fn build(args: BuildArgs) -> Result<PathBuf> {
let venv = Virtualenv::from_env(platform, Some(&cache))?;
let build_dispatch = BuildDispatch::new(
RegistryClientBuilder::default().build(),
RegistryClientBuilder::new(cache.clone()).build(),
cache,
venv.interpreter_info().clone(),
fs::canonicalize(venv.python_executable())?,

View File

@ -37,7 +37,7 @@ pub(crate) async fn resolve_cli(args: ResolveCliArgs) -> anyhow::Result<()> {
let platform = Platform::current()?;
let venv = Virtualenv::from_env(platform, Some(&cache))?;
let build_dispatch = BuildDispatch::new(
RegistryClientBuilder::default().cache(Some(&cache)).build(),
RegistryClientBuilder::new(cache.clone()).build(),
cache.clone(),
venv.interpreter_info().clone(),
fs::canonicalize(venv.python_executable())?,

View File

@ -60,7 +60,7 @@ pub(crate) async fn resolve_many(args: ResolveManyArgs) -> anyhow::Result<()> {
let platform = Platform::current()?;
let venv = Virtualenv::from_env(platform, Some(&cache))?;
let build_dispatch = BuildDispatch::new(
RegistryClientBuilder::default().cache(Some(&cache)).build(),
RegistryClientBuilder::new(cache.clone()).build(),
cache.clone(),
venv.interpreter_info().clone(),
fs::canonicalize(venv.python_executable())?,

View File

@ -1,8 +1,10 @@
use std::path::PathBuf;
use std::borrow::Cow;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use clap::Parser;
use directories::ProjectDirs;
use tempfile::tempdir;
use url::Url;
use distribution_filename::WheelFilename;
@ -21,14 +23,17 @@ pub(crate) struct WheelMetadataArgs {
pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> anyhow::Result<()> {
let project_dirs = ProjectDirs::from("", "", "puffin");
let cache_dir = (!args.no_cache)
.then(|| {
args.cache_dir
.as_deref()
.or_else(|| project_dirs.as_ref().map(ProjectDirs::cache_dir))
})
.flatten();
let client = RegistryClientBuilder::default().cache(cache_dir).build();
// https://github.com/astral-sh/puffin/issues/366
let cache_dir = if args.no_cache {
Cow::Owned(tempdir()?.into_path())
} else if let Some(cache_dir) = args.cache_dir {
Cow::Owned(cache_dir)
} else if let Some(project_dirs) = project_dirs.as_ref() {
Cow::Borrowed(project_dirs.cache_dir())
} else {
Cow::Borrowed(Path::new(".puffin_cache"))
};
let client = RegistryClientBuilder::new(cache_dir).build();
let filename = WheelFilename::from_str(
args.url

View File

@ -10,6 +10,7 @@ use std::str::FromStr;
use anyhow::Result;
use once_cell::sync::Lazy;
use tempfile::tempdir;
use pep508_rs::{MarkerEnvironment, Requirement, StringVersion};
use platform_host::{Arch, Os, Platform};
@ -64,7 +65,8 @@ impl BuildContext for DummyContext {
async fn black() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black<=23.9.1").unwrap()],
@ -87,7 +89,8 @@ async fn black() -> Result<()> {
async fn black_colorama() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black[colorama]<=23.9.1").unwrap()],
@ -110,7 +113,8 @@ async fn black_colorama() -> Result<()> {
async fn black_python_310() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black<=23.9.1").unwrap()],
@ -135,7 +139,8 @@ async fn black_python_310() -> Result<()> {
async fn black_mypy_extensions() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black<=23.9.1").unwrap()],
@ -160,7 +165,8 @@ async fn black_mypy_extensions() -> Result<()> {
async fn black_mypy_extensions_extra() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black<=23.9.1").unwrap()],
@ -185,7 +191,8 @@ async fn black_mypy_extensions_extra() -> Result<()> {
async fn black_flake8() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black<=23.9.1").unwrap()],
@ -208,7 +215,8 @@ async fn black_flake8() -> Result<()> {
async fn black_lowest() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black>21").unwrap()],
@ -231,7 +239,8 @@ async fn black_lowest() -> Result<()> {
async fn black_lowest_direct() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black>21").unwrap()],
@ -254,7 +263,8 @@ async fn black_lowest_direct() -> Result<()> {
async fn black_respect_preference() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black<=23.9.1").unwrap()],
@ -277,7 +287,8 @@ async fn black_respect_preference() -> Result<()> {
async fn black_ignore_preference() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black<=23.9.1").unwrap()],
@ -300,7 +311,8 @@ async fn black_ignore_preference() -> Result<()> {
async fn black_disallow_prerelease() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black<=20.0").unwrap()],
@ -323,7 +335,8 @@ async fn black_disallow_prerelease() -> Result<()> {
async fn black_allow_prerelease_if_necessary() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("black<=20.0").unwrap()],
@ -346,7 +359,8 @@ async fn black_allow_prerelease_if_necessary() -> Result<()> {
async fn pylint_disallow_prerelease() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("pylint==2.3.0").unwrap()],
@ -369,7 +383,8 @@ async fn pylint_disallow_prerelease() -> Result<()> {
async fn pylint_allow_prerelease() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![Requirement::from_str("pylint==2.3.0").unwrap()],
@ -392,7 +407,8 @@ async fn pylint_allow_prerelease() -> Result<()> {
async fn pylint_allow_explicit_prerelease_without_marker() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![
@ -418,7 +434,8 @@ async fn pylint_allow_explicit_prerelease_without_marker() -> Result<()> {
async fn pylint_allow_explicit_prerelease_with_marker() -> Result<()> {
colored::control::set_override(false);
let client = RegistryClientBuilder::default().build();
let tempdir = tempdir()?;
let client = RegistryClientBuilder::new(tempdir.path()).build();
let manifest = Manifest::new(
vec![

View File

@ -0,0 +1 @@
black

View File

@ -0,0 +1 @@
meine_stadt_transparent

View File

@ -1 +0,0 @@
meine_stadt_transparent