mirror of https://github.com/astral-sh/uv
Avoid revalidating immutable HTTP responses (#1069)
## Summary If you send a revalidation request to a resource that returns an `immutable` directive, the server apparently returns a 200 instead of a 304? In other words, the server can ignore the revalidation request. This PR adds handling on top of the HTTP cache semantics to respect immutable resources, which is especially useful since all PyPI files are immutable.
This commit is contained in:
parent
5621c414cf
commit
09f5884f28
|
|
@ -0,0 +1,56 @@
|
|||
use http::HeaderValue;
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::collections::hash_map::Entry;
|
||||
|
||||
/// Cache headers from an HTTP response.
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct CacheHeaders(FxHashMap<Box<str>, Option<Box<str>>>);
|
||||
|
||||
impl CacheHeaders {
|
||||
/// Parse the `Cache-Control` header from an HTTP response.
|
||||
///
|
||||
/// See: <https://github.com/kornelski/rusty-http-cache-semantics/blob/8fba3b9a3ddf01ba24f2d1a7994f4e9500644c1a/src/lib.rs#L51>
|
||||
pub(crate) fn from_response<'a>(
|
||||
headers: impl IntoIterator<Item = &'a HeaderValue>,
|
||||
) -> CacheHeaders {
|
||||
let mut cc = FxHashMap::<Box<str>, Option<Box<str>>>::default();
|
||||
let mut is_valid = true;
|
||||
|
||||
for h in headers.into_iter().filter_map(|v| v.to_str().ok()) {
|
||||
for part in h.split(',') {
|
||||
// TODO: lame parsing
|
||||
if part.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let mut kv = part.splitn(2, '=');
|
||||
let k = kv.next().unwrap().trim();
|
||||
if k.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let v = kv.next().map(str::trim);
|
||||
match cc.entry(k.into()) {
|
||||
Entry::Occupied(e) => {
|
||||
// When there is more than one value present for a given directive (e.g., two Expires header fields, multiple Cache-Control: max-age directives),
|
||||
// the directive's value is considered invalid. Caches are encouraged to consider responses that have invalid freshness information to be stale
|
||||
if e.get().as_deref() != v {
|
||||
is_valid = false;
|
||||
}
|
||||
}
|
||||
Entry::Vacant(e) => {
|
||||
e.insert(v.map(|v| v.trim_matches('"')).map(From::from));
|
||||
// TODO: bad unquoting
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !is_valid {
|
||||
cc.insert("must-revalidate".into(), None);
|
||||
}
|
||||
Self(cc)
|
||||
}
|
||||
|
||||
/// Returns `true` if the response has an `immutable` directive.
|
||||
pub(crate) fn is_immutable(&self) -> bool {
|
||||
self.0.contains_key("immutable")
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
use futures::FutureExt;
|
||||
use std::future::Future;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use futures::FutureExt;
|
||||
use http_cache_semantics::{AfterResponse, BeforeRequest, CachePolicy};
|
||||
use reqwest::{Request, Response};
|
||||
use reqwest_middleware::ClientWithMiddleware;
|
||||
|
|
@ -12,6 +12,8 @@ use tracing::{debug, info_span, instrument, trace, warn, Instrument};
|
|||
use puffin_cache::CacheEntry;
|
||||
use puffin_fs::write_atomic;
|
||||
|
||||
use crate::cache_headers::CacheHeaders;
|
||||
|
||||
/// Either a cached client error or a (user specified) error from the callback
|
||||
#[derive(Debug)]
|
||||
pub enum CachedClientError<CallbackError> {
|
||||
|
|
@ -46,11 +48,15 @@ enum CachedResponse<Payload: Serialize> {
|
|||
ModifiedOrNew(Response, Option<Box<CachePolicy>>),
|
||||
}
|
||||
|
||||
/// Serialize the actual payload together with its caching information
|
||||
/// Serialize the actual payload together with its caching information.
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct DataWithCachePolicy<Payload: Serialize> {
|
||||
pub data: Payload,
|
||||
// The cache policy is large (448 bytes at time of writing), reduce the stack size
|
||||
/// Whether the response should be considered immutable.
|
||||
immutable: bool,
|
||||
/// The [`CachePolicy`] is used to determine if the response is fresh or stale.
|
||||
/// The policy is large (448 bytes at time of writing), so we reduce the stack size by
|
||||
/// boxing it.
|
||||
cache_policy: Box<CachePolicy>,
|
||||
}
|
||||
|
||||
|
|
@ -148,11 +154,18 @@ impl CachedClient {
|
|||
.await
|
||||
}
|
||||
CachedResponse::ModifiedOrNew(res, cache_policy) => {
|
||||
let headers = CacheHeaders::from_response(res.headers().get_all("cache-control"));
|
||||
let immutable = headers.is_immutable();
|
||||
|
||||
let data = response_callback(res)
|
||||
.await
|
||||
.map_err(|err| CachedClientError::Callback(err))?;
|
||||
if let Some(cache_policy) = cache_policy {
|
||||
let data_with_cache_policy = DataWithCachePolicy { data, cache_policy };
|
||||
let data_with_cache_policy = DataWithCachePolicy {
|
||||
data,
|
||||
immutable,
|
||||
cache_policy,
|
||||
};
|
||||
async {
|
||||
fs_err::tokio::create_dir_all(cache_entry.dir())
|
||||
.await
|
||||
|
|
@ -187,6 +200,12 @@ impl CachedClient {
|
|||
)?;
|
||||
let url = req.url().clone();
|
||||
let cached_response = if let Some(cached) = cached {
|
||||
// Avoid sending revalidation requests for immutable responses.
|
||||
if cached.immutable && !cached.cache_policy.is_stale(SystemTime::now()) {
|
||||
debug!("Found immutable response for: {url}");
|
||||
return Ok(CachedResponse::FreshCache(cached.data));
|
||||
}
|
||||
|
||||
match cached
|
||||
.cache_policy
|
||||
.before_request(&converted_req, SystemTime::now())
|
||||
|
|
@ -231,8 +250,12 @@ impl CachedClient {
|
|||
match after_response {
|
||||
AfterResponse::NotModified(new_policy, _parts) => {
|
||||
debug!("Found not-modified response for: {url}");
|
||||
let headers =
|
||||
CacheHeaders::from_response(res.headers().get_all("cache-control"));
|
||||
let immutable = headers.is_immutable();
|
||||
CachedResponse::NotModified(DataWithCachePolicy {
|
||||
data: cached.data,
|
||||
immutable,
|
||||
cache_policy: Box::new(new_policy),
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ pub use registry_client::{
|
|||
read_metadata_async, RegistryClient, RegistryClientBuilder, SimpleMetadata, VersionFiles,
|
||||
};
|
||||
|
||||
mod cache_headers;
|
||||
mod cached_client;
|
||||
mod error;
|
||||
mod flat_index;
|
||||
|
|
|
|||
Loading…
Reference in New Issue