mirror of https://github.com/astral-sh/uv
Shard the registry cache by package (#583)
## Summary This PR modifies the cache structure in a few ways. Most notably, we now shard the set of registry wheels by package, and index them lazily when computing the install plan. This applies both to built wheels: <img width="989" alt="Screen Shot 2023-12-06 at 4 42 19 PM" src="https://github.com/astral-sh/puffin/assets/1309177/0e8a306f-befd-4be9-a63e-2303389837bb"> And remote wheels: <img width="836" alt="Screen Shot 2023-12-06 at 4 42 30 PM" src="https://github.com/astral-sh/puffin/assets/1309177/7fd908cd-dd86-475e-9779-07ed067b4a1a"> For other distributions, we now consistently cache using the package name, which is really just for clarity and debuggability (we could consider omitting these): <img width="955" alt="Screen Shot 2023-12-06 at 4 58 30 PM" src="https://github.com/astral-sh/puffin/assets/1309177/3e8d0f99-df45-429a-9175-d57b54a72e56"> Obliquely closes https://github.com/astral-sh/puffin/issues/575.
This commit is contained in:
parent
aa065f5c97
commit
a825b2db06
|
|
@ -1,6 +1,7 @@
|
|||
use std::fmt::{Display, Formatter};
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::ops::Deref;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
|
|
@ -37,8 +38,32 @@ impl CacheEntry {
|
|||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_file(self, file: String) -> Self {
|
||||
Self { file, ..self }
|
||||
pub fn with_file(self, file: impl Into<String>) -> Self {
|
||||
Self {
|
||||
file: file.into(),
|
||||
..self
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A subdirectory within the cache.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CacheShard(PathBuf);
|
||||
|
||||
impl CacheShard {
|
||||
pub fn entry(&self, file: impl Into<String>) -> CacheEntry {
|
||||
CacheEntry {
|
||||
dir: self.0.clone(),
|
||||
file: file.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for CacheShard {
|
||||
type Target = Path;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -82,6 +107,11 @@ impl Cache {
|
|||
self.root.join(cache_bucket.to_str())
|
||||
}
|
||||
|
||||
/// Compute an entry in the cache.
|
||||
pub fn shard(&self, cache_bucket: CacheBucket, dir: impl AsRef<Path>) -> CacheShard {
|
||||
CacheShard(self.bucket(cache_bucket).join(dir.as_ref()))
|
||||
}
|
||||
|
||||
/// Compute an entry in the cache.
|
||||
pub fn entry(
|
||||
&self,
|
||||
|
|
@ -120,17 +150,17 @@ impl Cache {
|
|||
/// are subdirectories of the cache root.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
|
||||
pub enum CacheBucket {
|
||||
/// Wheels (excluding built wheels), their metadata and cache policy.
|
||||
/// Wheels (excluding built wheels), alongside their metadata and cache policy.
|
||||
///
|
||||
/// There are three kinds from cache entries: Wheel metadata and policy as JSON files, the
|
||||
/// wheels themselves, and the unzipped wheel archives. If a wheel file is over an in-memory
|
||||
/// size threshold, we first download the zip file into the cache, then unzip it into a
|
||||
/// directory with the same name, omitting the `.whl` extension.
|
||||
/// directory with the same name (exclusive of the `.whl` extension).
|
||||
///
|
||||
/// Cache structure:
|
||||
/// * `wheel-metadata-v0/pypi/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/<digest(index-url)>/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/url/<digest(url)>/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/pypi/foo/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/<digest(index-url)>/foo/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/url/<digest(url)>/foo/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
||||
///
|
||||
/// See `puffin_client::RegistryClient::wheel_metadata` for information on how wheel metadata
|
||||
/// is fetched.
|
||||
|
|
@ -151,11 +181,13 @@ pub enum CacheBucket {
|
|||
/// wheel-v0
|
||||
/// ├── pypi
|
||||
/// │ ...
|
||||
/// │ ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.json
|
||||
/// │ ├── pandas
|
||||
/// │ │ └── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.json
|
||||
/// │ ...
|
||||
/// └── url
|
||||
/// └── 4b8be67c801a7ecb
|
||||
/// └── flask-3.0.0-py3-none-any.json
|
||||
/// └── flask
|
||||
/// └── flask-3.0.0-py3-none-any.json
|
||||
/// ```
|
||||
///
|
||||
/// We get the following `requirement.txt` from `pip-compile`:
|
||||
|
|
@ -176,16 +208,18 @@ pub enum CacheBucket {
|
|||
/// wheel-v0
|
||||
/// ├── pypi
|
||||
/// │ ...
|
||||
/// │ ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
/// │ ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64
|
||||
/// │ ├── pandas
|
||||
/// │ │ ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
/// │ │ ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64
|
||||
/// │ ...
|
||||
/// └── url
|
||||
/// └── 4b8be67c801a7ecb
|
||||
/// └── flask-3.0.0-py3-none-any.whl
|
||||
/// ├── flask
|
||||
/// │ └── ...
|
||||
/// └── flask-3.0.0.dist-info
|
||||
/// └── ...
|
||||
/// └── flask
|
||||
/// └── flask-3.0.0-py3-none-any.whl
|
||||
/// ├── flask
|
||||
/// │ └── ...
|
||||
/// └── flask-3.0.0.dist-info
|
||||
/// └── ...
|
||||
/// ```
|
||||
///
|
||||
/// If we run first `pip-compile` and then `pip-sync` on the same machine, we get both:
|
||||
|
|
@ -194,24 +228,26 @@ pub enum CacheBucket {
|
|||
/// wheels-v0
|
||||
/// ├── pypi
|
||||
/// │ ├── ...
|
||||
/// │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.json
|
||||
/// │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
/// │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64
|
||||
/// │ │ ├── pandas
|
||||
/// │ │ │ ├── ...
|
||||
/// │ │ ├── pandas-2.1.3.dist-info
|
||||
/// │ │ │ ├── ...
|
||||
/// │ │ └── pandas.libs
|
||||
/// │ ├── pandas
|
||||
/// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.json
|
||||
/// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
/// │ │ └── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64
|
||||
/// │ │ ├── pandas
|
||||
/// │ │ │ ├── ...
|
||||
/// │ │ ├── pandas-2.1.3.dist-info
|
||||
/// │ │ │ ├── ...
|
||||
/// │ │ └── pandas.libs
|
||||
/// │ ├── ...
|
||||
/// └── url
|
||||
/// └── 4b8be67c801a7ecb
|
||||
/// ├── flask-3.0.0-py3-none-any.json
|
||||
/// ├── flask-3.0.0-py3-none-any.json
|
||||
/// └── flask-3.0.0-py3-none-any
|
||||
/// ├── flask
|
||||
/// │ └── ...
|
||||
/// └── flask-3.0.0.dist-info
|
||||
/// └── ...
|
||||
/// └── flask
|
||||
/// ├── flask-3.0.0-py3-none-any.json
|
||||
/// ├── flask-3.0.0-py3-none-any.json
|
||||
/// └── flask-3.0.0-py3-none-any
|
||||
/// ├── flask
|
||||
/// │ └── ...
|
||||
/// └── flask-3.0.0.dist-info
|
||||
/// └── ...
|
||||
Wheels,
|
||||
/// Wheels built from source distributions, their extracted metadata and the cache policy of
|
||||
/// the source distribution.
|
||||
|
|
@ -225,14 +261,14 @@ pub enum CacheBucket {
|
|||
///
|
||||
/// Source distributions are built into zipped wheel files (as PEP 517 specifies) and unzipped
|
||||
/// lazily before installing. So when resolving, we only build the wheel and store the archive
|
||||
/// file in the cache, when installing, we unpack it under the same name, omitting the `.whl`
|
||||
/// extension. You may find a mix of wheel archive zip files and unzipped wheel directories in
|
||||
/// the cache.
|
||||
/// file in the cache, when installing, we unpack it under the same name (exclusive of the
|
||||
/// `.whl` extension). You may find a mix of wheel archive zip files and unzipped wheel
|
||||
/// directories in the cache.
|
||||
///
|
||||
/// Cache structure:
|
||||
/// * `built-wheels-v0/pypi/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/<digest(index-url)>/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/url/<digest(url)>/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/pypi/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/git/<digest(url)>/<git sha>/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
///
|
||||
/// But the url filename does not need to be a valid source dist filename
|
||||
|
|
@ -261,14 +297,16 @@ pub enum CacheBucket {
|
|||
/// │ ├── metadata.json
|
||||
/// │ └── pydantic_extra_types-2.1.0-py3-none-any.whl
|
||||
/// ├── pypi
|
||||
/// │ └── django-allauth-0.51.0.tar.gz
|
||||
/// │ ├── django_allauth-0.51.0-py3-none-any.whl
|
||||
/// │ └── metadata.json
|
||||
/// │ └── django
|
||||
/// │ └── django-allauth-0.51.0.tar.gz
|
||||
/// │ ├── django_allauth-0.51.0-py3-none-any.whl
|
||||
/// │ └── metadata.json
|
||||
/// └── url
|
||||
/// └── 6781bd6440ae72c2
|
||||
/// └── werkzeug-3.0.1.tar.gz
|
||||
/// ├── metadata.json
|
||||
/// └── werkzeug-3.0.1-py3-none-any.whl
|
||||
/// └── werkzeug
|
||||
/// └── werkzeug-3.0.1.tar.gz
|
||||
/// ├── metadata.json
|
||||
/// └── werkzeug-3.0.1-py3-none-any.whl
|
||||
/// ```
|
||||
///
|
||||
/// The inside of a `metadata.json`:
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ use crate::{digest, CanonicalUrl};
|
|||
|
||||
/// Cache wheels and their metadata, both from remote wheels and built from source distributions.
|
||||
///
|
||||
/// Use [`WheelCache::wheel_dir`] for remote wheel metadata caching and
|
||||
/// Use [`WheelCache::remote_wheel_dir`] for remote wheel metadata caching and
|
||||
/// [`WheelCache::built_wheel_dir`] for built source distributions metadata caching.
|
||||
pub enum WheelCache<'a> {
|
||||
/// Either pypi or an alternative index, which we key by index URL.
|
||||
|
|
@ -38,8 +38,8 @@ impl<'a> WheelCache<'a> {
|
|||
}
|
||||
|
||||
/// Metadata of a remote wheel. See [`CacheBucket::Wheels`]
|
||||
pub fn wheel_dir(&self) -> PathBuf {
|
||||
self.bucket()
|
||||
pub fn remote_wheel_dir(&self, package_name: impl AsRef<Path>) -> PathBuf {
|
||||
self.bucket().join(package_name)
|
||||
}
|
||||
|
||||
/// Metadata of a built source distribution. See [`CacheBucket::BuiltWheels`]
|
||||
|
|
|
|||
|
|
@ -241,7 +241,7 @@ impl RegistryClient {
|
|||
|
||||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
WheelCache::Index(&index).wheel_dir(),
|
||||
WheelCache::Index(&index).remote_wheel_dir(filename.name.as_ref()),
|
||||
format!("{}.json", filename.stem()),
|
||||
);
|
||||
|
||||
|
|
@ -276,7 +276,7 @@ impl RegistryClient {
|
|||
|
||||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
cache_shard.wheel_dir(),
|
||||
cache_shard.remote_wheel_dir(filename.name.as_ref()),
|
||||
format!("{}.json", filename.stem()),
|
||||
);
|
||||
|
||||
|
|
@ -311,13 +311,13 @@ impl RegistryClient {
|
|||
// The range request version failed (this is bad, the webserver should support this), fall
|
||||
// back to downloading the entire file and the reading the file from the zip the regular way
|
||||
|
||||
debug!("Range requests not supported for {filename}, downloading whole wheel");
|
||||
debug!("Range requests not supported for {filename}; downloading wheel");
|
||||
// TODO(konstin): Download the wheel into a cache shared with the installer instead
|
||||
// Note that this branch is only hit when you're not using and the server where
|
||||
// you host your wheels for some reasons doesn't support range requests
|
||||
// (tbh we should probably warn here and tell users to get a better registry because
|
||||
// their current one makes resolution unnecessary slow)
|
||||
let temp_download = tempfile_in(cache_shard.wheel_dir()).map_err(Error::CacheWrite)?;
|
||||
// their current one makes resolution unnecessary slow).
|
||||
let temp_download = tempfile_in(self.cache.root()).map_err(Error::CacheWrite)?;
|
||||
let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download));
|
||||
let mut reader = self.stream_external(url).await?.compat();
|
||||
tokio::io::copy(&mut reader, &mut writer)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ use url::Url;
|
|||
|
||||
use distribution_filename::{WheelFilename, WheelFilenameError};
|
||||
use distribution_types::direct_url::DirectGitUrl;
|
||||
use distribution_types::{BuiltDist, Dist, RemoteSource, SourceDist};
|
||||
use distribution_types::{BuiltDist, Dist, Metadata, RemoteSource, SourceDist};
|
||||
use platform_tags::Tags;
|
||||
use puffin_cache::{Cache, CacheBucket, WheelCache};
|
||||
use puffin_client::RegistryClient;
|
||||
|
|
@ -171,7 +171,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
|
||||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
WheelCache::Index(&wheel.index).wheel_dir(),
|
||||
WheelCache::Index(&wheel.index).remote_wheel_dir(filename.name.as_ref()),
|
||||
filename.stem(),
|
||||
);
|
||||
|
||||
|
|
@ -193,12 +193,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
|
||||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
WheelCache::Index(&wheel.index).wheel_dir(),
|
||||
WheelCache::Index(&wheel.index).remote_wheel_dir(filename.name.as_ref()),
|
||||
filename.to_string(),
|
||||
);
|
||||
|
||||
// Download the wheel into the cache.
|
||||
// TODO(charlie): Use an atomic write, and remove any existing files or directories.
|
||||
fs::create_dir_all(&cache_entry.dir).await?;
|
||||
let mut writer = fs::File::create(cache_entry.path()).await?;
|
||||
tokio::io::copy(&mut reader.compat(), &mut writer).await?;
|
||||
|
|
@ -225,10 +224,9 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
let reader = self.client.stream_external(&wheel.url).await?;
|
||||
|
||||
// Download the wheel into the cache.
|
||||
// TODO(charlie): Use an atomic write, and remove any existing files or directories.
|
||||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
WheelCache::Url(&wheel.url).wheel_dir(),
|
||||
WheelCache::Url(&wheel.url).remote_wheel_dir(wheel.name().as_ref()),
|
||||
wheel.filename.to_string(),
|
||||
);
|
||||
fs::create_dir_all(&cache_entry.dir).await?;
|
||||
|
|
@ -252,7 +250,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
Dist::Built(BuiltDist::Path(wheel)) => {
|
||||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
WheelCache::Url(&wheel.url).wheel_dir(),
|
||||
WheelCache::Url(&wheel.url).remote_wheel_dir(wheel.name().as_ref()),
|
||||
wheel.filename.stem(),
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,24 +1,22 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use fs_err as fs;
|
||||
use tracing::warn;
|
||||
|
||||
use distribution_types::CachedWheel;
|
||||
use platform_tags::Tags;
|
||||
use puffin_cache::CacheShard;
|
||||
|
||||
use crate::index::iter_directories;
|
||||
|
||||
/// A local index of built distributions for a specific source distribution.
|
||||
#[derive(Debug)]
|
||||
pub struct BuiltWheelIndex<'a> {
|
||||
directory: PathBuf,
|
||||
tags: &'a Tags,
|
||||
}
|
||||
pub struct BuiltWheelIndex;
|
||||
|
||||
impl<'a> BuiltWheelIndex<'a> {
|
||||
/// Create a new index of built distributions.
|
||||
impl BuiltWheelIndex {
|
||||
/// Find the "best" distribution in the index for a given source distribution.
|
||||
///
|
||||
/// The `directory` should be the directory containing the built distributions for a specific
|
||||
/// This lookup prefers newer versions over older versions, and aims to maximize compatibility
|
||||
/// with the target platform.
|
||||
///
|
||||
/// The `shard` should point to a directory containing the built distributions for a specific
|
||||
/// source distribution. For example, given the built wheel cache structure:
|
||||
/// ```text
|
||||
/// built-wheels-v0/
|
||||
|
|
@ -28,27 +26,16 @@ impl<'a> BuiltWheelIndex<'a> {
|
|||
/// └── metadata.json
|
||||
/// ```
|
||||
///
|
||||
/// The `directory` should be `built-wheels-v0/pypi/django-allauth-0.51.0.tar.gz`.
|
||||
pub fn new(directory: impl Into<PathBuf>, tags: &'a Tags) -> Self {
|
||||
Self {
|
||||
directory: directory.into(),
|
||||
tags,
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the "best" distribution in the index.
|
||||
///
|
||||
/// This lookup prefers newer versions over older versions, and aims to maximize compatibility
|
||||
/// with the target platform.
|
||||
pub fn find(&self) -> Option<CachedWheel> {
|
||||
/// The `shard` should be `built-wheels-v0/pypi/django-allauth-0.51.0.tar.gz`.
|
||||
pub fn find(shard: &CacheShard, tags: &Tags) -> Option<CachedWheel> {
|
||||
let mut candidate: Option<CachedWheel> = None;
|
||||
|
||||
for subdir in iter_directories(self.directory.read_dir().ok()?) {
|
||||
for subdir in iter_directories(shard.read_dir().ok()?) {
|
||||
match CachedWheel::from_path(&subdir) {
|
||||
Ok(None) => {}
|
||||
Ok(Some(dist_info)) => {
|
||||
// Pick the wheel with the highest priority
|
||||
let compatibility = dist_info.filename.compatibility(self.tags);
|
||||
let compatibility = dist_info.filename.compatibility(tags);
|
||||
|
||||
// Only consider wheels that are compatible with our tags.
|
||||
if compatibility.is_none() {
|
||||
|
|
@ -62,7 +49,7 @@ impl<'a> BuiltWheelIndex<'a> {
|
|||
if let Some(existing) = candidate.as_ref() {
|
||||
// Override if the wheel is newer, or "more" compatible.
|
||||
if dist_info.filename.version > existing.filename.version
|
||||
|| compatibility > existing.filename.compatibility(self.tags)
|
||||
|| compatibility > existing.filename.compatibility(tags)
|
||||
{
|
||||
candidate = Some(dist_info);
|
||||
}
|
||||
|
|
@ -75,8 +62,7 @@ impl<'a> BuiltWheelIndex<'a> {
|
|||
"Invalid cache entry at {}, removing. {err}",
|
||||
subdir.display()
|
||||
);
|
||||
let result = fs::remove_dir_all(&subdir);
|
||||
if let Err(err) = result {
|
||||
if let Err(err) = fs::remove_dir_all(&subdir) {
|
||||
warn!(
|
||||
"Failed to remove invalid cache entry at {}: {err}",
|
||||
subdir.display()
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
use std::collections::hash_map::Entry;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use fs_err as fs;
|
||||
use fxhash::FxHashMap;
|
||||
use tracing::warn;
|
||||
|
||||
use distribution_types::{CachedRegistryDist, CachedWheel, Metadata};
|
||||
use distribution_types::{CachedRegistryDist, CachedWheel};
|
||||
use pep440_rs::Version;
|
||||
use platform_tags::Tags;
|
||||
use puffin_cache::{Cache, CacheBucket, WheelCache};
|
||||
|
|
@ -15,55 +17,87 @@ use pypi_types::IndexUrls;
|
|||
use crate::index::iter_directories;
|
||||
|
||||
/// A local index of distributions that originate from a registry, like `PyPI`.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct RegistryWheelIndex(FxHashMap<PackageName, BTreeMap<Version, CachedRegistryDist>>);
|
||||
#[derive(Debug)]
|
||||
pub struct RegistryWheelIndex<'a> {
|
||||
cache: &'a Cache,
|
||||
tags: &'a Tags,
|
||||
index_urls: &'a IndexUrls,
|
||||
index: FxHashMap<PackageName, BTreeMap<Version, CachedRegistryDist>>,
|
||||
}
|
||||
|
||||
impl RegistryWheelIndex {
|
||||
/// Build an index of cached distributions from a directory.
|
||||
pub fn from_directory(cache: &Cache, tags: &Tags, index_urls: &IndexUrls) -> Self {
|
||||
let mut index = Self::default();
|
||||
impl<'a> RegistryWheelIndex<'a> {
|
||||
/// Initialize an index of cached distributions from a directory.
|
||||
pub fn new(cache: &'a Cache, tags: &'a Tags, index_urls: &'a IndexUrls) -> Self {
|
||||
Self {
|
||||
cache,
|
||||
tags,
|
||||
index_urls,
|
||||
index: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator over available wheels for a given package.
|
||||
///
|
||||
/// If the package is not yet indexed, this will index the package by reading from the cache.
|
||||
pub fn get(
|
||||
&mut self,
|
||||
name: &PackageName,
|
||||
) -> impl Iterator<Item = (&Version, &CachedRegistryDist)> {
|
||||
let versions = match self.index.entry(name.clone()) {
|
||||
Entry::Occupied(entry) => entry.into_mut(),
|
||||
Entry::Vacant(entry) => {
|
||||
entry.insert(Self::index(name, self.cache, self.tags, self.index_urls))
|
||||
}
|
||||
};
|
||||
versions.iter().rev()
|
||||
}
|
||||
|
||||
/// Add a package to the index by reading from the cache.
|
||||
fn index(
|
||||
package: &PackageName,
|
||||
cache: &Cache,
|
||||
tags: &Tags,
|
||||
index_urls: &IndexUrls,
|
||||
) -> BTreeMap<Version, CachedRegistryDist> {
|
||||
let mut versions = BTreeMap::new();
|
||||
|
||||
for index_url in index_urls {
|
||||
// Index all the wheels that were downloaded directly from the registry.
|
||||
// TODO(charlie): Shard the cache by package name, and do this lazily.
|
||||
let wheel_dir = cache
|
||||
.bucket(CacheBucket::Wheels)
|
||||
.join(WheelCache::Index(index_url).wheel_dir());
|
||||
let wheel_dir = cache.shard(
|
||||
CacheBucket::Wheels,
|
||||
WheelCache::Index(index_url).remote_wheel_dir(package.to_string()),
|
||||
);
|
||||
|
||||
index.add_directory(wheel_dir, tags);
|
||||
Self::add_directory(&*wheel_dir, tags, &mut versions);
|
||||
|
||||
// Index all the built wheels, created by downloading and building source distributions
|
||||
// from the registry.
|
||||
// TODO(charlie): Shard the cache by package name, and do this lazily.
|
||||
let built_wheel_dir = cache
|
||||
.bucket(CacheBucket::BuiltWheels)
|
||||
.join(WheelCache::Index(index_url).wheel_dir());
|
||||
let built_wheel_dir = cache.shard(
|
||||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Index(index_url).built_wheel_dir(package.to_string()),
|
||||
);
|
||||
|
||||
// Built wheels have one more level of indirection, as they are keyed by the source
|
||||
// distribution filename.
|
||||
let Ok(read_dir) = built_wheel_dir.read_dir() else {
|
||||
continue;
|
||||
};
|
||||
for subdir in iter_directories(read_dir) {
|
||||
index.add_directory(subdir, tags);
|
||||
Self::add_directory(subdir, tags, &mut versions);
|
||||
}
|
||||
}
|
||||
|
||||
index
|
||||
}
|
||||
|
||||
/// Returns a distribution from the index, if it exists.
|
||||
pub fn by_name(
|
||||
&self,
|
||||
name: &PackageName,
|
||||
) -> impl Iterator<Item = (&Version, &CachedRegistryDist)> {
|
||||
// Using static to extend the lifetime
|
||||
static DEFAULT_MAP: BTreeMap<Version, CachedRegistryDist> = BTreeMap::new();
|
||||
self.0.get(name).unwrap_or(&DEFAULT_MAP).iter().rev()
|
||||
versions
|
||||
}
|
||||
|
||||
/// Add the wheels in a given directory to the index.
|
||||
///
|
||||
/// Each subdirectory in the given path is expected to be that of an unzipped wheel.
|
||||
fn add_directory(&mut self, path: impl AsRef<Path>, tags: &Tags) {
|
||||
fn add_directory(
|
||||
path: impl AsRef<Path>,
|
||||
tags: &Tags,
|
||||
versions: &mut BTreeMap<Version, CachedRegistryDist>,
|
||||
) {
|
||||
let Ok(read_dir) = path.as_ref().read_dir() else {
|
||||
return;
|
||||
};
|
||||
|
|
@ -76,20 +110,13 @@ impl RegistryWheelIndex {
|
|||
|
||||
// Pick the wheel with the highest priority
|
||||
let compatibility = dist_info.filename.compatibility(tags);
|
||||
if let Some(existing) = self
|
||||
.0
|
||||
.get_mut(dist_info.name())
|
||||
.and_then(|package| package.get_mut(&dist_info.filename.version))
|
||||
{
|
||||
if let Some(existing) = versions.get_mut(&dist_info.filename.version) {
|
||||
// Override if we have better compatibility
|
||||
if compatibility > existing.filename.compatibility(tags) {
|
||||
*existing = dist_info;
|
||||
}
|
||||
} else if compatibility.is_some() {
|
||||
self.0
|
||||
.entry(dist_info.name().clone())
|
||||
.or_default()
|
||||
.insert(dist_info.filename.version.clone(), dist_info);
|
||||
versions.insert(dist_info.filename.version.clone(), dist_info);
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
|
|
@ -97,8 +124,8 @@ impl RegistryWheelIndex {
|
|||
"Invalid cache entry at {}, removing. {err}",
|
||||
wheel_dir.display()
|
||||
);
|
||||
let result = fs::remove_dir_all(&wheel_dir);
|
||||
if let Err(err) = result {
|
||||
|
||||
if let Err(err) = fs::remove_dir_all(&wheel_dir) {
|
||||
warn!(
|
||||
"Failed to remove invalid cache entry at {}: {err}",
|
||||
wheel_dir.display()
|
||||
|
|
|
|||
|
|
@ -24,7 +24,9 @@ use distribution_types::direct_url::{DirectArchiveUrl, DirectGitUrl};
|
|||
use distribution_types::{GitSourceDist, Metadata, PathSourceDist, RemoteSource, SourceDist};
|
||||
use install_wheel_rs::read_dist_info;
|
||||
use platform_tags::Tags;
|
||||
use puffin_cache::{digest, CacheBucket, CacheEntry, CachedByTimestamp, CanonicalUrl, WheelCache};
|
||||
use puffin_cache::{
|
||||
digest, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, CanonicalUrl, WheelCache,
|
||||
};
|
||||
use puffin_client::{CachedClient, CachedClientError, DataWithCachePolicy};
|
||||
use puffin_fs::write_atomic;
|
||||
use puffin_git::{Fetch, GitSource};
|
||||
|
|
@ -161,11 +163,17 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
let DirectArchiveUrl { url, subdirectory } =
|
||||
DirectArchiveUrl::from(&direct_url_source_dist.url);
|
||||
|
||||
// For direct URLs, cache directly under the hash of the URL itself.
|
||||
let cache_shard = self.build_context.cache().shard(
|
||||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Url(&url).remote_wheel_dir(direct_url_source_dist.name().as_ref()),
|
||||
);
|
||||
|
||||
self.url(
|
||||
source_dist,
|
||||
filename,
|
||||
&url,
|
||||
WheelCache::Url(&url),
|
||||
cache_shard,
|
||||
subdirectory.as_deref(),
|
||||
)
|
||||
.await?
|
||||
|
|
@ -174,11 +182,21 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
let url = Url::parse(®istry_source_dist.file.url).map_err(|err| {
|
||||
SourceDistError::UrlParse(registry_source_dist.file.url.clone(), err)
|
||||
})?;
|
||||
|
||||
// For registry source distributions, shard by distribution, then by filename.
|
||||
// Ex) `pypi/requests/requests-2.25.1.tar.gz`
|
||||
let cache_shard = self.build_context.cache().shard(
|
||||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Index(®istry_source_dist.index)
|
||||
.remote_wheel_dir(registry_source_dist.name.as_ref())
|
||||
.join(®istry_source_dist.file.filename),
|
||||
);
|
||||
|
||||
self.url(
|
||||
source_dist,
|
||||
®istry_source_dist.file.filename,
|
||||
&url,
|
||||
WheelCache::Index(®istry_source_dist.index),
|
||||
cache_shard,
|
||||
None,
|
||||
)
|
||||
.await?
|
||||
|
|
@ -197,14 +215,10 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
source_dist: &'data SourceDist,
|
||||
filename: &'data str,
|
||||
url: &'data Url,
|
||||
cache_shard: WheelCache<'data>,
|
||||
cache_shard: CacheShard,
|
||||
subdirectory: Option<&'data Path>,
|
||||
) -> Result<BuiltWheelMetadata, SourceDistError> {
|
||||
let cache_entry = self.build_context.cache().entry(
|
||||
CacheBucket::BuiltWheels,
|
||||
cache_shard.built_wheel_dir(filename),
|
||||
METADATA_JSON.to_string(),
|
||||
);
|
||||
let cache_entry = cache_shard.entry(METADATA_JSON.to_string());
|
||||
|
||||
let response_callback = |response| async {
|
||||
// New or changed source distribution, delete all built wheels
|
||||
|
|
@ -345,10 +359,10 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
source_dist: &SourceDist,
|
||||
path_source_dist: &PathSourceDist,
|
||||
) -> Result<BuiltWheelMetadata, SourceDistError> {
|
||||
let cache_shard = WheelCache::Path(&path_source_dist.url);
|
||||
let cache_entry = self.build_context.cache().entry(
|
||||
CacheBucket::BuiltWheels,
|
||||
cache_shard.built_wheel_dir(source_dist.name().to_string()),
|
||||
WheelCache::Path(&path_source_dist.url)
|
||||
.remote_wheel_dir(path_source_dist.name().as_ref()),
|
||||
METADATA_JSON.to_string(),
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -3,8 +3,7 @@ use tracing::debug;
|
|||
|
||||
use distribution_types::direct_url::{git_reference, DirectUrl};
|
||||
use distribution_types::{
|
||||
BuiltDist, CachedDirectUrlDist, CachedDist, Dist, InstalledDist, Metadata, RemoteSource,
|
||||
SourceDist,
|
||||
BuiltDist, CachedDirectUrlDist, CachedDist, Dist, InstalledDist, Metadata, SourceDist,
|
||||
};
|
||||
use pep508_rs::{Requirement, VersionOrUrl};
|
||||
use platform_tags::Tags;
|
||||
|
|
@ -45,7 +44,7 @@ impl InstallPlan {
|
|||
SitePackages::try_from_executable(venv).context("Failed to list installed packages")?;
|
||||
|
||||
// Index all the already-downloaded wheels in the cache.
|
||||
let registry_index = RegistryWheelIndex::from_directory(cache, tags, index_urls);
|
||||
let mut registry_index = RegistryWheelIndex::new(cache, tags, index_urls);
|
||||
|
||||
let mut local = vec![];
|
||||
let mut remote = vec![];
|
||||
|
|
@ -89,9 +88,8 @@ impl InstallPlan {
|
|||
// Identify any locally-available distributions that satisfy the requirement.
|
||||
match requirement.version_or_url.as_ref() {
|
||||
None => {
|
||||
// TODO(charlie): This doesn't respect built wheels.
|
||||
if let Some((_version, distribution)) =
|
||||
registry_index.by_name(&requirement.name).next()
|
||||
registry_index.get(&requirement.name).next()
|
||||
{
|
||||
debug!("Requirement already cached: {distribution}");
|
||||
local.push(CachedDist::Registry(distribution.clone()));
|
||||
|
|
@ -99,12 +97,16 @@ impl InstallPlan {
|
|||
}
|
||||
}
|
||||
Some(VersionOrUrl::VersionSpecifier(specifier)) => {
|
||||
if let Some((_version, distribution)) = registry_index
|
||||
.by_name(&requirement.name)
|
||||
.find(|(version, dist)| {
|
||||
specifier.contains(version)
|
||||
&& requirement.is_satisfied_by(&dist.filename.version)
|
||||
})
|
||||
if let Some(distribution) =
|
||||
registry_index
|
||||
.get(&requirement.name)
|
||||
.find_map(|(version, distribution)| {
|
||||
if specifier.contains(version) {
|
||||
Some(distribution)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
{
|
||||
debug!("Requirement already cached: {distribution}");
|
||||
local.push(CachedDist::Registry(distribution.clone()));
|
||||
|
|
@ -124,7 +126,7 @@ impl InstallPlan {
|
|||
// advance.
|
||||
let cache_entry = cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
WheelCache::Url(&wheel.url).wheel_dir(),
|
||||
WheelCache::Url(&wheel.url).remote_wheel_dir(wheel.name().as_ref()),
|
||||
wheel.filename.stem(),
|
||||
);
|
||||
|
||||
|
|
@ -145,7 +147,7 @@ impl InstallPlan {
|
|||
// advance.
|
||||
let cache_entry = cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
WheelCache::Url(&wheel.url).wheel_dir(),
|
||||
WheelCache::Url(&wheel.url).remote_wheel_dir(wheel.name().as_ref()),
|
||||
wheel.filename.stem(),
|
||||
);
|
||||
|
||||
|
|
@ -164,14 +166,12 @@ impl InstallPlan {
|
|||
Dist::Source(SourceDist::DirectUrl(sdist)) => {
|
||||
// Find the most-compatible wheel from the cache, since we don't know
|
||||
// the filename in advance.
|
||||
let cache_entry = cache.entry(
|
||||
let cache_shard = cache.shard(
|
||||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Url(&sdist.url).wheel_dir(),
|
||||
sdist.filename()?.to_string(),
|
||||
WheelCache::Url(&sdist.url).remote_wheel_dir(sdist.name().as_ref()),
|
||||
);
|
||||
let index = BuiltWheelIndex::new(cache_entry.path(), tags);
|
||||
|
||||
if let Some(wheel) = index.find() {
|
||||
if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) {
|
||||
let cached_dist = wheel.into_url_dist(url.clone());
|
||||
debug!("URL source requirement already cached: {cached_dist}");
|
||||
local.push(CachedDist::Url(cached_dist.clone()));
|
||||
|
|
@ -181,14 +181,13 @@ impl InstallPlan {
|
|||
Dist::Source(SourceDist::Path(sdist)) => {
|
||||
// Find the most-compatible wheel from the cache, since we don't know
|
||||
// the filename in advance.
|
||||
let cache_entry = cache.entry(
|
||||
let cache_shard = cache.shard(
|
||||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Path(&sdist.url).wheel_dir(),
|
||||
sdist.name().to_string(),
|
||||
WheelCache::Path(&sdist.url)
|
||||
.remote_wheel_dir(sdist.name().as_ref()),
|
||||
);
|
||||
let index = BuiltWheelIndex::new(cache_entry.path(), tags);
|
||||
|
||||
if let Some(wheel) = index.find() {
|
||||
if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) {
|
||||
let cached_dist = wheel.into_url_dist(url.clone());
|
||||
debug!("Path source requirement already cached: {cached_dist}");
|
||||
local.push(CachedDist::Url(cached_dist.clone()));
|
||||
|
|
@ -199,14 +198,12 @@ impl InstallPlan {
|
|||
// Find the most-compatible wheel from the cache, since we don't know
|
||||
// the filename in advance.
|
||||
if let Ok(Some(reference)) = git_reference(&sdist.url) {
|
||||
let cache_entry = cache.entry(
|
||||
let cache_shard = cache.shard(
|
||||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Git(&sdist.url).wheel_dir(),
|
||||
reference.to_string(),
|
||||
WheelCache::Git(&sdist.url).built_wheel_dir(reference),
|
||||
);
|
||||
let index = BuiltWheelIndex::new(cache_entry.path(), tags);
|
||||
|
||||
if let Some(wheel) = index.find() {
|
||||
if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) {
|
||||
let cached_dist = wheel.into_url_dist(url.clone());
|
||||
debug!("Git source requirement already cached: {cached_dist}");
|
||||
local.push(CachedDist::Url(cached_dist.clone()));
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
use std::fmt;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
|
|
@ -54,8 +52,8 @@ impl<'de> Deserialize<'de> for PackageName {
|
|||
}
|
||||
}
|
||||
|
||||
impl Display for PackageName {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
impl std::fmt::Display for PackageName {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue