Reduce overhead in converting resolutions (#11660)

Solving spent a chunk of its time just converting resolutions, shown as the two blocks on the left:


![image](https://github.com/user-attachments/assets/6f266440-c6e2-447c-ad7f-f92244f9d09b)

These blocks are `ResolverOutput::from_state` at 1.3% and
`ForkState::into_resolution` at 4.1% of resolver thread runtime for the
apache-airflow universal resolution.

We reduce the overhead spent in those functions to 1.1% and 2.1% of
resolver time, respectively, by:

Commit 1: Replace the hash set for the edges with a vec in
`ForkState::into_resolution`. We deduplicate edges anyway when
collecting them, and the hash-and-insert was slow.
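
A minimal sketch of that pattern, using a hypothetical `Edge` type in place of
uv's `ResolutionDependencyEdge`: edges are pushed into a pre-sized `Vec` during
traversal, and deduplication happens in a single pass where the edges are
consumed.

```rust
use std::collections::HashSet;

/// Hypothetical stand-in for `ResolutionDependencyEdge`.
#[derive(Clone, PartialEq, Eq, Hash)]
struct Edge {
    from: String,
    to: String,
}

/// Collect edges without a hash-and-insert per edge: duplicates are tolerated
/// here and filtered out once, later, when the edges are consumed.
fn collect_edges(raw: impl IntoIterator<Item = Edge>, estimated_count: usize) -> Vec<Edge> {
    let mut edges = Vec::with_capacity(estimated_count);
    edges.extend(raw);
    edges
}

/// Single deduplication pass at the point where the edges are merged into the
/// final resolution graph.
fn dedup_edges(edges: Vec<Edge>) -> Vec<Edge> {
    let mut seen = HashSet::new();
    edges.into_iter().filter(|e| seen.insert(e.clone())).collect()
}
```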

Commit 2: Reduce the distribution cloning in
`ResolverOutput::from_state` by using an `Arc`.
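
And a minimal sketch of the idea behind commit 2, with a simplified `Dist`
stand-in for uv's distribution type: wrapping the distribution in an `Arc`
turns every clone into a reference-count bump instead of a deep copy of names,
URLs, hashes, and other metadata.

```rust
use std::sync::Arc;

/// Simplified stand-in for uv's `Dist`.
struct Dist {
    name: String,
    version: String,
}

/// Mirrors the `ResolvedDist::Installable { dist: Arc<Dist>, .. }` shape used
/// in the diff below: all consumers share one allocation.
struct ResolvedDist {
    dist: Arc<Dist>,
}

fn main() {
    let dist = Arc::new(Dist {
        name: "example-package".to_string(),
        version: "1.0.0".to_string(),
    });
    // Cloning the `Arc` is an atomic increment, not a copy of the strings.
    let graph_node = ResolvedDist { dist: Arc::clone(&dist) };
    let pinned = ResolvedDist { dist };
    assert!(Arc::ptr_eq(&graph_node.dist, &pinned.dist));
    println!("{} {}", graph_node.dist.name, pinned.dist.version);
}
```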

The same profile excerpt for the resolver with the branch (note that
there is now an unrelated block between the two we optimized):


![image](https://github.com/user-attachments/assets/e36c205d-2cf8-4fe6-a2dd-3020c0515922)

Wall times are noisy, but the profiles show those changes as
improvements.

```
$ hyperfine --warmup 2 "./uv-main pip compile --no-progress scripts/requirements/airflow.in --universal" "./uv-branch pip compile --no-progress scripts/requirements/airflow.in --universal"
Benchmark 1: ./uv-main pip compile --no-progress scripts/requirements/airflow.in --universal
  Time (mean ± σ):      99.1 ms ±   3.8 ms    [User: 111.8 ms, System: 115.5 ms]
  Range (min … max):    93.6 ms … 110.4 ms    29 runs
 
Benchmark 2: ./uv-branch pip compile --no-progress scripts/requirements/airflow.in --universal
  Time (mean ± σ):      97.1 ms ±   4.3 ms    [User: 114.8 ms, System: 112.0 ms]
  Range (min … max):    90.9 ms … 112.4 ms    29 runs
 
Summary
  ./uv-branch pip compile --no-progress scripts/requirements/airflow.in --universal ran
    1.02 ± 0.06 times faster than ./uv-main pip compile --no-progress scripts/requirements/airflow.in --universal
```
konsti authored on 2025-02-20 21:13:01 +01:00 (committed by GitHub)
commit ae916cff5a, parent f9b638a296
10 changed files with 63 additions and 43 deletions

@ -100,7 +100,7 @@ impl DerivationChain {
else {
return false;
};
target == dist.as_ref()
target == dist.as_ref().into()
})?;
// Perform a BFS to find the shortest path to the root.

@ -220,7 +220,7 @@ impl Edge {
impl From<&ResolvedDist> for RequirementSource {
fn from(resolved_dist: &ResolvedDist) -> Self {
match resolved_dist {
ResolvedDist::Installable { dist, .. } => match dist {
ResolvedDist::Installable { dist, .. } => match dist.as_ref() {
Dist::Built(BuiltDist::Registry(wheels)) => {
let wheel = wheels.best_wheel();
RequirementSource::Registry {

@ -1,4 +1,6 @@
use std::fmt::{Display, Formatter};
use std::sync::Arc;
use uv_pep440::Version;
use uv_pep508::PackageName;
use uv_pypi_types::Yanked;
@ -16,10 +18,10 @@ use crate::{
#[allow(clippy::large_enum_variant)]
pub enum ResolvedDist {
Installed {
dist: InstalledDist,
dist: Arc<InstalledDist>,
},
Installable {
dist: Dist,
dist: Arc<Dist>,
version: Option<Version>,
},
}
@ -72,7 +74,7 @@ impl ResolvedDist {
/// Returns the [`Yanked`] status of the distribution, if available.
pub fn yanked(&self) -> Option<&Yanked> {
match self {
Self::Installable { dist, .. } => match dist {
Self::Installable { dist, .. } => match dist.as_ref() {
Dist::Source(SourceDist::Registry(sdist)) => sdist.file.yanked.as_ref(),
Dist::Built(BuiltDist::Registry(wheel)) => wheel.best_wheel().file.yanked.as_ref(),
_ => None,
@ -103,7 +105,7 @@ impl ResolvedDistRef<'_> {
"expected chosen sdist to match prioritized sdist"
);
ResolvedDist::Installable {
dist: Dist::Source(SourceDist::Registry(source)),
dist: Arc::new(Dist::Source(SourceDist::Registry(source))),
version: Some(sdist.version.clone()),
}
}
@ -119,12 +121,12 @@ impl ResolvedDistRef<'_> {
// has at least one wheel, so this always succeeds.
let built = prioritized.built_dist().expect("at least one wheel");
ResolvedDist::Installable {
dist: Dist::Built(BuiltDist::Registry(built)),
dist: Arc::new(Dist::Built(BuiltDist::Registry(built))),
version: Some(wheel.filename.version.clone()),
}
}
Self::Installed { dist } => ResolvedDist::Installed {
dist: (*dist).clone(),
dist: Arc::new((*dist).clone()),
},
}
}

@ -1,4 +1,5 @@
use anyhow::{bail, Result};
use std::sync::Arc;
use tracing::{debug, warn};
use uv_cache::{Cache, CacheBucket, WheelCache};
@ -114,7 +115,7 @@ impl<'a> Planner<'a> {
}
// Identify any cached distributions that satisfy the requirement.
match dist {
match dist.as_ref() {
Dist::Built(BuiltDist::Registry(wheel)) => {
if let Some(distribution) = registry_index.get(wheel.name()).find_map(|entry| {
if *entry.index.url() != wheel.best_wheel().index {
@ -164,7 +165,7 @@ impl<'a> Planner<'a> {
if let Some(pointer) = HttpArchivePointer::read_from(&cache_entry)? {
let cache_info = pointer.to_cache_info();
let archive = pointer.into_archive();
if archive.satisfies(hasher.get(dist)) {
if archive.satisfies(hasher.get(dist.as_ref())) {
let cached_dist = CachedDirectUrlDist {
filename: wheel.filename.clone(),
url: VerbatimParsedUrl {
@ -216,7 +217,7 @@ impl<'a> Planner<'a> {
if pointer.is_up_to_date(timestamp) {
let cache_info = pointer.to_cache_info();
let archive = pointer.into_archive();
if archive.satisfies(hasher.get(dist)) {
if archive.satisfies(hasher.get(dist.as_ref())) {
let cached_dist = CachedDirectUrlDist {
filename: wheel.filename.clone(),
url: VerbatimParsedUrl {
@ -393,7 +394,7 @@ pub struct Plan {
/// The distributions that are not already installed in the current environment, and are
/// not available in the local cache.
pub remote: Vec<Dist>,
pub remote: Vec<Arc<Dist>>,
/// Any distributions that are already installed in the current environment, but will be
/// re-installed (including upgraded) to satisfy the requirements.

@ -64,15 +64,15 @@ impl<'a, Context: BuildContext> Preparer<'a, Context> {
/// Fetch, build, and unzip the distributions in parallel.
pub fn prepare_stream<'stream>(
&'stream self,
distributions: Vec<Dist>,
distributions: Vec<Arc<Dist>>,
in_flight: &'stream InFlight,
resolution: &'stream Resolution,
) -> impl Stream<Item = Result<CachedDist, Error>> + 'stream {
distributions
.into_iter()
.map(|dist| async {
.map(|dist| async move {
let wheel = self
.get_wheel(dist, in_flight, resolution)
.get_wheel((*dist).clone(), in_flight, resolution)
.boxed_local()
.await?;
if let Some(reporter) = self.reporter.as_ref() {
@ -87,7 +87,7 @@ impl<'a, Context: BuildContext> Preparer<'a, Context> {
#[instrument(skip_all, fields(total = distributions.len()))]
pub async fn prepare(
&self,
mut distributions: Vec<Dist>,
mut distributions: Vec<Arc<Dist>>,
in_flight: &InFlight,
resolution: &Resolution,
) -> Result<Vec<CachedDist>, Error> {

@ -2,6 +2,7 @@ use std::collections::hash_map::Entry;
use std::collections::BTreeSet;
use std::collections::VecDeque;
use std::path::Path;
use std::sync::Arc;
use either::Either;
use itertools::Itertools;
@ -474,7 +475,10 @@ pub trait Installable<'lock> {
build_options,
)?;
let version = package.version().cloned();
let dist = ResolvedDist::Installable { dist, version };
let dist = ResolvedDist::Installable {
dist: Arc::new(dist),
version,
};
let hashes = package.hashes();
Ok(Node::Dist {
dist,
@ -491,7 +495,10 @@ pub trait Installable<'lock> {
&BuildOptions::default(),
)?;
let version = package.version().cloned();
let dist = ResolvedDist::Installable { dist, version };
let dist = ResolvedDist::Installable {
dist: Arc::new(dist),
version,
};
let hashes = package.hashes();
Ok(Node::Dist {
dist,

@ -50,7 +50,7 @@ impl AnnotatedDist {
pub(crate) fn index(&self) -> Option<&IndexUrl> {
match &self.dist {
ResolvedDist::Installed { .. } => None,
ResolvedDist::Installable { dist, .. } => match dist {
ResolvedDist::Installable { dist, .. } => match dist.as_ref() {
Dist::Built(dist) => match dist {
BuiltDist::Registry(dist) => Some(&dist.best_wheel().index),
BuiltDist::DirectUrl(_) => None,

@ -1,5 +1,6 @@
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter};
use std::sync::Arc;
use indexmap::IndexSet;
use petgraph::{
@ -449,7 +450,7 @@ impl ResolverOutput {
(
ResolvedDist::Installable {
dist,
dist: Arc::new(dist),
version: Some(version.clone()),
},
hashes,

@ -2370,13 +2370,19 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
.boxed_local()
.await?;
Response::Dist { dist, metadata }
Response::Dist {
dist: (*dist).clone(),
metadata,
}
}
ResolvedDist::Installed { dist } => {
let metadata =
provider.get_installed_metadata(&dist).boxed_local().await?;
Response::Installed { dist, metadata }
Response::Installed {
dist: (*dist).clone(),
metadata,
}
}
};
@ -2815,7 +2821,11 @@ impl ForkState {
fn into_resolution(self) -> Resolution {
let solution: FxHashMap<_, _> = self.pubgrub.partial_solution.extract_solution().collect();
let mut edges: FxHashSet<ResolutionDependencyEdge> = FxHashSet::default();
let edge_count: usize = solution
.keys()
.map(|package| self.pubgrub.incompatibilities[package].len())
.sum();
let mut edges: Vec<ResolutionDependencyEdge> = Vec::with_capacity(edge_count);
for (package, self_version) in &solution {
for id in &self.pubgrub.incompatibilities[package] {
let pubgrub::Kind::FromDependencyOf(
@ -2901,7 +2911,7 @@ impl ForkState {
to_dev: dependency_dev.clone(),
marker: *dependency_marker,
};
edges.insert(edge);
edges.push(edge);
}
PubGrubPackageInner::Marker {
@ -2933,7 +2943,7 @@ impl ForkState {
to_dev: None,
marker: *dependency_marker,
};
edges.insert(edge);
edges.push(edge);
}
PubGrubPackageInner::Extra {
@ -2966,7 +2976,7 @@ impl ForkState {
to_dev: None,
marker: *dependency_marker,
};
edges.insert(edge);
edges.push(edge);
// Insert an edge from the dependent package to the base package.
let to_url = self.fork_urls.get(dependency_name);
@ -2986,7 +2996,7 @@ impl ForkState {
to_dev: None,
marker: *dependency_marker,
};
edges.insert(edge);
edges.push(edge);
}
PubGrubPackageInner::Dev {
@ -3018,7 +3028,7 @@ impl ForkState {
to_dev: Some(dependency_dev.clone()),
marker: *dependency_marker,
};
edges.insert(edge);
edges.push(edge);
}
_ => {}
@ -3067,7 +3077,7 @@ pub(crate) struct Resolution {
pub(crate) nodes: FxHashMap<ResolutionPackage, Version>,
/// The directed connections between the nodes, where the marker is the node weight. We don't
/// store the requirement itself, but it can be retrieved from the package metadata.
pub(crate) edges: FxHashSet<ResolutionDependencyEdge>,
pub(crate) edges: Vec<ResolutionDependencyEdge>,
/// Map each package name, version tuple from `packages` to a distribution.
pub(crate) pins: FilePins,
/// The environment setting this resolution was found under.

@ -728,7 +728,7 @@ fn apply_no_virtual_project(resolution: Resolution) -> Resolution {
return true;
};
let Dist::Source(dist) = dist else {
let Dist::Source(dist) = dist.as_ref() else {
return true;
};
@ -748,29 +748,28 @@ fn apply_editable_mode(resolution: Resolution, editable: EditableMode) -> Resolu
// Filter out any editable distributions.
EditableMode::NonEditable => resolution.map(|dist| {
let ResolvedDist::Installable {
dist:
Dist::Source(SourceDist::Directory(DirectorySourceDist {
name,
install_path,
editable: true,
r#virtual: false,
url,
})),
version,
} = dist
let ResolvedDist::Installable { dist, version } = dist else {
return None;
};
let Dist::Source(SourceDist::Directory(DirectorySourceDist {
name,
install_path,
editable: true,
r#virtual: false,
url,
})) = dist.as_ref()
else {
return None;
};
Some(ResolvedDist::Installable {
dist: Dist::Source(SourceDist::Directory(DirectorySourceDist {
dist: Arc::new(Dist::Source(SourceDist::Directory(DirectorySourceDist {
name: name.clone(),
install_path: install_path.clone(),
editable: false,
r#virtual: false,
url: url.clone(),
})),
}))),
version: version.clone(),
})
}),