Migrate to PubGrub's arena for package names (#9448)

## Summary

There's more we can do here, i.e., to leverage the IDs more widely, but
this is a start.
This commit is contained in:
Charlie Marsh 2024-11-26 15:05:39 -05:00 committed by GitHub
parent 8aeaf98f59
commit 916d5d7778
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 111 additions and 84 deletions

4
Cargo.lock generated
View File

@ -2673,7 +2673,7 @@ dependencies = [
[[package]]
name = "pubgrub"
version = "0.2.1"
source = "git+https://github.com/astral-sh/pubgrub?rev=57afc831bf2551f164617a10383cf288bf5d190d#57afc831bf2551f164617a10383cf288bf5d190d"
source = "git+https://github.com/astral-sh/pubgrub?rev=9cd9049a64c7352de2ff3b525b9ae36421b0cc18#9cd9049a64c7352de2ff3b525b9ae36421b0cc18"
dependencies = [
"indexmap",
"log",
@ -5666,7 +5666,7 @@ checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "version-ranges"
version = "0.1.1"
source = "git+https://github.com/astral-sh/pubgrub?rev=57afc831bf2551f164617a10383cf288bf5d190d#57afc831bf2551f164617a10383cf288bf5d190d"
source = "git+https://github.com/astral-sh/pubgrub?rev=9cd9049a64c7352de2ff3b525b9ae36421b0cc18#9cd9049a64c7352de2ff3b525b9ae36421b0cc18"
dependencies = [
"smallvec",
]

View File

@ -130,7 +130,7 @@ petgraph = { version = "0.6.5" }
platform-info = { version = "2.0.3" }
proc-macro2 = { version = "1.0.86" }
procfs = { version = "0.17.0", default-features = false, features = ["flate2"] }
pubgrub = { git = "https://github.com/astral-sh/pubgrub", rev = "57afc831bf2551f164617a10383cf288bf5d190d" }
pubgrub = { git = "https://github.com/astral-sh/pubgrub", rev = "9cd9049a64c7352de2ff3b525b9ae36421b0cc18" }
quote = { version = "1.0.37" }
rayon = { version = "1.10.0" }
reflink-copy = { version = "0.1.19" }
@ -175,7 +175,7 @@ unicode-width = { version = "0.1.13" }
unscanny = { version = "0.1.0" }
url = { version = "2.5.2", features = ["serde"] }
urlencoding = { version = "2.1.3" }
version-ranges = { git = "https://github.com/astral-sh/pubgrub", rev = "57afc831bf2551f164617a10383cf288bf5d190d" }
version-ranges = { git = "https://github.com/astral-sh/pubgrub", rev = "9cd9049a64c7352de2ff3b525b9ae36421b0cc18" }
walkdir = { version = "2.5.0" }
which = { version = "7.0.0", features = ["regex"] }
windows-registry = { version = "0.3.0" }

View File

@ -1,12 +1,13 @@
use std::cmp::min;
use itertools::Itertools;
use pubgrub::{Range, Term};
use pubgrub::{Id, Range, State, Term};
use rustc_hash::FxHashMap;
use tokio::sync::mpsc::Sender;
use tracing::{debug, trace};
use crate::candidate_selector::CandidateSelector;
use crate::dependency_provider::UvDependencyProvider;
use crate::pubgrub::{PubGrubPackage, PubGrubPackageInner};
use crate::resolver::Request;
use crate::{
@ -39,14 +40,15 @@ enum BatchPrefetchStrategy {
/// Note that these all heuristics that could totally prefetch lots of irrelevant versions.
#[derive(Default)]
pub(crate) struct BatchPrefetcher {
tried_versions: FxHashMap<PubGrubPackage, usize>,
last_prefetch: FxHashMap<PubGrubPackage, usize>,
tried_versions: FxHashMap<Id<PubGrubPackage>, usize>,
last_prefetch: FxHashMap<Id<PubGrubPackage>, usize>,
}
impl BatchPrefetcher {
/// Prefetch a large number of versions if we already unsuccessfully tried many versions.
pub(crate) fn prefetch_batches(
&mut self,
id: Id<PubGrubPackage>,
next: &PubGrubPackage,
index: Option<&IndexUrl>,
version: &Version,
@ -69,7 +71,7 @@ impl BatchPrefetcher {
return Ok(());
};
let (num_tried, do_prefetch) = self.should_prefetch(next);
let (num_tried, do_prefetch) = self.should_prefetch(id);
if !do_prefetch {
return Ok(());
}
@ -220,15 +222,15 @@ impl BatchPrefetcher {
debug!("Prefetching {prefetch_count} {name} versions");
self.last_prefetch.insert(next.clone(), num_tried);
self.last_prefetch.insert(id, num_tried);
Ok(())
}
/// Each time we tried a version for a package, we register that here.
pub(crate) fn version_tried(&mut self, package: PubGrubPackage) {
pub(crate) fn version_tried(&mut self, id: Id<PubGrubPackage>, package: &PubGrubPackage) {
// Only track base packages, no virtual packages from extras.
if matches!(
&*package,
&**package,
PubGrubPackageInner::Package {
extra: None,
dev: None,
@ -236,16 +238,16 @@ impl BatchPrefetcher {
..
}
) {
*self.tried_versions.entry(package).or_default() += 1;
*self.tried_versions.entry(id).or_default() += 1;
}
}
/// After 5, 10, 20, 40 tried versions, prefetch that many versions to start early but not
/// too aggressive. Later we schedule the prefetch of 50 versions every 20 versions, this gives
/// us a good buffer until we see prefetch again and is high enough to saturate the task pool.
fn should_prefetch(&self, next: &PubGrubPackage) -> (usize, bool) {
let num_tried = self.tried_versions.get(next).copied().unwrap_or_default();
let previous_prefetch = self.last_prefetch.get(next).copied().unwrap_or_default();
fn should_prefetch(&self, id: Id<PubGrubPackage>) -> (usize, bool) {
let num_tried = self.tried_versions.get(&id).copied().unwrap_or_default();
let previous_prefetch = self.last_prefetch.get(&id).copied().unwrap_or_default();
let do_prefetch = (num_tried >= 5 && previous_prefetch < 5)
|| (num_tried >= 10 && previous_prefetch < 10)
|| (num_tried >= 20 && previous_prefetch < 20)
@ -257,9 +259,13 @@ impl BatchPrefetcher {
///
/// Note that they may be inflated when we count the same version repeatedly during
/// backtracking.
pub(crate) fn log_tried_versions(&self) {
pub(crate) fn log_tried_versions(&self, state: &State<UvDependencyProvider>) {
let total_versions: usize = self.tried_versions.values().sum();
let mut tried_versions: Vec<_> = self.tried_versions.iter().collect();
let mut tried_versions: Vec<_> = self
.tried_versions
.iter()
.map(|(id, count)| (&state.package_store[*id], *count))
.collect();
tried_versions.sort_by(|(p1, c1), (p2, c2)| {
c1.cmp(c2)
.reverse()

View File

@ -2,8 +2,8 @@ use std::collections::VecDeque;
use petgraph::visit::EdgeRef;
use petgraph::Direction;
use pubgrub::{Kind, Range, SelectedDependencies, State};
use rustc_hash::FxHashSet;
use pubgrub::{Id, Kind, Range, State};
use rustc_hash::{FxHashMap, FxHashSet};
use uv_distribution_types::{
DerivationChain, DerivationStep, DistRef, Edge, Name, Node, Resolution, ResolvedDist,
@ -87,32 +87,35 @@ impl DerivationChainBuilder {
///
/// This is used to construct a derivation chain upon resolution failure.
pub(crate) fn from_state(
package: &PubGrubPackage,
id: Id<PubGrubPackage>,
version: &Version,
state: &State<UvDependencyProvider>,
) -> Option<DerivationChain> {
/// Find a path from the current package to the root package.
fn find_path(
package: &PubGrubPackage,
id: Id<PubGrubPackage>,
version: &Version,
state: &State<UvDependencyProvider>,
solution: &SelectedDependencies<UvDependencyProvider>,
solution: &FxHashMap<Id<PubGrubPackage>, Version>,
path: &mut Vec<DerivationStep>,
) -> bool {
// Retrieve the incompatibilities for the current package.
let Some(incompatibilities) = state.incompatibilities.get(package) else {
let Some(incompatibilities) = state.incompatibilities.get(&id) else {
return false;
};
for index in incompatibilities {
let incompat = &state.incompatibility_store[*index];
// Find a dependency from a package to the current package.
if let Kind::FromDependencyOf(p1, _, p2, v2) = &incompat.kind {
if p2 == package && v2.contains(version) {
if let Some(version) = solution.get(p1) {
if let Kind::FromDependencyOf(id1, _, id2, v2) = &incompat.kind {
if id == *id2 && v2.contains(version) {
if let Some(version) = solution.get(id1) {
let p1 = &state.package_store[*id1];
let p2 = &state.package_store[*id2];
if p1.name_no_root() == p2.name_no_root() {
// Skip proxied dependencies.
if find_path(p1, version, state, solution, path) {
if find_path(*id1, version, state, solution, path) {
return true;
}
} else if let Some(name) = p1.name_no_root() {
@ -126,7 +129,7 @@ impl DerivationChainBuilder {
));
// Recursively search the next package.
if find_path(p1, version, state, solution, path) {
if find_path(*id1, version, state, solution, path) {
return true;
}
@ -143,10 +146,10 @@ impl DerivationChainBuilder {
false
}
let solution = state.partial_solution.extract_solution();
let solution: FxHashMap<_, _> = state.partial_solution.extract_solution().collect();
let path = {
let mut path = vec![];
if !find_path(package, version, state, &solution, &mut path) {
if !find_path(id, version, state, &solution, &mut path) {
return None;
}
path.reverse();

View File

@ -13,7 +13,7 @@ use dashmap::DashMap;
use either::Either;
use futures::{FutureExt, StreamExt};
use itertools::Itertools;
use pubgrub::{Incompatibility, Range, State};
use pubgrub::{Id, Incompatibility, Range, State};
use rustc_hash::{FxHashMap, FxHashSet};
use tokio::sync::mpsc::{self, Receiver, Sender};
use tokio::sync::oneshot;
@ -308,13 +308,9 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
let mut visited = FxHashSet::default();
let root = PubGrubPackage::from(PubGrubPackageInner::Root(self.project.clone()));
let pubgrub = State::init(root.clone(), MIN_VERSION.clone());
let mut prefetcher = BatchPrefetcher::default();
let state = ForkState::new(
State::init(root.clone(), MIN_VERSION.clone()),
root,
self.env.clone(),
self.python_requirement.clone(),
);
let state = ForkState::new(pubgrub, self.env.clone(), self.python_requirement.clone());
let mut preferences = self.preferences.clone();
let mut forked_states = self.env.initial_forked_states(state);
let mut resolutions = vec![];
@ -327,7 +323,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
let start = Instant::now();
loop {
// Run unit propagation.
if let Err(err) = state.pubgrub.unit_propagation(state.next.clone()) {
if let Err(err) = state.pubgrub.unit_propagation(state.next) {
return Err(self.convert_no_solution_err(
err,
state.fork_urls,
@ -342,7 +338,11 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
// Pre-visit all candidate packages, to allow metadata to be fetched in parallel.
if self.dependency_mode.is_transitive() {
Self::pre_visit(
state.pubgrub.partial_solution.prioritized_packages(),
state
.pubgrub
.partial_solution
.prioritized_packages()
.map(|(id, range)| (&state.pubgrub.package_store[id], range)),
&self.urls,
&self.indexes,
&state.python_requirement,
@ -354,10 +354,12 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
let Some(highest_priority_pkg) = state
.pubgrub
.partial_solution
.pick_highest_priority_pkg(|package, _range| state.priorities.get(package))
.pick_highest_priority_pkg(|id, _range| {
state.priorities.get(&state.pubgrub.package_store[id])
})
else {
if tracing::enabled!(Level::DEBUG) {
prefetcher.log_tried_versions();
prefetcher.log_tried_versions(&state.pubgrub);
}
debug!(
"{} resolution took {:.3}s",
@ -398,9 +400,14 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
state.next = highest_priority_pkg;
let url = state.next.name().and_then(|name| state.fork_urls.get(name));
let index = state
.next
// TODO(charlie): Remove as many usages of `next_package` as we can.
let next_id = state.next;
let next_package = &state.pubgrub.package_store[state.next];
let url = next_package
.name()
.and_then(|name| state.fork_urls.get(name));
let index = next_package
.name()
.and_then(|name| state.fork_indexes.get(name));
@ -415,17 +422,17 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
// since we weren't sure whether it might also be a URL requirement when
// transforming the requirements. For that case, we do another request here
// (idempotent due to caching).
self.request_package(&state.next, url, index, &request_sink)?;
self.request_package(next_package, url, index, &request_sink)?;
prefetcher.version_tried(state.next.clone());
prefetcher.version_tried(next_id, next_package);
let term_intersection = state
.pubgrub
.partial_solution
.term_intersection_for_package(&state.next)
.term_intersection_for_package(next_id)
.expect("a package was chosen but we don't have a term");
let decision = self.choose_version(
&state.next,
next_package,
index,
term_intersection.unwrap_positive(),
&mut state.pins,
@ -440,21 +447,21 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
// Pick the next compatible version.
let version = match decision {
None => {
debug!("No compatible version found for: {next}", next = state.next);
debug!("No compatible version found for: {next_package}");
let term_intersection = state
.pubgrub
.partial_solution
.term_intersection_for_package(&state.next)
.term_intersection_for_package(next_id)
.expect("a package was chosen but we don't have a term");
if let PubGrubPackageInner::Package { ref name, .. } = &*state.next {
if let PubGrubPackageInner::Package { ref name, .. } = &**next_package {
// Check if the decision was due to the package being unavailable
if let Some(entry) = self.unavailable_packages.get(name) {
state
.pubgrub
.add_incompatibility(Incompatibility::custom_term(
state.next.clone(),
next_id,
term_intersection.clone(),
UnavailableReason::Package(entry.clone()),
));
@ -465,7 +472,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
state
.pubgrub
.add_incompatibility(Incompatibility::no_versions(
state.next.clone(),
next_id,
term_intersection.clone(),
));
continue;
@ -483,14 +490,15 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
// Only consider registry packages for prefetch.
if url.is_none() {
prefetcher.prefetch_batches(
&state.next,
next_id,
next_package,
index,
&version,
term_intersection.unwrap_positive(),
state
.pubgrub
.partial_solution
.unchanging_term_for_package(&state.next),
.unchanging_term_for_package(next_id),
&state.python_requirement,
&request_sink,
&self.index,
@ -500,11 +508,11 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
)?;
}
self.on_progress(&state.next, &version);
self.on_progress(next_package, &version);
if !state
.added_dependencies
.entry(state.next.clone())
.entry(next_id)
.or_default()
.insert(version.clone())
{
@ -513,13 +521,14 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
state
.pubgrub
.partial_solution
.add_decision(state.next.clone(), version);
.add_decision(next_id, version);
continue;
}
// Retrieve that package dependencies.
let forked_deps = self.get_dependencies_forking(
&state.next,
next_id,
next_package,
&version,
&state.fork_urls,
&state.env,
@ -533,13 +542,14 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
state
.pubgrub
.add_incompatibility(Incompatibility::custom_version(
state.next.clone(),
next_id,
version.clone(),
UnavailableReason::Version(reason),
));
}
ForkedDependencies::Unforked(dependencies) => {
state.add_package_version_dependencies(
next_id,
&version,
&self.urls,
&self.indexes,
@ -675,7 +685,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
) -> impl Iterator<Item = Result<ForkState, ResolveError>> + 'a {
debug!(
"Splitting resolution on {}=={} over {} into {} resolution{} with separate markers",
current_state.next,
current_state.pubgrub.package_store[current_state.next],
version,
diverging_packages
.iter()
@ -690,6 +700,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
// as it needs to be. We basically move the state
// into `forked_states`, and then only clone it if
// there is at least one more fork to visit.
let package = current_state.next;
let mut cur_state = Some(current_state);
let forks_len = forks.len();
forks
@ -707,6 +718,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
})
.map(move |(fork, mut forked_state)| {
forked_state.add_package_version_dependencies(
package,
version,
&self.urls,
&self.indexes,
@ -1205,6 +1217,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
#[instrument(skip_all, fields(%package, %version))]
fn get_dependencies_forking(
&self,
id: Id<PubGrubPackage>,
package: &PubGrubPackage,
version: &Version,
fork_urls: &ForkUrls,
@ -1213,6 +1226,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
pubgrub: &State<UvDependencyProvider>,
) -> Result<ForkedDependencies, ResolveError> {
let result = self.get_dependencies(
id,
package,
version,
fork_urls,
@ -1236,6 +1250,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
#[instrument(skip_all, fields(%package, %version))]
fn get_dependencies(
&self,
id: Id<PubGrubPackage>,
package: &PubGrubPackage,
version: &Version,
fork_urls: &ForkUrls,
@ -1381,7 +1396,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
));
}
MetadataResponse::Error(dist, err) => {
let chain = DerivationChainBuilder::from_state(package, version, pubgrub)
let chain = DerivationChainBuilder::from_state(id, version, pubgrub)
.unwrap_or_default();
return Err(match &**dist {
Dist::Built(built_dist @ BuiltDist::Path(_)) => ResolveError::Read(
@ -2160,7 +2175,7 @@ pub(crate) struct ForkState {
/// assignments (to packages) from this state's "partial solution."
pubgrub: State<UvDependencyProvider>,
/// The next package on which to run unit propagation.
next: PubGrubPackage,
next: Id<PubGrubPackage>,
/// The set of pinned versions we accrue throughout resolution.
///
/// The key of this map is a package name, and each package name maps to
@ -2190,7 +2205,7 @@ pub(crate) struct ForkState {
priorities: PubGrubPriorities,
/// This keeps track of the set of versions for each package that we've
/// already visited during resolution. This avoids doing redundant work.
added_dependencies: FxHashMap<PubGrubPackage, FxHashSet<Version>>,
added_dependencies: FxHashMap<Id<PubGrubPackage>, FxHashSet<Version>>,
/// The marker expression that created this state.
///
/// The root state always corresponds to a marker expression that is always
@ -2224,13 +2239,12 @@ pub(crate) struct ForkState {
impl ForkState {
fn new(
pubgrub: State<UvDependencyProvider>,
root: PubGrubPackage,
env: ResolverEnvironment,
python_requirement: PythonRequirement,
) -> Self {
Self {
next: pubgrub.root_package,
pubgrub,
next: root,
pins: FilePins::default(),
fork_urls: ForkUrls::default(),
fork_indexes: ForkIndexes::default(),
@ -2245,6 +2259,7 @@ impl ForkState {
/// self-dependencies and handling URLs.
fn add_package_version_dependencies(
&mut self,
for_package: Id<PubGrubPackage>,
for_version: &Version,
urls: &Urls,
indexes: &Indexes,
@ -2277,7 +2292,7 @@ impl ForkState {
}
}
if let Some(name) = self.next.name_no_root() {
if let Some(name) = self.pubgrub.package_store[for_package].name_no_root() {
debug!(
"Adding transitive dependency for {name}=={for_version}: {package}{version}"
);
@ -2308,7 +2323,7 @@ impl ForkState {
}
self.pubgrub.add_package_version_dependencies(
self.next.clone(),
self.next,
for_version.clone(),
dependencies.into_iter().map(|dependency| {
let PubGrubDependency {
@ -2333,26 +2348,26 @@ impl ForkState {
) = reason
{
let package = &self.next;
let python = self.pubgrub.package_store.alloc(PubGrubPackage::from(
PubGrubPackageInner::Python(match kind {
PythonRequirementKind::Installed => PubGrubPython::Installed,
PythonRequirementKind::Target => PubGrubPython::Target,
}),
));
self.pubgrub
.add_incompatibility(Incompatibility::from_dependency(
package.clone(),
*package,
Range::singleton(version.clone()),
(
PubGrubPackage::from(PubGrubPackageInner::Python(match kind {
PythonRequirementKind::Installed => PubGrubPython::Installed,
PythonRequirementKind::Target => PubGrubPython::Target,
})),
release_specifiers_to_ranges(requires_python),
),
(python, release_specifiers_to_ranges(requires_python)),
));
self.pubgrub
.partial_solution
.add_decision(self.next.clone(), version);
.add_decision(self.next, version);
return;
};
self.pubgrub
.add_incompatibility(Incompatibility::custom_version(
self.next.clone(),
self.next,
version.clone(),
UnavailableReason::Version(reason),
));
@ -2374,32 +2389,35 @@ impl ForkState {
}
fn into_resolution(self) -> Resolution {
let solution = self.pubgrub.partial_solution.extract_solution();
let solution: FxHashMap<_, _> = self.pubgrub.partial_solution.extract_solution().collect();
let mut edges: FxHashSet<ResolutionDependencyEdge> = FxHashSet::default();
for (package, self_version) in &solution {
for id in &self.pubgrub.incompatibilities[package] {
let pubgrub::Kind::FromDependencyOf(
ref self_package,
self_package,
ref self_range,
ref dependency_package,
dependency_package,
ref dependency_range,
) = self.pubgrub.incompatibility_store[*id].kind
else {
continue;
};
if package != self_package {
if *package != self_package {
continue;
}
if !self_range.contains(self_version) {
continue;
}
let Some(dependency_version) = solution.get(dependency_package) else {
let Some(dependency_version) = solution.get(&dependency_package) else {
continue;
};
if !dependency_range.contains(dependency_version) {
continue;
}
let self_package = &self.pubgrub.package_store[self_package];
let dependency_package = &self.pubgrub.package_store[dependency_package];
let (self_name, self_extra, self_dev) = match &**self_package {
PubGrubPackageInner::Package {
name: self_name,
@ -2554,7 +2572,7 @@ impl ForkState {
extra,
dev,
marker: None,
} = &*package
} = &*self.pubgrub.package_store[package]
{
Some((
ResolutionPackage {