Use Hashbrown's raw entry API to reduce hashes and clone in priority (#10881)

## Summary

I'm open to not merging this -- I was kind of just interested in what
the API looked like. But the idea is: we can avoid hashing values twice
and unnecessarily cloning within the priority map by using the raw entry
API.
This commit is contained in:
Charlie Marsh 2025-01-23 09:34:37 -05:00 committed by GitHub
parent db4ab9dc8a
commit b2dac9979d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 48 additions and 39 deletions

View File

@ -34,6 +34,8 @@ pub use yanks::AllowedYanks;
/// `ConflictItemRef`. i.e., We can avoid allocs on lookups. /// `ConflictItemRef`. i.e., We can avoid allocs on lookups.
type FxHashbrownSet<T> = hashbrown::HashSet<T, rustc_hash::FxBuildHasher>; type FxHashbrownSet<T> = hashbrown::HashSet<T, rustc_hash::FxBuildHasher>;
type FxHashbrownMap<K, V> = hashbrown::HashMap<K, V, rustc_hash::FxBuildHasher>;
mod candidate_selector; mod candidate_selector;
mod dependency_mode; mod dependency_mode;
mod dependency_provider; mod dependency_provider;

View File

@ -354,3 +354,9 @@ impl std::fmt::Display for PubGrubPackageInner {
} }
} }
} }
impl From<&PubGrubPackage> for PubGrubPackage {
fn from(package: &PubGrubPackage) -> Self {
package.clone()
}
}

View File

@ -1,8 +1,8 @@
use std::cmp::Reverse; use std::cmp::Reverse;
use std::collections::hash_map::OccupiedEntry;
use hashbrown::hash_map::{EntryRef, OccupiedEntry};
use pubgrub::Range; use pubgrub::Range;
use rustc_hash::FxHashMap; use rustc_hash::FxBuildHasher;
use uv_normalize::PackageName; use uv_normalize::PackageName;
use uv_pep440::Version; use uv_pep440::Version;
@ -10,7 +10,7 @@ use uv_pep440::Version;
use crate::fork_urls::ForkUrls; use crate::fork_urls::ForkUrls;
use crate::pubgrub::package::PubGrubPackage; use crate::pubgrub::package::PubGrubPackage;
use crate::pubgrub::PubGrubPackageInner; use crate::pubgrub::PubGrubPackageInner;
use crate::SentinelRange; use crate::{FxHashbrownMap, SentinelRange};
/// A prioritization map to guide the PubGrub resolution process. /// A prioritization map to guide the PubGrub resolution process.
/// ///
@ -23,8 +23,8 @@ use crate::SentinelRange;
/// See: <https://github.com/pypa/pip/blob/ef78c129b1a966dbbbdb8ebfffc43723e89110d1/src/pip/_internal/resolution/resolvelib/provider.py#L120> /// See: <https://github.com/pypa/pip/blob/ef78c129b1a966dbbbdb8ebfffc43723e89110d1/src/pip/_internal/resolution/resolvelib/provider.py#L120>
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub(crate) struct PubGrubPriorities { pub(crate) struct PubGrubPriorities {
package_priority: FxHashMap<PackageName, PubGrubPriority>, package_priority: FxHashbrownMap<PackageName, PubGrubPriority>,
virtual_package_tiebreaker: FxHashMap<PubGrubPackage, PubGrubTiebreaker>, virtual_package_tiebreaker: FxHashbrownMap<PubGrubPackage, PubGrubTiebreaker>,
} }
impl PubGrubPriorities { impl PubGrubPriorities {
@ -35,27 +35,24 @@ impl PubGrubPriorities {
version: &Range<Version>, version: &Range<Version>,
urls: &ForkUrls, urls: &ForkUrls,
) { ) {
if !self.virtual_package_tiebreaker.contains_key(package) { let len = self.virtual_package_tiebreaker.len();
self.virtual_package_tiebreaker.insert( self.virtual_package_tiebreaker
package.clone(), .entry_ref(package)
PubGrubTiebreaker::from( .or_insert_with(|| {
u32::try_from(self.virtual_package_tiebreaker.len()) PubGrubTiebreaker::from(u32::try_from(len).expect("Less than 2**32 packages"))
.expect("Less than 2**32 packages"), });
),
);
}
let next = self.package_priority.len();
// The root package and Python constraints have no explicit priority, the root package is // The root package and Python constraints have no explicit priority, the root package is
// always first and the Python version (range) is fixed. // always first and the Python version (range) is fixed.
let Some(name) = package.name_no_root() else { let Some(name) = package.name_no_root() else {
return; return;
}; };
match self.package_priority.entry(name.clone()) { let len = self.package_priority.len();
std::collections::hash_map::Entry::Occupied(mut entry) => { match self.package_priority.entry_ref(name) {
EntryRef::Occupied(mut entry) => {
// Preserve the original index. // Preserve the original index.
let index = Self::get_index(&entry).unwrap_or(next); let index = Self::get_index(&entry).unwrap_or(len);
// Compute the priority. // Compute the priority.
let priority = if urls.get(name).is_some() { let priority = if urls.get(name).is_some() {
@ -81,16 +78,16 @@ impl PubGrubPriorities {
entry.insert(priority); entry.insert(priority);
} }
} }
std::collections::hash_map::Entry::Vacant(entry) => { EntryRef::Vacant(entry) => {
// Compute the priority. // Compute the priority.
let priority = if urls.get(name).is_some() { let priority = if urls.get(name).is_some() {
PubGrubPriority::DirectUrl(Reverse(next)) PubGrubPriority::DirectUrl(Reverse(len))
} else if version.as_singleton().is_some() } else if version.as_singleton().is_some()
|| SentinelRange::from(version).is_sentinel() || SentinelRange::from(version).is_sentinel()
{ {
PubGrubPriority::Singleton(Reverse(next)) PubGrubPriority::Singleton(Reverse(len))
} else { } else {
PubGrubPriority::Unspecified(Reverse(next)) PubGrubPriority::Unspecified(Reverse(len))
}; };
// Insert the priority. // Insert the priority.
@ -99,7 +96,9 @@ impl PubGrubPriorities {
} }
} }
fn get_index(entry: &OccupiedEntry<PackageName, PubGrubPriority>) -> Option<usize> { fn get_index(
entry: &OccupiedEntry<'_, PackageName, PubGrubPriority, FxBuildHasher>,
) -> Option<usize> {
match entry.get() { match entry.get() {
PubGrubPriority::ConflictLate(Reverse(index)) PubGrubPriority::ConflictLate(Reverse(index))
| PubGrubPriority::Unspecified(Reverse(index)) | PubGrubPriority::Unspecified(Reverse(index))
@ -133,7 +132,6 @@ impl PubGrubPriorities {
/// Returns whether the priority was changed, i.e., it's the first time we hit this condition /// Returns whether the priority was changed, i.e., it's the first time we hit this condition
/// for the package. /// for the package.
pub(crate) fn mark_conflict_early(&mut self, package: &PubGrubPackage) -> bool { pub(crate) fn mark_conflict_early(&mut self, package: &PubGrubPackage) -> bool {
let next = self.package_priority.len();
let Some(name) = package.name_no_root() else { let Some(name) = package.name_no_root() else {
// Not a correctness bug // Not a correctness bug
if cfg!(debug_assertions) { if cfg!(debug_assertions) {
@ -142,20 +140,22 @@ impl PubGrubPriorities {
return false; return false;
} }
}; };
match self.package_priority.entry(name.clone()) {
std::collections::hash_map::Entry::Occupied(mut entry) => { let len = self.package_priority.len();
match self.package_priority.entry_ref(name) {
EntryRef::Vacant(entry) => {
entry.insert(PubGrubPriority::ConflictEarly(Reverse(len)));
true
}
EntryRef::Occupied(mut entry) => {
if matches!(entry.get(), PubGrubPriority::ConflictEarly(_)) { if matches!(entry.get(), PubGrubPriority::ConflictEarly(_)) {
// Already in the right category // Already in the right category
return false; return false;
}; };
let index = Self::get_index(&entry).unwrap_or(next); let index = Self::get_index(&entry).unwrap_or(len);
entry.insert(PubGrubPriority::ConflictEarly(Reverse(index))); entry.insert(PubGrubPriority::ConflictEarly(Reverse(index)));
true true
} }
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(PubGrubPriority::ConflictEarly(Reverse(next)));
true
}
} }
} }
@ -165,7 +165,6 @@ impl PubGrubPriorities {
/// Returns whether the priority was changed, i.e., it's the first time this package was /// Returns whether the priority was changed, i.e., it's the first time this package was
/// marked as conflicting above the threshold. /// marked as conflicting above the threshold.
pub(crate) fn mark_conflict_late(&mut self, package: &PubGrubPackage) -> bool { pub(crate) fn mark_conflict_late(&mut self, package: &PubGrubPackage) -> bool {
let next = self.package_priority.len();
let Some(name) = package.name_no_root() else { let Some(name) = package.name_no_root() else {
// Not a correctness bug // Not a correctness bug
if cfg!(debug_assertions) { if cfg!(debug_assertions) {
@ -174,8 +173,14 @@ impl PubGrubPriorities {
return false; return false;
} }
}; };
match self.package_priority.entry(name.clone()) {
std::collections::hash_map::Entry::Occupied(mut entry) => { let len = self.package_priority.len();
match self.package_priority.entry_ref(name) {
EntryRef::Vacant(entry) => {
entry.insert(PubGrubPriority::ConflictLate(Reverse(len)));
true
}
EntryRef::Occupied(mut entry) => {
// The ConflictEarly` match avoids infinite loops. // The ConflictEarly` match avoids infinite loops.
if matches!( if matches!(
entry.get(), entry.get(),
@ -184,14 +189,10 @@ impl PubGrubPriorities {
// Already in the right category // Already in the right category
return false; return false;
}; };
let index = Self::get_index(&entry).unwrap_or(next); let index = Self::get_index(&entry).unwrap_or(len);
entry.insert(PubGrubPriority::ConflictLate(Reverse(index))); entry.insert(PubGrubPriority::ConflictLate(Reverse(index)));
true true
} }
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(PubGrubPriority::ConflictLate(Reverse(next)));
true
}
} }
} }
} }