From eebc2f340aa13010f297c95c0ece6dbbf9c92670 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 23 Jan 2024 20:30:33 -0500 Subject: [PATCH] make some things guaranteed to be deterministic (#1065) This PR replaces a few uses of hash maps/sets with btree maps/sets and index maps/sets. This has the benefit of guaranteeing a deterministic order of iteration. I made these changes as part of looking into a flaky test. Unfortunately, I'm not optimistic that anything here will actually fix the flaky test, since I don't believe anything was actually dependent on the order of iteration. --- Cargo.lock | 1 + Cargo.toml | 1 + crates/puffin-client/src/html.rs | 14 ++++++++++++-- crates/puffin-resolver/Cargo.toml | 1 + crates/puffin-resolver/src/error.rs | 17 ++++++++++------- crates/puffin-resolver/src/pubgrub/report.rs | 17 +++++++++-------- crates/pypi-types/src/simple_json.rs | 19 +++++++++++++++++++ 7 files changed, 53 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec149b32d..543c97fa1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2653,6 +2653,7 @@ dependencies = [ "futures", "gourgeist", "http-cache-semantics", + "indexmap 2.1.0", "insta", "install-wheel-rs", "itertools 0.12.0", diff --git a/Cargo.toml b/Cargo.toml index 9148a8a11..e58d3a695 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ hex = { version = "0.4.3" } html-escape = { version = "0.2.13" } http = { version = "0.2.11" } http-cache-semantics = { version = "1.0.2" } +indexmap = { version = "2.1.0" } indicatif = { version = "0.17.7" } indoc = { version = "2.0.4" } itertools = { version = "0.12.0" } diff --git a/crates/puffin-client/src/html.rs b/crates/puffin-client/src/html.rs index bf9b3e349..56e0f1114 100644 --- a/crates/puffin-client/src/html.rs +++ b/crates/puffin-client/src/html.rs @@ -7,11 +7,12 @@ use url::Url; use pep440_rs::VersionSpecifiers; use pypi_types::{BaseUrl, DistInfoMetadata, File, Hashes, Yanked}; +/// A parsed structure from PyPI "HTML" index format for a single package. #[derive(Debug, Clone)] pub(crate) struct SimpleHtml { /// The [`BaseUrl`] to which all relative URLs should be resolved. pub(crate) base: BaseUrl, - /// The list of [`File`]s available for download. + /// The list of [`File`]s available for download sorted by filename. pub(crate) files: Vec, } @@ -37,13 +38,22 @@ impl SimpleHtml { ); // Parse each `` tag, to extract the filename, hash, and URL. - let files: Vec = dom + let mut files: Vec = dom .nodes() .iter() .filter_map(|node| node.as_tag()) .filter(|link| link.name().as_bytes() == b"a") .map(|link| Self::parse_anchor(link)) .collect::, _>>()?; + // While it has not been positively observed, we sort the files + // to ensure we have a defined ordering. Otherwise, if we rely on + // the API to provide a stable ordering and doesn't, it can lead + // non-deterministic behavior elsewhere. (This is somewhat hand-wavy + // and a bit of a band-aide, since arguably, the order of this API + // response probably shouldn't have an impact on things downstream from + // this. That is, if something depends on ordering, then it should + // probably be the thing that does the sorting.) + files.sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename)); Ok(Self { base, files }) } diff --git a/crates/puffin-resolver/Cargo.toml b/crates/puffin-resolver/Cargo.toml index 32c331e09..dac3ec502 100644 --- a/crates/puffin-resolver/Cargo.toml +++ b/crates/puffin-resolver/Cargo.toml @@ -43,6 +43,7 @@ derivative = { workspace = true } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } http-cache-semantics = { workspace = true } +indexmap = { workspace = true } itertools = { workspace = true } once_cell = { workspace = true } owo-colors = { workspace = true } diff --git a/crates/puffin-resolver/src/error.rs b/crates/puffin-resolver/src/error.rs index c78e98a1f..54e6259c4 100644 --- a/crates/puffin-resolver/src/error.rs +++ b/crates/puffin-resolver/src/error.rs @@ -1,9 +1,10 @@ +use std::collections::BTreeSet; use std::convert::Infallible; use std::fmt::Formatter; +use indexmap::IndexMap; use pubgrub::range::Range; use pubgrub::report::{DefaultStringReporter, DerivationTree, Reporter}; -use rustc_hash::FxHashMap; use thiserror::Error; use url::Url; @@ -112,7 +113,7 @@ impl From, Infallibl ResolveError::NoSolution(NoSolutionError { derivation_tree, // The following should be populated before display for the best error messages - available_versions: FxHashMap::default(), + available_versions: IndexMap::default(), selector: None, python_requirement: None, }) @@ -131,7 +132,7 @@ impl From, Infallibl #[derive(Debug)] pub struct NoSolutionError { derivation_tree: DerivationTree>, - available_versions: FxHashMap>, + available_versions: IndexMap>, selector: Option, python_requirement: Option, } @@ -170,19 +171,21 @@ impl NoSolutionError { python_requirement: &PythonRequirement, package_versions: &OnceMap, ) -> Self { - let mut available_versions = FxHashMap::default(); + let mut available_versions = IndexMap::default(); for package in self.derivation_tree.packages() { match package { PubGrubPackage::Root(_) => {} PubGrubPackage::Python(PubGrubPython::Installed) => { available_versions.insert( package.clone(), - vec![python_requirement.installed().clone()], + BTreeSet::from([python_requirement.installed().clone()]), ); } PubGrubPackage::Python(PubGrubPython::Target) => { - available_versions - .insert(package.clone(), vec![python_requirement.target().clone()]); + available_versions.insert( + package.clone(), + BTreeSet::from([python_requirement.target().clone()]), + ); } PubGrubPackage::Package(name, ..) => { if let Some(entry) = package_versions.get(name) { diff --git a/crates/puffin-resolver/src/pubgrub/report.rs b/crates/puffin-resolver/src/pubgrub/report.rs index 87ca3fbe3..32ab4a0e7 100644 --- a/crates/puffin-resolver/src/pubgrub/report.rs +++ b/crates/puffin-resolver/src/pubgrub/report.rs @@ -1,15 +1,16 @@ use std::borrow::Cow; use std::cmp::Ordering; +use std::collections::BTreeSet; use std::ops::Bound; use derivative::Derivative; +use indexmap::{IndexMap, IndexSet}; use owo_colors::OwoColorize; use pep440_rs::Version; use pubgrub::range::Range; use pubgrub::report::{DerivationTree, Derived, External, ReportFormatter}; use pubgrub::term::Term; use pubgrub::type_aliases::Map; -use rustc_hash::{FxHashMap, FxHashSet}; use crate::candidate_selector::CandidateSelector; use crate::prerelease_mode::PreReleaseStrategy; @@ -20,7 +21,7 @@ use super::PubGrubPackage; #[derive(Debug)] pub(crate) struct PubGrubReportFormatter<'a> { /// The versions that were available for each package - pub(crate) available_versions: &'a FxHashMap>, + pub(crate) available_versions: &'a IndexMap>, /// The versions that were available for each package pub(crate) python_requirement: Option<&'a PythonRequirement>, @@ -151,8 +152,8 @@ impl ReportFormatter> for PubGrubReportFormatter< [(package @ PubGrubPackage::Package(..), Term::Positive(range))] => { let range = range.simplify( self.available_versions - .get(package) - .unwrap_or(&vec![]) + .get(*package) + .unwrap_or(&BTreeSet::new()) .iter(), ); format!( @@ -163,8 +164,8 @@ impl ReportFormatter> for PubGrubReportFormatter< [(package @ PubGrubPackage::Package(..), Term::Negative(range))] => { let range = range.simplify( self.available_versions - .get(package) - .unwrap_or(&vec![]) + .get(*package) + .unwrap_or(&BTreeSet::new()) .iter(), ); format!( @@ -347,7 +348,7 @@ impl PubGrubReportFormatter<'_> { &self, derivation_tree: &DerivationTree>, selector: &CandidateSelector, - ) -> FxHashSet { + ) -> IndexSet { /// Returns `true` if pre-releases were allowed for a package. fn allowed_prerelease(package: &PubGrubPackage, selector: &CandidateSelector) -> bool { match selector.prerelease_strategy() { @@ -371,7 +372,7 @@ impl PubGrubReportFormatter<'_> { } } - let mut hints = FxHashSet::default(); + let mut hints = IndexSet::default(); match derivation_tree { DerivationTree::External(external) => match external { External::NoVersions(package, set) => { diff --git a/crates/pypi-types/src/simple_json.rs b/crates/pypi-types/src/simple_json.rs index 5f6c99c3e..1c10f5515 100644 --- a/crates/pypi-types/src/simple_json.rs +++ b/crates/pypi-types/src/simple_json.rs @@ -7,11 +7,30 @@ use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError}; use crate::lenient_requirement::LenientVersionSpecifiers; +/// A collection of "files" from `PyPI`'s JSON API for a single package. #[derive(Debug, Clone, Deserialize)] pub struct SimpleJson { + /// The list of [`File`]s available for download sorted by filename. + #[serde(deserialize_with = "sorted_simple_json_files")] pub files: Vec, } +/// Deserializes a sequence of "simple" files from `PyPI` and ensures that they +/// are sorted in a stable order. +fn sorted_simple_json_files<'de, D: Deserializer<'de>>(d: D) -> Result, D::Error> { + let mut files = >::deserialize(d)?; + // While it has not been positively observed, we sort the files + // to ensure we have a defined ordering. Otherwise, if we rely on + // the API to provide a stable ordering and doesn't, it can lead + // non-deterministic behavior elsewhere. (This is somewhat hand-wavy + // and a bit of a band-aide, since arguably, the order of this API + // response probably shouldn't have an impact on things downstream from + // this. That is, if something depends on ordering, then it should + // probably be the thing that does the sorting.) + files.sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename)); + Ok(files) +} + /// A single (remote) file belonging to a package, either a wheel or a source distribution. /// ///