Only include visited packages in error message derivation (#1144)

## Summary

This is my guess as to the source of the resolver flake, based on
information and extensive debugging from @zanieb. In short, if we rely
on `self.index.packages` as a source of truth during error reporting, we
open ourselves up to a source of non-determinism, because we fetch
package metadata asynchronously in the background while we solve -- so
packages _could_ be included in or excluded from the index depending on
the order in which those requests are returned.

So, instead, we now track the set of packages that _were_ visited by the
solver. Visiting a package _requires_ that we wait for its metadata to
be available. By limiting analysis to those packages that were visited
during solving, we are faithfully representing the state of the solver
at the time of failure.

Closes #863
This commit is contained in:
Charlie Marsh 2024-01-28 06:27:22 -08:00 committed by GitHub
parent 6f2c235d21
commit 3d10f344f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 27 additions and 25 deletions

View File

@ -1,13 +1,3 @@
[profile.default] [profile.default]
# Mark tests that take longer than 10s as slow # Mark tests that take longer than 10s as slow
slow-timeout = "10s" slow-timeout = "10s"
[[profile.default.overrides]]
# Some of these tests have a non-determinstic snapshot
filter = 'binary(pip_install_scenarios)'
retries = 2
[[profile.no-retry.overrides]]
# An optional profile to avoid retries
filter = 'binary(pip_install_scenarios)'
retries = 0

View File

@ -2,6 +2,7 @@ use std::collections::BTreeSet;
use std::convert::Infallible; use std::convert::Infallible;
use std::fmt::Formatter; use std::fmt::Formatter;
use dashmap::DashSet;
use indexmap::IndexMap; use indexmap::IndexMap;
use pubgrub::range::Range; use pubgrub::range::Range;
use pubgrub::report::{DefaultStringReporter, DerivationTree, Reporter}; use pubgrub::report::{DefaultStringReporter, DerivationTree, Reporter};
@ -167,6 +168,7 @@ impl NoSolutionError {
pub(crate) fn with_available_versions( pub(crate) fn with_available_versions(
mut self, mut self,
python_requirement: &PythonRequirement, python_requirement: &PythonRequirement,
visited: &DashSet<PackageName>,
package_versions: &OnceMap<PackageName, VersionMap>, package_versions: &OnceMap<PackageName, VersionMap>,
) -> Self { ) -> Self {
let mut available_versions = IndexMap::default(); let mut available_versions = IndexMap::default();
@ -186,15 +188,21 @@ impl NoSolutionError {
); );
} }
PubGrubPackage::Package(name, ..) => { PubGrubPackage::Package(name, ..) => {
if let Some(entry) = package_versions.get(name) { // Avoid including available versions for packages that exist in the derivation
let version_map = entry.value(); // tree, but were never visited during resolution. We _may_ have metadata for
available_versions.insert( // these packages, but it's non-deterministic, and omitting them ensures that
package.clone(), // we represent the state of the resolver at the time of failure.
version_map if visited.contains(name) {
.iter() if let Some(entry) = package_versions.get(name) {
.map(|(version, _)| version.clone()) let version_map = entry.value();
.collect(), available_versions.insert(
); package.clone(),
version_map
.iter()
.map(|(version, _)| version.clone())
.collect(),
);
}
} }
} }
} }

View File

@ -3,7 +3,7 @@
use std::sync::Arc; use std::sync::Arc;
use anyhow::Result; use anyhow::Result;
use dashmap::DashMap; use dashmap::{DashMap, DashSet};
use futures::channel::mpsc::UnboundedReceiver; use futures::channel::mpsc::UnboundedReceiver;
use futures::{pin_mut, FutureExt, StreamExt}; use futures::{pin_mut, FutureExt, StreamExt};
use itertools::Itertools; use itertools::Itertools;
@ -68,6 +68,8 @@ pub struct Resolver<'a, Provider: ResolverProvider> {
index: &'a InMemoryIndex, index: &'a InMemoryIndex,
/// A map from [`PackageId`] to the `Requires-Python` version specifiers for that package. /// A map from [`PackageId`] to the `Requires-Python` version specifiers for that package.
incompatibilities: DashMap<PackageId, VersionSpecifiers>, incompatibilities: DashMap<PackageId, VersionSpecifiers>,
/// The set of all registry-based packages visited during resolution.
visited: DashSet<PackageName>,
editables: FxHashMap<PackageName, (LocalEditable, Metadata21)>, editables: FxHashMap<PackageName, (LocalEditable, Metadata21)>,
reporter: Option<Arc<dyn Reporter>>, reporter: Option<Arc<dyn Reporter>>,
provider: Provider, provider: Provider,
@ -167,6 +169,7 @@ impl<'a, Provider: ResolverProvider> Resolver<'a, Provider> {
Self { Self {
index, index,
incompatibilities: DashMap::default(), incompatibilities: DashMap::default(),
visited: DashSet::default(),
selector, selector,
allowed_urls, allowed_urls,
project: manifest.project, project: manifest.project,
@ -223,7 +226,7 @@ impl<'a, Provider: ResolverProvider> Resolver<'a, Provider> {
if let ResolveError::NoSolution(err) = err { if let ResolveError::NoSolution(err) = err {
ResolveError::NoSolution( ResolveError::NoSolution(
err err
.with_available_versions(&self.python_requirement, &self.index.packages) .with_available_versions(&self.python_requirement, &self.visited, &self.index.packages)
.with_selector(self.selector.clone()) .with_selector(self.selector.clone())
.with_python_requirement(&self.python_requirement) .with_python_requirement(&self.python_requirement)
) )
@ -504,6 +507,7 @@ impl<'a, Provider: ResolverProvider> Resolver<'a, Provider> {
// Wait for the metadata to be available. // Wait for the metadata to be available.
let entry = self.index.packages.wait(package_name).await?; let entry = self.index.packages.wait(package_name).await?;
let version_map = entry.value(); let version_map = entry.value();
self.visited.insert(package_name.clone());
if let Some(extra) = extra { if let Some(extra) = extra {
debug!( debug!(

View File

@ -566,8 +566,8 @@ fn dependency_excludes_range_of_compatible_versions() -> Result<()> {
Because only albatross<=3.0.0 is available and albatross==3.0.0 depends on bluebird==3.0.0, we can conclude that albatross>=3.0.0 depends on bluebird==3.0.0. Because only albatross<=3.0.0 is available and albatross==3.0.0 depends on bluebird==3.0.0, we can conclude that albatross>=3.0.0 depends on bluebird==3.0.0.
And because we know from (2) that all versions of crow, bluebird!=1.0.0, albatross<3.0.0 are incompatible, we can conclude that all versions of crow depend on one of: And because we know from (2) that all versions of crow, bluebird!=1.0.0, albatross<3.0.0 are incompatible, we can conclude that all versions of crow depend on one of:
bluebird<=1.0.0 bluebird==1.0.0
bluebird>=3.0.0 bluebird==3.0.0
And because you require crow and you require bluebird>=2.0.0,<3.0.0, we can conclude that the requirements are unsatisfiable. And because you require crow and you require bluebird>=2.0.0,<3.0.0, we can conclude that the requirements are unsatisfiable.
"###); "###);
@ -697,8 +697,8 @@ fn dependency_excludes_non_contiguous_range_of_compatible_versions() -> Result<(
Because only albatross<=3.0.0 is available and albatross==3.0.0 depends on bluebird==3.0.0, we can conclude that albatross>=3.0.0 depends on bluebird==3.0.0. Because only albatross<=3.0.0 is available and albatross==3.0.0 depends on bluebird==3.0.0, we can conclude that albatross>=3.0.0 depends on bluebird==3.0.0.
And because we know from (2) that all versions of crow, bluebird!=1.0.0, albatross<3.0.0 are incompatible, we can conclude that all versions of crow depend on one of: And because we know from (2) that all versions of crow, bluebird!=1.0.0, albatross<3.0.0 are incompatible, we can conclude that all versions of crow depend on one of:
bluebird<=1.0.0 bluebird==1.0.0
bluebird>=3.0.0 bluebird==3.0.0
And because you require crow and you require bluebird>=2.0.0,<3.0.0, we can conclude that the requirements are unsatisfiable. And because you require crow and you require bluebird>=2.0.0,<3.0.0, we can conclude that the requirements are unsatisfiable.
"###); "###);