diff --git a/crates/uv-resolver/src/lock/installable.rs b/crates/uv-resolver/src/lock/installable.rs index 6d1c6bbf7..9b2c1eb74 100644 --- a/crates/uv-resolver/src/lock/installable.rs +++ b/crates/uv-resolver/src/lock/installable.rs @@ -267,8 +267,137 @@ pub trait Installable<'lock> { } } - let mut all_activated_extras: BTreeSet<(&PackageName, &ExtraName)> = - activated_extras.iter().copied().collect(); + // Below, we traverse the dependency graph in a breadth first manner + // twice. It's only in the second traversal that we actually build + // up our resolution graph. In the first traversal, we accumulate all + // activated extras. This includes the extras explicitly enabled on + // the CLI (which were gathered above) and the extras enabled via + // dependency specifications like `foo[extra]`. We need to do this + // to correctly support conflicting extras. + // + // In particular, the way conflicting extras works is by forking the + // resolver based on the extras that are declared as conflicting. But + // this forking needs to be made manifest somehow in the lock file to + // avoid multiple versions of the same package being installed into the + // environment. This is why "conflict markers" were invented. For + // example, you might have both `torch` and `torch+cpu` in your + // dependency graph, where the latter is only enabled when the `cpu` + // extra is enabled, and the former is specifically *not* enabled + // when the `cpu` extra is enabled. + // + // In order to evaluate these conflict markers correctly, we need to + // know whether the `cpu` extra is enabled when we visit the `torch` + // dependency. If we think it's disabled, then we'll erroneously + // include it if the extra is actually enabled. But in order to tell + // if it's enabled, we need to traverse the entire dependency graph + // first to inspect which extras are enabled! + // + // Of course, we don't need to do this at all if there aren't any + // conflicts. In which case, we skip all of this and just do the one + // traversal below. + if !self.lock().conflicts().is_empty() { + let mut activated_extras_set: BTreeSet<(&PackageName, &ExtraName)> = + activated_extras.iter().copied().collect(); + let mut queue = queue.clone(); + let mut seen = seen.clone(); + while let Some((package, extra)) = queue.pop_front() { + let deps = if let Some(extra) = extra { + Either::Left( + package + .optional_dependencies + .get(extra) + .into_iter() + .flatten(), + ) + } else { + Either::Right(package.dependencies.iter()) + }; + for dep in deps { + let mut additional_activated_extras = vec![]; + for extra in &dep.extra { + let key = (&dep.package_id.name, extra); + if !activated_extras_set.contains(&key) { + additional_activated_extras.push(key); + } + } + let temp_activated_extras = if additional_activated_extras.is_empty() { + Cow::Borrowed(&activated_extras) + } else { + let mut owned = activated_extras.clone(); + owned.extend_from_slice(&additional_activated_extras); + Cow::Owned(owned) + }; + if !dep.complexified_marker.evaluate( + marker_env, + &temp_activated_extras, + &activated_groups, + ) { + continue; + } + // It is, I believe, possible to be here for a dependency that + // will ultimately not be included in the final resolution. + // Specifically, carrying on from the example in the comments + // above, we might visit `torch` first and thus not know if + // the `cpu` feature is enabled or not, and thus, the marker + // evaluation above will pass. + // + // So is this a problem? Well, this is the main reason why we + // do two graph traversals. On the second traversal below, we + // will have seen all of the enabled extras, and so `torch` + // will be excluded. + // + // But could this lead to a bigger list of activated extras + // than we actually have? I believe that is indeed possible, + // but I think it is only a problem if it leads to extras that + // *conflict* with one another being simultaneously enabled. + // However, after this first traversal, we check our set of + // accumulated extras to ensure that there are no conflicts. If + // there are, we raise an error. ---AG + + for key in additional_activated_extras { + activated_extras_set.insert(key); + activated_extras.push(key); + } + let dep_dist = self.lock().find_by_id(&dep.package_id); + // Push its dependencies on the queue. + if seen.insert((&dep.package_id, None)) { + queue.push_back((dep_dist, None)); + } + for extra in &dep.extra { + if seen.insert((&dep.package_id, Some(extra))) { + queue.push_back((dep_dist, Some(extra))); + } + } + } + } + // At time of writing, it's somewhat expected that the set of + // conflicting extras is pretty small. With that said, the + // time complexity of the following routine is pretty gross. + // Namely, `set.contains` is linear in the size of the set, + // iteration over all conflicts is also obviously linear in + // the number of conflicting sets and then for each of those, + // we visit every possible pair of activated extra from above, + // which is quadratic in the total number of extras enabled. I + // believe the simplest improvement here, if it's necessary, is + // to adjust the `Conflicts` internals to own these sorts of + // checks. ---AG + for set in self.lock().conflicts().iter() { + for ((pkg1, extra1), (pkg2, extra2)) in + activated_extras_set.iter().tuple_combinations() + { + if set.contains(pkg1, *extra1) && set.contains(pkg2, *extra2) { + return Err(LockErrorKind::ConflictingExtra { + package1: (*pkg1).clone(), + extra1: (*extra1).clone(), + package2: (*pkg2).clone(), + extra2: (*extra2).clone(), + } + .into()); + } + } + } + } + while let Some((package, extra)) = queue.pop_front() { let deps = if let Some(extra) = extra { Either::Left( @@ -282,15 +411,6 @@ pub trait Installable<'lock> { Either::Right(package.dependencies.iter()) }; for dep in deps { - let mut activated_extras = Cow::Borrowed(&*activated_extras); - if !dep.extra.is_empty() { - let mut extended = activated_extras.to_vec(); - for extra in &dep.extra { - extended.push((&dep.package_id.name, extra)); - all_activated_extras.insert((&dep.package_id.name, extra)); - } - activated_extras = Cow::Owned(extended); - } if !dep.complexified_marker.evaluate( marker_env, &activated_extras, @@ -343,32 +463,6 @@ pub trait Installable<'lock> { } } - // At time of writing, it's somewhat expected that the set of - // conflicting extras is pretty small. With that said, the - // time complexity of the following routine is pretty gross. - // Namely, `set.contains` is linear in the size of the set, - // iteration over all conflicts is also obviously linear in - // the number of conflicting sets and then for each of those, - // we visit every possible pair of activated extra from above, - // which is quadratic in the total number of extras enabled. I - // believe the simplest improvement here, if it's necessary, is - // to adjust the `Conflicts` internals to own these sorts of - // checks. ---AG - for set in self.lock().conflicts().iter() { - for ((pkg1, extra1), (pkg2, extra2)) in all_activated_extras.iter().tuple_combinations() - { - if set.contains(pkg1, *extra1) && set.contains(pkg2, *extra2) { - return Err(LockErrorKind::ConflictingExtra { - package1: (*pkg1).clone(), - extra1: (*extra1).clone(), - package2: (*pkg2).clone(), - extra2: (*extra2).clone(), - } - .into()); - } - } - } - Ok(Resolution::new(petgraph)) }