Optimize flattening in apache airflow workspace (#11313)

## Motivation

No-op `uv lock` in apache airflow
(891c67f210ab7c877d1f00ea6ea3d3cdbb0e96ef) is slow, which makes `uv run`
slow, too.

Reference project:

```
$ hyperfine "uv run python -c \"print('hi')\""
Benchmark 1: uv run python -c "print('hi')"
Time (mean ± σ):      16.3 ms ±   1.5 ms    [User: 9.8 ms, System: 6.4 ms]
Range (min … max):    13.0 ms …  20.0 ms    186 runs
```

Apache airflow before:

```
$ hyperfine "uv run python -c \"print('hi')\""
Benchmark 1: uv run python -c "print('hi')"
Time (mean ± σ):     161.0 ms ±   5.2 ms    [User: 135.3 ms, System: 24.1 ms]
Range (min … max):   155.0 ms … 176.3 ms    18 runs
```

## Optimization

`FlatRequiresDist::from_requirements` is taking 50% of main thread
runtime.

Before:


![image](https://github.com/user-attachments/assets/10ea76eb-d1e9-477c-b400-39e653eb8f3a)

After both commits:


![image](https://github.com/user-attachments/assets/5c578ff6-f80b-46bb-9b5f-8be8435c3d85)

Apache airflow after the first commit:

```
$ hyperfine "uv-profiling run python -c \"print('hi')\""
Benchmark 1: uv-profiling run python -c "print('hi')"
  Time (mean ± σ):     122.3 ms ±   5.4 ms    [User: 96.1 ms, System: 24.7 ms]
  Range (min … max):   114.0 ms … 133.2 ms    23 runs
```

Apache airflow after the second commit:

```
$ hyperfine "uv-profiling run python -c \"print('hi')\""
Benchmark 1: uv-profiling run python -c "print('hi')"
  Time (mean ± σ):     108.5 ms ±   3.4 ms    [User: 83.2 ms, System: 24.2 ms]
  Range (min … max):   103.6 ms … 119.9 ms    28 runs
```
This commit is contained in:
konsti 2025-02-07 23:08:40 +01:00 committed by GitHub
parent 711766e79c
commit 6e5479f5db
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 50 additions and 34 deletions

View File

@ -106,7 +106,7 @@ impl BuildRequires {
project_workspace.project_root(), project_workspace.project_root(),
project_sources, project_sources,
project_indexes, project_indexes,
extra.as_ref(), extra.as_deref(),
group, group,
locations, locations,
project_workspace.workspace(), project_workspace.workspace(),
@ -181,7 +181,7 @@ impl BuildRequires {
workspace.install_path(), workspace.install_path(),
project_sources, project_sources,
project_indexes, project_indexes,
extra.as_ref(), extra.as_deref(),
group, group,
locations, locations,
workspace, workspace,

View File

@ -362,7 +362,7 @@ impl LoweredRequirement {
requirement requirement
.marker .marker
.top_level_extra_name() .top_level_extra_name()
.is_some_and(|extra| extra == *target) .is_some_and(|extra| &*extra == target)
}) })
}) })
.cloned() .cloned()

View File

@ -209,7 +209,7 @@ impl RequiresDist {
project_workspace.project_root(), project_workspace.project_root(),
project_sources, project_sources,
project_indexes, project_indexes,
extra.as_ref(), extra.as_deref(),
group, group,
locations, locations,
project_workspace.workspace(), project_workspace.workspace(),
@ -262,7 +262,7 @@ impl RequiresDist {
// If there is no such requirement with the extra, error. // If there is no such requirement with the extra, error.
if !metadata.requires_dist.iter().any(|requirement| { if !metadata.requires_dist.iter().any(|requirement| {
requirement.name == *name requirement.name == *name
&& requirement.marker.top_level_extra_name().as_ref() == Some(extra) && requirement.marker.top_level_extra_name().as_deref() == Some(extra)
}) { }) {
return Err(MetadataError::IncompleteSourceExtra( return Err(MetadataError::IncompleteSourceExtra(
name.clone(), name.clone(),
@ -370,6 +370,12 @@ impl FlatRequiresDist {
return Self(requirements); return Self(requirements);
} }
// Memoize the top level extras, in the same order as `requirements`
let top_level_extras: Vec<_> = requirements
.iter()
.map(|req| req.marker.top_level_extra_name())
.collect();
// Transitively process all extras that are recursively included. // Transitively process all extras that are recursively included.
let mut flattened = requirements.clone(); let mut flattened = requirements.clone();
let mut seen = FxHashSet::<(ExtraName, MarkerTree)>::default(); let mut seen = FxHashSet::<(ExtraName, MarkerTree)>::default();
@ -384,8 +390,10 @@ impl FlatRequiresDist {
} }
// Find the requirements for the extra. // Find the requirements for the extra.
for requirement in &requirements { for (requirement, top_level_extra) in requirements.iter().zip(top_level_extras.iter()) {
if requirement.marker.top_level_extra_name().as_ref() == Some(&extra) { if top_level_extra.as_deref() != Some(&extra) {
continue;
}
let requirement = { let requirement = {
let mut marker = marker; let mut marker = marker;
marker.and(requirement.marker); marker.and(requirement.marker);
@ -413,7 +421,6 @@ impl FlatRequiresDist {
} }
} }
} }
}
// Drop all the self-references now that we've flattened them out. // Drop all the self-references now that we've flattened them out.
flattened.retain(|req| req.name != *name); flattened.retain(|req| req.name != *name);

View File

@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fmt::{self, Display, Formatter}; use std::fmt::{self, Display, Formatter};
use std::ops::{Bound, Deref}; use std::ops::{Bound, Deref};
@ -772,7 +773,7 @@ impl MarkerTree {
} }
/// Returns the underlying [`MarkerTreeKind`] of the root node. /// Returns the underlying [`MarkerTreeKind`] of the root node.
pub fn kind(&self) -> MarkerTreeKind<'_> { pub fn kind(self) -> MarkerTreeKind<'static> {
if self.is_true() { if self.is_true() {
return MarkerTreeKind::True; return MarkerTreeKind::True;
} }
@ -1002,11 +1003,19 @@ impl MarkerTree {
/// ///
/// ASSUMPTION: There is one `extra = "..."`, and it's either the only marker or part of the /// ASSUMPTION: There is one `extra = "..."`, and it's either the only marker or part of the
/// main conjunction. /// main conjunction.
pub fn top_level_extra_name(self) -> Option<ExtraName> { pub fn top_level_extra_name(self) -> Option<Cow<'static, ExtraName>> {
// Fast path: The marker is only a `extra == "..."`.
if let MarkerTreeKind::Extra(marker) = self.kind() {
if marker.edge(true).is_true() {
let CanonicalMarkerValueExtra::Extra(extra) = marker.name;
return Some(Cow::Borrowed(extra));
}
}
let extra_expression = self.top_level_extra()?; let extra_expression = self.top_level_extra()?;
match extra_expression { match extra_expression {
MarkerExpression::Extra { name, .. } => name.into_extra(), MarkerExpression::Extra { name, .. } => name.into_extra().map(Cow::Owned),
_ => unreachable!(), _ => unreachable!(),
} }
} }