Optimize flattening in apache airflow workspace (#11313)

## Motivation

No-op `uv lock` in apache airflow
(891c67f210ab7c877d1f00ea6ea3d3cdbb0e96ef) is slow, which makes `uv run`
slow, too.

Reference project:

```
$ hyperfine "uv run python -c \"print('hi')\""
Benchmark 1: uv run python -c "print('hi')"
Time (mean ± σ):      16.3 ms ±   1.5 ms    [User: 9.8 ms, System: 6.4 ms]
Range (min … max):    13.0 ms …  20.0 ms    186 runs
```

Apache airflow before:

```
$ hyperfine "uv run python -c \"print('hi')\""
Benchmark 1: uv run python -c "print('hi')"
Time (mean ± σ):     161.0 ms ±   5.2 ms    [User: 135.3 ms, System: 24.1 ms]
Range (min … max):   155.0 ms … 176.3 ms    18 runs
```

## Optimization

`FlatRequiresDist::from_requirements` is taking 50% of main thread
runtime.

Before:


![image](https://github.com/user-attachments/assets/10ea76eb-d1e9-477c-b400-39e653eb8f3a)

After both commits:


![image](https://github.com/user-attachments/assets/5c578ff6-f80b-46bb-9b5f-8be8435c3d85)

Apache airflow after the first commit:

```
$ hyperfine "uv-profiling run python -c \"print('hi')\""
Benchmark 1: uv-profiling run python -c "print('hi')"
  Time (mean ± σ):     122.3 ms ±   5.4 ms    [User: 96.1 ms, System: 24.7 ms]
  Range (min … max):   114.0 ms … 133.2 ms    23 runs
```

Apache airflow after the second commit:

```
$ hyperfine "uv-profiling run python -c \"print('hi')\""
Benchmark 1: uv-profiling run python -c "print('hi')"
  Time (mean ± σ):     108.5 ms ±   3.4 ms    [User: 83.2 ms, System: 24.2 ms]
  Range (min … max):   103.6 ms … 119.9 ms    28 runs
```
This commit is contained in:
konsti 2025-02-07 23:08:40 +01:00 committed by GitHub
parent 711766e79c
commit 6e5479f5db
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 50 additions and 34 deletions

View File

@ -106,7 +106,7 @@ impl BuildRequires {
project_workspace.project_root(),
project_sources,
project_indexes,
extra.as_ref(),
extra.as_deref(),
group,
locations,
project_workspace.workspace(),
@ -181,7 +181,7 @@ impl BuildRequires {
workspace.install_path(),
project_sources,
project_indexes,
extra.as_ref(),
extra.as_deref(),
group,
locations,
workspace,

View File

@ -362,7 +362,7 @@ impl LoweredRequirement {
requirement
.marker
.top_level_extra_name()
.is_some_and(|extra| extra == *target)
.is_some_and(|extra| &*extra == target)
})
})
.cloned()

View File

@ -209,7 +209,7 @@ impl RequiresDist {
project_workspace.project_root(),
project_sources,
project_indexes,
extra.as_ref(),
extra.as_deref(),
group,
locations,
project_workspace.workspace(),
@ -262,7 +262,7 @@ impl RequiresDist {
// If there is no such requirement with the extra, error.
if !metadata.requires_dist.iter().any(|requirement| {
requirement.name == *name
&& requirement.marker.top_level_extra_name().as_ref() == Some(extra)
&& requirement.marker.top_level_extra_name().as_deref() == Some(extra)
}) {
return Err(MetadataError::IncompleteSourceExtra(
name.clone(),
@ -370,6 +370,12 @@ impl FlatRequiresDist {
return Self(requirements);
}
// Memoize the top level extras, in the same order as `requirements`
let top_level_extras: Vec<_> = requirements
.iter()
.map(|req| req.marker.top_level_extra_name())
.collect();
// Transitively process all extras that are recursively included.
let mut flattened = requirements.clone();
let mut seen = FxHashSet::<(ExtraName, MarkerTree)>::default();
@ -384,33 +390,34 @@ impl FlatRequiresDist {
}
// Find the requirements for the extra.
for requirement in &requirements {
if requirement.marker.top_level_extra_name().as_ref() == Some(&extra) {
let requirement = {
let mut marker = marker;
marker.and(requirement.marker);
uv_pypi_types::Requirement {
name: requirement.name.clone(),
extras: requirement.extras.clone(),
groups: requirement.groups.clone(),
source: requirement.source.clone(),
origin: requirement.origin.clone(),
marker: marker.simplify_extras(slice::from_ref(&extra)),
}
};
if requirement.name == *name {
// Add each transitively included extra.
queue.extend(
requirement
.extras
.iter()
.cloned()
.map(|extra| (extra, requirement.marker)),
);
} else {
// Add the requirements for that extra.
flattened.push(requirement);
for (requirement, top_level_extra) in requirements.iter().zip(top_level_extras.iter()) {
if top_level_extra.as_deref() != Some(&extra) {
continue;
}
let requirement = {
let mut marker = marker;
marker.and(requirement.marker);
uv_pypi_types::Requirement {
name: requirement.name.clone(),
extras: requirement.extras.clone(),
groups: requirement.groups.clone(),
source: requirement.source.clone(),
origin: requirement.origin.clone(),
marker: marker.simplify_extras(slice::from_ref(&extra)),
}
};
if requirement.name == *name {
// Add each transitively included extra.
queue.extend(
requirement
.extras
.iter()
.cloned()
.map(|extra| (extra, requirement.marker)),
);
} else {
// Add the requirements for that extra.
flattened.push(requirement);
}
}
}

View File

@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::cmp::Ordering;
use std::fmt::{self, Display, Formatter};
use std::ops::{Bound, Deref};
@ -772,7 +773,7 @@ impl MarkerTree {
}
/// Returns the underlying [`MarkerTreeKind`] of the root node.
pub fn kind(&self) -> MarkerTreeKind<'_> {
pub fn kind(self) -> MarkerTreeKind<'static> {
if self.is_true() {
return MarkerTreeKind::True;
}
@ -1002,11 +1003,19 @@ impl MarkerTree {
///
/// ASSUMPTION: There is one `extra = "..."`, and it's either the only marker or part of the
/// main conjunction.
pub fn top_level_extra_name(self) -> Option<ExtraName> {
pub fn top_level_extra_name(self) -> Option<Cow<'static, ExtraName>> {
// Fast path: The marker is only a `extra == "..."`.
if let MarkerTreeKind::Extra(marker) = self.kind() {
if marker.edge(true).is_true() {
let CanonicalMarkerValueExtra::Extra(extra) = marker.name;
return Some(Cow::Borrowed(extra));
}
}
let extra_expression = self.top_level_extra()?;
match extra_expression {
MarkerExpression::Extra { name, .. } => name.into_extra(),
MarkerExpression::Extra { name, .. } => name.into_extra().map(Cow::Owned),
_ => unreachable!(),
}
}