diff --git a/Cargo.lock b/Cargo.lock index 0680e0c28..ce0325b9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4719,7 +4719,6 @@ dependencies = [ "uv-configuration", "uv-dispatch", "uv-distribution", - "uv-distribution-filename", "uv-distribution-types", "uv-extract", "uv-install-wheel", diff --git a/crates/uv-bench/Cargo.toml b/crates/uv-bench/Cargo.toml index 65ce78731..5d55fafd7 100644 --- a/crates/uv-bench/Cargo.toml +++ b/crates/uv-bench/Cargo.toml @@ -18,11 +18,6 @@ workspace = true doctest = false bench = false -[[bench]] -name = "distribution-filename" -path = "benches/distribution_filename.rs" -harness = false - [[bench]] name = "uv" path = "benches/uv.rs" @@ -34,7 +29,6 @@ uv-client = { workspace = true } uv-configuration = { workspace = true } uv-dispatch = { workspace = true } uv-distribution = { workspace = true } -uv-distribution-filename = { workspace = true } uv-distribution-types = { workspace = true } uv-extract = { workspace = true, optional = true } uv-install-wheel = { workspace = true } diff --git a/crates/uv-bench/benches/distribution_filename.rs b/crates/uv-bench/benches/distribution_filename.rs deleted file mode 100644 index 99d72cf05..000000000 --- a/crates/uv-bench/benches/distribution_filename.rs +++ /dev/null @@ -1,168 +0,0 @@ -use std::str::FromStr; - -use uv_bench::criterion::{ - BenchmarkId, Criterion, Throughput, criterion_group, criterion_main, measurement::WallTime, -}; -use uv_distribution_filename::WheelFilename; -use uv_platform_tags::{AbiTag, LanguageTag, PlatformTag, Tags}; - -/// A set of platform tags extracted from burntsushi's Archlinux workstation. -/// We could just re-create these via `Tags::from_env`, but those might differ -/// depending on the platform. This way, we always use the same data. It also -/// lets us assert tag compatibility regardless of where the benchmarks run. -const PLATFORM_TAGS: &[(&str, &str, &str)] = include!("../inputs/platform_tags.rs"); - -/// A set of wheel names used in the benchmarks below. We pick short and long -/// names, as well as compatible and not-compatibles (with `PLATFORM_TAGS`) -/// names. -/// -/// The tuple is (name, filename, compatible) where `name` is a descriptive -/// name for humans used in the benchmark definition. And `filename` is the -/// actual wheel filename we want to benchmark operation on. And `compatible` -/// indicates whether the tags in the wheel filename are expected to be -/// compatible with the tags in `PLATFORM_TAGS`. -const WHEEL_NAMES: &[(&str, &str, bool)] = &[ - // This tests a case with a very short name that *is* compatible with - // PLATFORM_TAGS. It only uses one tag for each component (one Python - // version, one ABI and one platform). - ( - "flyte-short-compatible", - "ipython-2.1.0-py3-none-any.whl", - true, - ), - // This tests a case with a long name that is *not* compatible. That - // is, all platform tags need to be checked against the tags in the - // wheel filename. This is essentially the worst possible practical - // case. - ( - "flyte-long-incompatible", - "protobuf-3.5.2.post1-cp36-cp36m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", - false, - ), - // This tests a case with a long name that *is* compatible. We - // expect this to be (on average) quicker because the compatibility - // check stops as soon as a positive match is found. (Where as the - // incompatible case needs to check all tags.) - ( - "flyte-long-compatible", - "coverage-6.6.0b1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", - true, - ), -]; - -/// A list of names that are candidates for wheel filenames but will ultimately -/// fail to parse. -const INVALID_WHEEL_NAMES: &[(&str, &str)] = &[ - ("flyte-short-extension", "mock-5.1.0.tar.gz"), - ( - "flyte-long-extension", - "Pillow-5.4.0.dev0-py3.7-macosx-10.13-x86_64.egg", - ), -]; - -/// Benchmarks the construction of platform tags. -/// -/// This only happens ~once per program startup. Originally, construction was -/// trivial. But to speed up `WheelFilename::is_compatible`, we added some -/// extra processing. We thus expect construction to become slower, but we -/// write a benchmark to ensure it is still "reasonable." -fn benchmark_build_platform_tags(c: &mut Criterion) { - let tags: Vec<(LanguageTag, AbiTag, PlatformTag)> = PLATFORM_TAGS - .iter() - .map(|&(py, abi, plat)| { - ( - LanguageTag::from_str(py).unwrap(), - AbiTag::from_str(abi).unwrap(), - PlatformTag::from_str(plat).unwrap(), - ) - }) - .collect(); - - let mut group = c.benchmark_group("build_platform_tags"); - group.bench_function(BenchmarkId::from_parameter("burntsushi-archlinux"), |b| { - b.iter(|| std::hint::black_box(Tags::new(tags.clone()))); - }); - group.finish(); -} - -/// Benchmarks `WheelFilename::from_str`. This has been observed to take some -/// non-trivial time in profiling (although, at time of writing, not as much -/// as tag compatibility). In the process of optimizing tag compatibility, -/// we tweaked wheel filename parsing. This benchmark was therefore added to -/// ensure we didn't regress here. -fn benchmark_wheelname_parsing(c: &mut Criterion) { - let mut group = c.benchmark_group("wheelname_parsing"); - for (name, filename, _) in WHEEL_NAMES.iter().copied() { - let len = u64::try_from(filename.len()).expect("length fits in u64"); - group.throughput(Throughput::Bytes(len)); - group.bench_function(BenchmarkId::from_parameter(name), |b| { - b.iter(|| { - filename - .parse::() - .expect("valid wheel filename"); - }); - }); - } - group.finish(); -} - -/// Benchmarks `WheelFilename::from_str` when it fails. This routine is called -/// on every filename in a package's metadata. A non-trivial portion of which -/// are not wheel filenames. Ensuring that the error path is fast is thus -/// probably a good idea. -fn benchmark_wheelname_parsing_failure(c: &mut Criterion) { - let mut group = c.benchmark_group("wheelname_parsing_failure"); - for (name, filename) in INVALID_WHEEL_NAMES.iter().copied() { - let len = u64::try_from(filename.len()).expect("length fits in u64"); - group.throughput(Throughput::Bytes(len)); - group.bench_function(BenchmarkId::from_parameter(name), |b| { - b.iter(|| { - filename - .parse::() - .expect_err("invalid wheel filename"); - }); - }); - } - group.finish(); -} - -/// Benchmarks the `WheelFilename::is_compatible` routine. This was revealed -/// to be the #1 bottleneck in the resolver. The main issue was that the -/// set of platform tags (generated once) is quite large, and the original -/// implementation did an exhaustive search over each of them for each tag in -/// the wheel filename. -fn benchmark_wheelname_tag_compatibility(c: &mut Criterion) { - let tags: Vec<(LanguageTag, AbiTag, PlatformTag)> = PLATFORM_TAGS - .iter() - .map(|&(py, abi, plat)| { - ( - LanguageTag::from_str(py).unwrap(), - AbiTag::from_str(abi).unwrap(), - PlatformTag::from_str(plat).unwrap(), - ) - }) - .collect(); - let tags = Tags::new(tags); - - let mut group = c.benchmark_group("wheelname_tag_compatibility"); - for (name, filename, expected) in WHEEL_NAMES.iter().copied() { - let wheelname: WheelFilename = filename.parse().expect("valid wheel filename"); - let len = u64::try_from(filename.len()).expect("length fits in u64"); - group.throughput(Throughput::Bytes(len)); - group.bench_function(BenchmarkId::from_parameter(name), |b| { - b.iter(|| { - assert_eq!(expected, wheelname.is_compatible(&tags)); - }); - }); - } - group.finish(); -} - -criterion_group!( - uv_distribution_filename, - benchmark_build_platform_tags, - benchmark_wheelname_parsing, - benchmark_wheelname_parsing_failure, - benchmark_wheelname_tag_compatibility, -); -criterion_main!(uv_distribution_filename);