diff --git a/Cargo.lock b/Cargo.lock
index 6f5d9fc531..43f67cb821 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1949,6 +1949,8 @@ dependencies = [
  "mimalloc",
  "once_cell",
  "ruff",
+ "ruff_python_ast",
+ "rustpython-parser",
  "serde",
  "serde_json",
  "tikv-jemallocator",
diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml
index 76219a5acd..2585eeb7f6 100644
--- a/crates/ruff_benchmark/Cargo.toml
+++ b/crates/ruff_benchmark/Cargo.toml
@@ -16,6 +16,10 @@ bench = false
 name = "linter"
 harness = false
 
+[[bench]]
+name = "parser"
+harness = false
+
 [dependencies]
 once_cell.workspace = true
 serde.workspace = true
@@ -25,7 +29,9 @@ ureq = "2.6.2"
 
 [dev-dependencies]
 ruff.path = "../ruff"
+ruff_python_ast.path = "../ruff_python_ast"
 criterion = { version = "0.4.0"}
+rustpython-parser.workspace = true
 
 [target.'cfg(target_os = "windows")'.dev-dependencies]
 mimalloc = "0.1.34"
diff --git a/crates/ruff_benchmark/benches/parser.rs b/crates/ruff_benchmark/benches/parser.rs
new file mode 100644
index 0000000000..e339e91966
--- /dev/null
+++ b/crates/ruff_benchmark/benches/parser.rs
@@ -0,0 +1,99 @@
+//! Benchmarks `rustpython_parser::parse_program` on a set of real-world Python files,
+//! followed by a walk over the parsed statements.
+
+use criterion::measurement::WallTime;
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use ruff_benchmark::{TestCase, TestCaseSpeed, TestFile, TestFileDownloadError};
+use ruff_python_ast::visitor::{walk_stmt, Visitor};
+use rustpython_parser::ast::Stmt;
+use std::time::Duration;
+
+#[cfg(target_os = "windows")]
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+#[cfg(all(
+    not(target_os = "windows"),
+    not(target_os = "openbsd"),
+    any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "powerpc64"
+    )
+))]
+#[global_allocator]
+static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
+
+/// Builds the list of Python files to benchmark, each wrapped in a speed class.
+///
+/// # Errors
+///
+/// Returns the first [`TestFileDownloadError`] produced by [`TestFile::try_download`].
+fn create_test_cases() -> Result<Vec<TestCase>, TestFileDownloadError> {
+    Ok(vec![
+        TestCase::fast(TestFile::try_download("numpy/globals.py", "https://raw.githubusercontent.com/numpy/numpy/89d64415e349ca75a25250f22b874aa16e5c0973/numpy/_globals.py")?),
+        TestCase::normal(TestFile::try_download(
+            "pydantic/types.py",
+            "https://raw.githubusercontent.com/pydantic/pydantic/83b3c49e99ceb4599d9286a3d793cea44ac36d4b/pydantic/types.py",
+        )?),
+        TestCase::normal(TestFile::try_download("numpy/ctypeslib.py", "https://raw.githubusercontent.com/numpy/numpy/e42c9503a14d66adfd41356ef5640c6975c45218/numpy/ctypeslib.py")?),
+        TestCase::slow(TestFile::try_download(
+            "large/dataset.py",
+            "https://raw.githubusercontent.com/DHI/mikeio/b7d26418f4db2909b0aa965253dbe83194d7bb5b/tests/test_dataset.py",
+        )?),
+    ])
+}
+
+/// Visitor that counts the statements it is asked to visit.
+struct CountVisitor {
+    count: usize,
+}
+
+impl<'a> Visitor<'a> for CountVisitor {
+    fn visit_stmt(&mut self, stmt: &'a Stmt) {
+        // Hand the statement to `walk_stmt` to continue the traversal, then count it.
+        walk_stmt(self, stmt);
+        self.count += 1;
+    }
+}
+
+/// Registers one benchmark per test case in the `parser` group.
+///
+/// # Panics
+///
+/// Panics if the test cases cannot be created or if a test case fails to parse.
+fn benchmark_parser(criterion: &mut Criterion<WallTime>) {
+    let test_cases = create_test_cases().expect("benchmark test files should be available");
+    let mut group = criterion.benchmark_group("parser");
+
+    for case in test_cases {
+        // Throughput is measured in bytes of Python source.
+        group.throughput(Throughput::Bytes(case.code().len() as u64));
+        // Give slower cases a longer measurement window.
+        group.measurement_time(match case.speed() {
+            TestCaseSpeed::Fast => Duration::from_secs(10),
+            TestCaseSpeed::Normal => Duration::from_secs(20),
+            TestCaseSpeed::Slow => Duration::from_secs(45),
+        });
+
+        group.bench_with_input(
+            BenchmarkId::from_parameter(case.name()),
+            &case,
+            |b, case| {
+                b.iter(|| {
+                    let parsed = rustpython_parser::parse_program(case.code(), case.name())
+                        .expect("test case should be valid Python");
+
+                    let mut visitor = CountVisitor { count: 0 };
+                    visitor.visit_body(&parsed);
+                    visitor.count
+                });
+            },
+        );
+    }
+
+    group.finish();
+}
+
+criterion_group!(parser, benchmark_parser);
+criterion_main!(parser);