From d5700d7c6987a54843830b27833109b28ecd5953 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 14 Mar 2023 08:35:07 +0100 Subject: [PATCH] Add Micro Benchmark (#3466) --- .cargo/config.toml | 1 + Cargo.lock | 12 ++ Cargo.toml | 10 ++ crates/ruff/Cargo.toml | 12 +- crates/ruff_benchmark/Cargo.toml | 31 +++++ crates/ruff_benchmark/README.md | 87 ++++++++++++++ crates/ruff_benchmark/benches/linter.rs | 71 ++++++++++++ crates/ruff_benchmark/src/lib.rs | 144 ++++++++++++++++++++++++ 8 files changed, 362 insertions(+), 6 deletions(-) create mode 100644 crates/ruff_benchmark/Cargo.toml create mode 100644 crates/ruff_benchmark/README.md create mode 100644 crates/ruff_benchmark/benches/linter.rs create mode 100644 crates/ruff_benchmark/src/lib.rs diff --git a/.cargo/config.toml b/.cargo/config.toml index 5fef0dd0b6..27bc03c090 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,5 +1,6 @@ [alias] dev = "run --package ruff_dev --bin ruff_dev" +benchmark = "bench -p ruff_benchmark --" [target.'cfg(all())'] rustflags = [ diff --git a/Cargo.lock b/Cargo.lock index c4efad6169..164cdfaca9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2024,6 +2024,18 @@ dependencies = [ "toml", ] +[[package]] +name = "ruff_benchmark" +version = "0.0.0" +dependencies = [ + "criterion", + "mimalloc", + "ruff", + "tikv-jemallocator", + "ureq", + "url", +] + [[package]] name = "ruff_cache" version = "0.0.0" diff --git a/Cargo.toml b/Cargo.toml index d8723972f0..76e3891cbf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,10 @@ members = ["crates/*"] [workspace.package] edition = "2021" rust-version = "1.67" +homepage = "https://beta.ruff.rs/docs/" +documentation = "https://beta.ruff.rs/docs/" +repository = "https://github.com/charliermarsh/ruff" +authors = ["Charlie Marsh "] [workspace.dependencies] anyhow = { version = "1.0.69" } @@ -59,3 +63,9 @@ opt-level = 3 # 
https://github.com/bytecodealliance/wasm-tools/blob/b5c3d98e40590512a3b12470ef358d5c7b983b15/crates/wasmparser/src/limits.rs#L29 [profile.dev.package.rustpython-parser] opt-level = 1 + +# Use the `--profile release-debug` flag to show symbols in release mode. +# e.g. `cargo build --profile release-debug` +[profile.release-debug] +inherits = "release" +debug = 1 diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 7414731c9a..bc67ee1b5b 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -1,12 +1,12 @@ [package] name = "ruff" version = "0.0.255" -authors = ["Charlie Marsh "] -edition = { workspace = true } -rust-version = { workspace = true } -documentation = "https://github.com/charliermarsh/ruff" -homepage = "https://github.com/charliermarsh/ruff" -repository = "https://github.com/charliermarsh/ruff" +authors.workspace = true +edition.workspace = true +rust-version.workspace = true +documentation.workspace = true +homepage.workspace = true +repository.workspace = true readme = "README.md" license = "MIT" diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml new file mode 100644 index 0000000000..0d4899b11e --- /dev/null +++ b/crates/ruff_benchmark/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "ruff_benchmark" +version = "0.0.0" +publish = false +edition.workspace = true +authors.workspace = true +homepage.workspace = true +documentation.workspace = true +repository.workspace = true +description = "Ruff Micro-benchmarks" + +[lib] +bench = false + +[[bench]] +name = "linter" +harness = false + +[dependencies] +ruff.path = "../ruff" +url = "2.3.1" +ureq = "2.6.2" + +[dev-dependencies] +criterion = { version = "0.4.0"} + +[target.'cfg(target_os = "windows")'.dependencies] +mimalloc = "0.1.34" + +[target.'cfg(all(not(target_os = "windows"), not(target_os = "openbsd"), any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64")))'.dependencies] +tikv-jemallocator = "0.5.0" diff --git 
a/crates/ruff_benchmark/README.md b/crates/ruff_benchmark/README.md new file mode 100644 index 0000000000..292d0dac3b --- /dev/null +++ b/crates/ruff_benchmark/README.md @@ -0,0 +1,87 @@ +## Ruff Micro-benchmarks + +Benchmarks for the different Ruff-tools. + +### Run Benchmark + +You can run the benchmarks with + +```shell +cargo benchmark +``` + +### Benchmark driven Development + +You can use `--save-baseline=<name>` to store an initial baseline benchmark (e.g. on `main`) and then use +`--baseline=<name>` to compare against that benchmark. Criterion will print a message telling you if the benchmark improved/regressed compared to that baseline. + +```shell +# Run once on your "baseline" code +cargo benchmark --save-baseline=main + +# Then iterate with +cargo benchmark --baseline=main +``` + +### PR Summary +You can use `--save-baseline` and `critcmp` to get a pretty comparison between two recordings. +This is useful to illustrate the improvements of a PR. + +```shell +# On main +cargo benchmark --save-baseline=main + +# After applying your changes +cargo benchmark --save-baseline=pr + +critcmp main pr +``` + +You must install [`critcmp`](https://github.com/BurntSushi/critcmp) for the comparison. + +```bash +cargo install critcmp +``` + +### Tips + +* Use `cargo benchmark <filter>` to only run specific benchmarks. For example: `cargo benchmark linter/pydantic` to only run the pydantic tests.
+* Use `cargo benchmark --quiet` for a more cleaned up output (without statistical relevance) +* Use `cargo benchmark --quick` to get faster results (more prone to noise) + +## Profiling + +### Linux + +Install `perf` and build `ruff_benchmark` with the `release-debug` profile and then run it with perf + +```shell +cargo bench -p ruff_benchmark --no-run --profile=release-debug && perf record -g -F 9999 cargo bench -p ruff_benchmark --profile=release-debug -- --profile-time=1 +``` + +Then convert the recorded profile + +```shell +perf script -F +pid > /tmp/test.perf +``` + +You can now view the converted file with [firefox profiler](https://profiler.firefox.com/) + +You can find a more in-depth guide [here](https://profiler.firefox.com/docs/#/./guide-perf-profiling) + +### Mac + +Install [`cargo-instruments`](https://crates.io/crates/cargo-instruments): + +```shell +cargo install cargo-instruments +``` + +Then run the profiler with + +```shell +cargo instruments -t time --bench linter --profile release-debug -p ruff_benchmark -- --profile-time=1 +``` + +* `-t`: Specifies what to profile. Useful options are `time` to profile the wall time and `alloc` for profiling the allocations. 
+* You may want to pass an additional filter to run a single test file diff --git a/crates/ruff_benchmark/benches/linter.rs b/crates/ruff_benchmark/benches/linter.rs new file mode 100644 index 0000000000..c0777b6e6c --- /dev/null +++ b/crates/ruff_benchmark/benches/linter.rs @@ -0,0 +1,71 @@ +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use ruff::linter::lint_only; +use ruff::settings::{flags, Settings}; +use ruff_benchmark::{TestCase, TestCaseSpeed, TestFile, TestFileDownloadError}; +use std::time::Duration; + +#[cfg(target_os = "windows")] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + +#[cfg(all( + not(target_os = "windows"), + not(target_os = "openbsd"), + any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "powerpc64" + ) +))] +#[global_allocator] +static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; + +fn create_test_cases() -> Result<Vec<TestCase>, TestFileDownloadError> { + Ok(vec![ + TestCase::fast(TestFile::try_download("numpy/globals.py", "https://github.com/numpy/numpy/blob/89d64415e349ca75a25250f22b874aa16e5c0973/numpy/_globals.py")?), + TestCase::normal(TestFile::try_download( + "pydantic/types.py", + "https://raw.githubusercontent.com/pydantic/pydantic/main/pydantic/types.py", + )?), + TestCase::normal(TestFile::try_download("numpy/ctypeslib.py", "https://github.com/numpy/numpy/blob/main/numpy/ctypeslib.py")?), + TestCase::slow(TestFile::try_download( + "large/dataset.py", + "https://raw.githubusercontent.com/DHI/mikeio/b7d26418f4db2909b0aa965253dbe83194d7bb5b/tests/test_dataset.py", + )?), + ]) +} + +fn benchmark_linter(criterion: &mut Criterion) { + let test_cases = create_test_cases().unwrap(); + let mut group = criterion.benchmark_group("linter"); + + for case in test_cases { + group.throughput(Throughput::Bytes(case.code().len() as u64)); + group.measurement_time(match case.speed() { + TestCaseSpeed::Fast => Duration::from_secs(10),
+ TestCaseSpeed::Normal => Duration::from_secs(20), + TestCaseSpeed::Slow => Duration::from_secs(30), + }); + group.bench_with_input( + BenchmarkId::from_parameter(case.name()), + &case, + |b, case| { + b.iter(|| { + lint_only( + case.code(), + &case.path(), + None, + &black_box(Settings::default()), + flags::Noqa::Enabled, + flags::Autofix::Enabled, + ) + }); + }, + ); + } + + group.finish(); +} + +criterion_group!(benches, benchmark_linter); +criterion_main!(benches); diff --git a/crates/ruff_benchmark/src/lib.rs b/crates/ruff_benchmark/src/lib.rs new file mode 100644 index 0000000000..af7eb91ae9 --- /dev/null +++ b/crates/ruff_benchmark/src/lib.rs @@ -0,0 +1,144 @@ +use std::fmt::{Display, Formatter}; +use std::path::{Path, PathBuf}; +use url::Url; + +/// Relative size of a test case. Benchmarks can use it to configure the time for how long a benchmark should run to get stable results. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub enum TestCaseSpeed { + /// A test case that is fast to run + Fast, + + /// A normal test case + Normal, + + /// A slow test case + Slow, +} + +#[derive(Debug, Clone)] +pub struct TestCase { + file: TestFile, + speed: TestCaseSpeed, +} + +impl TestCase { + pub fn fast(file: TestFile) -> Self { + Self { + file, + speed: TestCaseSpeed::Fast, + } + } + + pub fn normal(file: TestFile) -> Self { + Self { + file, + speed: TestCaseSpeed::Normal, + } + } + + pub fn slow(file: TestFile) -> Self { + Self { + file, + speed: TestCaseSpeed::Slow, + } + } +} + +impl TestCase { + pub fn code(&self) -> &str { + &self.file.code + } + + pub fn name(&self) -> &str { + &self.file.name + } + + pub fn speed(&self) -> TestCaseSpeed { + self.speed + } + + pub fn path(&self) -> PathBuf { + Path::new("target").join(self.name()) + } +} + +#[derive(Debug, Clone)] +pub struct TestFile { + name: String, + code: String, +} + +impl TestFile { + pub fn new(name: String, code: String) -> Self { + Self { name, code } + } + + 
#[allow(clippy::print_stderr)] + pub fn try_download(name: &str, url: &str) -> Result<TestFile, TestFileDownloadError> { + let url = Url::parse(url)?; + + let cached_filename = Path::new("target").join(name); + + if let Ok(content) = std::fs::read_to_string(&cached_filename) { + Ok(TestFile::new(name.to_string(), content)) + } else { + // File not yet cached, download and cache it in the target directory + let response = ureq::get(url.as_str()).call()?; + + let content = response.into_string()?; + + // SAFETY: There's always the `target` directory + let parent = cached_filename.parent().unwrap(); + if let Err(error) = std::fs::create_dir_all(parent) { + eprintln!("Failed to create the directory for the test case {name}: {error}"); + } else if let Err(error) = std::fs::write(cached_filename, &content) { + eprintln!("Failed to cache test case file downloaded from {url}: {error}"); + } + + Ok(TestFile::new(name.to_string(), content)) + } + } +} + +#[derive(Debug)] +pub enum TestFileDownloadError { + UrlParse(url::ParseError), + Request(Box<ureq::Error>), + Download(std::io::Error), +} + +impl From<url::ParseError> for TestFileDownloadError { + fn from(value: url::ParseError) -> Self { + Self::UrlParse(value) + } +} + +impl From<ureq::Error> for TestFileDownloadError { + fn from(value: ureq::Error) -> Self { + Self::Request(Box::new(value)) + } +} + +impl From<std::io::Error> for TestFileDownloadError { + fn from(value: std::io::Error) -> Self { + Self::Download(value) + } +} + +impl Display for TestFileDownloadError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + TestFileDownloadError::UrlParse(inner) => { + write!(f, "Failed to parse url: {inner}") + } + TestFileDownloadError::Request(inner) => { + write!(f, "Failed to download file: {inner}") + } + TestFileDownloadError::Download(inner) => { + write!(f, "Failed to download file: {inner}") + } + } + } +} + +impl std::error::Error for TestFileDownloadError {}