From a956226d95d069ed0230763aa93b9cfa8555e286 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Mon, 17 Jul 2023 03:05:44 +0200 Subject: [PATCH] perf: only compute start offset for overlong lines (#5811) Moves the computation of the `start_offset` for overlong lines to just before the result is returned. There is a slight overhead for overlong lines (double the work for the first `limit` characters). In practice this results in a speedup on the CPython codebase. Most lines are not overlong, or are not enforced because the line ends with a URL, or does not contain whitespace. Nonetheless, the 0.3% of overlong lines are a lot compared to other violations. ### Before ![selected before](https://github.com/astral-sh/ruff/assets/9756388/d32047df-7fd2-4ae8-8333-1a3679ce000f) _Selected W505 and E501_ ![all before](https://github.com/astral-sh/ruff/assets/9756388/98495118-c474-46ff-873c-fb58a78cfe15) _All rules_ ### After ![selected after](https://github.com/astral-sh/ruff/assets/9756388/e4bd7f10-ff7e-4d52-8267-27cace8c5471) _Selected W505 and E501_ ![all after](https://github.com/astral-sh/ruff/assets/9756388/573bdbe2-c64f-4f22-9659-c68726ff52c0) _All rules_ CPython line statistics: - Number of Python lines: 867.696 - Number of overlong lines: 2.963 (0.3%)
Benchmark selected: ```shell cargo build --release && hyperfine --warmup 10 --min-runs 50 \ "./target/release/ruff ./crates/ruff/resources/test/cpython/ --no-cache -e --select W505,E501" ``` Benchmark all: ```shell cargo build --release && hyperfine --warmup 10 --min-runs 50 \ "./target/release/ruff ./crates/ruff/resources/test/cpython/ --no-cache -e --select ALL" ``` Overlong lines in CPython ```shell cargo run -p ruff_cli -- check crates/ruff/resources/test/cpython/Lib --no-cache --select=E501,W505 --statistics ``` Total Python lines: ```shell find crates/ruff/resources/test/cpython/ -name '*.py' | xargs wc -l ```
(Performance tested on Mac M1) --- CONTRIBUTING.md | 7 ++++++ crates/ruff/src/rules/pycodestyle/helpers.rs | 24 +++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ae047347e0..dfda88dfb4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -419,6 +419,13 @@ Summary 159.43 ± 2.48 times faster than 'pycodestyle crates/ruff/resources/test/cpython' ``` +To benchmark a subset of rules, e.g. `LineTooLong` and `DocLineTooLong`: + +```shell +cargo build --release && hyperfine --warmup 10 \ + "./target/release/ruff ./crates/ruff/resources/test/cpython/ --no-cache -e --select W505,E501" +``` + You can run `poetry install` from `./scripts/benchmarks` to create a working environment for the above. All reported benchmarks were computed using the versions specified by `./scripts/benchmarks/pyproject.toml` on Python 3.11. diff --git a/crates/ruff/src/rules/pycodestyle/helpers.rs b/crates/ruff/src/rules/pycodestyle/helpers.rs index f99524e743..42781c1cd9 100644 --- a/crates/ruff/src/rules/pycodestyle/helpers.rs +++ b/crates/ruff/src/rules/pycodestyle/helpers.rs @@ -53,16 +53,13 @@ pub(super) fn is_overlong( task_tags: &[String], tab_size: TabSize, ) -> Option { - let mut start_offset = line.start(); - let mut width = LineWidth::new(tab_size); - - for c in line.chars() { - if width < limit { - start_offset += c.text_len(); - } - width = width.add_char(c); + // Each character is between 1-4 bytes. If the number of bytes is smaller than the limit, it cannot be overlong. + if line.len() < limit.get() { + return None; } + let mut width = LineWidth::new(tab_size); + width = width.add_str(line.as_str()); if width <= limit { return None; } @@ -91,6 +88,17 @@ pub(super) fn is_overlong( } } + // Obtain the start offset of the part of the line that exceeds the limit + let mut start_offset = line.start(); + let mut start_width = LineWidth::new(tab_size); + for c in line.chars() { + if start_width < limit { + start_offset += c.text_len(); + start_width = start_width.add_char(c); + } else { + break; + } + } Some(Overlong { range: TextRange::new(start_offset, line.end()), width: width.get(),