mirror of https://github.com/astral-sh/ruff
perf: only compute start offset for overlong lines (#5811)
Moves the computation of the `start_offset` for overlong lines to just before the result is returned. There is a slight overhead for overlong lines (double the work for the first `limit` characters). In practice this results in a speedup on the CPython codebase. Most lines are not overlong, or are not enforced because the line ends with a URL, or does not contain whitespace. Nonetheless, the 0.3% of overlong lines are a lot compared to other violations. ### Before  _Selected W505 and E501_  _All rules_ ### After  _Selected W505 and E501_  _All rules_ CPython line statistics: - Number of Python lines: 867.696 - Number of overlong lines: 2.963 (0.3%) <details> Benchmark selected: ```shell cargo build --release && hyperfine --warmup 10 --min-runs 50 \ "./target/release/ruff ./crates/ruff/resources/test/cpython/ --no-cache -e --select W505,E501" ``` Benchmark all: ```shell cargo build --release && hyperfine --warmup 10 --min-runs 50 \ "./target/release/ruff ./crates/ruff/resources/test/cpython/ --no-cache -e --select ALL" ``` Overlong lines in CPython ```shell cargo run -p ruff_cli -- check crates/ruff/resources/test/cpython/Lib --no-cache --select=E501,W505 --statistics ``` Total Python lines: ```shell find crates/ruff/resources/test/cpython/ -name '*.py' | xargs wc -l ``` </details> (Performance tested on Mac M1)
This commit is contained in:
parent
1dd52ad139
commit
a956226d95
|
|
@ -419,6 +419,13 @@ Summary
|
|||
159.43 ± 2.48 times faster than 'pycodestyle crates/ruff/resources/test/cpython'
|
||||
```
|
||||
|
||||
To benchmark a subset of rules, e.g. `LineTooLong` and `DocLineTooLong`:
|
||||
|
||||
```shell
|
||||
cargo build --release && hyperfine --warmup 10 \
|
||||
"./target/release/ruff ./crates/ruff/resources/test/cpython/ --no-cache -e --select W505,E501"
|
||||
```
|
||||
|
||||
You can run `poetry install` from `./scripts/benchmarks` to create a working environment for the
|
||||
above. All reported benchmarks were computed using the versions specified by
|
||||
`./scripts/benchmarks/pyproject.toml` on Python 3.11.
|
||||
|
|
|
|||
|
|
@ -53,16 +53,13 @@ pub(super) fn is_overlong(
|
|||
task_tags: &[String],
|
||||
tab_size: TabSize,
|
||||
) -> Option<Overlong> {
|
||||
let mut start_offset = line.start();
|
||||
// Each character is between 1-4 bytes. If the number of bytes is smaller than the limit, it cannot be overlong.
|
||||
if line.len() < limit.get() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut width = LineWidth::new(tab_size);
|
||||
|
||||
for c in line.chars() {
|
||||
if width < limit {
|
||||
start_offset += c.text_len();
|
||||
}
|
||||
width = width.add_char(c);
|
||||
}
|
||||
|
||||
width = width.add_str(line.as_str());
|
||||
if width <= limit {
|
||||
return None;
|
||||
}
|
||||
|
|
@ -91,6 +88,17 @@ pub(super) fn is_overlong(
|
|||
}
|
||||
}
|
||||
|
||||
// Obtain the start offset of the part of the line that exceeds the limit
|
||||
let mut start_offset = line.start();
|
||||
let mut start_width = LineWidth::new(tab_size);
|
||||
for c in line.chars() {
|
||||
if start_width < limit {
|
||||
start_offset += c.text_len();
|
||||
start_width = start_width.add_char(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Some(Overlong {
|
||||
range: TextRange::new(start_offset, line.end()),
|
||||
width: width.get(),
|
||||
|
|
|
|||
Loading…
Reference in New Issue