mirror of https://github.com/astral-sh/ruff
Use a rope to manage string slicing (#576)
This commit is contained in:
parent
c92f5c14a3
commit
e5f30ff5a8
|
|
@ -2209,6 +2209,16 @@ dependencies = [
|
|||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ropey"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbd22239fafefc42138ca5da064f3c17726a80d2379d817a3521240e78dd0064"
|
||||
dependencies = [
|
||||
"smallvec",
|
||||
"str_indices",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ruff"
|
||||
version = "0.0.99"
|
||||
|
|
@ -2239,6 +2249,7 @@ dependencies = [
|
|||
"path-absolutize",
|
||||
"rayon",
|
||||
"regex",
|
||||
"ropey",
|
||||
"rustpython-ast",
|
||||
"rustpython-common",
|
||||
"rustpython-parser",
|
||||
|
|
@ -2559,6 +2570,12 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "str_indices"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9d9199fa80c817e074620be84374a520062ebac833f358d74b37060ce4a0f2c0"
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.8.4"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[workspace]
|
||||
members = [
|
||||
"flake8_to_ruff",
|
||||
"flake8_to_ruff",
|
||||
]
|
||||
|
||||
[package]
|
||||
|
|
@ -31,6 +31,7 @@ once_cell = { version = "1.13.1" }
|
|||
path-absolutize = { version = "3.0.14", features = ["once_cell_cache", "use_unix_paths_on_wasm"] }
|
||||
rayon = { version = "1.5.3" }
|
||||
regex = { version = "1.6.0" }
|
||||
ropey = { version = "1.5.0" }
|
||||
rustpython-ast = { features = ["unparse"], git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
|
||||
rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
|
||||
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
|
||||
|
|
|
|||
|
|
@ -1,13 +1,16 @@
|
|||
use std::path::Path;
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use ropey::Rope;
|
||||
use ruff::fs;
|
||||
use ruff::source_code_locator::compute_offsets;
|
||||
|
||||
fn criterion_benchmark(c: &mut Criterion) {
|
||||
let contents = fs::read_file(Path::new("resources/test/fixtures/D.py")).unwrap();
|
||||
c.bench_function("compute_offsets", |b| {
|
||||
b.iter(|| compute_offsets(black_box(&contents)))
|
||||
c.bench_function("rope", |b| {
|
||||
b.iter(|| {
|
||||
let rope = Rope::from_str(black_box(&contents));
|
||||
rope.line_to_char(black_box(4));
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,60 +1,38 @@
|
|||
//! Struct used to efficiently slice source code at (row, column) Locations.
|
||||
|
||||
use once_cell::unsync::OnceCell;
|
||||
use ropey::Rope;
|
||||
use rustpython_ast::Location;
|
||||
|
||||
use crate::ast::types::Range;
|
||||
|
||||
pub struct SourceCodeLocator<'a> {
|
||||
contents: &'a str,
|
||||
offsets: OnceCell<Vec<Vec<usize>>>,
|
||||
}
|
||||
|
||||
pub fn compute_offsets(contents: &str) -> Vec<Vec<usize>> {
|
||||
let mut offsets = vec![vec![]];
|
||||
let mut line_index = 0;
|
||||
let mut char_index = 0;
|
||||
let mut newline = false;
|
||||
for (i, char) in contents.char_indices() {
|
||||
offsets[line_index].push(i);
|
||||
|
||||
newline = char == '\n';
|
||||
if newline {
|
||||
line_index += 1;
|
||||
offsets.push(vec![]);
|
||||
char_index = i + char.len_utf8();
|
||||
}
|
||||
}
|
||||
// If we end in a newline, add an extra character to indicate the start of that
|
||||
// line.
|
||||
if newline {
|
||||
offsets[line_index].push(char_index);
|
||||
}
|
||||
offsets
|
||||
rope: OnceCell<Rope>,
|
||||
}
|
||||
|
||||
impl<'a> SourceCodeLocator<'a> {
|
||||
pub fn new(contents: &'a str) -> Self {
|
||||
SourceCodeLocator {
|
||||
contents,
|
||||
offsets: OnceCell::new(),
|
||||
rope: OnceCell::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> {
|
||||
self.offsets.get_or_init(|| compute_offsets(self.contents))
|
||||
fn get_or_init_rope(&self) -> &Rope {
|
||||
self.rope.get_or_init(|| Rope::from_str(self.contents))
|
||||
}
|
||||
|
||||
pub fn slice_source_code_at(&self, location: &Location) -> &'a str {
|
||||
let offsets = self.get_or_init_offsets();
|
||||
let offset = offsets[location.row() - 1][location.column()];
|
||||
let rope = self.get_or_init_rope();
|
||||
let offset = rope.line_to_char(location.row() - 1) + location.column();
|
||||
&self.contents[offset..]
|
||||
}
|
||||
|
||||
pub fn slice_source_code_range(&self, range: &Range) -> &'a str {
|
||||
let offsets = self.get_or_init_offsets();
|
||||
let start = offsets[range.location.row() - 1][range.location.column()];
|
||||
let end = offsets[range.end_location.row() - 1][range.end_location.column()];
|
||||
let rope = self.get_or_init_rope();
|
||||
let start = rope.line_to_char(range.location.row() - 1) + range.location.column();
|
||||
let end = rope.line_to_char(range.end_location.row() - 1) + range.end_location.column();
|
||||
&self.contents[start..end]
|
||||
}
|
||||
|
||||
|
|
@ -63,11 +41,13 @@ impl<'a> SourceCodeLocator<'a> {
|
|||
outer: &Range,
|
||||
inner: &Range,
|
||||
) -> (&'a str, &'a str, &'a str) {
|
||||
let offsets = self.get_or_init_offsets();
|
||||
let outer_start = offsets[outer.location.row() - 1][outer.location.column()];
|
||||
let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column()];
|
||||
let inner_start = offsets[inner.location.row() - 1][inner.location.column()];
|
||||
let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column()];
|
||||
let rope = self.get_or_init_rope();
|
||||
let outer_start = rope.line_to_char(outer.location.row() - 1) + outer.location.column();
|
||||
let outer_end =
|
||||
rope.line_to_char(outer.end_location.row() - 1) + outer.end_location.column();
|
||||
let inner_start = rope.line_to_char(inner.location.row() - 1) + inner.location.column();
|
||||
let inner_end =
|
||||
rope.line_to_char(inner.end_location.row() - 1) + inner.end_location.column();
|
||||
(
|
||||
&self.contents[outer_start..inner_start],
|
||||
&self.contents[inner_start..inner_end],
|
||||
|
|
@ -75,41 +55,3 @@ impl<'a> SourceCodeLocator<'a> {
|
|||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::source_code_locator::SourceCodeLocator;
|
||||
|
||||
#[test]
|
||||
fn source_code_locator_init() {
|
||||
let content = "x = 1";
|
||||
let locator = SourceCodeLocator::new(content);
|
||||
let offsets = locator.get_or_init_offsets();
|
||||
assert_eq!(offsets.len(), 1);
|
||||
assert_eq!(offsets[0], [0, 1, 2, 3, 4]);
|
||||
|
||||
let content = "x = 1\n";
|
||||
let locator = SourceCodeLocator::new(content);
|
||||
let offsets = locator.get_or_init_offsets();
|
||||
assert_eq!(offsets.len(), 2);
|
||||
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
|
||||
assert_eq!(offsets[1], [6]);
|
||||
|
||||
let content = "x = 1\ny = 2\nz = x + y\n";
|
||||
let locator = SourceCodeLocator::new(content);
|
||||
let offsets = locator.get_or_init_offsets();
|
||||
assert_eq!(offsets.len(), 4);
|
||||
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
|
||||
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]);
|
||||
assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]);
|
||||
assert_eq!(offsets[3], [22]);
|
||||
|
||||
let content = "# \u{4e9c}\nclass Foo:\n \"\"\".\"\"\"";
|
||||
let locator = SourceCodeLocator::new(content);
|
||||
let offsets = locator.get_or_init_offsets();
|
||||
assert_eq!(offsets.len(), 3);
|
||||
assert_eq!(offsets[0], [0, 1, 2, 5]);
|
||||
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
|
||||
assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue