mirror of https://github.com/astral-sh/ruff
Use a rope to manage string slicing (#576)
This commit is contained in:
parent
c92f5c14a3
commit
e5f30ff5a8
|
|
@ -2209,6 +2209,16 @@ dependencies = [
|
||||||
"winapi 0.3.9",
|
"winapi 0.3.9",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ropey"
|
||||||
|
version = "1.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bbd22239fafefc42138ca5da064f3c17726a80d2379d817a3521240e78dd0064"
|
||||||
|
dependencies = [
|
||||||
|
"smallvec",
|
||||||
|
"str_indices",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ruff"
|
name = "ruff"
|
||||||
version = "0.0.99"
|
version = "0.0.99"
|
||||||
|
|
@ -2239,6 +2249,7 @@ dependencies = [
|
||||||
"path-absolutize",
|
"path-absolutize",
|
||||||
"rayon",
|
"rayon",
|
||||||
"regex",
|
"regex",
|
||||||
|
"ropey",
|
||||||
"rustpython-ast",
|
"rustpython-ast",
|
||||||
"rustpython-common",
|
"rustpython-common",
|
||||||
"rustpython-parser",
|
"rustpython-parser",
|
||||||
|
|
@ -2559,6 +2570,12 @@ version = "1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "str_indices"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9d9199fa80c817e074620be84374a520062ebac833f358d74b37060ce4a0f2c0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "string_cache"
|
name = "string_cache"
|
||||||
version = "0.8.4"
|
version = "0.8.4"
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[workspace]
|
[workspace]
|
||||||
members = [
|
members = [
|
||||||
"flake8_to_ruff",
|
"flake8_to_ruff",
|
||||||
]
|
]
|
||||||
|
|
||||||
[package]
|
[package]
|
||||||
|
|
@ -31,6 +31,7 @@ once_cell = { version = "1.13.1" }
|
||||||
path-absolutize = { version = "3.0.14", features = ["once_cell_cache", "use_unix_paths_on_wasm"] }
|
path-absolutize = { version = "3.0.14", features = ["once_cell_cache", "use_unix_paths_on_wasm"] }
|
||||||
rayon = { version = "1.5.3" }
|
rayon = { version = "1.5.3" }
|
||||||
regex = { version = "1.6.0" }
|
regex = { version = "1.6.0" }
|
||||||
|
ropey = { version = "1.5.0" }
|
||||||
rustpython-ast = { features = ["unparse"], git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
|
rustpython-ast = { features = ["unparse"], git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
|
||||||
rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
|
rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
|
||||||
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
|
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,16 @@
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||||
|
use ropey::Rope;
|
||||||
use ruff::fs;
|
use ruff::fs;
|
||||||
use ruff::source_code_locator::compute_offsets;
|
|
||||||
|
|
||||||
fn criterion_benchmark(c: &mut Criterion) {
|
fn criterion_benchmark(c: &mut Criterion) {
|
||||||
let contents = fs::read_file(Path::new("resources/test/fixtures/D.py")).unwrap();
|
let contents = fs::read_file(Path::new("resources/test/fixtures/D.py")).unwrap();
|
||||||
c.bench_function("compute_offsets", |b| {
|
c.bench_function("rope", |b| {
|
||||||
b.iter(|| compute_offsets(black_box(&contents)))
|
b.iter(|| {
|
||||||
|
let rope = Rope::from_str(black_box(&contents));
|
||||||
|
rope.line_to_char(black_box(4));
|
||||||
|
})
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,60 +1,38 @@
|
||||||
//! Struct used to efficiently slice source code at (row, column) Locations.
|
//! Struct used to efficiently slice source code at (row, column) Locations.
|
||||||
|
|
||||||
use once_cell::unsync::OnceCell;
|
use once_cell::unsync::OnceCell;
|
||||||
|
use ropey::Rope;
|
||||||
use rustpython_ast::Location;
|
use rustpython_ast::Location;
|
||||||
|
|
||||||
use crate::ast::types::Range;
|
use crate::ast::types::Range;
|
||||||
|
|
||||||
pub struct SourceCodeLocator<'a> {
|
pub struct SourceCodeLocator<'a> {
|
||||||
contents: &'a str,
|
contents: &'a str,
|
||||||
offsets: OnceCell<Vec<Vec<usize>>>,
|
rope: OnceCell<Rope>,
|
||||||
}
|
|
||||||
|
|
||||||
pub fn compute_offsets(contents: &str) -> Vec<Vec<usize>> {
|
|
||||||
let mut offsets = vec![vec![]];
|
|
||||||
let mut line_index = 0;
|
|
||||||
let mut char_index = 0;
|
|
||||||
let mut newline = false;
|
|
||||||
for (i, char) in contents.char_indices() {
|
|
||||||
offsets[line_index].push(i);
|
|
||||||
|
|
||||||
newline = char == '\n';
|
|
||||||
if newline {
|
|
||||||
line_index += 1;
|
|
||||||
offsets.push(vec![]);
|
|
||||||
char_index = i + char.len_utf8();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If we end in a newline, add an extra character to indicate the start of that
|
|
||||||
// line.
|
|
||||||
if newline {
|
|
||||||
offsets[line_index].push(char_index);
|
|
||||||
}
|
|
||||||
offsets
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> SourceCodeLocator<'a> {
|
impl<'a> SourceCodeLocator<'a> {
|
||||||
pub fn new(contents: &'a str) -> Self {
|
pub fn new(contents: &'a str) -> Self {
|
||||||
SourceCodeLocator {
|
SourceCodeLocator {
|
||||||
contents,
|
contents,
|
||||||
offsets: OnceCell::new(),
|
rope: OnceCell::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> {
|
fn get_or_init_rope(&self) -> &Rope {
|
||||||
self.offsets.get_or_init(|| compute_offsets(self.contents))
|
self.rope.get_or_init(|| Rope::from_str(self.contents))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn slice_source_code_at(&self, location: &Location) -> &'a str {
|
pub fn slice_source_code_at(&self, location: &Location) -> &'a str {
|
||||||
let offsets = self.get_or_init_offsets();
|
let rope = self.get_or_init_rope();
|
||||||
let offset = offsets[location.row() - 1][location.column()];
|
let offset = rope.line_to_char(location.row() - 1) + location.column();
|
||||||
&self.contents[offset..]
|
&self.contents[offset..]
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn slice_source_code_range(&self, range: &Range) -> &'a str {
|
pub fn slice_source_code_range(&self, range: &Range) -> &'a str {
|
||||||
let offsets = self.get_or_init_offsets();
|
let rope = self.get_or_init_rope();
|
||||||
let start = offsets[range.location.row() - 1][range.location.column()];
|
let start = rope.line_to_char(range.location.row() - 1) + range.location.column();
|
||||||
let end = offsets[range.end_location.row() - 1][range.end_location.column()];
|
let end = rope.line_to_char(range.end_location.row() - 1) + range.end_location.column();
|
||||||
&self.contents[start..end]
|
&self.contents[start..end]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,11 +41,13 @@ impl<'a> SourceCodeLocator<'a> {
|
||||||
outer: &Range,
|
outer: &Range,
|
||||||
inner: &Range,
|
inner: &Range,
|
||||||
) -> (&'a str, &'a str, &'a str) {
|
) -> (&'a str, &'a str, &'a str) {
|
||||||
let offsets = self.get_or_init_offsets();
|
let rope = self.get_or_init_rope();
|
||||||
let outer_start = offsets[outer.location.row() - 1][outer.location.column()];
|
let outer_start = rope.line_to_char(outer.location.row() - 1) + outer.location.column();
|
||||||
let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column()];
|
let outer_end =
|
||||||
let inner_start = offsets[inner.location.row() - 1][inner.location.column()];
|
rope.line_to_char(outer.end_location.row() - 1) + outer.end_location.column();
|
||||||
let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column()];
|
let inner_start = rope.line_to_char(inner.location.row() - 1) + inner.location.column();
|
||||||
|
let inner_end =
|
||||||
|
rope.line_to_char(inner.end_location.row() - 1) + inner.end_location.column();
|
||||||
(
|
(
|
||||||
&self.contents[outer_start..inner_start],
|
&self.contents[outer_start..inner_start],
|
||||||
&self.contents[inner_start..inner_end],
|
&self.contents[inner_start..inner_end],
|
||||||
|
|
@ -75,41 +55,3 @@ impl<'a> SourceCodeLocator<'a> {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use crate::source_code_locator::SourceCodeLocator;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn source_code_locator_init() {
|
|
||||||
let content = "x = 1";
|
|
||||||
let locator = SourceCodeLocator::new(content);
|
|
||||||
let offsets = locator.get_or_init_offsets();
|
|
||||||
assert_eq!(offsets.len(), 1);
|
|
||||||
assert_eq!(offsets[0], [0, 1, 2, 3, 4]);
|
|
||||||
|
|
||||||
let content = "x = 1\n";
|
|
||||||
let locator = SourceCodeLocator::new(content);
|
|
||||||
let offsets = locator.get_or_init_offsets();
|
|
||||||
assert_eq!(offsets.len(), 2);
|
|
||||||
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
|
|
||||||
assert_eq!(offsets[1], [6]);
|
|
||||||
|
|
||||||
let content = "x = 1\ny = 2\nz = x + y\n";
|
|
||||||
let locator = SourceCodeLocator::new(content);
|
|
||||||
let offsets = locator.get_or_init_offsets();
|
|
||||||
assert_eq!(offsets.len(), 4);
|
|
||||||
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
|
|
||||||
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]);
|
|
||||||
assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]);
|
|
||||||
assert_eq!(offsets[3], [22]);
|
|
||||||
|
|
||||||
let content = "# \u{4e9c}\nclass Foo:\n \"\"\".\"\"\"";
|
|
||||||
let locator = SourceCodeLocator::new(content);
|
|
||||||
let offsets = locator.get_or_init_offsets();
|
|
||||||
assert_eq!(offsets.len(), 3);
|
|
||||||
assert_eq!(offsets[0], [0, 1, 2, 5]);
|
|
||||||
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
|
|
||||||
assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue