Use a rope to manage string slicing (#576)

This commit is contained in:
Charlie Marsh 2022-11-03 23:23:38 -04:00 committed by GitHub
parent c92f5c14a3
commit e5f30ff5a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 42 additions and 79 deletions

17
Cargo.lock generated
View File

@ -2209,6 +2209,16 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "ropey"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd22239fafefc42138ca5da064f3c17726a80d2379d817a3521240e78dd0064"
dependencies = [
"smallvec",
"str_indices",
]
[[package]]
name = "ruff"
version = "0.0.99"
@ -2239,6 +2249,7 @@ dependencies = [
"path-absolutize",
"rayon",
"regex",
"ropey",
"rustpython-ast",
"rustpython-common",
"rustpython-parser",
@ -2559,6 +2570,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "str_indices"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d9199fa80c817e074620be84374a520062ebac833f358d74b37060ce4a0f2c0"
[[package]]
name = "string_cache"
version = "0.8.4"

View File

@ -1,6 +1,6 @@
[workspace]
members = [
"flake8_to_ruff",
"flake8_to_ruff",
]
[package]
@ -31,6 +31,7 @@ once_cell = { version = "1.13.1" }
path-absolutize = { version = "3.0.14", features = ["once_cell_cache", "use_unix_paths_on_wasm"] }
rayon = { version = "1.5.3" }
regex = { version = "1.6.0" }
ropey = { version = "1.5.0" }
rustpython-ast = { features = ["unparse"], git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "77b821a1941019fe34f73ce17cea013ae1b98fd0" }

View File

@ -1,13 +1,16 @@
use std::path::Path;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use ropey::Rope;
use ruff::fs;
use ruff::source_code_locator::compute_offsets;
fn criterion_benchmark(c: &mut Criterion) {
let contents = fs::read_file(Path::new("resources/test/fixtures/D.py")).unwrap();
c.bench_function("compute_offsets", |b| {
b.iter(|| compute_offsets(black_box(&contents)))
c.bench_function("rope", |b| {
b.iter(|| {
let rope = Rope::from_str(black_box(&contents));
rope.line_to_char(black_box(4));
})
});
}

View File

@ -1,60 +1,38 @@
//! Struct used to efficiently slice source code at (row, column) Locations.
use once_cell::unsync::OnceCell;
use ropey::Rope;
use rustpython_ast::Location;
use crate::ast::types::Range;
pub struct SourceCodeLocator<'a> {
contents: &'a str,
offsets: OnceCell<Vec<Vec<usize>>>,
}
pub fn compute_offsets(contents: &str) -> Vec<Vec<usize>> {
let mut offsets = vec![vec![]];
let mut line_index = 0;
let mut char_index = 0;
let mut newline = false;
for (i, char) in contents.char_indices() {
offsets[line_index].push(i);
newline = char == '\n';
if newline {
line_index += 1;
offsets.push(vec![]);
char_index = i + char.len_utf8();
}
}
// If we end in a newline, add an extra character to indicate the start of that
// line.
if newline {
offsets[line_index].push(char_index);
}
offsets
rope: OnceCell<Rope>,
}
impl<'a> SourceCodeLocator<'a> {
/// Creates a locator over `contents`.
///
/// Only the borrowed text is stored here; every lazily-initialised cell
/// starts out empty and is filled in on first use.
pub fn new(contents: &'a str) -> Self {
SourceCodeLocator {
contents,
offsets: OnceCell::new(),
rope: OnceCell::new(),
}
}
fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> {
self.offsets.get_or_init(|| compute_offsets(self.contents))
/// Returns the rope built from `contents`, constructing it on the first call
/// and handing back the cached instance afterwards.
fn get_or_init_rope(&self) -> &Rope {
    let build_rope = || Rope::from_str(self.contents);
    self.rope.get_or_init(build_rope)
}
/// Returns the tail of the source text that begins at `location`.
///
/// `location.row()` is treated as 1-based and `location.column()` as a
/// 0-based character offset within that row.
///
/// # Panics
///
/// Panics if `location` does not fall within the source text.
pub fn slice_source_code_at(&self, location: &Location) -> &'a str {
    let rope = self.get_or_init_rope();
    // `line_to_char` reports a *character* index, whereas `str` slicing works
    // on *byte* offsets. Convert explicitly so that multi-byte characters
    // ahead of `location` neither shift the slice nor trigger a
    // char-boundary panic.
    let char_offset = rope.line_to_char(location.row() - 1) + location.column();
    let offset = rope.char_to_byte(char_offset);
    &self.contents[offset..]
}
/// Returns the source text covered by `range`, from `range.location`
/// (inclusive) up to `range.end_location` (exclusive).
///
/// Rows are treated as 1-based and columns as 0-based character offsets
/// within their row.
///
/// # Panics
///
/// Panics if either end of `range` does not fall within the source text, or
/// if the end precedes the start.
pub fn slice_source_code_range(&self, range: &Range) -> &'a str {
    let rope = self.get_or_init_rope();
    // The rope answers in *character* indices; `str` slicing needs *byte*
    // offsets, and the two diverge as soon as a multi-byte character
    // precedes the position being looked up.
    let start_char = rope.line_to_char(range.location.row() - 1) + range.location.column();
    let end_char =
        rope.line_to_char(range.end_location.row() - 1) + range.end_location.column();
    let start = rope.char_to_byte(start_char);
    let end = rope.char_to_byte(end_char);
    &self.contents[start..end]
}
@ -63,11 +41,13 @@ impl<'a> SourceCodeLocator<'a> {
outer: &Range,
inner: &Range,
) -> (&'a str, &'a str, &'a str) {
let offsets = self.get_or_init_offsets();
let outer_start = offsets[outer.location.row() - 1][outer.location.column()];
let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column()];
let inner_start = offsets[inner.location.row() - 1][inner.location.column()];
let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column()];
let rope = self.get_or_init_rope();
let outer_start = rope.line_to_char(outer.location.row() - 1) + outer.location.column();
let outer_end =
rope.line_to_char(outer.end_location.row() - 1) + outer.end_location.column();
let inner_start = rope.line_to_char(inner.location.row() - 1) + inner.location.column();
let inner_end =
rope.line_to_char(inner.end_location.row() - 1) + inner.end_location.column();
(
&self.contents[outer_start..inner_start],
&self.contents[inner_start..inner_end],
@ -75,41 +55,3 @@ impl<'a> SourceCodeLocator<'a> {
)
}
}
#[cfg(test)]
mod tests {
use crate::source_code_locator::SourceCodeLocator;
#[test]
fn source_code_locator_init() {
let content = "x = 1";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 1);
assert_eq!(offsets[0], [0, 1, 2, 3, 4]);
let content = "x = 1\n";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 2);
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
assert_eq!(offsets[1], [6]);
let content = "x = 1\ny = 2\nz = x + y\n";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 4);
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]);
assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]);
assert_eq!(offsets[3], [22]);
let content = "# \u{4e9c}\nclass Foo:\n \"\"\".\"\"\"";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 3);
assert_eq!(offsets[0], [0, 1, 2, 5]);
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]);
}
}