mirror of https://github.com/astral-sh/ruff
Use `AhoCorasick` to speed up quote match (#9773)
<!--
Thank you for contributing to Ruff! To help us out with reviewing, please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

When I was looking at the v0.2.0 release, this method showed up in a CodSpeed regression (we were calling it more), so I decided to quickly look at speeding it up. @BurntSushi suggested using Aho-Corasick, and it looks like it's about 7 or 8x faster:

```text
Parser/AhoCorasick      time:   [8.5646 ns 8.5914 ns 8.6191 ns]
Parser/Iterator         time:   [64.992 ns 65.124 ns 65.271 ns]
```

## Test Plan

`cargo test`
This commit is contained in:
parent
b947dde8ad
commit
ea1c089652
|
|
@ -2264,10 +2264,12 @@ dependencies = [
|
||||||
name = "ruff_python_ast"
|
name = "ruff_python_ast"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
"bitflags 2.4.1",
|
"bitflags 2.4.1",
|
||||||
"insta",
|
"insta",
|
||||||
"is-macro",
|
"is-macro",
|
||||||
"itertools 0.12.0",
|
"itertools 0.12.0",
|
||||||
|
"once_cell",
|
||||||
"ruff_python_parser",
|
"ruff_python_parser",
|
||||||
"ruff_python_trivia",
|
"ruff_python_trivia",
|
||||||
"ruff_source_file",
|
"ruff_source_file",
|
||||||
|
|
|
||||||
|
|
@ -197,7 +197,7 @@ impl<'a> Checker<'a> {
|
||||||
let trailing_quote = trailing_quote(self.locator.slice(string_range))?;
|
let trailing_quote = trailing_quote(self.locator.slice(string_range))?;
|
||||||
|
|
||||||
// Invert the quote character, if it's a single quote.
|
// Invert the quote character, if it's a single quote.
|
||||||
match *trailing_quote {
|
match trailing_quote {
|
||||||
"'" => Some(Quote::Double),
|
"'" => Some(Quote::Double),
|
||||||
"\"" => Some(Quote::Single),
|
"\"" => Some(Quote::Single),
|
||||||
_ => None,
|
_ => None,
|
||||||
|
|
|
||||||
|
|
@ -298,7 +298,7 @@ fn try_convert_to_f_string(
|
||||||
converted.push(']');
|
converted.push(']');
|
||||||
}
|
}
|
||||||
FieldNamePart::StringIndex(index) => {
|
FieldNamePart::StringIndex(index) => {
|
||||||
let quote = match *trailing_quote {
|
let quote = match trailing_quote {
|
||||||
"'" | "'''" | "\"\"\"" => '"',
|
"'" | "'''" | "\"\"\"" => '"',
|
||||||
"\"" => '\'',
|
"\"" => '\'',
|
||||||
_ => unreachable!("invalid trailing quote"),
|
_ => unreachable!("invalid trailing quote"),
|
||||||
|
|
|
||||||
|
|
@ -17,9 +17,11 @@ ruff_python_trivia = { path = "../ruff_python_trivia" }
|
||||||
ruff_source_file = { path = "../ruff_source_file" }
|
ruff_source_file = { path = "../ruff_source_file" }
|
||||||
ruff_text_size = { path = "../ruff_text_size" }
|
ruff_text_size = { path = "../ruff_text_size" }
|
||||||
|
|
||||||
|
aho-corasick = { workspace = true }
|
||||||
bitflags = { workspace = true }
|
bitflags = { workspace = true }
|
||||||
is-macro = { workspace = true }
|
is-macro = { workspace = true }
|
||||||
itertools = { workspace = true }
|
itertools = { workspace = true }
|
||||||
|
once_cell = { workspace = true }
|
||||||
rustc-hash = { workspace = true }
|
rustc-hash = { workspace = true }
|
||||||
serde = { workspace = true, optional = true }
|
serde = { workspace = true, optional = true }
|
||||||
smallvec = { workspace = true }
|
smallvec = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
use aho_corasick::{AhoCorasick, AhoCorasickKind, Anchored, Input, MatchKind, StartKind};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
use ruff_text_size::{TextLen, TextRange};
|
use ruff_text_size::{TextLen, TextRange};
|
||||||
|
|
||||||
/// Includes all permutations of `r`, `u`, `f`, and `fr` (`ur` is invalid, as is `uf`). This
|
/// Includes all permutations of `r`, `u`, `f`, and `fr` (`ur` is invalid, as is `uf`). This
|
||||||
|
|
@ -124,18 +127,6 @@ pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[
|
||||||
"b'",
|
"b'",
|
||||||
];
|
];
|
||||||
|
|
||||||
#[rustfmt::skip]
|
|
||||||
const TRIPLE_QUOTE_SUFFIXES: &[&str] = &[
|
|
||||||
"\"\"\"",
|
|
||||||
"'''",
|
|
||||||
];
|
|
||||||
|
|
||||||
#[rustfmt::skip]
|
|
||||||
const SINGLE_QUOTE_SUFFIXES: &[&str] = &[
|
|
||||||
"\"",
|
|
||||||
"'",
|
|
||||||
];
|
|
||||||
|
|
||||||
/// Strip the leading and trailing quotes from a string.
|
/// Strip the leading and trailing quotes from a string.
|
||||||
/// Assumes that the string is a valid string literal, but does not verify that the string
|
/// Assumes that the string is a valid string literal, but does not verify that the string
|
||||||
/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
|
/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
|
||||||
|
|
@ -155,28 +146,41 @@ pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An [`AhoCorasick`] matcher for string and byte literal prefixes.
|
||||||
|
static PREFIX_MATCHER: Lazy<AhoCorasick> = Lazy::new(|| {
|
||||||
|
AhoCorasick::builder()
|
||||||
|
.start_kind(StartKind::Anchored)
|
||||||
|
.match_kind(MatchKind::LeftmostLongest)
|
||||||
|
.kind(Some(AhoCorasickKind::DFA))
|
||||||
|
.build(
|
||||||
|
TRIPLE_QUOTE_STR_PREFIXES
|
||||||
|
.iter()
|
||||||
|
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
|
||||||
|
.chain(SINGLE_QUOTE_STR_PREFIXES)
|
||||||
|
.chain(SINGLE_QUOTE_BYTE_PREFIXES),
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
/// Return the leading quote for a string or byte literal (e.g., `"""`).
|
/// Return the leading quote for a string or byte literal (e.g., `"""`).
|
||||||
pub fn leading_quote(content: &str) -> Option<&str> {
|
pub fn leading_quote(content: &str) -> Option<&str> {
|
||||||
TRIPLE_QUOTE_STR_PREFIXES
|
let mat = PREFIX_MATCHER.find(Input::new(content).anchored(Anchored::Yes))?;
|
||||||
.iter()
|
Some(&content[mat.start()..mat.end()])
|
||||||
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
|
|
||||||
.chain(SINGLE_QUOTE_STR_PREFIXES)
|
|
||||||
.chain(SINGLE_QUOTE_BYTE_PREFIXES)
|
|
||||||
.find_map(|pattern| {
|
|
||||||
if content.starts_with(pattern) {
|
|
||||||
Some(*pattern)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the trailing quote string for a string or byte literal (e.g., `"""`).
|
/// Return the trailing quote string for a string or byte literal (e.g., `"""`).
|
||||||
pub fn trailing_quote(content: &str) -> Option<&&str> {
|
pub fn trailing_quote(content: &str) -> Option<&str> {
|
||||||
TRIPLE_QUOTE_SUFFIXES
|
if content.ends_with("'''") {
|
||||||
.iter()
|
Some("'''")
|
||||||
.chain(SINGLE_QUOTE_SUFFIXES)
|
} else if content.ends_with("\"\"\"") {
|
||||||
.find(|&pattern| content.ends_with(pattern))
|
Some("\"\"\"")
|
||||||
|
} else if content.ends_with('\'') {
|
||||||
|
Some("'")
|
||||||
|
} else if content.ends_with('\"') {
|
||||||
|
Some("\"")
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return `true` if the string is a triple-quote string or byte prefix.
|
/// Return `true` if the string is a triple-quote string or byte prefix.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue