mirror of https://github.com/astral-sh/ruff
[ty] Optimize query string matching
While this doesn't typically matter, it can have an impact when ty returns a very large list of symbols. Specifically, when searching for `async` in home-assistant, this change brings times closer to 500ms, versus closer to 600ms before. It looks like an overall ~50ms improvement (so around 10%), but the variance is all over the place and I didn't do any statistical tests. Still, this does make intuitive sense. Previously, we were allocating intermediate strings, doing UTF-8 decoding, and consulting Unicode casing tables for every symbol. Now we're just doing what is likely a single DFA scan. In effect, we front-load all of the Unicode junk into regex compilation.
This commit is contained in:
parent
330bb4efbf
commit
8ead02e0b1
|
|
@ -1,6 +1,8 @@
|
||||||
//! Implements logic used by the document symbol provider, workspace symbol
|
//! Implements logic used by the document symbol provider, workspace symbol
|
||||||
//! provider, and auto-import feature of the completion provider.
|
//! provider, and auto-import feature of the completion provider.
|
||||||
|
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
use ruff_db::files::File;
|
use ruff_db::files::File;
|
||||||
use ruff_db::parsed::parsed_module;
|
use ruff_db::parsed::parsed_module;
|
||||||
use ruff_python_ast::visitor::source_order::{self, SourceOrderVisitor};
|
use ruff_python_ast::visitor::source_order::{self, SourceOrderVisitor};
|
||||||
|
|
@ -16,7 +18,59 @@ pub struct SymbolsOptions {
|
||||||
/// Include only symbols in the global scope
|
/// Include only symbols in the global scope
|
||||||
pub global_only: bool,
|
pub global_only: bool,
|
||||||
/// Query string for filtering symbol names
|
/// Query string for filtering symbol names
|
||||||
pub query_string: Option<String>,
|
pub query_string: Option<QueryPattern>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct QueryPattern {
|
||||||
|
re: Option<Regex>,
|
||||||
|
original: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueryPattern {
|
||||||
|
pub fn new(literal_query_string: &str) -> QueryPattern {
|
||||||
|
let mut pattern = "(?i)".to_string();
|
||||||
|
for ch in literal_query_string.chars() {
|
||||||
|
pattern.push_str(&regex::escape(ch.encode_utf8(&mut [0; 4])));
|
||||||
|
pattern.push_str(".*");
|
||||||
|
}
|
||||||
|
// In theory regex compilation could fail if the pattern string
|
||||||
|
// was long enough to exceed the default regex compilation size
|
||||||
|
// limit. But this length would be approaching ~10MB or so.
|
||||||
|
QueryPattern {
|
||||||
|
re: Regex::new(&pattern).ok(),
|
||||||
|
original: literal_query_string.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_match(&self, symbol: &SymbolInfo) -> bool {
|
||||||
|
self.is_match_symbol_name(&symbol.name)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_match_symbol_name(&self, symbol_name: &str) -> bool {
|
||||||
|
if let Some(ref re) = self.re {
|
||||||
|
re.is_match(symbol_name)
|
||||||
|
} else {
|
||||||
|
// This is a degenerate case. The only way
|
||||||
|
// we should get here is if the query string
|
||||||
|
// was thousands (or more) characters long.
|
||||||
|
symbol_name.contains(&self.original)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<&str> for QueryPattern {
|
||||||
|
fn from(literal_query_string: &str) -> QueryPattern {
|
||||||
|
QueryPattern::new(literal_query_string)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Eq for QueryPattern {}
|
||||||
|
|
||||||
|
impl PartialEq for QueryPattern {
|
||||||
|
fn eq(&self, rhs: &QueryPattern) -> bool {
|
||||||
|
self.original == rhs.original
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Symbol information for IDE features like document outline.
|
/// Symbol information for IDE features like document outline.
|
||||||
|
|
@ -88,7 +142,7 @@ pub(crate) fn symbols_for_file(
|
||||||
visitor.visit_body(&module.syntax().body);
|
visitor.visit_body(&module.syntax().body);
|
||||||
let mut symbols = visitor.symbols;
|
let mut symbols = visitor.symbols;
|
||||||
if let Some(ref query) = options.query_string {
|
if let Some(ref query) = options.query_string {
|
||||||
symbols.retain(|symbol| is_pattern_in_symbol(query, &symbol.name));
|
symbols.retain(|symbol| query.is_match(symbol));
|
||||||
}
|
}
|
||||||
symbols
|
symbols
|
||||||
}
|
}
|
||||||
|
|
@ -284,31 +338,10 @@ impl SourceOrderVisitor<'_> for SymbolVisitor<'_> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if symbol name contains all characters in the query
|
|
||||||
/// string in order. The comparison is case insensitive.
|
|
||||||
fn is_pattern_in_symbol(query_string: &str, symbol_name: &str) -> bool {
|
|
||||||
let typed_lower = query_string.to_lowercase();
|
|
||||||
let symbol_lower = symbol_name.to_lowercase();
|
|
||||||
let typed_chars: Vec<char> = typed_lower.chars().collect();
|
|
||||||
let symbol_chars: Vec<char> = symbol_lower.chars().collect();
|
|
||||||
|
|
||||||
let mut typed_pos = 0;
|
|
||||||
let mut symbol_pos = 0;
|
|
||||||
|
|
||||||
while typed_pos < typed_chars.len() && symbol_pos < symbol_chars.len() {
|
|
||||||
if typed_chars[typed_pos] == symbol_chars[symbol_pos] {
|
|
||||||
typed_pos += 1;
|
|
||||||
}
|
|
||||||
symbol_pos += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
typed_pos == typed_chars.len()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
fn matches(query: &str, symbol: &str) -> bool {
|
fn matches(query: &str, symbol: &str) -> bool {
|
||||||
super::is_pattern_in_symbol(query, symbol)
|
super::QueryPattern::new(query).is_match_symbol_name(symbol)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ pub fn workspace_symbols(db: &dyn Db, query: &str) -> Vec<WorkspaceSymbolInfo> {
|
||||||
let options = SymbolsOptions {
|
let options = SymbolsOptions {
|
||||||
hierarchical: false, // Workspace symbols are always flat
|
hierarchical: false, // Workspace symbols are always flat
|
||||||
global_only: false,
|
global_only: false,
|
||||||
query_string: Some(query.to_string()),
|
query_string: Some(query.into()),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Get all files in the project
|
// Get all files in the project
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue