From 8ead02e0b1186f04ee7df8bb92bdb674feef8a17 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 21 Aug 2025 14:37:35 -0400 Subject: [PATCH] [ty] Optimize query string matching While this doesn't typically matter, when ty returns a very large list of symbols, this can have an impact. Specifically, when searching `async` in home-assistant, this gets times closer to 500ms versus closer to 600ms before this change. It looks like an overall ~50ms improvement (so around 10%), but variance is all over the place and I didn't do any statistical tests. But this does make intuitive sense. Previously, we were allocating intermediate strings, doing UTF-8 decoding and consulting Unicode casing tables. Now we're just doing what is likely a single DFA scan. In effect, we front load all of the Unicode junk into regex compilation. --- crates/ty_ide/src/symbols.rs | 81 ++++++++++++++++++-------- crates/ty_ide/src/workspace_symbols.rs | 2 +- 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/crates/ty_ide/src/symbols.rs b/crates/ty_ide/src/symbols.rs index 0764a39322..ef4614b3fc 100644 --- a/crates/ty_ide/src/symbols.rs +++ b/crates/ty_ide/src/symbols.rs @@ -1,6 +1,8 @@ //! Implements logic used by the document symbol provider, workspace symbol //! provider, and auto-import feature of the completion provider. 
+use regex::Regex; + use ruff_db::files::File; use ruff_db::parsed::parsed_module; use ruff_python_ast::visitor::source_order::{self, SourceOrderVisitor}; @@ -16,7 +18,59 @@ pub struct SymbolsOptions { /// Include only symbols in the global scope pub global_only: bool, /// Query string for filtering symbol names - pub query_string: Option<String>, + pub query_string: Option<QueryPattern>, +} + +#[derive(Clone, Debug)] +pub struct QueryPattern { + re: Option<Regex>, + original: String, +} + +impl QueryPattern { + pub fn new(literal_query_string: &str) -> QueryPattern { + let mut pattern = "(?i)".to_string(); + for ch in literal_query_string.chars() { + pattern.push_str(&regex::escape(ch.encode_utf8(&mut [0; 4]))); + pattern.push_str(".*"); + } + // In theory regex compilation could fail if the pattern string + // was long enough to exceed the default regex compilation size + // limit. But this length would be approaching ~10MB or so. + QueryPattern { + re: Regex::new(&pattern).ok(), + original: literal_query_string.to_string(), + } + } + + fn is_match(&self, symbol: &SymbolInfo) -> bool { + self.is_match_symbol_name(&symbol.name) + } + + fn is_match_symbol_name(&self, symbol_name: &str) -> bool { + if let Some(ref re) = self.re { + re.is_match(symbol_name) + } else { + // This is a degenerate case. The only way + // we should get here is if the query string + // was thousands (or more) characters long. + symbol_name.contains(&self.original) + } + } +} + +impl From<&str> for QueryPattern { + fn from(literal_query_string: &str) -> QueryPattern { + QueryPattern::new(literal_query_string) + } +} + +impl Eq for QueryPattern {} + +impl PartialEq for QueryPattern { + fn eq(&self, rhs: &QueryPattern) -> bool { + self.original == rhs.original + } } /// Symbol information for IDE features like document outline. 
@@ -88,7 +142,7 @@ pub(crate) fn symbols_for_file( visitor.visit_body(&module.syntax().body); let mut symbols = visitor.symbols; if let Some(ref query) = options.query_string { - symbols.retain(|symbol| is_pattern_in_symbol(query, &symbol.name)); + symbols.retain(|symbol| query.is_match(symbol)); } symbols } @@ -284,31 +338,10 @@ impl SourceOrderVisitor<'_> for SymbolVisitor<'_> { } } -/// Returns true if symbol name contains all characters in the query -/// string in order. The comparison is case insensitive. -fn is_pattern_in_symbol(query_string: &str, symbol_name: &str) -> bool { - let typed_lower = query_string.to_lowercase(); - let symbol_lower = symbol_name.to_lowercase(); - let typed_chars: Vec<char> = typed_lower.chars().collect(); - let symbol_chars: Vec<char> = symbol_lower.chars().collect(); - - let mut typed_pos = 0; - let mut symbol_pos = 0; - - while typed_pos < typed_chars.len() && symbol_pos < symbol_chars.len() { - if typed_chars[typed_pos] == symbol_chars[symbol_pos] { - typed_pos += 1; - } - symbol_pos += 1; - } - - typed_pos == typed_chars.len() -} - #[cfg(test)] mod tests { fn matches(query: &str, symbol: &str) -> bool { - super::is_pattern_in_symbol(query, symbol) + super::QueryPattern::new(query).is_match_symbol_name(symbol) } #[test] diff --git a/crates/ty_ide/src/workspace_symbols.rs b/crates/ty_ide/src/workspace_symbols.rs index 6ac203327a..5bda7090e5 100644 --- a/crates/ty_ide/src/workspace_symbols.rs +++ b/crates/ty_ide/src/workspace_symbols.rs @@ -16,7 +16,7 @@ pub fn workspace_symbols(db: &dyn Db, query: &str) -> Vec { let options = SymbolsOptions { hierarchical: false, // Workspace symbols are always flat global_only: false, - query_string: Some(query.to_string()), + query_string: Some(query.into()), }; // Get all files in the project