From 92c5f62ec01b83396c62e4e2dc33223a7a8d7373 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 3 Dec 2025 12:16:18 +0100 Subject: [PATCH] [ty] Enable LRU collection for parsed module (#21749) --- crates/ruff_db/src/parsed.rs | 17 ++++---- crates/ruff_memory_usage/src/lib.rs | 43 ++++++++++++++++--- crates/ty_ide/src/symbols.rs | 10 +++++ crates/ty_project/src/db.rs | 6 ++- crates/ty_python_semantic/src/ast_node_ref.rs | 9 ++-- .../src/module_resolver/module.rs | 2 +- 6 files changed, 65 insertions(+), 22 deletions(-) diff --git a/crates/ruff_db/src/parsed.rs b/crates/ruff_db/src/parsed.rs index ad241b3ded..9d9604aea0 100644 --- a/crates/ruff_db/src/parsed.rs +++ b/crates/ruff_db/src/parsed.rs @@ -21,7 +21,11 @@ use crate::source::source_text; /// reflected in the changed AST offsets. /// The other reason is that Ruff's AST doesn't implement `Eq` which Salsa requires /// for determining if a query result is unchanged. -#[salsa::tracked(returns(ref), no_eq, heap_size=ruff_memory_usage::heap_size)] +/// +/// The LRU capacity of 200 was picked without any empirical evidence that it's optimal, +/// instead it's a wild guess that it should be unlikely that incremental changes involve +/// more than 200 modules. Parsed ASTs within the same revision are never evicted by Salsa. +#[salsa::tracked(returns(ref), no_eq, heap_size=ruff_memory_usage::heap_size, lru=200)] pub fn parsed_module(db: &dyn Db, file: File) -> ParsedModule { let _span = tracing::trace_span!("parsed_module", ?file).entered(); @@ -92,14 +96,9 @@ impl ParsedModule { self.inner.store(None); } - /// Returns the pointer address of this [`ParsedModule`]. - /// - /// The pointer uniquely identifies the module within the current Salsa revision, - /// regardless of whether particular [`ParsedModuleRef`] instances are garbage collected. - pub fn addr(&self) -> usize { - // Note that the outer `Arc` in `inner` is stable across garbage collection, while the inner - // `Arc` within the `ArcSwap` may change. - Arc::as_ptr(&self.inner).addr() + /// Returns the file to which this module belongs. + pub fn file(&self) -> File { + self.file } } diff --git a/crates/ruff_memory_usage/src/lib.rs b/crates/ruff_memory_usage/src/lib.rs index e75c75808e..81444d5197 100644 --- a/crates/ruff_memory_usage/src/lib.rs +++ b/crates/ruff_memory_usage/src/lib.rs @@ -1,17 +1,46 @@ -use std::sync::{LazyLock, Mutex}; +use std::cell::RefCell; use get_size2::{GetSize, StandardTracker}; use ordermap::{OrderMap, OrderSet}; +thread_local! { + pub static TRACKER: RefCell>= const { RefCell::new(None) }; +} + +struct TrackerGuard(Option); + +impl Drop for TrackerGuard { + fn drop(&mut self) { + TRACKER.set(self.0.take()); + } +} + +pub fn attach_tracker(tracker: StandardTracker, f: impl FnOnce() -> R) -> R { + let prev = TRACKER.replace(Some(tracker)); + let _guard = TrackerGuard(prev); + f() +} + +fn with_tracker(f: F) -> R +where + F: FnOnce(Option<&mut StandardTracker>) -> R, +{ + TRACKER.with(|tracker| { + let mut tracker = tracker.borrow_mut(); + f(tracker.as_mut()) + }) +} + /// Returns the memory usage of the provided object, using a global tracker to avoid /// double-counting shared objects. pub fn heap_size(value: &T) -> usize { - static TRACKER: LazyLock> = - LazyLock::new(|| Mutex::new(StandardTracker::new())); - - value - .get_heap_size_with_tracker(&mut *TRACKER.lock().unwrap()) - .0 + with_tracker(|tracker| { + if let Some(tracker) = tracker { + value.get_heap_size_with_tracker(tracker).0 + } else { + value.get_heap_size() + } + }) } /// An implementation of [`GetSize::get_heap_size`] for [`OrderSet`]. diff --git a/crates/ty_ide/src/symbols.rs b/crates/ty_ide/src/symbols.rs index 01b177619e..a2278f40e6 100644 --- a/crates/ty_ide/src/symbols.rs +++ b/crates/ty_ide/src/symbols.rs @@ -379,6 +379,16 @@ pub(crate) fn symbols_for_file_global_only(db: &dyn Db, file: File) -> FlatSymbo global_only: true, }; visitor.visit_body(&module.syntax().body); + + if file + .path(db) + .as_system_path() + .is_none_or(|path| !db.project().is_file_included(db, path)) + { + // Eagerly clear ASTs of third party files. + parsed.clear(); + } + FlatSymbols { symbols: visitor.symbols, } diff --git a/crates/ty_project/src/db.rs b/crates/ty_project/src/db.rs index 8f6ec20c95..9c8eab8204 100644 --- a/crates/ty_project/src/db.rs +++ b/crates/ty_project/src/db.rs @@ -7,6 +7,7 @@ pub use self::changes::ChangeResult; use crate::CollectReporter; use crate::metadata::settings::file_settings; use crate::{ProgressReporter, Project, ProjectMetadata}; +use get_size2::StandardTracker; use ruff_db::Db as SourceDb; use ruff_db::diagnostic::Diagnostic; use ruff_db::files::{File, Files}; @@ -129,7 +130,10 @@ impl ProjectDatabase { /// Returns a [`SalsaMemoryDump`] that can be use to dump Salsa memory usage information /// to the CLI after a typechecker run. pub fn salsa_memory_dump(&self) -> SalsaMemoryDump { - let memory_usage = ::memory_usage(self); + let memory_usage = ruff_memory_usage::attach_tracker(StandardTracker::new(), || { + ::memory_usage(self) + }); + let mut ingredients = memory_usage .structs .into_iter() diff --git a/crates/ty_python_semantic/src/ast_node_ref.rs b/crates/ty_python_semantic/src/ast_node_ref.rs index 14916ec807..a3d1fae49a 100644 --- a/crates/ty_python_semantic/src/ast_node_ref.rs +++ b/crates/ty_python_semantic/src/ast_node_ref.rs @@ -1,6 +1,8 @@ use std::fmt::Debug; use std::marker::PhantomData; +#[cfg(debug_assertions)] +use ruff_db::files::File; use ruff_db::parsed::ParsedModuleRef; use ruff_python_ast::{AnyNodeRef, NodeIndex}; use ruff_python_ast::{AnyRootNodeRef, HasNodeIndex}; @@ -44,7 +46,7 @@ pub struct AstNodeRef { // cannot implement `Eq`, as indices are only unique within a given instance of the // AST. #[cfg(debug_assertions)] - module_addr: usize, + file: File, _node: PhantomData, } @@ -72,7 +74,7 @@ where Self { index, #[cfg(debug_assertions)] - module_addr: module_ref.module().addr(), + file: module_ref.module().file(), #[cfg(debug_assertions)] kind: AnyNodeRef::from(node).kind(), #[cfg(debug_assertions)] @@ -88,8 +90,7 @@ where #[track_caller] pub fn node<'ast>(&self, module_ref: &'ast ParsedModuleRef) -> &'ast T { #[cfg(debug_assertions)] - assert_eq!(module_ref.module().addr(), self.module_addr); - + assert_eq!(module_ref.module().file(), self.file); // The user guarantees that the module is from the same file and Salsa // revision, so the file contents cannot have changed. module_ref diff --git a/crates/ty_python_semantic/src/module_resolver/module.rs b/crates/ty_python_semantic/src/module_resolver/module.rs index 6927c3b89f..118c2aff45 100644 --- a/crates/ty_python_semantic/src/module_resolver/module.rs +++ b/crates/ty_python_semantic/src/module_resolver/module.rs @@ -120,7 +120,7 @@ impl std::fmt::Debug for Module<'_> { } #[allow(clippy::ref_option)] -#[salsa::tracked(returns(ref))] +#[salsa::tracked(returns(ref), heap_size=ruff_memory_usage::heap_size)] fn all_submodule_names_for_package<'db>( db: &'db dyn Db, module: Module<'db>,