[ty] Enable LRU collection for parsed module (#21749)

This commit is contained in:
Micha Reiser 2025-12-03 12:16:18 +01:00 committed by GitHub
parent 21e5a57296
commit 92c5f62ec0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 65 additions and 22 deletions

View File

@ -21,7 +21,11 @@ use crate::source::source_text;
/// reflected in the changed AST offsets.
/// The other reason is that Ruff's AST doesn't implement `Eq` which Salsa requires
/// for determining if a query result is unchanged.
#[salsa::tracked(returns(ref), no_eq, heap_size=ruff_memory_usage::heap_size)]
///
/// The LRU capacity of 200 was picked without any empirical evidence that it's optimal,
/// instead it's a wild guess that it should be unlikely that incremental changes involve
/// more than 200 modules. Parsed ASTs within the same revision are never evicted by Salsa.
#[salsa::tracked(returns(ref), no_eq, heap_size=ruff_memory_usage::heap_size, lru=200)]
pub fn parsed_module(db: &dyn Db, file: File) -> ParsedModule {
let _span = tracing::trace_span!("parsed_module", ?file).entered();
@ -92,14 +96,9 @@ impl ParsedModule {
self.inner.store(None);
}
/// Returns the pointer address of this [`ParsedModule`].
///
/// The pointer uniquely identifies the module within the current Salsa revision,
/// regardless of whether particular [`ParsedModuleRef`] instances are garbage collected.
pub fn addr(&self) -> usize {
// Note that the outer `Arc` in `inner` is stable across garbage collection, while the inner
// `Arc` within the `ArcSwap` may change.
Arc::as_ptr(&self.inner).addr()
/// Returns the file to which this module belongs.
pub fn file(&self) -> File {
self.file
}
}

View File

@ -1,17 +1,46 @@
use std::sync::{LazyLock, Mutex};
use std::cell::RefCell;
use get_size2::{GetSize, StandardTracker};
use ordermap::{OrderMap, OrderSet};
thread_local! {
pub static TRACKER: RefCell<Option<StandardTracker>>= const { RefCell::new(None) };
}
struct TrackerGuard(Option<StandardTracker>);
impl Drop for TrackerGuard {
fn drop(&mut self) {
TRACKER.set(self.0.take());
}
}
pub fn attach_tracker<R>(tracker: StandardTracker, f: impl FnOnce() -> R) -> R {
let prev = TRACKER.replace(Some(tracker));
let _guard = TrackerGuard(prev);
f()
}
fn with_tracker<F, R>(f: F) -> R
where
F: FnOnce(Option<&mut StandardTracker>) -> R,
{
TRACKER.with(|tracker| {
let mut tracker = tracker.borrow_mut();
f(tracker.as_mut())
})
}
/// Returns the memory usage of the provided object, using a global tracker to avoid
/// double-counting shared objects.
pub fn heap_size<T: GetSize>(value: &T) -> usize {
static TRACKER: LazyLock<Mutex<StandardTracker>> =
LazyLock::new(|| Mutex::new(StandardTracker::new()));
value
.get_heap_size_with_tracker(&mut *TRACKER.lock().unwrap())
.0
with_tracker(|tracker| {
if let Some(tracker) = tracker {
value.get_heap_size_with_tracker(tracker).0
} else {
value.get_heap_size()
}
})
}
/// An implementation of [`GetSize::get_heap_size`] for [`OrderSet`].

View File

@ -379,6 +379,16 @@ pub(crate) fn symbols_for_file_global_only(db: &dyn Db, file: File) -> FlatSymbo
global_only: true,
};
visitor.visit_body(&module.syntax().body);
if file
.path(db)
.as_system_path()
.is_none_or(|path| !db.project().is_file_included(db, path))
{
// Eagerly clear ASTs of third party files.
parsed.clear();
}
FlatSymbols {
symbols: visitor.symbols,
}

View File

@ -7,6 +7,7 @@ pub use self::changes::ChangeResult;
use crate::CollectReporter;
use crate::metadata::settings::file_settings;
use crate::{ProgressReporter, Project, ProjectMetadata};
use get_size2::StandardTracker;
use ruff_db::Db as SourceDb;
use ruff_db::diagnostic::Diagnostic;
use ruff_db::files::{File, Files};
@ -129,7 +130,10 @@ impl ProjectDatabase {
/// Returns a [`SalsaMemoryDump`] that can be use to dump Salsa memory usage information
/// to the CLI after a typechecker run.
pub fn salsa_memory_dump(&self) -> SalsaMemoryDump {
let memory_usage = <dyn salsa::Database>::memory_usage(self);
let memory_usage = ruff_memory_usage::attach_tracker(StandardTracker::new(), || {
<dyn salsa::Database>::memory_usage(self)
});
let mut ingredients = memory_usage
.structs
.into_iter()

View File

@ -1,6 +1,8 @@
use std::fmt::Debug;
use std::marker::PhantomData;
#[cfg(debug_assertions)]
use ruff_db::files::File;
use ruff_db::parsed::ParsedModuleRef;
use ruff_python_ast::{AnyNodeRef, NodeIndex};
use ruff_python_ast::{AnyRootNodeRef, HasNodeIndex};
@ -44,7 +46,7 @@ pub struct AstNodeRef<T> {
// cannot implement `Eq`, as indices are only unique within a given instance of the
// AST.
#[cfg(debug_assertions)]
module_addr: usize,
file: File,
_node: PhantomData<T>,
}
@ -72,7 +74,7 @@ where
Self {
index,
#[cfg(debug_assertions)]
module_addr: module_ref.module().addr(),
file: module_ref.module().file(),
#[cfg(debug_assertions)]
kind: AnyNodeRef::from(node).kind(),
#[cfg(debug_assertions)]
@ -88,8 +90,7 @@ where
#[track_caller]
pub fn node<'ast>(&self, module_ref: &'ast ParsedModuleRef) -> &'ast T {
#[cfg(debug_assertions)]
assert_eq!(module_ref.module().addr(), self.module_addr);
assert_eq!(module_ref.module().file(), self.file);
// The user guarantees that the module is from the same file and Salsa
// revision, so the file contents cannot have changed.
module_ref

View File

@ -120,7 +120,7 @@ impl std::fmt::Debug for Module<'_> {
}
#[allow(clippy::ref_option)]
#[salsa::tracked(returns(ref))]
#[salsa::tracked(returns(ref), heap_size=ruff_memory_usage::heap_size)]
fn all_submodule_names_for_package<'db>(
db: &'db dyn Db,
module: Module<'db>,