From 87f6f08ef53edc2cbe8632d612f6d4fd016fe2ff Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 10 Jul 2025 15:16:56 +0200 Subject: [PATCH] [ty] Make `check_file` a salsa query (#19255) ## Summary We noticed that all files get reparsed when workspace diagnostics are enabled. I realised that this is because `check_file_impl` access the parsed module but itself isn't a salsa query. This pr makes `check_file_impl` a salsa query, so that we only access the `parsed_module` when the file actually changed. I decided to remove the salsa query from `check_types` because most functions it calls are salsa queries itself and having both `check_types` and `check_file` as salsa querise has the downside that we double cache the diagnostics. ## Test Plan **Before** ``` 2025-07-10 12:54:16.620766000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0c))}: File `/Users/micha/astral/test/yaml/yaml-stubs/__init__.pyi` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.621942000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c13))}: File `/Users/micha/astral/test/ignore2 2/nested-repository/main.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.622107000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c09))}: File `/Users/micha/astral/test/notebook.ipynb` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.622357000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c04))}: File `/Users/micha/astral/test/no-trailing.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.622634000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c02))}: File `/Users/micha/astral/test/simple.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.623056000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c07))}: File `/Users/micha/astral/test/open/more.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.623254000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c11))}: File `/Users/micha/astral/test/ignore-bug/backend/src/subdir/log/some_logging_lib.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.623450000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0f))}: File `/Users/micha/astral/test/yaml/tomllib/__init__.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.624599000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c05))}: File `/Users/micha/astral/test/create.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.624784000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c00))}: File `/Users/micha/astral/test/lib.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.624911000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0a))}: File `/Users/micha/astral/test/sub/test.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.625032000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c12))}: File `/Users/micha/astral/test/ignore2/nested-repository/main.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.625101000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c08))}: File `/Users/micha/astral/test/open/test.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.625227000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c03))}: File `/Users/micha/astral/test/pseudocode_with_bom.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.625353000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0b))}: File `/Users/micha/astral/test/yaml/yaml-stubs/loader.pyi` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.625543000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c01))}: File `/Users/micha/astral/test/test_trailing.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.625616000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0d))}: File `/Users/micha/astral/test/yaml/tomllib/_re.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.625667000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c06))}: File `/Users/micha/astral/test/yaml/main.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.625779000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c10))}: File `/Users/micha/astral/test/yaml/tomllib/_types.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.627526000 WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0e))}: File `/Users/micha/astral/test/yaml/tomllib/_parser.py` was reparsed after being collected in the current Salsa revision 2025-07-10 12:54:16.627959000 DEBUG request{id=19 method="workspace/diagnostic"}:Project::check: Checking all files took 0.007s ``` Now, no more logs regarding reparsing --- crates/ty_project/src/lib.rs | 143 +++++++++--------- crates/ty_python_semantic/src/types.rs | 5 +- .../src/types/diagnostic.rs | 4 + crates/ty_python_semantic/src/types/infer.rs | 4 +- crates/ty_test/src/lib.rs | 2 +- 5 files changed, 82 insertions(+), 76 deletions(-) diff --git a/crates/ty_project/src/lib.rs b/crates/ty_project/src/lib.rs index 00a3fa0fea..e486f8c239 100644 --- a/crates/ty_project/src/lib.rs +++ b/crates/ty_project/src/lib.rs @@ -257,8 +257,11 @@ impl Project { tracing::debug_span!(parent: project_span, "check_file", ?file); let _entered = check_file_span.entered(); - let result = self.check_file_impl(&db, file); - file_diagnostics.lock().unwrap().extend(result); + let result = check_file_impl(&db, file); + file_diagnostics + .lock() + .unwrap() + .extend(result.iter().map(Clone::clone)); reporter.report_file(&file); }); @@ -285,7 +288,7 @@ impl Project { return Vec::new(); } - self.check_file_impl(db, file) + check_file_impl(db, file).iter().map(Clone::clone).collect() } /// Opens a file in the project. @@ -466,71 +469,73 @@ impl Project { self.set_file_set(db).to(IndexedFiles::lazy()); } } +} - fn check_file_impl(self, db: &dyn Db, file: File) -> Vec { - let mut diagnostics: Vec = Vec::new(); +#[salsa::tracked(returns(deref), heap_size=get_size2::GetSize::get_heap_size)] +pub(crate) fn check_file_impl(db: &dyn Db, file: File) -> Box<[Diagnostic]> { + let mut diagnostics: Vec = Vec::new(); - // Abort checking if there are IO errors. - let source = source_text(db, file); + // Abort checking if there are IO errors. + let source = source_text(db, file); - if let Some(read_error) = source.read_error() { - diagnostics.push( - IOErrorDiagnostic { - file: Some(file), - error: read_error.clone().into(), - } - .to_diagnostic(), - ); - return diagnostics; - } - - let parsed = parsed_module(db, file); - - let parsed_ref = parsed.load(db); - diagnostics.extend( - parsed_ref - .errors() - .iter() - .map(|error| Diagnostic::invalid_syntax(file, &error.error, error)), - ); - - diagnostics.extend(parsed_ref.unsupported_syntax_errors().iter().map(|error| { - let mut error = Diagnostic::invalid_syntax(file, error, error); - add_inferred_python_version_hint_to_diagnostic(db, &mut error, "parsing syntax"); - error - })); - - { - let db = AssertUnwindSafe(db); - match catch(&**db, file, || check_types(*db, file)) { - Ok(Some(type_check_diagnostics)) => { - diagnostics.extend(type_check_diagnostics.into_iter().cloned()); - } - Ok(None) => {} - Err(diagnostic) => diagnostics.push(diagnostic), + if let Some(read_error) = source.read_error() { + diagnostics.push( + IOErrorDiagnostic { + file: Some(file), + error: read_error.clone().into(), } - } - - if self - .open_fileset(db) - .is_none_or(|files| !files.contains(&file)) - { - // Drop the AST now that we are done checking this file. It is not currently open, - // so it is unlikely to be accessed again soon. If any queries need to access the AST - // from across files, it will be re-parsed. - parsed.clear(); - } - - diagnostics.sort_unstable_by_key(|diagnostic| { - diagnostic - .primary_span() - .and_then(|span| span.range()) - .unwrap_or_default() - .start() - }); - - diagnostics + .to_diagnostic(), + ); + return diagnostics.into_boxed_slice(); } + + let parsed = parsed_module(db, file); + + let parsed_ref = parsed.load(db); + diagnostics.extend( + parsed_ref + .errors() + .iter() + .map(|error| Diagnostic::invalid_syntax(file, &error.error, error)), + ); + + diagnostics.extend(parsed_ref.unsupported_syntax_errors().iter().map(|error| { + let mut error = Diagnostic::invalid_syntax(file, error, error); + add_inferred_python_version_hint_to_diagnostic(db, &mut error, "parsing syntax"); + error + })); + + { + let db = AssertUnwindSafe(db); + match catch(&**db, file, || check_types(*db, file)) { + Ok(Some(type_check_diagnostics)) => { + diagnostics.extend(type_check_diagnostics); + } + Ok(None) => {} + Err(diagnostic) => diagnostics.push(diagnostic), + } + } + + if db + .project() + .open_fileset(db) + .is_none_or(|files| !files.contains(&file)) + { + // Drop the AST now that we are done checking this file. It is not currently open, + // so it is unlikely to be accessed again soon. If any queries need to access the AST + // from across files, it will be re-parsed. + parsed.clear(); + } + + diagnostics.sort_unstable_by_key(|diagnostic| { + diagnostic + .primary_span() + .and_then(|span| span.range()) + .unwrap_or_default() + .start() + }); + + diagnostics.into_boxed_slice() } #[derive(Debug)] @@ -701,8 +706,8 @@ where #[cfg(test)] mod tests { - use crate::Db; use crate::ProjectMetadata; + use crate::check_file_impl; use crate::db::tests::TestDb; use ruff_db::Db as _; use ruff_db::files::system_path_to_file; @@ -741,9 +746,8 @@ mod tests { assert_eq!(source_text(&db, file).as_str(), ""); assert_eq!( - db.project() - .check_file_impl(&db, file) - .into_iter() + check_file_impl(&db, file) + .iter() .map(|diagnostic| diagnostic.primary_message().to_string()) .collect::>(), vec!["Failed to read file: No such file or directory".to_string()] @@ -758,9 +762,8 @@ mod tests { assert_eq!(source_text(&db, file).as_str(), ""); assert_eq!( - db.project() - .check_file_impl(&db, file) - .into_iter() + check_file_impl(&db, file) + .iter() .map(|diagnostic| diagnostic.primary_message().to_string()) .collect::>(), vec![] as Vec diff --git a/crates/ty_python_semantic/src/types.rs b/crates/ty_python_semantic/src/types.rs index 3ffc148be5..42defc64f3 100644 --- a/crates/ty_python_semantic/src/types.rs +++ b/crates/ty_python_semantic/src/types.rs @@ -88,8 +88,7 @@ mod definition; #[cfg(test)] mod property_tests; -#[salsa::tracked(returns(ref), heap_size=get_size2::GetSize::get_heap_size)] -pub fn check_types(db: &dyn Db, file: File) -> TypeCheckDiagnostics { +pub fn check_types(db: &dyn Db, file: File) -> Vec { let _span = tracing::trace_span!("check_types", ?file).entered(); tracing::debug!("Checking file '{path}'", path = file.path(db)); @@ -111,7 +110,7 @@ pub fn check_types(db: &dyn Db, file: File) -> TypeCheckDiagnostics { check_suppressions(db, file, &mut diagnostics); - diagnostics + diagnostics.into_vec() } /// Infer the type of a binding. diff --git a/crates/ty_python_semantic/src/types/diagnostic.rs b/crates/ty_python_semantic/src/types/diagnostic.rs index ecc870530d..e3cf81f75f 100644 --- a/crates/ty_python_semantic/src/types/diagnostic.rs +++ b/crates/ty_python_semantic/src/types/diagnostic.rs @@ -1598,6 +1598,10 @@ impl TypeCheckDiagnostics { self.diagnostics.shrink_to_fit(); } + pub(crate) fn into_vec(self) -> Vec { + self.diagnostics + } + pub fn iter(&self) -> std::slice::Iter<'_, Diagnostic> { self.diagnostics.iter() } diff --git a/crates/ty_python_semantic/src/types/infer.rs b/crates/ty_python_semantic/src/types/infer.rs index 6cc0a6ec3b..8790a1d0d3 100644 --- a/crates/ty_python_semantic/src/types/infer.rs +++ b/crates/ty_python_semantic/src/types/infer.rs @@ -10035,7 +10035,7 @@ mod tests { } #[track_caller] - fn assert_diagnostic_messages(diagnostics: &TypeCheckDiagnostics, expected: &[&str]) { + fn assert_diagnostic_messages(diagnostics: &[Diagnostic], expected: &[&str]) { let messages: Vec<&str> = diagnostics .iter() .map(Diagnostic::primary_message) @@ -10048,7 +10048,7 @@ mod tests { let file = system_path_to_file(db, filename).unwrap(); let diagnostics = check_types(db, file); - assert_diagnostic_messages(diagnostics, expected); + assert_diagnostic_messages(&diagnostics, expected); } #[test] diff --git a/crates/ty_test/src/lib.rs b/crates/ty_test/src/lib.rs index 827d6faaef..8532bbdd02 100644 --- a/crates/ty_test/src/lib.rs +++ b/crates/ty_test/src/lib.rs @@ -340,7 +340,7 @@ fn run_test( Err(failures) => return Some(failures), }; - diagnostics.extend(type_diagnostics.into_iter().cloned()); + diagnostics.extend(type_diagnostics); diagnostics.sort_by(|left, right| { left.rendering_sort_key(db) .cmp(&right.rendering_sort_key(db))