From 64700d296ffc1b03ddaceb0fa27e38efa0e2a044 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 2 May 2024 20:26:02 +0200 Subject: [PATCH] Remove ImportMap (#11234) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary This PR removes the `ImportMap` implementation and all its routing through ruff. The import map was added in https://github.com/astral-sh/ruff/pull/3243 but we then never ended up using it to do cross-file analysis. We are now working on adding multi-file analysis to ruff, and will revisit import resolution as part of it. ``` hyperfine --warmup 10 --runs 20 --setup "./target/release/ruff clean" \ "./target/release/ruff check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I" \ "./target/release/ruff-import check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I" Benchmark 1: ./target/release/ruff check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I Time (mean ± σ): 37.6 ms ± 0.9 ms [User: 52.2 ms, System: 63.7 ms] Range (min … max): 35.8 ms … 39.8 ms 20 runs Benchmark 2: ./target/release/ruff-import check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I Time (mean ± σ): 36.0 ms ± 0.7 ms [User: 50.3 ms, System: 58.4 ms] Range (min … max): 34.5 ms … 37.6 ms 20 runs Summary ./target/release/ruff-import check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I ran 1.04 ± 0.03 times faster than ./target/release/ruff check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I ``` I suspect that the performance improvement should be even more significant for users who otherwise don't have any diagnostics. 
``` hyperfine --warmup 10 --runs 20 --setup "cd ../ecosystem/airflow && ../../ruff/target/release/ruff clean" \ "./target/release/ruff check ../ecosystem/airflow -e -s --extend-select=I" \ "./target/release/ruff-import check ../ecosystem/airflow -e -s --extend-select=I" Benchmark 1: ./target/release/ruff check ../ecosystem/airflow -e -s --extend-select=I Time (mean ± σ): 53.7 ms ± 1.8 ms [User: 68.4 ms, System: 63.0 ms] Range (min … max): 51.1 ms … 58.7 ms 20 runs Benchmark 2: ./target/release/ruff-import check ../ecosystem/airflow -e -s --extend-select=I Time (mean ± σ): 50.8 ms ± 1.4 ms [User: 50.7 ms, System: 60.9 ms] Range (min … max): 48.5 ms … 55.3 ms 20 runs Summary ./target/release/ruff-import check ../ecosystem/airflow -e -s --extend-select=I ran 1.06 ± 0.05 times faster than ./target/release/ruff check ../ecosystem/airflow -e -s --extend-select=I ``` ## Test Plan `cargo test` --- crates/ruff/src/cache.rs | 7 +-- crates/ruff/src/commands/check.rs | 2 - crates/ruff/src/diagnostics.rs | 17 +---- crates/ruff_linter/src/checkers/imports.rs | 65 +------------------- crates/ruff_linter/src/linter.rs | 35 ++++------- crates/ruff_linter/src/rules/pyflakes/mod.rs | 2 +- crates/ruff_linter/src/test.rs | 10 +-- crates/ruff_python_ast/src/imports.rs | 62 ------------------- crates/ruff_server/src/lint.rs | 3 +- crates/ruff_wasm/src/lib.rs | 3 +- fuzz/fuzz_targets/ruff_formatter_validity.rs | 4 +- 11 files changed, 29 insertions(+), 181 deletions(-) diff --git a/crates/ruff/src/cache.rs b/crates/ruff/src/cache.rs index 001969a267..136555fb1b 100644 --- a/crates/ruff/src/cache.rs +++ b/crates/ruff/src/cache.rs @@ -23,7 +23,6 @@ use ruff_linter::message::Message; use ruff_linter::{warn_user, VERSION}; use ruff_macros::CacheKey; use ruff_notebook::NotebookIndex; -use ruff_python_ast::imports::ImportMap; use ruff_source_file::SourceFileBuilder; use ruff_text_size::{TextRange, TextSize}; use ruff_workspace::resolver::Resolver; @@ -348,7 +347,7 @@ impl FileCache { } else { 
FxHashMap::default() }; - Diagnostics::new(messages, lint.imports.clone(), notebook_indexes) + Diagnostics::new(messages, notebook_indexes) }) } } @@ -394,7 +393,7 @@ pub(crate) fn init(path: &Path) -> Result<()> { #[derive(Deserialize, Debug, Serialize, PartialEq)] pub(crate) struct LintCacheData { /// Imports made. - pub(super) imports: ImportMap, + // pub(super) imports: ImportMap, /// Diagnostic messages. pub(super) messages: Vec, /// Source code of the file. @@ -410,7 +409,6 @@ pub(crate) struct LintCacheData { impl LintCacheData { pub(crate) fn from_messages( messages: &[Message], - imports: ImportMap, notebook_index: Option, ) -> Self { let source = if let Some(msg) = messages.first() { @@ -438,7 +436,6 @@ impl LintCacheData { .collect(); Self { - imports, messages, source, notebook_index, diff --git a/crates/ruff/src/commands/check.rs b/crates/ruff/src/commands/check.rs index d62a93426b..0b421639a1 100644 --- a/crates/ruff/src/commands/check.rs +++ b/crates/ruff/src/commands/check.rs @@ -17,7 +17,6 @@ use ruff_linter::registry::Rule; use ruff_linter::settings::types::UnsafeFixes; use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::{fs, warn_user_once, IOError}; -use ruff_python_ast::imports::ImportMap; use ruff_source_file::SourceFileBuilder; use ruff_text_size::{TextRange, TextSize}; use ruff_workspace::resolver::{ @@ -134,7 +133,6 @@ pub(crate) fn check( dummy, TextSize::default(), )], - ImportMap::default(), FxHashMap::default(), ) } else { diff --git a/crates/ruff/src/diagnostics.rs b/crates/ruff/src/diagnostics.rs index ad15e88a30..9f32e4af8f 100644 --- a/crates/ruff/src/diagnostics.rs +++ b/crates/ruff/src/diagnostics.rs @@ -23,7 +23,6 @@ use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::source_kind::{SourceError, SourceKind}; use ruff_linter::{fs, IOError, SyntaxError}; use ruff_notebook::{Notebook, NotebookError, NotebookIndex}; -use ruff_python_ast::imports::ImportMap; use ruff_python_ast::{PySourceType, 
SourceType, TomlSourceType}; use ruff_source_file::SourceFileBuilder; use ruff_text_size::{TextRange, TextSize}; @@ -35,20 +34,17 @@ use crate::cache::{Cache, FileCacheKey, LintCacheData}; pub(crate) struct Diagnostics { pub(crate) messages: Vec, pub(crate) fixed: FixMap, - pub(crate) imports: ImportMap, pub(crate) notebook_indexes: FxHashMap, } impl Diagnostics { pub(crate) fn new( messages: Vec, - imports: ImportMap, notebook_indexes: FxHashMap, ) -> Self { Self { messages, fixed: FixMap::default(), - imports, notebook_indexes, } } @@ -92,7 +88,6 @@ impl Diagnostics { dummy, TextSize::default(), )], - ImportMap::default(), FxHashMap::default(), ) } else { @@ -127,7 +122,6 @@ impl Add for Diagnostics { impl AddAssign for Diagnostics { fn add_assign(&mut self, other: Self) { self.messages.extend(other.messages); - self.imports.extend(other.imports); self.fixed += other.fixed; self.notebook_indexes.extend(other.notebook_indexes); } @@ -267,7 +261,7 @@ pub(crate) fn lint_path( // Lint the file. let ( LinterResult { - data: (messages, imports), + data: messages, error: parse_error, }, transformed, @@ -335,8 +329,6 @@ pub(crate) fn lint_path( (result, transformed, fixed) }; - let imports = imports.unwrap_or_default(); - if let Some((cache, relative_path, key)) = caching { // We don't cache parsing errors. if parse_error.is_none() { @@ -354,7 +346,6 @@ pub(crate) fn lint_path( &key, LintCacheData::from_messages( &messages, - imports.clone(), transformed.as_ipy_notebook().map(Notebook::index).cloned(), ), ); @@ -378,7 +369,6 @@ pub(crate) fn lint_path( Ok(Diagnostics { messages, fixed: FixMap::from_iter([(fs::relativize_path(path), fixed)]), - imports, notebook_indexes, }) } @@ -416,7 +406,7 @@ pub(crate) fn lint_stdin( // Lint the inputs. 
let ( LinterResult { - data: (messages, imports), + data: messages, error: parse_error, }, transformed, @@ -494,8 +484,6 @@ pub(crate) fn lint_stdin( (result, transformed, fixed) }; - let imports = imports.unwrap_or_default(); - if let Some(error) = parse_error { error!( "{}", @@ -518,7 +506,6 @@ pub(crate) fn lint_stdin( fs::relativize_path(path.unwrap_or_else(|| Path::new("-"))), fixed, )]), - imports, notebook_indexes, }) } diff --git a/crates/ruff_linter/src/checkers/imports.rs b/crates/ruff_linter/src/checkers/imports.rs index 2815a6a828..2bc19b7412 100644 --- a/crates/ruff_linter/src/checkers/imports.rs +++ b/crates/ruff_linter/src/checkers/imports.rs @@ -1,17 +1,13 @@ //! Lint rules based on import analysis. -use std::borrow::Cow; use std::path::Path; use ruff_diagnostics::Diagnostic; use ruff_notebook::CellOffsets; -use ruff_python_ast::helpers::to_module_path; -use ruff_python_ast::imports::{ImportMap, ModuleImport}; use ruff_python_ast::statement_visitor::StatementVisitor; -use ruff_python_ast::{self as ast, PySourceType, Stmt, Suite}; +use ruff_python_ast::{PySourceType, Suite}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; use ruff_source_file::Locator; -use ruff_text_size::Ranged; use crate::directives::IsortDirectives; use crate::registry::Rule; @@ -19,57 +15,6 @@ use crate::rules::isort; use crate::rules::isort::block::{Block, BlockBuilder}; use crate::settings::LinterSettings; -fn extract_import_map(path: &Path, package: Option<&Path>, blocks: &[&Block]) -> Option { - let module_path = to_module_path(package?, path)?; - - let num_imports = blocks.iter().map(|block| block.imports.len()).sum(); - let mut module_imports = Vec::with_capacity(num_imports); - for stmt in blocks.iter().flat_map(|block| &block.imports) { - match stmt { - Stmt::Import(ast::StmtImport { names, range: _ }) => { - module_imports.extend( - names - .iter() - .map(|name| ModuleImport::new(name.name.to_string(), stmt.range())), - ); - } - 
Stmt::ImportFrom(ast::StmtImportFrom { - module, - names, - level, - range: _, - }) => { - let level = *level as usize; - let module = if let Some(module) = module { - let module: &String = module.as_ref(); - if level == 0 { - Cow::Borrowed(module) - } else { - if module_path.len() <= level { - continue; - } - let prefix = module_path[..module_path.len() - level].join("."); - Cow::Owned(format!("{prefix}.{module}")) - } - } else { - if module_path.len() <= level { - continue; - } - Cow::Owned(module_path[..module_path.len() - level].join(".")) - }; - module_imports.extend(names.iter().map(|name| { - ModuleImport::new(format!("{}.{}", module, name.name), name.range()) - })); - } - _ => panic!("Expected Stmt::Import | Stmt::ImportFrom"), - } - } - - let mut import_map = ImportMap::default(); - import_map.insert(module_path.join("."), module_imports); - Some(import_map) -} - #[allow(clippy::too_many_arguments)] pub(crate) fn check_imports( python_ast: &Suite, @@ -78,11 +23,10 @@ pub(crate) fn check_imports( directives: &IsortDirectives, settings: &LinterSettings, stylist: &Stylist, - path: &Path, package: Option<&Path>, source_type: PySourceType, cell_offsets: Option<&CellOffsets>, -) -> (Vec, Option) { +) -> Vec { // Extract all import blocks from the AST. let tracker = { let mut tracker = @@ -122,8 +66,5 @@ pub(crate) fn check_imports( )); } - // Extract import map. 
- let imports = extract_import_map(path, package, &blocks); - - (diagnostics, imports) + diagnostics } diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs index cd5b95f417..cb3ac3318b 100644 --- a/crates/ruff_linter/src/linter.rs +++ b/crates/ruff_linter/src/linter.rs @@ -10,7 +10,6 @@ use rustc_hash::FxHashMap; use ruff_diagnostics::Diagnostic; use ruff_notebook::Notebook; -use ruff_python_ast::imports::ImportMap; use ruff_python_ast::{PySourceType, Suite}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; @@ -62,7 +61,7 @@ pub type FixTable = FxHashMap; pub struct FixerResult<'a> { /// The result returned by the linter, after applying any fixes. - pub result: LinterResult<(Vec, Option)>, + pub result: LinterResult>, /// The resulting source code, after applying any fixes. pub transformed: Cow<'a, SourceKind>, /// The number of fixes applied for each [`Rule`]. @@ -84,10 +83,9 @@ pub fn check_path( source_kind: &SourceKind, source_type: PySourceType, tokens: TokenSource, -) -> LinterResult<(Vec, Option)> { +) -> LinterResult> { // Aggregate all diagnostics. let mut diagnostics = vec![]; - let mut imports = None; let mut error = None; // Collect doc lines. 
This requires a rare mix of tokens (for comments) and AST @@ -169,19 +167,18 @@ pub fn check_path( )); } if use_imports { - let (import_diagnostics, module_imports) = check_imports( + let import_diagnostics = check_imports( &python_ast, locator, indexer, &directives.isort, settings, stylist, - path, package, source_type, cell_offsets, ); - imports = module_imports; + diagnostics.extend(import_diagnostics); } if use_doc_lines { @@ -340,7 +337,7 @@ pub fn check_path( } } - LinterResult::new((diagnostics, imports), error) + LinterResult::new(diagnostics, error) } const MAX_ITERATIONS: usize = 100; @@ -410,7 +407,7 @@ pub fn add_noqa_to_path( // TODO(dhruvmanila): Add support for Jupyter Notebooks add_noqa( path, - &diagnostics.0, + &diagnostics, &locator, indexer.comment_ranges(), &settings.external, @@ -429,7 +426,7 @@ pub fn lint_only( source_kind: &SourceKind, source_type: PySourceType, data: ParseSource, -) -> LinterResult<(Vec, Option)> { +) -> LinterResult> { // Tokenize once. let tokens = data.into_token_source(source_kind, source_type); @@ -465,12 +462,7 @@ pub fn lint_only( tokens, ); - result.map(|(diagnostics, imports)| { - ( - diagnostics_to_messages(diagnostics, path, &locator, &directives), - imports, - ) - }) + result.map(|diagnostics| diagnostics_to_messages(diagnostics, path, &locator, &directives)) } /// Convert from diagnostics to messages. @@ -583,7 +575,7 @@ pub fn lint_fix<'a>( code: fixed_contents, fixes: applied, source_map, - }) = fix_file(&result.data.0, &locator, unsafe_fixes) + }) = fix_file(&result.data, &locator, unsafe_fixes) { if iterations < MAX_ITERATIONS { // Count the number of fixed errors. 
@@ -600,15 +592,12 @@ pub fn lint_fix<'a>( continue; } - report_failed_to_converge_error(path, transformed.source_code(), &result.data.0); + report_failed_to_converge_error(path, transformed.source_code(), &result.data); } return Ok(FixerResult { - result: result.map(|(diagnostics, imports)| { - ( - diagnostics_to_messages(diagnostics, path, &locator, &directives), - imports, - ) + result: result.map(|diagnostics| { + diagnostics_to_messages(diagnostics, path, &locator, &directives) }), transformed, fixed, diff --git a/crates/ruff_linter/src/rules/pyflakes/mod.rs b/crates/ruff_linter/src/rules/pyflakes/mod.rs index 6f5673b1ec..a773661401 100644 --- a/crates/ruff_linter/src/rules/pyflakes/mod.rs +++ b/crates/ruff_linter/src/rules/pyflakes/mod.rs @@ -611,7 +611,7 @@ mod tests { &indexer, ); let LinterResult { - data: (mut diagnostics, ..), + data: mut diagnostics, .. } = check_path( Path::new(""), diff --git a/crates/ruff_linter/src/test.rs b/crates/ruff_linter/src/test.rs index ff9cf99da2..d83c5f5403 100644 --- a/crates/ruff_linter/src/test.rs +++ b/crates/ruff_linter/src/test.rs @@ -10,6 +10,9 @@ use itertools::Itertools; use rustc_hash::FxHashMap; use ruff_diagnostics::{Applicability, Diagnostic, FixAvailability}; +use ruff_notebook::Notebook; +#[cfg(not(fuzzing))] +use ruff_notebook::NotebookError; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; @@ -29,9 +32,6 @@ use crate::rules::pycodestyle::rules::syntax_error; use crate::settings::types::UnsafeFixes; use crate::settings::{flags, LinterSettings}; use crate::source_kind::SourceKind; -use ruff_notebook::Notebook; -#[cfg(not(fuzzing))] -use ruff_notebook::NotebookError; #[cfg(not(fuzzing))] pub(crate) fn test_resource_path(path: impl AsRef) -> std::path::PathBuf { @@ -123,7 +123,7 @@ pub(crate) fn test_contents<'a>( &indexer, ); let LinterResult { - data: (diagnostics, _imports), + data: diagnostics, error, } = check_path( path, @@ -190,7 +190,7 @@ pub(crate) 
fn test_contents<'a>( ); let LinterResult { - data: (fixed_diagnostics, _), + data: fixed_diagnostics, error: fixed_error, } = check_path( path, diff --git a/crates/ruff_python_ast/src/imports.rs b/crates/ruff_python_ast/src/imports.rs index 42eae9fa65..838819d357 100644 --- a/crates/ruff_python_ast/src/imports.rs +++ b/crates/ruff_python_ast/src/imports.rs @@ -1,8 +1,3 @@ -use ruff_text_size::TextRange; -use rustc_hash::FxHashMap; -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; - /// A representation of an individual name imported via any import statement. #[derive(Debug, Clone, PartialEq, Eq)] pub enum AnyImport<'a> { @@ -117,60 +112,3 @@ impl FutureImport for AnyImport<'_> { } } } - -/// A representation of a module reference in an import statement. -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct ModuleImport { - module: String, - range: TextRange, -} - -impl ModuleImport { - pub fn new(module: String, range: TextRange) -> Self { - Self { module, range } - } -} - -impl From<&ModuleImport> for TextRange { - fn from(import: &ModuleImport) -> TextRange { - import.range - } -} - -/// A representation of the import dependencies between modules. -#[derive(Debug, Clone, Default, PartialEq, Eq)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct ImportMap { - /// A map from dot-delimited module name to the list of imports in that module. 
- module_to_imports: FxHashMap>, -} - -impl ImportMap { - pub fn new() -> Self { - Self { - module_to_imports: FxHashMap::default(), - } - } - - pub fn insert(&mut self, module: String, imports_vec: Vec) { - self.module_to_imports.insert(module, imports_vec); - } - - pub fn extend(&mut self, other: Self) { - self.module_to_imports.extend(other.module_to_imports); - } - - pub fn iter(&self) -> std::collections::hash_map::Iter> { - self.module_to_imports.iter() - } -} - -impl<'a> IntoIterator for &'a ImportMap { - type IntoIter = std::collections::hash_map::Iter<'a, String, Vec>; - type Item = (&'a String, &'a Vec); - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} diff --git a/crates/ruff_server/src/lint.rs b/crates/ruff_server/src/lint.rs index 02475ad0b7..fdf4c54a77 100644 --- a/crates/ruff_server/src/lint.rs +++ b/crates/ruff_server/src/lint.rs @@ -79,8 +79,7 @@ pub(crate) fn check( // Generate checks. let LinterResult { - data: (diagnostics, _imports), - .. + data: diagnostics, .. } = check_path( &document_path, package, diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs index e6ca842f5d..c9dd3603e9 100644 --- a/crates/ruff_wasm/src/lib.rs +++ b/crates/ruff_wasm/src/lib.rs @@ -179,8 +179,7 @@ impl Workspace { // Generate checks. let LinterResult { - data: (diagnostics, _imports), - .. + data: diagnostics, .. 
} = check_path( Path::new(""), None, diff --git a/fuzz/fuzz_targets/ruff_formatter_validity.rs b/fuzz/fuzz_targets/ruff_formatter_validity.rs index 286509992f..3f8f7d886d 100644 --- a/fuzz/fuzz_targets/ruff_formatter_validity.rs +++ b/fuzz/fuzz_targets/ruff_formatter_validity.rs @@ -43,7 +43,7 @@ fn do_fuzz(case: &[u8]) -> Corpus { let mut warnings = HashMap::new(); - for msg in linter_results.data.0 { + for msg in linter_results.data { let count: &mut usize = warnings.entry(msg.kind.name).or_default(); *count += 1; } @@ -67,7 +67,7 @@ fn do_fuzz(case: &[u8]) -> Corpus { "formatter introduced a parse error" ); - for msg in linter_results.data.0 { + for msg in linter_results.data { if let Some(count) = warnings.get_mut(&msg.kind.name) { if let Some(decremented) = count.checked_sub(1) { *count = decremented;