diff --git a/crates/ruff_benchmark/benches/linter.rs b/crates/ruff_benchmark/benches/linter.rs index a98061fc82..b8f6bbd00d 100644 --- a/crates/ruff_benchmark/benches/linter.rs +++ b/crates/ruff_benchmark/benches/linter.rs @@ -2,7 +2,7 @@ use ruff_benchmark::criterion::{ criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion, Throughput, }; use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError}; -use ruff_linter::linter::lint_only; +use ruff_linter::linter::{lint_only, ParseSource}; use ruff_linter::rule_selector::PreviewOptions; use ruff_linter::settings::rule_table::RuleTable; use ruff_linter::settings::types::PreviewMode; @@ -10,6 +10,7 @@ use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::source_kind::SourceKind; use ruff_linter::{registry::Rule, RuleSelector}; use ruff_python_ast::PySourceType; +use ruff_python_parser::{lexer, parse_program_tokens, Mode}; #[cfg(target_os = "windows")] #[global_allocator] @@ -53,7 +54,13 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) { BenchmarkId::from_parameter(case.name()), &case, |b, case| { - let kind = SourceKind::Python(case.code().to_string()); + // Tokenize the source. + let tokens = lexer::lex(case.code(), Mode::Module).collect::<Vec<_>>(); + + // Parse the source. 
+ let ast = + parse_program_tokens(tokens.clone(), case.code(), case.name(), false).unwrap(); + b.iter(|| { let path = case.path(); let result = lint_only( @@ -61,8 +68,12 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) { None, settings, flags::Noqa::Enabled, - &kind, + &SourceKind::Python(case.code().to_string()), PySourceType::from(path.as_path()), + ParseSource::Precomputed { + tokens: &tokens, + ast: &ast, + }, ); // Assert that file contains no parse errors diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index 076963fee5..b29354b12e 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -12,7 +12,7 @@ use rustc_hash::FxHashMap; use crate::cache::{Cache, FileCacheKey, LintCacheData}; use ruff_diagnostics::Diagnostic; -use ruff_linter::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult}; +use ruff_linter::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult, ParseSource}; use ruff_linter::logging::DisplayParseError; use ruff_linter::message::Message; use ruff_linter::pyproject_toml::lint_pyproject_toml; @@ -303,12 +303,28 @@ pub(crate) fn lint_path( (result, fixed) } else { // If we fail to fix, lint the original source code. 
- let result = lint_only(path, package, settings, noqa, &source_kind, source_type); + let result = lint_only( + path, + package, + settings, + noqa, + &source_kind, + source_type, + ParseSource::None, + ); let fixed = FxHashMap::default(); (result, fixed) } } else { - let result = lint_only(path, package, settings, noqa, &source_kind, source_type); + let result = lint_only( + path, + package, + settings, + noqa, + &source_kind, + source_type, + ParseSource::None, + ); let fixed = FxHashMap::default(); (result, fixed) }; @@ -444,6 +460,7 @@ pub(crate) fn lint_stdin( noqa, &source_kind, source_type, + ParseSource::None, ); let fixed = FxHashMap::default(); @@ -462,6 +479,7 @@ pub(crate) fn lint_stdin( noqa, &source_kind, source_type, + ParseSource::None, ); let fixed = FxHashMap::default(); (result, fixed) diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs index f6382e8b30..2d81cc961c 100644 --- a/crates/ruff_linter/src/linter.rs +++ b/crates/ruff_linter/src/linter.rs @@ -11,7 +11,7 @@ use rustc_hash::FxHashMap; use ruff_diagnostics::Diagnostic; use ruff_notebook::Notebook; use ruff_python_ast::imports::ImportMap; -use ruff_python_ast::PySourceType; +use ruff_python_ast::{PySourceType, Suite}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; use ruff_python_parser::lexer::LexResult; @@ -73,7 +73,6 @@ pub struct FixerResult<'a> { pub fn check_path( path: &Path, package: Option<&Path>, - tokens: Vec<LexResult>, locator: &Locator, stylist: &Stylist, indexer: &Indexer, @@ -82,6 +81,7 @@ pub fn check_path( noqa: flags::Noqa, source_kind: &SourceKind, source_type: PySourceType, + tokens: TokenSource, ) -> LinterResult<(Vec<Diagnostic>, Option<ImportMap>)> { // Aggregate all diagnostics. 
let mut diagnostics = vec![]; @@ -144,12 +144,8 @@ pub fn check_path( .iter_enabled() .any(|rule_code| rule_code.lint_source().is_imports()); if use_ast || use_imports || use_doc_lines { - match ruff_python_parser::parse_program_tokens( - tokens, - source_kind.source_code(), - &path.to_string_lossy(), - source_type.is_ipynb(), - ) { + // Parse, if the AST wasn't pre-provided. + match tokens.into_ast_source(source_kind, source_type, path) { Ok(python_ast) => { let cell_offsets = source_kind.as_ipy_notebook().map(Notebook::cell_offsets); if use_ast { @@ -325,7 +321,6 @@ pub fn add_noqa_to_path( } = check_path( path, package, - tokens, &locator, &stylist, &indexer, @@ -334,6 +329,7 @@ pub fn add_noqa_to_path( flags::Noqa::Disabled, source_kind, source_type, + TokenSource::Tokens(tokens), ); // Log any parse errors. @@ -365,10 +361,10 @@ pub fn lint_only( noqa: flags::Noqa, source_kind: &SourceKind, source_type: PySourceType, + data: ParseSource, ) -> LinterResult<(Vec<Diagnostic>, Option<ImportMap>)> { // Tokenize once. - let tokens: Vec<LexResult> = - ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode()); + let tokens = data.into_token_source(source_kind, source_type); // Map row and column locations to byte slices (lazily). let locator = Locator::new(source_kind.source_code()); @@ -391,7 +387,6 @@ pub fn lint_only( let result = check_path( path, package, - tokens, &locator, &stylist, &indexer, @@ -400,6 +395,7 @@ pub fn lint_only( noqa, source_kind, source_type, + tokens, ); result.map(|(diagnostics, imports)| { @@ -487,7 +483,6 @@ pub fn lint_fix<'a>( let result = check_path( path, package, - tokens, &locator, &stylist, &indexer, @@ -496,6 +491,7 @@ pub fn lint_fix<'a>( noqa, &transformed, source_type, + TokenSource::Tokens(tokens), ); if iterations == 0 { @@ -632,6 +628,95 @@ This indicates a bug in Ruff. If you could open an issue at: } } +#[derive(Debug, Clone)] +pub enum ParseSource<'a> { + /// Extract the tokens and AST from the given source code. 
+ None, + /// Use the precomputed tokens and AST. + Precomputed { + tokens: &'a [LexResult], + ast: &'a Suite, + }, +} + +impl<'a> ParseSource<'a> { + /// Convert to a [`TokenSource`], tokenizing if necessary. + fn into_token_source( + self, + source_kind: &SourceKind, + source_type: PySourceType, + ) -> TokenSource<'a> { + match self { + Self::None => TokenSource::Tokens(ruff_python_parser::tokenize( + source_kind.source_code(), + source_type.as_mode(), + )), + Self::Precomputed { tokens, ast } => TokenSource::Precomputed { tokens, ast }, + } + } +} + +#[derive(Debug, Clone)] +pub enum TokenSource<'a> { + /// Use the precomputed tokens to generate the AST. + Tokens(Vec<LexResult>), + /// Use the precomputed tokens and AST. + Precomputed { + tokens: &'a [LexResult], + ast: &'a Suite, + }, +} + +impl Deref for TokenSource<'_> { + type Target = [LexResult]; + + fn deref(&self) -> &Self::Target { + match self { + Self::Tokens(tokens) => tokens, + Self::Precomputed { tokens, .. } => tokens, + } + } +} + +impl<'a> TokenSource<'a> { + /// Convert to an [`AstSource`], parsing if necessary. + fn into_ast_source( + self, + source_kind: &SourceKind, + source_type: PySourceType, + path: &Path, + ) -> Result<AstSource<'a>, ParseError> { + match self { + Self::Tokens(tokens) => Ok(AstSource::Ast(ruff_python_parser::parse_program_tokens( + tokens, + source_kind.source_code(), + &path.to_string_lossy(), + source_type.is_ipynb(), + )?)), + Self::Precomputed { ast, .. } => Ok(AstSource::Precomputed(ast)), + } + } +} + +#[derive(Debug, Clone)] +pub enum AstSource<'a> { + /// Extract the AST from the given source code. + Ast(Suite), + /// Use the precomputed AST. 
+ Precomputed(&'a Suite), +} + +impl Deref for AstSource<'_> { + type Target = Suite; + + fn deref(&self) -> &Self::Target { + match self { + Self::Ast(ast) => ast, + Self::Precomputed(ast) => ast, + } + } +} + #[cfg(test)] mod tests { use std::path::Path; diff --git a/crates/ruff_linter/src/rules/pyflakes/mod.rs b/crates/ruff_linter/src/rules/pyflakes/mod.rs index e4a313f54b..84b54f9126 100644 --- a/crates/ruff_linter/src/rules/pyflakes/mod.rs +++ b/crates/ruff_linter/src/rules/pyflakes/mod.rs @@ -23,7 +23,7 @@ mod tests { use ruff_source_file::Locator; use ruff_text_size::Ranged; - use crate::linter::{check_path, LinterResult}; + use crate::linter::{check_path, LinterResult, TokenSource}; use crate::registry::{AsRule, Linter, Rule}; use crate::rules::pyflakes; use crate::settings::types::PreviewMode; @@ -560,7 +560,6 @@ mod tests { } = check_path( Path::new(""), None, - tokens, &locator, &stylist, &indexer, @@ -569,6 +568,7 @@ mod tests { flags::Noqa::Enabled, &source_kind, source_type, + TokenSource::Tokens(tokens), ); diagnostics.sort_by_key(Ranged::start); let actual = diagnostics diff --git a/crates/ruff_linter/src/test.rs b/crates/ruff_linter/src/test.rs index d193e49cc9..9f7ec4d802 100644 --- a/crates/ruff_linter/src/test.rs +++ b/crates/ruff_linter/src/test.rs @@ -21,7 +21,7 @@ use ruff_text_size::Ranged; use crate::directives; use crate::fix::{fix_file, FixResult}; -use crate::linter::{check_path, LinterResult}; +use crate::linter::{check_path, LinterResult, TokenSource}; use crate::message::{Emitter, EmitterContext, Message, TextEmitter}; use crate::packaging::detect_package_root; use crate::registry::AsRule; @@ -129,7 +129,6 @@ pub(crate) fn test_contents<'a>( path, path.parent() .and_then(|parent| detect_package_root(parent, &settings.namespace_packages)), - tokens, &locator, &stylist, &indexer, @@ -138,6 +137,7 @@ pub(crate) fn test_contents<'a>( flags::Noqa::Enabled, source_kind, source_type, + TokenSource::Tokens(tokens), ); let source_has_errors = 
error.is_some(); @@ -195,7 +195,6 @@ pub(crate) fn test_contents<'a>( } = check_path( path, None, - tokens, &locator, &stylist, &indexer, @@ -204,6 +203,7 @@ pub(crate) fn test_contents<'a>( flags::Noqa::Enabled, &transformed, source_type, + TokenSource::Tokens(tokens), ); if let Some(fixed_error) = fixed_error { diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 3831bc28d0..180c2839fe 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -1293,7 +1293,7 @@ impl FusedIterator for Lexer<'_> {} /// [lexer] implementation. /// /// [lexer]: crate::lexer -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct LexicalError { /// The type of error that occurred. pub error: LexicalErrorType, @@ -1309,7 +1309,7 @@ impl LexicalError { } /// Represents the different types of errors that can occur during lexing. -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum LexicalErrorType { // TODO: Can probably be removed, the places it is used seem to be able // to use the `UnicodeError` variant instead. diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs index ab9106ff31..3abef4c8a4 100644 --- a/crates/ruff_python_parser/src/string.rs +++ b/crates/ruff_python_parser/src/string.rs @@ -409,7 +409,7 @@ pub(crate) fn concatenated_strings( // TODO: consolidate these with ParseError /// An error that occurred during parsing of an f-string. -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] struct FStringError { /// The type of error that occurred. pub(crate) error: FStringErrorType, @@ -427,7 +427,7 @@ impl From<FStringError> for LexicalError { } /// Represents the different types of errors that can occur during parsing of an f-string. -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum FStringErrorType { /// Expected a right brace after an opened left brace. 
UnclosedLbrace, diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs index a067cde03e..5316fcf7dd 100644 --- a/crates/ruff_wasm/src/lib.rs +++ b/crates/ruff_wasm/src/lib.rs @@ -7,7 +7,7 @@ use wasm_bindgen::prelude::*; use ruff_formatter::{FormatResult, Formatted, IndentStyle}; use ruff_linter::directives; use ruff_linter::line_width::{IndentWidth, LineLength}; -use ruff_linter::linter::{check_path, LinterResult}; +use ruff_linter::linter::{check_path, LinterResult, TokenSource}; use ruff_linter::registry::AsRule; use ruff_linter::settings::types::PythonVersion; use ruff_linter::settings::{flags, DEFAULT_SELECTORS, DUMMY_VARIABLE_RGX}; @@ -182,7 +182,6 @@ impl Workspace { } = check_path( Path::new(""), None, - tokens, &locator, &stylist, &indexer, @@ -191,6 +190,7 @@ impl Workspace { flags::Noqa::Enabled, &source_kind, source_type, + TokenSource::Tokens(tokens), ); let source_code = locator.to_source_code();