diff --git a/Cargo.lock b/Cargo.lock index 0a8a6690d2..f652703645 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1960,7 +1960,6 @@ dependencies = [ "ruff_linter", "ruff_python_ast", "ruff_python_formatter", - "ruff_python_index", "ruff_python_parser", "serde", "serde_json", @@ -2008,6 +2007,7 @@ dependencies = [ "ruff_python_parser", "ruff_python_stdlib", "ruff_python_trivia", + "ruff_text_size", "ruff_workspace", "schemars", "serde", @@ -2184,6 +2184,7 @@ dependencies = [ "ruff_python_literal", "ruff_python_parser", "ruff_source_file", + "ruff_text_size", ] [[package]] @@ -2202,7 +2203,6 @@ dependencies = [ "ruff_formatter", "ruff_macros", "ruff_python_ast", - "ruff_python_index", "ruff_python_parser", "ruff_python_trivia", "ruff_source_file", @@ -2253,6 +2253,7 @@ dependencies = [ "itertools 0.13.0", "memchr", "ruff_python_ast", + "ruff_python_trivia", "ruff_source_file", "ruff_text_size", "rustc-hash", @@ -2310,7 +2311,6 @@ name = "ruff_python_trivia_integration_tests" version = "0.0.0" dependencies = [ "insta", - "ruff_python_index", "ruff_python_parser", "ruff_python_trivia", "ruff_source_file", @@ -2385,7 +2385,6 @@ dependencies = [ "ruff_python_formatter", "ruff_python_index", "ruff_python_parser", - "ruff_python_trivia", "ruff_source_file", "ruff_text_size", "ruff_workspace", diff --git a/crates/red_knot/src/parse.rs b/crates/red_knot/src/parse.rs index 6856315dcb..4e3cd4d422 100644 --- a/crates/red_knot/src/parse.rs +++ b/crates/red_knot/src/parse.rs @@ -32,17 +32,19 @@ impl Parsed { let result = ruff_python_parser::parse(text, Mode::Module); let (module, errors) = match result { - Ok(ast::Mod::Module(module)) => (module, vec![]), - Ok(ast::Mod::Expression(expression)) => ( - ast::ModModule { - range: expression.range(), - body: vec![ast::Stmt::Expr(ast::StmtExpr { + Ok(parsed) => match parsed.into_syntax() { + ast::Mod::Module(module) => (module, vec![]), + ast::Mod::Expression(expression) => ( + ast::ModModule { range: expression.range(), - value: expression.body, - })], - }, - vec![], - ), + body: vec![ast::Stmt::Expr(ast::StmtExpr { + range: expression.range(), + value: expression.body, + })], + }, + vec![], + ), + }, Err(errors) => ( ast::ModModule { range: TextRange::default(), diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index d631472f23..e8ff3a09b8 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -44,7 +44,6 @@ codspeed-criterion-compat = { workspace = true, default-features = false, option ruff_linter = { workspace = true } ruff_python_ast = { workspace = true } ruff_python_formatter = { workspace = true } -ruff_python_index = { workspace = true } ruff_python_parser = { workspace = true } [lints] diff --git a/crates/ruff_benchmark/benches/formatter.rs b/crates/ruff_benchmark/benches/formatter.rs index 98c3a97f2c..cb6db8608f 100644 --- a/crates/ruff_benchmark/benches/formatter.rs +++ b/crates/ruff_benchmark/benches/formatter.rs @@ -5,9 +5,7 @@ use ruff_benchmark::criterion::{ }; use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError}; use ruff_python_formatter::{format_module_ast, PreviewMode, PyFormatOptions}; -use ruff_python_index::CommentRangesBuilder; -use ruff_python_parser::lexer::lex; -use ruff_python_parser::{allocate_tokens_vec, parse_tokens, Mode}; +use ruff_python_parser::{parse, Mode}; #[cfg(target_os = "windows")] #[global_allocator] @@ -52,28 +50,15 @@ fn benchmark_formatter(criterion: &mut Criterion) { BenchmarkId::from_parameter(case.name()), &case, |b, case| { - let mut 
tokens = allocate_tokens_vec(case.code());
-                let mut comment_ranges = CommentRangesBuilder::default();
-
-                for result in lex(case.code(), Mode::Module) {
-                    let (token, range) = result.expect("Input to be a valid python program.");
-
-                    comment_ranges.visit_token(&token, range);
-                    tokens.push(Ok((token, range)));
-                }
-
-                let comment_ranges = comment_ranges.finish();
-
-                // Parse the AST.
-                let module = parse_tokens(tokens, case.code(), Mode::Module)
-                    .expect("Input to be a valid python program");
+                // Parse the source.
+                let parsed =
+                    parse(case.code(), Mode::Module).expect("Input should be valid Python code");

                 b.iter(|| {
                     let options = PyFormatOptions::from_extension(Path::new(case.name()))
                         .with_preview(PreviewMode::Enabled);
-                    let formatted =
-                        format_module_ast(&module, &comment_ranges, case.code(), options)
-                            .expect("Formatting to succeed");
+                    let formatted = format_module_ast(&parsed, case.code(), options)
+                        .expect("Formatting to succeed");

                     formatted.print().expect("Printing to succeed")
                 });
diff --git a/crates/ruff_benchmark/benches/lexer.rs b/crates/ruff_benchmark/benches/lexer.rs
index c31cb84b5e..64b68a7a35 100644
--- a/crates/ruff_benchmark/benches/lexer.rs
+++ b/crates/ruff_benchmark/benches/lexer.rs
@@ -2,7 +2,7 @@ use ruff_benchmark::criterion::{
     criterion_group, criterion_main, measurement::WallTime, BenchmarkId, Criterion, Throughput,
 };
 use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
-use ruff_python_parser::{lexer, Mode};
+use ruff_python_parser::{lexer, Mode, TokenKind};

 #[cfg(target_os = "windows")]
 #[global_allocator]
@@ -47,9 +47,15 @@ fn benchmark_lexer(criterion: &mut Criterion) {
             &case,
             |b, case| {
                 b.iter(|| {
-                    let result =
-                        lexer::lex(case.code(), Mode::Module).find(std::result::Result::is_err);
-                    assert_eq!(result, None, "Input to be a valid Python program");
+                    let mut lexer = lexer::lex(case.code(), Mode::Module);
+                    loop {
+                        let token = lexer.next_token();
+                        match token {
+                            TokenKind::EndOfFile => break,
+                            TokenKind::Unknown => panic!("Input should be valid Python source code"),
+                            _ => {}
+                        }
+                    }
                 });
             },
         );
diff --git a/crates/ruff_benchmark/benches/linter.rs b/crates/ruff_benchmark/benches/linter.rs
index fcc1d7da42..1301d9e7cc 100644
--- a/crates/ruff_benchmark/benches/linter.rs
+++ b/crates/ruff_benchmark/benches/linter.rs
@@ -10,7 +10,7 @@ use ruff_linter::settings::{flags, LinterSettings};
 use ruff_linter::source_kind::SourceKind;
 use ruff_linter::{registry::Rule, RuleSelector};
 use ruff_python_ast::PySourceType;
-use ruff_python_parser::{parse_program_tokens, tokenize, Mode};
+use ruff_python_parser::parse_module;

 #[cfg(target_os = "windows")]
 #[global_allocator]
@@ -54,15 +54,13 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) {
             BenchmarkId::from_parameter(case.name()),
             &case,
             |b, case| {
-                // Tokenize the source.
-                let tokens = tokenize(case.code(), Mode::Module);
-
                 // Parse the source.
-                let ast = parse_program_tokens(tokens.clone(), case.code(), false).unwrap();
+                let parsed =
+                    parse_module(case.code()).expect("Input should be valid Python code");

                 b.iter_batched(
-                    || (ast.clone(), tokens.clone()),
-                    |(ast, tokens)| {
+                    || parsed.clone(),
+                    |parsed| {
                         let path = case.path();
                         let result = lint_only(
                             &path,
@@ -71,7 +69,7 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) {
                             flags::Noqa::Enabled,
                             &SourceKind::Python(case.code().to_string()),
                             PySourceType::from(path.as_path()),
-                            ParseSource::Precomputed { tokens, ast },
+                            ParseSource::Precomputed(parsed),
                         );

                         // Assert that file contains no parse errors
diff --git a/crates/ruff_benchmark/benches/parser.rs b/crates/ruff_benchmark/benches/parser.rs
index 0aca2772f9..ec2fa671c1 100644
--- a/crates/ruff_benchmark/benches/parser.rs
+++ b/crates/ruff_benchmark/benches/parser.rs
@@ -4,7 +4,7 @@ use ruff_benchmark::criterion::{
 use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
 use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor};
 use ruff_python_ast::Stmt;
-use ruff_python_parser::parse_suite;
+use ruff_python_parser::parse_module;

 #[cfg(target_os = "windows")]
 #[global_allocator]
@@ -60,7 +60,9 @@ fn benchmark_parser(criterion: &mut Criterion) {
             &case,
             |b, case| {
                 b.iter(|| {
-                    let parsed = parse_suite(case.code()).unwrap();
+                    let parsed = parse_module(case.code())
+                        .expect("Input should be valid Python code")
+                        .into_suite();

                     let mut visitor = CountVisitor { count: 0 };
                     visitor.visit_body(&parsed);
diff --git a/crates/ruff_dev/Cargo.toml b/crates/ruff_dev/Cargo.toml
index 632c12f473..d5ccc937fd 100644
--- a/crates/ruff_dev/Cargo.toml
+++ b/crates/ruff_dev/Cargo.toml
@@ -22,6 +22,7 @@ ruff_python_formatter = { workspace = true }
 ruff_python_parser = { workspace = true }
 ruff_python_stdlib = { workspace = true }
 ruff_python_trivia = { workspace = true }
+ruff_text_size = { workspace = true }
 ruff_workspace = { workspace = true, features = ["schemars"] }

 anyhow = { workspace = true }
diff --git a/crates/ruff_dev/src/print_ast.rs b/crates/ruff_dev/src/print_ast.rs
index cb72d0403a..35206ca45e 100644
--- a/crates/ruff_dev/src/print_ast.rs
+++ b/crates/ruff_dev/src/print_ast.rs
@@ -24,7 +24,7 @@ pub(crate) fn main(args: &Args) -> Result<()> {
             args.file.display()
         )
     })?;
-    let python_ast = parse(source_kind.source_code(), source_type.as_mode())?;
+    let python_ast = parse(source_kind.source_code(), source_type.as_mode())?.into_syntax();
     println!("{python_ast:#?}");
     Ok(())
 }
diff --git a/crates/ruff_dev/src/print_tokens.rs b/crates/ruff_dev/src/print_tokens.rs
index a36f9a2c60..c767727fdd 100644
--- a/crates/ruff_dev/src/print_tokens.rs
+++ b/crates/ruff_dev/src/print_tokens.rs
@@ -7,7 +7,8 @@
 use anyhow::Result;
 use ruff_linter::source_kind::SourceKind;
 use ruff_python_ast::PySourceType;
-use ruff_python_parser::{lexer, AsMode};
+use ruff_python_parser::parse_unchecked_source;
+use ruff_text_size::Ranged;

 #[derive(clap::Args)]
 pub(crate) struct Args {
@@ -24,11 +25,13 @@ pub(crate) fn main(args: &Args) -> Result<()> {
             args.file.display()
         )
     })?;
-    for (tok, range) in lexer::lex(source_kind.source_code(), source_type.as_mode()).flatten() {
+    let parsed = parse_unchecked_source(source_kind.source_code(), source_type);
+    for token in parsed.tokens() {
         println!(
-            "{start:#?} {tok:#?} {end:#?}",
-            start = range.start(),
-            end = range.end()
+            "{start:#?} {kind:#?} {end:#?}",
+            start = token.start(),
+            end = token.end(),
+            kind = token.kind(),
         );
     }
     Ok(())
 }
diff --git
a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index 4407dadc21..9c12ac0333 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -1160,7 +1160,7 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { } } if checker.enabled(Rule::PrintfStringFormatting) { - pyupgrade::rules::printf_string_formatting(checker, expr, right); + pyupgrade::rules::printf_string_formatting(checker, bin_op, format_string); } if checker.enabled(Rule::BadStringFormatCharacter) { pylint::rules::bad_string_format_character::percent( diff --git a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs index 94419de40f..70561392e7 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs @@ -765,7 +765,7 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { pyupgrade::rules::deprecated_c_element_tree(checker, stmt); } if checker.enabled(Rule::DeprecatedImport) { - pyupgrade::rules::deprecated_import(checker, stmt, names, module, level); + pyupgrade::rules::deprecated_import(checker, import_from); } if checker.enabled(Rule::UnnecessaryBuiltinImport) { if let Some(module) = module { diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index faaf41595a..5f26244df7 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -32,8 +32,10 @@ use itertools::Itertools; use log::debug; use ruff_python_ast::{ self as ast, AnyParameterRef, Comprehension, ElifElseClause, ExceptHandler, Expr, ExprContext, - FStringElement, Keyword, MatchCase, Parameter, Parameters, Pattern, Stmt, Suite, UnaryOp, + FStringElement, Keyword, MatchCase, ModModule, Parameter, Parameters, Pattern, Stmt, Suite, + UnaryOp, }; +use ruff_python_parser::Parsed; use ruff_text_size::{Ranged, TextRange, TextSize}; use ruff_diagnostics::{Diagnostic, IsolationLevel}; @@ -174,6 +176,8 @@ impl ExpectedDocstringKind { } pub(crate) struct Checker<'a> { + /// The parsed [`Parsed`]. + parsed: &'a Parsed, /// The [`Path`] to the file under analysis. path: &'a Path, /// The [`Path`] to the package containing the current file. @@ -223,6 +227,7 @@ pub(crate) struct Checker<'a> { impl<'a> Checker<'a> { #[allow(clippy::too_many_arguments)] pub(crate) fn new( + parsed: &'a Parsed, settings: &'a LinterSettings, noqa_line_for: &'a NoqaMapping, noqa: flags::Noqa, @@ -232,12 +237,12 @@ impl<'a> Checker<'a> { locator: &'a Locator, stylist: &'a Stylist, indexer: &'a Indexer, - importer: Importer<'a>, source_type: PySourceType, cell_offsets: Option<&'a CellOffsets>, notebook_index: Option<&'a NotebookIndex>, ) -> Checker<'a> { Checker { + parsed, settings, noqa_line_for, noqa, @@ -248,7 +253,7 @@ impl<'a> Checker<'a> { locator, stylist, indexer, - importer, + importer: Importer::new(parsed, locator, stylist), semantic: SemanticModel::new(&settings.typing_modules, path, module), visit: deferred::Visit::default(), analyze: deferred::Analyze::default(), @@ -318,6 +323,11 @@ impl<'a> Checker<'a> { } } + /// The [`Parsed`] output for the current file, which contains the tokens, AST, and more. + pub(crate) const fn parsed(&self) -> &'a Parsed { + self.parsed + } + /// The [`Locator`] for the current file, which enables extraction of source code from byte /// offsets. 
pub(crate) const fn locator(&self) -> &'a Locator<'a> { @@ -2326,7 +2336,7 @@ impl<'a> Checker<'a> { #[allow(clippy::too_many_arguments)] pub(crate) fn check_ast( - python_ast: &Suite, + parsed: &Parsed, locator: &Locator, stylist: &Stylist, indexer: &Indexer, @@ -2356,10 +2366,11 @@ pub(crate) fn check_ast( } else { ModuleSource::File(path) }, - python_ast, + python_ast: parsed.suite(), }; let mut checker = Checker::new( + parsed, settings, noqa_line_for, noqa, @@ -2369,7 +2380,6 @@ pub(crate) fn check_ast( locator, stylist, indexer, - Importer::new(python_ast, locator, stylist), source_type, cell_offsets, notebook_index, @@ -2377,8 +2387,8 @@ pub(crate) fn check_ast( checker.bind_builtins(); // Iterate over the AST. - checker.visit_module(python_ast); - checker.visit_body(python_ast); + checker.visit_module(parsed.suite()); + checker.visit_body(parsed.suite()); // Visit any deferred syntax nodes. Take care to visit in order, such that we avoid adding // new deferred nodes after visiting nodes of that kind. For example, visiting a deferred diff --git a/crates/ruff_linter/src/checkers/filesystem.rs b/crates/ruff_linter/src/checkers/filesystem.rs index 2d9a3431e6..c71db50cb3 100644 --- a/crates/ruff_linter/src/checkers/filesystem.rs +++ b/crates/ruff_linter/src/checkers/filesystem.rs @@ -1,7 +1,7 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use crate::registry::Rule; @@ -13,7 +13,7 @@ pub(crate) fn check_file_path( path: &Path, package: Option<&Path>, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, settings: &LinterSettings, ) -> Vec { let mut diagnostics: Vec = vec![]; @@ -24,7 +24,7 @@ pub(crate) fn check_file_path( path, package, locator, - indexer, + comment_ranges, &settings.project_root, &settings.src, ) { diff --git a/crates/ruff_linter/src/checkers/imports.rs b/crates/ruff_linter/src/checkers/imports.rs index 2bc19b7412..c2cc0fccb4 100644 --- a/crates/ruff_linter/src/checkers/imports.rs +++ b/crates/ruff_linter/src/checkers/imports.rs @@ -4,9 +4,10 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; use ruff_notebook::CellOffsets; use ruff_python_ast::statement_visitor::StatementVisitor; -use ruff_python_ast::{PySourceType, Suite}; +use ruff_python_ast::{ModModule, PySourceType}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; +use ruff_python_parser::Parsed; use ruff_source_file::Locator; use crate::directives::IsortDirectives; @@ -17,7 +18,7 @@ use crate::settings::LinterSettings; #[allow(clippy::too_many_arguments)] pub(crate) fn check_imports( - python_ast: &Suite, + parsed: &Parsed, locator: &Locator, indexer: &Indexer, directives: &IsortDirectives, @@ -31,7 +32,7 @@ pub(crate) fn check_imports( let tracker = { let mut tracker = BlockBuilder::new(locator, directives, source_type.is_stub(), cell_offsets); - tracker.visit_body(python_ast); + tracker.visit_body(parsed.suite()); tracker }; @@ -50,6 +51,7 @@ pub(crate) fn check_imports( settings, package, source_type, + parsed, ) { diagnostics.push(diagnostic); } @@ -58,7 +60,7 @@ pub(crate) fn check_imports( } if settings.rules.enabled(Rule::MissingRequiredImport) { diagnostics.extend(isort::rules::add_required_imports( - python_ast, + parsed, locator, stylist, settings, diff --git a/crates/ruff_linter/src/checkers/logical_lines.rs b/crates/ruff_linter/src/checkers/logical_lines.rs index 4044e6c18a..ef9a7a8dae 100644 --- 
a/crates/ruff_linter/src/checkers/logical_lines.rs +++ b/crates/ruff_linter/src/checkers/logical_lines.rs @@ -2,8 +2,7 @@ use crate::line_width::IndentWidth; use ruff_diagnostics::Diagnostic; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::TokenKind; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -34,7 +33,7 @@ pub(crate) fn expand_indent(line: &str, indent_width: IndentWidth) -> usize { } pub(crate) fn check_logical_lines( - tokens: &[LexResult], + tokens: &Tokens, locator: &Locator, indexer: &Indexer, stylist: &Stylist, diff --git a/crates/ruff_linter/src/checkers/physical_lines.rs b/crates/ruff_linter/src/checkers/physical_lines.rs index fbb9abff63..938c6be6e4 100644 --- a/crates/ruff_linter/src/checkers/physical_lines.rs +++ b/crates/ruff_linter/src/checkers/physical_lines.rs @@ -3,6 +3,7 @@ use ruff_diagnostics::Diagnostic; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::TextSize; @@ -19,6 +20,7 @@ pub(crate) fn check_physical_lines( locator: &Locator, stylist: &Stylist, indexer: &Indexer, + comment_ranges: &CommentRanges, doc_lines: &[TextSize], settings: &LinterSettings, ) -> Vec { @@ -42,7 +44,7 @@ pub(crate) fn check_physical_lines( .is_some() { if enforce_doc_line_too_long { - if let Some(diagnostic) = doc_line_too_long(&line, indexer, settings) { + if let Some(diagnostic) = doc_line_too_long(&line, comment_ranges, settings) { diagnostics.push(diagnostic); } } @@ -55,7 +57,7 @@ pub(crate) fn check_physical_lines( } if enforce_line_too_long { - if let Some(diagnostic) = line_too_long(&line, indexer, settings) { + if let Some(diagnostic) = line_too_long(&line, comment_ranges, settings) { diagnostics.push(diagnostic); } } @@ -90,8 +92,7 @@ pub(crate) fn check_physical_lines( mod tests { use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; - use ruff_python_parser::lexer::lex; - use ruff_python_parser::Mode; + use ruff_python_parser::parse_module; use ruff_source_file::Locator; use crate::line_width::LineLength; @@ -105,15 +106,16 @@ mod tests { fn e501_non_ascii_char() { let line = "'\u{4e9c}' * 2"; // 7 in UTF-32, 9 in UTF-8. 
let locator = Locator::new(line); - let tokens: Vec<_> = lex(line, Mode::Module).collect(); - let indexer = Indexer::from_tokens(&tokens, &locator); - let stylist = Stylist::from_tokens(&tokens, &locator); + let parsed = parse_module(line).unwrap(); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); let check_with_max_line_length = |line_length: LineLength| { check_physical_lines( &locator, &stylist, &indexer, + parsed.comment_ranges(), &[], &LinterSettings { pycodestyle: pycodestyle::settings::Settings { diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index 3f6e430f01..0c59df7857 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -3,15 +3,16 @@ use std::path::Path; use ruff_notebook::CellOffsets; -use ruff_python_ast::PySourceType; +use ruff_python_ast::{ModModule, PySourceType}; use ruff_python_codegen::Stylist; use ruff_diagnostics::Diagnostic; use ruff_python_index::Indexer; +use ruff_python_parser::Parsed; use ruff_source_file::Locator; +use ruff_text_size::Ranged; use crate::directives::TodoComment; -use crate::linter::TokenSource; use crate::registry::{AsRule, Rule}; use crate::rules::pycodestyle::rules::BlankLinesChecker; use crate::rules::{ @@ -22,7 +23,7 @@ use crate::settings::LinterSettings; #[allow(clippy::too_many_arguments)] pub(crate) fn check_tokens( - tokens: &TokenSource, + parsed: &Parsed, path: &Path, locator: &Locator, indexer: &Indexer, @@ -33,6 +34,9 @@ pub(crate) fn check_tokens( ) -> Vec { let mut diagnostics: Vec = vec![]; + let tokens = parsed.tokens(); + let comment_ranges = parsed.comment_ranges(); + if settings.rules.any_enabled(&[ Rule::BlankLineBetweenMethods, Rule::BlankLinesTopLevel, @@ -42,22 +46,22 @@ pub(crate) fn check_tokens( Rule::BlankLinesBeforeNestedDefinition, ]) { BlankLinesChecker::new(locator, stylist, settings, source_type, cell_offsets) - .check_lines(tokens.kinds(), &mut diagnostics); + .check_lines(tokens, &mut diagnostics); } if settings.rules.enabled(Rule::BlanketTypeIgnore) { - pygrep_hooks::rules::blanket_type_ignore(&mut diagnostics, indexer, locator); + pygrep_hooks::rules::blanket_type_ignore(&mut diagnostics, comment_ranges, locator); } if settings.rules.enabled(Rule::EmptyComment) { - pylint::rules::empty_comments(&mut diagnostics, indexer, locator); + pylint::rules::empty_comments(&mut diagnostics, comment_ranges, locator); } if settings .rules .enabled(Rule::AmbiguousUnicodeCharacterComment) { - for range in indexer.comment_ranges() { + for range in comment_ranges { ruff::rules::ambiguous_unicode_character_comment( &mut diagnostics, locator, @@ -68,11 +72,16 @@ pub(crate) fn check_tokens( } if settings.rules.enabled(Rule::CommentedOutCode) { - eradicate::rules::commented_out_code(&mut diagnostics, locator, indexer, settings); + eradicate::rules::commented_out_code(&mut diagnostics, locator, comment_ranges, settings); } if settings.rules.enabled(Rule::UTF8EncodingDeclaration) { - pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, indexer); + pyupgrade::rules::unnecessary_coding_comment( + &mut diagnostics, + locator, + indexer, + comment_ranges, + ); } if settings.rules.enabled(Rule::TabIndentation) { @@ -86,8 +95,13 @@ pub(crate) fn check_tokens( Rule::InvalidCharacterNul, Rule::InvalidCharacterZeroWidthSpace, ]) { - for (token, range) in tokens.kinds() { - pylint::rules::invalid_string_characters(&mut diagnostics, token, 
range, locator); + for token in tokens.up_to_first_unknown() { + pylint::rules::invalid_string_characters( + &mut diagnostics, + token.kind(), + token.range(), + locator, + ); } } @@ -98,7 +112,7 @@ pub(crate) fn check_tokens( ]) { pycodestyle::rules::compound_statements( &mut diagnostics, - tokens.kinds(), + tokens, locator, indexer, source_type, @@ -112,7 +126,7 @@ pub(crate) fn check_tokens( ]) { flake8_implicit_str_concat::rules::implicit( &mut diagnostics, - tokens.kinds(), + tokens, settings, locator, indexer, @@ -124,15 +138,15 @@ pub(crate) fn check_tokens( Rule::TrailingCommaOnBareTuple, Rule::ProhibitedTrailingComma, ]) { - flake8_commas::rules::trailing_commas(&mut diagnostics, tokens.kinds(), locator, indexer); + flake8_commas::rules::trailing_commas(&mut diagnostics, tokens, locator, indexer); } if settings.rules.enabled(Rule::ExtraneousParentheses) { - pyupgrade::rules::extraneous_parentheses(&mut diagnostics, tokens.kinds(), locator); + pyupgrade::rules::extraneous_parentheses(&mut diagnostics, tokens, locator); } if source_type.is_stub() && settings.rules.enabled(Rule::TypeCommentInStub) { - flake8_pyi::rules::type_comment_in_stub(&mut diagnostics, locator, indexer); + flake8_pyi::rules::type_comment_in_stub(&mut diagnostics, locator, comment_ranges); } if settings.rules.any_enabled(&[ @@ -142,7 +156,7 @@ pub(crate) fn check_tokens( Rule::ShebangNotFirstLine, Rule::ShebangMissingPython, ]) { - flake8_executable::rules::from_tokens(&mut diagnostics, path, locator, indexer); + flake8_executable::rules::from_tokens(&mut diagnostics, path, locator, comment_ranges); } if settings.rules.any_enabled(&[ @@ -158,8 +172,7 @@ pub(crate) fn check_tokens( Rule::LineContainsTodo, Rule::LineContainsHack, ]) { - let todo_comments: Vec = indexer - .comment_ranges() + let todo_comments: Vec = comment_ranges .iter() .enumerate() .filter_map(|(i, comment_range)| { @@ -167,12 +180,12 @@ pub(crate) fn check_tokens( TodoComment::from_comment(comment, *comment_range, i) }) .collect(); - flake8_todos::rules::todos(&mut diagnostics, &todo_comments, locator, indexer); + flake8_todos::rules::todos(&mut diagnostics, &todo_comments, locator, comment_ranges); flake8_fixme::rules::todos(&mut diagnostics, &todo_comments); } if settings.rules.enabled(Rule::TooManyNewlinesAtEndOfFile) { - pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens.kinds()); + pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens); } diagnostics.retain(|diagnostic| settings.rules.enabled(diagnostic.kind.rule())); diff --git a/crates/ruff_linter/src/directives.rs b/crates/ruff_linter/src/directives.rs index 8c6c74b83f..398d02696a 100644 --- a/crates/ruff_linter/src/directives.rs +++ b/crates/ruff_linter/src/directives.rs @@ -4,9 +4,9 @@ use std::iter::Peekable; use std::str::FromStr; use bitflags::bitflags; -use ruff_python_ast::StringFlags; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::Tok; +use ruff_python_ast::ModModule; +use ruff_python_parser::{Parsed, TokenKind, Tokens}; +use ruff_python_trivia::CommentRanges; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use ruff_python_index::Indexer; @@ -52,19 +52,19 @@ pub struct Directives { } pub fn extract_directives( - lxr: &[LexResult], + parsed: &Parsed, flags: Flags, locator: &Locator, indexer: &Indexer, ) -> Directives { Directives { noqa_line_for: if flags.intersects(Flags::NOQA) { - extract_noqa_line_for(lxr, locator, indexer) + extract_noqa_line_for(parsed.tokens(), locator, indexer) } else { 
NoqaMapping::default() }, isort: if flags.intersects(Flags::ISORT) { - extract_isort_directives(locator, indexer) + extract_isort_directives(locator, parsed.comment_ranges()) } else { IsortDirectives::default() }, @@ -105,22 +105,22 @@ where } /// Extract a mapping from logical line to noqa line. -fn extract_noqa_line_for(lxr: &[LexResult], locator: &Locator, indexer: &Indexer) -> NoqaMapping { +fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer) -> NoqaMapping { let mut string_mappings = Vec::new(); - for (tok, range) in lxr.iter().flatten() { - match tok { - Tok::EndOfFile => { + for token in tokens.up_to_first_unknown() { + match token.kind() { + TokenKind::EndOfFile => { break; } // For multi-line strings, we expect `noqa` directives on the last line of the // string. - Tok::String { flags, .. } if flags.is_triple_quoted() => { - if locator.contains_line_break(*range) { + TokenKind::String if token.is_triple_quoted_string() => { + if locator.contains_line_break(token.range()) { string_mappings.push(TextRange::new( - locator.line_start(range.start()), - range.end(), + locator.line_start(token.start()), + token.end(), )); } } @@ -197,12 +197,12 @@ fn extract_noqa_line_for(lxr: &[LexResult], locator: &Locator, indexer: &Indexer } /// Extract a set of ranges over which to disable isort. -fn extract_isort_directives(locator: &Locator, indexer: &Indexer) -> IsortDirectives { +fn extract_isort_directives(locator: &Locator, comment_ranges: &CommentRanges) -> IsortDirectives { let mut exclusions: Vec = Vec::default(); let mut splits: Vec = Vec::default(); let mut off: Option = None; - for range in indexer.comment_ranges() { + for range in comment_ranges { let comment_text = locator.slice(range); // `isort` allows for `# isort: skip` and `# isort: skip_file` to include or @@ -379,8 +379,7 @@ impl TodoDirectiveKind { #[cfg(test)] mod tests { - use ruff_python_parser::lexer::LexResult; - use ruff_python_parser::{lexer, Mode}; + use ruff_python_parser::parse_module; use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_python_index::Indexer; @@ -391,12 +390,14 @@ mod tests { }; use crate::noqa::NoqaMapping; - fn noqa_mappings(contents: &str) -> NoqaMapping { - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); + use super::IsortDirectives; - extract_noqa_line_for(&lxr, &locator, &indexer) + fn noqa_mappings(contents: &str) -> NoqaMapping { + let parsed = parse_module(contents).unwrap(); + let locator = Locator::new(contents); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); + + extract_noqa_line_for(parsed.tokens(), &locator, &indexer) } #[test] @@ -566,29 +567,26 @@ assert foo, \ ); } + fn isort_directives(contents: &str) -> IsortDirectives { + let parsed = parse_module(contents).unwrap(); + let locator = Locator::new(contents); + extract_isort_directives(&locator, parsed.comment_ranges()) + } + #[test] fn isort_exclusions() { let contents = "x = 1 y = 2 z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, - Vec::default() - ); + assert_eq!(isort_directives(contents).exclusions, Vec::default()); let contents = "# isort: off x = 1 y = 2 # isort: on z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); 
- let indexer = Indexer::from_tokens(&lxr, &locator); assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, + isort_directives(contents).exclusions, Vec::from_iter([TextRange::new(TextSize::from(0), TextSize::from(25))]) ); @@ -599,11 +597,8 @@ y = 2 # isort: on z = x + 1 # isort: on"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, + isort_directives(contents).exclusions, Vec::from_iter([TextRange::new(TextSize::from(0), TextSize::from(38))]) ); @@ -611,11 +606,8 @@ z = x + 1 x = 1 y = 2 z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, + isort_directives(contents).exclusions, Vec::from_iter([TextRange::at(TextSize::from(0), contents.text_len())]) ); @@ -623,13 +615,7 @@ z = x + 1"; x = 1 y = 2 z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, - Vec::default() - ); + assert_eq!(isort_directives(contents).exclusions, Vec::default()); let contents = "# isort: off x = 1 @@ -637,13 +623,7 @@ x = 1 y = 2 # isort: skip_file z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, - Vec::default() - ); + assert_eq!(isort_directives(contents).exclusions, Vec::default()); } #[test] @@ -651,36 +631,18 @@ z = x + 1"; let contents = "x = 1 y = 2 z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).splits, - Vec::new() - ); + assert_eq!(isort_directives(contents).splits, Vec::new()); let contents = "x = 1 y = 2 # isort: split z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).splits, - vec![TextSize::from(12)] - ); + assert_eq!(isort_directives(contents).splits, vec![TextSize::from(12)]); let contents = "x = 1 y = 2 # isort: split z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).splits, - vec![TextSize::from(13)] - ); + assert_eq!(isort_directives(contents).splits, vec![TextSize::from(13)]); } #[test] diff --git a/crates/ruff_linter/src/doc_lines.rs b/crates/ruff_linter/src/doc_lines.rs index eebe21cec3..d1f780053d 100644 --- a/crates/ruff_linter/src/doc_lines.rs +++ b/crates/ruff_linter/src/doc_lines.rs @@ -2,28 +2,29 @@ //! standalone comment or a constant string statement. 
use std::iter::FusedIterator; +use std::slice::Iter; use ruff_python_ast::{self as ast, Stmt, Suite}; -use ruff_python_parser::{TokenKind, TokenKindIter}; +use ruff_python_parser::{Token, TokenKind, Tokens}; use ruff_text_size::{Ranged, TextSize}; use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor}; use ruff_source_file::{Locator, UniversalNewlineIterator}; /// Extract doc lines (standalone comments) from a token sequence. -pub(crate) fn doc_lines_from_tokens(tokens: TokenKindIter) -> DocLines { +pub(crate) fn doc_lines_from_tokens(tokens: &Tokens) -> DocLines { DocLines::new(tokens) } pub(crate) struct DocLines<'a> { - inner: TokenKindIter<'a>, + inner: Iter<'a, Token>, prev: TextSize, } impl<'a> DocLines<'a> { - fn new(tokens: TokenKindIter<'a>) -> Self { + fn new(tokens: &'a Tokens) -> Self { Self { - inner: tokens, + inner: tokens.up_to_first_unknown().iter(), prev: TextSize::default(), } } @@ -35,12 +36,12 @@ impl Iterator for DocLines<'_> { fn next(&mut self) -> Option { let mut at_start_of_line = true; loop { - let (tok, range) = self.inner.next()?; + let token = self.inner.next()?; - match tok { + match token.kind() { TokenKind::Comment => { if at_start_of_line { - break Some(range.start()); + break Some(token.start()); } } TokenKind::Newline | TokenKind::NonLogicalNewline => { @@ -54,7 +55,7 @@ impl Iterator for DocLines<'_> { } } - self.prev = range.end(); + self.prev = token.end(); } } } diff --git a/crates/ruff_linter/src/fix/edits.rs b/crates/ruff_linter/src/fix/edits.rs index 3d45f1ea01..0901a9f694 100644 --- a/crates/ruff_linter/src/fix/edits.rs +++ b/crates/ruff_linter/src/fix/edits.rs @@ -531,8 +531,9 @@ mod tests { use test_case::test_case; use ruff_diagnostics::{Diagnostic, Edit, Fix}; + use ruff_python_ast::Stmt; use ruff_python_codegen::Stylist; - use ruff_python_parser::{lexer, parse_expression, parse_suite, Mode}; + use ruff_python_parser::{parse_expression, parse_module}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -541,17 +542,21 @@ mod tests { add_to_dunder_all, make_redundant_alias, next_stmt_break, trailing_semicolon, }; + /// Parse the given source using [`Mode::Module`] and return the first statement. 
+ fn parse_first_stmt(source: &str) -> Result { + let suite = parse_module(source)?.into_suite(); + Ok(suite.into_iter().next().unwrap()) + } + #[test] fn find_semicolon() -> Result<()> { let contents = "x = 1"; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; let locator = Locator::new(contents); assert_eq!(trailing_semicolon(stmt.end(), &locator), None); let contents = "x = 1; y = 1"; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; let locator = Locator::new(contents); assert_eq!( trailing_semicolon(stmt.end(), &locator), @@ -559,8 +564,7 @@ mod tests { ); let contents = "x = 1 ; y = 1"; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; let locator = Locator::new(contents); assert_eq!( trailing_semicolon(stmt.end(), &locator), @@ -572,8 +576,7 @@ x = 1 \ ; y = 1 " .trim(); - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; let locator = Locator::new(contents); assert_eq!( trailing_semicolon(stmt.end(), &locator), @@ -612,12 +615,11 @@ x = 1 \ } #[test] - fn redundant_alias() { + fn redundant_alias() -> Result<()> { let contents = "import x, y as y, z as bees"; - let program = parse_suite(contents).unwrap(); - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; assert_eq!( - make_redundant_alias(["x"].into_iter().map(Cow::from), stmt), + make_redundant_alias(["x"].into_iter().map(Cow::from), &stmt), vec![Edit::range_replacement( String::from("x as x"), TextRange::new(TextSize::new(7), TextSize::new(8)), @@ -625,7 +627,7 @@ x = 1 \ "make just one item redundant" ); assert_eq!( - make_redundant_alias(vec!["x", "y"].into_iter().map(Cow::from), stmt), + make_redundant_alias(vec!["x", "y"].into_iter().map(Cow::from), &stmt), vec![Edit::range_replacement( String::from("x as x"), TextRange::new(TextSize::new(7), TextSize::new(8)), @@ -633,13 +635,14 @@ x = 1 \ "the second item is already a redundant alias" ); assert_eq!( - make_redundant_alias(vec!["x", "z"].into_iter().map(Cow::from), stmt), + make_redundant_alias(vec!["x", "z"].into_iter().map(Cow::from), &stmt), vec![Edit::range_replacement( String::from("x as x"), TextRange::new(TextSize::new(7), TextSize::new(8)), )], "the third item is already aliased to something else" ); + Ok(()) } #[test_case("()", &["x", "y"], r#"("x", "y")"# ; "2 into empty tuple")] @@ -661,13 +664,9 @@ x = 1 \ fn add_to_dunder_all_test(raw: &str, names: &[&str], expect: &str) -> Result<()> { let locator = Locator::new(raw); let edits = { - let expr = parse_expression(raw)?; - let stylist = Stylist::from_tokens( - &lexer::lex(raw, Mode::Expression).collect::>(), - &locator, - ); - // SUT - add_to_dunder_all(names.iter().copied(), &expr, &stylist) + let parsed = parse_expression(raw)?; + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + add_to_dunder_all(names.iter().copied(), parsed.expr(), &stylist) }; let diag = { use crate::rules::pycodestyle::rules::MissingNewlineAtEndOfFile; diff --git a/crates/ruff_linter/src/importer/insertion.rs b/crates/ruff_linter/src/importer/insertion.rs index 274147a756..715405e19e 100644 --- a/crates/ruff_linter/src/importer/insertion.rs +++ b/crates/ruff_linter/src/importer/insertion.rs @@ -1,8 +1,8 @@ //! Insert statements into Python code. 
use std::ops::Add; -use ruff_python_ast::{PySourceType, Stmt}; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_ast::Stmt; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_text_size::{Ranged, TextSize}; use ruff_diagnostics::Edit; @@ -145,7 +145,7 @@ impl<'a> Insertion<'a> { mut location: TextSize, locator: &Locator<'a>, stylist: &Stylist, - source_type: PySourceType, + tokens: &Tokens, ) -> Insertion<'a> { enum Awaiting { Colon(u32), @@ -154,40 +154,38 @@ impl<'a> Insertion<'a> { } let mut state = Awaiting::Colon(0); - for (tok, range) in - lexer::lex_starts_at(locator.after(location), source_type.as_mode(), location).flatten() - { + for token in tokens.after(location) { match state { // Iterate until we find the colon indicating the start of the block body. - Awaiting::Colon(depth) => match tok { - Tok::Colon if depth == 0 => { + Awaiting::Colon(depth) => match token.kind() { + TokenKind::Colon if depth == 0 => { state = Awaiting::Newline; } - Tok::Lpar | Tok::Lbrace | Tok::Lsqb => { + TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => { state = Awaiting::Colon(depth.saturating_add(1)); } - Tok::Rpar | Tok::Rbrace | Tok::Rsqb => { + TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => { state = Awaiting::Colon(depth.saturating_sub(1)); } _ => {} }, // Once we've seen the colon, we're looking for a newline; otherwise, there's no // block body (e.g. `if True: pass`). - Awaiting::Newline => match tok { - Tok::Comment(..) => {} - Tok::Newline => { + Awaiting::Newline => match token.kind() { + TokenKind::Comment => {} + TokenKind::Newline => { state = Awaiting::Indent; } _ => { - location = range.start(); + location = token.start(); break; } }, // Once we've seen the newline, we're looking for the indentation of the block body. - Awaiting::Indent => match tok { - Tok::Comment(..) => {} - Tok::NonLogicalNewline => {} - Tok::Indent => { + Awaiting::Indent => match token.kind() { + TokenKind::Comment => {} + TokenKind::NonLogicalNewline => {} + TokenKind::Indent => { // This is like: // ```python // if True: @@ -196,13 +194,13 @@ impl<'a> Insertion<'a> { // Where `range` is the indentation before the `pass` token. 
return Insertion::indented( "", - range.start(), + token.start(), stylist.line_ending().as_str(), - locator.slice(range), + locator.slice(token), ); } _ => { - location = range.start(); + location = token.start(); break; } }, @@ -319,9 +317,8 @@ fn match_continuation(s: &str) -> Option { mod tests { use anyhow::Result; - use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; - use ruff_python_parser::{parse_suite, Mode}; + use ruff_python_parser::parse_module; use ruff_source_file::{LineEnding, Locator}; use ruff_text_size::TextSize; @@ -330,11 +327,10 @@ mod tests { #[test] fn start_of_file() -> Result<()> { fn insert(contents: &str) -> Result { - let program = parse_suite(contents)?; - let tokens = ruff_python_parser::tokenize(contents, Mode::Module); + let parsed = parse_module(contents)?; let locator = Locator::new(contents); - let stylist = Stylist::from_tokens(&tokens, &locator); - Ok(Insertion::start_of_file(&program, &locator, &stylist)) + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + Ok(Insertion::start_of_file(parsed.suite(), &locator, &stylist)) } let contents = ""; @@ -442,10 +438,10 @@ x = 1 #[test] fn start_of_block() { fn insert(contents: &str, offset: TextSize) -> Insertion { - let tokens = ruff_python_parser::tokenize(contents, Mode::Module); + let parsed = parse_module(contents).unwrap(); let locator = Locator::new(contents); - let stylist = Stylist::from_tokens(&tokens, &locator); - Insertion::start_of_block(offset, &locator, &stylist, PySourceType::default()) + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + Insertion::start_of_block(offset, &locator, &stylist, parsed.tokens()) } let contents = "if True: pass"; diff --git a/crates/ruff_linter/src/importer/mod.rs b/crates/ruff_linter/src/importer/mod.rs index e59a265a7b..51ada8f45d 100644 --- a/crates/ruff_linter/src/importer/mod.rs +++ b/crates/ruff_linter/src/importer/mod.rs @@ -7,7 +7,8 @@ use std::error::Error; use anyhow::Result; use libcst_native::{ImportAlias, Name, NameOrAttribute}; -use ruff_python_ast::{self as ast, PySourceType, Stmt}; +use ruff_python_ast::{self as ast, ModModule, Stmt}; +use ruff_python_parser::{Parsed, Tokens}; use ruff_text_size::{Ranged, TextSize}; use ruff_diagnostics::Edit; @@ -27,6 +28,8 @@ mod insertion; pub(crate) struct Importer<'a> { /// The Python AST to which we are adding imports. python_ast: &'a [Stmt], + /// The tokens representing the Python AST. + tokens: &'a Tokens, /// The [`Locator`] for the Python AST. locator: &'a Locator<'a>, /// The [`Stylist`] for the Python AST. @@ -39,12 +42,13 @@ pub(crate) struct Importer<'a> { impl<'a> Importer<'a> { pub(crate) fn new( - python_ast: &'a [Stmt], + parsed: &'a Parsed, locator: &'a Locator<'a>, stylist: &'a Stylist<'a>, ) -> Self { Self { - python_ast, + python_ast: parsed.suite(), + tokens: parsed.tokens(), locator, stylist, runtime_imports: Vec::default(), @@ -121,7 +125,6 @@ impl<'a> Importer<'a> { import: &ImportedMembers, at: TextSize, semantic: &SemanticModel, - source_type: PySourceType, ) -> Result { // Generate the modified import statement. let content = fix::codemods::retain_imports( @@ -178,7 +181,7 @@ impl<'a> Importer<'a> { // Add the import to a `TYPE_CHECKING` block. let add_import_edit = if let Some(block) = self.preceding_type_checking_block(at) { // Add the import to the `TYPE_CHECKING` block. 
- self.add_to_type_checking_block(&content, block.start(), source_type) + self.add_to_type_checking_block(&content, block.start()) } else { // Add the import to a new `TYPE_CHECKING` block. self.add_type_checking_block( @@ -455,13 +458,8 @@ impl<'a> Importer<'a> { } /// Add an import statement to an existing `TYPE_CHECKING` block. - fn add_to_type_checking_block( - &self, - content: &str, - at: TextSize, - source_type: PySourceType, - ) -> Edit { - Insertion::start_of_block(at, self.locator, self.stylist, source_type).into_edit(content) + fn add_to_type_checking_block(&self, content: &str, at: TextSize) -> Edit { + Insertion::start_of_block(at, self.locator, self.stylist, self.tokens).into_edit(content) } /// Return the import statement that precedes the given position, if any. diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs index 86d59b6d4e..7a36e67d5b 100644 --- a/crates/ruff_linter/src/linter.rs +++ b/crates/ruff_linter/src/linter.rs @@ -10,11 +10,10 @@ use rustc_hash::FxHashMap; use ruff_diagnostics::Diagnostic; use ruff_notebook::Notebook; -use ruff_python_ast::{PySourceType, Suite}; +use ruff_python_ast::{ModModule, PySourceType}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::{AsMode, ParseError, TokenKindIter, Tokens}; +use ruff_python_parser::{ParseError, Parsed}; use ruff_source_file::{Locator, SourceFileBuilder}; use ruff_text_size::Ranged; @@ -82,18 +81,21 @@ pub fn check_path( noqa: flags::Noqa, source_kind: &SourceKind, source_type: PySourceType, - tokens: TokenSource, + parsed: &Parsed, ) -> LinterResult> { // Aggregate all diagnostics. let mut diagnostics = vec![]; let mut error = None; + let tokens = parsed.tokens(); + let comment_ranges = parsed.comment_ranges(); + // Collect doc lines. This requires a rare mix of tokens (for comments) and AST // (for docstrings), which demands special-casing at this level. let use_doc_lines = settings.rules.enabled(Rule::DocLineTooLong); let mut doc_lines = vec![]; if use_doc_lines { - doc_lines.extend(doc_lines_from_tokens(tokens.kinds())); + doc_lines.extend(doc_lines_from_tokens(tokens)); } // Run the token-based rules. @@ -103,7 +105,7 @@ pub fn check_path( .any(|rule_code| rule_code.lint_source().is_tokens()) { diagnostics.extend(check_tokens( - &tokens, + parsed, path, locator, indexer, @@ -120,7 +122,13 @@ pub fn check_path( .iter_enabled() .any(|rule_code| rule_code.lint_source().is_filesystem()) { - diagnostics.extend(check_file_path(path, package, locator, indexer, settings)); + diagnostics.extend(check_file_path( + path, + package, + locator, + comment_ranges, + settings, + )); } // Run the logical line-based rules. @@ -130,7 +138,7 @@ pub fn check_path( .any(|rule_code| rule_code.lint_source().is_logical_lines()) { diagnostics.extend(crate::checkers::logical_lines::check_logical_lines( - &tokens, locator, indexer, stylist, settings, + tokens, locator, indexer, stylist, settings, )); } @@ -145,14 +153,13 @@ pub fn check_path( .iter_enabled() .any(|rule_code| rule_code.lint_source().is_imports()); if use_ast || use_imports || use_doc_lines { - // Parse, if the AST wasn't pre-provided provided. 
- match tokens.into_ast(source_kind, source_type) { - Ok(python_ast) => { + match parsed.as_result() { + Ok(parsed) => { let cell_offsets = source_kind.as_ipy_notebook().map(Notebook::cell_offsets); let notebook_index = source_kind.as_ipy_notebook().map(Notebook::index); if use_ast { diagnostics.extend(check_ast( - &python_ast, + parsed, locator, stylist, indexer, @@ -168,7 +175,7 @@ pub fn check_path( } if use_imports { let import_diagnostics = check_imports( - &python_ast, + parsed, locator, indexer, &directives.isort, @@ -182,7 +189,7 @@ pub fn check_path( diagnostics.extend(import_diagnostics); } if use_doc_lines { - doc_lines.extend(doc_lines_from_ast(&python_ast, locator)); + doc_lines.extend(doc_lines_from_ast(parsed.suite(), locator)); } } Err(parse_error) => { @@ -191,8 +198,9 @@ pub fn check_path( // if it's disabled via any of the usual mechanisms (e.g., `noqa`, // `per-file-ignores`), and the easiest way to detect that suppression is // to see if the diagnostic persists to the end of the function. - pycodestyle::rules::syntax_error(&mut diagnostics, &parse_error, locator); - error = Some(parse_error); + pycodestyle::rules::syntax_error(&mut diagnostics, parse_error, locator); + // TODO(dhruvmanila): Remove this clone + error = Some(parse_error.clone()); } } } @@ -210,7 +218,12 @@ pub fn check_path( .any(|rule_code| rule_code.lint_source().is_physical_lines()) { diagnostics.extend(check_physical_lines( - locator, stylist, indexer, &doc_lines, settings, + locator, + stylist, + indexer, + comment_ranges, + &doc_lines, + settings, )); } @@ -222,36 +235,44 @@ pub fn check_path( continue; } let diagnostic = match test_rule { - Rule::StableTestRule => test_rules::StableTestRule::diagnostic(locator, indexer), + Rule::StableTestRule => { + test_rules::StableTestRule::diagnostic(locator, comment_ranges) + } Rule::StableTestRuleSafeFix => { - test_rules::StableTestRuleSafeFix::diagnostic(locator, indexer) + test_rules::StableTestRuleSafeFix::diagnostic(locator, comment_ranges) } Rule::StableTestRuleUnsafeFix => { - test_rules::StableTestRuleUnsafeFix::diagnostic(locator, indexer) + test_rules::StableTestRuleUnsafeFix::diagnostic(locator, comment_ranges) } Rule::StableTestRuleDisplayOnlyFix => { - test_rules::StableTestRuleDisplayOnlyFix::diagnostic(locator, indexer) + test_rules::StableTestRuleDisplayOnlyFix::diagnostic(locator, comment_ranges) + } + Rule::NurseryTestRule => { + test_rules::NurseryTestRule::diagnostic(locator, comment_ranges) + } + Rule::PreviewTestRule => { + test_rules::PreviewTestRule::diagnostic(locator, comment_ranges) } - Rule::NurseryTestRule => test_rules::NurseryTestRule::diagnostic(locator, indexer), - Rule::PreviewTestRule => test_rules::PreviewTestRule::diagnostic(locator, indexer), Rule::DeprecatedTestRule => { - test_rules::DeprecatedTestRule::diagnostic(locator, indexer) + test_rules::DeprecatedTestRule::diagnostic(locator, comment_ranges) } Rule::AnotherDeprecatedTestRule => { - test_rules::AnotherDeprecatedTestRule::diagnostic(locator, indexer) + test_rules::AnotherDeprecatedTestRule::diagnostic(locator, comment_ranges) + } + Rule::RemovedTestRule => { + test_rules::RemovedTestRule::diagnostic(locator, comment_ranges) } - Rule::RemovedTestRule => test_rules::RemovedTestRule::diagnostic(locator, indexer), Rule::AnotherRemovedTestRule => { - test_rules::AnotherRemovedTestRule::diagnostic(locator, indexer) + test_rules::AnotherRemovedTestRule::diagnostic(locator, comment_ranges) } Rule::RedirectedToTestRule => { - 
test_rules::RedirectedToTestRule::diagnostic(locator, indexer) + test_rules::RedirectedToTestRule::diagnostic(locator, comment_ranges) } Rule::RedirectedFromTestRule => { - test_rules::RedirectedFromTestRule::diagnostic(locator, indexer) + test_rules::RedirectedFromTestRule::diagnostic(locator, comment_ranges) } Rule::RedirectedFromPrefixTestRule => { - test_rules::RedirectedFromPrefixTestRule::diagnostic(locator, indexer) + test_rules::RedirectedFromPrefixTestRule::diagnostic(locator, comment_ranges) } _ => unreachable!("All test rules must have an implementation"), }; @@ -288,7 +309,7 @@ pub fn check_path( &mut diagnostics, path, locator, - indexer.comment_ranges(), + comment_ranges, &directives.noqa_line_for, error.is_none(), &per_file_ignores, @@ -350,23 +371,21 @@ pub fn add_noqa_to_path( source_type: PySourceType, settings: &LinterSettings, ) -> Result { - let contents = source_kind.source_code(); - - // Tokenize once. - let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode()); + // Parse once. + let parsed = ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type); // Map row and column locations to byte slices (lazily). - let locator = Locator::new(contents); + let locator = Locator::new(source_kind.source_code()); // Detect the current code style (lazily). - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); // Extra indices from the code. - let indexer = Indexer::from_tokens(&tokens, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -387,7 +406,7 @@ pub fn add_noqa_to_path( flags::Noqa::Disabled, source_kind, source_type, - TokenSource::Tokens(tokens), + &parsed, ); // Log any parse errors. @@ -409,7 +428,7 @@ pub fn add_noqa_to_path( path, &diagnostics, &locator, - indexer.comment_ranges(), + parsed.comment_ranges(), &settings.external, &directives.noqa_line_for, stylist.line_ending(), @@ -425,23 +444,22 @@ pub fn lint_only( noqa: flags::Noqa, source_kind: &SourceKind, source_type: PySourceType, - data: ParseSource, + source: ParseSource, ) -> LinterResult> { - // Tokenize once. - let tokens = data.into_token_source(source_kind, source_type); + let parsed = source.into_parsed(source_kind, source_type); // Map row and column locations to byte slices (lazily). let locator = Locator::new(source_kind.source_code()); // Detect the current code style (lazily). - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); // Extra indices from the code. - let indexer = Indexer::from_tokens(&tokens, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -459,7 +477,7 @@ pub fn lint_only( noqa, source_kind, source_type, - tokens, + &parsed, ); result.map(|diagnostics| diagnostics_to_messages(diagnostics, path, &locator, &directives)) @@ -517,21 +535,22 @@ pub fn lint_fix<'a>( // Continuously fix until the source code stabilizes. loop { - // Tokenize once. 
- let tokens = ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode()); + // Parse once. + let parsed = + ruff_python_parser::parse_unchecked_source(transformed.source_code(), source_type); // Map row and column locations to byte slices (lazily). let locator = Locator::new(transformed.source_code()); // Detect the current code style (lazily). - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); // Extra indices from the code. - let indexer = Indexer::from_tokens(&tokens, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -549,7 +568,7 @@ pub fn lint_fix<'a>( noqa, &transformed, source_type, - TokenSource::Tokens(tokens), + &parsed, ); if iterations == 0 { @@ -685,70 +704,21 @@ This indicates a bug in Ruff. If you could open an issue at: #[derive(Debug, Clone)] pub enum ParseSource { - /// Extract the tokens and AST from the given source code. + /// Parse the [`Parsed`] from the given source code. None, - /// Use the precomputed tokens and AST. - Precomputed { tokens: Tokens, ast: Suite }, + /// Use the precomputed [`Parsed`]. + Precomputed(Parsed), } impl ParseSource { - /// Convert to a [`TokenSource`], tokenizing if necessary. - fn into_token_source(self, source_kind: &SourceKind, source_type: PySourceType) -> TokenSource { + /// Consumes the [`ParseSource`] and returns the parsed [`Parsed`], parsing the source code if + /// necessary. + fn into_parsed(self, source_kind: &SourceKind, source_type: PySourceType) -> Parsed { match self { - Self::None => TokenSource::Tokens(ruff_python_parser::tokenize( - source_kind.source_code(), - source_type.as_mode(), - )), - Self::Precomputed { tokens, ast } => TokenSource::Precomputed { tokens, ast }, - } - } -} - -#[derive(Debug, Clone)] -pub enum TokenSource { - /// Use the precomputed tokens to generate the AST. - Tokens(Tokens), - /// Use the precomputed tokens and AST. - Precomputed { tokens: Tokens, ast: Suite }, -} - -impl TokenSource { - /// Returns an iterator over the [`TokenKind`] and the corresponding range. - /// - /// [`TokenKind`]: ruff_python_parser::TokenKind - pub fn kinds(&self) -> TokenKindIter { - match self { - TokenSource::Tokens(tokens) => tokens.kinds(), - TokenSource::Precomputed { tokens, .. } => TokenKindIter::new(tokens), - } - } -} - -impl Deref for TokenSource { - type Target = [LexResult]; - - fn deref(&self) -> &Self::Target { - match self { - Self::Tokens(tokens) => tokens, - Self::Precomputed { tokens, .. } => tokens, - } - } -} - -impl TokenSource { - /// Convert to an [`AstSource`], parsing if necessary. - fn into_ast( - self, - source_kind: &SourceKind, - source_type: PySourceType, - ) -> Result { - match self { - Self::Tokens(tokens) => Ok(ruff_python_parser::parse_program_tokens( - tokens, - source_kind.source_code(), - source_type.is_ipynb(), - )?), - Self::Precomputed { ast, .. 
} => Ok(ast), + ParseSource::None => { + ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type) + } + ParseSource::Precomputed(parsed) => parsed, } } } diff --git a/crates/ruff_linter/src/rules/eradicate/detection.rs b/crates/ruff_linter/src/rules/eradicate/detection.rs index fa870287c6..f2e0229668 100644 --- a/crates/ruff_linter/src/rules/eradicate/detection.rs +++ b/crates/ruff_linter/src/rules/eradicate/detection.rs @@ -4,7 +4,7 @@ use itertools::Itertools; use once_cell::sync::Lazy; use regex::{Regex, RegexSet}; -use ruff_python_parser::parse_suite; +use ruff_python_parser::parse_module; use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer}; use ruff_text_size::TextSize; @@ -84,7 +84,7 @@ pub(crate) fn comment_contains_code(line: &str, task_tags: &[String]) -> bool { } // Finally, compile the source code. - parse_suite(line).is_ok() + parse_module(line).is_ok() } #[cfg(test)] diff --git a/crates/ruff_linter/src/rules/eradicate/rules/commented_out_code.rs b/crates/ruff_linter/src/rules/eradicate/rules/commented_out_code.rs index 4c17871ae5..9848f161d9 100644 --- a/crates/ruff_linter/src/rules/eradicate/rules/commented_out_code.rs +++ b/crates/ruff_linter/src/rules/eradicate/rules/commented_out_code.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use crate::settings::LinterSettings; @@ -47,14 +47,14 @@ impl Violation for CommentedOutCode { pub(crate) fn commented_out_code( diagnostics: &mut Vec, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, settings: &LinterSettings, ) { // Skip comments within `/// script` tags. let mut in_script_tag = false; // Iterate over all comments in the document. - for range in indexer.comment_ranges() { + for range in comment_ranges { let line = locator.lines(*range); // Detect `/// script` tags. diff --git a/crates/ruff_linter/src/rules/flake8_bugbear/rules/zip_without_explicit_strict.rs b/crates/ruff_linter/src/rules/flake8_bugbear/rules/zip_without_explicit_strict.rs index 4a1b7ecf6f..7e38b527e7 100644 --- a/crates/ruff_linter/src/rules/flake8_bugbear/rules/zip_without_explicit_strict.rs +++ b/crates/ruff_linter/src/rules/flake8_bugbear/rules/zip_without_explicit_strict.rs @@ -68,7 +68,7 @@ pub(crate) fn zip_without_explicit_strict(checker: &mut Checker, call: &ast::Exp add_argument( "strict=False", &call.arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ), // If the function call contains `**kwargs`, mark the fix as unsafe. diff --git a/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs b/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs index c1598262b2..69c1c8598b 100644 --- a/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs +++ b/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs @@ -2,7 +2,7 @@ use ruff_diagnostics::{AlwaysFixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_index::Indexer; -use ruff_python_parser::{TokenKind, TokenKindIter}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -27,31 +27,31 @@ enum TokenType { /// Simplified token specialized for the task. 
#[derive(Copy, Clone)] -struct Token { +struct SimpleToken { ty: TokenType, range: TextRange, } -impl Ranged for Token { +impl Ranged for SimpleToken { fn range(&self) -> TextRange { self.range } } -impl Token { +impl SimpleToken { fn new(ty: TokenType, range: TextRange) -> Self { Self { ty, range } } - fn irrelevant() -> Token { - Token { + fn irrelevant() -> SimpleToken { + SimpleToken { ty: TokenType::Irrelevant, range: TextRange::default(), } } } -impl From<(TokenKind, TextRange)> for Token { +impl From<(TokenKind, TextRange)> for SimpleToken { fn from((tok, range): (TokenKind, TextRange)) -> Self { let ty = match tok { TokenKind::Name => TokenType::Named, @@ -226,13 +226,13 @@ impl AlwaysFixableViolation for ProhibitedTrailingComma { /// COM812, COM818, COM819 pub(crate) fn trailing_commas( diagnostics: &mut Vec, - tokens: TokenKindIter, + tokens: &Tokens, locator: &Locator, indexer: &Indexer, ) { let mut fstrings = 0u32; - let tokens = tokens.filter_map(|(token, tok_range)| { - match token { + let simple_tokens = tokens.up_to_first_unknown().iter().filter_map(|token| { + match token.kind() { // Completely ignore comments -- they just interfere with the logic. TokenKind::Comment => None, // F-strings are handled as `String` token type with the complete range @@ -247,15 +247,15 @@ pub(crate) fn trailing_commas( if fstrings == 0 { indexer .fstring_ranges() - .outermost(tok_range.start()) - .map(|range| Token::new(TokenType::String, range)) + .outermost(token.start()) + .map(|range| SimpleToken::new(TokenType::String, range)) } else { None } } _ => { if fstrings == 0 { - Some(Token::from((token, tok_range))) + Some(SimpleToken::from(token.as_tuple())) } else { None } @@ -263,12 +263,12 @@ pub(crate) fn trailing_commas( } }); - let mut prev = Token::irrelevant(); - let mut prev_prev = Token::irrelevant(); + let mut prev = SimpleToken::irrelevant(); + let mut prev_prev = SimpleToken::irrelevant(); let mut stack = vec![Context::new(ContextType::No)]; - for token in tokens { + for token in simple_tokens { if prev.ty == TokenType::NonLogicalNewline && token.ty == TokenType::NonLogicalNewline { // Collapse consecutive newlines to the first one -- trailing commas are // added before the first newline. 
@@ -301,9 +301,9 @@ pub(crate) fn trailing_commas( } fn check_token( - token: Token, - prev: Token, - prev_prev: Token, + token: SimpleToken, + prev: SimpleToken, + prev_prev: SimpleToken, context: Context, locator: &Locator, ) -> Option { @@ -387,9 +387,9 @@ fn check_token( } fn update_context( - token: Token, - prev: Token, - prev_prev: Token, + token: SimpleToken, + prev: SimpleToken, + prev_prev: SimpleToken, stack: &mut Vec, ) -> Context { let new_context = match token.ty { diff --git a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs index 5166fbdd3e..9d1c59e387 100644 --- a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs +++ b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs @@ -139,7 +139,7 @@ pub(crate) fn unnecessary_generator_list(checker: &mut Checker, call: &ast::Expr let range = parenthesized_range( argument.into(), (&call.arguments).into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(argument.range()); diff --git a/crates/ruff_linter/src/rules/flake8_executable/rules/mod.rs b/crates/ruff_linter/src/rules/flake8_executable/rules/mod.rs index 4feb54de31..114e7dbef1 100644 --- a/crates/ruff_linter/src/rules/flake8_executable/rules/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_executable/rules/mod.rs @@ -1,7 +1,7 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; pub(crate) use shebang_leading_whitespace::*; pub(crate) use shebang_missing_executable_file::*; @@ -21,10 +21,10 @@ pub(crate) fn from_tokens( diagnostics: &mut Vec, path: &Path, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, ) { let mut has_any_shebang = false; - for range in indexer.comment_ranges() { + for range in comment_ranges { let comment = locator.slice(*range); if let Some(shebang) = ShebangDirective::try_extract(comment) { has_any_shebang = true; diff --git a/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs b/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs index b254fe8338..5cbd3f46e7 100644 --- a/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs +++ b/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs @@ -4,9 +4,9 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_index::Indexer; -use ruff_python_parser::{TokenKind, TokenKindIter}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; -use ruff_text_size::TextRange; +use ruff_text_size::{Ranged, TextRange}; use crate::settings::LinterSettings; @@ -92,37 +92,39 @@ impl Violation for MultiLineImplicitStringConcatenation { /// ISC001, ISC002 pub(crate) fn implicit( diagnostics: &mut Vec, - tokens: TokenKindIter, + tokens: &Tokens, settings: &LinterSettings, locator: &Locator, indexer: &Indexer, ) { - for ((a_tok, a_range), (b_tok, b_range)) in tokens - .filter(|(token, _)| { - *token != TokenKind::Comment + for (a_token, b_token) in tokens + .up_to_first_unknown() + .iter() + .filter(|token| { + token.kind() != TokenKind::Comment && 
(settings.flake8_implicit_str_concat.allow_multiline - || *token != TokenKind::NonLogicalNewline) + || token.kind() != TokenKind::NonLogicalNewline) }) .tuple_windows() { - let (a_range, b_range) = match (a_tok, b_tok) { - (TokenKind::String, TokenKind::String) => (a_range, b_range), + let (a_range, b_range) = match (a_token.kind(), b_token.kind()) { + (TokenKind::String, TokenKind::String) => (a_token.range(), b_token.range()), (TokenKind::String, TokenKind::FStringStart) => { - match indexer.fstring_ranges().innermost(b_range.start()) { - Some(b_range) => (a_range, b_range), + match indexer.fstring_ranges().innermost(b_token.start()) { + Some(b_range) => (a_token.range(), b_range), None => continue, } } (TokenKind::FStringEnd, TokenKind::String) => { - match indexer.fstring_ranges().innermost(a_range.start()) { - Some(a_range) => (a_range, b_range), + match indexer.fstring_ranges().innermost(a_token.start()) { + Some(a_range) => (a_range, b_token.range()), None => continue, } } (TokenKind::FStringEnd, TokenKind::FStringStart) => { match ( - indexer.fstring_ranges().innermost(a_range.start()), - indexer.fstring_ranges().innermost(b_range.start()), + indexer.fstring_ranges().innermost(a_token.start()), + indexer.fstring_ranges().innermost(b_token.start()), ) { (Some(a_range), Some(b_range)) => (a_range, b_range), _ => continue, diff --git a/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs b/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs index 26cdea8dcb..10c3a591dd 100644 --- a/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs +++ b/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf}; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::{TextRange, TextSize}; @@ -45,7 +45,7 @@ pub(crate) fn implicit_namespace_package( path: &Path, package: Option<&Path>, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, project_root: &Path, src: &[PathBuf], ) -> Option { @@ -61,8 +61,7 @@ pub(crate) fn implicit_namespace_package( .parent() .is_some_and( |parent| src.iter().any(|src| src == parent)) // Ignore files that contain a shebang. 
- && !indexer - .comment_ranges() + && !comment_ranges .first().filter(|range| range.start() == TextSize::from(0)) .is_some_and(|range| ShebangDirective::try_extract(locator.slice(*range)).is_some()) { diff --git a/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_dict_kwargs.rs b/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_dict_kwargs.rs index 61aa28988a..1f0b799855 100644 --- a/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_dict_kwargs.rs +++ b/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_dict_kwargs.rs @@ -129,7 +129,7 @@ pub(crate) fn unnecessary_dict_kwargs(checker: &mut Checker, call: &ast::ExprCal parenthesized_range( value.into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(value.range()) diff --git a/crates/ruff_linter/src/rules/flake8_pyi/rules/generic_not_last_base_class.rs b/crates/ruff_linter/src/rules/flake8_pyi/rules/generic_not_last_base_class.rs index c08f74870a..6c104f0006 100644 --- a/crates/ruff_linter/src/rules/flake8_pyi/rules/generic_not_last_base_class.rs +++ b/crates/ruff_linter/src/rules/flake8_pyi/rules/generic_not_last_base_class.rs @@ -114,7 +114,7 @@ fn generate_fix( let insertion = add_argument( locator.slice(generic_base), arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), source, ); diff --git a/crates/ruff_linter/src/rules/flake8_pyi/rules/type_comment_in_stub.rs b/crates/ruff_linter/src/rules/flake8_pyi/rules/type_comment_in_stub.rs index c52d353180..c21558f590 100644 --- a/crates/ruff_linter/src/rules/flake8_pyi/rules/type_comment_in_stub.rs +++ b/crates/ruff_linter/src/rules/flake8_pyi/rules/type_comment_in_stub.rs @@ -1,6 +1,6 @@ use once_cell::sync::Lazy; use regex::Regex; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_diagnostics::{Diagnostic, Violation}; @@ -38,9 +38,9 @@ impl Violation for TypeCommentInStub { pub(crate) fn type_comment_in_stub( diagnostics: &mut Vec, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, ) { - for range in indexer.comment_ranges() { + for range in comment_ranges { let comment = locator.slice(*range); if TYPE_COMMENT_REGEX.is_match(comment) && !TYPE_IGNORE_REGEX.is_match(comment) { diff --git a/crates/ruff_linter/src/rules/flake8_pytest_style/rules/assertion.rs b/crates/ruff_linter/src/rules/flake8_pytest_style/rules/assertion.rs index 3aaa7e8b3d..674fb0f4bf 100644 --- a/crates/ruff_linter/src/rules/flake8_pytest_style/rules/assertion.rs +++ b/crates/ruff_linter/src/rules/flake8_pytest_style/rules/assertion.rs @@ -284,7 +284,7 @@ pub(crate) fn unittest_assertion( // the assertion is part of a larger expression. 
if checker.semantic().current_statement().is_expr_stmt() && checker.semantic().current_expression_parent().is_none() - && !checker.indexer().comment_ranges().intersects(expr.range()) + && !checker.parsed().comment_ranges().intersects(expr.range()) { if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) { diagnostic.set_fix(Fix::unsafe_edit(Edit::range_replacement( @@ -292,7 +292,7 @@ pub(crate) fn unittest_assertion( parenthesized_range( expr.into(), checker.semantic().current_statement().into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(expr.range()), @@ -385,7 +385,7 @@ pub(crate) fn unittest_raises_assertion( call.func.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(call, checker.locator()) { @@ -745,7 +745,7 @@ pub(crate) fn composite_condition( let mut diagnostic = Diagnostic::new(PytestCompositeAssertion, stmt.range()); if matches!(composite, CompositionKind::Simple) && msg.is_none() - && !checker.indexer().comment_ranges().intersects(stmt.range()) + && !checker.parsed().comment_ranges().intersects(stmt.range()) && !checker .indexer() .in_multi_statement_line(stmt, checker.locator()) diff --git a/crates/ruff_linter/src/rules/flake8_pytest_style/rules/parametrize.rs b/crates/ruff_linter/src/rules/flake8_pytest_style/rules/parametrize.rs index 4f7cd1c4b4..6ef3b8687e 100644 --- a/crates/ruff_linter/src/rules/flake8_pytest_style/rules/parametrize.rs +++ b/crates/ruff_linter/src/rules/flake8_pytest_style/rules/parametrize.rs @@ -353,7 +353,7 @@ fn check_names(checker: &mut Checker, decorator: &Decorator, expr: &Expr) { let name_range = get_parametrize_name_range( decorator, expr, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(expr.range()); @@ -388,7 +388,7 @@ fn check_names(checker: &mut Checker, decorator: &Decorator, expr: &Expr) { let name_range = get_parametrize_name_range( decorator, expr, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(expr.range()); @@ -681,11 +681,7 @@ fn check_duplicates(checker: &mut Checker, values: &Expr) { let element_end = trailing_comma(element, checker.locator().contents(), values_end); let deletion_range = TextRange::new(previous_end, element_end); - if !checker - .indexer() - .comment_ranges() - .intersects(deletion_range) - { + if !checker.parsed().comment_ranges().intersects(deletion_range) { diagnostic.set_fix(Fix::unsafe_edit(Edit::range_deletion(deletion_range))); } } diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs index fe6a01df95..241ba9695d 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs @@ -527,7 +527,7 @@ pub(crate) fn compare_with_tuple(checker: &mut Checker, expr: &Expr) { // Avoid removing comments. 
if checker - .indexer() + .parsed() .comment_ranges() .has_comments(expr, checker.locator()) { @@ -779,7 +779,7 @@ fn is_short_circuit( parenthesized_range( furthest.into(), expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(furthest.range()) @@ -807,7 +807,7 @@ fn is_short_circuit( parenthesized_range( furthest.into(), expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(furthest.range()) diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_ifexp.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_ifexp.rs index 6b8d107520..f9b9b5752e 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_ifexp.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_ifexp.rs @@ -164,7 +164,7 @@ pub(crate) fn if_expr_with_true_false( parenthesized_range( test.into(), expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(test.range()), diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_with.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_with.rs index 881f4b3691..17b04340f5 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_with.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_with.rs @@ -168,7 +168,7 @@ pub(crate) fn multiple_with_statements( TextRange::new(with_stmt.start(), colon.end()), ); if !checker - .indexer() + .parsed() .comment_ranges() .intersects(TextRange::new(with_stmt.start(), with_stmt.body[0].start())) { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/collapsible_if.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/collapsible_if.rs index 8fb4f17fae..2a78b971c5 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/collapsible_if.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/collapsible_if.rs @@ -113,14 +113,10 @@ pub(crate) fn nested_if_statements( ); // The fixer preserves comments in the nested body, but removes comments between // the outer and inner if statements. 
- if !checker - .indexer() - .comment_ranges() - .intersects(TextRange::new( - nested_if.start(), - nested_if.body()[0].start(), - )) - { + if !checker.parsed().comment_ranges().intersects(TextRange::new( + nested_if.start(), + nested_if.body()[0].start(), + )) { match collapse_nested_if(checker.locator(), checker.stylist(), nested_if) { Ok(edit) => { if edit.content().map_or(true, |content| { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_dict_get.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_dict_get.rs index 71144145d2..64a0294816 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_dict_get.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_dict_get.rs @@ -210,7 +210,7 @@ pub(crate) fn if_else_block_instead_of_dict_get(checker: &mut Checker, stmt_if: stmt_if.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(stmt_if, checker.locator()) { @@ -300,7 +300,7 @@ pub(crate) fn if_exp_instead_of_dict_get( expr.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(expr, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_if_exp.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_if_exp.rs index 0740b99716..60deb30459 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_if_exp.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_if_exp.rs @@ -143,7 +143,7 @@ pub(crate) fn if_else_block_instead_of_if_exp(checker: &mut Checker, stmt_if: &a stmt_if.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(stmt_if, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_with_same_arms.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_with_same_arms.rs index 6494e262f6..e43eb1b7c0 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_with_same_arms.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_with_same_arms.rs @@ -8,8 +8,7 @@ use ruff_python_ast::comparable::ComparableStmt; use ruff_python_ast::parenthesize::parenthesized_range; use ruff_python_ast::stmt_if::{if_elif_branches, IfElifBranch}; use ruff_python_ast::{self as ast, Expr}; -use ruff_python_index::Indexer; -use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer}; +use ruff_python_trivia::{CommentRanges, SimpleTokenKind, SimpleTokenizer}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -74,13 +73,13 @@ pub(crate) fn if_with_same_arms(checker: &mut Checker, stmt_if: &ast::StmtIf) { // ...and the same comments let first_comments = checker - .indexer() + .parsed() .comment_ranges() .comments_in_range(body_range(¤t_branch, checker.locator())) .iter() .map(|range| checker.locator().slice(*range)); let second_comments = checker - .indexer() + .parsed() .comment_ranges() .comments_in_range(body_range(following_branch, checker.locator())) .iter() @@ -100,7 +99,7 @@ pub(crate) fn if_with_same_arms(checker: &mut Checker, stmt_if: &ast::StmtIf) { ¤t_branch, following_branch, checker.locator(), - checker.indexer(), + checker.parsed().comment_ranges(), ) }); @@ -114,7 +113,7 @@ fn merge_branches( current_branch: &IfElifBranch, following_branch: &IfElifBranch, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, ) -> Result { // Identify the colon (`:`) at the 
end of the current branch's test. let Some(current_branch_colon) = @@ -133,7 +132,7 @@ fn merge_branches( let following_branch_test = if let Some(range) = parenthesized_range( following_branch.test.into(), stmt_if.into(), - indexer.comment_ranges(), + comment_ranges, locator.contents(), ) { Cow::Borrowed(locator.slice(range)) diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/key_in_dict.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/key_in_dict.rs index 7ebcd9f9f3..619fdddca4 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/key_in_dict.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/key_in_dict.rs @@ -100,14 +100,14 @@ fn key_in_dict( let left_range = parenthesized_range( left.into(), parent, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(left.range()); let right_range = parenthesized_range( right.into(), parent, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(right.range()); diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs index 1eb1943c89..ac51e2ea68 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs @@ -194,7 +194,7 @@ pub(crate) fn needless_bool(checker: &mut Checker, stmt: &Stmt) { // Generate the replacement condition. let condition = if checker - .indexer() + .parsed() .comment_ranges() .has_comments(&range, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/suppressible_exception.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/suppressible_exception.rs index 936a96fe4c..b94cdacec5 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/suppressible_exception.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/suppressible_exception.rs @@ -126,7 +126,7 @@ pub(crate) fn suppressible_exception( stmt.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(stmt, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/flake8_todos/rules/todos.rs b/crates/ruff_linter/src/rules/flake8_todos/rules/todos.rs index cbd5a1b2a2..35f1d6039f 100644 --- a/crates/ruff_linter/src/rules/flake8_todos/rules/todos.rs +++ b/crates/ruff_linter/src/rules/flake8_todos/rules/todos.rs @@ -1,6 +1,6 @@ use once_cell::sync::Lazy; use regex::RegexSet; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::{TextLen, TextRange, TextSize}; @@ -235,7 +235,7 @@ pub(crate) fn todos( diagnostics: &mut Vec, todo_comments: &[TodoComment], locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, ) { for todo_comment in todo_comments { let TodoComment { @@ -256,12 +256,7 @@ pub(crate) fn todos( let mut has_issue_link = false; let mut curr_range = range; - for next_range in indexer - .comment_ranges() - .iter() - .skip(range_index + 1) - .copied() - { + for next_range in comment_ranges.iter().skip(range_index + 1).copied() { // Ensure that next_comment_range is in the same multiline comment "block" as // comment_range. 
if !locator diff --git a/crates/ruff_linter/src/rules/flake8_type_checking/rules/typing_only_runtime_import.rs b/crates/ruff_linter/src/rules/flake8_type_checking/rules/typing_only_runtime_import.rs index dc1a7c2e1f..3470430384 100644 --- a/crates/ruff_linter/src/rules/flake8_type_checking/rules/typing_only_runtime_import.rs +++ b/crates/ruff_linter/src/rules/flake8_type_checking/rules/typing_only_runtime_import.rs @@ -491,7 +491,6 @@ fn fix_imports(checker: &Checker, node_id: NodeId, imports: &[ImportBinding]) -> }, at, checker.semantic(), - checker.source_type, )? .into_edits(); diff --git a/crates/ruff_linter/src/rules/isort/annotate.rs b/crates/ruff_linter/src/rules/isort/annotate.rs index 012364d718..a30cf78708 100644 --- a/crates/ruff_linter/src/rules/isort/annotate.rs +++ b/crates/ruff_linter/src/rules/isort/annotate.rs @@ -1,4 +1,5 @@ -use ruff_python_ast::{self as ast, PySourceType, Stmt}; +use ruff_python_ast::{self as ast, Stmt}; +use ruff_python_parser::Tokens; use ruff_text_size::{Ranged, TextRange}; use ruff_source_file::Locator; @@ -13,7 +14,7 @@ pub(crate) fn annotate_imports<'a>( comments: Vec>, locator: &Locator<'a>, split_on_trailing_comma: bool, - source_type: PySourceType, + tokens: &Tokens, ) -> Vec> { let mut comments_iter = comments.into_iter().peekable(); @@ -120,7 +121,7 @@ pub(crate) fn annotate_imports<'a>( names: aliases, level: *level, trailing_comma: if split_on_trailing_comma { - trailing_comma(import, locator, source_type) + trailing_comma(import, tokens) } else { TrailingComma::default() }, diff --git a/crates/ruff_linter/src/rules/isort/comments.rs b/crates/ruff_linter/src/rules/isort/comments.rs index daec232098..d2c88213cc 100644 --- a/crates/ruff_linter/src/rules/isort/comments.rs +++ b/crates/ruff_linter/src/rules/isort/comments.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -20,10 +20,9 @@ impl Ranged for Comment<'_> { pub(crate) fn collect_comments<'a>( range: TextRange, locator: &'a Locator, - indexer: &'a Indexer, + comment_ranges: &'a CommentRanges, ) -> Vec> { - indexer - .comment_ranges() + comment_ranges .comments_in_range(range) .iter() .map(|range| Comment { diff --git a/crates/ruff_linter/src/rules/isort/helpers.rs b/crates/ruff_linter/src/rules/isort/helpers.rs index 6f519f8923..50b8b7ffca 100644 --- a/crates/ruff_linter/src/rules/isort/helpers.rs +++ b/crates/ruff_linter/src/rules/isort/helpers.rs @@ -1,5 +1,5 @@ -use ruff_python_ast::{PySourceType, Stmt}; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_ast::Stmt; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_trivia::PythonWhitespace; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::Ranged; @@ -8,31 +8,23 @@ use crate::rules::isort::types::TrailingComma; /// Return `true` if a `Stmt::ImportFrom` statement ends with a magic /// trailing comma. 
-pub(super) fn trailing_comma( - stmt: &Stmt, - locator: &Locator, - source_type: PySourceType, -) -> TrailingComma { - let contents = locator.slice(stmt); +pub(super) fn trailing_comma(stmt: &Stmt, tokens: &Tokens) -> TrailingComma { let mut count = 0u32; let mut trailing_comma = TrailingComma::Absent; - for (tok, _) in lexer::lex_starts_at(contents, source_type.as_mode(), stmt.start()).flatten() { - if matches!(tok, Tok::Lpar) { - count = count.saturating_add(1); - } - if matches!(tok, Tok::Rpar) { - count = count.saturating_sub(1); + for token in tokens.in_range(stmt.range()) { + match token.kind() { + TokenKind::Lpar => count = count.saturating_add(1), + TokenKind::Rpar => count = count.saturating_sub(1), + _ => {} } if count == 1 { - if matches!( - tok, - Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent | Tok::Comment(_) - ) { - continue; - } else if matches!(tok, Tok::Comma) { - trailing_comma = TrailingComma::Present; - } else { - trailing_comma = TrailingComma::Absent; + match token.kind() { + TokenKind::NonLogicalNewline + | TokenKind::Indent + | TokenKind::Dedent + | TokenKind::Comment => continue, + TokenKind::Comma => trailing_comma = TrailingComma::Present, + _ => trailing_comma = TrailingComma::Absent, } } } diff --git a/crates/ruff_linter/src/rules/isort/mod.rs b/crates/ruff_linter/src/rules/isort/mod.rs index 71af19faf1..4a82745e6c 100644 --- a/crates/ruff_linter/src/rules/isort/mod.rs +++ b/crates/ruff_linter/src/rules/isort/mod.rs @@ -12,6 +12,7 @@ use normalize::normalize_imports; use order::order_imports; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; +use ruff_python_parser::Tokens; use ruff_source_file::Locator; use settings::Settings; use types::EitherImport::{Import, ImportFrom}; @@ -72,6 +73,7 @@ pub(crate) fn format_imports( source_type: PySourceType, target_version: PythonVersion, settings: &Settings, + tokens: &Tokens, ) -> String { let trailer = &block.trailer; let block = annotate_imports( @@ -79,7 +81,7 @@ pub(crate) fn format_imports( comments, locator, settings.split_on_trailing_comma, - source_type, + tokens, ); // Normalize imports (i.e., deduplicate, aggregate `from` imports). diff --git a/crates/ruff_linter/src/rules/isort/rules/add_required_imports.rs b/crates/ruff_linter/src/rules/isort/rules/add_required_imports.rs index d8564dd5b8..87265c9cd2 100644 --- a/crates/ruff_linter/src/rules/isort/rules/add_required_imports.rs +++ b/crates/ruff_linter/src/rules/isort/rules/add_required_imports.rs @@ -4,9 +4,9 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::is_docstring_stmt; use ruff_python_ast::imports::{Alias, AnyImport, FutureImport, Import, ImportFrom}; -use ruff_python_ast::{self as ast, PySourceType, Stmt, Suite}; +use ruff_python_ast::{self as ast, ModModule, PySourceType, Stmt}; use ruff_python_codegen::Stylist; -use ruff_python_parser::parse_suite; +use ruff_python_parser::{parse_module, Parsed}; use ruff_source_file::Locator; use ruff_text_size::{TextRange, TextSize}; @@ -87,13 +87,13 @@ fn includes_import(stmt: &Stmt, target: &AnyImport) -> bool { #[allow(clippy::too_many_arguments)] fn add_required_import( required_import: &AnyImport, - python_ast: &Suite, + parsed: &Parsed, locator: &Locator, stylist: &Stylist, source_type: PySourceType, ) -> Option { // Don't add imports to semantically-empty files. 
- if python_ast.iter().all(is_docstring_stmt) { + if parsed.suite().iter().all(is_docstring_stmt) { return None; } @@ -103,7 +103,8 @@ fn add_required_import( } // If the import is already present in a top-level block, don't add it. - if python_ast + if parsed + .suite() .iter() .any(|stmt| includes_import(stmt, required_import)) { @@ -116,15 +117,14 @@ fn add_required_import( TextRange::default(), ); diagnostic.set_fix(Fix::safe_edit( - Importer::new(python_ast, locator, stylist) - .add_import(required_import, TextSize::default()), + Importer::new(parsed, locator, stylist).add_import(required_import, TextSize::default()), )); Some(diagnostic) } /// I002 pub(crate) fn add_required_imports( - python_ast: &Suite, + parsed: &Parsed, locator: &Locator, stylist: &Stylist, settings: &LinterSettings, @@ -135,7 +135,7 @@ pub(crate) fn add_required_imports( .required_imports .iter() .flat_map(|required_import| { - let Ok(body) = parse_suite(required_import) else { + let Ok(body) = parse_module(required_import).map(Parsed::into_suite) else { error!("Failed to parse required import: `{}`", required_import); return vec![]; }; @@ -165,7 +165,7 @@ pub(crate) fn add_required_imports( }, level: *level, }), - python_ast, + parsed, locator, stylist, source_type, @@ -182,7 +182,7 @@ pub(crate) fn add_required_imports( as_name: name.asname.as_deref(), }, }), - python_ast, + parsed, locator, stylist, source_type, diff --git a/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs b/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs index e571271d08..7e0c3be59d 100644 --- a/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs +++ b/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs @@ -5,9 +5,10 @@ use itertools::{EitherOrBoth, Itertools}; use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::whitespace::trailing_lines_end; -use ruff_python_ast::{PySourceType, Stmt}; +use ruff_python_ast::{ModModule, PySourceType, Stmt}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; +use ruff_python_parser::Parsed; use ruff_python_trivia::{leading_indentation, textwrap::indent, PythonWhitespace}; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::{Ranged, TextRange}; @@ -78,7 +79,7 @@ fn matches_ignoring_indentation(val1: &str, val2: &str) -> bool { }) } -#[allow(clippy::cast_sign_loss)] +#[allow(clippy::cast_sign_loss, clippy::too_many_arguments)] /// I001 pub(crate) fn organize_imports( block: &Block, @@ -88,6 +89,7 @@ pub(crate) fn organize_imports( settings: &LinterSettings, package: Option<&Path>, source_type: PySourceType, + parsed: &Parsed, ) -> Option { let indentation = locator.slice(extract_indentation_range(&block.imports, locator)); let indentation = leading_indentation(indentation); @@ -106,7 +108,7 @@ pub(crate) fn organize_imports( let comments = comments::collect_comments( TextRange::new(range.start(), locator.full_line_end(range.end())), locator, - indexer, + parsed.comment_ranges(), ); let trailing_line_end = if block.trailer.is_none() { @@ -128,6 +130,7 @@ pub(crate) fn organize_imports( source_type, settings.target_version, &settings.isort, + parsed.tokens(), ); // Expand the span the entire range, including leading and trailing space. 
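The isort hunks above and the rule rewrites that follow all converge on the same pattern: parse once, then read the AST, the token stream, and the comment ranges off the returned `Parsed` value instead of re-lexing or going through the `Indexer`. A minimal sketch of that call pattern, using only the `parse_module`, `Tokens::up_to_first_unknown`, `Token::kind`, and `CommentRanges::iter` calls that appear in these hunks; the helper name and the counting logic are illustrative, not part of the change:

    use ruff_python_parser::{parse_module, TokenKind};

    /// Count comment ranges and `def` tokens in a module, going through a
    /// single `Parsed` value rather than separate tokenize/parse passes.
    fn count_comments_and_defs(source: &str) -> (usize, usize) {
        // One parse produces the AST, the tokens, and the comment ranges together.
        let parsed = parse_module(source).expect("source should be valid Python");

        // Comment ranges are read off the parsed result instead of `Indexer::comment_ranges`.
        let comments = parsed.comment_ranges().iter().count();

        // Token inspection goes through `TokenKind`, as in the rules rewritten above.
        let defs = parsed
            .tokens()
            .up_to_first_unknown()
            .iter()
            .filter(|token| token.kind() == TokenKind::Def)
            .count();

        (comments, defs)
    }
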
diff --git a/crates/ruff_linter/src/rules/mccabe/rules/function_is_too_complex.rs b/crates/ruff_linter/src/rules/mccabe/rules/function_is_too_complex.rs index f9586b975b..463ef0a4f5 100644 --- a/crates/ruff_linter/src/rules/mccabe/rules/function_is_too_complex.rs +++ b/crates/ruff_linter/src/rules/mccabe/rules/function_is_too_complex.rs @@ -177,10 +177,15 @@ pub(crate) fn function_is_too_complex( mod tests { use anyhow::Result; - use ruff_python_parser::parse_suite; + use ruff_python_ast::Suite; + use ruff_python_parser::parse_module; use super::get_complexity_number; + fn parse_suite(source: &str) -> Result { + Ok(parse_module(source)?.into_suite()) + } + #[test] fn trivial() -> Result<()> { let source = r" diff --git a/crates/ruff_linter/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff_linter/src/rules/pandas_vet/rules/inplace_argument.rs index e1766b27c4..ed4446660c 100644 --- a/crates/ruff_linter/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff_linter/src/rules/pandas_vet/rules/inplace_argument.rs @@ -93,7 +93,7 @@ pub(crate) fn inplace_argument(checker: &mut Checker, call: &ast::ExprCall) { call, keyword, statement, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ) { diagnostic.set_fix(fix); diff --git a/crates/ruff_linter/src/rules/pycodestyle/overlong.rs b/crates/ruff_linter/src/rules/pycodestyle/overlong.rs index cb1988746c..b724f15659 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/overlong.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/overlong.rs @@ -2,8 +2,7 @@ use std::ops::Deref; use unicode_width::UnicodeWidthStr; -use ruff_python_index::Indexer; -use ruff_python_trivia::is_pragma_comment; +use ruff_python_trivia::{is_pragma_comment, CommentRanges}; use ruff_source_file::Line; use ruff_text_size::{TextLen, TextRange}; @@ -20,7 +19,7 @@ impl Overlong { /// otherwise. pub(super) fn try_from_line( line: &Line, - indexer: &Indexer, + comment_ranges: &CommentRanges, limit: LineLength, task_tags: &[String], tab_size: IndentWidth, @@ -40,7 +39,7 @@ impl Overlong { } // Strip trailing comments and re-measure the line, if needed. - let line = StrippedLine::from_line(line, indexer, task_tags); + let line = StrippedLine::from_line(line, comment_ranges, task_tags); let width = match &line { StrippedLine::WithoutPragma(line) => { let width = measure(line.as_str(), tab_size); @@ -119,8 +118,8 @@ enum StrippedLine<'a> { impl<'a> StrippedLine<'a> { /// Strip trailing comments from a [`Line`], if the line ends with a pragma comment (like /// `# type: ignore`) or, if necessary, a task comment (like `# TODO`). 
- fn from_line(line: &'a Line<'a>, indexer: &Indexer, task_tags: &[String]) -> Self { - let [comment_range] = indexer.comment_ranges().comments_in_range(line.range()) else { + fn from_line(line: &'a Line<'a>, comment_ranges: &CommentRanges, task_tags: &[String]) -> Self { + let [comment_range] = comment_ranges.comments_in_range(line.range()) else { return Self::Unchanged(line); }; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs index ad6b6478cb..172ff40e5b 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs @@ -1,5 +1,7 @@ use itertools::Itertools; use ruff_notebook::CellOffsets; +use ruff_python_parser::Token; +use ruff_python_parser::Tokens; use std::cmp::Ordering; use std::iter::Peekable; use std::num::NonZeroU32; @@ -12,7 +14,7 @@ use ruff_diagnostics::Fix; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; -use ruff_python_parser::{TokenKind, TokenKindIter}; +use ruff_python_parser::TokenKind; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::TextRange; use ruff_text_size::TextSize; @@ -381,7 +383,7 @@ struct LogicalLineInfo { /// Iterator that processes tokens until a full logical line (or comment line) is "built". /// It then returns characteristics of that logical line (see `LogicalLineInfo`). struct LinePreprocessor<'a> { - tokens: TokenKindIter<'a>, + tokens: Peekable>, locator: &'a Locator<'a>, indent_width: IndentWidth, /// The start position of the next logical line. @@ -397,13 +399,13 @@ struct LinePreprocessor<'a> { impl<'a> LinePreprocessor<'a> { fn new( - tokens: TokenKindIter<'a>, + tokens: &'a Tokens, locator: &'a Locator, indent_width: IndentWidth, cell_offsets: Option<&'a CellOffsets>, ) -> LinePreprocessor<'a> { LinePreprocessor { - tokens, + tokens: tokens.up_to_first_unknown().iter().peekable(), locator, line_start: TextSize::new(0), max_preceding_blank_lines: BlankLines::Zero, @@ -424,75 +426,80 @@ impl<'a> Iterator for LinePreprocessor<'a> { // Number of consecutive blank lines directly preceding this logical line. let mut blank_lines = BlankLines::Zero; let mut first_logical_line_token: Option<(LogicalLineKind, TextRange)> = None; - let mut last_token: TokenKind = TokenKind::EndOfFile; + let mut last_token = TokenKind::EndOfFile; let mut parens = 0u32; - while let Some((token, range)) = self.tokens.next() { - if matches!(token, TokenKind::Indent | TokenKind::Dedent) { + while let Some(token) = self.tokens.next() { + let (kind, range) = token.as_tuple(); + if matches!(kind, TokenKind::Indent | TokenKind::Dedent) { continue; } - let (logical_line_kind, first_token_range) = if let Some(first_token_range) = - first_logical_line_token - { - first_token_range - } - // At the start of the line... - else { - // Check if we are at the beginning of a cell in a notebook. 
- if let Some(ref mut cell_offsets) = self.cell_offsets { - if cell_offsets - .peek() - .is_some_and(|offset| offset == &&self.line_start) - { - self.is_beginning_of_cell = true; - cell_offsets.next(); - blank_lines = BlankLines::Zero; - self.max_preceding_blank_lines = BlankLines::Zero; - } + let (logical_line_kind, first_token_range) = + if let Some(first_token_range) = first_logical_line_token { + first_token_range } - - // An empty line - if token == TokenKind::NonLogicalNewline { - blank_lines.add(range); - - self.line_start = range.end(); - - continue; - } - - is_docstring = token == TokenKind::String; - - let logical_line_kind = match token { - TokenKind::Class => LogicalLineKind::Class, - TokenKind::Comment => LogicalLineKind::Comment, - TokenKind::At => LogicalLineKind::Decorator, - TokenKind::Def => LogicalLineKind::Function, - // Lookahead to distinguish `async def` from `async with`. - TokenKind::Async if matches!(self.tokens.peek(), Some((TokenKind::Def, _))) => { - LogicalLineKind::Function + // At the start of the line... + else { + // Check if we are at the beginning of a cell in a notebook. + if let Some(ref mut cell_offsets) = self.cell_offsets { + if cell_offsets + .peek() + .is_some_and(|offset| offset == &&self.line_start) + { + self.is_beginning_of_cell = true; + cell_offsets.next(); + blank_lines = BlankLines::Zero; + self.max_preceding_blank_lines = BlankLines::Zero; + } } - TokenKind::Import => LogicalLineKind::Import, - TokenKind::From => LogicalLineKind::FromImport, - _ => LogicalLineKind::Other, + + // An empty line + if kind == TokenKind::NonLogicalNewline { + blank_lines.add(range); + + self.line_start = range.end(); + + continue; + } + + is_docstring = kind == TokenKind::String; + + let logical_line_kind = match kind { + TokenKind::Class => LogicalLineKind::Class, + TokenKind::Comment => LogicalLineKind::Comment, + TokenKind::At => LogicalLineKind::Decorator, + TokenKind::Def => LogicalLineKind::Function, + // Lookahead to distinguish `async def` from `async with`. + TokenKind::Async + if self + .tokens + .peek() + .is_some_and(|token| token.kind() == TokenKind::Def) => + { + LogicalLineKind::Function + } + TokenKind::Import => LogicalLineKind::Import, + TokenKind::From => LogicalLineKind::FromImport, + _ => LogicalLineKind::Other, + }; + + first_logical_line_token = Some((logical_line_kind, range)); + + (logical_line_kind, range) }; - first_logical_line_token = Some((logical_line_kind, range)); - - (logical_line_kind, range) - }; - - if !token.is_trivia() { + if !kind.is_trivia() { line_is_comment_only = false; } // A docstring line is composed only of the docstring (TokenKind::String) and trivia tokens. 
// (If a comment follows a docstring, we still count the line as a docstring) - if token != TokenKind::String && !token.is_trivia() { + if kind != TokenKind::String && !kind.is_trivia() { is_docstring = false; } - match token { + match kind { TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => { parens = parens.saturating_add(1); } @@ -538,8 +545,8 @@ impl<'a> Iterator for LinePreprocessor<'a> { _ => {} } - if !token.is_trivia() { - last_token = token; + if !kind.is_trivia() { + last_token = kind; } } @@ -722,7 +729,7 @@ impl<'a> BlankLinesChecker<'a> { } /// E301, E302, E303, E304, E305, E306 - pub(crate) fn check_lines(&self, tokens: TokenKindIter<'a>, diagnostics: &mut Vec) { + pub(crate) fn check_lines(&self, tokens: &Tokens, diagnostics: &mut Vec) { let mut prev_indent_length: Option = None; let mut state = BlankLinesState::default(); let line_preprocessor = diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs index f22c771fc7..bdfb2e9629 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs @@ -1,7 +1,9 @@ +use std::slice::Iter; + use ruff_notebook::CellOffsets; use ruff_python_ast::PySourceType; -use ruff_python_parser::{TokenKind, TokenKindIter}; -use ruff_text_size::{TextRange, TextSize}; +use ruff_python_parser::{Token, TokenKind, Tokens}; +use ruff_text_size::{Ranged, TextSize}; use ruff_diagnostics::{AlwaysFixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; @@ -99,7 +101,7 @@ impl AlwaysFixableViolation for UselessSemicolon { /// E701, E702, E703 pub(crate) fn compound_statements( diagnostics: &mut Vec, - mut tokens: TokenKindIter, + tokens: &Tokens, locator: &Locator, indexer: &Indexer, source_type: PySourceType, @@ -125,33 +127,26 @@ pub(crate) fn compound_statements( // This is used to allow `class C: ...`-style definitions in stubs. let mut allow_ellipsis = false; - // Track the bracket depth. - let mut par_count = 0u32; - let mut sqb_count = 0u32; - let mut brace_count = 0u32; + // Track the nesting level. + let mut nesting = 0u32; // Track indentation. let mut indent = 0u32; - while let Some((token, range)) = tokens.next() { - match token { - TokenKind::Lpar => { - par_count = par_count.saturating_add(1); + // Use an iterator to allow passing it around. 
+ let mut token_iter = tokens.up_to_first_unknown().iter(); + + loop { + let Some(token) = token_iter.next() else { + break; + }; + + match token.kind() { + TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => { + nesting = nesting.saturating_add(1); } - TokenKind::Rpar => { - par_count = par_count.saturating_sub(1); - } - TokenKind::Lsqb => { - sqb_count = sqb_count.saturating_add(1); - } - TokenKind::Rsqb => { - sqb_count = sqb_count.saturating_sub(1); - } - TokenKind::Lbrace => { - brace_count = brace_count.saturating_add(1); - } - TokenKind::Rbrace => { - brace_count = brace_count.saturating_sub(1); + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => { + nesting = nesting.saturating_sub(1); } TokenKind::Ellipsis => { if allow_ellipsis { @@ -168,28 +163,27 @@ pub(crate) fn compound_statements( _ => {} } - if par_count > 0 || sqb_count > 0 || brace_count > 0 { + if nesting > 0 { continue; } - match token { + match token.kind() { TokenKind::Newline => { - if let Some((start, end)) = semi { + if let Some(range) = semi { if !(source_type.is_ipynb() && indent == 0 && cell_offsets - .and_then(|cell_offsets| cell_offsets.containing_range(range.start())) + .and_then(|cell_offsets| cell_offsets.containing_range(token.start())) .is_some_and(|cell_range| { - !has_non_trivia_tokens_till(tokens.clone(), cell_range.end()) + !has_non_trivia_tokens_till(token_iter.clone(), cell_range.end()) })) { - let mut diagnostic = - Diagnostic::new(UselessSemicolon, TextRange::new(start, end)); + let mut diagnostic = Diagnostic::new(UselessSemicolon, range); diagnostic.set_fix(Fix::safe_edit(Edit::deletion( indexer - .preceded_by_continuations(start, locator) - .unwrap_or(start), - end, + .preceded_by_continuations(range.start(), locator) + .unwrap_or(range.start()), + range.end(), ))); diagnostics.push(diagnostic); } @@ -225,14 +219,14 @@ pub(crate) fn compound_statements( || while_.is_some() || with.is_some() { - colon = Some((range.start(), range.end())); + colon = Some(token.range()); // Allow `class C: ...`-style definitions. allow_ellipsis = true; } } TokenKind::Semi => { - semi = Some((range.start(), range.end())); + semi = Some(token.range()); allow_ellipsis = false; } TokenKind::Comment @@ -240,22 +234,16 @@ pub(crate) fn compound_statements( | TokenKind::Dedent | TokenKind::NonLogicalNewline => {} _ => { - if let Some((start, end)) = semi { - diagnostics.push(Diagnostic::new( - MultipleStatementsOnOneLineSemicolon, - TextRange::new(start, end), - )); + if let Some(range) = semi { + diagnostics.push(Diagnostic::new(MultipleStatementsOnOneLineSemicolon, range)); // Reset. semi = None; allow_ellipsis = false; } - if let Some((start, end)) = colon { - diagnostics.push(Diagnostic::new( - MultipleStatementsOnOneLineColon, - TextRange::new(start, end), - )); + if let Some(range) = colon { + diagnostics.push(Diagnostic::new(MultipleStatementsOnOneLineColon, range)); // Reset. colon = None; @@ -276,7 +264,7 @@ pub(crate) fn compound_statements( } } - match token { + match token.kind() { TokenKind::Lambda => { // Reset. 
colon = None; @@ -294,40 +282,40 @@ pub(crate) fn compound_statements( with = None; } TokenKind::Case => { - case = Some((range.start(), range.end())); + case = Some(token.range()); } TokenKind::If => { - if_ = Some((range.start(), range.end())); + if_ = Some(token.range()); } TokenKind::While => { - while_ = Some((range.start(), range.end())); + while_ = Some(token.range()); } TokenKind::For => { - for_ = Some((range.start(), range.end())); + for_ = Some(token.range()); } TokenKind::Try => { - try_ = Some((range.start(), range.end())); + try_ = Some(token.range()); } TokenKind::Except => { - except = Some((range.start(), range.end())); + except = Some(token.range()); } TokenKind::Finally => { - finally = Some((range.start(), range.end())); + finally = Some(token.range()); } TokenKind::Elif => { - elif = Some((range.start(), range.end())); + elif = Some(token.range()); } TokenKind::Else => { - else_ = Some((range.start(), range.end())); + else_ = Some(token.range()); } TokenKind::Class => { - class = Some((range.start(), range.end())); + class = Some(token.range()); } TokenKind::With => { - with = Some((range.start(), range.end())); + with = Some(token.range()); } TokenKind::Match => { - match_ = Some((range.start(), range.end())); + match_ = Some(token.range()); } _ => {} }; @@ -336,13 +324,13 @@ pub(crate) fn compound_statements( /// Returns `true` if there are any non-trivia tokens from the given token /// iterator till the given end offset. -fn has_non_trivia_tokens_till(tokens: TokenKindIter, cell_end: TextSize) -> bool { - for (token, tok_range) in tokens { - if tok_range.start() >= cell_end { +fn has_non_trivia_tokens_till(tokens: Iter<'_, Token>, cell_end: TextSize) -> bool { + for token in tokens { + if token.start() >= cell_end { return false; } if !matches!( - token, + token.kind(), TokenKind::Newline | TokenKind::Comment | TokenKind::EndOfFile diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs index b13c461e19..5661f62036 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Line; use crate::rules::pycodestyle::overlong::Overlong; @@ -84,13 +84,13 @@ impl Violation for DocLineTooLong { /// W505 pub(crate) fn doc_line_too_long( line: &Line, - indexer: &Indexer, + comment_ranges: &CommentRanges, settings: &LinterSettings, ) -> Option { let limit = settings.pycodestyle.max_doc_length?; Overlong::try_from_line( line, - indexer, + comment_ranges, limit, if settings.pycodestyle.ignore_overlong_task_comments { &settings.task_tags diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/line_too_long.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/line_too_long.rs index a722344fa0..54b1bf09fd 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/line_too_long.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/line_too_long.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Line; use crate::rules::pycodestyle::overlong::Overlong; @@ -82,14 +82,14 @@ impl Violation for LineTooLong { /// E501 pub(crate) 
fn line_too_long( line: &Line, - indexer: &Indexer, + comment_ranges: &CommentRanges, settings: &LinterSettings, ) -> Option { let limit = settings.pycodestyle.max_line_length; Overlong::try_from_line( line, - indexer, + comment_ranges, limit, if settings.pycodestyle.ignore_overlong_task_comments { &settings.task_tags diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/literal_comparisons.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/literal_comparisons.rs index 68a9ba3d7f..3489aa0c56 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/literal_comparisons.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/literal_comparisons.rs @@ -324,7 +324,7 @@ pub(crate) fn literal_comparisons(checker: &mut Checker, compare: &ast::ExprComp &ops, &compare.comparators, compare.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ); for diagnostic in &mut diagnostics { diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs index 606972bcf0..a483187e57 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs @@ -14,10 +14,9 @@ use std::fmt::{Debug, Formatter}; use std::iter::FusedIterator; use bitflags::bitflags; -use ruff_python_parser::lexer::LexResult; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; -use ruff_python_parser::TokenKind; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_trivia::is_python_whitespace; use ruff_source_file::Locator; @@ -60,17 +59,16 @@ pub(crate) struct LogicalLines<'a> { } impl<'a> LogicalLines<'a> { - pub(crate) fn from_tokens(tokens: &'a [LexResult], locator: &'a Locator<'a>) -> Self { + pub(crate) fn from_tokens(tokens: &Tokens, locator: &'a Locator<'a>) -> Self { assert!(u32::try_from(tokens.len()).is_ok()); let mut builder = LogicalLinesBuilder::with_capacity(tokens.len()); let mut parens = 0u32; - for (token, range) in tokens.iter().flatten() { - let token_kind = TokenKind::from_token(token); - builder.push_token(token_kind, *range); + for token in tokens.up_to_first_unknown() { + builder.push_token(token.kind(), token.range()); - match token_kind { + match token.kind() { TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => { parens = parens.saturating_add(1); } @@ -506,9 +504,7 @@ struct Line { #[cfg(test)] mod tests { - use ruff_python_parser::lexer::LexResult; - use ruff_python_parser::{lexer, Mode}; - + use ruff_python_parser::parse_module; use ruff_source_file::Locator; use super::LogicalLines; @@ -592,9 +588,9 @@ if False: } fn assert_logical_lines(contents: &str, expected: &[&str]) { - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); + let parsed = parse_module(contents).unwrap(); let locator = Locator::new(contents); - let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) + let actual: Vec = LogicalLines::from_tokens(parsed.tokens(), &locator) .into_iter() .map(|line| line.text_trimmed()) .map(ToString::to_string) diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/not_tests.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/not_tests.rs index 6990d66f76..1602e84f79 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/not_tests.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/not_tests.rs @@ -104,7 +104,7 @@ pub(crate) fn not_tests(checker: &mut Checker, unary_op: &ast::ExprUnaryOp) { &[CmpOp::NotIn], comparators, 
unary_op.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ), unary_op.range(), @@ -125,7 +125,7 @@ pub(crate) fn not_tests(checker: &mut Checker, unary_op: &ast::ExprUnaryOp) { &[CmpOp::IsNot], comparators, unary_op.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ), unary_op.range(), diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs index f117210d32..c34ce2216b 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs @@ -1,7 +1,7 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_parser::{TokenKind, TokenKindIter}; -use ruff_text_size::{TextRange, TextSize}; +use ruff_python_parser::{TokenKind, Tokens}; +use ruff_text_size::{Ranged, TextRange, TextSize}; /// ## What it does /// Checks for files with multiple trailing blank lines. @@ -54,22 +54,19 @@ impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile { } /// W391 -pub(crate) fn too_many_newlines_at_end_of_file( - diagnostics: &mut Vec, - tokens: TokenKindIter, -) { +pub(crate) fn too_many_newlines_at_end_of_file(diagnostics: &mut Vec, tokens: &Tokens) { let mut num_trailing_newlines = 0u32; let mut start: Option = None; let mut end: Option = None; // Count the number of trailing newlines. - for (token, range) in tokens.rev() { - match token { + for token in tokens.up_to_first_unknown().iter().rev() { + match token.kind() { TokenKind::NonLogicalNewline | TokenKind::Newline => { if num_trailing_newlines == 0 { - end = Some(range.end()); + end = Some(token.end()); } - start = Some(range.end()); + start = Some(token.end()); num_trailing_newlines += 1; } TokenKind::Dedent => continue, diff --git a/crates/ruff_linter/src/rules/pyflakes/mod.rs b/crates/ruff_linter/src/rules/pyflakes/mod.rs index 81bc61c1f1..f88cc6f285 100644 --- a/crates/ruff_linter/src/rules/pyflakes/mod.rs +++ b/crates/ruff_linter/src/rules/pyflakes/mod.rs @@ -17,12 +17,12 @@ mod tests { use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; - use ruff_python_parser::AsMode; + use ruff_python_trivia::textwrap::dedent; use ruff_source_file::Locator; use ruff_text_size::Ranged; - use crate::linter::{check_path, LinterResult, TokenSource}; + use crate::linter::{check_path, LinterResult}; use crate::registry::{AsRule, Linter, Rule}; use crate::rules::pyflakes; use crate::settings::types::PreviewMode; @@ -638,12 +638,13 @@ mod tests { let source_type = PySourceType::default(); let source_kind = SourceKind::Python(contents.to_string()); let settings = LinterSettings::for_rules(Linter::Pyflakes.rules()); - let tokens = ruff_python_parser::tokenize(&contents, source_type.as_mode()); + let parsed = + ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type); let locator = Locator::new(&contents); - let stylist = Stylist::from_tokens(&tokens, &locator); - let indexer = Indexer::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(&settings), &locator, 
&indexer, @@ -662,7 +663,7 @@ mod tests { flags::Noqa::Enabled, &source_kind, source_type, - TokenSource::Tokens(tokens), + &parsed, ); diagnostics.sort_by_key(Ranged::start); let actual = diagnostics diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs index aaf4761679..5301e1cada 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs @@ -4,8 +4,8 @@ use ruff_python_ast::{CmpOp, Expr}; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers; -use ruff_python_parser::{lexer, Mode, Tok}; -use ruff_text_size::{Ranged, TextRange, TextSize}; +use ruff_python_parser::{TokenKind, Tokens}; +use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; @@ -96,7 +96,7 @@ pub(crate) fn invalid_literal_comparison( { let mut diagnostic = Diagnostic::new(IsLiteral { cmp_op: op.into() }, expr.range()); if lazy_located.is_none() { - lazy_located = Some(locate_cmp_ops(expr, checker.locator().contents())); + lazy_located = Some(locate_cmp_ops(expr, checker.parsed().tokens())); } if let Some(located_op) = lazy_located.as_ref().and_then(|located| located.get(index)) { assert_eq!(located_op.op, *op); @@ -110,7 +110,7 @@ pub(crate) fn invalid_literal_comparison( } { diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( content, - located_op.range + expr.start(), + located_op.range, ))); } } else { @@ -138,102 +138,83 @@ impl From<&CmpOp> for IsCmpOp { } } -/// Extract all [`CmpOp`] operators from an expression snippet, with appropriate -/// ranges. +/// Extract all [`CmpOp`] operators from an expression snippet, with appropriate ranges. /// -/// `RustPython` doesn't include line and column information on [`CmpOp`] nodes. -/// `CPython` doesn't either. This method iterates over the token stream and -/// re-identifies [`CmpOp`] nodes, annotating them with valid ranges. -fn locate_cmp_ops(expr: &Expr, source: &str) -> Vec { - // If `Expr` is a multi-line expression, we need to parenthesize it to - // ensure that it's lexed correctly. - let contents = &source[expr.range()]; - let parenthesized_contents = format!("({contents})"); - let mut tok_iter = lexer::lex(&parenthesized_contents, Mode::Expression) - .flatten() - .skip(1) - .map(|(tok, range)| (tok, range - TextSize::from(1))) - .filter(|(tok, _)| !matches!(tok, Tok::NonLogicalNewline | Tok::Comment(_))) +/// This method iterates over the token stream and re-identifies [`CmpOp`] nodes, annotating them +/// with valid ranges. +fn locate_cmp_ops(expr: &Expr, tokens: &Tokens) -> Vec { + let mut tok_iter = tokens + .in_range(expr.range()) + .iter() + .filter(|token| !token.is_trivia()) .peekable(); let mut ops: Vec = vec![]; - // Track the bracket depth. - let mut par_count = 0u32; - let mut sqb_count = 0u32; - let mut brace_count = 0u32; + // Track the nesting level. 
+ let mut nesting = 0u32; loop { - let Some((tok, range)) = tok_iter.next() else { + let Some(token) = tok_iter.next() else { break; }; - match tok { - Tok::Lpar => { - par_count = par_count.saturating_add(1); + match token.kind() { + TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => { + nesting = nesting.saturating_add(1); } - Tok::Rpar => { - par_count = par_count.saturating_sub(1); - } - Tok::Lsqb => { - sqb_count = sqb_count.saturating_add(1); - } - Tok::Rsqb => { - sqb_count = sqb_count.saturating_sub(1); - } - Tok::Lbrace => { - brace_count = brace_count.saturating_add(1); - } - Tok::Rbrace => { - brace_count = brace_count.saturating_sub(1); + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => { + nesting = nesting.saturating_sub(1); } _ => {} } - if par_count > 0 || sqb_count > 0 || brace_count > 0 { + if nesting > 0 { continue; } - match tok { - Tok::Not => { - if let Some((_, next_range)) = tok_iter.next_if(|(tok, _)| tok.is_in()) { + match token.kind() { + TokenKind::Not => { + if let Some(next_token) = tok_iter.next_if(|token| token.kind() == TokenKind::In) { ops.push(LocatedCmpOp::new( - TextRange::new(range.start(), next_range.end()), + TextRange::new(token.start(), next_token.end()), CmpOp::NotIn, )); } } - Tok::In => { - ops.push(LocatedCmpOp::new(range, CmpOp::In)); + TokenKind::In => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::In)); } - Tok::Is => { - let op = if let Some((_, next_range)) = tok_iter.next_if(|(tok, _)| tok.is_not()) { + TokenKind::Is => { + let op = if let Some(next_token) = + tok_iter.next_if(|token| token.kind() == TokenKind::Not) + { LocatedCmpOp::new( - TextRange::new(range.start(), next_range.end()), + TextRange::new(token.start(), next_token.end()), CmpOp::IsNot, ) } else { - LocatedCmpOp::new(range, CmpOp::Is) + LocatedCmpOp::new(token.range(), CmpOp::Is) }; ops.push(op); } - Tok::NotEqual => { - ops.push(LocatedCmpOp::new(range, CmpOp::NotEq)); + TokenKind::NotEqual => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::NotEq)); } - Tok::EqEqual => { - ops.push(LocatedCmpOp::new(range, CmpOp::Eq)); + TokenKind::EqEqual => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::Eq)); } - Tok::GreaterEqual => { - ops.push(LocatedCmpOp::new(range, CmpOp::GtE)); + TokenKind::GreaterEqual => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::GtE)); } - Tok::Greater => { - ops.push(LocatedCmpOp::new(range, CmpOp::Gt)); + TokenKind::Greater => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::Gt)); } - Tok::LessEqual => { - ops.push(LocatedCmpOp::new(range, CmpOp::LtE)); + TokenKind::LessEqual => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::LtE)); } - Tok::Less => { - ops.push(LocatedCmpOp::new(range, CmpOp::Lt)); + TokenKind::Less => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::Lt)); } _ => {} } @@ -266,12 +247,16 @@ mod tests { use super::{locate_cmp_ops, LocatedCmpOp}; + fn extract_cmp_op_locations(source: &str) -> Result> { + let parsed = parse_expression(source)?; + Ok(locate_cmp_ops(parsed.expr(), parsed.tokens())) + } + #[test] - fn extract_cmp_op_location() -> Result<()> { + fn test_locate_cmp_ops() -> Result<()> { let contents = "x == 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::Eq @@ -279,9 +264,8 @@ mod tests { ); let contents = "x != 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + 
extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::NotEq @@ -289,9 +273,8 @@ mod tests { ); let contents = "x is 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::Is @@ -299,9 +282,8 @@ mod tests { ); let contents = "x is not 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(8), CmpOp::IsNot @@ -309,9 +291,8 @@ mod tests { ); let contents = "x in 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::In @@ -319,9 +300,8 @@ mod tests { ); let contents = "x not in 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(8), CmpOp::NotIn @@ -329,9 +309,8 @@ mod tests { ); let contents = "x != (1 is not 2)"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::NotEq diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/repeated_keys.rs b/crates/ruff_linter/src/rules/pyflakes/rules/repeated_keys.rs index 5575e15410..66fcfdc0ea 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/repeated_keys.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/repeated_keys.rs @@ -169,7 +169,7 @@ pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) { parenthesized_range( dict.value(i - 1).into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or_else(|| dict.value(i - 1).range()) @@ -177,7 +177,7 @@ pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) { parenthesized_range( dict.value(i).into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or_else(|| dict.value(i).range()) @@ -201,7 +201,7 @@ pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) { parenthesized_range( dict.value(i - 1).into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or_else(|| dict.value(i - 1).range()) @@ -209,7 +209,7 @@ pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) { parenthesized_range( dict.value(i).into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or_else(|| dict.value(i).range()) diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs b/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs index b84fcd27d6..934a4d0af9 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs @@ -4,10 +4,9 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::contains_effect; use ruff_python_ast::parenthesize::parenthesized_range; -use ruff_python_ast::{self 
as ast, PySourceType, Stmt}; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_ast::{self as ast, Stmt}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_semantic::{Binding, Scope}; -use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::checkers::ast::Checker; @@ -65,22 +64,13 @@ impl Violation for UnusedVariable { } /// Return the [`TextRange`] of the token before the next match of the predicate -fn match_token_before( - location: TextSize, - locator: &Locator, - source_type: PySourceType, - f: F, -) -> Option +fn match_token_before(tokens: &Tokens, location: TextSize, f: F) -> Option where - F: Fn(Tok) -> bool, + F: Fn(TokenKind) -> bool, { - let contents = locator.after(location); - for ((_, range), (tok, _)) in lexer::lex_starts_at(contents, source_type.as_mode(), location) - .flatten() - .tuple_windows() - { - if f(tok) { - return Some(range); + for (prev, current) in tokens.after(location).iter().tuple_windows() { + if f(current.kind()) { + return Some(prev.range()); } } None @@ -88,55 +78,31 @@ where /// Return the [`TextRange`] of the token after the next match of the predicate, skipping over /// any bracketed expressions. -fn match_token_after( - location: TextSize, - locator: &Locator, - source_type: PySourceType, - f: F, -) -> Option +fn match_token_after(tokens: &Tokens, location: TextSize, f: F) -> Option where - F: Fn(Tok) -> bool, + F: Fn(TokenKind) -> bool, { - let contents = locator.after(location); - // Track the bracket depth. - let mut par_count = 0u32; - let mut sqb_count = 0u32; - let mut brace_count = 0u32; + let mut nesting = 0u32; - for ((tok, _), (_, range)) in lexer::lex_starts_at(contents, source_type.as_mode(), location) - .flatten() - .tuple_windows() - { - match tok { - Tok::Lpar => { - par_count = par_count.saturating_add(1); + for (current, next) in tokens.after(location).iter().tuple_windows() { + match current.kind() { + TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => { + nesting = nesting.saturating_add(1); } - Tok::Lsqb => { - sqb_count = sqb_count.saturating_add(1); - } - Tok::Lbrace => { - brace_count = brace_count.saturating_add(1); - } - Tok::Rpar => { - par_count = par_count.saturating_sub(1); - } - Tok::Rsqb => { - sqb_count = sqb_count.saturating_sub(1); - } - Tok::Rbrace => { - brace_count = brace_count.saturating_sub(1); + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => { + nesting = nesting.saturating_sub(1); } _ => {} } // If we're in nested brackets, continue. - if par_count > 0 || sqb_count > 0 || brace_count > 0 { + if nesting > 0 { continue; } - if f(tok) { - return Some(range); + if f(current.kind()) { + return Some(next.range()); } } None @@ -144,61 +110,34 @@ where /// Return the [`TextRange`] of the token matching the predicate or the first mismatched /// bracket, skipping over any bracketed expressions. -fn match_token_or_closing_brace( - location: TextSize, - locator: &Locator, - source_type: PySourceType, - f: F, -) -> Option +fn match_token_or_closing_brace(tokens: &Tokens, location: TextSize, f: F) -> Option where - F: Fn(Tok) -> bool, + F: Fn(TokenKind) -> bool, { - let contents = locator.after(location); + // Track the nesting level. + let mut nesting = 0u32; - // Track the bracket depth. 
- let mut par_count = 0u32; - let mut sqb_count = 0u32; - let mut brace_count = 0u32; - - for (tok, range) in lexer::lex_starts_at(contents, source_type.as_mode(), location).flatten() { - match tok { - Tok::Lpar => { - par_count = par_count.saturating_add(1); + for token in tokens.after(location) { + match token.kind() { + TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => { + nesting = nesting.saturating_add(1); } - Tok::Lsqb => { - sqb_count = sqb_count.saturating_add(1); - } - Tok::Lbrace => { - brace_count = brace_count.saturating_add(1); - } - Tok::Rpar => { - if par_count == 0 { - return Some(range); + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => { + if nesting == 0 { + return Some(token.range()); } - par_count = par_count.saturating_sub(1); - } - Tok::Rsqb => { - if sqb_count == 0 { - return Some(range); - } - sqb_count = sqb_count.saturating_sub(1); - } - Tok::Rbrace => { - if brace_count == 0 { - return Some(range); - } - brace_count = brace_count.saturating_sub(1); + nesting = nesting.saturating_sub(1); } _ => {} } // If we're in nested brackets, continue. - if par_count > 0 || sqb_count > 0 || brace_count > 0 { + if nesting > 0 { continue; } - if f(tok) { - return Some(range); + if f(token.kind()) { + return Some(token.range()); } } None @@ -226,18 +165,16 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option { let start = parenthesized_range( target.into(), statement.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(target.range()) .start(); - let end = match_token_after( - target.end(), - checker.locator(), - checker.source_type, - |tok| tok == Tok::Equal, - )? - .start(); + let end = + match_token_after(checker.parsed().tokens(), target.end(), |token| { + token == TokenKind::Equal + })? + .start(); let edit = Edit::deletion(start, end); Some(Fix::unsafe_edit(edit)) } else { @@ -269,11 +206,10 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option { // If the expression is complex (`x = foo()`), remove the assignment, // but preserve the right-hand side. let start = statement.start(); - let end = - match_token_after(start, checker.locator(), checker.source_type, |tok| { - tok == Tok::Equal - })? - .start(); + let end = match_token_after(checker.parsed().tokens(), start, |token| { + token == TokenKind::Equal + })? + .start(); let edit = Edit::deletion(start, end); Some(Fix::unsafe_edit(edit)) } else { @@ -293,21 +229,18 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option { if optional_vars.range() == binding.range() { // Find the first token before the `as` keyword. let start = match_token_before( + checker.parsed().tokens(), item.context_expr.start(), - checker.locator(), - checker.source_type, - |tok| tok == Tok::As, + |token| token == TokenKind::As, )? .end(); // Find the first colon, comma, or closing bracket after the `as` keyword. - let end = match_token_or_closing_brace( - start, - checker.locator(), - checker.source_type, - |tok| tok == Tok::Colon || tok == Tok::Comma, - )? - .start(); + let end = + match_token_or_closing_brace(checker.parsed().tokens(), start, |token| { + token == TokenKind::Colon || token == TokenKind::Comma + })? 
+ .start(); let edit = Edit::deletion(start, end); return Some(Fix::unsafe_edit(edit)); diff --git a/crates/ruff_linter/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs b/crates/ruff_linter/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs index f594be4294..91b08c9c08 100644 --- a/crates/ruff_linter/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs +++ b/crates/ruff_linter/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs @@ -5,7 +5,7 @@ use regex::Regex; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::TextSize; @@ -51,10 +51,10 @@ impl Violation for BlanketTypeIgnore { /// PGH003 pub(crate) fn blanket_type_ignore( diagnostics: &mut Vec, - indexer: &Indexer, + comment_ranges: &CommentRanges, locator: &Locator, ) { - for range in indexer.comment_ranges() { + for range in comment_ranges { let line = locator.slice(*range); // Match, e.g., `# type: ignore` or `# type: ignore[attr-defined]`. diff --git a/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs index 3fa235beb9..dfca0b6f20 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs @@ -1,7 +1,6 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; -use ruff_python_trivia::is_python_whitespace; +use ruff_python_trivia::{is_python_whitespace, CommentRanges}; use ruff_source_file::Locator; use ruff_text_size::{TextRange, TextSize}; @@ -45,12 +44,12 @@ impl Violation for EmptyComment { /// PLR2044 pub(crate) fn empty_comments( diagnostics: &mut Vec, - indexer: &Indexer, + comment_ranges: &CommentRanges, locator: &Locator, ) { - let block_comments = indexer.comment_ranges().block_comments(locator); + let block_comments = comment_ranges.block_comments(locator); - for range in indexer.comment_ranges() { + for range in comment_ranges { // Ignore comments that are part of multi-line "comment blocks". 
if block_comments.binary_search(&range.start()).is_ok() { continue; diff --git a/crates/ruff_linter/src/rules/pylint/rules/if_stmt_min_max.rs b/crates/ruff_linter/src/rules/pylint/rules/if_stmt_min_max.rs index 7bfdae8e14..0e9eceb984 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/if_stmt_min_max.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/if_stmt_min_max.rs @@ -160,7 +160,7 @@ pub(crate) fn if_stmt_min_max(checker: &mut Checker, stmt_if: &ast::StmtIf) { parenthesized_range( body_target.into(), body.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents() ) .unwrap_or(body_target.range()) diff --git a/crates/ruff_linter/src/rules/pylint/rules/nested_min_max.rs b/crates/ruff_linter/src/rules/pylint/rules/nested_min_max.rs index d3d5d9e182..a78d4de677 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/nested_min_max.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/nested_min_max.rs @@ -156,7 +156,7 @@ pub(crate) fn nested_min_max( }) { let mut diagnostic = Diagnostic::new(NestedMinMax { func: min_max }, expr.range()); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(expr, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/pylint/rules/subprocess_run_without_check.rs b/crates/ruff_linter/src/rules/pylint/rules/subprocess_run_without_check.rs index c6ff569fcb..03690dd350 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/subprocess_run_without_check.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/subprocess_run_without_check.rs @@ -76,7 +76,7 @@ pub(crate) fn subprocess_run_without_check(checker: &mut Checker, call: &ast::Ex add_argument( "check=False", &call.arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ), // If the function call contains `**kwargs`, mark the fix as unsafe. 
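The rules above now read comment ranges straight from the parsed output instead of going through `Indexer`. As a rough sketch of where those ranges come from, the same information can be recovered by walking the token stream; the sketch below uses only APIs that appear elsewhere in this diff (`parse_module`, `Tokens::up_to_first_unknown`, `Token::kind`, `Locator::slice`), and the helper name `type_ignore_comment_ranges` is invented for illustration, not part of the patch.

use ruff_python_parser::{parse_module, TokenKind};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange};

// Illustrative helper (the name is invented for this example): collect the ranges of
// `# type: ignore` comments by filtering the parser's token stream, mirroring the
// comment-range scans performed by PGH003 and PLR2044 above.
fn type_ignore_comment_ranges(source: &str) -> Vec<TextRange> {
    let parsed = parse_module(source).expect("source should be valid Python");
    let locator = Locator::new(source);
    parsed
        .tokens()
        .up_to_first_unknown()
        .iter()
        .filter(|token| token.kind() == TokenKind::Comment)
        .filter(|token| locator.slice(token.range()).contains("type: ignore"))
        .map(|token| token.range())
        .collect()
}

In the patch itself, rules such as `blanket_type_ignore` receive a precomputed `CommentRanges` rather than re-deriving it per rule.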
diff --git a/crates/ruff_linter/src/rules/pylint/rules/too_many_branches.rs b/crates/ruff_linter/src/rules/pylint/rules/too_many_branches.rs index 409c85f109..e2b76a4101 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/too_many_branches.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/too_many_branches.rs @@ -254,13 +254,13 @@ pub(crate) fn too_many_branches( #[cfg(test)] mod tests { use anyhow::Result; - use ruff_python_parser::parse_suite; + use ruff_python_parser::parse_module; use super::num_branches; fn test_helper(source: &str, expected_num_branches: usize) -> Result<()> { - let branches = parse_suite(source)?; - assert_eq!(num_branches(&branches), expected_num_branches); + let parsed = parse_module(source)?; + assert_eq!(num_branches(parsed.suite()), expected_num_branches); Ok(()) } diff --git a/crates/ruff_linter/src/rules/pylint/rules/too_many_return_statements.rs b/crates/ruff_linter/src/rules/pylint/rules/too_many_return_statements.rs index 39f573bb11..5e6e34dba8 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/too_many_return_statements.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/too_many_return_statements.rs @@ -98,13 +98,13 @@ pub(crate) fn too_many_return_statements( #[cfg(test)] mod tests { use anyhow::Result; - use ruff_python_parser::parse_suite; + use ruff_python_parser::parse_module; use super::num_returns; fn test_helper(source: &str, expected: usize) -> Result<()> { - let stmts = parse_suite(source)?; - assert_eq!(num_returns(&stmts), expected); + let parsed = parse_module(source)?; + assert_eq!(num_returns(parsed.suite()), expected); Ok(()) } diff --git a/crates/ruff_linter/src/rules/pylint/rules/too_many_statements.rs b/crates/ruff_linter/src/rules/pylint/rules/too_many_statements.rs index d2dbf632ac..3ab6f9fb15 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/too_many_statements.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/too_many_statements.rs @@ -158,10 +158,16 @@ pub(crate) fn too_many_statements( #[cfg(test)] mod tests { use anyhow::Result; - use ruff_python_parser::parse_suite; + + use ruff_python_ast::Suite; + use ruff_python_parser::parse_module; use super::num_statements; + fn parse_suite(source: &str) -> Result { + Ok(parse_module(source)?.into_suite()) + } + #[test] fn pass() -> Result<()> { let source: &str = r" diff --git a/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs index c5f6b93703..8fc8c9692a 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs @@ -175,7 +175,7 @@ fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix { })) ), &call.arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), )) } @@ -190,7 +190,7 @@ fn generate_import_fix(checker: &Checker, call: &ast::ExprCall) -> Result { let argument_edit = add_argument( &format!("encoding={binding}(False)"), &call.arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ); Ok(Fix::unsafe_edits(import_edit, [argument_edit])) diff --git a/crates/ruff_linter/src/rules/pyupgrade/fixes.rs b/crates/ruff_linter/src/rules/pyupgrade/fixes.rs index 7f259e2f9a..65486abee2 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/fixes.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/fixes.rs @@ -1,52 +1,49 @@ -use ruff_python_parser::{lexer, Mode, Tok}; +use 
ruff_python_ast::StmtImportFrom; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; -use ruff_text_size::{TextRange, TextSize}; +use ruff_text_size::{Ranged, TextRange}; /// Remove any imports matching `members` from an import-from statement. -pub(crate) fn remove_import_members(contents: &str, members: &[&str]) -> String { - let mut names: Vec = vec![]; - let mut commas: Vec = vec![]; - let mut removal_indices: Vec = vec![]; - - // Find all Tok::Name tokens that are not preceded by Tok::As, and all - // Tok::Comma tokens. - let mut prev_tok = None; - for (tok, range) in lexer::lex(contents, Mode::Module) - .flatten() - .skip_while(|(tok, _)| !matches!(tok, Tok::Import)) - { - if let Tok::Name { name } = &tok { - if matches!(prev_tok, Some(Tok::As)) { - // Adjust the location to take the alias into account. - let last_range = names.last_mut().unwrap(); - *last_range = TextRange::new(last_range.start(), range.end()); +pub(crate) fn remove_import_members( + locator: &Locator<'_>, + import_from_stmt: &StmtImportFrom, + tokens: &Tokens, + members_to_remove: &[&str], +) -> String { + let commas: Vec = tokens + .in_range(import_from_stmt.range()) + .iter() + .skip_while(|token| token.kind() != TokenKind::Import) + .filter_map(|token| { + if token.kind() == TokenKind::Comma { + Some(token.range()) } else { - if members.contains(&&**name) { - removal_indices.push(names.len()); - } - names.push(range); + None } - } else if matches!(tok, Tok::Comma) { - commas.push(range); - } - prev_tok = Some(tok); - } + }) + .collect(); // Reconstruct the source code by skipping any names that are in `members`. - let locator = Locator::new(contents); - let mut output = String::with_capacity(contents.len()); - let mut last_pos = TextSize::default(); + let mut output = String::with_capacity(import_from_stmt.range().len().to_usize()); + let mut last_pos = import_from_stmt.start(); let mut is_first = true; - for index in 0..names.len() { - if !removal_indices.contains(&index) { + + for (index, member) in import_from_stmt.names.iter().enumerate() { + if !members_to_remove.contains(&member.name.as_str()) { is_first = false; continue; } let range = if is_first { - TextRange::new(names[index].start(), names[index + 1].start()) + TextRange::new( + import_from_stmt.names[index].start(), + import_from_stmt.names[index + 1].start(), + ) } else { - TextRange::new(commas[index - 1].start(), names[index].end()) + TextRange::new( + commas[index - 1].start(), + import_from_stmt.names[index].end(), + ) }; // Add all contents from `last_pos` to `fix.location`. @@ -61,20 +58,39 @@ pub(crate) fn remove_import_members(contents: &str, members: &[&str]) -> String } // Add the remaining content. 
- let slice = locator.after(last_pos); + let slice = locator.slice(TextRange::new(last_pos, import_from_stmt.end())); output.push_str(slice); output } #[cfg(test)] mod tests { - use crate::rules::pyupgrade::fixes::remove_import_members; + use ruff_python_parser::parse_module; + use ruff_source_file::Locator; + + use super::remove_import_members; + + fn test_helper(source: &str, members_to_remove: &[&str]) -> String { + let parsed = parse_module(source).unwrap(); + let import_from_stmt = parsed + .suite() + .first() + .expect("source should have one statement") + .as_import_from_stmt() + .expect("first statement should be an import from statement"); + remove_import_members( + &Locator::new(source), + import_from_stmt, + parsed.tokens(), + members_to_remove, + ) + } #[test] fn once() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import bar, baz, qux as q"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -82,7 +98,7 @@ mod tests { fn twice() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import bar, qux as q"; - let actual = remove_import_members(source, &["baz", "bop"]); + let actual = test_helper(source, &["baz", "bop"]); assert_eq!(expected, actual); } @@ -90,7 +106,7 @@ mod tests { fn aliased() { let source = r"from foo import bar, baz, bop as boop, qux as q"; let expected = r"from foo import bar, baz, qux as q"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -98,7 +114,7 @@ mod tests { fn parenthesized() { let source = r"from foo import (bar, baz, bop, qux as q)"; let expected = r"from foo import (bar, baz, qux as q)"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -106,7 +122,7 @@ mod tests { fn last_import() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import bar, baz, bop"; - let actual = remove_import_members(source, &["qux"]); + let actual = test_helper(source, &["qux"]); assert_eq!(expected, actual); } @@ -114,7 +130,7 @@ mod tests { fn first_import() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import baz, bop, qux as q"; - let actual = remove_import_members(source, &["bar"]); + let actual = test_helper(source, &["bar"]); assert_eq!(expected, actual); } @@ -122,7 +138,7 @@ mod tests { fn first_two_imports() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import bop, qux as q"; - let actual = remove_import_members(source, &["bar", "baz"]); + let actual = test_helper(source, &["bar", "baz"]); assert_eq!(expected, actual); } @@ -138,7 +154,7 @@ mod tests { bop, qux as q )"; - let actual = remove_import_members(source, &["bar", "baz"]); + let actual = test_helper(source, &["bar", "baz"]); assert_eq!(expected, actual); } @@ -155,7 +171,7 @@ mod tests { baz, qux as q, )"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -171,7 +187,7 @@ mod tests { bar, qux as q, )"; - let actual = remove_import_members(source, &["baz", "bop"]); + let actual = test_helper(source, &["baz", "bop"]); assert_eq!(expected, actual); } @@ -191,7 +207,7 @@ mod tests { # This comment should be retained. 
qux as q, )"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -211,7 +227,7 @@ mod tests { bop, qux as q, )"; - let actual = remove_import_members(source, &["bar"]); + let actual = test_helper(source, &["bar"]); assert_eq!(expected, actual); } } diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs index 7777e13a4b..f34b688feb 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs @@ -1,10 +1,11 @@ use itertools::Itertools; -use ruff_python_ast::{Alias, Stmt}; +use ruff_python_ast::{Alias, StmtImportFrom}; use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::whitespace::indentation; use ruff_python_codegen::Stylist; +use ruff_python_parser::Tokens; use ruff_source_file::Locator; use ruff_text_size::Ranged; @@ -398,29 +399,29 @@ const TYPING_EXTENSIONS_TO_TYPES_313: &[&str] = &["CapsuleType"]; const TYPING_EXTENSIONS_TO_WARNINGS_313: &[&str] = &["deprecated"]; struct ImportReplacer<'a> { - stmt: &'a Stmt, + import_from_stmt: &'a StmtImportFrom, module: &'a str, - members: &'a [Alias], locator: &'a Locator<'a>, stylist: &'a Stylist<'a>, + tokens: &'a Tokens, version: PythonVersion, } impl<'a> ImportReplacer<'a> { const fn new( - stmt: &'a Stmt, + import_from_stmt: &'a StmtImportFrom, module: &'a str, - members: &'a [Alias], locator: &'a Locator<'a>, stylist: &'a Stylist<'a>, + tokens: &'a Tokens, version: PythonVersion, ) -> Self { Self { - stmt, + import_from_stmt, module, - members, locator, stylist, + tokens, version, } } @@ -430,7 +431,7 @@ impl<'a> ImportReplacer<'a> { let mut operations = vec![]; if self.module == "typing" { if self.version >= PythonVersion::Py39 { - for member in self.members { + for member in &self.import_from_stmt.names { if let Some(target) = TYPING_TO_RENAME_PY39.iter().find_map(|(name, target)| { if &member.name == *name { Some(*target) @@ -616,7 +617,7 @@ impl<'a> ImportReplacer<'a> { let fix = Some(matched); Some((operation, fix)) } else { - let indentation = indentation(self.locator, self.stmt); + let indentation = indentation(self.locator, self.import_from_stmt); // If we have matched _and_ unmatched names, but the import is not on its own // line, we can't add a statement after it. For example, if we have @@ -636,7 +637,9 @@ impl<'a> ImportReplacer<'a> { let matched = ImportReplacer::format_import_from(&matched_names, target); let unmatched = fixes::remove_import_members( - self.locator.slice(self.stmt.range()), + self.locator, + self.import_from_stmt, + self.tokens, &matched_names .iter() .map(|name| name.name.as_str()) @@ -664,7 +667,7 @@ impl<'a> ImportReplacer<'a> { fn partition_imports(&self, candidates: &[&str]) -> (Vec<&Alias>, Vec<&Alias>) { let mut matched_names = vec![]; let mut unmatched_names = vec![]; - for name in self.members { + for name in &self.import_from_stmt.names { if candidates.contains(&name.name.as_str()) { matched_names.push(name); } else { @@ -691,21 +694,19 @@ impl<'a> ImportReplacer<'a> { } /// UP035 -pub(crate) fn deprecated_import( - checker: &mut Checker, - stmt: &Stmt, - names: &[Alias], - module: Option<&str>, - level: u32, -) { +pub(crate) fn deprecated_import(checker: &mut Checker, import_from_stmt: &StmtImportFrom) { // Avoid relative and star imports. 
- if level > 0 { + if import_from_stmt.level > 0 { return; } - if names.first().is_some_and(|name| &name.name == "*") { + if import_from_stmt + .names + .first() + .is_some_and(|name| &name.name == "*") + { return; } - let Some(module) = module else { + let Some(module) = import_from_stmt.module.as_deref() else { return; }; @@ -713,13 +714,12 @@ pub(crate) fn deprecated_import( return; } - let members: Vec = names.iter().map(Clone::clone).collect(); let fixer = ImportReplacer::new( - stmt, + import_from_stmt, module, - &members, checker.locator(), checker.stylist(), + checker.parsed().tokens(), checker.settings.target_version, ); @@ -728,12 +728,12 @@ pub(crate) fn deprecated_import( DeprecatedImport { deprecation: Deprecation::WithoutRename(operation), }, - stmt.range(), + import_from_stmt.range(), ); if let Some(content) = fix { diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( content, - stmt.range(), + import_from_stmt.range(), ))); } checker.diagnostics.push(diagnostic); @@ -744,7 +744,7 @@ pub(crate) fn deprecated_import( DeprecatedImport { deprecation: Deprecation::WithRename(operation), }, - stmt.range(), + import_from_stmt.range(), ); checker.diagnostics.push(diagnostic); } diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs index 499f30324e..bc75dbe6a7 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs @@ -1,5 +1,7 @@ -use ruff_python_parser::{TokenKind, TokenKindIter}; -use ruff_text_size::TextRange; +use std::slice::Iter; + +use ruff_python_parser::{Token, TokenKind, Tokens}; +use ruff_text_size::{Ranged, TextRange}; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; @@ -36,17 +38,17 @@ impl AlwaysFixableViolation for ExtraneousParentheses { } // See: https://github.com/asottile/pyupgrade/blob/97ed6fb3cf2e650d4f762ba231c3f04c41797710/pyupgrade/_main.py#L148 -fn match_extraneous_parentheses(tokens: &mut TokenKindIter) -> Option<(TextRange, TextRange)> { +fn match_extraneous_parentheses(tokens: &mut Iter<'_, Token>) -> Option<(TextRange, TextRange)> { // Store the location of the extraneous opening parenthesis. let start_range = loop { - let (token, range) = tokens.next()?; + let token = tokens.next()?; - match token { + match token.kind() { TokenKind::Comment | TokenKind::NonLogicalNewline => { continue; } TokenKind::Lpar => { - break range; + break token.range(); } _ => { return None; @@ -62,22 +64,28 @@ fn match_extraneous_parentheses(tokens: &mut TokenKindIter) -> Option<(TextRange // Store the location of the extraneous closing parenthesis. let end_range = loop { - let (token, range) = tokens.next()?; + let token = tokens.next()?; - // If we find a comma or a yield at depth 1 or 2, it's a tuple or coroutine. - if depth == 1 && matches!(token, TokenKind::Comma | TokenKind::Yield) { - return None; - } else if matches!(token, TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb) { - depth = depth.saturating_add(1); - } else if matches!(token, TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb) { - depth = depth.saturating_sub(1); + match token.kind() { + // If we find a comma or a yield at depth 1 or 2, it's a tuple or coroutine. 
+ TokenKind::Comma | TokenKind::Yield if depth == 1 => return None, + TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => { + depth = depth.saturating_add(1); + } + TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => { + depth = depth.saturating_sub(1); + } + _ => {} } if depth == 0 { - break range; + break token.range(); } - if !matches!(token, TokenKind::Comment | TokenKind::NonLogicalNewline) { + if !matches!( + token.kind(), + TokenKind::Comment | TokenKind::NonLogicalNewline + ) { empty_tuple = false; } }; @@ -88,9 +96,9 @@ fn match_extraneous_parentheses(tokens: &mut TokenKindIter) -> Option<(TextRange // Find the next non-coding token. let token = loop { - let (token, _) = tokens.next()?; + let token = tokens.next()?; - match token { + match token.kind() { TokenKind::Comment | TokenKind::NonLogicalNewline => continue, _ => { break token; @@ -98,7 +106,7 @@ fn match_extraneous_parentheses(tokens: &mut TokenKindIter) -> Option<(TextRange } }; - if matches!(token, TokenKind::Rpar) { + if matches!(token.kind(), TokenKind::Rpar) { Some((start_range, end_range)) } else { None @@ -108,15 +116,16 @@ fn match_extraneous_parentheses(tokens: &mut TokenKindIter) -> Option<(TextRange /// UP034 pub(crate) fn extraneous_parentheses( diagnostics: &mut Vec, - mut tokens: TokenKindIter, + tokens: &Tokens, locator: &Locator, ) { - while let Some((token, _)) = tokens.next() { - if !matches!(token, TokenKind::Lpar) { + let mut token_iter = tokens.up_to_first_unknown().iter(); + while let Some(token) = token_iter.next() { + if !matches!(token.kind(), TokenKind::Lpar) { continue; } - let Some((start_range, end_range)) = match_extraneous_parentheses(&mut tokens) else { + let Some((start_range, end_range)) = match_extraneous_parentheses(&mut token_iter) else { continue; }; diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs index d6441f9904..930f2cd998 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs @@ -11,7 +11,7 @@ use ruff_python_ast::{self as ast, Expr, Keyword}; use ruff_python_literal::format::{ FieldName, FieldNamePart, FieldType, FormatPart, FormatString, FromTemplate, }; -use ruff_python_parser::{lexer, Mode, Tok}; +use ruff_python_parser::TokenKind; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -409,15 +409,13 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F }; let mut patches: Vec<(TextRange, FStringConversion)> = vec![]; - let mut lex = lexer::lex_starts_at( - checker.locator().slice(call.func.range()), - Mode::Expression, - call.start(), - ) - .flatten(); + let mut tokens = checker.parsed().tokens().in_range(call.func.range()).iter(); let end = loop { - match lex.next() { - Some((Tok::Dot, range)) => { + let Some(token) = tokens.next() else { + unreachable!("Should break from the `Tok::Dot` arm"); + }; + match token.kind() { + TokenKind::Dot => { // ``` // ( // "a" @@ -429,10 +427,11 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F // // We know that the expression is a string literal, so we can safely assume that the // dot is the start of an attribute access. - break range.start(); + break token.start(); } - Some((Tok::String { .. 
}, range)) => { - match FStringConversion::try_convert(range, &mut summary, checker.locator()) { + TokenKind::String => { + match FStringConversion::try_convert(token.range(), &mut summary, checker.locator()) + { // If the format string contains side effects that would need to be repeated, // we can't convert it to an f-string. Ok(FStringConversion::SideEffects) => return, @@ -440,11 +439,10 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F // expression. Err(_) => return, // Otherwise, push the conversion to be processed later. - Ok(conversion) => patches.push((range, conversion)), + Ok(conversion) => patches.push((token.range(), conversion)), } } - Some(_) => continue, - None => unreachable!("Should break from the `Tok::Dot` arm"), + _ => {} } }; if patches.is_empty() { @@ -515,7 +513,7 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F // ) // ``` let has_comments = checker - .indexer() + .parsed() .comment_ranges() .intersects(call.arguments.range()); diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs index 03b33011be..046ef14a6b 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs @@ -8,7 +8,7 @@ use ruff_python_codegen::Stylist; use ruff_python_literal::cformat::{ CConversionFlags, CFormatPart, CFormatPrecision, CFormatQuantity, CFormatString, }; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_parser::TokenKind; use ruff_python_stdlib::identifiers::is_identifier; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -344,38 +344,22 @@ fn convertible(format_string: &CFormatString, params: &Expr) -> bool { } /// UP031 -pub(crate) fn printf_string_formatting(checker: &mut Checker, expr: &Expr, right: &Expr) { - // Grab each string segment (in case there's an implicit concatenation). - let mut strings: Vec<(TextRange, AnyStringFlags)> = vec![]; - let mut extension = None; - for (tok, range) in lexer::lex_starts_at( - checker.locator().slice(expr), - checker.source_type.as_mode(), - expr.start(), - ) - .flatten() - { - match tok { - Tok::String { flags, .. } => strings.push((range, flags)), - // If we hit a right paren, we have to preserve it. - Tok::Rpar => extension = Some(range), - // Break as soon as we find the modulo symbol. - Tok::Percent => break, - _ => continue, - } - } +pub(crate) fn printf_string_formatting( + checker: &mut Checker, + bin_op: &ast::ExprBinOp, + string_expr: &ast::ExprStringLiteral, +) { + let right = &*bin_op.right; - // If there are no string segments, abort. - if strings.is_empty() { - return; - } - - // Parse each string segment. let mut num_positional_arguments = 0; let mut num_keyword_arguments = 0; - let mut format_strings = Vec::with_capacity(strings.len()); - for (range, flags) in &strings { - let string = checker.locator().slice(*range); + let mut format_strings: Vec<(TextRange, String)> = + Vec::with_capacity(string_expr.value.as_slice().len()); + + // Parse each string segment. 
+ for string_literal in &string_expr.value { + let string = checker.locator().slice(string_literal); + let flags = AnyStringFlags::from(string_literal.flags); let string = &string [usize::from(flags.opener_len())..(string.len() - usize::from(flags.closer_len()))]; @@ -400,7 +384,10 @@ pub(crate) fn printf_string_formatting(checker: &mut Checker, expr: &Expr, right } // Convert the `%`-format string to a `.format` string. - format_strings.push(flags.format_string_contents(&percent_to_format(&format_string))); + format_strings.push(( + string_literal.range(), + flags.format_string_contents(&percent_to_format(&format_string)), + )); } // Parse the parameters. @@ -448,41 +435,55 @@ pub(crate) fn printf_string_formatting(checker: &mut Checker, expr: &Expr, right // Reconstruct the string. let mut contents = String::new(); - let mut prev = None; - for ((range, _), format_string) in strings.iter().zip(format_strings) { + let mut prev_end = None; + for (range, format_string) in format_strings { // Add the content before the string segment. - match prev { + match prev_end { None => { contents.push_str( checker .locator() - .slice(TextRange::new(expr.start(), range.start())), + .slice(TextRange::new(bin_op.start(), range.start())), ); } - Some(prev) => { - contents.push_str(checker.locator().slice(TextRange::new(prev, range.start()))); + Some(prev_end) => { + contents.push_str( + checker + .locator() + .slice(TextRange::new(prev_end, range.start())), + ); } } // Add the string itself. contents.push_str(&format_string); - prev = Some(range.end()); + prev_end = Some(range.end()); } - if let Some(range) = extension { - contents.push_str( - checker - .locator() - .slice(TextRange::new(prev.unwrap(), range.end())), - ); + if let Some(prev_end) = prev_end { + for token in checker.parsed().tokens().after(prev_end) { + match token.kind() { + // If we hit a right paren, we have to preserve it. + TokenKind::Rpar => { + contents.push_str( + checker + .locator() + .slice(TextRange::new(prev_end, token.end())), + ); + } + // Break as soon as we find the modulo symbol. + TokenKind::Percent => break, + _ => {} + } + } } // Add the `.format` call. 
contents.push_str(&format!(".format{params_string}")); - let mut diagnostic = Diagnostic::new(PrintfStringFormatting, expr.range()); + let mut diagnostic = Diagnostic::new(PrintfStringFormatting, bin_op.range()); diagnostic.set_fix(Fix::unsafe_edit(Edit::range_replacement( contents, - expr.range(), + bin_op.range(), ))); checker.diagnostics.push(diagnostic); } diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs index 6cbd36e233..d502107007 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs @@ -4,9 +4,8 @@ use anyhow::{anyhow, Result}; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::{self as ast, Expr, PySourceType}; -use ruff_python_parser::{lexer, AsMode}; -use ruff_source_file::Locator; +use ruff_python_ast::{self as ast, Expr}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_text_size::{Ranged, TextSize}; use crate::checkers::ast::Checker; @@ -76,12 +75,11 @@ pub(crate) fn redundant_open_modes(checker: &mut Checker, call: &ast::ExprCall) }) = &keyword.value { if let Ok(mode) = OpenMode::from_str(mode_param_value.to_str()) { - checker.diagnostics.push(create_check( + checker.diagnostics.push(create_diagnostic( call, &keyword.value, mode.replacement_value(), - checker.locator(), - checker.source_type, + checker.parsed().tokens(), )); } } @@ -91,12 +89,11 @@ pub(crate) fn redundant_open_modes(checker: &mut Checker, call: &ast::ExprCall) Some(mode_param) => { if let Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) = &mode_param { if let Ok(mode) = OpenMode::from_str(value.to_str()) { - checker.diagnostics.push(create_check( + checker.diagnostics.push(create_diagnostic( call, mode_param, mode.replacement_value(), - checker.locator(), - checker.source_type, + checker.parsed().tokens(), )); } } @@ -146,18 +143,17 @@ impl OpenMode { } } -fn create_check( - expr: &T, +fn create_diagnostic( + call: &ast::ExprCall, mode_param: &Expr, replacement_value: Option<&str>, - locator: &Locator, - source_type: PySourceType, + tokens: &Tokens, ) -> Diagnostic { let mut diagnostic = Diagnostic::new( RedundantOpenModes { replacement: replacement_value.map(ToString::to_string), }, - expr.range(), + call.range(), ); if let Some(content) = replacement_value { @@ -166,52 +162,53 @@ fn create_check( mode_param.range(), ))); } else { - diagnostic.try_set_fix(|| { - create_remove_param_fix(locator, expr, mode_param, source_type).map(Fix::safe_edit) - }); + diagnostic + .try_set_fix(|| create_remove_param_fix(call, mode_param, tokens).map(Fix::safe_edit)); } diagnostic } -fn create_remove_param_fix( - locator: &Locator, - expr: &T, +fn create_remove_param_fix( + call: &ast::ExprCall, mode_param: &Expr, - source_type: PySourceType, + tokens: &Tokens, ) -> Result { - let content = locator.slice(expr); // Find the last comma before mode_param and create a deletion fix // starting from the comma and ending after mode_param. 
let mut fix_start: Option = None; let mut fix_end: Option = None; let mut is_first_arg: bool = false; let mut delete_first_arg: bool = false; - for (tok, range) in lexer::lex_starts_at(content, source_type.as_mode(), expr.start()).flatten() - { - if range.start() == mode_param.start() { + + for token in tokens.in_range(call.range()) { + if token.start() == mode_param.start() { if is_first_arg { delete_first_arg = true; continue; } - fix_end = Some(range.end()); + fix_end = Some(token.end()); break; } - if delete_first_arg && tok.is_name() { - fix_end = Some(range.start()); - break; - } - if tok.is_lpar() { - is_first_arg = true; - fix_start = Some(range.end()); - } - if tok.is_comma() { - is_first_arg = false; - if !delete_first_arg { - fix_start = Some(range.start()); + match token.kind() { + TokenKind::Name if delete_first_arg => { + fix_end = Some(token.start()); + break; } + TokenKind::Lpar => { + is_first_arg = true; + fix_start = Some(token.end()); + } + TokenKind::Comma => { + is_first_arg = false; + if !delete_first_arg { + fix_start = Some(token.start()); + } + } + _ => {} } } + match (fix_start, fix_end) { (Some(start), Some(end)) => Ok(Edit::deletion(start, end)), _ => Err(anyhow::anyhow!( diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs index 46ed24c176..68b0ee777f 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs @@ -4,6 +4,7 @@ use regex::Regex; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -49,10 +50,11 @@ pub(crate) fn unnecessary_coding_comment( diagnostics: &mut Vec, locator: &Locator, indexer: &Indexer, + comment_ranges: &CommentRanges, ) { // The coding comment must be on one of the first two lines. Since each comment spans at least // one line, we only need to check the first two comments at most. - for comment_range in indexer.comment_ranges().iter().take(2) { + for comment_range in comment_ranges.iter().take(2) { // If leading content is not whitespace then it's not a valid coding comment e.g. // ``` // print(x) # coding=utf8 diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs index db894ed688..6ed6696628 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs @@ -1,7 +1,7 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::{self as ast, Arguments, Expr, Keyword, PySourceType}; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_ast::{self as ast, Arguments, Expr, Keyword}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -117,33 +117,26 @@ fn match_encoding_arg(arguments: &Arguments) -> Option { } /// Return a [`Fix`] replacing the call to encode with a byte string. 
-fn replace_with_bytes_literal( - locator: &Locator, - call: &ast::ExprCall, - source_type: PySourceType, -) -> Fix { +fn replace_with_bytes_literal(locator: &Locator, call: &ast::ExprCall, tokens: &Tokens) -> Fix { // Build up a replacement string by prefixing all string tokens with `b`. - let contents = locator.slice(call); - let mut replacement = String::with_capacity(contents.len() + 1); + let mut replacement = String::with_capacity(call.range().len().to_usize() + 1); let mut prev = call.start(); - for (tok, range) in - lexer::lex_starts_at(contents, source_type.as_mode(), call.start()).flatten() - { - match tok { - Tok::Dot => break, - Tok::String { .. } => { - replacement.push_str(locator.slice(TextRange::new(prev, range.start()))); - let string = locator.slice(range); + for token in tokens.in_range(call.range()) { + match token.kind() { + TokenKind::Dot => break, + TokenKind::String => { + replacement.push_str(locator.slice(TextRange::new(prev, token.start()))); + let string = locator.slice(token); replacement.push_str(&format!( "b{}", &string.trim_start_matches('u').trim_start_matches('U') )); } _ => { - replacement.push_str(locator.slice(TextRange::new(prev, range.end()))); + replacement.push_str(locator.slice(TextRange::new(prev, token.end()))); } } - prev = range.end(); + prev = token.end(); } Fix::safe_edit(Edit::range_replacement( @@ -172,7 +165,7 @@ pub(crate) fn unnecessary_encode_utf8(checker: &mut Checker, call: &ast::ExprCal diagnostic.set_fix(replace_with_bytes_literal( checker.locator(), call, - checker.source_type, + checker.parsed().tokens(), )); checker.diagnostics.push(diagnostic); } else if let EncodingArg::Keyword(kwarg) = encoding_arg { diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/yield_in_for_loop.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/yield_in_for_loop.rs index d371eb9661..7e87c72d4f 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/yield_in_for_loop.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/yield_in_for_loop.rs @@ -116,7 +116,7 @@ pub(crate) fn yield_in_for_loop(checker: &mut Checker, stmt_for: &ast::StmtFor) parenthesized_range( iter.as_ref().into(), stmt_for.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(iter.range()), diff --git a/crates/ruff_linter/src/rules/refurb/rules/if_exp_instead_of_or_operator.rs b/crates/ruff_linter/src/rules/refurb/rules/if_exp_instead_of_or_operator.rs index 131b8eb789..c3f404c243 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/if_exp_instead_of_or_operator.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/if_exp_instead_of_or_operator.rs @@ -7,7 +7,7 @@ use ruff_python_ast::comparable::ComparableExpr; use ruff_python_ast::helpers::contains_effect; use ruff_python_ast::parenthesize::parenthesized_range; use ruff_python_ast::Expr; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::Ranged; @@ -74,8 +74,18 @@ pub(crate) fn if_exp_instead_of_or_operator(checker: &mut Checker, if_expr: &ast Edit::range_replacement( format!( "{} or {}", - parenthesize_test(test, if_expr, checker.indexer(), checker.locator()), - parenthesize_test(orelse, if_expr, checker.indexer(), checker.locator()), + parenthesize_test( + test, + if_expr, + checker.parsed().comment_ranges(), + checker.locator() + ), + parenthesize_test( + orelse, + if_expr, + checker.parsed().comment_ranges(), + checker.locator() + ), ), if_expr.range(), ), @@ -99,13 +109,13 @@ 
pub(crate) fn if_exp_instead_of_or_operator(checker: &mut Checker, if_expr: &ast fn parenthesize_test<'a>( expr: &Expr, if_expr: &ast::ExprIf, - indexer: &Indexer, + comment_ranges: &CommentRanges, locator: &Locator<'a>, ) -> Cow<'a, str> { if let Some(range) = parenthesized_range( expr.into(), if_expr.into(), - indexer.comment_ranges(), + comment_ranges, locator.contents(), ) { Cow::Borrowed(locator.slice(range)) diff --git a/crates/ruff_linter/src/rules/refurb/rules/repeated_append.rs b/crates/ruff_linter/src/rules/refurb/rules/repeated_append.rs index 1eb6bbaf6f..60893aa8e3 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/repeated_append.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/repeated_append.rs @@ -114,7 +114,7 @@ pub(crate) fn repeated_append(checker: &mut Checker, stmt: &Stmt) { // # comment // a.append(2) // ``` - if group.is_consecutive && !checker.indexer().comment_ranges().intersects(group.range()) + if group.is_consecutive && !checker.parsed().comment_ranges().intersects(group.range()) { diagnostic.set_fix(Fix::unsafe_edit(Edit::replacement( replacement, diff --git a/crates/ruff_linter/src/rules/refurb/rules/single_item_membership_test.rs b/crates/ruff_linter/src/rules/refurb/rules/single_item_membership_test.rs index e635e1e2da..97f9aea116 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/single_item_membership_test.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/single_item_membership_test.rs @@ -83,7 +83,7 @@ pub(crate) fn single_item_membership_test( &[membership_test.replacement_op()], &[item.clone()], expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ), expr.range(), diff --git a/crates/ruff_linter/src/rules/ruff/rules/collection_literal_concatenation.rs b/crates/ruff_linter/src/rules/ruff/rules/collection_literal_concatenation.rs index 05f7f602db..b4fe7df371 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/collection_literal_concatenation.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/collection_literal_concatenation.rs @@ -199,7 +199,7 @@ pub(crate) fn collection_literal_concatenation(checker: &mut Checker, expr: &Exp expr.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(expr, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs b/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs index 8fe4215551..2f132ca140 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs @@ -69,9 +69,9 @@ impl AlwaysFixableViolation for InvalidFormatterSuppressionComment { /// RUF028 pub(crate) fn ignored_formatter_suppression_comment(checker: &mut Checker, suite: &ast::Suite) { - let indexer = checker.indexer(); let locator = checker.locator(); - let comment_ranges: SmallVec<[SuppressionComment; 8]> = indexer + let comment_ranges: SmallVec<[SuppressionComment; 8]> = checker + .parsed() .comment_ranges() .into_iter() .filter_map(|range| { diff --git a/crates/ruff_linter/src/rules/ruff/rules/missing_fstring_syntax.rs b/crates/ruff_linter/src/rules/ruff/rules/missing_fstring_syntax.rs index 35976dbf5b..95989f9721 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/missing_fstring_syntax.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/missing_fstring_syntax.rs @@ -114,10 +114,12 @@ fn should_be_fstring( } let fstring_expr = format!("f{}", locator.slice(literal)); + 
let Ok(parsed) = parse_expression(&fstring_expr) else { + return false; + }; // Note: Range offsets for `value` are based on `fstring_expr` - let Ok(ast::Expr::FString(ast::ExprFString { value, .. })) = parse_expression(&fstring_expr) - else { + let Some(ast::ExprFString { value, .. }) = parsed.expr().as_f_string_expr() else { return false; }; diff --git a/crates/ruff_linter/src/rules/ruff/rules/parenthesize_logical_operators.rs b/crates/ruff_linter/src/rules/ruff/rules/parenthesize_logical_operators.rs index 04e140bf36..cdb7b9e6c1 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/parenthesize_logical_operators.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/parenthesize_logical_operators.rs @@ -84,7 +84,7 @@ pub(crate) fn parenthesize_chained_logical_operators( if parenthesized_range( bool_op.into(), expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), locator.contents(), ) .is_none() diff --git a/crates/ruff_linter/src/rules/ruff/rules/quadratic_list_summation.rs b/crates/ruff_linter/src/rules/ruff/rules/quadratic_list_summation.rs index 77a4160ec2..2d8e684f0f 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/quadratic_list_summation.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/quadratic_list_summation.rs @@ -111,7 +111,7 @@ fn convert_to_reduce(iterable: &Expr, call: &ast::ExprCall, checker: &Checker) - parenthesized_range( iterable.into(), call.arguments.as_any_node_ref(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(iterable.range()), diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 5953ab55a1..9f4ce6129c 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -8,7 +8,7 @@ use std::cmp::Ordering; use ruff_python_ast as ast; use ruff_python_codegen::Stylist; -use ruff_python_parser::{lexer, Mode, Tok, TokenKind}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_stdlib::str::is_cased_uppercase; use ruff_python_trivia::{first_non_trivia_token, leading_indentation, SimpleTokenKind}; use ruff_source_file::Locator; @@ -336,6 +336,7 @@ impl<'a> MultilineStringSequenceValue<'a> { range: TextRange, kind: SequenceKind, locator: &Locator, + tokens: &Tokens, string_items: &[&'a str], ) -> Option> { // Parse the multiline string sequence using the raw tokens. @@ -344,7 +345,7 @@ impl<'a> MultilineStringSequenceValue<'a> { // // Step (1). Start by collecting information on each line individually: let (lines, ends_with_trailing_comma) = - collect_string_sequence_lines(range, kind, locator, string_items)?; + collect_string_sequence_lines(range, kind, tokens, string_items)?; // Step (2). Group lines together into sortable "items": // - Any "item" contains a single element of the list/tuple @@ -488,7 +489,7 @@ impl Ranged for MultilineStringSequenceValue<'_> { fn collect_string_sequence_lines<'a>( range: TextRange, kind: SequenceKind, - locator: &Locator, + tokens: &Tokens, string_items: &[&'a str], ) -> Option<(Vec>, bool)> { // These first two variables are used for keeping track of state @@ -501,39 +502,34 @@ fn collect_string_sequence_lines<'a>( // An iterator over the string values in the sequence. 
let mut string_items_iter = string_items.iter(); - // `lex_starts_at()` gives us absolute ranges rather than relative ranges, - // but (surprisingly) we still need to pass in the slice of code we want it to lex, - // rather than the whole source file: - let mut token_iter = - lexer::lex_starts_at(locator.slice(range), Mode::Expression, range.start()); - let (first_tok, _) = token_iter.next()?.ok()?; - if TokenKind::from(&first_tok) != kind.opening_token_for_multiline_definition() { + let mut token_iter = tokens.in_range(range).iter(); + let first_token = token_iter.next()?; + if first_token.kind() != kind.opening_token_for_multiline_definition() { return None; } let expected_final_token = kind.closing_token_for_multiline_definition(); - for pair in token_iter { - let (tok, subrange) = pair.ok()?; - match tok { - Tok::NonLogicalNewline => { + for token in token_iter { + match token.kind() { + TokenKind::NonLogicalNewline => { lines.push(line_state.into_string_sequence_line()); line_state = LineState::default(); } - Tok::Comment(_) => { - line_state.visit_comment_token(subrange); + TokenKind::Comment => { + line_state.visit_comment_token(token.range()); } - Tok::String { .. } => { + TokenKind::String => { let Some(string_value) = string_items_iter.next() else { unreachable!("Expected the number of string tokens to be equal to the number of string items in the sequence"); }; - line_state.visit_string_token(string_value, subrange); + line_state.visit_string_token(string_value, token.range()); ends_with_trailing_comma = false; } - Tok::Comma => { - line_state.visit_comma_token(subrange); + TokenKind::Comma => { + line_state.visit_comma_token(token.range()); ends_with_trailing_comma = true; } - tok if TokenKind::from(&tok) == expected_final_token => { + kind if kind == expected_final_token => { lines.push(line_state.into_string_sequence_line()); break; } diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index 2d88b64def..0ac227e935 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -216,6 +216,7 @@ fn create_fix( range, kind, locator, + checker.parsed().tokens(), string_items, )?; assert_eq!(value.len(), elts.len()); diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 46adf10fb4..55b12f2684 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -210,6 +210,7 @@ impl<'a> StringLiteralDisplay<'a> { self.range(), *sequence_kind, locator, + checker.parsed().tokens(), elements, )?; assert_eq!(analyzed_sequence.len(), self.elts.len()); diff --git a/crates/ruff_linter/src/rules/ruff/rules/test_rules.rs b/crates/ruff_linter/src/rules/ruff/rules/test_rules.rs index d148dff835..b9e9cea7c0 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/test_rules.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/test_rules.rs @@ -15,15 +15,15 @@ /// will not converge. 
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::TextSize; use crate::registry::Rule; /// Check if a comment exists anywhere in a the given file -fn comment_exists(text: &str, locator: &Locator, indexer: &Indexer) -> bool { - for range in indexer.comment_ranges() { +fn comment_exists(text: &str, locator: &Locator, comment_ranges: &CommentRanges) -> bool { + for range in comment_ranges { let comment_text = locator.slice(range); if text.trim_end() == comment_text { return true; @@ -49,7 +49,7 @@ pub(crate) const TEST_RULES: &[Rule] = &[ ]; pub(crate) trait TestRule { - fn diagnostic(locator: &Locator, indexer: &Indexer) -> Option; + fn diagnostic(locator: &Locator, comment_ranges: &CommentRanges) -> Option; } /// ## What it does @@ -80,7 +80,7 @@ impl Violation for StableTestRule { } impl TestRule for StableTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( StableTestRule, ruff_text_size::TextRange::default(), @@ -116,9 +116,9 @@ impl Violation for StableTestRuleSafeFix { } impl TestRule for StableTestRuleSafeFix { - fn diagnostic(locator: &Locator, indexer: &Indexer) -> Option { + fn diagnostic(locator: &Locator, comment_ranges: &CommentRanges) -> Option { let comment = format!("# fix from stable-test-rule-safe-fix\n"); - if comment_exists(&comment, locator, indexer) { + if comment_exists(&comment, locator, comment_ranges) { None } else { Some( @@ -160,9 +160,9 @@ impl Violation for StableTestRuleUnsafeFix { } impl TestRule for StableTestRuleUnsafeFix { - fn diagnostic(locator: &Locator, indexer: &Indexer) -> Option { + fn diagnostic(locator: &Locator, comment_ranges: &CommentRanges) -> Option { let comment = format!("# fix from stable-test-rule-unsafe-fix\n"); - if comment_exists(&comment, locator, indexer) { + if comment_exists(&comment, locator, comment_ranges) { None } else { Some( @@ -207,9 +207,9 @@ impl Violation for StableTestRuleDisplayOnlyFix { } impl TestRule for StableTestRuleDisplayOnlyFix { - fn diagnostic(locator: &Locator, indexer: &Indexer) -> Option { + fn diagnostic(locator: &Locator, comment_ranges: &CommentRanges) -> Option { let comment = format!("# fix from stable-test-rule-display-only-fix\n"); - if comment_exists(&comment, locator, indexer) { + if comment_exists(&comment, locator, comment_ranges) { None } else { Some( @@ -254,7 +254,7 @@ impl Violation for PreviewTestRule { } impl TestRule for PreviewTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( PreviewTestRule, ruff_text_size::TextRange::default(), @@ -290,7 +290,7 @@ impl Violation for NurseryTestRule { } impl TestRule for NurseryTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( NurseryTestRule, ruff_text_size::TextRange::default(), @@ -326,7 +326,7 @@ impl Violation for DeprecatedTestRule { } impl TestRule for DeprecatedTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( DeprecatedTestRule, 
ruff_text_size::TextRange::default(), @@ -362,7 +362,7 @@ impl Violation for AnotherDeprecatedTestRule { } impl TestRule for AnotherDeprecatedTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( AnotherDeprecatedTestRule, ruff_text_size::TextRange::default(), @@ -398,7 +398,7 @@ impl Violation for RemovedTestRule { } impl TestRule for RemovedTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( RemovedTestRule, ruff_text_size::TextRange::default(), @@ -434,7 +434,7 @@ impl Violation for AnotherRemovedTestRule { } impl TestRule for AnotherRemovedTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( AnotherRemovedTestRule, ruff_text_size::TextRange::default(), @@ -470,7 +470,7 @@ impl Violation for RedirectedFromTestRule { } impl TestRule for RedirectedFromTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( RedirectedFromTestRule, ruff_text_size::TextRange::default(), @@ -506,7 +506,7 @@ impl Violation for RedirectedToTestRule { } impl TestRule for RedirectedToTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( RedirectedToTestRule, ruff_text_size::TextRange::default(), @@ -542,7 +542,7 @@ impl Violation for RedirectedFromPrefixTestRule { } impl TestRule for RedirectedFromPrefixTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option { Some(Diagnostic::new( RedirectedFromPrefixTestRule, ruff_text_size::TextRange::default(), diff --git a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_key_check.rs b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_key_check.rs index 97724db212..1813efd784 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_key_check.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_key_check.rs @@ -110,7 +110,7 @@ pub(crate) fn unnecessary_key_check(checker: &mut Checker, expr: &Expr) { parenthesized_range( obj_right.into(), right.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(obj_right.range()) @@ -119,7 +119,7 @@ pub(crate) fn unnecessary_key_check(checker: &mut Checker, expr: &Expr) { parenthesized_range( key_right.into(), right.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(key_right.range()) diff --git a/crates/ruff_linter/src/test.rs b/crates/ruff_linter/src/test.rs index b646d76bd1..d23406bef0 100644 --- a/crates/ruff_linter/src/test.rs +++ b/crates/ruff_linter/src/test.rs @@ -16,14 +16,13 @@ use ruff_notebook::NotebookError; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::AsMode; use ruff_python_trivia::textwrap::dedent; use ruff_source_file::{Locator, SourceFileBuilder}; use ruff_text_size::Ranged; use crate::directives; use crate::fix::{fix_file, FixResult}; -use 
crate::linter::{check_path, LinterResult, TokenSource}; +use crate::linter::{check_path, LinterResult}; use crate::message::{Emitter, EmitterContext, Message, TextEmitter}; use crate::packaging::detect_package_root; use crate::registry::AsRule; @@ -110,12 +109,12 @@ pub(crate) fn test_contents<'a>( settings: &LinterSettings, ) -> (Vec, Cow<'a, SourceKind>) { let source_type = PySourceType::from(path); - let tokens = ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode()); + let parsed = ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type); let locator = Locator::new(source_kind.source_code()); - let stylist = Stylist::from_tokens(&tokens, &locator); - let indexer = Indexer::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -135,7 +134,7 @@ pub(crate) fn test_contents<'a>( flags::Noqa::Enabled, source_kind, source_type, - TokenSource::Tokens(tokens), + &parsed, ); let source_has_errors = error.is_some(); @@ -175,13 +174,13 @@ pub(crate) fn test_contents<'a>( transformed = Cow::Owned(transformed.updated(fixed_contents, &source_map)); - let tokens = - ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode()); + let parsed = + ruff_python_parser::parse_unchecked_source(transformed.source_code(), source_type); let locator = Locator::new(transformed.source_code()); - let stylist = Stylist::from_tokens(&tokens, &locator); - let indexer = Indexer::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -201,7 +200,7 @@ pub(crate) fn test_contents<'a>( flags::Noqa::Enabled, &transformed, source_type, - TokenSource::Tokens(tokens), + &parsed, ); if let Some(fixed_error) = fixed_error { diff --git a/crates/ruff_python_ast/src/str_prefix.rs b/crates/ruff_python_ast/src/str_prefix.rs index e6784d2604..b2da865d17 100644 --- a/crates/ruff_python_ast/src/str_prefix.rs +++ b/crates/ruff_python_ast/src/str_prefix.rs @@ -150,45 +150,6 @@ impl AnyStringPrefix { } } -impl TryFrom for AnyStringPrefix { - type Error = String; - - fn try_from(value: char) -> Result { - let result = match value { - 'r' => Self::Regular(StringLiteralPrefix::Raw { uppercase: false }), - 'R' => Self::Regular(StringLiteralPrefix::Raw { uppercase: true }), - 'u' | 'U' => Self::Regular(StringLiteralPrefix::Unicode), - 'b' | 'B' => Self::Bytes(ByteStringPrefix::Regular), - 'f' | 'F' => Self::Format(FStringPrefix::Regular), - _ => return Err(format!("Unexpected prefix '{value}'")), - }; - Ok(result) - } -} - -impl TryFrom<[char; 2]> for AnyStringPrefix { - type Error = String; - - fn try_from(value: [char; 2]) -> Result { - let result = match value { - ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => { - Self::Format(FStringPrefix::Raw { uppercase_r: false }) - } - ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => { - Self::Format(FStringPrefix::Raw { uppercase_r: true }) - } - ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => { - Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) - } - ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => { - Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) - } - _ => return 
Err(format!("Unexpected prefix '{}{}'", value[0], value[1])), - }; - Ok(result) - } -} - impl fmt::Display for AnyStringPrefix { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.as_str()) diff --git a/crates/ruff_python_ast_integration_tests/tests/identifier.rs b/crates/ruff_python_ast_integration_tests/tests/identifier.rs index 1e70c4fd65..324390b845 100644 --- a/crates/ruff_python_ast_integration_tests/tests/identifier.rs +++ b/crates/ruff_python_ast_integration_tests/tests/identifier.rs @@ -1,5 +1,5 @@ use ruff_python_ast::identifier; -use ruff_python_parser::{parse_suite, ParseError}; +use ruff_python_parser::{parse_module, ParseError}; use ruff_text_size::{TextRange, TextSize}; #[test] @@ -11,7 +11,7 @@ else: pass " .trim(); - let stmts = parse_suite(contents)?; + let stmts = parse_module(contents)?.into_suite(); let stmt = stmts.first().unwrap(); let range = identifier::else_(stmt, contents).unwrap(); assert_eq!(&contents[range], "else"); diff --git a/crates/ruff_python_ast_integration_tests/tests/parenthesize.rs b/crates/ruff_python_ast_integration_tests/tests/parenthesize.rs index 6e6b2eeaaa..ec6b5d8650 100644 --- a/crates/ruff_python_ast_integration_tests/tests/parenthesize.rs +++ b/crates/ruff_python_ast_integration_tests/tests/parenthesize.rs @@ -6,9 +6,9 @@ use ruff_text_size::TextRange; #[test] fn test_parenthesized_name() { let source_code = r"(x) + 1"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let bin_op = expr.as_bin_op_expr().unwrap(); + let bin_op = parsed.expr().as_bin_op_expr().unwrap(); let name = bin_op.left.as_ref(); let parenthesized = parenthesized_range( @@ -23,9 +23,9 @@ fn test_parenthesized_name() { #[test] fn test_non_parenthesized_name() { let source_code = r"x + 1"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let bin_op = expr.as_bin_op_expr().unwrap(); + let bin_op = parsed.expr().as_bin_op_expr().unwrap(); let name = bin_op.left.as_ref(); let parenthesized = parenthesized_range( @@ -40,9 +40,9 @@ fn test_non_parenthesized_name() { #[test] fn test_parenthesized_argument() { let source_code = r"f((a))"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let call = expr.as_call_expr().unwrap(); + let call = parsed.expr().as_call_expr().unwrap(); let arguments = &call.arguments; let argument = arguments.args.first().unwrap(); @@ -58,9 +58,9 @@ fn test_parenthesized_argument() { #[test] fn test_non_parenthesized_argument() { let source_code = r"f(a)"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let call = expr.as_call_expr().unwrap(); + let call = parsed.expr().as_call_expr().unwrap(); let arguments = &call.arguments; let argument = arguments.args.first().unwrap(); @@ -76,9 +76,9 @@ fn test_non_parenthesized_argument() { #[test] fn test_parenthesized_tuple_member() { let source_code = r"(a, (b))"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let tuple = expr.as_tuple_expr().unwrap(); + let tuple = parsed.expr().as_tuple_expr().unwrap(); let member = tuple.elts.last().unwrap(); let parenthesized = parenthesized_range( @@ -93,9 +93,9 @@ fn test_parenthesized_tuple_member() { #[test] fn test_non_parenthesized_tuple_member() { let source_code = r"(a, b)"; - let expr = parse_expression(source_code).unwrap(); + let parsed 
= parse_expression(source_code).unwrap(); - let tuple = expr.as_tuple_expr().unwrap(); + let tuple = parsed.expr().as_tuple_expr().unwrap(); let member = tuple.elts.last().unwrap(); let parenthesized = parenthesized_range( @@ -110,9 +110,9 @@ fn test_non_parenthesized_tuple_member() { #[test] fn test_twice_parenthesized_name() { let source_code = r"((x)) + 1"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let bin_op = expr.as_bin_op_expr().unwrap(); + let bin_op = parsed.expr().as_bin_op_expr().unwrap(); let name = bin_op.left.as_ref(); let parenthesized = parenthesized_range( @@ -127,9 +127,9 @@ fn test_twice_parenthesized_name() { #[test] fn test_twice_parenthesized_argument() { let source_code = r"f(((a + 1)))"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let call = expr.as_call_expr().unwrap(); + let call = parsed.expr().as_call_expr().unwrap(); let arguments = &call.arguments; let argument = arguments.args.first().unwrap(); diff --git a/crates/ruff_python_ast_integration_tests/tests/preorder.rs b/crates/ruff_python_ast_integration_tests/tests/preorder.rs index 21a159b424..8c375da3e0 100644 --- a/crates/ruff_python_ast_integration_tests/tests/preorder.rs +++ b/crates/ruff_python_ast_integration_tests/tests/preorder.rs @@ -4,8 +4,7 @@ use insta::assert_snapshot; use ruff_python_ast::visitor::preorder::{PreorderVisitor, TraversalSignal}; use ruff_python_ast::{AnyNodeRef, BoolOp, CmpOp, Operator, Singleton, UnaryOp}; -use ruff_python_parser::lexer::lex; -use ruff_python_parser::{parse_tokens, Mode}; +use ruff_python_parser::{parse, Mode}; #[test] fn function_arguments() { @@ -148,11 +147,10 @@ fn f_strings() { } fn trace_preorder_visitation(source: &str) -> String { - let tokens = lex(source, Mode::Module); - let parsed = parse_tokens(tokens.collect(), source, Mode::Module).unwrap(); + let parsed = parse(source, Mode::Module).unwrap(); let mut visitor = RecordVisitor::default(); - visitor.visit_mod(&parsed); + visitor.visit_mod(parsed.syntax()); visitor.output } diff --git a/crates/ruff_python_ast_integration_tests/tests/stmt_if.rs b/crates/ruff_python_ast_integration_tests/tests/stmt_if.rs index cacf964996..240d01187e 100644 --- a/crates/ruff_python_ast_integration_tests/tests/stmt_if.rs +++ b/crates/ruff_python_ast_integration_tests/tests/stmt_if.rs @@ -1,5 +1,5 @@ use ruff_python_ast::stmt_if::elif_else_range; -use ruff_python_parser::{parse_suite, ParseError}; +use ruff_python_parser::{parse_module, ParseError}; use ruff_text_size::TextSize; #[test] @@ -9,12 +9,14 @@ fn extract_elif_else_range() -> Result<(), ParseError> { elif b: ... "; - let mut stmts = parse_suite(contents)?; - let stmt = stmts - .pop() - .and_then(ruff_python_ast::Stmt::if_stmt) - .unwrap(); - let range = elif_else_range(&stmt.elif_else_clauses[0], contents).unwrap(); + let parsed = parse_module(contents)?; + let if_stmt = parsed + .suite() + .first() + .expect("module should contain at least one statement") + .as_if_stmt() + .expect("first statement should be an `if` statement"); + let range = elif_else_range(&if_stmt.elif_else_clauses[0], contents).unwrap(); assert_eq!(range.start(), TextSize::from(14)); assert_eq!(range.end(), TextSize::from(18)); @@ -23,12 +25,14 @@ elif b: else: ... 
"; - let mut stmts = parse_suite(contents)?; - let stmt = stmts - .pop() - .and_then(ruff_python_ast::Stmt::if_stmt) - .unwrap(); - let range = elif_else_range(&stmt.elif_else_clauses[0], contents).unwrap(); + let parsed = parse_module(contents)?; + let if_stmt = parsed + .suite() + .first() + .expect("module should contain at least one statement") + .as_if_stmt() + .expect("first statement should be an `if` statement"); + let range = elif_else_range(&if_stmt.elif_else_clauses[0], contents).unwrap(); assert_eq!(range.start(), TextSize::from(14)); assert_eq!(range.end(), TextSize::from(18)); diff --git a/crates/ruff_python_ast_integration_tests/tests/visitor.rs b/crates/ruff_python_ast_integration_tests/tests/visitor.rs index 1c1bf0d0f7..128d0c3f12 100644 --- a/crates/ruff_python_ast_integration_tests/tests/visitor.rs +++ b/crates/ruff_python_ast_integration_tests/tests/visitor.rs @@ -13,8 +13,7 @@ use ruff_python_ast::{ Expr, FString, FStringElement, Keyword, MatchCase, Operator, Parameter, Parameters, Pattern, Stmt, StringLiteral, TypeParam, UnaryOp, WithItem, }; -use ruff_python_parser::lexer::lex; -use ruff_python_parser::{parse_tokens, Mode}; +use ruff_python_parser::{parse, Mode}; #[test] fn function_arguments() { @@ -157,11 +156,10 @@ fn f_strings() { } fn trace_visitation(source: &str) -> String { - let tokens = lex(source, Mode::Module); - let parsed = parse_tokens(tokens.collect(), source, Mode::Module).unwrap(); + let parsed = parse(source, Mode::Module).unwrap(); let mut visitor = RecordVisitor::default(); - walk_module(&mut visitor, &parsed); + walk_module(&mut visitor, parsed.syntax()); visitor.output } diff --git a/crates/ruff_python_codegen/Cargo.toml b/crates/ruff_python_codegen/Cargo.toml index 7afd304046..cf273027bb 100644 --- a/crates/ruff_python_codegen/Cargo.toml +++ b/crates/ruff_python_codegen/Cargo.toml @@ -18,6 +18,7 @@ ruff_python_ast = { workspace = true } ruff_python_literal = { workspace = true } ruff_python_parser = { workspace = true } ruff_source_file = { workspace = true } +ruff_text_size = { workspace = true } once_cell = { workspace = true } diff --git a/crates/ruff_python_codegen/src/generator.rs b/crates/ruff_python_codegen/src/generator.rs index 1c95db1f9c..9cb98dd7c6 100644 --- a/crates/ruff_python_codegen/src/generator.rs +++ b/crates/ruff_python_codegen/src/generator.rs @@ -1416,7 +1416,7 @@ impl<'a> Generator<'a> { #[cfg(test)] mod tests { use ruff_python_ast::{str::Quote, Mod, ModModule}; - use ruff_python_parser::{self, parse_suite, Mode}; + use ruff_python_parser::{self, parse_module, Mode}; use ruff_source_file::LineEnding; use crate::stylist::Indentation; @@ -1427,9 +1427,9 @@ mod tests { let indentation = Indentation::default(); let quote = Quote::default(); let line_ending = LineEnding::default(); - let stmt = parse_suite(contents).unwrap(); + let module = parse_module(contents).unwrap(); let mut generator = Generator::new(&indentation, quote, line_ending); - generator.unparse_suite(&stmt); + generator.unparse_suite(module.suite()); generator.generate() } @@ -1439,9 +1439,9 @@ mod tests { line_ending: LineEnding, contents: &str, ) -> String { - let stmt = parse_suite(contents).unwrap(); + let module = parse_module(contents).unwrap(); let mut generator = Generator::new(indentation, quote, line_ending); - generator.unparse_suite(&stmt); + generator.unparse_suite(module.suite()); generator.generate() } @@ -1449,8 +1449,8 @@ mod tests { let indentation = Indentation::default(); let quote = Quote::default(); let line_ending = 
LineEnding::default(); - let ast = ruff_python_parser::parse(contents, Mode::Ipython).unwrap(); - let Mod::Module(ModModule { body, .. }) = ast else { + let parsed = ruff_python_parser::parse(contents, Mode::Ipython).unwrap(); + let Mod::Module(ModModule { body, .. }) = parsed.into_syntax() else { panic!("Source code didn't return ModModule") }; let [stmt] = body.as_slice() else { diff --git a/crates/ruff_python_codegen/src/lib.rs b/crates/ruff_python_codegen/src/lib.rs index baa71ea127..64a991edcd 100644 --- a/crates/ruff_python_codegen/src/lib.rs +++ b/crates/ruff_python_codegen/src/lib.rs @@ -2,17 +2,16 @@ mod generator; mod stylist; pub use generator::Generator; -use ruff_python_parser::{lexer, parse_suite, Mode, ParseError}; +use ruff_python_parser::{parse_module, ParseError}; use ruff_source_file::Locator; pub use stylist::Stylist; /// Run round-trip source code generation on a given Python code. pub fn round_trip(code: &str) -> Result { let locator = Locator::new(code); - let python_ast = parse_suite(code)?; - let tokens: Vec<_> = lexer::lex(code, Mode::Module).collect(); - let stylist = Stylist::from_tokens(&tokens, &locator); + let parsed = parse_module(code)?; + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); let mut generator: Generator = (&stylist).into(); - generator.unparse_suite(&python_ast); + generator.unparse_suite(parsed.suite()); Ok(generator.generate()) } diff --git a/crates/ruff_python_codegen/src/stylist.rs b/crates/ruff_python_codegen/src/stylist.rs index fc9e43bfb6..375f0c8e16 100644 --- a/crates/ruff_python_codegen/src/stylist.rs +++ b/crates/ruff_python_codegen/src/stylist.rs @@ -4,10 +4,10 @@ use std::ops::Deref; use once_cell::unsync::OnceCell; -use ruff_python_ast::{str::Quote, StringFlags}; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::{Tok, TokenKind}; +use ruff_python_ast::str::Quote; +use ruff_python_parser::{Token, TokenKind, Tokens}; use ruff_source_file::{find_newline, LineEnding, Locator}; +use ruff_text_size::Ranged; #[derive(Debug, Clone)] pub struct Stylist<'a> { @@ -35,40 +35,42 @@ impl<'a> Stylist<'a> { }) } - pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self { - let indentation = detect_indention(tokens, locator); + pub fn from_tokens(tokens: &Tokens, locator: &'a Locator<'a>) -> Self { + let indentation = detect_indention(tokens.up_to_first_unknown(), locator); Self { locator, indentation, - quote: detect_quote(tokens), + quote: detect_quote(tokens.up_to_first_unknown()), line_ending: OnceCell::default(), } } } -fn detect_quote(tokens: &[LexResult]) -> Quote { - for (token, _) in tokens.iter().flatten() { - match token { - Tok::String { flags, .. 
} if !flags.is_triple_quoted() => return flags.quote_style(), - Tok::FStringStart(flags) => return flags.quote_style(), +fn detect_quote(tokens: &[Token]) -> Quote { + for token in tokens { + match token.kind() { + TokenKind::String if !token.is_triple_quoted_string() => { + return token.string_quote_style() + } + TokenKind::FStringStart => return token.string_quote_style(), _ => continue, } } Quote::default() } -fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation { - let indent_range = tokens.iter().flatten().find_map(|(t, range)| { - if matches!(t, Tok::Indent) { - Some(range) +fn detect_indention(tokens: &[Token], locator: &Locator) -> Indentation { + let indent_range = tokens.iter().find_map(|token| { + if matches!(token.kind(), TokenKind::Indent) { + Some(token.range()) } else { None } }); if let Some(indent_range) = indent_range { - let mut whitespace = locator.slice(*indent_range); + let mut whitespace = locator.slice(indent_range); // https://docs.python.org/3/reference/lexical_analysis.html#indentation // > A formfeed character may be present at the start of the line; it will be ignored for // > the indentation calculations above. Formfeed characters occurring elsewhere in the @@ -96,7 +98,7 @@ fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation { // ) // ``` let mut depth = 0usize; - for (token, range) in tokens.iter().flatten() { + for token in tokens { match token.kind() { TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => { depth = depth.saturating_add(1); @@ -105,7 +107,7 @@ fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation { depth = depth.saturating_sub(1); } TokenKind::NonLogicalNewline => { - let line = locator.line(range.end()); + let line = locator.line(token.end()); let indent_index = line.find(|c: char| !c.is_whitespace()); if let Some(indent_index) = indent_index { if indent_index > 0 { @@ -158,8 +160,7 @@ impl Deref for Indentation { #[cfg(test)] mod tests { - use ruff_python_parser::lexer::lex; - use ruff_python_parser::Mode; + use ruff_python_parser::{parse_module, parse_unchecked, Mode}; use ruff_source_file::{find_newline, LineEnding}; @@ -170,44 +171,36 @@ mod tests { fn indentation() { let contents = r"x = 1"; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation::default() - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation::default()); let contents = r" if True: pass "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation(" ".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); let contents = r" if True: pass "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation(" ".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); let contents = r" if True: pass "; let locator = Locator::new(contents); 
- let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation("\t".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation("\t".to_string())); let contents = r" x = ( @@ -217,11 +210,9 @@ x = ( ) "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation(" ".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); let contents = r" x = ( @@ -231,9 +222,9 @@ x = ( ) "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); + let parsed = parse_unchecked(contents, Mode::Module); assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), + Stylist::from_tokens(parsed.tokens(), &locator).indentation(), &Indentation(" ".to_string()) ); @@ -244,62 +235,48 @@ class FormFeedIndent: print(a) "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation(" ".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); } #[test] fn quote() { let contents = r"x = 1"; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::default() - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::default()); let contents = r"x = '1'"; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); let contents = r"x = f'1'"; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); let contents = r#"x = "1""#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); let contents = r#"x = f"1""#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); let contents = r#"s = "It's done.""#; 
let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); // No style if only double quoted docstring (will take default Double) let contents = r#" @@ -308,11 +285,9 @@ def f(): pass "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::default() - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::default()); // Detect from string literal appearing after docstring let contents = r#" @@ -321,11 +296,9 @@ def f(): a = 'v' "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); let contents = r#" '''Module docstring.''' @@ -333,11 +306,9 @@ a = 'v' a = "v" "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); // Detect from f-string appearing after docstring let contents = r#" @@ -346,11 +317,9 @@ a = "v" a = f'v' "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); let contents = r#" '''Module docstring.''' @@ -358,21 +327,17 @@ a = f'v' a = f"v" "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); let contents = r" f'''Module docstring.''' "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); } #[test] diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml index 2c5d8ee508..a57e480130 100644 --- a/crates/ruff_python_formatter/Cargo.toml +++ b/crates/ruff_python_formatter/Cargo.toml @@ -20,7 +20,6 @@ ruff_macros = { workspace = true } ruff_python_trivia = { workspace = true } ruff_source_file = { workspace = true } ruff_python_ast = { workspace = true } -ruff_python_index = { workspace = true } ruff_python_parser = { workspace = true } ruff_text_size = { workspace = true } diff --git 
a/crates/ruff_python_formatter/src/cli.rs b/crates/ruff_python_formatter/src/cli.rs index 881ca8ffae..f2f86c7bd1 100644 --- a/crates/ruff_python_formatter/src/cli.rs +++ b/crates/ruff_python_formatter/src/cli.rs @@ -2,13 +2,12 @@ use std::path::{Path, PathBuf}; -use anyhow::{format_err, Context, Result}; +use anyhow::{Context, Result}; use clap::{command, Parser, ValueEnum}; use ruff_formatter::SourceCode; use ruff_python_ast::PySourceType; -use ruff_python_index::tokens_and_ranges; -use ruff_python_parser::{parse_tokens, AsMode}; +use ruff_python_parser::{parse, AsMode}; use ruff_text_size::Ranged; use crate::comments::collect_comments; @@ -46,12 +45,9 @@ pub struct Cli { pub fn format_and_debug_print(source: &str, cli: &Cli, source_path: &Path) -> Result { let source_type = PySourceType::from(source_path); - let (tokens, comment_ranges) = tokens_and_ranges(source, source_type) - .map_err(|err| format_err!("Source contains syntax errors {err:?}"))?; // Parse the AST. - let module = - parse_tokens(tokens, source, source_type.as_mode()).context("Syntax error in input")?; + let parsed = parse(source, source_type.as_mode()).context("Syntax error in input")?; let options = PyFormatOptions::from_extension(source_path) .with_preview(if cli.preview { @@ -66,14 +62,14 @@ pub fn format_and_debug_print(source: &str, cli: &Cli, source_path: &Path) -> Re }); let source_code = SourceCode::new(source); - let formatted = format_module_ast(&module, &comment_ranges, source, options) - .context("Failed to format node")?; + let formatted = format_module_ast(&parsed, source, options).context("Failed to format node")?; if cli.print_ir { println!("{}", formatted.document().display(source_code)); } if cli.print_comments { // Print preceding, following and enclosing nodes - let decorated_comments = collect_comments(&module, source_code, &comment_ranges); + let decorated_comments = + collect_comments(parsed.syntax(), source_code, parsed.comment_ranges()); if !decorated_comments.is_empty() { println!("# Comment decoration: Range, Preceding, Following, Enclosing, Comment"); } diff --git a/crates/ruff_python_formatter/src/comments/mod.rs b/crates/ruff_python_formatter/src/comments/mod.rs index 9717252a9b..3731a082e6 100644 --- a/crates/ruff_python_formatter/src/comments/mod.rs +++ b/crates/ruff_python_formatter/src/comments/mod.rs @@ -481,15 +481,12 @@ mod tests { use ruff_formatter::SourceCode; use ruff_python_ast::{Mod, PySourceType}; - use ruff_python_index::tokens_and_ranges; - use ruff_python_parser::{parse_tokens, AsMode}; - use ruff_python_trivia::CommentRanges; + use ruff_python_parser::{parse, AsMode, Parsed}; use crate::comments::Comments; struct CommentsTestCase<'a> { - module: Mod, - comment_ranges: CommentRanges, + parsed: Parsed, source_code: SourceCode<'a>, } @@ -497,20 +494,21 @@ mod tests { fn from_code(source: &'a str) -> Self { let source_code = SourceCode::new(source); let source_type = PySourceType::Python; - let (tokens, comment_ranges) = - tokens_and_ranges(source, source_type).expect("Expect source to be valid Python"); - let parsed = parse_tokens(tokens, source, source_type.as_mode()) - .expect("Expect source to be valid Python"); + let parsed = + parse(source, source_type.as_mode()).expect("Expect source to be valid Python"); CommentsTestCase { + parsed, source_code, - module: parsed, - comment_ranges, } } fn to_comments(&self) -> Comments { - Comments::from_ast(&self.module, self.source_code, &self.comment_ranges) + Comments::from_ast( + self.parsed.syntax(), + self.source_code, + 
self.parsed.comment_ranges(), + ) } } diff --git a/crates/ruff_python_formatter/src/context.rs b/crates/ruff_python_formatter/src/context.rs index 3d5f23590a..32169ccf7d 100644 --- a/crates/ruff_python_formatter/src/context.rs +++ b/crates/ruff_python_formatter/src/context.rs @@ -3,6 +3,7 @@ use crate::other::f_string_element::FStringExpressionElementContext; use crate::PyFormatOptions; use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode}; use ruff_python_ast::str::Quote; +use ruff_python_parser::Tokens; use ruff_source_file::Locator; use std::fmt::{Debug, Formatter}; use std::ops::{Deref, DerefMut}; @@ -12,6 +13,7 @@ pub struct PyFormatContext<'a> { options: PyFormatOptions, contents: &'a str, comments: Comments<'a>, + tokens: &'a Tokens, node_level: NodeLevel, indent_level: IndentLevel, /// Set to a non-None value when the formatter is running on a code @@ -28,11 +30,17 @@ pub struct PyFormatContext<'a> { } impl<'a> PyFormatContext<'a> { - pub(crate) fn new(options: PyFormatOptions, contents: &'a str, comments: Comments<'a>) -> Self { + pub(crate) fn new( + options: PyFormatOptions, + contents: &'a str, + comments: Comments<'a>, + tokens: &'a Tokens, + ) -> Self { Self { options, contents, comments, + tokens, node_level: NodeLevel::TopLevel(TopLevelStatementPosition::Other), indent_level: IndentLevel::new(0), docstring: None, @@ -69,6 +77,10 @@ impl<'a> PyFormatContext<'a> { &self.comments } + pub(crate) fn tokens(&self) -> &'a Tokens { + self.tokens + } + /// Returns a non-None value only if the formatter is running on a code /// snippet within a docstring. /// diff --git a/crates/ruff_python_formatter/src/expression/expr_name.rs b/crates/ruff_python_formatter/src/expression/expr_name.rs index 276ded6dd9..5a8b6b2665 100644 --- a/crates/ruff_python_formatter/src/expression/expr_name.rs +++ b/crates/ruff_python_formatter/src/expression/expr_name.rs @@ -31,15 +31,15 @@ impl NeedsParentheses for ExprName { #[cfg(test)] mod tests { - use ruff_python_parser::parse_program; + use ruff_python_parser::parse_module; use ruff_text_size::{Ranged, TextRange, TextSize}; #[test] fn name_range_with_comments() { - let source = parse_program("a # comment").unwrap(); + let module = parse_module("a # comment").unwrap(); - let expression_statement = source - .body + let expression_statement = module + .suite() .first() .expect("Expected non-empty body") .as_expr_stmt() diff --git a/crates/ruff_python_formatter/src/expression/parentheses.rs b/crates/ruff_python_formatter/src/expression/parentheses.rs index 766bb7071a..c85355922f 100644 --- a/crates/ruff_python_formatter/src/expression/parentheses.rs +++ b/crates/ruff_python_formatter/src/expression/parentheses.rs @@ -444,17 +444,16 @@ impl Format> for FormatEmptyParenthesized<'_> { mod tests { use ruff_python_ast::ExpressionRef; use ruff_python_parser::parse_expression; - use ruff_python_trivia::CommentRanges; use crate::expression::parentheses::is_expression_parenthesized; #[test] fn test_has_parentheses() { let expression = r#"(b().c("")).d()"#; - let expr = parse_expression(expression).unwrap(); + let parsed = parse_expression(expression).unwrap(); assert!(!is_expression_parenthesized( - ExpressionRef::from(&expr), - &CommentRanges::default(), + ExpressionRef::from(parsed.expr()), + parsed.comment_ranges(), expression )); } diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index 858714abf8..283727ff76 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ 
b/crates/ruff_python_formatter/src/lib.rs @@ -6,8 +6,7 @@ use ruff_formatter::prelude::*; use ruff_formatter::{format, write, FormatError, Formatted, PrintError, Printed, SourceCode}; use ruff_python_ast::AstNode; use ruff_python_ast::Mod; -use ruff_python_index::tokens_and_ranges; -use ruff_python_parser::{parse_tokens, AsMode, ParseError, ParseErrorType}; +use ruff_python_parser::{parse, AsMode, ParseError, Parsed}; use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; @@ -114,29 +113,23 @@ pub fn format_module_source( options: PyFormatOptions, ) -> Result { let source_type = options.source_type(); - let (tokens, comment_ranges) = - tokens_and_ranges(source, source_type).map_err(|err| ParseError { - location: err.location(), - error: ParseErrorType::Lexical(err.into_error()), - })?; - let module = parse_tokens(tokens, source, source_type.as_mode())?; - let formatted = format_module_ast(&module, &comment_ranges, source, options)?; + let parsed = parse(source, source_type.as_mode())?; + let formatted = format_module_ast(&parsed, source, options)?; Ok(formatted.print()?) } pub fn format_module_ast<'a>( - module: &'a Mod, - comment_ranges: &'a CommentRanges, + parsed: &'a Parsed, source: &'a str, options: PyFormatOptions, ) -> FormatResult>> { let source_code = SourceCode::new(source); - let comments = Comments::from_ast(module, source_code, comment_ranges); + let comments = Comments::from_ast(parsed.syntax(), source_code, parsed.comment_ranges()); let locator = Locator::new(source); let formatted = format!( - PyFormatContext::new(options, locator.contents(), comments), - [module.format()] + PyFormatContext::new(options, locator.contents(), comments, parsed.tokens()), + [parsed.syntax().format()] )?; formatted .context() @@ -161,8 +154,7 @@ mod tests { use insta::assert_snapshot; use ruff_python_ast::PySourceType; - use ruff_python_index::tokens_and_ranges; - use ruff_python_parser::{parse_tokens, AsMode}; + use ruff_python_parser::{parse, AsMode}; use ruff_text_size::{TextRange, TextSize}; use crate::{format_module_ast, format_module_source, format_range, PyFormatOptions}; @@ -203,13 +195,12 @@ def main() -> None: "#; let source_type = PySourceType::Python; - let (tokens, comment_ranges) = tokens_and_ranges(source, source_type).unwrap(); // Parse the AST. let source_path = "code_inline.py"; - let module = parse_tokens(tokens, source, source_type.as_mode()).unwrap(); + let parsed = parse(source, source_type.as_mode()).unwrap(); let options = PyFormatOptions::from_extension(Path::new(source_path)); - let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap(); + let formatted = format_module_ast(&parsed, source, options).unwrap(); // Uncomment the `dbg` to print the IR. 
// Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR diff --git a/crates/ruff_python_formatter/src/range.rs b/crates/ruff_python_formatter/src/range.rs index 58ea00117c..7e5f152ad7 100644 --- a/crates/ruff_python_formatter/src/range.rs +++ b/crates/ruff_python_formatter/src/range.rs @@ -5,9 +5,8 @@ use ruff_formatter::{ format, FormatContext, FormatError, FormatOptions, IndentStyle, PrintedRange, SourceCode, }; use ruff_python_ast::visitor::preorder::{walk_body, PreorderVisitor, TraversalSignal}; -use ruff_python_ast::{AnyNode, AnyNodeRef, Stmt, StmtMatch, StmtTry}; -use ruff_python_index::tokens_and_ranges; -use ruff_python_parser::{parse_tokens, AsMode, ParseError, ParseErrorType}; +use ruff_python_ast::{AnyNodeRef, Stmt, StmtMatch, StmtTry}; +use ruff_python_parser::{parse, AsMode}; use ruff_python_trivia::{indentation_at_offset, BackwardsTokenizer, SimpleToken, SimpleTokenKind}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -71,32 +70,27 @@ pub fn format_range( return Ok(PrintedRange::new(formatted.into_code(), range)); } - let (tokens, comment_ranges) = - tokens_and_ranges(source, options.source_type()).map_err(|err| ParseError { - location: err.location(), - error: ParseErrorType::Lexical(err.into_error()), - })?; - assert_valid_char_boundaries(range, source); - let module = parse_tokens(tokens, source, options.source_type().as_mode())?; - let root = AnyNode::from(module); + let parsed = parse(source, options.source_type().as_mode())?; let source_code = SourceCode::new(source); - let comments = Comments::from_ast(root.as_ref(), source_code, &comment_ranges); + let comments = Comments::from_ast(parsed.syntax(), source_code, parsed.comment_ranges()); let mut context = PyFormatContext::new( options.with_source_map_generation(SourceMapGeneration::Enabled), source, comments, + parsed.tokens(), ); - let (enclosing_node, base_indent) = match find_enclosing_node(range, root.as_ref(), &context) { - EnclosingNode::Node { node, indent_level } => (node, indent_level), - EnclosingNode::Suppressed => { - // The entire range falls into a suppressed range. There's nothing to format. - return Ok(PrintedRange::empty()); - } - }; + let (enclosing_node, base_indent) = + match find_enclosing_node(range, AnyNodeRef::from(parsed.syntax()), &context) { + EnclosingNode::Node { node, indent_level } => (node, indent_level), + EnclosingNode::Suppressed => { + // The entire range falls into a suppressed range. There's nothing to format. 
+ return Ok(PrintedRange::empty()); + } + }; let narrowed_range = narrow_range(range, enclosing_node, &context); assert_valid_char_boundaries(narrowed_range, source); diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 7137558c50..2df9bca400 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -830,8 +830,7 @@ impl Format> for SuiteChildStatement<'_> { #[cfg(test)] mod tests { use ruff_formatter::format; - use ruff_python_parser::parse_suite; - use ruff_python_trivia::CommentRanges; + use ruff_python_parser::parse_module; use crate::comments::Comments; use crate::prelude::*; @@ -860,17 +859,17 @@ def trailing_func(): pass "; - let statements = parse_suite(source).unwrap(); + let parsed = parse_module(source).unwrap(); - let comment_ranges = CommentRanges::default(); let context = PyFormatContext::new( PyFormatOptions::default(), source, - Comments::from_ranges(&comment_ranges), + Comments::from_ranges(parsed.comment_ranges()), + parsed.tokens(), ); let test_formatter = - format_with(|f: &mut PyFormatter| statements.format().with_options(level).fmt(f)); + format_with(|f: &mut PyFormatter| parsed.suite().format().with_options(level).fmt(f)); let formatted = format!(context, [test_formatter]).unwrap(); let printed = formatted.print().unwrap(); diff --git a/crates/ruff_python_formatter/src/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs index 6aefad2a12..65de2979b0 100644 --- a/crates/ruff_python_formatter/src/string/docstring.rs +++ b/crates/ruff_python_formatter/src/string/docstring.rs @@ -9,7 +9,6 @@ use itertools::Itertools; use ruff_formatter::printer::SourceMapGeneration; use ruff_python_ast::{str::Quote, StringFlags}; -use ruff_python_parser::ParseError; use {once_cell::sync::Lazy, regex::Regex}; use { ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed}, @@ -1552,16 +1551,14 @@ fn docstring_format_source( use ruff_python_parser::AsMode; let source_type = options.source_type(); - let (tokens, comment_ranges) = - ruff_python_index::tokens_and_ranges(source, source_type).map_err(ParseError::from)?; - let module = ruff_python_parser::parse_tokens(tokens, source, source_type.as_mode())?; + let parsed = ruff_python_parser::parse(source, source_type.as_mode())?; let source_code = ruff_formatter::SourceCode::new(source); - let comments = crate::Comments::from_ast(&module, source_code, &comment_ranges); + let comments = crate::Comments::from_ast(parsed.syntax(), source_code, parsed.comment_ranges()); let locator = Locator::new(source); - let ctx = PyFormatContext::new(options, locator.contents(), comments) + let ctx = PyFormatContext::new(options, locator.contents(), comments, parsed.tokens()) .in_docstring(docstring_quote_style); - let formatted = crate::format!(ctx, [module.format()])?; + let formatted = crate::format!(ctx, [parsed.syntax().format()])?; formatted .context() .comments() diff --git a/crates/ruff_python_formatter/src/verbatim.rs b/crates/ruff_python_formatter/src/verbatim.rs index 94635802ef..587f2d0690 100644 --- a/crates/ruff_python_formatter/src/verbatim.rs +++ b/crates/ruff_python_formatter/src/verbatim.rs @@ -1,13 +1,13 @@ use std::borrow::Cow; use std::iter::FusedIterator; +use std::slice::Iter; use unicode_width::UnicodeWidthStr; use ruff_formatter::{write, FormatError}; use ruff_python_ast::AnyNodeRef; use ruff_python_ast::Stmt; -use ruff_python_parser::lexer::{lex_starts_at, 
LexResult}; -use ruff_python_parser::{Mode, Tok}; +use ruff_python_parser::{self as parser, TokenKind}; use ruff_python_trivia::lines_before; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -725,13 +725,10 @@ struct FormatVerbatimStatementRange { impl Format> for FormatVerbatimStatementRange { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { - let lexer = lex_starts_at( - &f.context().source()[self.verbatim_range], - Mode::Module, - self.verbatim_range.start(), + let logical_lines = LogicalLinesIter::new( + f.context().tokens().in_range(self.verbatim_range).iter(), + self.verbatim_range, ); - - let logical_lines = LogicalLinesIter::new(lexer, self.verbatim_range); let mut first = true; for logical_line in logical_lines { @@ -784,43 +781,47 @@ impl Format> for FormatVerbatimStatementRange { } } -struct LogicalLinesIter { - lexer: I, +struct LogicalLinesIter<'a> { + tokens: Iter<'a, parser::Token>, // The end of the last logical line last_line_end: TextSize, // The position where the content to lex ends. content_end: TextSize, } -impl LogicalLinesIter { - fn new(lexer: I, verbatim_range: TextRange) -> Self { +impl<'a> LogicalLinesIter<'a> { + fn new(tokens: Iter<'a, parser::Token>, verbatim_range: TextRange) -> Self { Self { - lexer, + tokens, last_line_end: verbatim_range.start(), content_end: verbatim_range.end(), } } } -impl Iterator for LogicalLinesIter -where - I: Iterator, -{ +impl<'a> Iterator for LogicalLinesIter<'a> { type Item = FormatResult; fn next(&mut self) -> Option { let mut parens = 0u32; let (content_end, full_end) = loop { - match self.lexer.next() { - Some(Ok((token, range))) => match token { - Tok::Newline => break (range.start(), range.end()), + match self.tokens.next() { + Some(token) if token.kind() == TokenKind::Unknown => { + return Some(Err(FormatError::syntax_error( + "Unexpected token when lexing verbatim statement range.", + ))) + } + Some(token) => match token.kind() { + TokenKind::Newline => break (token.start(), token.end()), // Ignore if inside an expression - Tok::NonLogicalNewline if parens == 0 => break (range.start(), range.end()), - Tok::Lbrace | Tok::Lpar | Tok::Lsqb => { + TokenKind::NonLogicalNewline if parens == 0 => { + break (token.start(), token.end()) + } + TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => { parens = parens.saturating_add(1); } - Tok::Rbrace | Tok::Rpar | Tok::Rsqb => { + TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => { parens = parens.saturating_sub(1); } _ => {} @@ -839,11 +840,6 @@ where None }; } - Some(Err(_)) => { - return Some(Err(FormatError::syntax_error( - "Unexpected token when lexing verbatim statement range.", - ))) - } } }; @@ -857,7 +853,7 @@ where } } -impl FusedIterator for LogicalLinesIter where I: Iterator {} +impl<'a> FusedIterator for LogicalLinesIter<'a> {} /// A logical line or a comment (or form feed only) line struct LogicalLine { diff --git a/crates/ruff_python_formatter/tests/fixtures.rs b/crates/ruff_python_formatter/tests/fixtures.rs index a72e505e7a..0c4da466ee 100644 --- a/crates/ruff_python_formatter/tests/fixtures.rs +++ b/crates/ruff_python_formatter/tests/fixtures.rs @@ -391,13 +391,15 @@ fn ensure_unchanged_ast( // Parse the unformatted code. 
let mut unformatted_ast = parse(unformatted_code, source_type.as_mode()) - .expect("Unformatted code to be valid syntax"); + .expect("Unformatted code to be valid syntax") + .into_syntax(); Normalizer.visit_module(&mut unformatted_ast); let unformatted_ast = ComparableMod::from(&unformatted_ast); // Parse the formatted code. - let mut formatted_ast = - parse(formatted_code, source_type.as_mode()).expect("Formatted code to be valid syntax"); + let mut formatted_ast = parse(formatted_code, source_type.as_mode()) + .expect("Formatted code to be valid syntax") + .into_syntax(); Normalizer.visit_module(&mut formatted_ast); let formatted_ast = ComparableMod::from(&formatted_ast); diff --git a/crates/ruff_python_index/src/comment_ranges.rs b/crates/ruff_python_index/src/comment_ranges.rs deleted file mode 100644 index e9ef4c0462..0000000000 --- a/crates/ruff_python_index/src/comment_ranges.rs +++ /dev/null @@ -1,44 +0,0 @@ -use std::fmt::Debug; - -use ruff_python_ast::PySourceType; -use ruff_python_parser::lexer::{lex, LexResult, LexicalError}; -use ruff_python_parser::{allocate_tokens_vec, AsMode, Tok}; -use ruff_python_trivia::CommentRanges; -use ruff_text_size::TextRange; - -#[derive(Debug, Clone, Default)] -pub struct CommentRangesBuilder { - ranges: Vec, -} - -impl CommentRangesBuilder { - pub fn visit_token(&mut self, token: &Tok, range: TextRange) { - if token.is_comment() { - self.ranges.push(range); - } - } - - pub fn finish(self) -> CommentRanges { - CommentRanges::new(self.ranges) - } -} - -/// Helper method to lex and extract comment ranges -pub fn tokens_and_ranges( - source: &str, - source_type: PySourceType, -) -> Result<(Vec, CommentRanges), LexicalError> { - let mut tokens = allocate_tokens_vec(source); - let mut comment_ranges = CommentRangesBuilder::default(); - - for result in lex(source, source_type.as_mode()) { - if let Ok((token, range)) = &result { - comment_ranges.visit_token(token, *range); - } - - tokens.push(result); - } - - let comment_ranges = comment_ranges.finish(); - Ok((tokens, comment_ranges)) -} diff --git a/crates/ruff_python_index/src/fstring_ranges.rs b/crates/ruff_python_index/src/fstring_ranges.rs index b92bbd382c..089050334e 100644 --- a/crates/ruff_python_index/src/fstring_ranges.rs +++ b/crates/ruff_python_index/src/fstring_ranges.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; -use ruff_python_parser::Tok; -use ruff_text_size::{TextRange, TextSize}; +use ruff_python_parser::{Token, TokenKind}; +use ruff_text_size::{Ranged, TextRange, TextSize}; /// Stores the ranges of all f-strings in a file sorted by [`TextRange::start`]. /// There can be multiple overlapping ranges for nested f-strings. 
@@ -85,14 +85,14 @@ pub(crate) struct FStringRangesBuilder { } impl FStringRangesBuilder { - pub(crate) fn visit_token(&mut self, token: &Tok, range: TextRange) { - match token { - Tok::FStringStart(_) => { - self.start_locations.push(range.start()); + pub(crate) fn visit_token(&mut self, token: &Token) { + match token.kind() { + TokenKind::FStringStart => { + self.start_locations.push(token.start()); } - Tok::FStringEnd => { + TokenKind::FStringEnd => { if let Some(start) = self.start_locations.pop() { - self.raw.insert(start, TextRange::new(start, range.end())); + self.raw.insert(start, TextRange::new(start, token.end())); } } _ => {} diff --git a/crates/ruff_python_index/src/indexer.rs b/crates/ruff_python_index/src/indexer.rs index d7f7810de6..fb813f9814 100644 --- a/crates/ruff_python_index/src/indexer.rs +++ b/crates/ruff_python_index/src/indexer.rs @@ -2,21 +2,15 @@ //! are omitted from the AST (e.g., commented lines). use ruff_python_ast::Stmt; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::Tok; -use ruff_python_trivia::{ - has_leading_content, has_trailing_content, is_python_whitespace, CommentRanges, -}; +use ruff_python_parser::{TokenKind, Tokens}; +use ruff_python_trivia::{has_leading_content, has_trailing_content, is_python_whitespace}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder}; use crate::multiline_ranges::{MultilineRanges, MultilineRangesBuilder}; -use crate::CommentRangesBuilder; pub struct Indexer { - comment_ranges: CommentRanges, - /// Stores the start offset of continuation lines. continuation_lines: Vec, @@ -28,10 +22,9 @@ pub struct Indexer { } impl Indexer { - pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self { + pub fn from_tokens(tokens: &Tokens, locator: &Locator<'_>) -> Self { assert!(TextSize::try_from(locator.contents().len()).is_ok()); - let mut comment_ranges_builder = CommentRangesBuilder::default(); let mut fstring_ranges_builder = FStringRangesBuilder::default(); let mut multiline_ranges_builder = MultilineRangesBuilder::default(); let mut continuation_lines = Vec::new(); @@ -39,8 +32,8 @@ impl Indexer { let mut prev_end = TextSize::default(); let mut line_start = TextSize::default(); - for (tok, range) in tokens.iter().flatten() { - let trivia = locator.slice(TextRange::new(prev_end, range.start())); + for token in tokens.up_to_first_unknown() { + let trivia = locator.slice(TextRange::new(prev_end, token.start())); // Get the trivia between the previous and the current token and detect any newlines. // This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens @@ -59,38 +52,31 @@ impl Indexer { } } - comment_ranges_builder.visit_token(tok, *range); - fstring_ranges_builder.visit_token(tok, *range); - multiline_ranges_builder.visit_token(tok, *range); + fstring_ranges_builder.visit_token(token); + multiline_ranges_builder.visit_token(token); - match tok { - Tok::Newline | Tok::NonLogicalNewline => { - line_start = range.end(); + match token.kind() { + TokenKind::Newline | TokenKind::NonLogicalNewline => { + line_start = token.end(); } - Tok::String { .. } => { + TokenKind::String => { // If the previous token was a string, find the start of the line that contains // the closing delimiter, since the token itself can span multiple lines. 
- line_start = locator.line_start(range.end()); + line_start = locator.line_start(token.end()); } _ => {} } - prev_end = range.end(); + prev_end = token.end(); } Self { - comment_ranges: comment_ranges_builder.finish(), continuation_lines, fstring_ranges: fstring_ranges_builder.finish(), multiline_ranges: multiline_ranges_builder.finish(), } } - /// Returns the byte offset ranges of comments - pub const fn comment_ranges(&self) -> &CommentRanges { - &self.comment_ranges - } - /// Returns the byte offset ranges of f-strings. pub const fn fstring_ranges(&self) -> &FStringRanges { &self.fstring_ranges @@ -225,19 +211,22 @@ impl Indexer { #[cfg(test)] mod tests { - use ruff_python_parser::lexer::LexResult; - use ruff_python_parser::{lexer, Mode}; + use ruff_python_parser::parse_module; use ruff_source_file::Locator; use ruff_text_size::{TextRange, TextSize}; use crate::Indexer; + fn new_indexer(contents: &str) -> Indexer { + let parsed = parse_module(contents).unwrap(); + let locator = Locator::new(contents); + Indexer::from_tokens(parsed.tokens(), &locator) + } + #[test] fn continuation() { let contents = r"x = 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents)); - assert_eq!(indexer.continuation_line_starts(), &[]); + assert_eq!(new_indexer(contents).continuation_line_starts(), &[]); let contents = r" # Hello, world! @@ -248,9 +237,7 @@ y = 2 " .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents)); - assert_eq!(indexer.continuation_line_starts(), &[]); + assert_eq!(new_indexer(contents).continuation_line_starts(), &[]); let contents = r#" x = \ @@ -268,10 +255,8 @@ if True: ) "# .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer.continuation_line_starts(), + new_indexer(contents).continuation_line_starts(), [ // row 1 TextSize::from(0), @@ -300,10 +285,8 @@ x = 1; \ import os " .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer.continuation_line_starts(), + new_indexer(contents).continuation_line_starts(), [ // row 9 TextSize::from(84), @@ -323,10 +306,8 @@ f'foo { 'str1' \ }' " .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer.continuation_line_starts(), + new_indexer(contents).continuation_line_starts(), [ // row 1 TextSize::new(0), @@ -348,10 +329,8 @@ x = ( + 2) " .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer.continuation_line_starts(), + new_indexer(contents).continuation_line_starts(), [ // row 3 TextSize::new(12), @@ -373,10 +352,8 @@ f"start {f"inner {f"another"}"} end" f"implicit " f"concatenation" "# .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer + new_indexer(contents) .fstring_ranges() .values() .copied() @@ -409,10 +386,8 @@ f-string"""} """ "# .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( 
- indexer + new_indexer(contents) .fstring_ranges() .values() .copied() @@ -447,8 +422,7 @@ f-string"""} the end""" "# .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); + let indexer = new_indexer(contents); // For reference, the ranges of the f-strings in the above code are as // follows where the ones inside parentheses are nested f-strings: diff --git a/crates/ruff_python_index/src/lib.rs b/crates/ruff_python_index/src/lib.rs index 2a4660f012..aabdef1d48 100644 --- a/crates/ruff_python_index/src/lib.rs +++ b/crates/ruff_python_index/src/lib.rs @@ -1,7 +1,5 @@ -mod comment_ranges; mod fstring_ranges; mod indexer; mod multiline_ranges; -pub use comment_ranges::{tokens_and_ranges, CommentRangesBuilder}; pub use indexer::Indexer; diff --git a/crates/ruff_python_index/src/multiline_ranges.rs b/crates/ruff_python_index/src/multiline_ranges.rs index 8043929aa9..585ff6f1ae 100644 --- a/crates/ruff_python_index/src/multiline_ranges.rs +++ b/crates/ruff_python_index/src/multiline_ranges.rs @@ -1,6 +1,5 @@ -use ruff_python_ast::StringFlags; -use ruff_python_parser::Tok; -use ruff_text_size::TextRange; +use ruff_python_parser::{Token, TokenKind}; +use ruff_text_size::{Ranged, TextRange}; /// Stores the range of all multiline strings in a file sorted by /// [`TextRange::start`]. @@ -46,10 +45,10 @@ pub(crate) struct MultilineRangesBuilder { } impl MultilineRangesBuilder { - pub(crate) fn visit_token(&mut self, token: &Tok, range: TextRange) { - if let Tok::String { flags, .. } | Tok::FStringMiddle { flags, .. } = token { - if flags.is_triple_quoted() { - self.ranges.push(range); + pub(crate) fn visit_token(&mut self, token: &Token) { + if matches!(token.kind(), TokenKind::String | TokenKind::FStringMiddle) { + if token.is_triple_quoted_string() { + self.ranges.push(token.range()); } } } diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml index fc064e6f0a..00ac193efe 100644 --- a/crates/ruff_python_parser/Cargo.toml +++ b/crates/ruff_python_parser/Cargo.toml @@ -14,6 +14,7 @@ license = { workspace = true } [dependencies] ruff_python_ast = { workspace = true } +ruff_python_trivia = { workspace = true } ruff_text_size = { workspace = true } anyhow = { workspace = true } diff --git a/crates/ruff_python_parser/resources/inline/err/async_unexpected_token.py b/crates/ruff_python_parser/resources/inline/err/async_unexpected_token.py index 0641706d52..355a877b6f 100644 --- a/crates/ruff_python_parser/resources/inline/err/async_unexpected_token.py +++ b/crates/ruff_python_parser/resources/inline/err/async_unexpected_token.py @@ -2,7 +2,5 @@ async class Foo: ... async while test: ... async x = 1 async async def foo(): ... -# TODO(dhruvmanila): Here, `match` is actually a Name token because -# of the soft keyword # transformer async match test: case _: ... diff --git a/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword.py b/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword.py new file mode 100644 index 0000000000..414c609081 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword.py @@ -0,0 +1,2 @@ +match yield foo: + case _: ... 
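With `ruff_python_index::tokens_and_ranges` deleted, callers obtain both the token stream and the comment ranges from a single parse result and hand the parser-owned `Tokens` to the `Indexer`. A minimal sketch of that pattern, restricted to the APIs exercised in the hunks above (`parse_module`, `Parsed::tokens`, `Parsed::comment_ranges`, `Indexer::from_tokens`); `index_source` is an illustrative name, not part of the change:

```rust
use ruff_python_index::Indexer;
use ruff_python_parser::parse_module;
use ruff_source_file::Locator;

fn index_source(source: &str) {
    // One parse now yields the AST, the token stream, and the comment ranges.
    let parsed = parse_module(source).expect("source should be valid Python");

    // Comment ranges live on the parse result instead of being collected by the Indexer.
    let comment_ranges = parsed.comment_ranges();

    // The Indexer is built from the parser-owned `Tokens` rather than a `Vec<LexResult>`.
    let locator = Locator::new(source);
    let indexer = Indexer::from_tokens(parsed.tokens(), &locator);

    // f-string and continuation-line lookups are unchanged.
    let _ = (
        comment_ranges,
        indexer.fstring_ranges(),
        indexer.continuation_line_starts(),
    );
}
```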
diff --git a/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword_or_identifier.py b/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword_or_identifier.py new file mode 100644 index 0000000000..cfa7bd3fcc --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword_or_identifier.py @@ -0,0 +1,2 @@ +match *foo: # Keyword + case _: ... diff --git a/crates/ruff_python_parser/resources/inline/err/match_expected_colon.py b/crates/ruff_python_parser/resources/inline/err/match_expected_colon.py new file mode 100644 index 0000000000..1f8fb73b4e --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/match_expected_colon.py @@ -0,0 +1,2 @@ +match [1, 2] + case _: ... diff --git a/crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pattern.py b/crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pattern.py index 14a32f10e1..4589cb5401 100644 --- a/crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pattern.py +++ b/crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pattern.py @@ -1,3 +1,2 @@ -# TODO(dhruvmanila): Here, `case` is a name token because of soft keyword transformer match x: case : ... diff --git a/crates/ruff_python_parser/resources/inline/ok/except_stmt_as_name_soft_keyword.py b/crates/ruff_python_parser/resources/inline/ok/except_stmt_as_name_soft_keyword.py new file mode 100644 index 0000000000..07ccf494ec --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/except_stmt_as_name_soft_keyword.py @@ -0,0 +1,4 @@ +try: ... +except Exception as match: ... +except Exception as case: ... +except Exception as type: ... diff --git a/crates/ruff_python_parser/resources/inline/ok/from_import_soft_keyword_module_name.py b/crates/ruff_python_parser/resources/inline/ok/from_import_soft_keyword_module_name.py new file mode 100644 index 0000000000..fb617bd3f4 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/from_import_soft_keyword_module_name.py @@ -0,0 +1,4 @@ +from match import pattern +from type import bar +from case import pattern +from match.type.case import foo diff --git a/crates/ruff_python_parser/resources/inline/ok/import_as_name_soft_keyword.py b/crates/ruff_python_parser/resources/inline/ok/import_as_name_soft_keyword.py new file mode 100644 index 0000000000..5f68a60cd1 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/import_as_name_soft_keyword.py @@ -0,0 +1,3 @@ +import foo as match +import bar as case +import baz as type diff --git a/crates/ruff_python_parser/resources/inline/ok/match_as_pattern_soft_keyword.py b/crates/ruff_python_parser/resources/inline/ok/match_as_pattern_soft_keyword.py new file mode 100644 index 0000000000..c434aa5c81 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/match_as_pattern_soft_keyword.py @@ -0,0 +1,4 @@ +match foo: + case case: ... + case match: ... + case type: ... diff --git a/crates/ruff_python_parser/resources/inline/ok/match_attr_pattern_soft_keyword.py b/crates/ruff_python_parser/resources/inline/ok/match_attr_pattern_soft_keyword.py new file mode 100644 index 0000000000..fa1487f776 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/match_attr_pattern_soft_keyword.py @@ -0,0 +1,5 @@ +match foo: + case match.bar: ... + case case.bar: ... + case type.bar: ... + case match.case.type.bar.type.case.match: ... 
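Downstream of the parser, tokens are now consumed positionally: a token carries only its `TokenKind` and range, and any text (names, comments, string contents) is recovered by slicing the source. A minimal sketch of that access pattern, using only calls that appear elsewhere in this diff (`parse_module`, `Tokens::up_to_first_unknown`, `Token::kind`, `Ranged::range`); `print_comments` is an illustrative name:

```rust
use ruff_python_parser::{parse_module, TokenKind};
use ruff_text_size::Ranged;

fn print_comments(source: &str) {
    let parsed = parse_module(source).expect("source should be valid Python");

    // Token payloads no longer live on the token itself; slice the source with
    // the token's range instead.
    for token in parsed.tokens().up_to_first_unknown() {
        if token.kind() == TokenKind::Comment {
            println!("{:?}: {}", token.range(), &source[token.range()]);
        }
    }
}
```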
diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_1.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_1.py new file mode 100644 index 0000000000..bac1f88c78 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_1.py @@ -0,0 +1 @@ +match not in case diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_2.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_2.py new file mode 100644 index 0000000000..e670a92d87 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_2.py @@ -0,0 +1,13 @@ +match +match != foo +(foo, match) +[foo, match] +{foo, match} +match; +match: int +match, +match.foo +match / foo +match << foo +match and foo +match is not foo diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_1.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_1.py new file mode 100644 index 0000000000..33835d5825 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_1.py @@ -0,0 +1,24 @@ +match foo: + case _: ... +match 1: + case _: ... +match 1.0: + case _: ... +match 1j: + case _: ... +match "foo": + case _: ... +match f"foo {x}": + case _: ... +match {1, 2}: + case _: ... +match ~foo: + case _: ... +match ...: + case _: ... +match not foo: + case _: ... +match await foo(): + case _: ... +match lambda foo: foo: + case _: ... diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_2.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_2.py new file mode 100644 index 0000000000..89540dd828 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_2.py @@ -0,0 +1,12 @@ +match match: + case _: ... +match case: + case _: ... +match type: + case _: ... +match None: + case _: ... +match True: + case _: ... +match False: + case _: ... diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_or_identifier.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_or_identifier.py new file mode 100644 index 0000000000..8dd8f7dd7e --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_or_identifier.py @@ -0,0 +1,10 @@ +match (1, 2) # Identifier +match (1, 2): # Keyword + case _: ... +match [1:] # Identifier +match [1, 2]: # Keyword + case _: ... +match * foo # Identifier +match - foo # Identifier +match -foo: # Keyword + case _: ... diff --git a/crates/ruff_python_parser/src/error.rs b/crates/ruff_python_parser/src/error.rs index 08aa223403..782820e56f 100644 --- a/crates/ruff_python_parser/src/error.rs +++ b/crates/ruff_python_parser/src/error.rs @@ -7,7 +7,7 @@ use crate::TokenKind; /// Represents represent errors that occur during parsing and are /// returned by the `parse_*` functions. -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub struct ParseError { pub error: ParseErrorType, pub location: TextRange, @@ -85,7 +85,7 @@ impl std::fmt::Display for FStringErrorType { } /// Represents the different types of errors that can occur during parsing. -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub enum ParseErrorType { /// An unexpected error occurred. 
OtherError(String), diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 34d5722047..8933e4cb74 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -4,204 +4,192 @@ //! governing what is and is not a valid token are defined in the Python reference //! guide section on [Lexical analysis]. //! -//! The primary function in this module is [`lex`], which takes a string slice -//! and returns an iterator over the tokens in the source code. The tokens are currently returned -//! as a `Result`, where [`Spanned`] is a tuple containing the -//! start and end [`TextSize`] and a [`Tok`] denoting the token. -//! -//! # Example -//! -//! ``` -//! use ruff_python_parser::{lexer::lex, Tok, Mode}; -//! -//! let source = "x = 'RustPython'"; -//! let tokens = lex(source, Mode::Module) -//! .map(|tok| tok.expect("Failed to lex")) -//! .collect::>(); -//! -//! for (token, range) in tokens { -//! println!( -//! "{token:?}@{range:?}", -//! ); -//! } -//! ``` -//! //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html -use std::iter::FusedIterator; use std::{char, cmp::Ordering, str::FromStr}; +use bitflags::bitflags; +use ruff_python_ast::str::Quote; +use ruff_python_ast::str_prefix::{ + AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix, +}; use unicode_ident::{is_xid_continue, is_xid_start}; use unicode_normalization::UnicodeNormalization; -use ruff_python_ast::{ - str::Quote, - str_prefix::{AnyStringPrefix, FStringPrefix}, - AnyStringFlags, Int, IpyEscapeKind, StringFlags, -}; -use ruff_text_size::{TextLen, TextRange, TextSize}; +use ruff_python_ast::{AnyStringFlags, Int, IpyEscapeKind, StringFlags}; +use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::error::FStringErrorType; use crate::lexer::cursor::{Cursor, EOF_CHAR}; -use crate::lexer::fstring::{FStringContext, FStrings}; -use crate::lexer::indentation::{Indentation, Indentations}; -use crate::soft_keywords::SoftKeywordTransformer; -use crate::token::Tok; -use crate::Mode; +use crate::lexer::fstring::{FStringContext, FStrings, FStringsCheckpoint}; +use crate::lexer::indentation::{Indentation, Indentations, IndentationsCheckpoint}; +use crate::{Mode, TokenKind}; mod cursor; mod fstring; mod indentation; /// A lexer for Python source code. -pub struct Lexer<'source> { - // Contains the source code to be lexed. - cursor: Cursor<'source>, - source: &'source str, +#[derive(Debug)] +pub struct Lexer<'src> { + /// Source code to be lexed. + source: &'src str, + /// A pointer to the current character of the source code which is being lexed. + cursor: Cursor<'src>, + + /// The kind of the current token. + current_kind: TokenKind, + + /// The range of the current token. + current_range: TextRange, + + /// The value of the current token. + current_value: TokenValue, + + /// Flags for the current token. + current_flags: TokenFlags, + + /// Lexer state. state: State, - // Amount of parenthesis. + + /// Represents the current level of nesting in the lexer, indicating the depth of parentheses. + /// The lexer is within a parenthesized context if the value is greater than 0. nesting: u32, - // Indentation levels. + + /// A stack of indentation representing the current indentation level. indentations: Indentations, pending_indentation: Option, - // Lexer mode. + + /// Lexer mode. mode: Mode, - // F-string contexts. + + /// F-string contexts. fstrings: FStrings, + + /// Errors encountered while lexing. 
+ errors: Vec, } -/// Contains a Token along with its `range`. -pub type Spanned = (Tok, TextRange); -/// The result of lexing a token. -pub type LexResult = Result; - -/// Create a new lexer from a source string. -/// -/// # Examples -/// -/// ``` -/// use ruff_python_parser::{Mode, lexer::lex}; -/// -/// let source = "def hello(): return 'world'"; -/// let lexer = lex(source, Mode::Module); -/// -/// for token in lexer { -/// println!("{:?}", token); -/// } -/// ``` -#[inline] -pub fn lex(source: &str, mode: Mode) -> SoftKeywordTransformer { - SoftKeywordTransformer::new(Lexer::new(source, mode), mode) -} - -pub struct LexStartsAtIterator { - start_offset: TextSize, - inner: I, -} - -impl Iterator for LexStartsAtIterator -where - I: Iterator, -{ - type Item = LexResult; - - #[inline] - fn next(&mut self) -> Option { - let result = match self.inner.next()? { - Ok((tok, range)) => Ok((tok, range + self.start_offset)), - Err(error) => { - let location = error.location() + self.start_offset; - Err(LexicalError::new(error.into_error(), location)) - } - }; - - Some(result) - } - - fn size_hint(&self) -> (usize, Option) { - self.inner.size_hint() - } -} - -impl FusedIterator for LexStartsAtIterator where I: Iterator + FusedIterator {} -impl ExactSizeIterator for LexStartsAtIterator where - I: Iterator + ExactSizeIterator -{ -} - -/// Create a new lexer from a source string, starting at a given location. -/// You probably want to use [`lex`] instead. -pub fn lex_starts_at( - source: &str, - mode: Mode, - start_offset: TextSize, -) -> LexStartsAtIterator> { - LexStartsAtIterator { - start_offset, - inner: lex(source, mode), - } -} - -impl<'source> Lexer<'source> { - /// Create a new lexer from T and a starting location. You probably want to use - /// [`lex`] instead. - pub fn new(input: &'source str, mode: Mode) -> Self { +impl<'src> Lexer<'src> { + /// Create a new lexer for the given input source which starts at the given offset. + /// + /// If the start offset is greater than 0, the cursor is moved ahead that many bytes. + /// This means that the input source should be the complete source code and not the + /// sliced version. + pub(crate) fn new(source: &'src str, mode: Mode, start_offset: TextSize) -> Self { assert!( - u32::try_from(input.len()).is_ok(), + u32::try_from(source.len()).is_ok(), "Lexer only supports files with a size up to 4GB" ); - let mut lxr = Lexer { + let mut lexer = Lexer { + source, + cursor: Cursor::new(source), state: State::AfterNewline, + current_kind: TokenKind::EndOfFile, + current_range: TextRange::empty(start_offset), + current_value: TokenValue::None, + current_flags: TokenFlags::empty(), nesting: 0, indentations: Indentations::default(), pending_indentation: None, - - source: input, - cursor: Cursor::new(input), mode, fstrings: FStrings::default(), + errors: Vec::new(), }; + // TODO: Handle possible mismatch between BOM and explicit encoding declaration. // spell-checker:ignore feff - lxr.cursor.eat_char('\u{feff}'); + lexer.cursor.eat_char('\u{feff}'); - lxr + if start_offset > TextSize::new(0) { + lexer.cursor.skip_bytes(start_offset.to_usize()); + } + + lexer + } + + /// Returns the kind of the current token. + pub(crate) fn current_kind(&self) -> TokenKind { + self.current_kind + } + + /// Returns the range of the current token. + pub(crate) fn current_range(&self) -> TextRange { + self.current_range + } + + /// Returns the flags for the current token. 
+ pub(crate) fn current_flags(&self) -> TokenFlags { + self.current_flags + } + + /// Helper function to push the given error and return the [`TokenKind::Unknown`] token. + fn push_error(&mut self, error: LexicalError) -> TokenKind { + self.errors.push(error); + TokenKind::Unknown + } + + /// Try lexing the single character string prefix, updating the token flags accordingly. + /// Returns `true` if it matches. + fn try_single_char_prefix(&mut self, first: char) -> bool { + match first { + 'f' | 'F' => self.current_flags |= TokenFlags::F_STRING, + 'u' | 'U' => self.current_flags |= TokenFlags::UNICODE_STRING, + 'b' | 'B' => self.current_flags |= TokenFlags::BYTE_STRING, + 'r' => self.current_flags |= TokenFlags::RAW_STRING_LOWERCASE, + 'R' => self.current_flags |= TokenFlags::RAW_STRING_UPPERCASE, + _ => return false, + } + true + } + + /// Try lexing the double character string prefix, updating the token flags accordingly. + /// Returns `true` if it matches. + fn try_double_char_prefix(&mut self, value: [char; 2]) -> bool { + match value { + ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => { + self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_LOWERCASE; + } + ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => { + self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_UPPERCASE; + } + ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => { + self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_LOWERCASE; + } + ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => { + self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_UPPERCASE; + } + _ => return false, + } + true } /// Lex an identifier. Also used for keywords and string/bytes literals with a prefix. - fn lex_identifier(&mut self, first: char) -> Result { + fn lex_identifier(&mut self, first: char) -> TokenKind { // Detect potential string like rb'' b'' f'' u'' r'' - match (first, self.cursor.first()) { - ('f' | 'F', quote @ ('\'' | '"')) => { + let quote = match (first, self.cursor.first()) { + (_, quote @ ('\'' | '"')) => self.try_single_char_prefix(first).then(|| { self.cursor.bump(); - return Ok(self.lex_fstring_start(quote, FStringPrefix::Regular)); - } - ('r', 'f' | 'F') | ('f' | 'F', 'r') if is_quote(self.cursor.second()) => { - self.cursor.bump(); - let quote = self.cursor.bump().unwrap(); - return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: false })); - } - ('R', 'f' | 'F') | ('f' | 'F', 'R') if is_quote(self.cursor.second()) => { - self.cursor.bump(); - let quote = self.cursor.bump().unwrap(); - return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: true })); - } - (_, quote @ ('\'' | '"')) => { - if let Ok(prefix) = AnyStringPrefix::try_from(first) { + quote + }), + (_, second) if is_quote(self.cursor.second()) => { + self.try_double_char_prefix([first, second]).then(|| { self.cursor.bump(); - return self.lex_string(prefix, quote); - } + // SAFETY: Safe because of the `is_quote` check in this match arm's guard + self.cursor.bump().unwrap() + }) } - (_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => { - self.cursor.bump(); - if let Ok(prefix) = AnyStringPrefix::try_from([first, second]) { - let quote = self.cursor.bump().unwrap(); - return self.lex_string(prefix, quote); - } + _ => None, + }; + + if let Some(quote) = quote { + if self.current_flags.is_f_string() { + return self.lex_fstring_start(quote); } - _ => {} + + return self.lex_string(quote); } // Keep track of whether the identifier is ASCII-only or not. 
@@ -218,62 +206,58 @@ impl<'source> Lexer<'source> { let text = self.token_text(); if !is_ascii { - return Ok(Tok::Name { - name: text.nfkc().collect::().into_boxed_str(), - }); + self.current_value = TokenValue::Name(text.nfkc().collect::().into_boxed_str()); + return TokenKind::Name; } - let keyword = match text { - "False" => Tok::False, - "None" => Tok::None, - "True" => Tok::True, - "and" => Tok::And, - "as" => Tok::As, - "assert" => Tok::Assert, - "async" => Tok::Async, - "await" => Tok::Await, - "break" => Tok::Break, - "case" => Tok::Case, - "class" => Tok::Class, - "continue" => Tok::Continue, - "def" => Tok::Def, - "del" => Tok::Del, - "elif" => Tok::Elif, - "else" => Tok::Else, - "except" => Tok::Except, - "finally" => Tok::Finally, - "for" => Tok::For, - "from" => Tok::From, - "global" => Tok::Global, - "if" => Tok::If, - "import" => Tok::Import, - "in" => Tok::In, - "is" => Tok::Is, - "lambda" => Tok::Lambda, - "match" => Tok::Match, - "nonlocal" => Tok::Nonlocal, - "not" => Tok::Not, - "or" => Tok::Or, - "pass" => Tok::Pass, - "raise" => Tok::Raise, - "return" => Tok::Return, - "try" => Tok::Try, - "type" => Tok::Type, - "while" => Tok::While, - "with" => Tok::With, - "yield" => Tok::Yield, + match text { + "False" => TokenKind::False, + "None" => TokenKind::None, + "True" => TokenKind::True, + "and" => TokenKind::And, + "as" => TokenKind::As, + "assert" => TokenKind::Assert, + "async" => TokenKind::Async, + "await" => TokenKind::Await, + "break" => TokenKind::Break, + "case" => TokenKind::Case, + "class" => TokenKind::Class, + "continue" => TokenKind::Continue, + "def" => TokenKind::Def, + "del" => TokenKind::Del, + "elif" => TokenKind::Elif, + "else" => TokenKind::Else, + "except" => TokenKind::Except, + "finally" => TokenKind::Finally, + "for" => TokenKind::For, + "from" => TokenKind::From, + "global" => TokenKind::Global, + "if" => TokenKind::If, + "import" => TokenKind::Import, + "in" => TokenKind::In, + "is" => TokenKind::Is, + "lambda" => TokenKind::Lambda, + "match" => TokenKind::Match, + "nonlocal" => TokenKind::Nonlocal, + "not" => TokenKind::Not, + "or" => TokenKind::Or, + "pass" => TokenKind::Pass, + "raise" => TokenKind::Raise, + "return" => TokenKind::Return, + "try" => TokenKind::Try, + "type" => TokenKind::Type, + "while" => TokenKind::While, + "with" => TokenKind::With, + "yield" => TokenKind::Yield, _ => { - return Ok(Tok::Name { - name: text.to_string().into_boxed_str(), - }) + self.current_value = TokenValue::Name(text.to_string().into_boxed_str()); + TokenKind::Name } - }; - - Ok(keyword) + } } /// Numeric lexing. The feast can start! - fn lex_number(&mut self, first: char) -> Result { + fn lex_number(&mut self, first: char) -> TokenKind { if first == '0' { if self.cursor.eat_if(|c| matches!(c, 'x' | 'X')).is_some() { self.lex_number_radix(Radix::Hex) @@ -290,7 +274,7 @@ impl<'source> Lexer<'source> { } /// Lex a hex/octal/decimal/binary number without a decimal point. 
- fn lex_number_radix(&mut self, radix: Radix) -> Result { + fn lex_number_radix(&mut self, radix: Radix) -> TokenKind { #[cfg(debug_assertions)] debug_assert!(matches!( self.cursor.previous().to_ascii_lowercase(), @@ -307,17 +291,18 @@ impl<'source> Lexer<'source> { let value = match Int::from_str_radix(number.as_str(), radix.as_u32(), token) { Ok(int) => int, Err(err) => { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()), self.token_range(), )); } }; - Ok(Tok::Int { value }) + self.current_value = TokenValue::Int(value); + TokenKind::Int } /// Lex a normal number, that is, no octal, hex or binary number. - fn lex_decimal_number(&mut self, first_digit_or_dot: char) -> Result { + fn lex_decimal_number(&mut self, first_digit_or_dot: char) -> TokenKind { #[cfg(debug_assertions)] debug_assert!(self.cursor.previous().is_ascii_digit() || self.cursor.previous() == '.'); let start_is_zero = first_digit_or_dot == '0'; @@ -332,7 +317,7 @@ impl<'source> Lexer<'source> { number.push('.'); if self.cursor.eat_char('_') { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::OtherError("Invalid Syntax".to_string().into_boxed_str()), TextRange::new(self.offset() - TextSize::new(1), self.offset()), )); @@ -363,35 +348,38 @@ impl<'source> Lexer<'source> { if is_float { // Improvement: Use `Cow` instead of pushing to value text - let value = f64::from_str(number.as_str()).map_err(|_| { - LexicalError::new( + let Ok(value) = f64::from_str(number.as_str()) else { + return self.push_error(LexicalError::new( LexicalErrorType::OtherError( "Invalid decimal literal".to_string().into_boxed_str(), ), self.token_range(), - ) - })?; + )); + }; // Parse trailing 'j': if self.cursor.eat_if(|c| matches!(c, 'j' | 'J')).is_some() { - Ok(Tok::Complex { + self.current_value = TokenValue::Complex { real: 0.0, imag: value, - }) + }; + TokenKind::Complex } else { - Ok(Tok::Float { value }) + self.current_value = TokenValue::Float(value); + TokenKind::Float } } else { // Parse trailing 'j': if self.cursor.eat_if(|c| matches!(c, 'j' | 'J')).is_some() { let imag = f64::from_str(number.as_str()).unwrap(); - Ok(Tok::Complex { real: 0.0, imag }) + self.current_value = TokenValue::Complex { real: 0.0, imag }; + TokenKind::Complex } else { let value = match Int::from_str(number.as_str()) { Ok(value) => { if start_is_zero && value.as_u8() != Some(0) { // Leading zeros in decimal integer literals are not permitted. - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::OtherError( "Invalid decimal integer literal" .to_string() @@ -403,13 +391,14 @@ impl<'source> Lexer<'source> { value } Err(err) => { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()), self.token_range(), )) } }; - Ok(Tok::Int { value }) + self.current_value = TokenValue::Int(value); + TokenKind::Int } } } @@ -434,7 +423,7 @@ impl<'source> Lexer<'source> { } /// Lex a single comment. - fn lex_comment(&mut self) -> Tok { + fn lex_comment(&mut self) -> TokenKind { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), '#'); @@ -442,11 +431,11 @@ impl<'source> Lexer<'source> { let offset = memchr::memchr2(b'\n', b'\r', bytes).unwrap_or(bytes.len()); self.cursor.skip_bytes(offset); - Tok::Comment(self.token_text().to_string().into_boxed_str()) + TokenKind::Comment } /// Lex a single IPython escape command. 
- fn lex_ipython_escape_command(&mut self, escape_kind: IpyEscapeKind) -> Tok { + fn lex_ipython_escape_command(&mut self, escape_kind: IpyEscapeKind) -> TokenKind { let mut value = String::new(); loop { @@ -539,16 +528,21 @@ impl<'source> Lexer<'source> { 2 => IpyEscapeKind::Help2, _ => unreachable!("`question_count` is always 1 or 2"), }; - return Tok::IpyEscapeCommand { + + self.current_value = TokenValue::IpyEscapeCommand { kind, value: value.into_boxed_str(), }; + + return TokenKind::IpyEscapeCommand; } '\n' | '\r' | EOF_CHAR => { - return Tok::IpyEscapeCommand { + self.current_value = TokenValue::IpyEscapeCommand { kind: escape_kind, value: value.into_boxed_str(), }; + + return TokenKind::IpyEscapeCommand; } c => { self.cursor.bump(); @@ -559,40 +553,39 @@ impl<'source> Lexer<'source> { } /// Lex a f-string start token. - fn lex_fstring_start(&mut self, quote: char, prefix: FStringPrefix) -> Tok { + fn lex_fstring_start(&mut self, quote: char) -> TokenKind { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), quote); - let mut flags = AnyStringFlags::default() - .with_prefix(AnyStringPrefix::Format(prefix)) - .with_quote_style(if quote == '"' { - Quote::Double - } else { - Quote::Single - }); - - if self.cursor.eat_char2(quote, quote) { - flags = flags.with_triple_quotes(); + if quote == '"' { + self.current_flags |= TokenFlags::DOUBLE_QUOTES; } - self.fstrings.push(FStringContext::new(flags, self.nesting)); - Tok::FStringStart(flags) + if self.cursor.eat_char2(quote, quote) { + self.current_flags |= TokenFlags::TRIPLE_QUOTED_STRING; + } + + self.fstrings + .push(FStringContext::new(self.current_flags, self.nesting)); + + TokenKind::FStringStart } /// Lex a f-string middle or end token. - fn lex_fstring_middle_or_end(&mut self) -> Result, LexicalError> { + fn lex_fstring_middle_or_end(&mut self) -> Option { // SAFETY: Safe because the function is only called when `self.fstrings` is not empty. let fstring = self.fstrings.current().unwrap(); - self.cursor.start_token(); // Check if we're at the end of the f-string. if fstring.is_triple_quoted() { let quote_char = fstring.quote_char(); if self.cursor.eat_char3(quote_char, quote_char, quote_char) { - return Ok(Some(Tok::FStringEnd)); + self.current_flags = fstring.flags(); + return Some(TokenKind::FStringEnd); } } else if self.cursor.eat_char(fstring.quote_char()) { - return Ok(Some(Tok::FStringEnd)); + self.current_flags = fstring.flags(); + return Some(TokenKind::FStringEnd); } // We have to decode `{{` and `}}` into `{` and `}` respectively. 
As an @@ -619,10 +612,11 @@ impl<'source> Lexer<'source> { } else { FStringErrorType::UnterminatedString }; - return Err(LexicalError::new( + self.fstrings.pop(); + return Some(self.push_error(LexicalError::new( LexicalErrorType::FStringError(error), self.token_range(), - )); + ))); } '\n' | '\r' if !fstring.is_triple_quoted() => { // If we encounter a newline while we're in a format spec, then @@ -632,10 +626,11 @@ impl<'source> Lexer<'source> { if in_format_spec { break; } - return Err(LexicalError::new( + self.fstrings.pop(); + return Some(self.push_error(LexicalError::new( LexicalErrorType::FStringError(FStringErrorType::UnterminatedString), self.token_range(), - )); + ))); } '\\' => { self.cursor.bump(); // '\' @@ -698,7 +693,7 @@ impl<'source> Lexer<'source> { } let range = self.token_range(); if range.is_empty() { - return Ok(None); + return None; } let value = if normalized.is_empty() { @@ -707,42 +702,39 @@ impl<'source> Lexer<'source> { normalized.push_str(&self.source[TextRange::new(last_offset, self.offset())]); normalized }; - Ok(Some(Tok::FStringMiddle { - value: value.into_boxed_str(), - flags: fstring.flags(), - })) + + self.current_value = TokenValue::FStringMiddle(value.into_boxed_str()); + self.current_flags = fstring.flags(); + + Some(TokenKind::FStringMiddle) } /// Lex a string literal. - fn lex_string(&mut self, prefix: AnyStringPrefix, quote: char) -> Result { + fn lex_string(&mut self, quote: char) -> TokenKind { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), quote); - let mut flags = AnyStringFlags::default() - .with_prefix(prefix) - .with_quote_style(if quote == '"' { - Quote::Double - } else { - Quote::Single - }); + if quote == '"' { + self.current_flags |= TokenFlags::DOUBLE_QUOTES; + } // If the next two characters are also the quote character, then we have a triple-quoted // string; consume those two characters and ensure that we require a triple-quote to close if self.cursor.eat_char2(quote, quote) { - flags = flags.with_triple_quotes(); + self.current_flags |= TokenFlags::TRIPLE_QUOTED_STRING; } let value_start = self.offset(); let quote_byte = u8::try_from(quote).expect("char that fits in u8"); - let value_end = if flags.is_triple_quoted() { + let value_end = if self.current_flags.is_triple_quoted() { // For triple-quoted strings, scan until we find the closing quote (ignoring escaped // quotes) or the end of the file. loop { let Some(index) = memchr::memchr(quote_byte, self.cursor.rest().as_bytes()) else { self.cursor.skip_to_end(); - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::UnclosedStringError, self.token_range(), )); @@ -778,7 +770,7 @@ impl<'source> Lexer<'source> { else { self.cursor.skip_to_end(); - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::StringError, self.token_range(), )); @@ -806,7 +798,7 @@ impl<'source> Lexer<'source> { match ch { Some('\r' | '\n') => { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::UnclosedStringError, self.token_range(), )); @@ -819,34 +811,33 @@ impl<'source> Lexer<'source> { } }; - Ok(Tok::String { - value: self.source[TextRange::new(value_start, value_end)] + self.current_value = TokenValue::String( + self.source[TextRange::new(value_start, value_end)] .to_string() .into_boxed_str(), - flags, - }) + ); + + TokenKind::String } - // This is the main entry point. Call this function to retrieve the next token. 
- // This function is used by the iterator implementation. - pub fn next_token(&mut self) -> LexResult { + /// Lex the next token. + pub fn next_token(&mut self) -> TokenKind { + self.cursor.start_token(); + self.current_value = TokenValue::None; + self.current_flags = TokenFlags::empty(); + self.current_kind = self.lex_token(); + self.current_range = self.token_range(); + self.current_kind + } + + fn lex_token(&mut self) -> TokenKind { if let Some(fstring) = self.fstrings.current() { if !fstring.is_in_expression(self.nesting) { - match self.lex_fstring_middle_or_end() { - Ok(Some(tok)) => { - if tok.is_f_string_end() { - self.fstrings.pop(); - } - return Ok((tok, self.token_range())); - } - Err(e) => { - // This is to prevent an infinite loop in which the lexer - // continuously returns an error token because the f-string - // remains on the stack. + if let Some(token) = self.lex_fstring_middle_or_end() { + if matches!(token, TokenKind::FStringEnd) { self.fstrings.pop(); - return Err(e); } - _ => {} + return token; } } } @@ -855,15 +846,17 @@ impl<'source> Lexer<'source> { match self.indentations.current().try_compare(indentation) { Ok(Ordering::Greater) => { self.pending_indentation = Some(indentation); - let offset = self.offset(); - self.indentations.dedent_one(indentation).map_err(|_| { - LexicalError::new(LexicalErrorType::IndentationError, self.token_range()) - })?; - return Ok((Tok::Dedent, TextRange::empty(offset))); + if self.indentations.dedent_one(indentation).is_err() { + return self.push_error(LexicalError::new( + LexicalErrorType::IndentationError, + self.token_range(), + )); + } + return TokenKind::Dedent; } Ok(_) => {} Err(_) => { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::IndentationError, self.token_range(), )); @@ -872,24 +865,28 @@ impl<'source> Lexer<'source> { } if self.state.is_after_newline() { - if let Some(indentation) = self.eat_indentation()? 
{ - return Ok(indentation); + if let Some(indentation) = self.eat_indentation() { + return indentation; } } else { - self.skip_whitespace()?; + if let Err(error) = self.skip_whitespace() { + return self.push_error(error); + } } + // The lexer might've skipped whitespaces, so update the start offset self.cursor.start_token(); + if let Some(c) = self.cursor.bump() { if c.is_ascii() { self.consume_ascii_character(c) } else if is_unicode_identifier_start(c) { - let identifier = self.lex_identifier(c)?; + let identifier = self.lex_identifier(c); self.state = State::Other; - Ok((identifier, self.token_range())) + identifier } else { - Err(LexicalError::new( + self.push_error(LexicalError::new( LexicalErrorType::UnrecognizedToken { tok: c }, self.token_range(), )) @@ -934,9 +931,8 @@ impl<'source> Lexer<'source> { Ok(()) } - fn eat_indentation(&mut self) -> Result, LexicalError> { + fn eat_indentation(&mut self) -> Option { let mut indentation = Indentation::root(); - self.cursor.start_token(); loop { match self.cursor.first() { @@ -953,12 +949,15 @@ impl<'source> Lexer<'source> { if self.cursor.eat_char('\r') { self.cursor.eat_char('\n'); } else if self.cursor.is_eof() { - return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range())); + return Some(self.push_error(LexicalError::new( + LexicalErrorType::Eof, + self.token_range(), + ))); } else if !self.cursor.eat_char('\n') { - return Err(LexicalError::new( + return Some(self.push_error(LexicalError::new( LexicalErrorType::LineContinuationError, self.token_range(), - )); + ))); } indentation = Indentation::root(); } @@ -975,30 +974,42 @@ impl<'source> Lexer<'source> { if !matches!(self.cursor.first(), '\n' | '\r' | '#' | EOF_CHAR) { self.state = State::NonEmptyLogicalLine; - if let Some(spanned) = self.handle_indentation(indentation)? { - // Set to false so that we don't handle indentation on the next call. - - return Ok(Some(spanned)); - } + // Set to false so that we don't handle indentation on the next call. + return self.handle_indentation(indentation); } - Ok(None) + None } - fn handle_indentation( - &mut self, - indentation: Indentation, - ) -> Result, LexicalError> { + fn handle_indentation(&mut self, indentation: Indentation) -> Option { let token = match self.indentations.current().try_compare(indentation) { // Dedent Ok(Ordering::Greater) => { self.pending_indentation = Some(indentation); - self.indentations.dedent_one(indentation).map_err(|_| { - LexicalError::new(LexicalErrorType::IndentationError, self.token_range()) - })?; + if self.indentations.dedent_one(indentation).is_err() { + return Some(self.push_error(LexicalError::new( + LexicalErrorType::IndentationError, + self.token_range(), + ))); + }; - Some((Tok::Dedent, TextRange::empty(self.offset()))) + // The lexer might've eaten some whitespaces to calculate the `indentation`. For + // example: + // + // ```py + // if first: + // if second: + // pass + // foo + // # ^ + // ``` + // + // Here, the cursor is at `^` and the `indentation` contains the whitespaces before + // the `pass` token. 
+ self.cursor.start_token(); + + Some(TokenKind::Dedent) } Ok(Ordering::Equal) => None, @@ -1006,74 +1017,74 @@ impl<'source> Lexer<'source> { // Indent Ok(Ordering::Less) => { self.indentations.indent(indentation); - Some((Tok::Indent, self.token_range())) + Some(TokenKind::Indent) } Err(_) => { - return Err(LexicalError::new( + return Some(self.push_error(LexicalError::new( LexicalErrorType::IndentationError, self.token_range(), - )); + ))); } }; - Ok(token) + token } - fn consume_end(&mut self) -> Result { + fn consume_end(&mut self) -> TokenKind { // We reached end of file. // First of all, we need all nestings to be finished. if self.nesting > 0 { // Reset the nesting to avoid going into infinite loop. self.nesting = 0; - return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range())); + return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range())); } // Next, insert a trailing newline, if required. if !self.state.is_new_logical_line() { self.state = State::AfterNewline; - Ok((Tok::Newline, TextRange::empty(self.offset()))) + TokenKind::Newline } // Next, flush the indentation stack to zero. else if self.indentations.dedent().is_some() { - Ok((Tok::Dedent, TextRange::empty(self.offset()))) + TokenKind::Dedent } else { - Ok((Tok::EndOfFile, TextRange::empty(self.offset()))) + TokenKind::EndOfFile } } // Dispatch based on the given character. - fn consume_ascii_character(&mut self, c: char) -> Result { + fn consume_ascii_character(&mut self, c: char) -> TokenKind { let token = match c { - c if is_ascii_identifier_start(c) => self.lex_identifier(c)?, - '0'..='9' => self.lex_number(c)?, - '#' => return Ok((self.lex_comment(), self.token_range())), - '\'' | '"' => self.lex_string(AnyStringPrefix::default(), c)?, + c if is_ascii_identifier_start(c) => self.lex_identifier(c), + '0'..='9' => self.lex_number(c), + '#' => return self.lex_comment(), + '\'' | '"' => self.lex_string(c), '=' => { if self.cursor.eat_char('=') { - Tok::EqEqual + TokenKind::EqEqual } else { self.state = State::AfterEqual; - return Ok((Tok::Equal, self.token_range())); + return TokenKind::Equal; } } '+' => { if self.cursor.eat_char('=') { - Tok::PlusEqual + TokenKind::PlusEqual } else { - Tok::Plus + TokenKind::Plus } } '*' => { if self.cursor.eat_char('=') { - Tok::StarEqual + TokenKind::StarEqual } else if self.cursor.eat_char('*') { if self.cursor.eat_char('=') { - Tok::DoubleStarEqual + TokenKind::DoubleStarEqual } else { - Tok::DoubleStar + TokenKind::DoubleStar } } else { - Tok::Star + TokenKind::Star } } @@ -1100,97 +1111,97 @@ impl<'source> Lexer<'source> { self.lex_ipython_escape_command(kind) } - '?' if self.mode == Mode::Ipython => Tok::Question, + '?' 
if self.mode == Mode::Ipython => TokenKind::Question, '/' => { if self.cursor.eat_char('=') { - Tok::SlashEqual + TokenKind::SlashEqual } else if self.cursor.eat_char('/') { if self.cursor.eat_char('=') { - Tok::DoubleSlashEqual + TokenKind::DoubleSlashEqual } else { - Tok::DoubleSlash + TokenKind::DoubleSlash } } else { - Tok::Slash + TokenKind::Slash } } '%' => { if self.cursor.eat_char('=') { - Tok::PercentEqual + TokenKind::PercentEqual } else { - Tok::Percent + TokenKind::Percent } } '|' => { if self.cursor.eat_char('=') { - Tok::VbarEqual + TokenKind::VbarEqual } else { - Tok::Vbar + TokenKind::Vbar } } '^' => { if self.cursor.eat_char('=') { - Tok::CircumflexEqual + TokenKind::CircumflexEqual } else { - Tok::CircumFlex + TokenKind::CircumFlex } } '&' => { if self.cursor.eat_char('=') { - Tok::AmperEqual + TokenKind::AmperEqual } else { - Tok::Amper + TokenKind::Amper } } '-' => { if self.cursor.eat_char('=') { - Tok::MinusEqual + TokenKind::MinusEqual } else if self.cursor.eat_char('>') { - Tok::Rarrow + TokenKind::Rarrow } else { - Tok::Minus + TokenKind::Minus } } '@' => { if self.cursor.eat_char('=') { - Tok::AtEqual + TokenKind::AtEqual } else { - Tok::At + TokenKind::At } } '!' => { if self.cursor.eat_char('=') { - Tok::NotEqual + TokenKind::NotEqual } else { - Tok::Exclamation + TokenKind::Exclamation } } - '~' => Tok::Tilde, + '~' => TokenKind::Tilde, '(' => { self.nesting += 1; - Tok::Lpar + TokenKind::Lpar } ')' => { self.nesting = self.nesting.saturating_sub(1); - Tok::Rpar + TokenKind::Rpar } '[' => { self.nesting += 1; - Tok::Lsqb + TokenKind::Lsqb } ']' => { self.nesting = self.nesting.saturating_sub(1); - Tok::Rsqb + TokenKind::Rsqb } '{' => { self.nesting += 1; - Tok::Lbrace + TokenKind::Lbrace } '}' => { if let Some(fstring) = self.fstrings.current_mut() { if fstring.nesting() == self.nesting { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::FStringError(FStringErrorType::SingleRbrace), self.token_range(), )); @@ -1198,7 +1209,7 @@ impl<'source> Lexer<'source> { fstring.try_end_format_spec(self.nesting); } self.nesting = self.nesting.saturating_sub(1); - Tok::Rbrace + TokenKind::Rbrace } ':' => { if self @@ -1206,85 +1217,79 @@ impl<'source> Lexer<'source> { .current_mut() .is_some_and(|fstring| fstring.try_start_format_spec(self.nesting)) { - Tok::Colon + TokenKind::Colon } else if self.cursor.eat_char('=') { - Tok::ColonEqual + TokenKind::ColonEqual } else { - Tok::Colon + TokenKind::Colon } } - ';' => Tok::Semi, + ';' => TokenKind::Semi, '<' => { if self.cursor.eat_char('<') { if self.cursor.eat_char('=') { - Tok::LeftShiftEqual + TokenKind::LeftShiftEqual } else { - Tok::LeftShift + TokenKind::LeftShift } } else if self.cursor.eat_char('=') { - Tok::LessEqual + TokenKind::LessEqual } else { - Tok::Less + TokenKind::Less } } '>' => { if self.cursor.eat_char('>') { if self.cursor.eat_char('=') { - Tok::RightShiftEqual + TokenKind::RightShiftEqual } else { - Tok::RightShift + TokenKind::RightShift } } else if self.cursor.eat_char('=') { - Tok::GreaterEqual + TokenKind::GreaterEqual } else { - Tok::Greater + TokenKind::Greater } } - ',' => Tok::Comma, + ',' => TokenKind::Comma, '.' => { if self.cursor.first().is_ascii_digit() { - self.lex_decimal_number('.')? 
+ self.lex_decimal_number('.') } else if self.cursor.eat_char2('.', '.') { - Tok::Ellipsis + TokenKind::Ellipsis } else { - Tok::Dot + TokenKind::Dot } } '\n' => { - return Ok(( - if self.nesting == 0 && !self.state.is_new_logical_line() { - self.state = State::AfterNewline; - Tok::Newline - } else { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.try_end_format_spec(self.nesting); - } - Tok::NonLogicalNewline - }, - self.token_range(), - )) + return if self.nesting == 0 && !self.state.is_new_logical_line() { + self.state = State::AfterNewline; + TokenKind::Newline + } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } + TokenKind::NonLogicalNewline + } } '\r' => { self.cursor.eat_char('\n'); - return Ok(( - if self.nesting == 0 && !self.state.is_new_logical_line() { - self.state = State::AfterNewline; - Tok::Newline - } else { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.try_end_format_spec(self.nesting); - } - Tok::NonLogicalNewline - }, - self.token_range(), - )); + return if self.nesting == 0 && !self.state.is_new_logical_line() { + self.state = State::AfterNewline; + TokenKind::Newline + } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } + TokenKind::NonLogicalNewline + }; } _ => { self.state = State::Other; - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::UnrecognizedToken { tok: c }, self.token_range(), )); @@ -1293,7 +1298,7 @@ impl<'source> Lexer<'source> { self.state = State::Other; - Ok((token, self.token_range())) + token } #[inline] @@ -1305,11 +1310,12 @@ impl<'source> Lexer<'source> { } #[inline] - fn token_text(&self) -> &'source str { + fn token_text(&self) -> &'src str { &self.source[self.token_range()] } - // Lexer doesn't allow files larger than 4GB + /// Retrieves the current offset of the cursor within the source code. + // SAFETY: Lexer doesn't allow files larger than 4GB #[allow(clippy::cast_possible_truncation)] #[inline] fn offset(&self) -> TextSize { @@ -1320,25 +1326,220 @@ impl<'source> Lexer<'source> { fn token_start(&self) -> TextSize { self.token_range().start() } + + /// Takes the token value corresponding to the current token out of the lexer, replacing it + /// with the default value. + /// + /// All the subsequent call to this method without moving the lexer would always return the + /// default value which is [`TokenValue::None`]. + pub(crate) fn take_value(&mut self) -> TokenValue { + std::mem::take(&mut self.current_value) + } + + /// Creates a checkpoint to which the lexer can later return to using [`Self::rewind`]. + pub(crate) fn checkpoint(&self) -> LexerCheckpoint<'src> { + LexerCheckpoint { + value: self.current_value.clone(), + current_kind: self.current_kind, + current_range: self.current_range, + current_flags: self.current_flags, + cursor: self.cursor.clone(), + state: self.state, + nesting: self.nesting, + indentations_checkpoint: self.indentations.checkpoint(), + pending_indentation: self.pending_indentation, + fstrings_checkpoint: self.fstrings.checkpoint(), + errors_position: self.errors.len(), + } + } + + /// Restore the lexer to the given checkpoint. 
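// An illustrative sketch, not part of this patch, of the take_value() contract documented
// above: the value is moved out once, and repeated calls without advancing the lexer yield
// the default TokenValue::None. It relies only on crate-internal items introduced in this
// diff, so it would live inside ruff_python_parser; the function name is hypothetical.
fn take_value_sketch() {
    let mut lexer = lex("foo", Mode::Module);
    assert_eq!(lexer.next_token(), TokenKind::Name);
    assert!(matches!(lexer.take_value(), TokenValue::Name(name) if &*name == "foo"));
    // A second call without another next_token() returns the default value.
    assert!(matches!(lexer.take_value(), TokenValue::None));
}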
+ pub(crate) fn rewind(&mut self, checkpoint: LexerCheckpoint<'src>) { + let LexerCheckpoint { + value, + current_kind, + current_range, + current_flags, + cursor, + state, + nesting, + indentations_checkpoint, + pending_indentation, + fstrings_checkpoint, + errors_position, + } = checkpoint; + + self.current_value = value; + self.current_kind = current_kind; + self.current_range = current_range; + self.current_flags = current_flags; + self.cursor = cursor; + self.state = state; + self.nesting = nesting; + self.indentations.rewind(indentations_checkpoint); + self.pending_indentation = pending_indentation; + self.fstrings.rewind(fstrings_checkpoint); + self.errors.truncate(errors_position); + } + + pub fn finish(self) -> Vec { + self.errors + } } -// Implement iterator pattern for Lexer. -// Calling the next element in the iterator will yield the next lexical -// token. -impl Iterator for Lexer<'_> { - type Item = LexResult; +bitflags! { + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub(crate) struct TokenFlags: u8 { + /// The token is a string with double quotes (`"`). + const DOUBLE_QUOTES = 1 << 0; + /// The token is a triple-quoted string i.e., it starts and ends with three consecutive + /// quote characters (`"""` or `'''`). + const TRIPLE_QUOTED_STRING = 1 << 1; - fn next(&mut self) -> Option { - let token = self.next_token(); + /// The token is a unicode string i.e., prefixed with `u` or `U` + const UNICODE_STRING = 1 << 2; + /// The token is a byte string i.e., prefixed with `b` or `B` + const BYTE_STRING = 1 << 3; + /// The token is an f-string i.e., prefixed with `f` or `F` + const F_STRING = 1 << 4; + /// The token is a raw string and the prefix character is in lowercase. + const RAW_STRING_LOWERCASE = 1 << 5; + /// The token is a raw string and the prefix character is in uppercase. 
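// A minimal sketch, not part of this patch, of the backtracking that checkpoint()/rewind()
// above make possible: the parser can look ahead over raw tokens and then restore the lexer,
// including truncating any errors pushed during the lookahead. Crate-internal; the helper
// name and the lookahead condition are hypothetical.
fn lookahead_sketch(lexer: &mut Lexer<'_>) -> bool {
    let checkpoint = lexer.checkpoint();
    // Peek at the next two tokens without committing to them.
    let is_simple_assignment =
        lexer.next_token() == TokenKind::Name && lexer.next_token() == TokenKind::Equal;
    // Restore the lexer to where it was before the lookahead.
    lexer.rewind(checkpoint);
    is_simple_assignment
}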
+ const RAW_STRING_UPPERCASE = 1 << 6; - match token { - Ok((Tok::EndOfFile, _)) => None, - r => Some(r), + /// The token is a raw string i.e., prefixed with `r` or `R` + const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits(); + } +} + +impl StringFlags for TokenFlags { + fn quote_style(self) -> Quote { + if self.intersects(TokenFlags::DOUBLE_QUOTES) { + Quote::Double + } else { + Quote::Single + } + } + + fn is_triple_quoted(self) -> bool { + self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) + } + + fn prefix(self) -> AnyStringPrefix { + if self.intersects(TokenFlags::F_STRING) { + if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true }) + } else { + AnyStringPrefix::Format(FStringPrefix::Regular) + } + } else if self.intersects(TokenFlags::BYTE_STRING) { + if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) + } else { + AnyStringPrefix::Bytes(ByteStringPrefix::Regular) + } + } else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true }) + } else if self.intersects(TokenFlags::UNICODE_STRING) { + AnyStringPrefix::Regular(StringLiteralPrefix::Unicode) + } else { + AnyStringPrefix::Regular(StringLiteralPrefix::Empty) } } } -impl FusedIterator for Lexer<'_> {} +impl TokenFlags { + /// Returns `true` if the token is an f-string. + const fn is_f_string(self) -> bool { + self.intersects(TokenFlags::F_STRING) + } + + /// Returns `true` if the token is a raw string. + const fn is_raw_string(self) -> bool { + self.intersects(TokenFlags::RAW_STRING) + } + + pub(crate) fn as_any_string_flags(self) -> AnyStringFlags { + AnyStringFlags::new(self.prefix(), self.quote_style(), self.is_triple_quoted()) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Token { + /// The kind of the token. + kind: TokenKind, + /// The range of the token. + range: TextRange, + /// The set of flags describing this token. + flags: TokenFlags, +} + +impl Token { + pub(crate) fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token { + Self { kind, range, flags } + } + + /// Returns the token kind. + #[inline] + pub const fn kind(&self) -> TokenKind { + self.kind + } + + /// Returns the token as a tuple of (kind, range). + #[inline] + pub const fn as_tuple(&self) -> (TokenKind, TextRange) { + (self.kind, self.range) + } + + /// Returns `true` if this is a trivia token. + #[inline] + pub const fn is_trivia(self) -> bool { + matches!(self.kind, TokenKind::Comment | TokenKind::NonLogicalNewline) + } + + /// Returns `true` if this is any kind of string token. + const fn is_any_string(self) -> bool { + matches!( + self.kind, + TokenKind::String + | TokenKind::FStringStart + | TokenKind::FStringMiddle + | TokenKind::FStringEnd + ) + } + + /// Returns `true` if the current token is a triple-quoted string of any kind. + /// + /// # Panics + /// + /// If it isn't a string or any f-string tokens. 
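// A crate-internal sketch, not part of this patch, of how the StringFlags impl above decodes
// the packed TokenFlags for a string such as rf"..." back into an AST-level prefix and quote.
// It assumes the ruff_python_ast string types (Quote, AnyStringPrefix, FStringPrefix) are in
// scope; the function name is hypothetical.
fn flags_decoding_sketch() {
    let flags = TokenFlags::F_STRING | TokenFlags::RAW_STRING_LOWERCASE | TokenFlags::DOUBLE_QUOTES;
    assert_eq!(flags.quote_style(), Quote::Double);
    assert!(matches!(
        flags.prefix(),
        AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false })
    ));
    assert!(!flags.is_triple_quoted());
}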
+ pub fn is_triple_quoted_string(self) -> bool { + assert!(self.is_any_string()); + self.flags.is_triple_quoted() + } + + /// Returns the [`Quote`] style for the current string token of any kind. + /// + /// # Panics + /// + /// If it isn't a string or any f-string tokens. + pub fn string_quote_style(self) -> Quote { + assert!(self.is_any_string()); + self.flags.quote_style() + } +} + +impl Ranged for Token { + fn range(&self) -> TextRange { + self.range + } +} /// Represents an error that occur during lexing and are /// returned by the `parse_*` functions in the iterator in the @@ -1463,6 +1664,55 @@ impl std::fmt::Display for LexicalErrorType { } } +#[derive(Clone, Debug, Default)] +pub(crate) enum TokenValue { + #[default] + None, + /// Token value for a name, commonly known as an identifier. + /// + /// Unicode names are NFKC-normalized by the lexer, + /// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers) + Name(Box), + /// Token value for an integer. + Int(Int), + /// Token value for a floating point number. + Float(f64), + /// Token value for a complex number. + Complex { + /// The real part of the complex number. + real: f64, + /// The imaginary part of the complex number. + imag: f64, + }, + /// Token value for a string. + String(Box), + /// Token value that includes the portion of text inside the f-string that's not + /// part of the expression part and isn't an opening or closing brace. + FStringMiddle(Box), + /// Token value for IPython escape commands. These are recognized by the lexer + /// only when the mode is [`Mode::Ipython`]. + IpyEscapeCommand { + /// The magic command value. + value: Box, + /// The kind of magic command. + kind: IpyEscapeKind, + }, +} + +pub(crate) struct LexerCheckpoint<'src> { + value: TokenValue, + current_kind: TokenKind, + current_range: TextRange, + current_flags: TokenFlags, + cursor: Cursor<'src>, + state: State, + nesting: u32, + indentations_checkpoint: IndentationsCheckpoint, + pending_indentation: Option, + fstrings_checkpoint: FStringsCheckpoint, + errors_position: usize, +} + #[derive(Copy, Clone, Debug)] enum State { /// Lexer is right at the beginning of the file or after a `Newline` token. @@ -1608,9 +1858,16 @@ impl<'a> LexedText<'a> { } } +/// Create a new [`Lexer`] for the given source code and [`Mode`]. +pub fn lex(source: &str, mode: Mode) -> Lexer { + Lexer::new(source, mode, TextSize::default()) +} + #[cfg(test)] mod tests { - use insta::assert_debug_snapshot; + use std::fmt::Write; + + use insta::assert_snapshot; use super::*; @@ -1618,61 +1875,145 @@ mod tests { const MAC_EOL: &str = "\r"; const UNIX_EOL: &str = "\n"; - fn lex_source_with_mode(source: &str, mode: Mode) -> Vec { - let lexer = lex(source, mode); - lexer.map(std::result::Result::unwrap).collect() + /// Same as [`Token`] except that this includes the [`TokenValue`] as well. 
+ struct TestToken { + kind: TokenKind, + value: TokenValue, + range: TextRange, + flags: TokenFlags, } - fn lex_source(source: &str) -> Vec { - lex_source_with_mode(source, Mode::Module) + impl std::fmt::Debug for TestToken { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut tuple = f.debug_tuple(""); + let mut tuple = if matches!(self.value, TokenValue::None) { + tuple.field(&self.kind) + } else { + tuple.field(&self.value) + }; + tuple = tuple.field(&self.range); + if self.flags.is_empty() { + tuple.finish() + } else { + tuple.field(&self.flags).finish() + } + } } - fn lex_jupyter_source(source: &str) -> Vec { - lex_source_with_mode(source, Mode::Ipython) + struct LexerOutput { + tokens: Vec, + errors: Vec, } - fn ipython_escape_command_line_continuation_eol(eol: &str) -> Vec { + impl std::fmt::Display for LexerOutput { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "## Tokens")?; + writeln!(f, "```\n{:#?}\n```", self.tokens)?; + if !self.errors.is_empty() { + writeln!(f, "## Errors")?; + writeln!(f, "```\n{:#?}\n```", self.errors)?; + } + Ok(()) + } + } + + fn lex(source: &str, mode: Mode) -> LexerOutput { + let mut lexer = Lexer::new(source, mode, TextSize::default()); + let mut tokens = Vec::new(); + loop { + let kind = lexer.next_token(); + if kind.is_eof() { + break; + } + tokens.push(TestToken { + kind, + value: lexer.take_value(), + range: lexer.current_range(), + flags: lexer.current_flags(), + }); + } + LexerOutput { + tokens, + errors: lexer.finish(), + } + } + + fn lex_valid(source: &str, mode: Mode) -> LexerOutput { + let output = lex(source, mode); + + if !output.errors.is_empty() { + let mut message = "Unexpected lexical errors for a valid source:\n".to_string(); + for error in &output.errors { + writeln!(&mut message, "{error:?}").unwrap(); + } + writeln!(&mut message, "Source:\n{source}").unwrap(); + panic!("{message}"); + } + + output + } + + fn lex_invalid(source: &str, mode: Mode) -> LexerOutput { + let output = lex(source, mode); + + assert!( + !output.errors.is_empty(), + "Expected lexer to generate at least one error for the following source:\n{source}" + ); + + output + } + + fn lex_source(source: &str) -> LexerOutput { + lex_valid(source, Mode::Module) + } + + fn lex_jupyter_source(source: &str) -> LexerOutput { + lex_valid(source, Mode::Ipython) + } + + fn ipython_escape_command_line_continuation_eol(eol: &str) -> LexerOutput { let source = format!("%matplotlib \\{eol} --inline"); lex_jupyter_source(&source) } #[test] fn test_ipython_escape_command_line_continuation_unix_eol() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_eol(UNIX_EOL)); + assert_snapshot!(ipython_escape_command_line_continuation_eol(UNIX_EOL)); } #[test] fn test_ipython_escape_command_line_continuation_mac_eol() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_eol(MAC_EOL)); + assert_snapshot!(ipython_escape_command_line_continuation_eol(MAC_EOL)); } #[test] fn test_ipython_escape_command_line_continuation_windows_eol() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_eol(WINDOWS_EOL)); + assert_snapshot!(ipython_escape_command_line_continuation_eol(WINDOWS_EOL)); } - fn ipython_escape_command_line_continuation_with_eol_and_eof(eol: &str) -> Vec { + fn ipython_escape_command_line_continuation_with_eol_and_eof(eol: &str) -> LexerOutput { let source = format!("%matplotlib \\{eol}"); lex_jupyter_source(&source) } #[test] fn 
test_ipython_escape_command_line_continuation_with_unix_eol_and_eof() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( + assert_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( UNIX_EOL )); } #[test] fn test_ipython_escape_command_line_continuation_with_mac_eol_and_eof() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( + assert_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( MAC_EOL )); } #[test] fn test_ipython_escape_command_line_continuation_with_windows_eol_and_eof() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( + assert_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( WINDOWS_EOL )); } @@ -1680,7 +2021,7 @@ mod tests { #[test] fn test_empty_ipython_escape_command() { let source = "%\n%%\n!\n!!\n?\n??\n/\n,\n;"; - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } #[test] @@ -1701,7 +2042,7 @@ mod tests { !ls " .trim(); - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } #[test] @@ -1726,7 +2067,7 @@ mod tests { %%foo??? !pwd?" .trim(); - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } #[test] @@ -1736,7 +2077,7 @@ if True: %matplotlib \ --inline" .trim(); - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } #[test] @@ -1748,13 +2089,13 @@ bar = %timeit a % 3 baz = %matplotlib \ inline" .trim(); - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } - fn assert_no_ipython_escape_command(tokens: &[Spanned]) { - for (tok, _) in tokens { - if let Tok::IpyEscapeCommand { .. 
} = tok { - panic!("Unexpected escape command token: {tok:?}") + fn assert_no_ipython_escape_command(tokens: &[TestToken]) { + for token in tokens { + if matches!(token.kind, TokenKind::IpyEscapeCommand) { + panic!("Unexpected escape command token at {:?}", token.range) } } } @@ -1772,147 +2113,153 @@ foo = ,func def f(arg=%timeit a = b): pass" .trim(); - let tokens = lex_jupyter_source(source); - assert_no_ipython_escape_command(&tokens); + let output = lex(source, Mode::Ipython); + assert!(output.errors.is_empty()); + assert_no_ipython_escape_command(&output.tokens); } #[test] fn test_numbers() { let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j 000 0x995DC9BBDF1939FA 0x995DC9BBDF1939FA995DC9BBDF1939FA"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_invalid_leading_zero_small() { let source = "025"; - - let lexer = lex(source, Mode::Module); - let tokens = lexer.collect::, LexicalError>>(); - assert_debug_snapshot!(tokens); + assert_snapshot!(lex_invalid(source, Mode::Module)); } #[test] fn test_invalid_leading_zero_big() { let source = "0252222222222222522222222222225222222222222252222222222222522222222222225222222222222"; - - let lexer = lex(source, Mode::Module); - let tokens = lexer.collect::, LexicalError>>(); - assert_debug_snapshot!(tokens); + assert_snapshot!(lex_invalid(source, Mode::Module)); } #[test] fn test_line_comment_long() { let source = "99232 # foo".to_string(); - assert_debug_snapshot!(lex_source(&source)); + assert_snapshot!(lex_source(&source)); } #[test] fn test_line_comment_whitespace() { let source = "99232 # ".to_string(); - assert_debug_snapshot!(lex_source(&source)); + assert_snapshot!(lex_source(&source)); } #[test] fn test_line_comment_single_whitespace() { let source = "99232 # ".to_string(); - assert_debug_snapshot!(lex_source(&source)); + assert_snapshot!(lex_source(&source)); } #[test] fn test_line_comment_empty() { let source = "99232 #".to_string(); - assert_debug_snapshot!(lex_source(&source)); + assert_snapshot!(lex_source(&source)); } - fn comment_until_eol(eol: &str) -> Vec { + fn comment_until_eol(eol: &str) -> LexerOutput { let source = format!("123 # Foo{eol}456"); lex_source(&source) } #[test] fn test_comment_until_unix_eol() { - assert_debug_snapshot!(comment_until_eol(UNIX_EOL)); + assert_snapshot!(comment_until_eol(UNIX_EOL)); } #[test] fn test_comment_until_mac_eol() { - assert_debug_snapshot!(comment_until_eol(MAC_EOL)); + assert_snapshot!(comment_until_eol(MAC_EOL)); } #[test] fn test_comment_until_windows_eol() { - assert_debug_snapshot!(comment_until_eol(WINDOWS_EOL)); + assert_snapshot!(comment_until_eol(WINDOWS_EOL)); } #[test] fn test_assignment() { let source = r"a_variable = 99 + 2-0"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } - fn indentation_with_eol(eol: &str) -> Vec { + fn indentation_with_eol(eol: &str) -> LexerOutput { let source = format!("def foo():{eol} return 99{eol}{eol}"); lex_source(&source) } #[test] fn test_indentation_with_unix_eol() { - assert_debug_snapshot!(indentation_with_eol(UNIX_EOL)); + assert_snapshot!(indentation_with_eol(UNIX_EOL)); } #[test] fn test_indentation_with_mac_eol() { - assert_debug_snapshot!(indentation_with_eol(MAC_EOL)); + assert_snapshot!(indentation_with_eol(MAC_EOL)); } #[test] fn test_indentation_with_windows_eol() { - assert_debug_snapshot!(indentation_with_eol(WINDOWS_EOL)); + assert_snapshot!(indentation_with_eol(WINDOWS_EOL)); } - fn 
double_dedent_with_eol(eol: &str) -> Vec { + fn double_dedent_with_eol(eol: &str) -> LexerOutput { let source = format!("def foo():{eol} if x:{eol}{eol} return 99{eol}{eol}"); lex_source(&source) } #[test] fn test_double_dedent_with_unix_eol() { - assert_debug_snapshot!(double_dedent_with_eol(UNIX_EOL)); + assert_snapshot!(double_dedent_with_eol(UNIX_EOL)); } #[test] fn test_double_dedent_with_mac_eol() { - assert_debug_snapshot!(double_dedent_with_eol(MAC_EOL)); + assert_snapshot!(double_dedent_with_eol(MAC_EOL)); } #[test] fn test_double_dedent_with_windows_eol() { - assert_debug_snapshot!(double_dedent_with_eol(WINDOWS_EOL)); + assert_snapshot!(double_dedent_with_eol(WINDOWS_EOL)); } - fn double_dedent_with_tabs_eol(eol: &str) -> Vec { + fn double_dedent_with_tabs_eol(eol: &str) -> LexerOutput { let source = format!("def foo():{eol}\tif x:{eol}{eol}\t\t return 99{eol}{eol}"); lex_source(&source) } #[test] fn test_double_dedent_with_tabs_unix_eol() { - assert_debug_snapshot!(double_dedent_with_tabs_eol(UNIX_EOL)); + assert_snapshot!(double_dedent_with_tabs_eol(UNIX_EOL)); } #[test] fn test_double_dedent_with_tabs_mac_eol() { - assert_debug_snapshot!(double_dedent_with_tabs_eol(MAC_EOL)); + assert_snapshot!(double_dedent_with_tabs_eol(MAC_EOL)); } #[test] fn test_double_dedent_with_tabs_windows_eol() { - assert_debug_snapshot!(double_dedent_with_tabs_eol(WINDOWS_EOL)); + assert_snapshot!(double_dedent_with_tabs_eol(WINDOWS_EOL)); } - fn newline_in_brackets_eol(eol: &str) -> Vec { + #[test] + fn dedent_after_whitespace() { + let source = "\ +if first: + if second: + pass + foo +"; + assert_snapshot!(lex_source(source)); + } + + fn newline_in_brackets_eol(eol: &str) -> LexerOutput { let source = r"x = [ 1,2 @@ -1929,17 +2276,17 @@ def f(arg=%timeit a = b): #[test] fn test_newline_in_brackets_unix_eol() { - assert_debug_snapshot!(newline_in_brackets_eol(UNIX_EOL)); + assert_snapshot!(newline_in_brackets_eol(UNIX_EOL)); } #[test] fn test_newline_in_brackets_mac_eol() { - assert_debug_snapshot!(newline_in_brackets_eol(MAC_EOL)); + assert_snapshot!(newline_in_brackets_eol(MAC_EOL)); } #[test] fn test_newline_in_brackets_windows_eol() { - assert_debug_snapshot!(newline_in_brackets_eol(WINDOWS_EOL)); + assert_snapshot!(newline_in_brackets_eol(WINDOWS_EOL)); } #[test] @@ -1951,55 +2298,57 @@ def f(arg=%timeit a = b): 'c' \ 'd' )"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_logical_newline_line_comment() { let source = "#Hello\n#World\n"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_operators() { let source = "//////=/ /"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_string() { let source = r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\'' '\420' '\200\0a'"#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } - fn string_continuation_with_eol(eol: &str) -> Vec { + fn string_continuation_with_eol(eol: &str) -> LexerOutput { let source = format!("\"abc\\{eol}def\""); lex_source(&source) } #[test] fn test_string_continuation_with_unix_eol() { - assert_debug_snapshot!(string_continuation_with_eol(UNIX_EOL)); + assert_snapshot!(string_continuation_with_eol(UNIX_EOL)); } #[test] fn test_string_continuation_with_mac_eol() { - assert_debug_snapshot!(string_continuation_with_eol(MAC_EOL)); + assert_snapshot!(string_continuation_with_eol(MAC_EOL)); } #[test] fn 
test_string_continuation_with_windows_eol() { - assert_debug_snapshot!(string_continuation_with_eol(WINDOWS_EOL)); + assert_snapshot!(string_continuation_with_eol(WINDOWS_EOL)); } #[test] fn test_escape_unicode_name() { let source = r#""\N{EN SPACE}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } - fn get_tokens_only(source: &str) -> Vec { - lex_source(source).into_iter().map(|(tok, _)| tok).collect() + fn get_tokens_only(source: &str) -> Vec { + let output = lex(source, Mode::Module); + assert!(output.errors.is_empty()); + output.tokens.into_iter().map(|token| token.kind).collect() } #[test] @@ -2009,24 +2358,24 @@ def f(arg=%timeit a = b): assert_eq!(get_tokens_only(source1), get_tokens_only(source2)); } - fn triple_quoted_eol(eol: &str) -> Vec { + fn triple_quoted_eol(eol: &str) -> LexerOutput { let source = format!("\"\"\"{eol} test string{eol} \"\"\""); lex_source(&source) } #[test] fn test_triple_quoted_unix_eol() { - assert_debug_snapshot!(triple_quoted_eol(UNIX_EOL)); + assert_snapshot!(triple_quoted_eol(UNIX_EOL)); } #[test] fn test_triple_quoted_mac_eol() { - assert_debug_snapshot!(triple_quoted_eol(MAC_EOL)); + assert_snapshot!(triple_quoted_eol(MAC_EOL)); } #[test] fn test_triple_quoted_windows_eol() { - assert_debug_snapshot!(triple_quoted_eol(WINDOWS_EOL)); + assert_snapshot!(triple_quoted_eol(WINDOWS_EOL)); } // This test case is to just make sure that the lexer doesn't go into @@ -2034,125 +2383,110 @@ def f(arg=%timeit a = b): #[test] fn test_infinite_loop() { let source = "[1"; - let _ = lex(source, Mode::Module).collect::>(); + lex_invalid(source, Mode::Module); } /// Emoji identifiers are a non-standard python feature and are not supported by our lexer. #[test] fn test_emoji_identifier() { let source = "🐦"; - - let lexed: Vec<_> = lex(source, Mode::Module).collect(); - - match lexed.as_slice() { - [Err(error)] => { - assert_eq!( - error.error(), - &LexicalErrorType::UnrecognizedToken { tok: '🐦' } - ); - } - result => panic!("Expected an error token but found {result:?}"), - } + assert_snapshot!(lex_invalid(source, Mode::Module)); } #[test] fn tet_too_low_dedent() { - let tokens: Vec<_> = lex( - "if True: + let source = "if True: pass - pass", - Mode::Module, - ) - .collect(); - assert_debug_snapshot!(tokens); + pass"; + assert_snapshot!(lex_invalid(source, Mode::Module)); } #[test] fn test_empty_fstrings() { let source = r#"f"" "" F"" f'' '' f"""""" f''''''"#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_prefix() { let source = r#"f"" F"" rf"" rF"" Rf"" RF"" fr"" Fr"" fR"" FR"""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring() { let source = r#"f"normal {foo} {{another}} {bar} {{{three}}}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_parentheses() { let source = r#"f"{}" f"{{}}" f" {}" f"{{{}}}" f"{{{{}}}}" f" {} {{}} {{{}}} {{{{}}}} ""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } - fn fstring_single_quote_escape_eol(eol: &str) -> Vec { + fn fstring_single_quote_escape_eol(eol: &str) -> LexerOutput { let source = format!(r"f'text \{eol} more text'"); lex_source(&source) } #[test] fn test_fstring_single_quote_escape_unix_eol() { - assert_debug_snapshot!(fstring_single_quote_escape_eol(UNIX_EOL)); + assert_snapshot!(fstring_single_quote_escape_eol(UNIX_EOL)); } #[test] fn 
test_fstring_single_quote_escape_mac_eol() { - assert_debug_snapshot!(fstring_single_quote_escape_eol(MAC_EOL)); + assert_snapshot!(fstring_single_quote_escape_eol(MAC_EOL)); } #[test] fn test_fstring_single_quote_escape_windows_eol() { - assert_debug_snapshot!(fstring_single_quote_escape_eol(WINDOWS_EOL)); + assert_snapshot!(fstring_single_quote_escape_eol(WINDOWS_EOL)); } #[test] fn test_fstring_escape() { let source = r#"f"\{x:\"\{x}} \"\"\ end""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_escape_braces() { let source = r"f'\{foo}' f'\\{foo}' f'\{{foo}}' f'\\{{foo}}'"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_escape_raw() { let source = r#"rf"\{x:\"\{x}} \"\"\ end""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_named_unicode() { let source = r#"f"\N{BULLET} normal \Nope \N""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_named_unicode_raw() { let source = r#"rf"\N{BULLET} normal""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_with_named_expression() { let source = r#"f"{x:=10} {(x:=10)} {x,{y:=10}} {[x:=10]}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_with_format_spec() { let source = r#"f"{foo:} {x=!s:.3f} {x:.{y}f} {'':*^{1:{1}}} {x:{{1}.pop()}}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2175,19 +2509,19 @@ f'__{ b }__' "; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_conversion() { let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_nested() { let source = r#"f"foo {f"bar {x + f"{wow}"}"} baz" f'foo {f'bar'} some {f"another"}'"#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2197,7 +2531,7 @@ f'__{ * y } second""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2210,7 +2544,7 @@ hello hello ''' f"some {f"""multiline allowed {x}"""} string""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2220,13 +2554,13 @@ allowed {x}"""} string""#; x } # not a comment """"#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_with_ipy_escape_command() { let source = r#"f"foo {!pwd} bar""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2236,13 +2570,13 @@ f"{lambda x:{x}}" f"{(lambda x:{x})}" "# .trim(); - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_with_nul_char() { let source = r"f'\0'"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2250,18 +2584,18 @@ f"{(lambda x:{x})}" let source = r"match foo: case bar: pass"; - assert_debug_snapshot!(lex_jupyter_source(source)); - } - - fn lex_error(source: &str) -> LexicalError { - match lex(source, Mode::Module).find_map(Result::err) { - Some(err) => err, - _ => panic!("Expected at least one error"), - } + 
assert_snapshot!(lex_jupyter_source(source)); } fn lex_fstring_error(source: &str) -> FStringErrorType { - match lex_error(source).into_error() { + let output = lex(source, Mode::Module); + match output + .errors + .into_iter() + .next() + .expect("lexer should give at least one error") + .into_error() + { LexicalErrorType::FStringError(error) => error, err => panic!("Expected FStringError: {err:?}"), } diff --git a/crates/ruff_python_parser/src/lexer/cursor.rs b/crates/ruff_python_parser/src/lexer/cursor.rs index 6dd8e63d70..e7cd633920 100644 --- a/crates/ruff_python_parser/src/lexer/cursor.rs +++ b/crates/ruff_python_parser/src/lexer/cursor.rs @@ -1,18 +1,26 @@ -use ruff_text_size::{TextLen, TextSize}; use std::str::Chars; +use ruff_text_size::{TextLen, TextSize}; + pub(crate) const EOF_CHAR: char = '\0'; +/// A cursor represents a pointer in the source code. #[derive(Clone, Debug)] -pub(super) struct Cursor<'a> { - chars: Chars<'a>, +pub(super) struct Cursor<'src> { + /// An iterator over the [`char`]'s of the source code. + chars: Chars<'src>, + + /// Length of the source code. This is used as a marker to indicate the start of the current + /// token which is being lexed. source_length: TextSize, + + /// Stores the previous character for debug assertions. #[cfg(debug_assertions)] prev_char: char, } -impl<'a> Cursor<'a> { - pub(crate) fn new(source: &'a str) -> Self { +impl<'src> Cursor<'src> { + pub(crate) fn new(source: &'src str) -> Self { Self { source_length: source.text_len(), chars: source.chars(), @@ -21,14 +29,14 @@ impl<'a> Cursor<'a> { } } - /// Returns the previous token. Useful for debug assertions. + /// Returns the previous character. Useful for debug assertions. #[cfg(debug_assertions)] pub(super) const fn previous(&self) -> char { self.prev_char } /// Peeks the next character from the input stream without consuming it. - /// Returns [`EOF_CHAR`] if the file is at the end of the file. + /// Returns [`EOF_CHAR`] if the position is past the end of the file. pub(super) fn first(&self) -> char { self.chars.clone().next().unwrap_or(EOF_CHAR) } @@ -42,29 +50,44 @@ impl<'a> Cursor<'a> { } /// Returns the remaining text to lex. - pub(super) fn rest(&self) -> &'a str { + /// + /// Use [`Cursor::text_len`] to get the length of the remaining text. + pub(super) fn rest(&self) -> &'src str { self.chars.as_str() } + /// Returns the length of the remaining text. + /// + /// Use [`Cursor::rest`] to get the remaining text. // SAFETY: The `source.text_len` call in `new` would panic if the string length is larger than a `u32`. #[allow(clippy::cast_possible_truncation)] pub(super) fn text_len(&self) -> TextSize { TextSize::new(self.chars.as_str().len() as u32) } + /// Returns the length of the current token length. + /// + /// This is to be used after setting the start position of the token using + /// [`Cursor::start_token`]. pub(super) fn token_len(&self) -> TextSize { self.source_length - self.text_len() } + /// Mark the current position of the cursor as the start of the token which is going to be + /// lexed. + /// + /// Use [`Cursor::token_len`] to get the length of the lexed token. pub(super) fn start_token(&mut self) { self.source_length = self.text_len(); } + /// Returns `true` if the cursor is at the end of file. pub(super) fn is_eof(&self) -> bool { self.chars.as_str().is_empty() } - /// Consumes the next character + /// Moves the cursor to the next character, returning the previous character. + /// Returns [`None`] if there is no next character. 
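// A crate-internal sketch, not part of this patch, of the start_token()/token_len() protocol
// described by the comments above: mark the start of the token, consume characters, then read
// back the consumed length. The function name is hypothetical.
fn cursor_sketch() {
    let mut cursor = Cursor::new("abc def");
    cursor.start_token();
    while !cursor.is_eof() && !cursor.first().is_whitespace() {
        cursor.bump();
    }
    assert_eq!(cursor.token_len(), TextSize::new(3)); // consumed "abc"
}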
pub(super) fn bump(&mut self) -> Option { let prev = self.chars.next()?; diff --git a/crates/ruff_python_parser/src/lexer/fstring.rs b/crates/ruff_python_parser/src/lexer/fstring.rs index 16dae1222d..7b702a77b7 100644 --- a/crates/ruff_python_parser/src/lexer/fstring.rs +++ b/crates/ruff_python_parser/src/lexer/fstring.rs @@ -1,9 +1,11 @@ -use ruff_python_ast::{AnyStringFlags, StringFlags}; +use ruff_python_ast::StringFlags; + +use super::TokenFlags; /// The context representing the current f-string that the lexer is in. -#[derive(Debug)] +#[derive(Clone, Debug)] pub(crate) struct FStringContext { - flags: AnyStringFlags, + flags: TokenFlags, /// The level of nesting for the lexer when it entered the current f-string. /// The nesting level includes all kinds of parentheses i.e., round, square, @@ -17,8 +19,9 @@ pub(crate) struct FStringContext { } impl FStringContext { - pub(crate) const fn new(flags: AnyStringFlags, nesting: u32) -> Self { - debug_assert!(flags.is_f_string()); + pub(crate) const fn new(flags: TokenFlags, nesting: u32) -> Self { + assert!(flags.is_f_string()); + Self { flags, nesting, @@ -26,8 +29,7 @@ impl FStringContext { } } - pub(crate) const fn flags(&self) -> AnyStringFlags { - debug_assert!(self.flags.is_f_string()); + pub(crate) const fn flags(&self) -> TokenFlags { self.flags } @@ -127,4 +129,15 @@ impl FStrings { pub(crate) fn current_mut(&mut self) -> Option<&mut FStringContext> { self.stack.last_mut() } + + pub(crate) fn checkpoint(&self) -> FStringsCheckpoint { + FStringsCheckpoint(self.stack.clone()) + } + + pub(crate) fn rewind(&mut self, checkpoint: FStringsCheckpoint) { + self.stack = checkpoint.0; + } } + +#[derive(Debug, Clone)] +pub(crate) struct FStringsCheckpoint(Vec); diff --git a/crates/ruff_python_parser/src/lexer/indentation.rs b/crates/ruff_python_parser/src/lexer/indentation.rs index 2b12efab06..7125f3a224 100644 --- a/crates/ruff_python_parser/src/lexer/indentation.rs +++ b/crates/ruff_python_parser/src/lexer/indentation.rs @@ -82,8 +82,8 @@ impl Indentation { #[derive(Debug, Copy, Clone, PartialEq)] pub(super) struct UnexpectedIndentation; -// The indentations stack is used to keep track of the current indentation level -// [See Indentation](docs.python.org/3/reference/lexical_analysis.html#indentation). +/// The indentations stack is used to keep track of the current indentation level +/// [See Indentation](docs.python.org/3/reference/lexical_analysis.html#indentation). #[derive(Debug, Clone, Default)] pub(super) struct Indentations { stack: Vec, @@ -124,8 +124,19 @@ impl Indentations { static ROOT: Indentation = Indentation::root(); self.stack.last().unwrap_or(&ROOT) } + + pub(crate) fn checkpoint(&self) -> IndentationsCheckpoint { + IndentationsCheckpoint(self.stack.clone()) + } + + pub(crate) fn rewind(&mut self, checkpoint: IndentationsCheckpoint) { + self.stack = checkpoint.0; + } } +#[derive(Debug, Clone)] +pub(crate) struct IndentationsCheckpoint(Vec); + assert_eq_size!(Indentation, u64); #[cfg(test)] diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs index 3795203b98..52b436592b 100644 --- a/crates/ruff_python_parser/src/lib.rs +++ b/crates/ruff_python_parser/src/lib.rs @@ -57,81 +57,37 @@ //! //! - token: This module contains the definition of the tokens that are generated by the lexer. //! - [lexer]: This module contains the lexer and is responsible for generating the tokens. -//! - parser: This module contains an interface to the [Program] and is responsible for generating the AST. +//! 
- parser: This module contains an interface to the [Parsed] and is responsible for generating the AST. //! - mode: This module contains the definition of the different modes that the `ruff_python_parser` can be in. //! -//! # Examples -//! -//! For example, to get a stream of tokens from a given string, one could do this: -//! -//! ``` -//! use ruff_python_parser::{lexer::lex, Mode}; -//! -//! let python_source = r#" -//! def is_odd(i): -//! return bool(i & 1) -//! "#; -//! let mut tokens = lex(python_source, Mode::Module); -//! assert!(tokens.all(|t| t.is_ok())); -//! ``` -//! -//! These tokens can be directly fed into the `ruff_python_parser` to generate an AST: -//! -//! ``` -//! use ruff_python_parser::lexer::lex; -//! use ruff_python_parser::{Mode, parse_tokens}; -//! -//! let python_source = r#" -//! def is_odd(i): -//! return bool(i & 1) -//! "#; -//! let tokens = lex(python_source, Mode::Module); -//! let ast = parse_tokens(tokens.collect(), python_source, Mode::Module); -//! -//! assert!(ast.is_ok()); -//! ``` -//! -//! Alternatively, you can use one of the other `parse_*` functions to parse a string directly without using a specific -//! mode or tokenizing the source beforehand: -//! -//! ``` -//! use ruff_python_parser::parse_suite; -//! -//! let python_source = r#" -//! def is_odd(i): -//! return bool(i & 1) -//! "#; -//! let ast = parse_suite(python_source); -//! -//! assert!(ast.is_ok()); -//! ``` -//! //! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis //! [parsing]: https://en.wikipedia.org/wiki/Parsing //! [lexer]: crate::lexer -use std::iter::FusedIterator; +use std::cell::OnceCell; use std::ops::Deref; -use ruff_python_ast::{Expr, Mod, ModModule, PySourceType, Suite}; -use ruff_text_size::{TextRange, TextSize}; - pub use crate::error::{FStringErrorType, ParseError, ParseErrorType}; -use crate::lexer::{lex, lex_starts_at, LexResult}; -pub use crate::parser::Program; -pub use crate::token::{Tok, TokenKind}; +pub use crate::lexer::Token; +pub use crate::token::TokenKind; + +use crate::parser::Parser; + +use itertools::Itertools; +use ruff_python_ast::{Expr, Mod, ModExpression, ModModule, PySourceType, Suite}; +use ruff_python_trivia::CommentRanges; +use ruff_text_size::{Ranged, TextRange, TextSize}; mod error; pub mod lexer; mod parser; -mod soft_keywords; mod string; mod token; mod token_set; mod token_source; pub mod typing; -/// Parse a full Python program usually consisting of multiple lines. +/// Parse a full Python module usually consisting of multiple lines. /// /// This is a convenience function that can be used to parse a full Python program without having to /// specify the [`Mode`] or the location. It is probably what you want to use most of the time. @@ -141,7 +97,7 @@ pub mod typing; /// For example, parsing a simple function definition and a call to that function: /// /// ``` -/// use ruff_python_parser::parse_program; +/// use ruff_python_parser::parse_module; /// /// let source = r#" /// def foo(): @@ -150,41 +106,15 @@ pub mod typing; /// print(foo()) /// "#; /// -/// let program = parse_program(source); -/// assert!(program.is_ok()); +/// let module = parse_module(source); +/// assert!(module.is_ok()); /// ``` -pub fn parse_program(source: &str) -> Result { - let lexer = lex(source, Mode::Module); - match parse_tokens(lexer.collect(), source, Mode::Module)? { - Mod::Module(m) => Ok(m), - Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"), - } -} - -/// Parse a full Python program into a [`Suite`]. 
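// An illustrative sketch, not part of this patch, of how the removed parse_program/parse_suite
// helpers map onto the new API: parse_module returns Parsed<ModModule>, and the old Suite
// result is reached through suite()/into_suite(), defined further down in this file. The
// function name is hypothetical.
fn parse_module_sketch() {
    use ruff_python_parser::parse_module;

    let parsed = parse_module("def foo():\n    return 42\n\nprint(foo())\n").expect("valid source");
    assert!(parsed.errors().is_empty());
    assert_eq!(parsed.suite().len(), 2); // the function definition and the print call
    let _suite = parsed.into_suite(); // owned Vec<Stmt>, the old parse_suite result
}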
-/// -/// This function is similar to [`parse_program`] except that it returns the module body -/// instead of the module itself. -/// -/// # Example -/// -/// For example, parsing a simple function definition and a call to that function: -/// -/// ``` -/// use ruff_python_parser::parse_suite; -/// -/// let source = r#" -/// def foo(): -/// return 42 -/// -/// print(foo()) -/// "#; -/// -/// let body = parse_suite(source); -/// assert!(body.is_ok()); -/// ``` -pub fn parse_suite(source: &str) -> Result { - parse_program(source).map(|m| m.body) +pub fn parse_module(source: &str) -> Result, ParseError> { + Parser::new(source, Mode::Module) + .parse() + .try_into_module() + .unwrap() + .into_result() } /// Parses a single Python expression. @@ -202,37 +132,40 @@ pub fn parse_suite(source: &str) -> Result { /// let expr = parse_expression("1 + 2"); /// assert!(expr.is_ok()); /// ``` -pub fn parse_expression(source: &str) -> Result { - let lexer = lex(source, Mode::Expression).collect(); - match parse_tokens(lexer, source, Mode::Expression)? { - Mod::Expression(expression) => Ok(*expression.body), - Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"), - } +pub fn parse_expression(source: &str) -> Result, ParseError> { + Parser::new(source, Mode::Expression) + .parse() + .try_into_expression() + .unwrap() + .into_result() } -/// Parses a Python expression from a given location. +/// Parses a Python expression for the given range in the source. /// -/// This function allows to specify the location of the expression in the source code, other than +/// This function allows to specify the range of the expression in the source code, other than /// that, it behaves exactly like [`parse_expression`]. /// /// # Example /// -/// Parsing a single expression denoting the addition of two numbers, but this time specifying a different, -/// somewhat silly, location: +/// Parsing one of the numeric literal which is part of an addition expression: /// /// ``` -/// use ruff_python_parser::parse_expression_starts_at; -/// # use ruff_text_size::TextSize; +/// use ruff_python_parser::parse_expression_range; +/// # use ruff_text_size::{TextRange, TextSize}; /// -/// let expr = parse_expression_starts_at("1 + 2", TextSize::from(400)); -/// assert!(expr.is_ok()); +/// let parsed = parse_expression_range("11 + 22 + 33", TextRange::new(TextSize::new(5), TextSize::new(7))); +/// assert!(parsed.is_ok()); /// ``` -pub fn parse_expression_starts_at(source: &str, offset: TextSize) -> Result { - let lexer = lex_starts_at(source, Mode::Module, offset).collect(); - match parse_tokens(lexer, source, Mode::Expression)? { - Mod::Expression(expression) => Ok(*expression.body), - Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"), - } +pub fn parse_expression_range( + source: &str, + range: TextRange, +) -> Result, ParseError> { + let source = &source[..range.end().to_usize()]; + Parser::new_starts_at(source, Mode::Expression, range.start()) + .parse() + .try_into_expression() + .unwrap() + .into_result() } /// Parse the given Python source code using the specified [`Mode`]. @@ -249,8 +182,8 @@ pub fn parse_expression_starts_at(source: &str, offset: TextSize) -> Result Result Result Result { - let lxr = lexer::lex(source, mode); - parse_tokens(lxr.collect(), source, mode) +pub fn parse(source: &str, mode: Mode) -> Result, ParseError> { + parse_unchecked(source, mode).into_result() } -/// Parse the given Python source code using the specified [`Mode`] and [`TextSize`]. 
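// A hedged sketch, not part of this patch: the removed offset-based parse_*_starts_at helpers
// are covered by parse_expression_range above, which takes the sub-expression's range and
// keeps the returned AST offsets relative to the original source. The function name is
// hypothetical.
fn parse_expression_range_sketch() {
    use ruff_python_ast::Expr;
    use ruff_python_parser::parse_expression_range;
    use ruff_text_size::{TextRange, TextSize};

    // Parse only the `22` out of the larger source.
    let range = TextRange::new(TextSize::new(5), TextSize::new(7));
    let parsed = parse_expression_range("11 + 22", range).expect("valid expression");
    assert!(matches!(parsed.expr(), Expr::NumberLiteral(_)));
}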
+/// Parse the given Python source code using the specified [`Mode`]. /// -/// This function allows to specify the location of the source code, other than -/// that, it behaves exactly like [`parse`]. -/// -/// # Example -/// -/// ``` -/// # use ruff_text_size::TextSize; -/// use ruff_python_parser::{Mode, parse_starts_at}; -/// -/// let source = r#" -/// def fib(i): -/// a, b = 0, 1 -/// for _ in range(i): -/// a, b = b, a + b -/// return a -/// -/// print(fib(42)) -/// "#; -/// let program = parse_starts_at(source, Mode::Module, TextSize::from(0)); -/// assert!(program.is_ok()); -/// ``` -pub fn parse_starts_at(source: &str, mode: Mode, offset: TextSize) -> Result { - let lxr = lexer::lex_starts_at(source, mode, offset); - parse_tokens(lxr.collect(), source, mode) +/// This is same as the [`parse`] function except that it doesn't check for any [`ParseError`] +/// and returns the [`Parsed`] as is. +pub fn parse_unchecked(source: &str, mode: Mode) -> Parsed { + Parser::new(source, mode).parse() } -/// Parse an iterator of [`LexResult`]s using the specified [`Mode`]. -/// -/// This could allow you to perform some preprocessing on the tokens before parsing them. -/// -/// # Example -/// -/// As an example, instead of parsing a string, we can parse a list of tokens after we generate -/// them using the [`lexer::lex`] function: -/// -/// ``` -/// use ruff_python_parser::lexer::lex; -/// use ruff_python_parser::{Mode, parse_tokens}; -/// -/// let source = "1 + 2"; -/// let tokens = lex(source, Mode::Expression); -/// let expr = parse_tokens(tokens.collect(), source, Mode::Expression); -/// assert!(expr.is_ok()); -/// ``` -pub fn parse_tokens(tokens: Vec, source: &str, mode: Mode) -> Result { - let program = Program::parse_tokens(source, tokens, mode); - if program.is_valid() { - Ok(program.into_ast()) - } else { - Err(program.into_errors().into_iter().next().unwrap()) +/// Parse the given Python source code using the specified [`PySourceType`]. +pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed { + // SAFETY: Safe because `PySourceType` always parses to a `ModModule` + Parser::new(source, source_type.as_mode()) + .parse() + .try_into_module() + .unwrap() +} + +/// Represents the parsed source code. +#[derive(Debug, Clone)] +pub struct Parsed { + syntax: T, + tokens: Tokens, + errors: Vec, + comment_ranges: CommentRanges, +} + +impl Parsed { + /// Returns the syntax node represented by this parsed output. + pub fn syntax(&self) -> &T { + &self.syntax + } + + /// Returns all the tokens for the parsed output. + pub fn tokens(&self) -> &Tokens { + &self.tokens + } + + /// Returns a list of syntax errors found during parsing. + pub fn errors(&self) -> &[ParseError] { + &self.errors + } + + /// Returns the comment ranges for the parsed output. + pub fn comment_ranges(&self) -> &CommentRanges { + &self.comment_ranges + } + + /// Consumes the [`Parsed`] output and returns the contained syntax node. + pub fn into_syntax(self) -> T { + self.syntax + } + + /// Consumes the [`Parsed`] output and returns a list of syntax errors found during parsing. + pub fn into_errors(self) -> Vec { + self.errors + } + + /// Returns `true` if the parsed source code is valid i.e., it has no syntax errors. + pub fn is_valid(&self) -> bool { + self.errors.is_empty() + } + + /// Returns the [`Parsed`] output as a [`Result`], returning [`Ok`] if it has no syntax errors, + /// or [`Err`] containing the first [`ParseError`] encountered. 
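// A minimal sketch, not part of this patch, of the error-recovery workflow the accessors above
// support: parse_unchecked never bails out, and syntax errors are collected on the returned
// Parsed alongside the recovered AST. The function name is hypothetical.
fn parse_unchecked_sketch() {
    use ruff_python_parser::{parse_unchecked, Mode};

    let parsed = parse_unchecked("def foo(:\n    pass\n", Mode::Module);
    assert!(!parsed.is_valid()); // at least one syntax error was recovered from
    for error in parsed.errors() {
        println!("{error}");
    }
    let _module = parsed.into_syntax(); // the recovered AST is still available
}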
+ pub fn as_result(&self) -> Result<&Parsed, &ParseError> { + if let [error, ..] = self.errors() { + Err(error) + } else { + Ok(self) + } + } + + /// Consumes the [`Parsed`] output and returns a [`Result`] which is [`Ok`] if it has no syntax + /// errors, or [`Err`] containing the first [`ParseError`] encountered. + pub(crate) fn into_result(self) -> Result, ParseError> { + if self.is_valid() { + Ok(self) + } else { + Err(self.into_errors().into_iter().next().unwrap()) + } } } -/// Tokens represents a vector of [`LexResult`]. -/// -/// This should only include tokens up to and including the first error. This struct is created -/// by the [`tokenize`] function. +impl Parsed { + /// Attempts to convert the [`Parsed`] into a [`Parsed`]. + /// + /// This method checks if the `syntax` field of the output is a [`Mod::Module`]. If it is, the + /// method returns [`Some(Parsed)`] with the contained module. Otherwise, it + /// returns [`None`]. + /// + /// [`Some(Parsed)`]: Some + fn try_into_module(self) -> Option> { + match self.syntax { + Mod::Module(module) => Some(Parsed { + syntax: module, + tokens: self.tokens, + errors: self.errors, + comment_ranges: self.comment_ranges, + }), + Mod::Expression(_) => None, + } + } + + /// Attempts to convert the [`Parsed`] into a [`Parsed`]. + /// + /// This method checks if the `syntax` field of the output is a [`Mod::Expression`]. If it is, + /// the method returns [`Some(Parsed)`] with the contained expression. + /// Otherwise, it returns [`None`]. + /// + /// [`Some(Parsed)`]: Some + fn try_into_expression(self) -> Option> { + match self.syntax { + Mod::Module(_) => None, + Mod::Expression(expression) => Some(Parsed { + syntax: expression, + tokens: self.tokens, + errors: self.errors, + comment_ranges: self.comment_ranges, + }), + } + } +} + +impl Parsed { + /// Returns the module body contained in this parsed output as a [`Suite`]. + pub fn suite(&self) -> &Suite { + &self.syntax.body + } + + /// Consumes the [`Parsed`] output and returns the module body as a [`Suite`]. + pub fn into_suite(self) -> Suite { + self.syntax.body + } +} + +impl Parsed { + /// Returns the expression contained in this parsed output. + pub fn expr(&self) -> &Expr { + &self.syntax.body + } + + /// Consumes the [`Parsed`] output and returns the contained [`Expr`]. + pub fn into_expr(self) -> Expr { + *self.syntax.body + } +} + +/// Tokens represents a vector of lexed [`Token`]. #[derive(Debug, Clone)] -pub struct Tokens(Vec); +pub struct Tokens { + raw: Vec, + + /// Index of the first [`TokenKind::Unknown`] token or the length of the token vector. + first_unknown_or_len: OnceCell, +} impl Tokens { - /// Returns an iterator over the [`TokenKind`] and the range corresponding to the tokens. - pub fn kinds(&self) -> TokenKindIter { - TokenKindIter::new(&self.0) + pub(crate) fn new(tokens: Vec) -> Tokens { + Tokens { + raw: tokens, + first_unknown_or_len: OnceCell::new(), + } } - /// Consumes the [`Tokens`], returning the underlying vector of [`LexResult`]. - pub fn into_inner(self) -> Vec { - self.0 + /// Returns a slice of tokens up to (and excluding) the first [`TokenKind::Unknown`] token or + /// all the tokens if there is none. + pub fn up_to_first_unknown(&self) -> &[Token] { + let end = *self.first_unknown_or_len.get_or_init(|| { + self.raw + .iter() + .find_position(|token| token.kind() == TokenKind::Unknown) + .map(|(idx, _)| idx) + .unwrap_or_else(|| self.raw.len()) + }); + &self.raw[..end] + } + + /// Returns a slice of [`Token`] that are within the given `range`. 
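// An illustrative sketch, not part of this patch: when the source contains a lexical error,
// up_to_first_unknown() yields the lexically valid prefix of the token stream. The function
// name and the example source are hypothetical.
fn up_to_first_unknown_sketch() {
    use ruff_python_parser::{parse_unchecked, Mode, TokenKind};

    let parsed = parse_unchecked("x = $ + 1\n", Mode::Module); // `$` is not a valid Python token
    assert!(!parsed.is_valid());
    let valid_prefix = parsed.tokens().up_to_first_unknown();
    assert!(valid_prefix.iter().all(|token| token.kind() != TokenKind::Unknown));
}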
+ /// + /// The start and end offset of the given range should be either: + /// 1. Token boundary + /// 2. Gap between the tokens + /// + /// For example, considering the following tokens and their corresponding range: + /// + /// | Token | Range | + /// |---------------------|-----------| + /// | `Def` | `0..3` | + /// | `Name` | `4..7` | + /// | `Lpar` | `7..8` | + /// | `Rpar` | `8..9` | + /// | `Colon` | `9..10` | + /// | `Newline` | `10..11` | + /// | `Comment` | `15..24` | + /// | `NonLogicalNewline` | `24..25` | + /// | `Indent` | `25..29` | + /// | `Pass` | `29..33` | + /// + /// Here, for (1) a token boundary is considered either the start or end offset of any of the + /// above tokens. For (2), the gap would be any offset between the `Newline` and `Comment` + /// token which are 12, 13, and 14. + /// + /// Examples: + /// 1) `4..10` would give `Name`, `Lpar`, `Rpar`, `Colon` + /// 2) `11..25` would give `Comment`, `NonLogicalNewline` + /// 3) `12..25` would give same as (2) and offset 12 is in the "gap" + /// 4) `9..12` would give `Colon`, `Newline` and offset 12 is in the "gap" + /// 5) `18..27` would panic because both the start and end offset is within a token + /// + /// ## Note + /// + /// The returned slice can contain the [`TokenKind::Unknown`] token if there was a lexical + /// error encountered within the given range. + /// + /// # Panics + /// + /// If either the start or end offset of the given range is within a token range. + pub fn in_range(&self, range: TextRange) -> &[Token] { + let tokens_after_start = self.after(range.start()); + + match tokens_after_start.binary_search_by_key(&range.end(), Ranged::end) { + Ok(idx) => { + // If we found the token with the end offset, that token should be included in the + // return slice. + &tokens_after_start[..=idx] + } + Err(idx) => { + if let Some(token) = tokens_after_start.get(idx) { + // If it's equal to the start offset, then it's at a token boundary which is + // valid. If it's less than the start offset, then it's in the gap between the + // tokens which is valid as well. + assert!( + range.end() <= token.start(), + "End offset {:?} is inside a token range {:?}", + range.end(), + token.range() + ); + } + + // This index is where the token with the offset _could_ be, so that token should + // be excluded from the return slice. + &tokens_after_start[..idx] + } + } + } + + /// Returns a slice of tokens after the given [`TextSize`] offset. + /// + /// If the given offset is between two tokens, the returned slice will start from the following + /// token. In other words, if the offset is between the end of previous token and start of next + /// token, the returned slice will start from the next token. + /// + /// # Panics + /// + /// If the given offset is inside a token range. + pub fn after(&self, offset: TextSize) -> &[Token] { + match self.binary_search_by(|token| token.start().cmp(&offset)) { + Ok(idx) => &self[idx..], + Err(idx) => { + // We can't use `saturating_sub` here because a file could contain a BOM header, in + // which case the token starts at offset 3 for UTF-8 encoded file content. + if idx > 0 { + if let Some(prev) = self.get(idx - 1) { + // If it's equal to the end offset, then it's at a token boundary which is + // valid. If it's greater than the end offset, then it's in the gap between + // the tokens which is valid as well. + assert!( + offset >= prev.end(), + "Offset {:?} is inside a token range {:?}", + offset, + prev.range() + ); + } + } + + &self[idx..] 
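// An illustrative sketch, not part of this patch, exercising the two slicing APIs above on the
// same source as the token table in the in_range documentation. The function name is
// hypothetical.
fn token_slicing_sketch() {
    use ruff_python_parser::{parse_module, TokenKind};
    use ruff_text_size::{TextRange, TextSize};

    let parsed = parse_module("def foo():\n    # comment\n    pass").expect("valid source");
    let tokens = parsed.tokens();

    // Every token starting at or after offset 11, i.e. everything from the comment onwards.
    let after = tokens.after(TextSize::new(11));
    assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);

    // Tokens that fall inside 4..10: Name, Lpar, Rpar, Colon.
    let header = tokens.in_range(TextRange::new(TextSize::new(4), TextSize::new(10)));
    assert_eq!(header.len(), 4);
}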
+ } + } + } +} + +impl<'a> IntoIterator for &'a Tokens { + type Item = &'a Token; + type IntoIter = std::slice::Iter<'a, Token>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() } } impl Deref for Tokens { - type Target = [LexResult]; + type Target = [Token]; fn deref(&self) -> &Self::Target { - &self.0 - } -} - -/// An iterator over the [`TokenKind`] and the corresponding range. -/// -/// This struct is created by the [`Tokens::kinds`] method. -#[derive(Clone, Default)] -pub struct TokenKindIter<'a> { - inner: std::iter::Flatten>, -} - -impl<'a> TokenKindIter<'a> { - /// Create a new iterator from a slice of [`LexResult`]. - pub fn new(tokens: &'a [LexResult]) -> Self { - Self { - inner: tokens.iter().flatten(), - } - } - - /// Return the next value without advancing the iterator. - pub fn peek(&mut self) -> Option<(TokenKind, TextRange)> { - self.clone().next() - } -} - -impl Iterator for TokenKindIter<'_> { - type Item = (TokenKind, TextRange); - - fn next(&mut self) -> Option { - let &(ref tok, range) = self.inner.next()?; - Some((TokenKind::from_token(tok), range)) - } -} - -impl FusedIterator for TokenKindIter<'_> {} - -impl DoubleEndedIterator for TokenKindIter<'_> { - fn next_back(&mut self) -> Option { - let &(ref tok, range) = self.inner.next_back()?; - Some((TokenKind::from_token(tok), range)) - } -} - -/// Collect tokens up to and including the first error. -pub fn tokenize(contents: &str, mode: Mode) -> Tokens { - let mut tokens: Vec = allocate_tokens_vec(contents); - for tok in lexer::lex(contents, mode) { - let is_err = tok.is_err(); - tokens.push(tok); - if is_err { - break; - } - } - - Tokens(tokens) -} - -/// Tokenizes all tokens. -/// -/// It differs from [`tokenize`] in that it tokenizes all tokens and doesn't stop -/// after the first `Err`. -pub fn tokenize_all(contents: &str, mode: Mode) -> Vec { - let mut tokens = allocate_tokens_vec(contents); - for token in lexer::lex(contents, mode) { - tokens.push(token); - } - tokens -} - -/// Allocates a [`Vec`] with an approximated capacity to fit all tokens -/// of `contents`. -/// -/// See [#9546](https://github.com/astral-sh/ruff/pull/9546) for a more detailed explanation. -pub fn allocate_tokens_vec(contents: &str) -> Vec { - Vec::with_capacity(approximate_tokens_lower_bound(contents)) -} - -/// Approximates the number of tokens when lexing `contents`. -fn approximate_tokens_lower_bound(contents: &str) -> usize { - contents.len().saturating_mul(15) / 100 -} - -/// Parse a full Python program from its tokens. -pub fn parse_program_tokens( - tokens: Tokens, - source: &str, - is_jupyter_notebook: bool, -) -> anyhow::Result { - let mode = if is_jupyter_notebook { - Mode::Ipython - } else { - Mode::Module - }; - match parse_tokens(tokens.into_inner(), source, mode)? { - Mod::Module(m) => Ok(m.body), - Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"), + &self.raw } } @@ -529,3 +581,174 @@ impl std::fmt::Display for ModeParseError { write!(f, r#"mode must be "exec", "eval", "ipython", or "single""#) } } + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use crate::lexer::TokenFlags; + + use super::*; + + /// Test case containing a "gap" between two tokens. 
+    ///
+    /// Code:
+    const TEST_CASE_WITH_GAP: [(TokenKind, Range<u32>); 10] = [
+        (TokenKind::Def, 0..3),
+        (TokenKind::Name, 4..7),
+        (TokenKind::Lpar, 7..8),
+        (TokenKind::Rpar, 8..9),
+        (TokenKind::Colon, 9..10),
+        (TokenKind::Newline, 10..11),
+        // Gap                 ||..||
+        (TokenKind::Comment, 15..24),
+        (TokenKind::NonLogicalNewline, 24..25),
+        (TokenKind::Indent, 25..29),
+        (TokenKind::Pass, 29..33),
+        // No newline at the end to keep the token set full of unique tokens
+    ];
+
+    /// Test case containing a [`TokenKind::Unknown`] token.
+    ///
+    /// Code:
+    const TEST_CASE_WITH_UNKNOWN: [(TokenKind, Range<u32>); 5] = [
+        (TokenKind::Name, 0..1),
+        (TokenKind::Equal, 2..3),
+        (TokenKind::Unknown, 4..11),
+        (TokenKind::Plus, 11..12),
+        (TokenKind::Int, 13..14),
+        // No newline at the end to keep the token set full of unique tokens
+    ];
+
+    /// Helper function to create [`Tokens`] from an iterator of (kind, range).
+    fn new_tokens(tokens: impl Iterator<Item = (TokenKind, Range<u32>)>) -> Tokens {
+        Tokens::new(
+            tokens
+                .map(|(kind, range)| {
+                    Token::new(
+                        kind,
+                        TextRange::new(TextSize::new(range.start), TextSize::new(range.end)),
+                        TokenFlags::empty(),
+                    )
+                })
+                .collect(),
+        )
+    }
+
+    #[test]
+    fn tokens_up_to_first_unknown_empty() {
+        let tokens = Tokens::new(vec![]);
+        assert_eq!(tokens.up_to_first_unknown(), &[]);
+    }
+
+    #[test]
+    fn tokens_up_to_first_unknown_noop() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let up_to_first_unknown = tokens.up_to_first_unknown();
+        assert_eq!(up_to_first_unknown.len(), tokens.len());
+    }
+
+    #[test]
+    fn tokens_up_to_first_unknown() {
+        let tokens = new_tokens(TEST_CASE_WITH_UNKNOWN.into_iter());
+        let up_to_first_unknown = tokens.up_to_first_unknown();
+        assert_eq!(up_to_first_unknown.len(), 2);
+    }
+
+    #[test]
+    fn tokens_after_offset_at_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let after = tokens.after(TextSize::new(8));
+        assert_eq!(after.len(), 7);
+        assert_eq!(after.first().unwrap().kind(), TokenKind::Rpar);
+    }
+
+    #[test]
+    fn tokens_after_offset_at_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let after = tokens.after(TextSize::new(11));
+        assert_eq!(after.len(), 4);
+        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
+    }
+
+    #[test]
+    fn tokens_after_offset_between_tokens() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let after = tokens.after(TextSize::new(13));
+        assert_eq!(after.len(), 4);
+        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
+    }
+
+    #[test]
+    fn tokens_after_offset_at_last_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let after = tokens.after(TextSize::new(33));
+        assert_eq!(after.len(), 0);
+    }
+
+    #[test]
+    #[should_panic(expected = "Offset 5 is inside a token range 4..7")]
+    fn tokens_after_offset_inside_token() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        tokens.after(TextSize::new(5));
+    }
+
+    #[test]
+    fn tokens_in_range_at_token_offset() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let in_range = tokens.in_range(TextRange::new(4.into(), 10.into()));
+        assert_eq!(in_range.len(), 4);
+        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Name);
+        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Colon);
+    }
+
+    #[test]
+    fn tokens_in_range_start_offset_at_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let in_range = tokens.in_range(TextRange::new(11.into(), 29.into()));
+        assert_eq!(in_range.len(), 3);
+
assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent); + } + + #[test] + fn tokens_in_range_end_offset_at_token_start() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(8.into(), 15.into())); + assert_eq!(in_range.len(), 3); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Rpar); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline); + } + + #[test] + fn tokens_in_range_start_offset_between_tokens() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(13.into(), 29.into())); + assert_eq!(in_range.len(), 3); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent); + } + + #[test] + fn tokens_in_range_end_offset_between_tokens() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(9.into(), 13.into())); + assert_eq!(in_range.len(), 2); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Colon); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline); + } + + #[test] + #[should_panic(expected = "Offset 5 is inside a token range 4..7")] + fn tokens_in_range_start_offset_inside_token() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + tokens.in_range(TextRange::new(5.into(), 10.into())); + } + + #[test] + #[should_panic(expected = "End offset 6 is inside a token range 4..7")] + fn tokens_in_range_end_offset_inside_token() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + tokens.in_range(TextRange::new(0.into(), 6.into())); + } +} diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index fbb836c759..8504504c8a 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -11,11 +11,12 @@ use ruff_python_ast::{ }; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; +use crate::lexer::TokenValue; use crate::parser::progress::ParserProgress; use crate::parser::{helpers, FunctionKind, Parser}; use crate::string::{parse_fstring_literal_element, parse_string_literal, StringType}; use crate::token_set::TokenSet; -use crate::{FStringErrorType, Mode, ParseErrorType, Tok, TokenKind}; +use crate::{FStringErrorType, Mode, ParseErrorType, TokenKind}; use super::{Parenthesized, RecoveryContextKind}; @@ -106,9 +107,24 @@ pub(super) const END_EXPR_SET: TokenSet = TokenSet::new([ const END_SEQUENCE_SET: TokenSet = END_EXPR_SET.remove(TokenKind::Comma); impl<'src> Parser<'src> { + /// Returns `true` if the parser is at a name or keyword (including soft keyword) token. + pub(super) fn at_name_or_keyword(&self) -> bool { + self.at(TokenKind::Name) || self.current_token_kind().is_keyword() + } + + /// Returns `true` if the parser is at a name or soft keyword token. + pub(super) fn at_name_or_soft_keyword(&self) -> bool { + self.at(TokenKind::Name) || self.at_soft_keyword() + } + + /// Returns `true` if the parser is at a soft keyword token. + pub(super) fn at_soft_keyword(&self) -> bool { + self.current_token_kind().is_soft_keyword() + } + /// Returns `true` if the current token is the start of an expression. pub(super) fn at_expr(&self) -> bool { - self.at_ts(EXPR_SET) + self.at_ts(EXPR_SET) || self.at_soft_keyword() } /// Returns `true` if the current token ends a sequence. 
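The `Tokens::after` and `Tokens::in_range` slices added above are what downstream code can use instead of re-lexing a sub-range of the file. Below is a minimal sketch of how a caller might use them, reusing the token layout from the `in_range` documentation; the `tokens()` accessor on the parse result is an assumption here (only the underlying `tokens` field is visible in this patch).

```rust
use ruff_python_parser::{parse_module, TokenKind};
use ruff_text_size::{TextRange, TextSize};

fn main() {
    // Same token layout as the `in_range` doc comment:
    // `def foo():`, a comment-only line, then `pass`.
    let source = "def foo():\n    # comment\n    pass";

    // One parse yields the AST, the token stream, and the comment ranges.
    let parsed = parse_module(source).expect("source should be valid Python");
    // Assumed accessor for the `tokens` field of the parse result.
    let tokens = parsed.tokens();

    // Offset 11 is the end of the `Newline` token, i.e. a token boundary,
    // so the slice starts at the `Comment` token (`15..24`).
    let after = tokens.after(TextSize::new(11));
    assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);

    // Offsets 4 and 10 are token boundaries as well: the slice covers
    // `Name`, `Lpar`, `Rpar`, and `Colon`.
    let in_range = tokens.in_range(TextRange::new(4.into(), 10.into()));
    assert_eq!(in_range.len(), 4);
    assert_eq!(in_range.last().unwrap().kind(), TokenKind::Colon);
}
```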
@@ -459,36 +475,43 @@ impl<'src> Parser<'src> { let range = self.current_token_range(); if self.at(TokenKind::Name) { - let (Tok::Name { name }, _) = self.bump(TokenKind::Name) else { + let TokenValue::Name(name) = self.bump_value(TokenKind::Name) else { unreachable!(); }; - ast::Identifier { + return ast::Identifier { id: name.to_string(), range, - } - } else { - if self.current_token_kind().is_keyword() { - let (tok, range) = self.next_token(); - self.add_error( - ParseErrorType::OtherError(format!( - "Expected an identifier, but found a keyword '{tok}' that cannot be used here" - )), - range, - ); + }; + } - ast::Identifier { - id: tok.to_string(), - range, - } - } else { - self.add_error( - ParseErrorType::OtherError("Expected an identifier".into()), - range, - ); - ast::Identifier { - id: String::new(), - range: self.missing_node_range(), - } + if self.current_token_kind().is_soft_keyword() { + let id = self.src_text(range).to_string(); + self.bump_soft_keyword_as_name(); + return ast::Identifier { id, range }; + } + + if self.current_token_kind().is_keyword() { + // Non-soft keyword + self.add_error( + ParseErrorType::OtherError(format!( + "Expected an identifier, but found a keyword {} that cannot be used here", + self.current_token_kind() + )), + range, + ); + + let id = self.src_text(range).to_string(); + self.bump_any(); + ast::Identifier { id, range } + } else { + self.add_error( + ParseErrorType::OtherError("Expected an identifier".into()), + range, + ); + + ast::Identifier { + id: String::new(), + range: self.missing_node_range(), } } } @@ -501,7 +524,7 @@ impl<'src> Parser<'src> { let lhs = match self.current_token_kind() { TokenKind::Float => { - let (Tok::Float { value }, _) = self.bump(TokenKind::Float) else { + let TokenValue::Float(value) = self.bump_value(TokenKind::Float) else { unreachable!() }; @@ -511,7 +534,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Complex => { - let (Tok::Complex { real, imag }, _) = self.bump(TokenKind::Complex) else { + let TokenValue::Complex { real, imag } = self.bump_value(TokenKind::Complex) else { unreachable!() }; Expr::NumberLiteral(ast::ExprNumberLiteral { @@ -520,7 +543,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Int => { - let (Tok::Int { value }, _) = self.bump(TokenKind::Int) else { + let TokenValue::Int(value) = self.bump_value(TokenKind::Int) else { unreachable!() }; Expr::NumberLiteral(ast::ExprNumberLiteral { @@ -1231,7 +1254,10 @@ impl<'src> Parser<'src> { /// /// See: fn parse_string_or_byte_literal(&mut self) -> StringType { - let (Tok::String { value, flags }, range) = self.bump(TokenKind::String) else { + let range = self.current_token_range(); + let flags = self.tokens.current_flags().as_any_string_flags(); + + let TokenValue::String(value) = self.bump_value(TokenKind::String) else { unreachable!() }; @@ -1277,18 +1303,17 @@ impl<'src> Parser<'src> { /// See: fn parse_fstring(&mut self) -> ast::FString { let start = self.node_start(); + let flags = self.tokens.current_flags().as_any_string_flags(); - let (Tok::FStringStart(kind), _) = self.bump(TokenKind::FStringStart) else { - unreachable!() - }; - let elements = self.parse_fstring_elements(); + self.bump(TokenKind::FStringStart); + let elements = self.parse_fstring_elements(flags); self.expect(TokenKind::FStringEnd); ast::FString { elements, range: self.node_range(start), - flags: kind.into(), + flags: ast::FStringFlags::from(flags), } } @@ -1297,16 +1322,18 @@ impl<'src> Parser<'src> { /// # Panics /// /// If the parser isn't positioned at a `{` or 
`FStringMiddle` token. - fn parse_fstring_elements(&mut self) -> FStringElements { + fn parse_fstring_elements(&mut self, flags: ast::AnyStringFlags) -> FStringElements { let mut elements = vec![]; self.parse_list(RecoveryContextKind::FStringElements, |parser| { let element = match parser.current_token_kind() { TokenKind::Lbrace => { - FStringElement::Expression(parser.parse_fstring_expression_element()) + FStringElement::Expression(parser.parse_fstring_expression_element(flags)) } TokenKind::FStringMiddle => { - let (Tok::FStringMiddle { value, flags, .. }, range) = parser.next_token() + let range = parser.current_token_range(); + let TokenValue::FStringMiddle(value) = + parser.bump_value(TokenKind::FStringMiddle) else { unreachable!() }; @@ -1332,7 +1359,7 @@ impl<'src> Parser<'src> { // `Invalid` tokens are created when there's a lexical error, so // we ignore it here to avoid creating unexpected token errors TokenKind::Unknown => { - parser.next_token(); + parser.bump_any(); return; } tok => { @@ -1356,7 +1383,10 @@ impl<'src> Parser<'src> { /// # Panics /// /// If the parser isn't positioned at a `{` token. - fn parse_fstring_expression_element(&mut self) -> ast::FStringExpressionElement { + fn parse_fstring_expression_element( + &mut self, + flags: ast::AnyStringFlags, + ) -> ast::FStringExpressionElement { let start = self.node_start(); self.bump(TokenKind::Lbrace); @@ -1396,7 +1426,10 @@ impl<'src> Parser<'src> { let conversion = if self.eat(TokenKind::Exclamation) { let conversion_flag_range = self.current_token_range(); - if let Tok::Name { name } = self.next_token().0 { + if self.at(TokenKind::Name) { + let TokenValue::Name(name) = self.bump_value(TokenKind::Name) else { + unreachable!(); + }; match &*name { "s" => ConversionFlag::Str, "r" => ConversionFlag::Repr, @@ -1419,6 +1452,8 @@ impl<'src> Parser<'src> { ParseErrorType::FStringError(FStringErrorType::InvalidConversionFlag), conversion_flag_range, ); + // TODO(dhruvmanila): Avoid dropping this token + self.bump_any(); ConversionFlag::None } } else { @@ -1427,7 +1462,7 @@ impl<'src> Parser<'src> { let format_spec = if self.eat(TokenKind::Colon) { let spec_start = self.node_start(); - let elements = self.parse_fstring_elements(); + let elements = self.parse_fstring_elements(flags); Some(Box::new(ast::FStringFormatSpec { range: self.node_range(spec_start), elements, @@ -2229,7 +2264,8 @@ impl<'src> Parser<'src> { fn parse_ipython_escape_command_expression(&mut self) -> ast::ExprIpyEscapeCommand { let start = self.node_start(); - let (Tok::IpyEscapeCommand { value, kind }, _) = self.bump(TokenKind::IpyEscapeCommand) + let TokenValue::IpyEscapeCommand { value, kind } = + self.bump_value(TokenKind::IpyEscapeCommand) else { unreachable!() }; diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index 2545b1dc86..f1b240cfd6 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -2,20 +2,16 @@ use std::cmp::Ordering; use bitflags::bitflags; -use ast::Mod; -use ruff_python_ast as ast; +use ruff_python_ast::{Mod, ModExpression, ModModule}; use ruff_text_size::{Ranged, TextRange, TextSize}; -use crate::lexer::lex; +use crate::lexer::TokenValue; +use crate::parser::expression::ExpressionContext; use crate::parser::progress::{ParserProgress, TokenId}; -use crate::{ - lexer::{LexResult, Spanned}, - token_set::TokenSet, - token_source::TokenSource, - Mode, ParseError, ParseErrorType, Tok, TokenKind, -}; - -use 
self::expression::ExpressionContext; +use crate::token_set::TokenSet; +use crate::token_source::{TokenSource, TokenSourceCheckpoint}; +use crate::{Mode, ParseError, ParseErrorType, TokenKind}; +use crate::{Parsed, Tokens}; mod expression; mod helpers; @@ -26,57 +22,12 @@ mod statement; #[cfg(test)] mod tests; -/// Represents the parsed source code. -/// -/// This includes the AST and all of the errors encountered during parsing. -#[derive(Debug)] -pub struct Program { - ast: ast::Mod, - parse_errors: Vec, -} - -impl Program { - /// Returns the parsed AST. - pub fn ast(&self) -> &ast::Mod { - &self.ast - } - - /// Returns a list of syntax errors found during parsing. - pub fn errors(&self) -> &[ParseError] { - &self.parse_errors - } - - /// Consumes the [`Program`] and returns the parsed AST. - pub fn into_ast(self) -> ast::Mod { - self.ast - } - - /// Consumes the [`Program`] and returns a list of syntax errors found during parsing. - pub fn into_errors(self) -> Vec { - self.parse_errors - } - - /// Returns `true` if the program is valid i.e., it has no syntax errors. - pub fn is_valid(&self) -> bool { - self.parse_errors.is_empty() - } - - /// Parse the given Python source code using the specified [`Mode`]. - pub fn parse_str(source: &str, mode: Mode) -> Program { - let tokens = lex(source, mode); - Self::parse_tokens(source, tokens.collect(), mode) - } - - /// Parse a vector of [`LexResult`]s using the specified [`Mode`]. - pub fn parse_tokens(source: &str, tokens: Vec, mode: Mode) -> Program { - Parser::new(source, mode, TokenSource::new(tokens)).parse_program() - } -} - #[derive(Debug)] pub(crate) struct Parser<'src> { source: &'src str, - tokens: TokenSource, + + /// Token source for the parser that skips over any non-trivia token. + tokens: TokenSource<'src>, /// Stores all the syntax errors found during the parsing. errors: Vec, @@ -84,37 +35,29 @@ pub(crate) struct Parser<'src> { /// Specify the mode in which the code will be parsed. mode: Mode, - /// Current token along with its range. - current: Spanned, - /// The ID of the current token. This is used to track the progress of the parser /// to avoid infinite loops when the parser is stuck. current_token_id: TokenId, - /// The end of the last processed. Used to determine a node's end. - last_token_end: TextSize, - - /// The range of the tokens to parse. - /// - /// The range is equal to `[0; source.len())` when parsing an entire file. The range can be - /// different when parsing only a part of a file using the [`crate::lex_starts_at`] and - /// [`crate::parse_expression_starts_at`] APIs in which case the the range is equal to - /// `[offset; subrange.len())`. - tokens_range: TextRange, + /// The end of the previous token processed. This is used to determine a node's end. + prev_token_end: TextSize, + /// The recovery context in which the parser is currently in. recovery_context: RecoveryContext, + + /// The start offset in the source code from which to start parsing at. + start_offset: TextSize, } impl<'src> Parser<'src> { - pub(crate) fn new(source: &'src str, mode: Mode, mut tokens: TokenSource) -> Parser<'src> { - let tokens_range = TextRange::new( - tokens.position().unwrap_or_default(), - tokens.end().unwrap_or_default(), - ); + /// Create a new parser for the given source code. 
+ pub(crate) fn new(source: &'src str, mode: Mode) -> Self { + Parser::new_starts_at(source, mode, TextSize::new(0)) + } - let current = tokens - .next() - .unwrap_or_else(|| (Tok::EndOfFile, TextRange::empty(tokens_range.end()))); + /// Create a new parser for the given source code which starts parsing at the given offset. + pub(crate) fn new_starts_at(source: &'src str, mode: Mode, start_offset: TextSize) -> Self { + let tokens = TokenSource::from_source(source, mode, start_offset); Parser { mode, @@ -122,24 +65,20 @@ impl<'src> Parser<'src> { errors: Vec::new(), tokens, recovery_context: RecoveryContext::empty(), - last_token_end: tokens_range.start(), - current, + prev_token_end: TextSize::new(0), + start_offset, current_token_id: TokenId::default(), - tokens_range, } } - /// Consumes the [`Parser`] and returns the parsed [`Program`]. - pub(crate) fn parse_program(mut self) -> Program { - let ast = match self.mode { + /// Consumes the [`Parser`] and returns the parsed [`Parsed`]. + pub(crate) fn parse(mut self) -> Parsed { + let syntax = match self.mode { Mode::Expression => Mod::Expression(self.parse_single_expression()), Mode::Module | Mode::Ipython => Mod::Module(self.parse_module()), }; - Program { - ast, - parse_errors: self.finish(), - } + self.finish(syntax) } /// Parses a single expression. @@ -150,7 +89,7 @@ impl<'src> Parser<'src> { /// /// After parsing a single expression, an error is reported and all remaining tokens are /// dropped by the parser. - fn parse_single_expression(&mut self) -> ast::ModExpression { + fn parse_single_expression(&mut self) -> ModExpression { let start = self.node_start(); let parsed_expr = self.parse_expression_list(ExpressionContext::default()); @@ -170,13 +109,13 @@ impl<'src> Parser<'src> { if self.at(TokenKind::EndOfFile) { break; } - self.next_token(); + self.bump_any(); } } self.bump(TokenKind::EndOfFile); - ast::ModExpression { + ModExpression { body: Box::new(parsed_expr.expr), range: self.node_range(start), } @@ -185,7 +124,7 @@ impl<'src> Parser<'src> { /// Parses a Python module. /// /// This is to be used for [`Mode::Module`] and [`Mode::Ipython`]. - fn parse_module(&mut self) -> ast::ModModule { + fn parse_module(&mut self) -> ModModule { let body = self.parse_list_into_vec( RecoveryContextKind::ModuleStatements, Parser::parse_statement, @@ -193,13 +132,13 @@ impl<'src> Parser<'src> { self.bump(TokenKind::EndOfFile); - ast::ModModule { + ModModule { body, - range: self.tokens_range, + range: TextRange::new(self.start_offset, self.current_token_range().end()), } } - fn finish(self) -> Vec { + fn finish(self, syntax: Mod) -> Parsed { assert_eq!( self.current_token_kind(), TokenKind::EndOfFile, @@ -208,13 +147,18 @@ impl<'src> Parser<'src> { // TODO consider re-integrating lexical error handling into the parser? let parse_errors = self.errors; - let lex_errors = self.tokens.finish(); + let (tokens, comment_ranges, lex_errors) = self.tokens.finish(); // Fast path for when there are no lex errors. // There's no fast path for when there are no parse errors because a lex error // always results in a parse error. 
if lex_errors.is_empty() { - return parse_errors; + return Parsed { + syntax, + tokens: Tokens::new(tokens), + comment_ranges, + errors: parse_errors, + }; } let mut merged = Vec::with_capacity(parse_errors.len().saturating_add(lex_errors.len())); @@ -241,7 +185,12 @@ impl<'src> Parser<'src> { merged.extend(parse_errors); merged.extend(lex_errors.map(ParseError::from)); - merged + Parsed { + syntax, + tokens: Tokens::new(tokens), + comment_ranges, + errors: merged, + } } /// Returns the start position for a node that starts at the current token. @@ -280,7 +229,7 @@ impl<'src> Parser<'src> { // // In either of the above cases, there's a "gap" between the end of the last token and start // of the current token. - if self.last_token_end <= start { + if self.prev_token_end <= start { // We need to create an empty range at the last token end instead of the start because // otherwise this node range will fall outside the range of it's parent node. Taking // the above example: @@ -302,9 +251,9 @@ impl<'src> Parser<'src> { // def foo # comment // def bar(): ... // def baz - TextRange::empty(self.last_token_end) + TextRange::empty(self.prev_token_end) } else { - TextRange::new(start, self.last_token_end) + TextRange::new(start, self.prev_token_end) } } @@ -319,65 +268,48 @@ impl<'src> Parser<'src> { // # ^^^^ expression range // # ^ last token end // ``` - TextRange::empty(self.last_token_end) + TextRange::empty(self.prev_token_end) } /// Moves the parser to the next token. - /// - /// Returns the old current token as an owned value. - fn next_token(&mut self) -> Spanned { - let next = self - .tokens - .next() - .unwrap_or_else(|| (Tok::EndOfFile, TextRange::empty(self.tokens_range.end()))); - - self.current_token_id.increment(); - - let current = std::mem::replace(&mut self.current, next); - + fn do_bump(&mut self, kind: TokenKind) { if !matches!( - current.0, + self.current_token_kind(), // TODO explore including everything up to the dedent as part of the body. - Tok::Dedent + TokenKind::Dedent // Don't include newlines in the body - | Tok::Newline + | TokenKind::Newline // TODO(micha): Including the semi feels more correct but it isn't compatible with lalrpop and breaks the // formatters semicolon detection. Exclude it for now - | Tok::Semi + | TokenKind::Semi ) { - self.last_token_end = current.1.end(); + self.prev_token_end = self.current_token_range().end(); } - current + self.tokens.bump(kind); + self.current_token_id.increment(); } /// Returns the next token kind without consuming it. - fn peek(&self) -> TokenKind { - self.tokens - .peek() - .map_or(TokenKind::EndOfFile, |spanned| spanned.0) + fn peek(&mut self) -> TokenKind { + self.tokens.peek() } - /// Returns the current token kind along with its range. - /// - /// Use [`Parser::current_token_kind`] or [`Parser::current_token_range`] to only get the kind - /// or range respectively. - #[inline] - fn current_token(&self) -> (TokenKind, TextRange) { - (self.current_token_kind(), self.current_token_range()) + /// Returns the next two token kinds without consuming it. + fn peek2(&mut self) -> (TokenKind, TokenKind) { + self.tokens.peek2() } /// Returns the current token kind. #[inline] fn current_token_kind(&self) -> TokenKind { - // TODO: Converting the token kind over and over again can be expensive. - TokenKind::from_token(&self.current.0) + self.tokens.current_kind() } /// Returns the range of the current token. 
#[inline] fn current_token_range(&self) -> TextRange { - self.current.1 + self.tokens.current_range() } /// Returns the current token ID. @@ -386,50 +318,88 @@ impl<'src> Parser<'src> { self.current_token_id } - /// Eat the current token if it is of the given kind, returning `true` in - /// that case. Otherwise, return `false`. + /// Bumps the current token assuming it is of the given kind. + /// + /// # Panics + /// + /// If the current token is not of the given kind. + fn bump(&mut self, kind: TokenKind) { + assert_eq!(self.current_token_kind(), kind); + + self.do_bump(kind); + } + + /// Take the token value from the underlying token source and bump the current token. + /// + /// # Panics + /// + /// If the current token is not of the given kind. + fn bump_value(&mut self, kind: TokenKind) -> TokenValue { + let value = self.tokens.take_value(); + self.bump(kind); + value + } + + /// Bumps the current token assuming it is found in the given token set. + /// + /// # Panics + /// + /// If the current token is not found in the given token set. + fn bump_ts(&mut self, ts: TokenSet) { + let kind = self.current_token_kind(); + assert!(ts.contains(kind)); + + self.do_bump(kind); + } + + /// Bumps the current token regardless of its kind and advances to the next token. + /// + /// # Panics + /// + /// If the parser is at end of file. + fn bump_any(&mut self) { + let kind = self.current_token_kind(); + assert_ne!(kind, TokenKind::EndOfFile); + + self.do_bump(kind); + } + + /// Bumps the soft keyword token as a `Name` token. + /// + /// # Panics + /// + /// If the current token is not a soft keyword. + pub(crate) fn bump_soft_keyword_as_name(&mut self) { + assert!(self.at_soft_keyword()); + + self.do_bump(TokenKind::Name); + } + + /// Consume the current token if it is of the given kind. Returns `true` if it matches, `false` + /// otherwise. fn eat(&mut self, kind: TokenKind) -> bool { if self.at(kind) { - self.next_token(); + self.do_bump(kind); true } else { false } } - /// Bumps the current token assuming it is of the given kind. - /// - /// Returns the current token as an owned value. - /// - /// # Panics - /// - /// If the current token is not of the given kind. - fn bump(&mut self, kind: TokenKind) -> (Tok, TextRange) { - assert_eq!(self.current_token_kind(), kind); - - self.next_token() - } - - /// Bumps the current token assuming it is found in the given token set. - /// - /// Returns the current token as an owned value. - /// - /// # Panics - /// - /// If the current token is not found in the given token set. - fn bump_ts(&mut self, ts: TokenSet) -> (Tok, TextRange) { - assert!(ts.contains(self.current_token_kind())); - - self.next_token() - } - + /// Eat the current token if its of the expected kind, otherwise adds an appropriate error. fn expect(&mut self, expected: TokenKind) -> bool { if self.eat(expected) { return true; } - let (found, range) = self.current_token(); - self.add_error(ParseErrorType::ExpectedToken { found, expected }, range); + self.add_error( + ParseErrorType::ExpectedToken { + found: self.current_token_kind(), + expected, + }, + self.current_token_range(), + ); + false } @@ -468,11 +438,7 @@ impl<'src> Parser<'src> { where T: Ranged, { - let range = ranged.range(); - // `ranged` uses absolute ranges to the source text of an entire file. Fix the source by - // subtracting the start offset when parsing only a part of a file (when parsing the tokens - // from `lex_starts_at`). 
- &self.source[range - self.tokens_range.start()] + &self.source[ranged.range()] } /// Parses a list of elements into a vector where each element is parsed using @@ -531,7 +497,7 @@ impl<'src> Parser<'src> { break; } - self.next_token(); + self.bump_any(); } } @@ -615,7 +581,7 @@ impl<'src> Parser<'src> { trailing_comma_range = None; } - self.next_token(); + self.bump_any(); } } @@ -641,6 +607,42 @@ impl<'src> Parser<'src> { false } + + /// Creates a checkpoint to which the parser can later return to using [`Self::rewind`]. + fn checkpoint(&self) -> ParserCheckpoint<'src> { + ParserCheckpoint { + tokens: self.tokens.checkpoint(), + errors_position: self.errors.len(), + current_token_id: self.current_token_id, + prev_token_end: self.prev_token_end, + recovery_context: self.recovery_context, + } + } + + /// Restore the parser to the given checkpoint. + fn rewind(&mut self, checkpoint: ParserCheckpoint<'src>) { + let ParserCheckpoint { + tokens, + errors_position, + current_token_id, + prev_token_end, + recovery_context, + } = checkpoint; + + self.tokens.rewind(tokens); + self.errors.truncate(errors_position); + self.current_token_id = current_token_id; + self.prev_token_end = prev_token_end; + self.recovery_context = recovery_context; + } +} + +struct ParserCheckpoint<'src> { + tokens: TokenSourceCheckpoint<'src>, + errors_position: usize, + current_token_id: TokenId, + prev_token_end: TextSize, + recovery_context: RecoveryContext, } #[derive(Copy, Clone, Debug, Eq, PartialEq)] @@ -872,7 +874,7 @@ impl RecoveryContextKind { fn is_list_terminator(self, p: &Parser) -> bool { match self { - // The program must consume all tokens until the end + // The parser must consume all tokens until the end RecoveryContextKind::ModuleStatements => false, RecoveryContextKind::BlockStatements => p.at(TokenKind::Dedent), @@ -1008,9 +1010,9 @@ impl RecoveryContextKind { RecoveryContextKind::Except => p.at(TokenKind::Except), RecoveryContextKind::AssignmentTargets => p.at(TokenKind::Equal), RecoveryContextKind::TypeParams => p.at_type_param(), - RecoveryContextKind::ImportNames => p.at(TokenKind::Name), + RecoveryContextKind::ImportNames => p.at_name_or_soft_keyword(), RecoveryContextKind::ImportFromAsNames(_) => { - matches!(p.current_token_kind(), TokenKind::Star | TokenKind::Name) + p.at(TokenKind::Star) || p.at_name_or_soft_keyword() } RecoveryContextKind::Slices => p.at(TokenKind::Colon) || p.at_expr(), RecoveryContextKind::ListElements @@ -1029,11 +1031,13 @@ impl RecoveryContextKind { RecoveryContextKind::MatchPatternClassArguments => p.at_pattern_start(), RecoveryContextKind::Arguments => p.at_expr(), RecoveryContextKind::DeleteTargets => p.at_expr(), - RecoveryContextKind::Identifiers => p.at(TokenKind::Name), - RecoveryContextKind::Parameters(_) => matches!( - p.current_token_kind(), - TokenKind::Name | TokenKind::Star | TokenKind::DoubleStar | TokenKind::Slash - ), + RecoveryContextKind::Identifiers => p.at_name_or_soft_keyword(), + RecoveryContextKind::Parameters(_) => { + matches!( + p.current_token_kind(), + TokenKind::Star | TokenKind::DoubleStar | TokenKind::Slash + ) || p.at_name_or_soft_keyword() + } RecoveryContextKind::WithItems(_) => p.at_expr(), RecoveryContextKind::FStringElements => matches!( p.current_token_kind(), diff --git a/crates/ruff_python_parser/src/parser/pattern.rs b/crates/ruff_python_parser/src/parser/pattern.rs index 4d200b4e42..c0fc818ca0 100644 --- a/crates/ruff_python_parser/src/parser/pattern.rs +++ b/crates/ruff_python_parser/src/parser/pattern.rs @@ -1,10 +1,11 @@ use 
ruff_python_ast::{self as ast, Expr, ExprContext, Number, Operator, Pattern, Singleton}; use ruff_text_size::{Ranged, TextSize}; +use crate::lexer::TokenValue; use crate::parser::progress::ParserProgress; use crate::parser::{recovery, Parser, RecoveryContextKind, SequenceMatchPatternParentheses}; use crate::token_set::TokenSet; -use crate::{ParseErrorType, Tok, TokenKind}; +use crate::{ParseErrorType, TokenKind}; use super::expression::ExpressionContext; @@ -50,12 +51,12 @@ const MAPPING_PATTERN_START_SET: TokenSet = TokenSet::new([ impl<'src> Parser<'src> { /// Returns `true` if the current token is a valid start of a pattern. pub(super) fn at_pattern_start(&self) -> bool { - self.at_ts(PATTERN_START_SET) + self.at_ts(PATTERN_START_SET) || self.at_soft_keyword() } /// Returns `true` if the current token is a valid start of a mapping pattern. pub(super) fn at_mapping_pattern_start(&self) -> bool { - self.at_ts(MAPPING_PATTERN_START_SET) + self.at_ts(MAPPING_PATTERN_START_SET) || self.at_soft_keyword() } /// Entry point to start parsing a pattern. @@ -397,7 +398,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Complex => { - let (Tok::Complex { real, imag }, _) = self.bump(TokenKind::Complex) else { + let TokenValue::Complex { real, imag } = self.bump_value(TokenKind::Complex) else { unreachable!() }; let range = self.node_range(start); @@ -411,7 +412,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Int => { - let (Tok::Int { value }, _) = self.bump(TokenKind::Int) else { + let TokenValue::Int(value) = self.bump_value(TokenKind::Int) else { unreachable!() }; let range = self.node_range(start); @@ -425,7 +426,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Float => { - let (Tok::Float { value }, _) = self.bump(TokenKind::Float) else { + let TokenValue::Float(value) = self.bump_value(TokenKind::Float) else { unreachable!() }; let range = self.node_range(start); @@ -438,46 +439,6 @@ impl<'src> Parser<'src> { range, }) } - TokenKind::Name if self.peek() == TokenKind::Dot => { - let (Tok::Name { name }, _) = self.bump(TokenKind::Name) else { - unreachable!() - }; - let id = Expr::Name(ast::ExprName { - id: name.to_string(), - ctx: ExprContext::Load, - range: self.node_range(start), - }); - - let attribute = self.parse_attr_expr_for_match_pattern(id, start); - - Pattern::MatchValue(ast::PatternMatchValue { - value: Box::new(attribute), - range: self.node_range(start), - }) - } - TokenKind::Name => { - let (Tok::Name { name }, _) = self.bump(TokenKind::Name) else { - unreachable!() - }; - let range = self.node_range(start); - - // test_ok match_as_pattern - // match foo: - // case foo_bar: ... - // case _: ... - Pattern::MatchAs(ast::PatternMatchAs { - range, - pattern: None, - name: if &*name == "_" { - None - } else { - Some(ast::Identifier { - id: name.to_string(), - range, - }) - }, - }) - } kind => { // The `+` is only for better error recovery. if let Some(unary_arithmetic_op) = kind.as_unary_arithmetic_operator() { @@ -506,26 +467,57 @@ impl<'src> Parser<'src> { } } - // Upon encountering an unexpected token, return a `Pattern::MatchValue` containing - // an empty `Expr::Name`. - let invalid_node = if kind.is_keyword() { - Expr::Name(self.parse_name()) + if self.at_name_or_keyword() { + if self.peek() == TokenKind::Dot { + // test_ok match_attr_pattern_soft_keyword + // match foo: + // case match.bar: ... + // case case.bar: ... + // case type.bar: ... + // case match.case.type.bar.type.case.match: ... 
+ let id = Expr::Name(self.parse_name()); + + let attribute = self.parse_attr_expr_for_match_pattern(id, start); + + Pattern::MatchValue(ast::PatternMatchValue { + value: Box::new(attribute), + range: self.node_range(start), + }) + } else { + // test_ok match_as_pattern_soft_keyword + // match foo: + // case case: ... + // case match: ... + // case type: ... + let ident = self.parse_identifier(); + + // test_ok match_as_pattern + // match foo: + // case foo_bar: ... + // case _: ... + Pattern::MatchAs(ast::PatternMatchAs { + range: ident.range, + pattern: None, + name: if &ident == "_" { None } else { Some(ident) }, + }) + } } else { + // Upon encountering an unexpected token, return a `Pattern::MatchValue` containing + // an empty `Expr::Name`. self.add_error( ParseErrorType::OtherError("Expected a pattern".to_string()), self.current_token_range(), ); - Expr::Name(ast::ExprName { + let invalid_node = Expr::Name(ast::ExprName { range: self.missing_node_range(), id: String::new(), ctx: ExprContext::Invalid, + }); + Pattern::MatchValue(ast::PatternMatchValue { + range: invalid_node.range(), + value: Box::new(invalid_node), }) - }; - - Pattern::MatchValue(ast::PatternMatchValue { - range: invalid_node.range(), - value: Box::new(invalid_node), - }) + } } } } diff --git a/crates/ruff_python_parser/src/parser/statement.rs b/crates/ruff_python_parser/src/parser/statement.rs index 69d7ec8a57..3e9a047db1 100644 --- a/crates/ruff_python_parser/src/parser/statement.rs +++ b/crates/ruff_python_parser/src/parser/statement.rs @@ -8,13 +8,14 @@ use ruff_python_ast::{ }; use ruff_text_size::{Ranged, TextSize}; +use crate::lexer::TokenValue; use crate::parser::expression::{GeneratorExpressionInParentheses, ParsedExpr, EXPR_SET}; use crate::parser::progress::ParserProgress; use crate::parser::{ helpers, FunctionKind, Parser, RecoveryContext, RecoveryContextKind, WithItemKind, }; use crate::token_set::TokenSet; -use crate::{Mode, ParseErrorType, Tok, TokenKind}; +use crate::{Mode, ParseErrorType, TokenKind}; use super::expression::{ExpressionContext, OperatorPrecedence}; use super::Parenthesized; @@ -84,13 +85,13 @@ impl<'src> Parser<'src> { /// Returns `true` if the current token is the start of a simple statement, /// including expressions. fn at_simple_stmt(&self) -> bool { - self.at_ts(SIMPLE_STMT_WITH_EXPR_SET) + self.at_ts(SIMPLE_STMT_WITH_EXPR_SET) || self.at_soft_keyword() } /// Returns `true` if the current token is the start of a simple, compound or expression /// statement. pub(super) fn at_stmt(&self) -> bool { - self.at_ts(STMTS_SET) + self.at_ts(STMTS_SET) || self.at_soft_keyword() } /// Checks if the parser is currently positioned at the start of a type parameter. @@ -120,8 +121,26 @@ impl<'src> Parser<'src> { TokenKind::With => Stmt::With(self.parse_with_statement(start)), TokenKind::At => self.parse_decorators(), TokenKind::Async => self.parse_async_statement(), - TokenKind::Match => Stmt::Match(self.parse_match_statement()), - _ => self.parse_single_simple_statement(), + token => { + if token == TokenKind::Match { + // Match is considered a soft keyword, so we will treat it as an identifier if + // it's followed by an unexpected token. 
+ + match self.classify_match_token() { + MatchTokenKind::Keyword => { + return Stmt::Match(self.parse_match_statement()); + } + MatchTokenKind::KeywordOrIdentifier => { + if let Some(match_stmt) = self.try_parse_match_statement() { + return Stmt::Match(match_stmt); + } + } + MatchTokenKind::Identifier => {} + } + } + + self.parse_single_simple_statement() + } } } @@ -252,11 +271,22 @@ impl<'src> Parser<'src> { TokenKind::Assert => Stmt::Assert(self.parse_assert_statement()), TokenKind::Global => Stmt::Global(self.parse_global_statement()), TokenKind::Nonlocal => Stmt::Nonlocal(self.parse_nonlocal_statement()), - TokenKind::Type => Stmt::TypeAlias(self.parse_type_alias_statement()), TokenKind::IpyEscapeCommand => { Stmt::IpyEscapeCommand(self.parse_ipython_escape_command_statement()) } - _ => { + token => { + if token == TokenKind::Type { + // Type is considered a soft keyword, so we will treat it as an identifier if + // it's followed by an unexpected token. + let (first, second) = self.peek2(); + + if (first == TokenKind::Name || first.is_soft_keyword()) + && matches!(second, TokenKind::Lsqb | TokenKind::Equal) + { + return Stmt::TypeAlias(self.parse_type_alias_statement()); + } + } + let start = self.node_start(); // simple_stmt: `... | yield_stmt | star_expressions | ...` @@ -498,7 +528,12 @@ impl<'src> Parser<'src> { } } - let module = if self.at(TokenKind::Name) { + let module = if self.at_name_or_soft_keyword() { + // test_ok from_import_soft_keyword_module_name + // from match import pattern + // from type import bar + // from case import pattern + // from match.type.case import foo Some(self.parse_dotted_name()) } else { if leading_dots == 0 { @@ -603,7 +638,11 @@ impl<'src> Parser<'src> { }; let asname = if self.eat(TokenKind::As) { - if self.at(TokenKind::Name) { + if self.at_name_or_soft_keyword() { + // test_ok import_as_name_soft_keyword + // import foo as match + // import bar as case + // import baz as type Some(self.parse_identifier()) } else { // test_err import_alias_missing_asname @@ -872,7 +911,8 @@ impl<'src> Parser<'src> { fn parse_ipython_escape_command_statement(&mut self) -> ast::StmtIpyEscapeCommand { let start = self.node_start(); - let (Tok::IpyEscapeCommand { value, kind }, _) = self.bump(TokenKind::IpyEscapeCommand) + let TokenValue::IpyEscapeCommand { value, kind } = + self.bump_value(TokenKind::IpyEscapeCommand) else { unreachable!() }; @@ -1469,7 +1509,12 @@ impl<'src> Parser<'src> { }; let name = if self.eat(TokenKind::As) { - if self.at(TokenKind::Name) { + if self.at_name_or_soft_keyword() { + // test_ok except_stmt_as_name_soft_keyword + // try: ... + // except Exception as match: ... + // except Exception as case: ... + // except Exception as type: ... Some(self.parse_identifier()) } else { // test_err except_stmt_missing_as_name @@ -2327,6 +2372,84 @@ impl<'src> Parser<'src> { target } + /// Try parsing a `match` statement. + /// + /// This uses speculative parsing to remove the ambiguity of whether the `match` token is used + /// as a keyword or an identifier. This ambiguity arises only in if the `match` token is + /// followed by certain tokens. 
For example, if `match` is followed by `[`, we can't know if + /// it's used in the context of a subscript expression or as a list expression: + /// + /// ```python + /// # Subcript expression; `match` is an identifier + /// match[x] + /// + /// # List expression; `match` is a keyword + /// match [x, y]: + /// case [1, 2]: + /// pass + /// ``` + /// + /// This is done by parsing the subject expression considering `match` as a keyword token. + /// Then, based on certain heuristics we'll determine if our assumption is true. If so, we'll + /// continue parsing the entire match statement. Otherwise, return `None`. + /// + /// # Panics + /// + /// If the parser isn't positioned at a `match` token. + /// + /// See: + fn try_parse_match_statement(&mut self) -> Option { + let checkpoint = self.checkpoint(); + + let start = self.node_start(); + self.bump(TokenKind::Match); + + let subject = self.parse_match_subject_expression(); + + match self.current_token_kind() { + TokenKind::Colon => { + // `match` is a keyword + self.bump(TokenKind::Colon); + + let cases = self.parse_match_body(); + + Some(ast::StmtMatch { + subject: Box::new(subject), + cases, + range: self.node_range(start), + }) + } + TokenKind::Newline if matches!(self.peek2(), (TokenKind::Indent, TokenKind::Case)) => { + // `match` is a keyword + + // test_err match_expected_colon + // match [1, 2] + // case _: ... + self.add_error( + ParseErrorType::ExpectedToken { + found: self.current_token_kind(), + expected: TokenKind::Colon, + }, + self.current_token_range(), + ); + + let cases = self.parse_match_body(); + + Some(ast::StmtMatch { + subject: Box::new(subject), + cases, + range: self.node_range(start), + }) + } + _ => { + // `match` is an identifier + self.rewind(checkpoint); + + None + } + } + } + /// Parses a match statement. /// /// # Panics @@ -2338,7 +2461,21 @@ impl<'src> Parser<'src> { let start = self.node_start(); self.bump(TokenKind::Match); - let subject_start = self.node_start(); + let subject = self.parse_match_subject_expression(); + self.expect(TokenKind::Colon); + + let cases = self.parse_match_body(); + + ast::StmtMatch { + subject: Box::new(subject), + cases, + range: self.node_range(start), + } + } + + /// Parses the subject expression for a `match` statement. + fn parse_match_subject_expression(&mut self) -> Expr { + let start = self.node_start(); // Subject expression grammar is: // @@ -2370,13 +2507,12 @@ impl<'src> Parser<'src> { // case _: ... // match yield x: // case _: ... - let subject = if self.at(TokenKind::Comma) { - let tuple = - self.parse_tuple_expression(subject.expr, subject_start, Parenthesized::No, |p| { - p.parse_named_expression_or_higher(ExpressionContext::starred_bitwise_or()) - }); + if self.at(TokenKind::Comma) { + let tuple = self.parse_tuple_expression(subject.expr, start, Parenthesized::No, |p| { + p.parse_named_expression_or_higher(ExpressionContext::starred_bitwise_or()) + }); - Expr::Tuple(tuple).into() + Expr::Tuple(tuple) } else { if subject.is_unparenthesized_starred_expr() { // test_err match_stmt_single_starred_subject @@ -2384,11 +2520,15 @@ impl<'src> Parser<'src> { // case _: ... self.add_error(ParseErrorType::InvalidStarredExpressionUsage, &subject); } - subject - }; - - self.expect(TokenKind::Colon); + subject.expr + } + } + /// Parses the body of a `match` statement. + /// + /// This method expects that the parser is positioned at a `Newline` token. If not, it adds a + /// syntax error and continues parsing. 
+ fn parse_match_body(&mut self) -> Vec { // test_err match_stmt_no_newline_before_case // match foo: case _: ... self.expect(TokenKind::Newline); @@ -2411,11 +2551,7 @@ impl<'src> Parser<'src> { // TODO(dhruvmanila): Should we expect `Dedent` only if there was an `Indent` present? self.expect(TokenKind::Dedent); - ast::StmtMatch { - subject: Box::new(subject.expr), - cases, - range: self.node_range(start), - } + cases } /// Parses a list of match case blocks. @@ -2458,7 +2594,6 @@ impl<'src> Parser<'src> { self.bump(TokenKind::Case); // test_err match_stmt_missing_pattern - // # TODO(dhruvmanila): Here, `case` is a name token because of soft keyword transformer // match x: // case : ... let pattern = self.parse_match_patterns(); @@ -2557,8 +2692,6 @@ impl<'src> Parser<'src> { // async while test: ... // async x = 1 // async async def foo(): ... - // # TODO(dhruvmanila): Here, `match` is actually a Name token because - // # of the soft keyword # transformer // async match test: // case _: ... self.add_error( @@ -2890,7 +3023,7 @@ impl<'src> Parser<'src> { let star_range = parser.current_token_range(); parser.bump(TokenKind::Star); - if parser.at(TokenKind::Name) { + if parser.at_name_or_soft_keyword() { let param = parser.parse_parameter(param_start, function_kind, AllowStarAnnotation::Yes); let param_star_range = parser.node_range(star_range.start()); @@ -3049,7 +3182,7 @@ impl<'src> Parser<'src> { last_keyword_only_separator_range = None; } - TokenKind::Name => { + _ if parser.at_name_or_soft_keyword() => { let param = parser.parse_parameter_with_default(param_start, function_kind); // TODO(dhruvmanila): Pyright seems to only highlight the first non-default argument @@ -3386,6 +3519,122 @@ impl<'src> Parser<'src> { } } + /// Classify the `match` soft keyword token. + /// + /// # Panics + /// + /// If the parser isn't positioned at a `match` token. + fn classify_match_token(&mut self) -> MatchTokenKind { + assert_eq!(self.current_token_kind(), TokenKind::Match); + + let (first, second) = self.peek2(); + + match first { + // test_ok match_classify_as_identifier_1 + // match not in case + TokenKind::Not if second == TokenKind::In => MatchTokenKind::Identifier, + + // test_ok match_classify_as_keyword_1 + // match foo: + // case _: ... + // match 1: + // case _: ... + // match 1.0: + // case _: ... + // match 1j: + // case _: ... + // match "foo": + // case _: ... + // match f"foo {x}": + // case _: ... + // match {1, 2}: + // case _: ... + // match ~foo: + // case _: ... + // match ...: + // case _: ... + // match not foo: + // case _: ... + // match await foo(): + // case _: ... + // match lambda foo: foo: + // case _: ... + + // test_err match_classify_as_keyword + // match yield foo: + // case _: ... + TokenKind::Name + | TokenKind::Int + | TokenKind::Float + | TokenKind::Complex + | TokenKind::String + | TokenKind::FStringStart + | TokenKind::Lbrace + | TokenKind::Tilde + | TokenKind::Ellipsis + | TokenKind::Not + | TokenKind::Await + | TokenKind::Yield + | TokenKind::Lambda => MatchTokenKind::Keyword, + + // test_ok match_classify_as_keyword_or_identifier + // match (1, 2) # Identifier + // match (1, 2): # Keyword + // case _: ... + // match [1:] # Identifier + // match [1, 2]: # Keyword + // case _: ... + // match * foo # Identifier + // match - foo # Identifier + // match -foo: # Keyword + // case _: ... + + // test_err match_classify_as_keyword_or_identifier + // match *foo: # Keyword + // case _: ... 
+ TokenKind::Lpar + | TokenKind::Lsqb + | TokenKind::Star + | TokenKind::Plus + | TokenKind::Minus => MatchTokenKind::KeywordOrIdentifier, + + _ => { + if first.is_soft_keyword() || first.is_singleton() { + // test_ok match_classify_as_keyword_2 + // match match: + // case _: ... + // match case: + // case _: ... + // match type: + // case _: ... + // match None: + // case _: ... + // match True: + // case _: ... + // match False: + // case _: ... + MatchTokenKind::Keyword + } else { + // test_ok match_classify_as_identifier_2 + // match + // match != foo + // (foo, match) + // [foo, match] + // {foo, match} + // match; + // match: int + // match, + // match.foo + // match / foo + // match << foo + // match and foo + // match is not foo + MatchTokenKind::Identifier + } + } + } + } + /// Specialized [`Parser::parse_list_into_vec`] for parsing a sequence of clauses. /// /// The difference is that the parser only continues parsing for as long as it sees the token @@ -3477,6 +3726,46 @@ impl Display for Clause { } } +/// The classification of the `match` token. +/// +/// The `match` token is a soft keyword which means, depending on the context, it can be used as a +/// keyword or an identifier. +#[derive(Debug, Clone, Copy)] +enum MatchTokenKind { + /// The `match` token is used as a keyword. + /// + /// For example: + /// ```python + /// match foo: + /// case _: + /// pass + /// ``` + Keyword, + + /// The `match` token is used as an identifier. + /// + /// For example: + /// ```python + /// match.values() + /// match is None + /// ```` + Identifier, + + /// The `match` token is used as either a keyword or an identifier. + /// + /// For example: + /// ```python + /// # Used as a keyword + /// match [x, y]: + /// case [1, 2]: + /// pass + /// + /// # Used as an identifier + /// match[x] + /// ``` + KeywordOrIdentifier, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum WithItemParsingState { /// The parser is currently parsing a with item without any ambiguity. diff --git a/crates/ruff_python_parser/src/parser/tests.rs b/crates/ruff_python_parser/src/parser/tests.rs index ec23d01d27..09bc41e7f7 100644 --- a/crates/ruff_python_parser/src/parser/tests.rs +++ b/crates/ruff_python_parser/src/parser/tests.rs @@ -1,4 +1,4 @@ -use crate::{lex, parse, parse_expression, parse_suite, parse_tokens, Mode}; +use crate::{parse, parse_expression, parse_module, Mode}; #[test] fn test_modes() { @@ -45,23 +45,23 @@ fn test_expr_mode_valid_syntax() { let source = "first "; - let expr = parse_expression(source).unwrap(); + let parsed = parse_expression(source).unwrap(); - insta::assert_debug_snapshot!(expr); + insta::assert_debug_snapshot!(parsed.expr()); } #[test] fn test_unicode_aliases() { // https://github.com/RustPython/RustPython/issues/4566 let source = r#"x = "\N{BACKSPACE}another cool trick""#; - let parse_ast = parse_suite(source).unwrap(); + let suite = parse_module(source).unwrap().into_suite(); - insta::assert_debug_snapshot!(parse_ast); + insta::assert_debug_snapshot!(suite); } #[test] fn test_ipython_escape_commands() { - let parse_ast = parse( + let parsed = parse( r" # Normal Python code ( @@ -132,21 +132,5 @@ foo.bar[0].baz[2].egg?? 
Mode::Ipython, ) .unwrap(); - insta::assert_debug_snapshot!(parse_ast); -} - -#[test] -fn test_ipython_escape_command_parse_error() { - let source = r" -a = 1 -%timeit a == 1 - " - .trim(); - let lxr = lex(source, Mode::Ipython); - let parse_err = parse_tokens(lxr.collect(), source, Mode::Module).unwrap_err(); - assert_eq!( - parse_err.to_string(), - "IPython escape commands are only allowed in `Mode::Ipython` at byte range 6..20" - .to_string() - ); + insta::assert_debug_snapshot!(parsed.syntax()); } diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__assignment.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__assignment.snap index c4232bccf1..248f1eab3f 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__assignment.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__assignment.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - Name { - name: "a_variable", - }, + Name( + "a_variable", + ), 0..10, ), ( @@ -14,9 +16,9 @@ expression: lex_source(source) 11..12, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 13..15, ), ( @@ -24,9 +26,9 @@ expression: lex_source(source) 16..17, ), ( - Int { - value: 2, - }, + Int( + 2, + ), 18..19, ), ( @@ -34,9 +36,9 @@ expression: lex_source(source) 19..20, ), ( - Int { - value: 0, - }, + Int( + 0, + ), 20..21, ), ( @@ -44,3 +46,4 @@ expression: lex_source(source) 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_mac_eol.snap index 5a0e7933e9..9e3a9cee5a 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_mac_eol.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: comment_until_eol(MAC_EOL) --- +## Tokens +``` [ ( - Int { - value: 123, - }, + Int( + 123, + ), 0..3, ), ( - Comment( - "# Foo", - ), + Comment, 5..10, ), ( @@ -20,9 +20,9 @@ expression: comment_until_eol(MAC_EOL) 10..11, ), ( - Int { - value: 456, - }, + Int( + 456, + ), 11..14, ), ( @@ -30,3 +30,4 @@ expression: comment_until_eol(MAC_EOL) 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_unix_eol.snap index 3fdbd4c10f..6b884348b1 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_unix_eol.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: comment_until_eol(UNIX_EOL) --- +## Tokens +``` [ ( - Int { - value: 123, - }, + Int( + 123, + ), 0..3, ), ( - Comment( - "# Foo", - ), + Comment, 5..10, ), ( @@ -20,9 +20,9 @@ expression: comment_until_eol(UNIX_EOL) 10..11, ), ( - Int { - value: 456, - }, + Int( + 456, + ), 11..14, ), ( @@ -30,3 +30,4 @@ expression: comment_until_eol(UNIX_EOL) 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_windows_eol.snap 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_windows_eol.snap index fcf5cfcb80..fa24089239 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_windows_eol.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: comment_until_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - Int { - value: 123, - }, + Int( + 123, + ), 0..3, ), ( - Comment( - "# Foo", - ), + Comment, 5..10, ), ( @@ -20,9 +20,9 @@ expression: comment_until_eol(WINDOWS_EOL) 10..12, ), ( - Int { - value: 456, - }, + Int( + 456, + ), 12..15, ), ( @@ -30,3 +30,4 @@ expression: comment_until_eol(WINDOWS_EOL) 15..15, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__dedent_after_whitespace.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__dedent_after_whitespace.snap new file mode 100644 index 0000000000..698e077bff --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__dedent_after_whitespace.snap @@ -0,0 +1,79 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +## Tokens +``` +[ + ( + If, + 0..2, + ), + ( + Name( + "first", + ), + 3..8, + ), + ( + Colon, + 8..9, + ), + ( + Newline, + 9..10, + ), + ( + Indent, + 10..14, + ), + ( + If, + 14..16, + ), + ( + Name( + "second", + ), + 17..23, + ), + ( + Colon, + 23..24, + ), + ( + Newline, + 24..25, + ), + ( + Indent, + 25..33, + ), + ( + Pass, + 33..37, + ), + ( + Newline, + 37..38, + ), + ( + Dedent, + 42..42, + ), + ( + Name( + "foo", + ), + 42..45, + ), + ( + Newline, + 45..46, + ), + ( + Dedent, + 46..46, + ), +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_mac_eol.snap index 498d3cc426..f877c10bee 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_mac_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_eol(MAC_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_eol(MAC_EOL) 12..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 15..16, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_eol(MAC_EOL) 21..27, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 28..30, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_eol(MAC_EOL) 32..32, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_mac_eol.snap index a27a11a6cb..7c2082732f 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_mac_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_tabs_eol(MAC_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - 
name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_tabs_eol(MAC_EOL) 12..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 15..16, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_tabs_eol(MAC_EOL) 22..28, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 29..31, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_tabs_eol(MAC_EOL) 33..33, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_unix_eol.snap index 69fe4a3cce..214b173410 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_unix_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_tabs_eol(UNIX_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_tabs_eol(UNIX_EOL) 12..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 15..16, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_tabs_eol(UNIX_EOL) 22..28, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 29..31, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_tabs_eol(UNIX_EOL) 33..33, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_windows_eol.snap index f07534c23e..79bb8e6f48 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_windows_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_tabs_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_tabs_eol(WINDOWS_EOL) 13..15, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 16..17, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_tabs_eol(WINDOWS_EOL) 25..31, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 32..34, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_tabs_eol(WINDOWS_EOL) 38..38, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_unix_eol.snap index 49b3db404d..a01a3dd252 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_unix_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_eol(UNIX_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_eol(UNIX_EOL) 12..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 15..16, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_eol(UNIX_EOL) 21..27, ), ( - Int { - value: 
99, - }, + Int( + 99, + ), 28..30, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_eol(UNIX_EOL) 32..32, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_windows_eol.snap index 2ebebf4483..2f84b6b91a 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_windows_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_eol(WINDOWS_EOL) 13..15, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 16..17, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_eol(WINDOWS_EOL) 24..30, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 31..33, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_eol(WINDOWS_EOL) 37..37, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__emoji_identifier.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__emoji_identifier.snap new file mode 100644 index 0000000000..0a9bec6cf9 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__emoji_identifier.snap @@ -0,0 +1,24 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: "lex_invalid(source, Mode::Module)" +--- +## Tokens +``` +[ + ( + Unknown, + 0..4, + ), +] +``` +## Errors +``` +[ + LexicalError { + error: UnrecognizedToken { + tok: '🐦', + }, + location: 0..4, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap index 9733379a7b..2e6c623f95 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap @@ -2,115 +2,97 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 2..3, - ), - ( - String { - value: "", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, - 4..6, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, + TokenFlags( + DOUBLE_QUOTES | F_STRING, ), + ), + ( + String( + "", + ), + 4..6, + TokenFlags( + DOUBLE_QUOTES, + ), + ), + ( + FStringStart, 7..9, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 9..10, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 11..13, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 13..14, + TokenFlags( + F_STRING, + ), ), ( - String { - value: "", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - 
quote_style: Single, - }, - }, + String( + "", + ), 15..17, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - ), + FStringStart, 18..22, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 22..25, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - ), + FStringStart, 26..30, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 30..33, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Newline, 33..33, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_ipython_escape_command.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_ipython_escape_command.snap index 133690977b..848e576a83 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_ipython_escape_command.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_ipython_escape_command.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -103,3 +105,4 @@ expression: lex_jupyter_source(source) 20..20, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__escape_unicode_name.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__escape_unicode_name.snap index 34fd624fa6..baa500ccb7 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__escape_unicode_name.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__escape_unicode_name.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - String { - value: "\\N{EN SPACE}", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "\\N{EN SPACE}", + ), 0..14, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( Newline, 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap index cdc24e203e..cd6778a73a 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap @@ -2,40 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "normal ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "normal ", + ), 2..9, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 9..10, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 10..13, ), ( @@ -43,26 +36,22 @@ expression: lex_source(source) 13..14, ), ( - FStringMiddle { - value: " {another} ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + 
FStringMiddle( + " {another} ", + ), 14..27, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 27..28, ), ( - Name { - name: "bar", - }, + Name( + "bar", + ), 28..31, ), ( @@ -70,26 +59,22 @@ expression: lex_source(source) 31..32, ), ( - FStringMiddle { - value: " {", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " {", + ), 32..35, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 35..36, ), ( - Name { - name: "three", - }, + Name( + "three", + ), 36..41, ), ( @@ -97,24 +82,24 @@ expression: lex_source(source) 41..42, ), ( - FStringMiddle { - value: "}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "}", + ), 42..44, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 44..45, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 45..45, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap index 115fc4991d..8eb4842ebb 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap @@ -2,40 +2,31 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - ), + FStringStart, 0..4, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "\n# not a comment ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + FStringMiddle( + "\n# not a comment ", + ), 4..21, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Lbrace, 21..22, ), ( - Comment( - "# comment {", - ), + Comment, 23..34, ), ( @@ -43,9 +34,9 @@ expression: lex_source(source) 34..35, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 39..40, ), ( @@ -57,24 +48,24 @@ expression: lex_source(source) 41..42, ), ( - FStringMiddle { - value: " # not a comment\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + FStringMiddle( + " # not a comment\n", + ), 42..59, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 59..62, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Newline, 62..62, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap index 9e237274e1..bcda1c925b 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap @@ -2,27 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 2..3, ), ( - Name { - name: "x", 
- }, + Name( + "x", + ), 3..4, ), ( @@ -30,9 +27,9 @@ expression: lex_source(source) 4..5, ), ( - Name { - name: "s", - }, + Name( + "s", + ), 5..6, ), ( @@ -40,26 +37,22 @@ expression: lex_source(source) 6..7, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 7..8, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 8..9, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 9..10, ), ( @@ -71,9 +64,9 @@ expression: lex_source(source) 11..12, ), ( - Name { - name: "r", - }, + Name( + "r", + ), 12..13, ), ( @@ -81,26 +74,22 @@ expression: lex_source(source) 13..14, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 14..15, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 15..16, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 16..17, ), ( @@ -108,41 +97,37 @@ expression: lex_source(source) 17..18, ), ( - FStringMiddle { - value: ".3f!r", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + ".3f!r", + ), 18..23, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 23..24, ), ( - FStringMiddle { - value: " {x!r}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " {x!r}", + ), 24..32, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 32..33, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 33..33, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap index d42ff61b5e..b581901ed9 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap @@ -2,40 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "\\", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\", + ), 2..3, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 3..4, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 4..5, ), ( @@ -43,26 +36,22 @@ expression: lex_source(source) 5..6, ), ( - FStringMiddle { - value: "\\\"\\", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\\"\\", + ), 6..9, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 9..10, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 10..11, ), ( @@ -74,24 +63,24 @@ expression: lex_source(source) 12..13, ), ( - FStringMiddle { - value: " \\\"\\\"\\\n end", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " \\\"\\\"\\\n end", + ), 13..24, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 24..25, + TokenFlags( + 
DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 25..25, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_braces.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_braces.snap index e4cc748fa5..d8d007d560 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_braces.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_braces.snap @@ -2,40 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "\\", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "\\", + ), 2..3, + TokenFlags( + F_STRING, + ), ), ( Lbrace, 3..4, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -45,40 +38,34 @@ expression: lex_source(source) ( FStringEnd, 8..9, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, + TokenFlags( + F_STRING, ), - 10..12, ), ( - FStringMiddle { - value: "\\\\", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringStart, + 10..12, + TokenFlags( + F_STRING, + ), + ), + ( + FStringMiddle( + "\\\\", + ), 12..14, + TokenFlags( + F_STRING, + ), ), ( Lbrace, 14..15, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 15..18, ), ( @@ -88,67 +75,59 @@ expression: lex_source(source) ( FStringEnd, 19..20, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, + TokenFlags( + F_STRING, ), - 21..23, ), ( - FStringMiddle { - value: "\\{foo}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringStart, + 21..23, + TokenFlags( + F_STRING, + ), + ), + ( + FStringMiddle( + "\\{foo}", + ), 23..31, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 31..32, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, + TokenFlags( + F_STRING, ), - 33..35, ), ( - FStringMiddle { - value: "\\\\{foo}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringStart, + 33..35, + TokenFlags( + F_STRING, + ), + ), + ( + FStringMiddle( + "\\\\{foo}", + ), 35..44, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 44..45, + TokenFlags( + F_STRING, + ), ), ( Newline, 45..45, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap index af0f7391c5..e92513e5bb 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap @@ -2,44 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - 
), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..3, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringMiddle { - value: "\\", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\", + ), 3..4, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Lbrace, 4..5, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 5..6, ), ( @@ -47,28 +36,22 @@ expression: lex_source(source) 6..7, ), ( - FStringMiddle { - value: "\\\"\\", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\\"\\", + ), 7..10, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Lbrace, 10..11, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 11..12, ), ( @@ -80,26 +63,24 @@ expression: lex_source(source) 13..14, ), ( - FStringMiddle { - value: " \\\"\\\"\\\n end", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " \\\"\\\"\\\n end", + ), 14..25, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 25..26, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Newline, 26..26, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap index c02888312a..fef1db4f33 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap @@ -2,31 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "first ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "first ", + ), 2..8, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -37,9 +30,9 @@ expression: lex_source(source) 9..10, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 14..15, ), ( @@ -55,9 +48,9 @@ expression: lex_source(source) 25..26, ), ( - Name { - name: "y", - }, + Name( + "y", + ), 38..39, ), ( @@ -69,24 +62,24 @@ expression: lex_source(source) 40..41, ), ( - FStringMiddle { - value: " second", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " second", + ), 41..48, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 48..49, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 49..49, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap index d9a0765595..0393d76865 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap +++ 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap @@ -2,127 +2,99 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - ), + FStringStart, 0..4, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "\nhello\n world\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + FStringMiddle( + "\nhello\n world\n", + ), 4..21, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 21..24, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, ), - 25..29, ), ( - FStringMiddle { - value: "\n world\nhello\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringStart, + 25..29, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), + ), + ( + FStringMiddle( + "\n world\nhello\n", + ), 29..46, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 46..49, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, ), - 50..52, ), ( - FStringMiddle { - value: "some ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringStart, + 50..52, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), + ), + ( + FStringMiddle( + "some ", + ), 52..57, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 57..58, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - ), + FStringStart, 58..62, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "multiline\nallowed ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + FStringMiddle( + "multiline\nallowed ", + ), 62..80, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Lbrace, 80..81, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 81..82, ), ( @@ -132,30 +104,33 @@ expression: lex_source(source) ( FStringEnd, 83..86, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Rbrace, 86..87, ), ( - FStringMiddle { - value: " string", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " string", + ), 87..94, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 94..95, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 95..95, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap index 2ee532ba55..0f729e45f8 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs 
expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "\\N{BULLET} normal \\Nope \\N", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\N{BULLET} normal \\Nope \\N", + ), 2..28, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 28..29, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 29..29, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap index 6de98ec526..760a715374 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap @@ -2,44 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..3, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringMiddle { - value: "\\N", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\N", + ), 3..5, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Lbrace, 5..6, ), ( - Name { - name: "BULLET", - }, + Name( + "BULLET", + ), 6..12, ), ( @@ -47,26 +36,24 @@ expression: lex_source(source) 12..13, ), ( - FStringMiddle { - value: " normal", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " normal", + ), 13..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 20..21, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Newline, 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap index 02ff537d09..3e82eadf77 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap @@ -2,69 +2,53 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "foo ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "foo ", + ), 2..6, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 6..7, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 7..9, + TokenFlags( + DOUBLE_QUOTES | 
F_STRING, + ), ), ( - FStringMiddle { - value: "bar ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "bar ", + ), 9..13, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 13..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 14..15, ), ( @@ -72,25 +56,20 @@ expression: lex_source(source) 16..17, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 18..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 20..21, ), ( - Name { - name: "wow", - }, + Name( + "wow", + ), 21..24, ), ( @@ -100,6 +79,9 @@ expression: lex_source(source) ( FStringEnd, 25..26, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, @@ -108,135 +90,112 @@ expression: lex_source(source) ( FStringEnd, 27..28, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 28..29, ), ( - FStringMiddle { - value: " baz", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " baz", + ), 29..33, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 33..34, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, + TokenFlags( + DOUBLE_QUOTES | F_STRING, ), - 35..37, ), ( - FStringMiddle { - value: "foo ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringStart, + 35..37, + TokenFlags( + F_STRING, + ), + ), + ( + FStringMiddle( + "foo ", + ), 37..41, + TokenFlags( + F_STRING, + ), ), ( Lbrace, 41..42, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 42..44, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "bar", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "bar", + ), 44..47, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 47..48, + TokenFlags( + F_STRING, + ), ), ( Rbrace, 48..49, ), ( - FStringMiddle { - value: " some ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + " some ", + ), 49..55, + TokenFlags( + F_STRING, + ), ), ( Lbrace, 55..56, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 56..58, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "another", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "another", + ), 58..65, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 65..66, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, @@ -245,9 +204,13 @@ expression: lex_source(source) ( FStringEnd, 67..68, + TokenFlags( + F_STRING, + ), ), ( Newline, 68..68, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap index 8654030cc6..1212187d91 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap +++ 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap @@ -2,18 +2,15 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -26,60 +23,48 @@ expression: lex_source(source) ( FStringEnd, 4..5, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, + TokenFlags( + DOUBLE_QUOTES | F_STRING, ), - 6..8, ), ( - FStringMiddle { - value: "{}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringStart, + 6..8, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), + ), + ( + FStringMiddle( + "{}", + ), 8..12, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 12..13, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, + TokenFlags( + DOUBLE_QUOTES | F_STRING, ), - 14..16, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringStart, + 14..16, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), + ), + ( + FStringMiddle( + " ", + ), 16..17, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -92,31 +77,25 @@ expression: lex_source(source) ( FStringEnd, 19..20, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, + TokenFlags( + DOUBLE_QUOTES | F_STRING, ), - 21..23, ), ( - FStringMiddle { - value: "{", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringStart, + 21..23, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), + ), + ( + FStringMiddle( + "{", + ), 23..25, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -127,75 +106,59 @@ expression: lex_source(source) 26..27, ), ( - FStringMiddle { - value: "}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "}", + ), 27..29, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 29..30, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, + TokenFlags( + DOUBLE_QUOTES | F_STRING, ), - 31..33, ), ( - FStringMiddle { - value: "{{}}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringStart, + 31..33, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), + ), + ( + FStringMiddle( + "{{}}", + ), 33..41, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 41..42, - ), - ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, + TokenFlags( + DOUBLE_QUOTES | F_STRING, ), - 43..45, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringStart, + 43..45, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), + ), + ( + FStringMiddle( + " ", + ), 45..46, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -206,17 +169,13 @@ expression: 
lex_source(source) 47..48, ), ( - FStringMiddle { - value: " {} {", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " {} {", + ), 48..56, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -227,24 +186,24 @@ expression: lex_source(source) 57..58, ), ( - FStringMiddle { - value: "} {{}} ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "} {{}} ", + ), 58..71, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 71..72, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 72..72, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap index faf6cbe440..f134fe8d99 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap @@ -2,185 +2,152 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 2..3, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 4..6, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 6..7, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 8..11, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 11..12, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 13..16, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 16..17, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: true, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 18..21, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( FStringEnd, 21..22, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: true, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 23..26, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( FStringEnd, 26..27, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 28..31, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 31..32, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringStart( - AnyStringFlags { 
- prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 33..36, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 36..37, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: true, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 38..41, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( FStringEnd, 41..42, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: true, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 43..46, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( FStringEnd, 46..47, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( Newline, 47..47, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_mac_eol.snap index c45fb72c94..bb5f4d7597 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_mac_eol.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs expression: fstring_single_quote_escape_eol(MAC_EOL) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "text \\\r more text", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "text \\\r more text", + ), 2..19, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 19..20, + TokenFlags( + F_STRING, + ), ), ( Newline, 20..20, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_unix_eol.snap index 1a27f26ecb..ace6850825 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_unix_eol.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs expression: fstring_single_quote_escape_eol(UNIX_EOL) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "text \\\n more text", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "text \\\n more text", + ), 2..19, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 19..20, + TokenFlags( + F_STRING, + ), ), ( Newline, 20..20, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_windows_eol.snap 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_windows_eol.snap index 99edd6be19..a3b11f3d6b 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_windows_eol.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs expression: fstring_single_quote_escape_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "text \\\r\n more text", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "text \\\r\n more text", + ), 2..20, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 20..21, + TokenFlags( + F_STRING, + ), ), ( Newline, 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap index d9c595f8f0..8157de849c 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap @@ -2,27 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 2..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 3..6, ), ( @@ -34,26 +31,22 @@ expression: lex_source(source) 7..8, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 8..9, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 9..10, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 10..11, ), ( @@ -65,9 +58,9 @@ expression: lex_source(source) 12..13, ), ( - Name { - name: "s", - }, + Name( + "s", + ), 13..14, ), ( @@ -75,43 +68,35 @@ expression: lex_source(source) 14..15, ), ( - FStringMiddle { - value: ".3f", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + ".3f", + ), 15..18, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 18..19, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 19..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 20..21, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 21..22, ), ( @@ -119,26 +104,22 @@ expression: lex_source(source) 22..23, ), ( - FStringMiddle { - value: ".", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + ".", + ), 23..24, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 24..25, ), ( - Name { - name: "y", - }, + Name( + "y", + ), 25..26, ), ( @@ -146,50 +127,35 @@ expression: lex_source(source) 26..27, 
), ( - FStringMiddle { - value: "f", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "f", + ), 27..28, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 28..29, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 29..30, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 30..31, ), ( - String { - value: "", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "", + ), 31..33, ), ( @@ -197,26 +163,22 @@ expression: lex_source(source) 33..34, ), ( - FStringMiddle { - value: "*^", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "*^", + ), 34..36, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 36..37, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 37..38, ), ( @@ -228,9 +190,9 @@ expression: lex_source(source) 39..40, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 40..41, ), ( @@ -246,26 +208,22 @@ expression: lex_source(source) 43..44, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 44..45, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 45..46, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 46..47, ), ( @@ -281,9 +239,9 @@ expression: lex_source(source) 49..50, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 50..51, ), ( @@ -295,9 +253,9 @@ expression: lex_source(source) 52..53, ), ( - Name { - name: "pop", - }, + Name( + "pop", + ), 53..56, ), ( @@ -319,9 +277,13 @@ expression: lex_source(source) ( FStringEnd, 60..61, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 61..61, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap index 3d89467bcd..7c749c92e7 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap @@ -2,31 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "foo ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "foo ", + ), 2..6, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -37,9 +30,9 @@ expression: lex_source(source) 7..8, ), ( - Name { - name: "pwd", - }, + Name( + "pwd", + ), 8..11, ), ( @@ -47,24 +40,24 @@ expression: lex_source(source) 11..12, ), ( - FStringMiddle { - value: " bar", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " bar", + ), 12..16, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 16..17, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + 
), ), ( Newline, 17..17, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_lambda_expression.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_lambda_expression.snap index 5e63f7f917..5fde2adc2c 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_lambda_expression.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_lambda_expression.snap @@ -2,18 +2,15 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -24,9 +21,9 @@ expression: lex_source(source) 3..9, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 10..11, ), ( @@ -38,9 +35,9 @@ expression: lex_source(source) 12..13, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 13..14, ), ( @@ -54,22 +51,20 @@ expression: lex_source(source) ( FStringEnd, 16..17, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 17..18, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 18..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -84,9 +79,9 @@ expression: lex_source(source) 22..28, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 29..30, ), ( @@ -98,9 +93,9 @@ expression: lex_source(source) 31..32, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 32..33, ), ( @@ -118,9 +113,13 @@ expression: lex_source(source) ( FStringEnd, 36..37, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 37..37, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap index a4a46dfacf..4e46987c12 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap @@ -2,31 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - ), + FStringStart, 0..4, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 4..6, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Lbrace, @@ -37,9 +30,9 @@ expression: lex_source(source) 7..8, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 12..13, ), ( @@ -47,67 +40,53 @@ expression: lex_source(source) 13..14, ), ( - FStringMiddle { - value: "d\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "d\n", + ), 14..16, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Rbrace, 16..17, ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - 
}, - }, + FStringMiddle( + "__", + ), 17..19, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 19..22, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Newline, 22..23, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - ), + FStringStart, 23..27, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 27..29, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Lbrace, @@ -118,9 +97,9 @@ expression: lex_source(source) 30..31, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 35..36, ), ( @@ -128,67 +107,53 @@ expression: lex_source(source) 36..37, ), ( - FStringMiddle { - value: "a\n b\n c\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "a\n b\n c\n", + ), 37..61, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Rbrace, 61..62, ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 62..64, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 64..67, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Newline, 67..68, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 68..70, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 70..72, + TokenFlags( + F_STRING, + ), ), ( Lbrace, @@ -199,9 +164,9 @@ expression: lex_source(source) 73..74, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 78..79, ), ( @@ -209,17 +174,13 @@ expression: lex_source(source) 79..80, ), ( - FStringMiddle { - value: "d", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "d", + ), 80..81, + TokenFlags( + F_STRING, + ), ), ( NonLogicalNewline, @@ -230,50 +191,40 @@ expression: lex_source(source) 82..83, ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 83..85, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 85..86, + TokenFlags( + F_STRING, + ), ), ( Newline, 86..87, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 87..89, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 89..91, + TokenFlags( + F_STRING, + ), ), ( Lbrace, @@ -284,9 +235,9 @@ expression: lex_source(source) 92..93, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 97..98, ), ( @@ -294,26 +245,22 @@ expression: lex_source(source) 98..99, ), ( - FStringMiddle { - value: "a", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "a", + ), 99..100, + TokenFlags( + F_STRING, + ), ), ( 
NonLogicalNewline, 100..101, ), ( - Name { - name: "b", - }, + Name( + "b", + ), 109..110, ), ( @@ -325,24 +272,24 @@ expression: lex_source(source) 111..112, ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 112..114, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 114..115, + TokenFlags( + F_STRING, + ), ), ( Newline, 115..116, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap index c013731900..900373f25c 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap @@ -2,27 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 2..3, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 3..4, ), ( @@ -30,34 +27,26 @@ expression: lex_source(source) 4..5, ), ( - FStringMiddle { - value: "=10", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "=10", + ), 5..8, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 8..9, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 9..10, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -68,9 +57,9 @@ expression: lex_source(source) 11..12, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 12..13, ), ( @@ -78,9 +67,9 @@ expression: lex_source(source) 13..15, ), ( - Int { - value: 10, - }, + Int( + 10, + ), 15..17, ), ( @@ -92,26 +81,22 @@ expression: lex_source(source) 18..19, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 19..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 20..21, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 21..22, ), ( @@ -123,9 +108,9 @@ expression: lex_source(source) 23..24, ), ( - Name { - name: "y", - }, + Name( + "y", + ), 24..25, ), ( @@ -133,9 +118,9 @@ expression: lex_source(source) 25..27, ), ( - Int { - value: 10, - }, + Int( + 10, + ), 27..29, ), ( @@ -147,17 +132,13 @@ expression: lex_source(source) 30..31, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 31..32, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -168,9 +149,9 @@ expression: lex_source(source) 33..34, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 34..35, ), ( @@ -178,9 +159,9 @@ expression: lex_source(source) 35..37, ), ( - Int { - value: 10, - }, + Int( + 10, + ), 37..39, ), ( @@ -194,9 +175,13 @@ expression: lex_source(source) ( FStringEnd, 41..42, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 42..42, ), ] +``` diff --git 
a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_nul_char.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_nul_char.snap index d612885716..2620cb6cc8 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_nul_char.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_nul_char.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "\\0", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "\\0", + ), 2..4, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 4..5, + TokenFlags( + F_STRING, + ), ), ( Newline, 5..5, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_mac_eol.snap index 96de2fd392..be043b9151 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_mac_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: indentation_with_eol(MAC_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: indentation_with_eol(MAC_EOL) 15..21, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 22..24, ), ( @@ -56,3 +58,4 @@ expression: indentation_with_eol(MAC_EOL) 26..26, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_unix_eol.snap index c680d32089..7f92d8a8df 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_unix_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: indentation_with_eol(UNIX_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: indentation_with_eol(UNIX_EOL) 15..21, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 22..24, ), ( @@ -56,3 +58,4 @@ expression: indentation_with_eol(UNIX_EOL) 26..26, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_windows_eol.snap index acd7bc7f68..e7c4cdb3f0 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_windows_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: indentation_with_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", 
+ ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: indentation_with_eol(WINDOWS_EOL) 16..22, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 23..25, ), ( @@ -56,3 +58,4 @@ expression: indentation_with_eol(WINDOWS_EOL) 29..29, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap index a0eb10bff4..189a89b5b8 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap @@ -1,12 +1,28 @@ --- source: crates/ruff_python_parser/src/lexer.rs -expression: tokens +expression: "lex_invalid(source, Mode::Module)" --- -Err( +## Tokens +``` +[ + ( + Unknown, + 0..85, + ), + ( + Newline, + 85..85, + ), +] +``` +## Errors +``` +[ LexicalError { error: OtherError( "Invalid decimal integer literal", ), location: 0..85, }, -) +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap index cf606bd31d..50a1a1564f 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap @@ -1,12 +1,28 @@ --- source: crates/ruff_python_parser/src/lexer.rs -expression: tokens +expression: "lex_invalid(source, Mode::Module)" --- -Err( +## Tokens +``` +[ + ( + Unknown, + 0..3, + ), + ( + Newline, + 3..3, + ), +] +``` +## Errors +``` +[ LexicalError { error: OtherError( "Invalid decimal integer literal", ), location: 0..3, }, -) +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command.snap index dc3d3ec217..87c8111347 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -125,3 +127,4 @@ expression: lex_jupyter_source(source) 180..180, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_assignment.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_assignment.snap index 07b029d90d..32a7e56eea 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_assignment.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_assignment.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( - Name { - name: "pwd", - }, + Name( + "pwd", + ), 0..3, ), ( @@ -25,9 +27,9 @@ expression: lex_jupyter_source(source) 10..11, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 11..14, ), ( @@ -46,9 +48,9 @@ expression: lex_jupyter_source(source) 30..31, ), ( - Name { - name: "bar", - }, + Name( + 
"bar", + ), 31..34, ), ( @@ -67,9 +69,9 @@ expression: lex_jupyter_source(source) 50..51, ), ( - Name { - name: "baz", - }, + Name( + "baz", + ), 51..54, ), ( @@ -88,3 +90,4 @@ expression: lex_jupyter_source(source) 85..85, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_indentation.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_indentation.snap index 1a3d7e016c..add0a35364 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_indentation.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_indentation.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( If, @@ -39,3 +41,4 @@ expression: lex_jupyter_source(source) 43..43, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_mac_eol.snap index c10f2fb977..913af5854f 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_mac_eol.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_eol(MAC_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_eol(MAC_EOL) 24..24, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_unix_eol.snap index 938d150f9e..4710ed4bc1 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_unix_eol.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_eol(UNIX_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_eol(UNIX_EOL) 24..24, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_windows_eol.snap index c5f5d29dd0..0e9e3bde72 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_windows_eol.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_eol(WINDOWS_EOL) 25..25, ), ] +``` diff --git 
a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_mac_eol_and_eof.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_mac_eol_and_eof.snap index ffee4a7eec..1d842b60d1 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_mac_eol_and_eof.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_mac_eol_and_eof.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_with_eol_and_eof(MAC_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_with_eol_and_eof(MAC_EOL) 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_unix_eol_and_eof.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_unix_eol_and_eof.snap index e5227d0a06..13d4cf600f 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_unix_eol_and_eof.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_unix_eol_and_eof.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_with_eol_and_eof(UNIX_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_with_eol_and_eof(UNIX_EOL) 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_windows_eol_and_eof.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_windows_eol_and_eof.snap index 7950d33905..b70e615c07 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_windows_eol_and_eof.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_windows_eol_and_eof.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_with_eol_and_eof(WINDOWS_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_with_eol_and_eof(WINDOWS_EO 15..15, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_help_end_escape_command.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_help_end_escape_command.snap index b760410a5e..69e13c03bd 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_help_end_escape_command.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_help_end_escape_command.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -180,3 +182,4 @@ expression: lex_jupyter_source(source) 132..132, ), ] +``` diff --git 
a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_empty.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_empty.snap index 34d9125a63..8aa9156f9f 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_empty.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_empty.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(&source) --- +## Tokens +``` [ ( - Int { - value: 99232, - }, + Int( + 99232, + ), 0..5, ), ( - Comment( - "#", - ), + Comment, 7..8, ), ( @@ -20,3 +20,4 @@ expression: lex_source(&source) 8..8, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_long.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_long.snap index 0731cf4711..b583477cdb 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_long.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_long.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(&source) --- +## Tokens +``` [ ( - Int { - value: 99232, - }, + Int( + 99232, + ), 0..5, ), ( - Comment( - "# foo", - ), + Comment, 7..12, ), ( @@ -20,3 +20,4 @@ expression: lex_source(&source) 12..12, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_single_whitespace.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_single_whitespace.snap index f248b93ef1..0c4d6c8372 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_single_whitespace.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_single_whitespace.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(&source) --- +## Tokens +``` [ ( - Int { - value: 99232, - }, + Int( + 99232, + ), 0..5, ), ( - Comment( - "# ", - ), + Comment, 7..9, ), ( @@ -20,3 +20,4 @@ expression: lex_source(&source) 9..9, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_whitespace.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_whitespace.snap index 4593910098..f2e37aa31d 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_whitespace.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_whitespace.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(&source) --- +## Tokens +``` [ ( - Int { - value: 99232, - }, + Int( + 99232, + ), 0..5, ), ( - Comment( - "# ", - ), + Comment, 7..10, ), ( @@ -20,3 +20,4 @@ expression: lex_source(&source) 10..10, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__logical_newline_line_comment.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__logical_newline_line_comment.snap index 944ad882a0..151f5dedd4 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__logical_newline_line_comment.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__logical_newline_line_comment.snap @@ 
-2,11 +2,11 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - Comment( - "#Hello", - ), + Comment, 0..6, ), ( @@ -14,9 +14,7 @@ expression: lex_source(source) 6..7, ), ( - Comment( - "#World", - ), + Comment, 7..13, ), ( @@ -24,3 +22,4 @@ expression: lex_source(source) 13..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__match_softkeyword_in_notebook.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__match_softkeyword_in_notebook.snap index 0512714bd4..d56f39910d 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__match_softkeyword_in_notebook.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__match_softkeyword_in_notebook.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( Match, 0..5, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 6..9, ), ( @@ -30,9 +32,9 @@ expression: lex_jupyter_source(source) 15..19, ), ( - Name { - name: "bar", - }, + Name( + "bar", + ), 20..23, ), ( @@ -64,3 +66,4 @@ expression: lex_jupyter_source(source) 37..37, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_mac_eol.snap index 0a0a9fb1da..d167752f78 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_mac_eol.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: newline_in_brackets_eol(MAC_EOL) --- +## Tokens +``` [ ( - Name { - name: "x", - }, + Name( + "x", + ), 0..1, ), ( @@ -26,9 +28,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 6..7, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 11..12, ), ( @@ -36,9 +38,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 12..13, ), ( - Int { - value: 2, - }, + Int( + 2, + ), 13..14, ), ( @@ -54,9 +56,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 16..17, ), ( - Int { - value: 3, - }, + Int( + 3, + ), 17..18, ), ( @@ -68,9 +70,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 19..20, ), ( - Int { - value: 4, - }, + Int( + 4, + ), 20..21, ), ( @@ -98,9 +100,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 27..28, ), ( - Int { - value: 5, - }, + Int( + 5, + ), 28..29, ), ( @@ -112,9 +114,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 30..31, ), ( - Int { - value: 6, - }, + Int( + 6, + ), 31..32, ), ( @@ -122,9 +124,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 32..33, ), ( - Int { - value: 7, - }, + Int( + 7, + ), 35..36, ), ( @@ -140,3 +142,4 @@ expression: newline_in_brackets_eol(MAC_EOL) 38..39, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_unix_eol.snap index c3df5dbd24..6355d419f2 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_unix_eol.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: newline_in_brackets_eol(UNIX_EOL) 
--- +## Tokens +``` [ ( - Name { - name: "x", - }, + Name( + "x", + ), 0..1, ), ( @@ -26,9 +28,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 6..7, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 11..12, ), ( @@ -36,9 +38,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 12..13, ), ( - Int { - value: 2, - }, + Int( + 2, + ), 13..14, ), ( @@ -54,9 +56,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 16..17, ), ( - Int { - value: 3, - }, + Int( + 3, + ), 17..18, ), ( @@ -68,9 +70,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 19..20, ), ( - Int { - value: 4, - }, + Int( + 4, + ), 20..21, ), ( @@ -98,9 +100,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 27..28, ), ( - Int { - value: 5, - }, + Int( + 5, + ), 28..29, ), ( @@ -112,9 +114,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 30..31, ), ( - Int { - value: 6, - }, + Int( + 6, + ), 31..32, ), ( @@ -122,9 +124,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 32..33, ), ( - Int { - value: 7, - }, + Int( + 7, + ), 35..36, ), ( @@ -140,3 +142,4 @@ expression: newline_in_brackets_eol(UNIX_EOL) 38..39, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_windows_eol.snap index 34184c68a9..cfcd1f7ea1 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_windows_eol.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: newline_in_brackets_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - Name { - name: "x", - }, + Name( + "x", + ), 0..1, ), ( @@ -26,9 +28,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 7..9, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 13..14, ), ( @@ -36,9 +38,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 14..15, ), ( - Int { - value: 2, - }, + Int( + 2, + ), 15..16, ), ( @@ -54,9 +56,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 19..20, ), ( - Int { - value: 3, - }, + Int( + 3, + ), 20..21, ), ( @@ -68,9 +70,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 22..24, ), ( - Int { - value: 4, - }, + Int( + 4, + ), 24..25, ), ( @@ -98,9 +100,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 32..34, ), ( - Int { - value: 5, - }, + Int( + 5, + ), 34..35, ), ( @@ -112,9 +114,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 36..38, ), ( - Int { - value: 6, - }, + Int( + 6, + ), 38..39, ), ( @@ -122,9 +124,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 39..40, ), ( - Int { - value: 7, - }, + Int( + 7, + ), 43..44, ), ( @@ -140,3 +142,4 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 46..48, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__non_logical_newline_in_string_continuation.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__non_logical_newline_in_string_continuation.snap index 1096935e0a..48356832bc 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__non_logical_newline_in_string_continuation.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__non_logical_newline_in_string_continuation.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( Lpar, @@ -12,16 +14,9 @@ expression: 
lex_source(source) 1..2, ), ( - String { - value: "a", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "a", + ), 6..9, ), ( @@ -29,16 +24,9 @@ expression: lex_source(source) 9..10, ), ( - String { - value: "b", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "b", + ), 14..17, ), ( @@ -50,29 +38,15 @@ expression: lex_source(source) 18..19, ), ( - String { - value: "c", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "c", + ), 23..26, ), ( - String { - value: "d", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "d", + ), 33..36, ), ( @@ -88,3 +62,4 @@ expression: lex_source(source) 38..38, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__numbers.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__numbers.snap index 92bc661965..ee49b4ab27 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__numbers.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__numbers.snap @@ -2,59 +2,61 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - Int { - value: 47, - }, + Int( + 47, + ), 0..4, ), ( - Int { - value: 10, - }, + Int( + 10, + ), 5..9, ), ( - Int { - value: 13, - }, + Int( + 13, + ), 10..16, ), ( - Int { - value: 0, - }, + Int( + 0, + ), 17..18, ), ( - Int { - value: 123, - }, + Int( + 123, + ), 19..22, ), ( - Int { - value: 1234567890, - }, + Int( + 1234567890, + ), 23..36, ), ( - Float { - value: 0.2, - }, + Float( + 0.2, + ), 37..40, ), ( - Float { - value: 100.0, - }, + Float( + 100.0, + ), 41..45, ), ( - Float { - value: 2100.0, - }, + Float( + 2100.0, + ), 46..51, ), ( @@ -72,21 +74,21 @@ expression: lex_source(source) 55..59, ), ( - Int { - value: 0, - }, + Int( + 0, + ), 60..63, ), ( - Int { - value: 11051210869376104954, - }, + Int( + 11051210869376104954, + ), 64..82, ), ( - Int { - value: 0x995DC9BBDF1939FA995DC9BBDF1939FA, - }, + Int( + 0x995DC9BBDF1939FA995DC9BBDF1939FA, + ), 83..117, ), ( @@ -94,3 +96,4 @@ expression: lex_source(source) 117..117, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__operators.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__operators.snap index 9da473b1d5..3a241f6c7c 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__operators.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__operators.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( DoubleSlash, @@ -28,3 +30,4 @@ expression: lex_source(source) 10..10, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string.snap index 7b947ef55f..c045c602e2 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string.snap @@ -2,124 +2,70 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ 
( - String { - value: "double", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "double", + ), 0..8, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( - String { - value: "single", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "single", + ), 9..17, ), ( - String { - value: "can\\'t", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "can\\'t", + ), 18..26, ), ( - String { - value: "\\\\\\\"", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "\\\\\\\"", + ), 27..33, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( - String { - value: "\\t\\r\\n", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "\\t\\r\\n", + ), 34..42, ), ( - String { - value: "\\g", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "\\g", + ), 43..47, ), ( - String { - value: "raw\\'", - flags: AnyStringFlags { - prefix: Regular( - Raw { - uppercase: false, - }, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "raw\\'", + ), 48..56, + TokenFlags( + RAW_STRING_LOWERCASE, + ), ), ( - String { - value: "\\420", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "\\420", + ), 57..63, ), ( - String { - value: "\\200\\0a", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "\\200\\0a", + ), 64..73, ), ( @@ -127,3 +73,4 @@ expression: lex_source(source) 73..73, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_mac_eol.snap index 062e7563c3..3df752853b 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_mac_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: string_continuation_with_eol(MAC_EOL) --- +## Tokens +``` [ ( - String { - value: "abc\\\rdef", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "abc\\\rdef", + ), 0..10, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( Newline, 10..10, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_unix_eol.snap index 285b0f72e3..e7413d1002 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_unix_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: string_continuation_with_eol(UNIX_EOL) --- +## Tokens +``` [ ( - String { - value: "abc\\\ndef", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - 
triple_quoted: false, - quote_style: Double, - }, - }, + String( + "abc\\\ndef", + ), 0..10, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( Newline, 10..10, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_windows_eol.snap index d1cbaf6552..ac945e5ace 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_windows_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: string_continuation_with_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - String { - value: "abc\\\r\ndef", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "abc\\\r\ndef", + ), 0..11, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( Newline, 11..11, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap index 648ba0ccda..166877fd0e 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap @@ -1,66 +1,58 @@ --- source: crates/ruff_python_parser/src/lexer.rs -expression: tokens +expression: "lex_invalid(source, Mode::Module)" --- +## Tokens +``` [ - Ok( - ( - If, - 0..2, - ), + ( + If, + 0..2, ), - Ok( - ( - True, - 3..7, - ), + ( + True, + 3..7, ), - Ok( - ( - Colon, - 7..8, - ), + ( + Colon, + 7..8, ), - Ok( - ( - Newline, - 8..9, - ), + ( + Newline, + 8..9, ), - Ok( - ( - Indent, - 9..13, - ), + ( + Indent, + 9..13, ), - Ok( - ( - Pass, - 13..17, - ), + ( + Pass, + 13..17, ), - Ok( - ( - Newline, - 17..18, - ), + ( + Newline, + 17..18, ), - Err( - LexicalError { - error: IndentationError, - location: 18..20, - }, + ( + Unknown, + 18..20, ), - Ok( - ( - Pass, - 20..24, - ), + ( + Pass, + 20..24, ), - Ok( - ( - Newline, - 24..24, - ), + ( + Newline, + 24..24, ), ] +``` +## Errors +``` +[ + LexicalError { + error: IndentationError, + location: 18..20, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_mac_eol.snap index 6ab09f4663..6dcccf3fdb 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_mac_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: triple_quoted_eol(MAC_EOL) --- +## Tokens +``` [ ( - String { - value: "\r test string\r ", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + String( + "\r test string\r ", + ), 0..21, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING, + ), ), ( Newline, 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_unix_eol.snap index 
1fd944b34f..70f9c06af3 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_unix_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: triple_quoted_eol(UNIX_EOL) --- +## Tokens +``` [ ( - String { - value: "\n test string\n ", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + String( + "\n test string\n ", + ), 0..21, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING, + ), ), ( Newline, 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_windows_eol.snap index 6944efe4be..8dcdd0461c 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_windows_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: triple_quoted_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - String { - value: "\r\n test string\r\n ", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + String( + "\r\n test string\r\n ", + ), 0..23, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING, + ), ), ( Newline, 23..23, ), ] +``` diff --git a/crates/ruff_python_parser/src/soft_keywords.rs b/crates/ruff_python_parser/src/soft_keywords.rs deleted file mode 100644 index e29781c749..0000000000 --- a/crates/ruff_python_parser/src/soft_keywords.rs +++ /dev/null @@ -1,224 +0,0 @@ -use itertools::{Itertools, MultiPeek}; - -use crate::{lexer::LexResult, token::Tok, Mode}; - -/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match` -/// `case`, and `type`). -/// -/// [PEP 634](https://www.python.org/dev/peps/pep-0634/) introduced the `match` and `case` keywords -/// as soft keywords, meaning that they can be used as identifiers (e.g., variable names) in certain -/// contexts. -/// -/// Later, [PEP 695](https://peps.python.org/pep-0695/#generic-type-alias) introduced the `type` -/// soft keyword. -/// -/// This function modifies a token stream to accommodate this change. In particular, it replaces -/// soft keyword tokens with `identifier` tokens if they are used as identifiers. -/// -/// Handling soft keywords in this intermediary pass allows us to simplify both the lexer and -/// `ruff_python_parser`, as neither of them need to be aware of soft keywords. -pub struct SoftKeywordTransformer -where - I: Iterator, -{ - underlying: MultiPeek, - position: Position, -} - -impl SoftKeywordTransformer -where - I: Iterator, -{ - pub fn new(lexer: I, mode: Mode) -> Self { - Self { - underlying: lexer.multipeek(), // spell-checker:ignore multipeek - position: if mode == Mode::Expression { - Position::Other - } else { - Position::Statement - }, - } - } -} - -impl Iterator for SoftKeywordTransformer -where - I: Iterator, -{ - type Item = LexResult; - - #[inline] - fn next(&mut self) -> Option { - let mut next = self.underlying.next(); - if let Some(Ok((tok, range))) = next.as_ref() { - // If the token is a soft keyword e.g. `type`, `match`, or `case`, check if it's - // used as an identifier. 
We assume every soft keyword use is an identifier unless - // a heuristic is met. - match tok { - // For `match` and `case`, all of the following conditions must be met: - // 1. The token is at the start of a logical line. - // 2. The logical line contains a top-level colon (that is, a colon that is not nested - // inside a parenthesized expression, list, or dictionary). - // 3. The top-level colon is not the immediate sibling of a `match` or `case` token. - // (This is to avoid treating `match` or `case` as identifiers when annotated with - // type hints.) - Tok::Match | Tok::Case => { - if matches!(self.position, Position::Statement) { - let mut nesting = 0; - let mut first = true; - let mut seen_colon = false; - let mut seen_lambda = false; - while let Some(Ok((tok, _))) = self.underlying.peek() { - match tok { - Tok::Newline => break, - Tok::Lambda if nesting == 0 => seen_lambda = true, - Tok::Colon if nesting == 0 => { - if seen_lambda { - seen_lambda = false; - } else if !first { - seen_colon = true; - } - } - Tok::Lpar | Tok::Lsqb | Tok::Lbrace => nesting += 1, - Tok::Rpar | Tok::Rsqb | Tok::Rbrace => nesting -= 1, - _ => {} - } - first = false; - } - if !seen_colon { - next = Some(Ok((soft_to_name(tok), *range))); - } - } else { - next = Some(Ok((soft_to_name(tok), *range))); - } - } - // For `type` all of the following conditions must be met: - // 1. The token is at the start of a logical line. - // 2. The type token is immediately followed by a name token. - // 3. The name token is eventually followed by an equality token. - Tok::Type => { - if matches!( - self.position, - Position::Statement | Position::SimpleStatement - ) { - let mut is_type_alias = false; - if let Some(Ok((tok, _))) = self.underlying.peek() { - if matches!( - tok, - Tok::Name { .. } | - // We treat a soft keyword token following a type token as a - // name to support cases like `type type = int` or `type match = int` - Tok::Type | Tok::Match | Tok::Case - ) { - let mut nesting = 0; - while let Some(Ok((tok, _))) = self.underlying.peek() { - match tok { - Tok::Newline => break, - Tok::Equal if nesting == 0 => { - is_type_alias = true; - break; - } - Tok::Lsqb => nesting += 1, - Tok::Rsqb => nesting -= 1, - // Allow arbitrary content within brackets for now - _ if nesting > 0 => {} - // Exit if unexpected tokens are seen - _ => break, - } - } - } - } - if !is_type_alias { - next = Some(Ok((soft_to_name(tok), *range))); - } - } else { - next = Some(Ok((soft_to_name(tok), *range))); - } - } - _ => (), // Not a soft keyword token - } - } - - // Update the position, to track whether we're at the start of a logical line. - if let Some(lex_result) = next.as_ref() { - if let Ok((tok, _)) = lex_result.as_ref() { - match tok { - Tok::NonLogicalNewline | Tok::Comment { .. } => { - // Nothing to do. 
- } - Tok::Newline | Tok::Indent | Tok::Dedent => { - self.position = Position::Statement; - } - // If we see a semicolon, assume we're at the start of a simple statement, as in: - // ```python - // type X = int; type Y = float - // ``` - Tok::Semi => { - self.position = Position::SimpleStatement; - } - // If we see a colon, and we're not in a nested context, assume we're at the - // start of a simple statement, as in: - // ```python - // class Class: type X = int - // ``` - Tok::Colon if self.position == Position::Other => { - self.position = Position::SimpleStatement; - } - Tok::Lpar | Tok::Lsqb | Tok::Lbrace => { - self.position = if let Position::Nested(depth) = self.position { - Position::Nested(depth.saturating_add(1)) - } else { - Position::Nested(1) - }; - } - Tok::Rpar | Tok::Rsqb | Tok::Rbrace => { - self.position = if let Position::Nested(depth) = self.position { - let depth = depth.saturating_sub(1); - if depth > 0 { - Position::Nested(depth) - } else { - Position::Other - } - } else { - Position::Other - }; - } - _ => { - self.position = Position::Other; - } - } - } - } - - next - } -} - -#[inline] -fn soft_to_name(tok: &Tok) -> Tok { - let name = match tok { - Tok::Match => "match", - Tok::Case => "case", - Tok::Type => "type", - _ => unreachable!("other tokens never reach here"), - }; - Tok::Name { - name: name.to_string().into_boxed_str(), - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Position { - /// The lexer is at the start of a logical line, i.e., the start of a simple or compound statement. - Statement, - /// The lexer is at the start of a simple statement, e.g., a statement following a semicolon - /// or colon, as in: - /// ```python - /// class Class: type X = int - /// ``` - SimpleStatement, - /// The lexer is within brackets, with the given bracket nesting depth. - Nested(u32), - /// The lexer is some other location. 
- Other, -} diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs index bd206d5e46..3976da3387 100644 --- a/crates/ruff_python_parser/src/string.rs +++ b/crates/ruff_python_parser/src/string.rs @@ -469,13 +469,19 @@ pub(crate) fn parse_fstring_literal_element( #[cfg(test)] mod tests { + use ruff_python_ast::Suite; + use crate::lexer::LexicalErrorType; - use crate::{parse_suite, FStringErrorType, ParseErrorType, Suite}; + use crate::{parse_module, FStringErrorType, ParseError, ParseErrorType, Parsed}; const WINDOWS_EOL: &str = "\r\n"; const MAC_EOL: &str = "\r"; const UNIX_EOL: &str = "\n"; + fn parse_suite(source: &str) -> Result { + parse_module(source).map(Parsed::into_suite) + } + fn string_parser_escaped_eol(eol: &str) -> Suite { let source = format!(r"'text \{eol}more text'"); parse_suite(&source).unwrap() @@ -483,73 +489,69 @@ mod tests { #[test] fn test_string_parser_escaped_unix_eol() { - let parse_ast = string_parser_escaped_eol(UNIX_EOL); - insta::assert_debug_snapshot!(parse_ast); + let suite = string_parser_escaped_eol(UNIX_EOL); + insta::assert_debug_snapshot!(suite); } #[test] fn test_string_parser_escaped_mac_eol() { - let parse_ast = string_parser_escaped_eol(MAC_EOL); - insta::assert_debug_snapshot!(parse_ast); + let suite = string_parser_escaped_eol(MAC_EOL); + insta::assert_debug_snapshot!(suite); } #[test] fn test_string_parser_escaped_windows_eol() { - let parse_ast = string_parser_escaped_eol(WINDOWS_EOL); - insta::assert_debug_snapshot!(parse_ast); + let suite = string_parser_escaped_eol(WINDOWS_EOL); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring() { let source = r#"f"{a}{ b }{{foo}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_nested_spec() { let source = r#"f"{foo:{spec}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_not_nested_spec() { let source = r#"f"{foo:spec}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_empty_fstring() { - insta::assert_debug_snapshot!(parse_suite(r#"f"""#,).unwrap()); + let source = r#"f"""#; + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_parse_self_documenting_base() { let source = r#"f"{user=}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_parse_self_documenting_base_more() { let source = r#"f"mix {user=} with text and {second=}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_parse_self_documenting_format() { let source = r#"f"{user=:>10}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } fn parse_fstring_error(source: &str) -> FStringErrorType { @@ -577,240 +579,236 @@ mod 
tests { // error appears after the unexpected `FStringMiddle` token, which is between the // `:` and the `{`. // assert_eq!(parse_fstring_error("f'{lambda x: {x}}'"), LambdaWithoutParentheses); - assert!(parse_suite(r#"f"{class}""#,).is_err()); + assert!(parse_suite(r#"f"{class}""#).is_err()); } #[test] fn test_parse_fstring_not_equals() { let source = r#"f"{1 != 2}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_equals() { let source = r#"f"{42 == 42}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_self_doc_prec_space() { let source = r#"f"{x =}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_self_doc_trailing_space() { let source = r#"f"{x= }""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_yield_expr() { let source = r#"f"{yield}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_string_concat() { let source = "'Hello ' 'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_string_concat_1() { let source = "'Hello ' u'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_string_concat_2() { let source = "u'Hello ' 'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_1() { let source = "'Hello ' f'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_2() { let source = "'Hello ' f'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_3() { let source = "'Hello ' f'world{\"!\"}'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_4() { let source = "'Hello ' f'world{\"!\"}' 'again!'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_f_string_concat_1() { let source = "u'Hello ' f'world'"; - let parse_ast = parse_suite(source).unwrap(); - 
insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_f_string_concat_2() { let source = "u'Hello ' f'world' '!'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_string_triple_quotes_with_kind() { let source = "u'''Hello, world!'''"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_single_quoted_byte() { // single quote let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_double_quoted_byte() { // double quote let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_escape_char_in_byte_literal() { // backslash does not escape let source = r#"b"omkmok\Xaa""#; // spell-checker:ignore omkmok - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_raw_byte_literal_1() { let source = r"rb'\x1z'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_raw_byte_literal_2() { let source = r"rb'\\'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_escape_octet() { let source = r"b'\43a\4\1234'"; - let parse_ast = 
parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_escaped_newline() { let source = r#"f"\n{x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_constant_range() { let source = r#"f"aaa{bbb}ccc{ddd}eee""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_unescaped_newline() { let source = r#"f""" {x}""""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_escaped_character() { let source = r#"f"\\{x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_raw_fstring() { let source = r#"rf"{x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_triple_quoted_raw_fstring() { let source = r#"rf"""{x}""""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_line_continuation() { let source = r#"rf"\ {x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_nested_string_spec() { let source = r#"f"{foo:{''}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_nested_concatenation_string_spec() { let source = r#"f"{foo:{'' ''}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } /// #[test] fn test_dont_panic_on_8_in_octal_escape() { let source = r"bold = '\038[1m'"; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_invalid_unicode_literal() { let source = r"'\x1ó34'"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -818,7 +816,6 @@ mod tests { fn test_missing_unicode_lbrace_error() { let source = r"'\N '"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -826,7 +823,6 @@ mod tests { fn test_missing_unicode_rbrace_error() { let source = r"'\N{SPACE'"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -834,7 +830,6 @@ mod tests { fn test_invalid_unicode_name_error() { let source = r"'\N{INVALID}'"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -842,7 +837,6 @@ mod tests { fn test_invalid_byte_literal_error() { let source = 
r"b'123a𝐁c'"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -852,8 +846,8 @@ mod tests { #[test] fn $name() { let source = format!(r#""\N{{{0}}}""#, $alias); - let parse_ast = parse_suite(&source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(&source).unwrap(); + insta::assert_debug_snapshot!(suite); } )* } diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs index 16ae72b313..f9f3fe8bb2 100644 --- a/crates/ruff_python_parser/src/token.rs +++ b/crates/ruff_python_parser/src/token.rs @@ -1,4 +1,4 @@ -//! Token type for Python source code created by the lexer and consumed by the `ruff_python_parser`. +//! Token kinds for Python source code created by the lexer and consumed by the `ruff_python_parser`. //! //! This module defines the tokens that the lexer recognizes. The tokens are //! loosely based on the token definitions found in the [CPython source]. @@ -7,482 +7,140 @@ use std::fmt; -use ruff_python_ast::{AnyStringFlags, BoolOp, Int, IpyEscapeKind, Operator, StringFlags, UnaryOp}; +use ruff_python_ast::{BoolOp, Operator, UnaryOp}; -/// The set of tokens the Python source code can be tokenized in. -#[derive(Clone, Debug, PartialEq, is_macro::Is)] -pub enum Tok { - /// Token value for a name, commonly known as an identifier. - Name { - /// The name value. - /// - /// Unicode names are NFKC-normalized by the lexer, - /// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers) - name: Box, - }, - /// Token value for an integer. - Int { - /// The integer value. - value: Int, - }, - /// Token value for a floating point number. - Float { - /// The float value. - value: f64, - }, - /// Token value for a complex number. - Complex { - /// The real part of the complex number. - real: f64, - /// The imaginary part of the complex number. - imag: f64, - }, - /// Token value for a string. - String { - /// The string value. - value: Box, - /// Flags that can be queried to determine the quote style - /// and prefixes of the string - flags: AnyStringFlags, - }, - /// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix - /// and the opening quote(s). - FStringStart(AnyStringFlags), - /// Token value that includes the portion of text inside the f-string that's not - /// part of the expression part and isn't an opening or closing brace. - FStringMiddle { - /// The string value. - value: Box, - /// Flags that can be queried to determine the quote style - /// and prefixes of the string - flags: AnyStringFlags, - }, - /// Token value for the end of an f-string. This includes the closing quote. - FStringEnd, - /// Token value for IPython escape commands. These are recognized by the lexer - /// only when the mode is [`Ipython`]. - /// - /// [`Ipython`]: crate::Mode::Ipython - IpyEscapeCommand { - /// The magic command value. - value: Box, - /// The kind of magic command. - kind: IpyEscapeKind, - }, - /// Token value for a comment. These are filtered out of the token stream prior to parsing. - Comment(Box), - /// Token value for a newline. - Newline, - /// Token value for a newline that is not a logical line break. These are filtered out of - /// the token stream prior to parsing. - NonLogicalNewline, - /// Token value for an indent. - Indent, - /// Token value for a dedent. - Dedent, - EndOfFile, - /// Token value for a question mark `?`. This is only used in [`Ipython`]. 
- /// - /// [`Ipython`]: crate::Mode::Ipython - Question, - /// Token value for a exclamation mark `!`. - Exclamation, - /// Token value for a left parenthesis `(`. - Lpar, - /// Token value for a right parenthesis `)`. - Rpar, - /// Token value for a left square bracket `[`. - Lsqb, - /// Token value for a right square bracket `]`. - Rsqb, - /// Token value for a colon `:`. - Colon, - /// Token value for a comma `,`. - Comma, - /// Token value for a semicolon `;`. - Semi, - /// Token value for plus `+`. - Plus, - /// Token value for minus `-`. - Minus, - /// Token value for star `*`. - Star, - /// Token value for slash `/`. - Slash, - /// Token value for vertical bar `|`. - Vbar, - /// Token value for ampersand `&`. - Amper, - /// Token value for less than `<`. - Less, - /// Token value for greater than `>`. - Greater, - /// Token value for equal `=`. - Equal, - /// Token value for dot `.`. - Dot, - /// Token value for percent `%`. - Percent, - /// Token value for left bracket `{`. - Lbrace, - /// Token value for right bracket `}`. - Rbrace, - /// Token value for double equal `==`. - EqEqual, - /// Token value for not equal `!=`. - NotEqual, - /// Token value for less than or equal `<=`. - LessEqual, - /// Token value for greater than or equal `>=`. - GreaterEqual, - /// Token value for tilde `~`. - Tilde, - /// Token value for caret `^`. - CircumFlex, - /// Token value for left shift `<<`. - LeftShift, - /// Token value for right shift `>>`. - RightShift, - /// Token value for double star `**`. - DoubleStar, - /// Token value for double star equal `**=`. - DoubleStarEqual, - /// Token value for plus equal `+=`. - PlusEqual, - /// Token value for minus equal `-=`. - MinusEqual, - /// Token value for star equal `*=`. - StarEqual, - /// Token value for slash equal `/=`. - SlashEqual, - /// Token value for percent equal `%=`. - PercentEqual, - /// Token value for ampersand equal `&=`. - AmperEqual, - /// Token value for vertical bar equal `|=`. - VbarEqual, - /// Token value for caret equal `^=`. - CircumflexEqual, - /// Token value for left shift equal `<<=`. - LeftShiftEqual, - /// Token value for right shift equal `>>=`. - RightShiftEqual, - /// Token value for double slash `//`. - DoubleSlash, - /// Token value for double slash equal `//=`. - DoubleSlashEqual, - /// Token value for colon equal `:=`. - ColonEqual, - /// Token value for at `@`. - At, - /// Token value for at equal `@=`. - AtEqual, - /// Token value for arrow `->`. - Rarrow, - /// Token value for ellipsis `...`. - Ellipsis, - - // Self documenting. - // Keywords (alphabetically): - False, - None, - True, - - And, - As, - Assert, - Async, - Await, - Break, - Class, - Continue, - Def, - Del, - Elif, - Else, - Except, - Finally, - For, - From, - Global, - If, - Import, - In, - Is, - Lambda, - Nonlocal, - Not, - Or, - Pass, - Raise, - Return, - Try, - While, - Match, - Type, - Case, - With, - Yield, - - Unknown, -} - -impl Tok { - #[inline] - pub fn kind(&self) -> TokenKind { - TokenKind::from_token(self) - } -} - -impl fmt::Display for Tok { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - #[allow(clippy::enum_glob_use)] - use Tok::*; - match self { - Name { name } => write!(f, "{name}"), - Int { value } => write!(f, "{value}"), - Float { value } => write!(f, "{value}"), - Complex { real, imag } => write!(f, "{real}j{imag}"), - String { value, flags } => { - write!(f, "{}", flags.format_string_contents(value)) - } - FStringStart(_) => f.write_str("FStringStart"), - FStringMiddle { value, .. 
} => f.write_str(value), - FStringEnd => f.write_str("FStringEnd"), - IpyEscapeCommand { kind, value } => write!(f, "{kind}{value}"), - Newline => f.write_str("Newline"), - NonLogicalNewline => f.write_str("NonLogicalNewline"), - Indent => f.write_str("Indent"), - Dedent => f.write_str("Dedent"), - EndOfFile => f.write_str("EOF"), - Question => f.write_str("?"), - Exclamation => f.write_str("!"), - Lpar => f.write_str("("), - Rpar => f.write_str(")"), - Lsqb => f.write_str("["), - Rsqb => f.write_str("]"), - Colon => f.write_str(":"), - Comma => f.write_str(","), - Comment(value) => f.write_str(value), - Semi => f.write_str(";"), - Plus => f.write_str("+"), - Minus => f.write_str("-"), - Star => f.write_str("*"), - Slash => f.write_str("/"), - Vbar => f.write_str("|"), - Amper => f.write_str("&"), - Less => f.write_str("<"), - Greater => f.write_str(">"), - Equal => f.write_str("="), - Dot => f.write_str("."), - Percent => f.write_str("%"), - Lbrace => f.write_str("{"), - Rbrace => f.write_str("}"), - EqEqual => f.write_str("=="), - NotEqual => f.write_str("!="), - LessEqual => f.write_str("<="), - GreaterEqual => f.write_str(">="), - Tilde => f.write_str("~"), - CircumFlex => f.write_str("^"), - LeftShift => f.write_str("<<"), - RightShift => f.write_str(">>"), - DoubleStar => f.write_str("**"), - DoubleStarEqual => f.write_str("**="), - PlusEqual => f.write_str("+="), - MinusEqual => f.write_str("-="), - StarEqual => f.write_str("*="), - SlashEqual => f.write_str("/="), - PercentEqual => f.write_str("%="), - AmperEqual => f.write_str("&="), - VbarEqual => f.write_str("|="), - CircumflexEqual => f.write_str("^="), - LeftShiftEqual => f.write_str("<<="), - RightShiftEqual => f.write_str(">>="), - DoubleSlash => f.write_str("//"), - DoubleSlashEqual => f.write_str("//="), - At => f.write_str("@"), - AtEqual => f.write_str("@="), - Rarrow => f.write_str("->"), - Ellipsis => f.write_str("..."), - False => f.write_str("False"), - None => f.write_str("None"), - True => f.write_str("True"), - And => f.write_str("and"), - As => f.write_str("as"), - Assert => f.write_str("assert"), - Async => f.write_str("async"), - Await => f.write_str("await"), - Break => f.write_str("break"), - Class => f.write_str("class"), - Continue => f.write_str("continue"), - Def => f.write_str("def"), - Del => f.write_str("del"), - Elif => f.write_str("elif"), - Else => f.write_str("else"), - Except => f.write_str("except"), - Finally => f.write_str("finally"), - For => f.write_str("for"), - From => f.write_str("from"), - Global => f.write_str("global"), - If => f.write_str("if"), - Import => f.write_str("import"), - In => f.write_str("in"), - Is => f.write_str("is"), - Lambda => f.write_str("lambda"), - Nonlocal => f.write_str("nonlocal"), - Not => f.write_str("not"), - Or => f.write_str("or"), - Pass => f.write_str("pass"), - Raise => f.write_str("raise"), - Return => f.write_str("return"), - Try => f.write_str("try"), - While => f.write_str("while"), - Match => f.write_str("match"), - Type => f.write_str("type"), - Case => f.write_str("case"), - With => f.write_str("with"), - Yield => f.write_str("yield"), - ColonEqual => f.write_str(":="), - Unknown => f.write_str(">"), - } - } -} - -/// A kind of token. -/// -/// This is a lightweight representation of [`Tok`] which doesn't contain any information -/// about the token itself. +/// A kind of a token. #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)] pub enum TokenKind { - /// Token value for a name, commonly known as an identifier. 
+ /// Token kind for a name, commonly known as an identifier. Name, - /// Token value for an integer. + /// Token kind for an integer. Int, - /// Token value for a floating point number. + /// Token kind for a floating point number. Float, - /// Token value for a complex number. + /// Token kind for a complex number. Complex, - /// Token value for a string. + /// Token kind for a string. String, - /// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix + /// Token kind for the start of an f-string. This includes the `f`/`F`/`fr` prefix /// and the opening quote(s). FStringStart, - /// Token value that includes the portion of text inside the f-string that's not + /// Token kind that includes the portion of text inside the f-string that's not /// part of the expression part and isn't an opening or closing brace. FStringMiddle, - /// Token value for the end of an f-string. This includes the closing quote. + /// Token kind for the end of an f-string. This includes the closing quote. FStringEnd, - /// Token value for a IPython escape command. + /// Token kind for a IPython escape command. IpyEscapeCommand, - /// Token value for a comment. These are filtered out of the token stream prior to parsing. + /// Token kind for a comment. These are filtered out of the token stream prior to parsing. Comment, - /// Token value for a newline. + /// Token kind for a newline. Newline, - /// Token value for a newline that is not a logical line break. These are filtered out of + /// Token kind for a newline that is not a logical line break. These are filtered out of /// the token stream prior to parsing. NonLogicalNewline, - /// Token value for an indent. + /// Token kind for an indent. Indent, - /// Token value for a dedent. + /// Token kind for a dedent. Dedent, EndOfFile, - /// Token value for a question mark `?`. + /// Token kind for a question mark `?`. Question, - /// Token value for an exclamation mark `!`. + /// Token kind for an exclamation mark `!`. Exclamation, - /// Token value for a left parenthesis `(`. + /// Token kind for a left parenthesis `(`. Lpar, - /// Token value for a right parenthesis `)`. + /// Token kind for a right parenthesis `)`. Rpar, - /// Token value for a left square bracket `[`. + /// Token kind for a left square bracket `[`. Lsqb, - /// Token value for a right square bracket `]`. + /// Token kind for a right square bracket `]`. Rsqb, - /// Token value for a colon `:`. + /// Token kind for a colon `:`. Colon, - /// Token value for a comma `,`. + /// Token kind for a comma `,`. Comma, - /// Token value for a semicolon `;`. + /// Token kind for a semicolon `;`. Semi, - /// Token value for plus `+`. + /// Token kind for plus `+`. Plus, - /// Token value for minus `-`. + /// Token kind for minus `-`. Minus, - /// Token value for star `*`. + /// Token kind for star `*`. Star, - /// Token value for slash `/`. + /// Token kind for slash `/`. Slash, - /// Token value for vertical bar `|`. + /// Token kind for vertical bar `|`. Vbar, - /// Token value for ampersand `&`. + /// Token kind for ampersand `&`. Amper, - /// Token value for less than `<`. + /// Token kind for less than `<`. Less, - /// Token value for greater than `>`. + /// Token kind for greater than `>`. Greater, - /// Token value for equal `=`. + /// Token kind for equal `=`. Equal, - /// Token value for dot `.`. + /// Token kind for dot `.`. Dot, - /// Token value for percent `%`. + /// Token kind for percent `%`. Percent, - /// Token value for left bracket `{`. 
+ /// Token kind for left bracket `{`. Lbrace, - /// Token value for right bracket `}`. + /// Token kind for right bracket `}`. Rbrace, - /// Token value for double equal `==`. + /// Token kind for double equal `==`. EqEqual, - /// Token value for not equal `!=`. + /// Token kind for not equal `!=`. NotEqual, - /// Token value for less than or equal `<=`. + /// Token kind for less than or equal `<=`. LessEqual, - /// Token value for greater than or equal `>=`. + /// Token kind for greater than or equal `>=`. GreaterEqual, - /// Token value for tilde `~`. + /// Token kind for tilde `~`. Tilde, - /// Token value for caret `^`. + /// Token kind for caret `^`. CircumFlex, - /// Token value for left shift `<<`. + /// Token kind for left shift `<<`. LeftShift, - /// Token value for right shift `>>`. + /// Token kind for right shift `>>`. RightShift, - /// Token value for double star `**`. + /// Token kind for double star `**`. DoubleStar, - /// Token value for double star equal `**=`. + /// Token kind for double star equal `**=`. DoubleStarEqual, - /// Token value for plus equal `+=`. + /// Token kind for plus equal `+=`. PlusEqual, - /// Token value for minus equal `-=`. + /// Token kind for minus equal `-=`. MinusEqual, - /// Token value for star equal `*=`. + /// Token kind for star equal `*=`. StarEqual, - /// Token value for slash equal `/=`. + /// Token kind for slash equal `/=`. SlashEqual, - /// Token value for percent equal `%=`. + /// Token kind for percent equal `%=`. PercentEqual, - /// Token value for ampersand equal `&=`. + /// Token kind for ampersand equal `&=`. AmperEqual, - /// Token value for vertical bar equal `|=`. + /// Token kind for vertical bar equal `|=`. VbarEqual, - /// Token value for caret equal `^=`. + /// Token kind for caret equal `^=`. CircumflexEqual, - /// Token value for left shift equal `<<=`. + /// Token kind for left shift equal `<<=`. LeftShiftEqual, - /// Token value for right shift equal `>>=`. + /// Token kind for right shift equal `>>=`. RightShiftEqual, - /// Token value for double slash `//`. + /// Token kind for double slash `//`. DoubleSlash, - /// Token value for double slash equal `//=`. + /// Token kind for double slash equal `//=`. DoubleSlashEqual, - /// Token value for colon equal `:=`. + /// Token kind for colon equal `:=`. ColonEqual, - /// Token value for at `@`. + /// Token kind for at `@`. At, - /// Token value for at equal `@=`. + /// Token kind for at equal `@=`. AtEqual, - /// Token value for arrow `->`. + /// Token kind for arrow `->`. Rarrow, - /// Token value for ellipsis `...`. + /// Token kind for ellipsis `...`. Ellipsis, // The keywords should be sorted in alphabetical order. If the boundary tokens for the @@ -534,6 +192,11 @@ pub enum TokenKind { } impl TokenKind { + #[inline] + pub const fn is_eof(self) -> bool { + matches!(self, TokenKind::EndOfFile) + } + #[inline] pub const fn is_newline(self) -> bool { matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline) @@ -541,7 +204,10 @@ impl TokenKind { /// Returns `true` if the token is a keyword (including soft keywords). /// - /// See also [`TokenKind::is_soft_keyword`], [`TokenKind::is_non_soft_keyword`]. + /// See also [`is_soft_keyword`], [`is_non_soft_keyword`]. 
+ /// + /// [`is_soft_keyword`]: TokenKind::is_soft_keyword + /// [`is_non_soft_keyword`]: TokenKind::is_non_soft_keyword #[inline] pub fn is_keyword(self) -> bool { TokenKind::And <= self && self <= TokenKind::Type @@ -549,7 +215,10 @@ impl TokenKind { /// Returns `true` if the token is strictly a soft keyword. /// - /// See also [`TokenKind::is_keyword`], [`TokenKind::is_non_soft_keyword`]. + /// See also [`is_keyword`], [`is_non_soft_keyword`]. + /// + /// [`is_keyword`]: TokenKind::is_keyword + /// [`is_non_soft_keyword`]: TokenKind::is_non_soft_keyword #[inline] pub fn is_soft_keyword(self) -> bool { TokenKind::Case <= self && self <= TokenKind::Type @@ -557,7 +226,10 @@ impl TokenKind { /// Returns `true` if the token is strictly a non-soft keyword. /// - /// See also [`TokenKind::is_keyword`], [`TokenKind::is_soft_keyword`]. + /// See also [`is_keyword`], [`is_soft_keyword`]. + /// + /// [`is_keyword`]: TokenKind::is_keyword + /// [`is_soft_keyword`]: TokenKind::is_soft_keyword #[inline] pub fn is_non_soft_keyword(self) -> bool { TokenKind::And <= self && self <= TokenKind::Yield @@ -677,10 +349,12 @@ impl TokenKind { matches!(self, TokenKind::Plus | TokenKind::Minus) } - /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is an arithmetic unary + /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary arithmetic /// operator, otherwise return [None]. /// - /// Use [`TokenKind::as_unary_operator`] to match against any unary operator. + /// Use [`as_unary_operator`] to match against any unary operator. + /// + /// [`as_unary_operator`]: TokenKind::as_unary_operator #[inline] pub(crate) const fn as_unary_arithmetic_operator(self) -> Option { Some(match self { @@ -693,8 +367,9 @@ impl TokenKind { /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary operator, /// otherwise return [None]. /// - /// Use [`TokenKind::as_unary_arithmetic_operator`] to match against only an arithmetic unary - /// operator. + /// Use [`as_unary_arithmetic_operator`] to match against only an arithmetic unary operator. + /// + /// [`as_unary_arithmetic_operator`]: TokenKind::as_unary_arithmetic_operator #[inline] pub(crate) const fn as_unary_operator(self) -> Option { Some(match self { @@ -720,8 +395,9 @@ impl TokenKind { /// Returns the binary [`Operator`] that corresponds to the current token, if it's a binary /// operator, otherwise return [None]. /// - /// Use [`TokenKind::as_augmented_assign_operator`] to match against an augmented assignment - /// token. + /// Use [`as_augmented_assign_operator`] to match against an augmented assignment token. + /// + /// [`as_augmented_assign_operator`]: TokenKind::as_augmented_assign_operator pub(crate) const fn as_binary_operator(self) -> Option { Some(match self { TokenKind::Plus => Operator::Add, @@ -762,126 +438,6 @@ impl TokenKind { _ => return None, }) } - - pub const fn from_token(token: &Tok) -> Self { - match token { - Tok::Name { .. } => TokenKind::Name, - Tok::Int { .. } => TokenKind::Int, - Tok::Float { .. } => TokenKind::Float, - Tok::Complex { .. } => TokenKind::Complex, - Tok::String { .. } => TokenKind::String, - Tok::FStringStart(_) => TokenKind::FStringStart, - Tok::FStringMiddle { .. } => TokenKind::FStringMiddle, - Tok::FStringEnd => TokenKind::FStringEnd, - Tok::IpyEscapeCommand { .. 
} => TokenKind::IpyEscapeCommand, - Tok::Comment(_) => TokenKind::Comment, - Tok::Newline => TokenKind::Newline, - Tok::NonLogicalNewline => TokenKind::NonLogicalNewline, - Tok::Indent => TokenKind::Indent, - Tok::Dedent => TokenKind::Dedent, - Tok::EndOfFile => TokenKind::EndOfFile, - Tok::Question => TokenKind::Question, - Tok::Exclamation => TokenKind::Exclamation, - Tok::Lpar => TokenKind::Lpar, - Tok::Rpar => TokenKind::Rpar, - Tok::Lsqb => TokenKind::Lsqb, - Tok::Rsqb => TokenKind::Rsqb, - Tok::Colon => TokenKind::Colon, - Tok::Comma => TokenKind::Comma, - Tok::Semi => TokenKind::Semi, - Tok::Plus => TokenKind::Plus, - Tok::Minus => TokenKind::Minus, - Tok::Star => TokenKind::Star, - Tok::Slash => TokenKind::Slash, - Tok::Vbar => TokenKind::Vbar, - Tok::Amper => TokenKind::Amper, - Tok::Less => TokenKind::Less, - Tok::Greater => TokenKind::Greater, - Tok::Equal => TokenKind::Equal, - Tok::Dot => TokenKind::Dot, - Tok::Percent => TokenKind::Percent, - Tok::Lbrace => TokenKind::Lbrace, - Tok::Rbrace => TokenKind::Rbrace, - Tok::EqEqual => TokenKind::EqEqual, - Tok::NotEqual => TokenKind::NotEqual, - Tok::LessEqual => TokenKind::LessEqual, - Tok::GreaterEqual => TokenKind::GreaterEqual, - Tok::Tilde => TokenKind::Tilde, - Tok::CircumFlex => TokenKind::CircumFlex, - Tok::LeftShift => TokenKind::LeftShift, - Tok::RightShift => TokenKind::RightShift, - Tok::DoubleStar => TokenKind::DoubleStar, - Tok::DoubleStarEqual => TokenKind::DoubleStarEqual, - Tok::PlusEqual => TokenKind::PlusEqual, - Tok::MinusEqual => TokenKind::MinusEqual, - Tok::StarEqual => TokenKind::StarEqual, - Tok::SlashEqual => TokenKind::SlashEqual, - Tok::PercentEqual => TokenKind::PercentEqual, - Tok::AmperEqual => TokenKind::AmperEqual, - Tok::VbarEqual => TokenKind::VbarEqual, - Tok::CircumflexEqual => TokenKind::CircumflexEqual, - Tok::LeftShiftEqual => TokenKind::LeftShiftEqual, - Tok::RightShiftEqual => TokenKind::RightShiftEqual, - Tok::DoubleSlash => TokenKind::DoubleSlash, - Tok::DoubleSlashEqual => TokenKind::DoubleSlashEqual, - Tok::ColonEqual => TokenKind::ColonEqual, - Tok::At => TokenKind::At, - Tok::AtEqual => TokenKind::AtEqual, - Tok::Rarrow => TokenKind::Rarrow, - Tok::Ellipsis => TokenKind::Ellipsis, - Tok::False => TokenKind::False, - Tok::None => TokenKind::None, - Tok::True => TokenKind::True, - Tok::And => TokenKind::And, - Tok::As => TokenKind::As, - Tok::Assert => TokenKind::Assert, - Tok::Async => TokenKind::Async, - Tok::Await => TokenKind::Await, - Tok::Break => TokenKind::Break, - Tok::Class => TokenKind::Class, - Tok::Continue => TokenKind::Continue, - Tok::Def => TokenKind::Def, - Tok::Del => TokenKind::Del, - Tok::Elif => TokenKind::Elif, - Tok::Else => TokenKind::Else, - Tok::Except => TokenKind::Except, - Tok::Finally => TokenKind::Finally, - Tok::For => TokenKind::For, - Tok::From => TokenKind::From, - Tok::Global => TokenKind::Global, - Tok::If => TokenKind::If, - Tok::Import => TokenKind::Import, - Tok::In => TokenKind::In, - Tok::Is => TokenKind::Is, - Tok::Lambda => TokenKind::Lambda, - Tok::Nonlocal => TokenKind::Nonlocal, - Tok::Not => TokenKind::Not, - Tok::Or => TokenKind::Or, - Tok::Pass => TokenKind::Pass, - Tok::Raise => TokenKind::Raise, - Tok::Return => TokenKind::Return, - Tok::Try => TokenKind::Try, - Tok::While => TokenKind::While, - Tok::Match => TokenKind::Match, - Tok::Case => TokenKind::Case, - Tok::Type => TokenKind::Type, - Tok::With => TokenKind::With, - Tok::Yield => TokenKind::Yield, - Tok::Unknown => TokenKind::Unknown, - } - } -} - -impl From<&Tok> for TokenKind 
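Editor's note: the `TokenKind` predicates added in this hunk (`is_eof`, `is_keyword`, `is_soft_keyword`, `is_non_soft_keyword`) lean on the alphabetical ordering of the keyword variants. A minimal sketch of how a caller might use them, assuming `TokenKind` is re-exported from the `ruff_python_parser` crate root:

```rust
use ruff_python_parser::TokenKind;

/// Classify a token kind using the predicate helpers from this diff.
fn classify(kind: TokenKind) -> &'static str {
    if kind.is_soft_keyword() {
        "soft keyword" // `match`, `case`, `type`
    } else if kind.is_non_soft_keyword() {
        "keyword"
    } else if kind.is_eof() {
        "end of file"
    } else {
        "other"
    }
}

fn main() {
    assert_eq!(classify(TokenKind::Match), "soft keyword");
    assert_eq!(classify(TokenKind::While), "keyword");
    assert_eq!(classify(TokenKind::EndOfFile), "end of file");
    assert_eq!(classify(TokenKind::Comma), "other");
}
```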
{ - fn from(value: &Tok) -> Self { - Self::from_token(value) - } -} - -impl From for TokenKind { - fn from(value: Tok) -> Self { - Self::from_token(&value) - } } impl From for TokenKind { @@ -1041,10 +597,8 @@ impl fmt::Display for TokenKind { #[cfg(target_pointer_width = "64")] mod sizes { use crate::lexer::{LexicalError, LexicalErrorType}; - use crate::Tok; use static_assertions::assert_eq_size; - assert_eq_size!(Tok, [u8; 24]); assert_eq_size!(LexicalErrorType, [u8; 24]); - assert_eq_size!(Result, [u8; 32]); + assert_eq_size!(LexicalError, [u8; 32]); } diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs index 1b48b143cf..005c5ff38d 100644 --- a/crates/ruff_python_parser/src/token_source.rs +++ b/crates/ruff_python_parser/src/token_source.rs @@ -1,115 +1,189 @@ -use std::iter::FusedIterator; - +use ruff_python_trivia::CommentRanges; use ruff_text_size::{TextRange, TextSize}; -use crate::lexer::{LexResult, LexicalError, Spanned}; -use crate::{Tok, TokenKind}; +use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue}; +use crate::{Mode, TokenKind}; -#[derive(Clone, Debug)] -pub(crate) struct TokenSource { - tokens: std::vec::IntoIter, - errors: Vec, +/// Token source for the parser that skips over any trivia tokens. +#[derive(Debug)] +pub(crate) struct TokenSource<'src> { + /// The underlying source for the tokens. + lexer: Lexer<'src>, + + /// A vector containing all the tokens emitted by the lexer. This is returned when the parser + /// is finished consuming all the tokens. Note that unlike the emitted tokens, this vector + /// holds both the trivia and non-trivia tokens. + tokens: Vec, + + /// A vector containing the range of all the comment tokens emitted by the lexer. + comments: Vec, } -impl TokenSource { - pub(crate) fn new(tokens: Vec) -> Self { - Self { - tokens: tokens.into_iter(), - errors: Vec::new(), +impl<'src> TokenSource<'src> { + /// Create a new token source for the given lexer. + pub(crate) fn new(lexer: Lexer<'src>) -> Self { + // TODO(dhruvmanila): Use `allocate_tokens_vec` + TokenSource { + lexer, + tokens: vec![], + comments: vec![], } } - /// Returns the position of the current token. + /// Create a new token source from the given source code which starts at the given offset. + pub(crate) fn from_source(source: &'src str, mode: Mode, start_offset: TextSize) -> Self { + let lexer = Lexer::new(source, mode, start_offset); + let mut source = TokenSource::new(lexer); + + // Initialize the token source so that the current token is set correctly. + source.do_bump(); + source + } + + /// Returns the kind of the current token. + pub(crate) fn current_kind(&self) -> TokenKind { + self.lexer.current_kind() + } + + /// Returns the range of the current token. + pub(crate) fn current_range(&self) -> TextRange { + self.lexer.current_range() + } + + /// Returns the flags for the current token. + pub(crate) fn current_flags(&self) -> TokenFlags { + self.lexer.current_flags() + } + + /// Calls the underlying [`take_value`] method on the lexer. Refer to its documentation + /// for more info. /// - /// This is the position before any whitespace or comments. 
- pub(crate) fn position(&self) -> Option { - let first = self.tokens.as_slice().first()?; - - let range = match first { - Ok((_, range)) => *range, - Err(error) => error.location(), - }; - - Some(range.start()) + /// [`take_value`]: Lexer::take_value + pub(crate) fn take_value(&mut self) -> TokenValue { + self.lexer.take_value() } - /// Returns the end of the last token - pub(crate) fn end(&self) -> Option { - let last = self.tokens.as_slice().last()?; - - let range = match last { - Ok((_, range)) => *range, - Err(error) => error.location(), - }; - - Some(range.end()) + /// Returns the next non-trivia token without consuming it. + /// + /// Use [`peek2`] to get the next two tokens. + /// + /// [`peek2`]: TokenSource::peek2 + pub(crate) fn peek(&mut self) -> TokenKind { + let checkpoint = self.lexer.checkpoint(); + let next = self.next_non_trivia_token(); + self.lexer.rewind(checkpoint); + next } - /// Returns the next token kind and its range without consuming it. - pub(crate) fn peek(&self) -> Option<(TokenKind, TextRange)> { - let mut iter = self.tokens.as_slice().iter(); + /// Returns the next two non-trivia tokens without consuming it. + /// + /// Use [`peek`] to only get the next token. + /// + /// [`peek`]: TokenSource::peek + pub(crate) fn peek2(&mut self) -> (TokenKind, TokenKind) { + let checkpoint = self.lexer.checkpoint(); + let first = self.next_non_trivia_token(); + let second = self.next_non_trivia_token(); + self.lexer.rewind(checkpoint); + (first, second) + } + /// Bumps the token source to the next non-trivia token. + /// + /// It pushes the given kind to the token vector with the current token range. + pub(crate) fn bump(&mut self, kind: TokenKind) { + self.tokens + .push(Token::new(kind, self.current_range(), self.current_flags())); + self.do_bump(); + } + + /// Bumps the token source to the next non-trivia token without adding the current token to the + /// token vector. It does add the trivia tokens to the token vector. + fn do_bump(&mut self) { loop { - let next = iter.next()?; - - if next.as_ref().is_ok_and(is_trivia) { + let kind = self.lexer.next_token(); + if is_trivia(kind) { + if kind == TokenKind::Comment { + self.comments.push(self.current_range()); + } + self.tokens + .push(Token::new(kind, self.current_range(), self.current_flags())); continue; } - - break Some(match next { - Ok((token, range)) => (TokenKind::from_token(token), *range), - Err(error) => (TokenKind::Unknown, error.location()), - }); + break; } } - pub(crate) fn finish(self) -> Vec { + /// Returns the next non-trivia token without adding it to the token vector. + fn next_non_trivia_token(&mut self) -> TokenKind { + loop { + let kind = self.lexer.next_token(); + if is_trivia(kind) { + continue; + } + break kind; + } + } + + /// Creates a checkpoint to which the token source can later return to using [`Self::rewind`]. + pub(crate) fn checkpoint(&self) -> TokenSourceCheckpoint<'src> { + TokenSourceCheckpoint { + lexer_checkpoint: self.lexer.checkpoint(), + tokens_position: self.tokens.len(), + comments_position: self.comments.len(), + } + } + + /// Restore the token source to the given checkpoint. 
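Editor's note: `TokenSource::peek` and `peek2` above are built on the lexer's checkpoint/rewind pair: take a checkpoint, advance past trivia, then rewind so nothing is consumed. A self-contained sketch of that pattern with hypothetical stand-in types (not ruff's actual `Lexer`/`TokenSource`, which carry far more state):

```rust
/// Hypothetical checkpoint: just a position here; ruff's real checkpoint also
/// restores nesting level, f-string context, pending errors, and so on.
#[derive(Clone, Copy)]
struct MiniCheckpoint {
    position: usize,
}

struct MiniLexer<'src> {
    tokens: &'src [&'src str],
    position: usize,
}

impl<'src> MiniLexer<'src> {
    fn next_token(&mut self) -> Option<&'src str> {
        let token = self.tokens.get(self.position).copied();
        self.position += 1;
        token
    }

    fn checkpoint(&self) -> MiniCheckpoint {
        MiniCheckpoint {
            position: self.position,
        }
    }

    fn rewind(&mut self, checkpoint: MiniCheckpoint) {
        self.position = checkpoint.position;
    }

    /// The shape of `TokenSource::peek`: look ahead without consuming.
    fn peek(&mut self) -> Option<&'src str> {
        let checkpoint = self.checkpoint();
        let next = self.next_token();
        self.rewind(checkpoint);
        next
    }
}

fn main() {
    let tokens = ["match", "x", ":"];
    let mut lexer = MiniLexer {
        tokens: &tokens,
        position: 0,
    };
    assert_eq!(lexer.peek(), Some("match"));
    assert_eq!(lexer.next_token(), Some("match")); // peek did not consume it
}
```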
+ pub(crate) fn rewind(&mut self, checkpoint: TokenSourceCheckpoint<'src>) { + let TokenSourceCheckpoint { + lexer_checkpoint, + tokens_position, + comments_position, + } = checkpoint; + + self.lexer.rewind(lexer_checkpoint); + self.tokens.truncate(tokens_position); + self.comments.truncate(comments_position); + } + + /// Consumes the token source, returning the collected tokens, comment ranges, and any errors + /// encountered during lexing. The token collection includes both the trivia and non-trivia + /// tokens. + pub(crate) fn finish(mut self) -> (Vec, CommentRanges, Vec) { assert_eq!( - self.tokens.as_slice(), - &[], - "TokenSource was not fully consumed." + self.current_kind(), + TokenKind::EndOfFile, + "TokenSource was not fully consumed" ); - self.errors - } -} - -impl FromIterator for TokenSource { - #[inline] - fn from_iter>(iter: T) -> Self { - Self::new(Vec::from_iter(iter)) - } -} - -impl Iterator for TokenSource { - type Item = Spanned; - - #[inline] - fn next(&mut self) -> Option { - loop { - let next = self.tokens.next()?; - - match next { - Ok(token) => { - if is_trivia(&token) { - continue; - } - - break Some(token); - } - - Err(error) => { - let location = error.location(); - self.errors.push(error); - break Some((Tok::Unknown, location)); - } - } + // The `EndOfFile` token shouldn't be included in the token stream, it's mainly to signal + // the parser to stop. This isn't in `do_bump` because it only needs to be done once. + if let Some(last) = self.tokens.pop() { + assert_eq!(last.kind(), TokenKind::EndOfFile); } + + let comment_ranges = CommentRanges::new(self.comments); + (self.tokens, comment_ranges, self.lexer.finish()) } } -impl FusedIterator for TokenSource {} - -const fn is_trivia(result: &Spanned) -> bool { - matches!(result, (Tok::Comment(_) | Tok::NonLogicalNewline, _)) +pub(crate) struct TokenSourceCheckpoint<'src> { + lexer_checkpoint: LexerCheckpoint<'src>, + tokens_position: usize, + comments_position: usize, +} + +/// Allocates a [`Vec`] with an approximated capacity to fit all tokens +/// of `contents`. +/// +/// See [#9546](https://github.com/astral-sh/ruff/pull/9546) for a more detailed explanation. +#[allow(dead_code)] +fn allocate_tokens_vec(contents: &str) -> Vec { + let lower_bound = contents.len().saturating_mul(15) / 100; + Vec::with_capacity(lower_bound) +} + +fn is_trivia(token: TokenKind) -> bool { + matches!(token, TokenKind::Comment | TokenKind::NonLogicalNewline) } diff --git a/crates/ruff_python_parser/src/typing.rs b/crates/ruff_python_parser/src/typing.rs index c8d82304e9..02ebf3243c 100644 --- a/crates/ruff_python_parser/src/typing.rs +++ b/crates/ruff_python_parser/src/typing.rs @@ -6,7 +6,7 @@ use ruff_python_ast::relocate::relocate_expr; use ruff_python_ast::{str, Expr}; use ruff_text_size::{TextLen, TextRange}; -use crate::{parse_expression, parse_expression_starts_at}; +use crate::{parse_expression, parse_expression_range}; #[derive(is_macro::Is, Copy, Clone, Debug)] pub enum AnnotationKind { @@ -22,25 +22,30 @@ pub enum AnnotationKind { Complex, } -/// Parse a type annotation from a string. +/// Parses the value of a string literal node (`parsed_contents`) with `range` as a type +/// annotation. The given `source` is the entire source code. 
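Editor's note: `allocate_tokens_vec` above sizes the token vector from a rough empirical ratio (tokens come out to roughly 15% of the source length in bytes, per the linked PR #9546). The arithmetic in isolation, mirroring the function in this diff:

```rust
/// Same heuristic as `allocate_tokens_vec` above: pre-size the token vector
/// to roughly 15% of the source length in bytes.
fn approximate_token_capacity(contents: &str) -> usize {
    contents.len().saturating_mul(15) / 100
}

fn main() {
    // A 2,000-byte module gets an initial capacity of 300 tokens.
    assert_eq!(approximate_token_capacity(&"x".repeat(2_000)), 300);
}
```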
pub fn parse_type_annotation( - value: &str, + parsed_contents: &str, range: TextRange, source: &str, ) -> Result<(Expr, AnnotationKind)> { let expression = &source[range]; - if str::raw_contents(expression).is_some_and(|body| body == value) { + if str::raw_contents(expression).is_some_and(|raw_contents| raw_contents == parsed_contents) { // The annotation is considered "simple" if and only if the raw representation (e.g., // `List[int]` within "List[int]") exactly matches the parsed representation. This // isn't the case, e.g., for implicit concatenations, or for annotations that contain // escaped quotes. - let leading_quote = str::leading_quote(expression).unwrap(); - let expr = parse_expression_starts_at(value, range.start() + leading_quote.text_len())?; + let leading_quote_len = str::leading_quote(expression).unwrap().text_len(); + let trailing_quote_len = str::trailing_quote(expression).unwrap().text_len(); + let range = range + .add_start(leading_quote_len) + .sub_end(trailing_quote_len); + let expr = parse_expression_range(source, range)?.into_expr(); Ok((expr, AnnotationKind::Simple)) } else { // Otherwise, consider this a "complex" annotation. - let mut expr = parse_expression(value)?; + let mut expr = parse_expression(parsed_contents)?.into_expr(); relocate_expr(&mut expr, range); Ok((expr, AnnotationKind::Complex)) } diff --git a/crates/ruff_python_parser/tests/fixtures.rs b/crates/ruff_python_parser/tests/fixtures.rs index 58896d6912..2a3dce311a 100644 --- a/crates/ruff_python_parser/tests/fixtures.rs +++ b/crates/ruff_python_parser/tests/fixtures.rs @@ -8,7 +8,7 @@ use annotate_snippets::snippet::{AnnotationType, Slice, Snippet, SourceAnnotatio use ruff_python_ast::visitor::preorder::{walk_module, PreorderVisitor, TraversalSignal}; use ruff_python_ast::{AnyNodeRef, Mod}; -use ruff_python_parser::{Mode, ParseErrorType, Program}; +use ruff_python_parser::{parse_unchecked, Mode, ParseErrorType}; use ruff_source_file::{LineIndex, OneIndexed, SourceCode}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -36,15 +36,15 @@ fn inline_err() { /// Snapshots the AST. fn test_valid_syntax(input_path: &Path) { let source = fs::read_to_string(input_path).expect("Expected test file to exist"); - let program = Program::parse_str(&source, Mode::Module); + let parsed = parse_unchecked(&source, Mode::Module); - if !program.is_valid() { + if !parsed.is_valid() { let line_index = LineIndex::from_source_text(&source); let source_code = SourceCode::new(&source, &line_index); let mut message = "Expected no syntax errors for a valid program but the parser generated the following errors:\n".to_string(); - for error in program.errors() { + for error in parsed.errors() { writeln!( &mut message, "{}\n", @@ -60,11 +60,11 @@ fn test_valid_syntax(input_path: &Path) { panic!("{input_path:?}: {message}"); } - validate_ast(program.ast(), source.text_len(), input_path); + validate_ast(parsed.syntax(), source.text_len(), input_path); let mut output = String::new(); writeln!(&mut output, "## AST").unwrap(); - writeln!(&mut output, "\n```\n{:#?}\n```", program.ast()).unwrap(); + writeln!(&mut output, "\n```\n{:#?}\n```", parsed.syntax()).unwrap(); insta::with_settings!({ omit_expression => true, @@ -79,25 +79,25 @@ fn test_valid_syntax(input_path: &Path) { /// Snapshots the AST and the error messages. 
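Editor's note: the rewritten `parse_type_annotation` above handles the "simple" case by trimming the quote lengths off the string literal's range and calling `parse_expression_range` against the original source, so the expression gets correct offsets without relocation. The range arithmetic, sketched with a hypothetical annotation `"List[int]"` starting at offset 3:

```rust
use ruff_text_size::{TextLen, TextRange, TextSize};

fn main() {
    // Hypothetical source: the string literal `"List[int]"` spans offsets 3..14.
    //     x: "List[int]" = []
    let source = r#"x: "List[int]" = []"#;
    let literal_range = TextRange::new(TextSize::from(3), TextSize::from(14));

    // Trim the leading and trailing quote, as `parse_type_annotation` does,
    // so the contents are parsed at their true location in `source`.
    let quote_len = "\"".text_len();
    let contents_range = literal_range.add_start(quote_len).sub_end(quote_len);

    assert_eq!(&source[contents_range], "List[int]");
}
```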
fn test_invalid_syntax(input_path: &Path) { let source = fs::read_to_string(input_path).expect("Expected test file to exist"); - let program = Program::parse_str(&source, Mode::Module); + let parsed = parse_unchecked(&source, Mode::Module); assert!( - !program.is_valid(), + !parsed.is_valid(), "{input_path:?}: Expected parser to generate at least one syntax error for a program containing syntax errors." ); - validate_ast(program.ast(), source.text_len(), input_path); + validate_ast(parsed.syntax(), source.text_len(), input_path); let mut output = String::new(); writeln!(&mut output, "## AST").unwrap(); - writeln!(&mut output, "\n```\n{:#?}\n```", program.ast()).unwrap(); + writeln!(&mut output, "\n```\n{:#?}\n```", parsed.syntax()).unwrap(); writeln!(&mut output, "## Errors\n").unwrap(); let line_index = LineIndex::from_source_text(&source); let source_code = SourceCode::new(&source, &line_index); - for error in program.errors() { + for error in parsed.errors() { writeln!( &mut output, "{}\n", @@ -126,20 +126,22 @@ fn test_invalid_syntax(input_path: &Path) { #[allow(clippy::print_stdout)] fn parser_quick_test() { let source = "\ -data[*x,] +def foo() + pass "; - let program = Program::parse_str(source, Mode::Module); + let parsed = parse_unchecked(source, Mode::Module); - println!("AST:\n----\n{:#?}", program.ast()); + println!("AST:\n----\n{:#?}", parsed.syntax()); + println!("Tokens:\n-------\n{:#?}", parsed.tokens()); - if !program.is_valid() { + if !parsed.is_valid() { println!("Errors:\n-------"); let line_index = LineIndex::from_source_text(source); let source_code = SourceCode::new(source, &line_index); - for error in program.errors() { + for error in parsed.errors() { // Sometimes the code frame doesn't show the error message, so we print // the message as well. 
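Editor's note: the fixture and quick-test changes above drive the parser through the new `parse_unchecked` entry point, which always returns a result carrying the recovered AST, the token stream, and any syntax errors. A minimal sketch of that flow, assuming `ruff_python_parser` is available as a dependency:

```rust
use ruff_python_parser::{parse_unchecked, Mode};

/// Parse a module and print what the fixtures above snapshot: AST, tokens, errors.
fn dump(source: &str) {
    let parsed = parse_unchecked(source, Mode::Module);
    println!("AST:\n----\n{:#?}", parsed.syntax());
    println!("Tokens:\n-------\n{:#?}", parsed.tokens());
    if !parsed.is_valid() {
        for error in parsed.errors() {
            println!("Syntax Error: {error}");
        }
    }
}

fn main() {
    // The same intentionally broken snippet used by `parser_quick_test` above.
    dump("def foo()\n    pass\n");
}
```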
println!("Syntax Error: {error}"); diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@ann_assign_stmt_type_alias_annotation.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@ann_assign_stmt_type_alias_annotation.py.snap index 3ced503deb..ef88c92d75 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@ann_assign_stmt_type_alias_annotation.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@ann_assign_stmt_type_alias_annotation.py.snap @@ -96,13 +96,6 @@ Module( ``` ## Errors - | -1 | a: type X = int - | ^^^^ Syntax Error: Expected an identifier, but found a keyword 'type' that cannot be used here -2 | lambda: type X = int - | - - | 1 | a: type X = int | ^ Syntax Error: Simple statements must be separated by newlines or semicolons @@ -110,13 +103,6 @@ Module( | - | -1 | a: type X = int -2 | lambda: type X = int - | ^^^^ Syntax Error: Expected an identifier, but found a keyword 'type' that cannot be used here - | - - | 1 | a: type X = int 2 | lambda: type X = int diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@async_unexpected_token.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@async_unexpected_token.py.snap index d64d49c708..37154ac1ee 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@async_unexpected_token.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@async_unexpected_token.py.snap @@ -7,7 +7,7 @@ input_file: crates/ruff_python_parser/resources/inline/err/async_unexpected_toke ``` Module( ModModule { - range: 0..220, + range: 0..116, body: [ ClassDef( StmtClassDef { @@ -113,56 +113,41 @@ Module( ], }, ), - Expr( - StmtExpr { - range: 192..197, - value: Name( + Match( + StmtMatch { + range: 88..115, + subject: Name( ExprName { - range: 192..197, - id: "match", + range: 94..98, + id: "test", ctx: Load, }, ), - }, - ), - AnnAssign( - StmtAnnAssign { - range: 198..203, - target: Name( - ExprName { - range: 198..202, - id: "test", - ctx: Store, + cases: [ + MatchCase { + range: 104..115, + pattern: MatchAs( + PatternMatchAs { + range: 109..110, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 112..115, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 112..115, + }, + ), + }, + ), + ], }, - ), - annotation: Name( - ExprName { - range: 203..203, - id: "", - ctx: Invalid, - }, - ), - value: None, - simple: true, - }, - ), - AnnAssign( - StmtAnnAssign { - range: 213..219, - target: Name( - ExprName { - range: 213..214, - id: "_", - ctx: Store, - }, - ), - annotation: EllipsisLiteral( - ExprEllipsisLiteral { - range: 216..219, - }, - ), - value: None, - simple: true, + ], }, ), ], @@ -194,7 +179,7 @@ Module( 3 | async x = 1 | ^ Syntax Error: Expected 'def', 'with' or 'for' to follow 'async', found name 4 | async async def foo(): ... -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because +5 | async match test: | @@ -203,55 +188,15 @@ Module( 3 | async x = 1 4 | async async def foo(): ... | ^^^^^ Syntax Error: Expected 'def', 'with' or 'for' to follow 'async', found 'async' -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because -6 | # of the soft keyword # transformer +5 | async match test: +6 | case _: ... | | -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because -6 | # of the soft keyword # transformer -7 | async match test: - | ^^^^^ Syntax Error: Expected 'def', 'with' or 'for' to follow 'async', found name -8 | case _: ... 
- | - - - | -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because -6 | # of the soft keyword # transformer -7 | async match test: - | ^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons -8 | case _: ... - | - - - | -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because -6 | # of the soft keyword # transformer -7 | async match test: - | ^ Syntax Error: Expected an expression -8 | case _: ... - | - - - | -6 | # of the soft keyword # transformer -7 | async match test: -8 | case _: ... - | ^^^^ Syntax Error: Unexpected indentation - | - - - | -6 | # of the soft keyword # transformer -7 | async match test: -8 | case _: ... - | ^^^^ Syntax Error: Expected a statement - | - - - | -7 | async match test: -8 | case _: ... +3 | async x = 1 +4 | async async def foo(): ... +5 | async match test: + | ^^^^^ Syntax Error: Expected 'def', 'with' or 'for' to follow 'async', found 'match' +6 | case _: ... | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword.py.snap new file mode 100644 index 0000000000..70c3203746 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword.py.snap @@ -0,0 +1,66 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..33, + body: [ + Match( + StmtMatch { + range: 0..32, + subject: Yield( + ExprYield { + range: 6..15, + value: Some( + Name( + ExprName { + range: 12..15, + id: "foo", + ctx: Load, + }, + ), + ), + }, + ), + cases: [ + MatchCase { + range: 21..32, + pattern: MatchAs( + PatternMatchAs { + range: 26..27, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 29..32, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 29..32, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | match yield foo: + | ^^^^^^^^^ Syntax Error: Yield expression cannot be used here +2 | case _: ... + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword_or_identifier.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword_or_identifier.py.snap new file mode 100644 index 0000000000..4f420387e9 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword_or_identifier.py.snap @@ -0,0 +1,65 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword_or_identifier.py +--- +## AST + +``` +Module( + ModModule { + range: 0..39, + body: [ + Match( + StmtMatch { + range: 0..38, + subject: Starred( + ExprStarred { + range: 6..10, + value: Name( + ExprName { + range: 7..10, + id: "foo", + ctx: Load, + }, + ), + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 27..38, + pattern: MatchAs( + PatternMatchAs { + range: 32..33, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 35..38, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 35..38, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | match *foo: # Keyword + | ^^^^ Syntax Error: Starred expression cannot be used here +2 | case _: ... 
+ | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_expected_colon.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_expected_colon.py.snap new file mode 100644 index 0000000000..4b4f623995 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_expected_colon.py.snap @@ -0,0 +1,76 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/match_expected_colon.py +--- +## AST + +``` +Module( + ModModule { + range: 0..29, + body: [ + Match( + StmtMatch { + range: 0..28, + subject: List( + ExprList { + range: 6..12, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 7..8, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 10..11, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 17..28, + pattern: MatchAs( + PatternMatchAs { + range: 22..23, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 25..28, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 25..28, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | match [1, 2] + | ^ Syntax Error: Expected ':', found newline +2 | case _: ... + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_missing_pattern.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_missing_pattern.py.snap index 4736985e44..882bb79838 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_missing_pattern.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_missing_pattern.py.snap @@ -7,38 +7,48 @@ input_file: crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pa ``` Module( ModModule { - range: 0..110, + range: 0..24, body: [ Match( StmtMatch { - range: 86..99, + range: 0..23, subject: Name( ExprName { - range: 92..93, + range: 6..7, id: "x", ctx: Load, }, ), - cases: [], - }, - ), - AnnAssign( - StmtAnnAssign { - range: 99..109, - target: Name( - ExprName { - range: 99..103, - id: "case", - ctx: Store, + cases: [ + MatchCase { + range: 13..23, + pattern: MatchValue( + PatternMatchValue { + range: 17..17, + value: Name( + ExprName { + range: 17..17, + id: "", + ctx: Invalid, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 20..23, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 20..23, + }, + ), + }, + ), + ], }, - ), - annotation: EllipsisLiteral( - ExprEllipsisLiteral { - range: 106..109, - }, - ), - value: None, - simple: true, + ], }, ), ], @@ -48,14 +58,7 @@ Module( ## Errors | -1 | # TODO(dhruvmanila): Here, `case` is a name token because of soft keyword transformer -2 | match x: -3 | case : ... - | ^^^^ Syntax Error: Expected `case` block - | - - - | -2 | match x: -3 | case : ... +1 | match x: +2 | case : ... 
+ | ^ Syntax Error: Expected a pattern | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_no_newline_before_case.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_no_newline_before_case.py.snap index 0d11f2c94c..2e8be2f306 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_no_newline_before_case.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_no_newline_before_case.py.snap @@ -11,7 +11,7 @@ Module( body: [ Match( StmtMatch { - range: 0..10, + range: 0..22, subject: Name( ExprName { range: 6..9, @@ -19,38 +19,31 @@ Module( ctx: Load, }, ), - cases: [], - }, - ), - Expr( - StmtExpr { - range: 11..15, - value: Name( - ExprName { - range: 11..15, - id: "case", - ctx: Load, + cases: [ + MatchCase { + range: 11..22, + pattern: MatchAs( + PatternMatchAs { + range: 16..17, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 19..22, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 19..22, + }, + ), + }, + ), + ], }, - ), - }, - ), - AnnAssign( - StmtAnnAssign { - range: 16..22, - target: Name( - ExprName { - range: 16..17, - id: "_", - ctx: Store, - }, - ), - annotation: EllipsisLiteral( - ExprEllipsisLiteral { - range: 19..22, - }, - ), - value: None, - simple: true, + ], }, ), ], @@ -61,11 +54,10 @@ Module( | 1 | match foo: case _: ... - | ^^^^ Syntax Error: Expected newline, found name + | ^^^^ Syntax Error: Expected newline, found 'case' | | 1 | match foo: case _: ... - | ^ Syntax Error: Simple statements must be separated by newlines or semicolons | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_4.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_4.py.snap index d8e9b3da91..3b1a06c49d 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_4.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_4.py.snap @@ -42,14 +42,14 @@ Module( ), ], patterns: [ - MatchValue( - PatternMatchValue { + MatchAs( + PatternMatchAs { range: 164..166, - value: Name( - ExprName { - range: 164..166, + pattern: None, + name: Some( + Identifier { id: "as", - ctx: Load, + range: 164..166, }, ), }, diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@except_stmt_as_name_soft_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@except_stmt_as_name_soft_keyword.py.snap new file mode 100644 index 0000000000..d9ddd2be7c --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@except_stmt_as_name_soft_keyword.py.snap @@ -0,0 +1,133 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/except_stmt_as_name_soft_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..100, + body: [ + Try( + StmtTry { + range: 0..99, + body: [ + Expr( + StmtExpr { + range: 5..8, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 5..8, + }, + ), + }, + ), + ], + handlers: [ + ExceptHandler( + ExceptHandlerExceptHandler { + range: 9..39, + type_: Some( + Name( + ExprName { + range: 16..25, + id: "Exception", + ctx: Load, + }, + ), + ), + name: Some( + Identifier { + id: "match", + range: 29..34, + }, + ), + body: [ + Expr( + StmtExpr { + range: 36..39, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 36..39, + }, + ), + }, + ), + ], + }, + 
), + ExceptHandler( + ExceptHandlerExceptHandler { + range: 40..69, + type_: Some( + Name( + ExprName { + range: 47..56, + id: "Exception", + ctx: Load, + }, + ), + ), + name: Some( + Identifier { + id: "case", + range: 60..64, + }, + ), + body: [ + Expr( + StmtExpr { + range: 66..69, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 66..69, + }, + ), + }, + ), + ], + }, + ), + ExceptHandler( + ExceptHandlerExceptHandler { + range: 70..99, + type_: Some( + Name( + ExprName { + range: 77..86, + id: "Exception", + ctx: Load, + }, + ), + ), + name: Some( + Identifier { + id: "type", + range: 90..94, + }, + ), + body: [ + Expr( + StmtExpr { + range: 96..99, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 96..99, + }, + ), + }, + ), + ], + }, + ), + ], + orelse: [], + finalbody: [], + is_star: false, + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@from_import_soft_keyword_module_name.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@from_import_soft_keyword_module_name.py.snap new file mode 100644 index 0000000000..9ab3b52aba --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@from_import_soft_keyword_module_name.py.snap @@ -0,0 +1,103 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/from_import_soft_keyword_module_name.py +--- +## AST + +``` +Module( + ModModule { + range: 0..104, + body: [ + ImportFrom( + StmtImportFrom { + range: 0..25, + module: Some( + Identifier { + id: "match", + range: 5..10, + }, + ), + names: [ + Alias { + range: 18..25, + name: Identifier { + id: "pattern", + range: 18..25, + }, + asname: None, + }, + ], + level: 0, + }, + ), + ImportFrom( + StmtImportFrom { + range: 26..46, + module: Some( + Identifier { + id: "type", + range: 31..35, + }, + ), + names: [ + Alias { + range: 43..46, + name: Identifier { + id: "bar", + range: 43..46, + }, + asname: None, + }, + ], + level: 0, + }, + ), + ImportFrom( + StmtImportFrom { + range: 47..71, + module: Some( + Identifier { + id: "case", + range: 52..56, + }, + ), + names: [ + Alias { + range: 64..71, + name: Identifier { + id: "pattern", + range: 64..71, + }, + asname: None, + }, + ], + level: 0, + }, + ), + ImportFrom( + StmtImportFrom { + range: 72..103, + module: Some( + Identifier { + id: "match.type.case", + range: 77..92, + }, + ), + names: [ + Alias { + range: 100..103, + name: Identifier { + id: "foo", + range: 100..103, + }, + asname: None, + }, + ], + level: 0, + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@import_as_name_soft_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@import_as_name_soft_keyword.py.snap new file mode 100644 index 0000000000..b4e8a5ae63 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@import_as_name_soft_keyword.py.snap @@ -0,0 +1,75 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/import_as_name_soft_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..58, + body: [ + Import( + StmtImport { + range: 0..19, + names: [ + Alias { + range: 7..19, + name: Identifier { + id: "foo", + range: 7..10, + }, + asname: Some( + Identifier { + id: "match", + range: 14..19, + }, + ), + }, + ], + }, + ), + Import( + StmtImport { + range: 20..38, + names: [ + Alias { + range: 27..38, + name: Identifier { + id: "bar", + range: 27..30, + }, + asname: Some( + 
Identifier { + id: "case", + range: 34..38, + }, + ), + }, + ], + }, + ), + Import( + StmtImport { + range: 39..57, + names: [ + Alias { + range: 46..57, + name: Identifier { + id: "baz", + range: 46..49, + }, + asname: Some( + Identifier { + id: "type", + range: 53..57, + }, + ), + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_as_pattern_soft_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_as_pattern_soft_keyword.py.snap new file mode 100644 index 0000000000..eecf69925d --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_as_pattern_soft_keyword.py.snap @@ -0,0 +1,113 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_as_pattern_soft_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..69, + body: [ + Match( + StmtMatch { + range: 0..68, + subject: Name( + ExprName { + range: 6..9, + id: "foo", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 15..29, + pattern: MatchAs( + PatternMatchAs { + range: 20..24, + pattern: None, + name: Some( + Identifier { + id: "case", + range: 20..24, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 26..29, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 26..29, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 34..49, + pattern: MatchAs( + PatternMatchAs { + range: 39..44, + pattern: None, + name: Some( + Identifier { + id: "match", + range: 39..44, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 46..49, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 46..49, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 54..68, + pattern: MatchAs( + PatternMatchAs { + range: 59..63, + pattern: None, + name: Some( + Identifier { + id: "type", + range: 59..63, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 65..68, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 65..68, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_attr_pattern_soft_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_attr_pattern_soft_keyword.py.snap new file mode 100644 index 0000000000..fb3410108d --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_attr_pattern_soft_keyword.py.snap @@ -0,0 +1,231 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_attr_pattern_soft_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..131, + body: [ + Match( + StmtMatch { + range: 0..130, + subject: Name( + ExprName { + range: 6..9, + id: "foo", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 15..34, + pattern: MatchValue( + PatternMatchValue { + range: 20..29, + value: Attribute( + ExprAttribute { + range: 20..29, + value: Name( + ExprName { + range: 20..25, + id: "match", + ctx: Load, + }, + ), + attr: Identifier { + id: "bar", + range: 26..29, + }, + ctx: Load, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 31..34, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 31..34, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 39..57, + pattern: MatchValue( + PatternMatchValue { + range: 44..52, + value: Attribute( + ExprAttribute { + range: 44..52, + value: Name( + ExprName { + range: 44..48, + id: "case", + ctx: 
Load, + }, + ), + attr: Identifier { + id: "bar", + range: 49..52, + }, + ctx: Load, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 54..57, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 54..57, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 62..80, + pattern: MatchValue( + PatternMatchValue { + range: 67..75, + value: Attribute( + ExprAttribute { + range: 67..75, + value: Name( + ExprName { + range: 67..71, + id: "type", + ctx: Load, + }, + ), + attr: Identifier { + id: "bar", + range: 72..75, + }, + ctx: Load, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 77..80, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 77..80, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 85..130, + pattern: MatchValue( + PatternMatchValue { + range: 90..125, + value: Attribute( + ExprAttribute { + range: 90..125, + value: Attribute( + ExprAttribute { + range: 90..119, + value: Attribute( + ExprAttribute { + range: 90..114, + value: Attribute( + ExprAttribute { + range: 90..109, + value: Attribute( + ExprAttribute { + range: 90..105, + value: Attribute( + ExprAttribute { + range: 90..100, + value: Name( + ExprName { + range: 90..95, + id: "match", + ctx: Load, + }, + ), + attr: Identifier { + id: "case", + range: 96..100, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "type", + range: 101..105, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "bar", + range: 106..109, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "type", + range: 110..114, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "case", + range: 115..119, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "match", + range: 120..125, + }, + ctx: Load, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 127..130, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 127..130, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_1.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_1.py.snap new file mode 100644 index 0000000000..21dd833fc8 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_1.py.snap @@ -0,0 +1,44 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_1.py +--- +## AST + +``` +Module( + ModModule { + range: 0..18, + body: [ + Expr( + StmtExpr { + range: 0..17, + value: Compare( + ExprCompare { + range: 0..17, + left: Name( + ExprName { + range: 0..5, + id: "match", + ctx: Load, + }, + ), + ops: [ + NotIn, + ], + comparators: [ + Name( + ExprName { + range: 13..17, + id: "case", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_2.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_2.py.snap new file mode 100644 index 0000000000..c2023f5c4a --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_2.py.snap @@ -0,0 +1,319 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_2.py +--- +## AST + +``` +Module( + ModModule { + range: 0..149, + body: [ + Expr( + StmtExpr { + range: 0..5, + value: Name( + ExprName { 
+ range: 0..5, + id: "match", + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 6..18, + value: Compare( + ExprCompare { + range: 6..18, + left: Name( + ExprName { + range: 6..11, + id: "match", + ctx: Load, + }, + ), + ops: [ + NotEq, + ], + comparators: [ + Name( + ExprName { + range: 15..18, + id: "foo", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + Expr( + StmtExpr { + range: 19..31, + value: Tuple( + ExprTuple { + range: 19..31, + elts: [ + Name( + ExprName { + range: 20..23, + id: "foo", + ctx: Load, + }, + ), + Name( + ExprName { + range: 25..30, + id: "match", + ctx: Load, + }, + ), + ], + ctx: Load, + parenthesized: true, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 32..44, + value: List( + ExprList { + range: 32..44, + elts: [ + Name( + ExprName { + range: 33..36, + id: "foo", + ctx: Load, + }, + ), + Name( + ExprName { + range: 38..43, + id: "match", + ctx: Load, + }, + ), + ], + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 45..57, + value: Set( + ExprSet { + range: 45..57, + elts: [ + Name( + ExprName { + range: 46..49, + id: "foo", + ctx: Load, + }, + ), + Name( + ExprName { + range: 51..56, + id: "match", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + Expr( + StmtExpr { + range: 58..63, + value: Name( + ExprName { + range: 58..63, + id: "match", + ctx: Load, + }, + ), + }, + ), + AnnAssign( + StmtAnnAssign { + range: 65..75, + target: Name( + ExprName { + range: 65..70, + id: "match", + ctx: Store, + }, + ), + annotation: Name( + ExprName { + range: 72..75, + id: "int", + ctx: Load, + }, + ), + value: None, + simple: true, + }, + ), + Expr( + StmtExpr { + range: 76..82, + value: Tuple( + ExprTuple { + range: 76..82, + elts: [ + Name( + ExprName { + range: 76..81, + id: "match", + ctx: Load, + }, + ), + ], + ctx: Load, + parenthesized: false, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 83..92, + value: Attribute( + ExprAttribute { + range: 83..92, + value: Name( + ExprName { + range: 83..88, + id: "match", + ctx: Load, + }, + ), + attr: Identifier { + id: "foo", + range: 89..92, + }, + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 93..104, + value: BinOp( + ExprBinOp { + range: 93..104, + left: Name( + ExprName { + range: 93..98, + id: "match", + ctx: Load, + }, + ), + op: Div, + right: Name( + ExprName { + range: 101..104, + id: "foo", + ctx: Load, + }, + ), + }, + ), + }, + ), + Expr( + StmtExpr { + range: 105..117, + value: BinOp( + ExprBinOp { + range: 105..117, + left: Name( + ExprName { + range: 105..110, + id: "match", + ctx: Load, + }, + ), + op: LShift, + right: Name( + ExprName { + range: 114..117, + id: "foo", + ctx: Load, + }, + ), + }, + ), + }, + ), + Expr( + StmtExpr { + range: 118..131, + value: BoolOp( + ExprBoolOp { + range: 118..131, + op: And, + values: [ + Name( + ExprName { + range: 118..123, + id: "match", + ctx: Load, + }, + ), + Name( + ExprName { + range: 128..131, + id: "foo", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + Expr( + StmtExpr { + range: 132..148, + value: Compare( + ExprCompare { + range: 132..148, + left: Name( + ExprName { + range: 132..137, + id: "match", + ctx: Load, + }, + ), + ops: [ + IsNot, + ], + comparators: [ + Name( + ExprName { + range: 145..148, + id: "foo", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_1.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_1.py.snap new file mode 100644 index 
0000000000..b25b756c8c --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_1.py.snap @@ -0,0 +1,578 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_1.py +--- +## AST + +``` +Module( + ModModule { + range: 0..358, + body: [ + Match( + StmtMatch { + range: 0..26, + subject: Name( + ExprName { + range: 6..9, + id: "foo", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 15..26, + pattern: MatchAs( + PatternMatchAs { + range: 20..21, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 23..26, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 23..26, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 27..51, + subject: NumberLiteral( + ExprNumberLiteral { + range: 33..34, + value: Int( + 1, + ), + }, + ), + cases: [ + MatchCase { + range: 40..51, + pattern: MatchAs( + PatternMatchAs { + range: 45..46, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 48..51, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 48..51, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 52..78, + subject: NumberLiteral( + ExprNumberLiteral { + range: 58..61, + value: Float( + 1.0, + ), + }, + ), + cases: [ + MatchCase { + range: 67..78, + pattern: MatchAs( + PatternMatchAs { + range: 72..73, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 75..78, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 75..78, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 79..104, + subject: NumberLiteral( + ExprNumberLiteral { + range: 85..87, + value: Complex { + real: 0.0, + imag: 1.0, + }, + }, + ), + cases: [ + MatchCase { + range: 93..104, + pattern: MatchAs( + PatternMatchAs { + range: 98..99, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 101..104, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 101..104, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 105..133, + subject: StringLiteral( + ExprStringLiteral { + range: 111..116, + value: StringLiteralValue { + inner: Single( + StringLiteral { + range: 111..116, + value: "foo", + flags: StringLiteralFlags { + quote_style: Double, + prefix: Empty, + triple_quoted: false, + }, + }, + ), + }, + }, + ), + cases: [ + MatchCase { + range: 122..133, + pattern: MatchAs( + PatternMatchAs { + range: 127..128, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 130..133, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 130..133, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 134..167, + subject: FString( + ExprFString { + range: 140..150, + value: FStringValue { + inner: Single( + FString( + FString { + range: 140..150, + elements: [ + Literal( + FStringLiteralElement { + range: 142..146, + value: "foo ", + }, + ), + Expression( + FStringExpressionElement { + range: 146..149, + expression: Name( + ExprName { + range: 147..148, + id: "x", + ctx: Load, + }, + ), + debug_text: None, + conversion: None, + format_spec: None, + }, + ), + ], + flags: FStringFlags { + quote_style: Double, + prefix: Regular, + triple_quoted: false, + }, + }, + ), + ), + }, + }, + ), + cases: [ + MatchCase { + range: 156..167, + pattern: MatchAs( + 
PatternMatchAs { + range: 161..162, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 164..167, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 164..167, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 168..197, + subject: Set( + ExprSet { + range: 174..180, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 175..176, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 178..179, + value: Int( + 2, + ), + }, + ), + ], + }, + ), + cases: [ + MatchCase { + range: 186..197, + pattern: MatchAs( + PatternMatchAs { + range: 191..192, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 194..197, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 194..197, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 198..225, + subject: UnaryOp( + ExprUnaryOp { + range: 204..208, + op: Invert, + operand: Name( + ExprName { + range: 205..208, + id: "foo", + ctx: Load, + }, + ), + }, + ), + cases: [ + MatchCase { + range: 214..225, + pattern: MatchAs( + PatternMatchAs { + range: 219..220, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 222..225, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 222..225, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 226..252, + subject: EllipsisLiteral( + ExprEllipsisLiteral { + range: 232..235, + }, + ), + cases: [ + MatchCase { + range: 241..252, + pattern: MatchAs( + PatternMatchAs { + range: 246..247, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 249..252, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 249..252, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 253..283, + subject: UnaryOp( + ExprUnaryOp { + range: 259..266, + op: Not, + operand: Name( + ExprName { + range: 263..266, + id: "foo", + ctx: Load, + }, + ), + }, + ), + cases: [ + MatchCase { + range: 272..283, + pattern: MatchAs( + PatternMatchAs { + range: 277..278, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 280..283, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 280..283, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 284..318, + subject: Await( + ExprAwait { + range: 290..301, + value: Call( + ExprCall { + range: 296..301, + func: Name( + ExprName { + range: 296..299, + id: "foo", + ctx: Load, + }, + ), + arguments: Arguments { + range: 299..301, + args: [], + keywords: [], + }, + }, + ), + }, + ), + cases: [ + MatchCase { + range: 307..318, + pattern: MatchAs( + PatternMatchAs { + range: 312..313, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 315..318, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 315..318, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 319..357, + subject: Lambda( + ExprLambda { + range: 325..340, + parameters: Some( + Parameters { + range: 332..335, + posonlyargs: [], + args: [ + ParameterWithDefault { + range: 332..335, + parameter: Parameter { + range: 332..335, + name: Identifier { + id: "foo", + range: 332..335, + }, + annotation: None, + }, + default: None, + }, + ], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + ), + body: Name( + ExprName { + range: 337..340, + id: "foo", + ctx: Load, + }, + ), + }, + ), + cases: [ + 
MatchCase { + range: 346..357, + pattern: MatchAs( + PatternMatchAs { + range: 351..352, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 354..357, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 354..357, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_2.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_2.py.snap new file mode 100644 index 0000000000..88a69846f4 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_2.py.snap @@ -0,0 +1,233 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_2.py +--- +## AST + +``` +Module( + ModModule { + range: 0..170, + body: [ + Match( + StmtMatch { + range: 0..28, + subject: Name( + ExprName { + range: 6..11, + id: "match", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 17..28, + pattern: MatchAs( + PatternMatchAs { + range: 22..23, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 25..28, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 25..28, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 29..56, + subject: Name( + ExprName { + range: 35..39, + id: "case", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 45..56, + pattern: MatchAs( + PatternMatchAs { + range: 50..51, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 53..56, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 53..56, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 57..84, + subject: Name( + ExprName { + range: 63..67, + id: "type", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 73..84, + pattern: MatchAs( + PatternMatchAs { + range: 78..79, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 81..84, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 81..84, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 85..112, + subject: NoneLiteral( + ExprNoneLiteral { + range: 91..95, + }, + ), + cases: [ + MatchCase { + range: 101..112, + pattern: MatchAs( + PatternMatchAs { + range: 106..107, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 109..112, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 109..112, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 113..140, + subject: BooleanLiteral( + ExprBooleanLiteral { + range: 119..123, + value: true, + }, + ), + cases: [ + MatchCase { + range: 129..140, + pattern: MatchAs( + PatternMatchAs { + range: 134..135, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 137..140, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 137..140, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 141..169, + subject: BooleanLiteral( + ExprBooleanLiteral { + range: 147..152, + value: false, + }, + ), + cases: [ + MatchCase { + range: 158..169, + pattern: MatchAs( + PatternMatchAs { + range: 163..164, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 166..169, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 166..169, + }, + ), + }, 
+ ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_or_identifier.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_or_identifier.py.snap new file mode 100644 index 0000000000..67f1d122bc --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_or_identifier.py.snap @@ -0,0 +1,291 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_or_identifier.py +--- +## AST + +``` +Module( + ModModule { + range: 0..225, + body: [ + Expr( + StmtExpr { + range: 0..12, + value: Call( + ExprCall { + range: 0..12, + func: Name( + ExprName { + range: 0..5, + id: "match", + ctx: Load, + }, + ), + arguments: Arguments { + range: 6..12, + args: [ + NumberLiteral( + ExprNumberLiteral { + range: 7..8, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 10..11, + value: Int( + 2, + ), + }, + ), + ], + keywords: [], + }, + }, + ), + }, + ), + Match( + StmtMatch { + range: 27..67, + subject: Tuple( + ExprTuple { + range: 33..39, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 34..35, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 37..38, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + parenthesized: true, + }, + ), + cases: [ + MatchCase { + range: 56..67, + pattern: MatchAs( + PatternMatchAs { + range: 61..62, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 64..67, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 64..67, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Expr( + StmtExpr { + range: 68..78, + value: Subscript( + ExprSubscript { + range: 68..78, + value: Name( + ExprName { + range: 68..73, + id: "match", + ctx: Load, + }, + ), + slice: Slice( + ExprSlice { + range: 75..77, + lower: Some( + NumberLiteral( + ExprNumberLiteral { + range: 75..76, + value: Int( + 1, + ), + }, + ), + ), + upper: None, + step: None, + }, + ), + ctx: Load, + }, + ), + }, + ), + Match( + StmtMatch { + range: 93..133, + subject: List( + ExprList { + range: 99..105, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 100..101, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 103..104, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 122..133, + pattern: MatchAs( + PatternMatchAs { + range: 127..128, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 130..133, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 130..133, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Expr( + StmtExpr { + range: 134..145, + value: BinOp( + ExprBinOp { + range: 134..145, + left: Name( + ExprName { + range: 134..139, + id: "match", + ctx: Load, + }, + ), + op: Mult, + right: Name( + ExprName { + range: 142..145, + id: "foo", + ctx: Load, + }, + ), + }, + ), + }, + ), + Expr( + StmtExpr { + range: 160..171, + value: BinOp( + ExprBinOp { + range: 160..171, + left: Name( + ExprName { + range: 160..165, + id: "match", + ctx: Load, + }, + ), + op: Sub, + right: Name( + ExprName { + range: 168..171, + id: "foo", + ctx: Load, + }, + ), + }, + ), + }, + ), + Match( + StmtMatch { + range: 186..224, + subject: UnaryOp( + ExprUnaryOp { + range: 192..196, + op: USub, + operand: Name( + ExprName { + range: 193..196, + id: "foo", 
+ ctx: Load, + }, + ), + }, + ), + cases: [ + MatchCase { + range: 213..224, + pattern: MatchAs( + PatternMatchAs { + range: 218..219, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 221..224, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 221..224, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_semantic/src/analyze/type_inference.rs b/crates/ruff_python_semantic/src/analyze/type_inference.rs index e2ecd8690e..6f7dfb0469 100644 --- a/crates/ruff_python_semantic/src/analyze/type_inference.rs +++ b/crates/ruff_python_semantic/src/analyze/type_inference.rs @@ -428,12 +428,12 @@ impl NumberLike { #[cfg(test)] mod tests { - use ruff_python_ast::Expr; - use ruff_python_parser::parse_expression; + use ruff_python_ast::ModExpression; + use ruff_python_parser::{parse_expression, Parsed}; use crate::analyze::type_inference::{NumberLike, PythonType, ResolvedPythonType}; - fn parse(expression: &str) -> Expr { + fn parse(expression: &str) -> Parsed { parse_expression(expression).unwrap() } @@ -441,95 +441,95 @@ mod tests { fn type_inference() { // Atoms. assert_eq!( - ResolvedPythonType::from(&parse("1")), + ResolvedPythonType::from(parse("1").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("'Hello, world'")), + ResolvedPythonType::from(parse("'Hello, world'").expr()), ResolvedPythonType::Atom(PythonType::String) ); assert_eq!( - ResolvedPythonType::from(&parse("b'Hello, world'")), + ResolvedPythonType::from(parse("b'Hello, world'").expr()), ResolvedPythonType::Atom(PythonType::Bytes) ); assert_eq!( - ResolvedPythonType::from(&parse("'Hello' % 'world'")), + ResolvedPythonType::from(parse("'Hello' % 'world'").expr()), ResolvedPythonType::Atom(PythonType::String) ); // Boolean operators. assert_eq!( - ResolvedPythonType::from(&parse("1 and 2")), + ResolvedPythonType::from(parse("1 and 2").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 and True")), + ResolvedPythonType::from(parse("1 and True").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); // Binary operators. assert_eq!( - ResolvedPythonType::from(&parse("1.0 * 2")), + ResolvedPythonType::from(parse("1.0 * 2").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("2 * 1.0")), + ResolvedPythonType::from(parse("2 * 1.0").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("1.0 * 2j")), + ResolvedPythonType::from(parse("1.0 * 2j").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Complex)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 / True")), + ResolvedPythonType::from(parse("1 / True").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 / 2")), + ResolvedPythonType::from(parse("1 / 2").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("{1, 2} - {2}")), + ResolvedPythonType::from(parse("{1, 2} - {2}").expr()), ResolvedPythonType::Atom(PythonType::Set) ); // Unary operators. 
assert_eq!( - ResolvedPythonType::from(&parse("-1")), + ResolvedPythonType::from(parse("-1").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("-1.0")), + ResolvedPythonType::from(parse("-1.0").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("-1j")), + ResolvedPythonType::from(parse("-1j").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Complex)) ); assert_eq!( - ResolvedPythonType::from(&parse("-True")), + ResolvedPythonType::from(parse("-True").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("not 'Hello'")), + ResolvedPythonType::from(parse("not 'Hello'").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Bool)) ); assert_eq!( - ResolvedPythonType::from(&parse("not x.y.z")), + ResolvedPythonType::from(parse("not x.y.z").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Bool)) ); // Conditional expressions. assert_eq!( - ResolvedPythonType::from(&parse("1 if True else 2")), + ResolvedPythonType::from(parse("1 if True else 2").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 if True else 2.0")), + ResolvedPythonType::from(parse("1 if True else 2.0").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 if True else False")), + ResolvedPythonType::from(parse("1 if True else False").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); } diff --git a/crates/ruff_python_trivia_integration_tests/Cargo.toml b/crates/ruff_python_trivia_integration_tests/Cargo.toml index 9e0480a7e9..7089c32214 100644 --- a/crates/ruff_python_trivia_integration_tests/Cargo.toml +++ b/crates/ruff_python_trivia_integration_tests/Cargo.toml @@ -12,7 +12,6 @@ license.workspace = true [dependencies] [dev-dependencies] -ruff_python_index = { workspace = true } ruff_python_parser = { workspace = true } ruff_python_trivia = { workspace = true } ruff_source_file = { workspace = true } diff --git a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs index fe6cc47ac9..8bc8c5eb4c 100644 --- a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs +++ b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs @@ -1,5 +1,4 @@ -use ruff_python_index::Indexer; -use ruff_python_parser::{tokenize, Mode}; +use ruff_python_parser::{parse_unchecked, Mode}; use ruff_source_file::Locator; use ruff_text_size::TextSize; @@ -7,12 +6,11 @@ use ruff_text_size::TextSize; fn block_comments_two_line_block_at_start() { // arrange let source = "# line 1\n# line 2\n"; - let tokens = tokenize(source, Mode::Module); + let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); - let indexer = Indexer::from_tokens(&tokens, &locator); // act - let block_comments = indexer.comment_ranges().block_comments(&locator); + let block_comments = parsed.comment_ranges().block_comments(&locator); // assert assert_eq!(block_comments, vec![TextSize::new(0), TextSize::new(9)]); @@ -22,12 +20,11 @@ fn block_comments_two_line_block_at_start() { fn block_comments_indented_block() { // arrange let source = " # line 1\n # line 2\n"; - let tokens = tokenize(source, Mode::Module); + let parsed = 
parse_unchecked(source, Mode::Module); let locator = Locator::new(source); - let indexer = Indexer::from_tokens(&tokens, &locator); // act - let block_comments = indexer.comment_ranges().block_comments(&locator); + let block_comments = parsed.comment_ranges().block_comments(&locator); // assert assert_eq!(block_comments, vec![TextSize::new(4), TextSize::new(17)]); @@ -37,12 +34,11 @@ fn block_comments_single_line_is_not_a_block() { // arrange let source = "\n"; - let tokens = tokenize(source, Mode::Module); + let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); - let indexer = Indexer::from_tokens(&tokens, &locator); // act - let block_comments = indexer.comment_ranges().block_comments(&locator); + let block_comments = parsed.comment_ranges().block_comments(&locator); // assert assert_eq!(block_comments, Vec::<TextSize>::new()); @@ -52,12 +48,11 @@ fn block_comments_lines_with_code_not_a_block() { // arrange let source = "x = 1 # line 1\ny = 2 # line 2\n"; - let tokens = tokenize(source, Mode::Module); + let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); - let indexer = Indexer::from_tokens(&tokens, &locator); // act - let block_comments = indexer.comment_ranges().block_comments(&locator); + let block_comments = parsed.comment_ranges().block_comments(&locator); // assert assert_eq!(block_comments, Vec::<TextSize>::new()); @@ -67,12 +62,11 @@ fn block_comments_sequential_lines_not_in_block() { // arrange let source = " # line 1\n # line 2\n"; - let tokens = tokenize(source, Mode::Module); + let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); - let indexer = Indexer::from_tokens(&tokens, &locator); // act - let block_comments = indexer.comment_ranges().block_comments(&locator); + let block_comments = parsed.comment_ranges().block_comments(&locator); // assert assert_eq!(block_comments, Vec::<TextSize>::new()); @@ -87,12 +81,11 @@ fn block_comments_lines_in_triple_quotes_not_a_block() { # line 2 """ "#; - let tokens = tokenize(source, Mode::Module); + let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); - let indexer = Indexer::from_tokens(&tokens, &locator); // act - let block_comments = indexer.comment_ranges().block_comments(&locator); + let block_comments = parsed.comment_ranges().block_comments(&locator); // assert assert_eq!(block_comments, Vec::<TextSize>::new()); @@ -124,12 +117,11 @@ y = 2 # do not form a block comment # therefore do not form a block comment """ "#; - let tokens = tokenize(source, Mode::Module); + let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); - let indexer = Indexer::from_tokens(&tokens, &locator); // act - let block_comments = indexer.comment_ranges().block_comments(&locator); + let block_comments = parsed.comment_ranges().block_comments(&locator); // assert assert_eq!( diff --git a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs index 5ac4296ea6..7db3766463 100644 --- a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs +++ b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs @@ -1,7 +1,6 @@ use insta::assert_debug_snapshot; -use ruff_python_parser::lexer::lex; -use ruff_python_parser::{Mode, Tok}; +use ruff_python_parser::{parse_unchecked, Mode}; use
ruff_python_trivia::{lines_after, lines_before, SimpleToken, SimpleTokenizer}; use ruff_python_trivia::{BackwardsTokenizer, SimpleTokenKind}; use ruff_text_size::{TextLen, TextRange, TextSize}; @@ -23,17 +22,8 @@ impl TokenizationTestCase { } fn tokenize_reverse(&self) -> Vec { - let comment_ranges: Vec<_> = lex(self.source, Mode::Module) - .filter_map(|result| { - let (token, range) = result.expect("Input to be a valid python program."); - if matches!(token, Tok::Comment(_)) { - Some(range) - } else { - None - } - }) - .collect(); - BackwardsTokenizer::new(self.source, self.range, &comment_ranges).collect() + let parsed = parse_unchecked(self.source, Mode::Module); + BackwardsTokenizer::new(self.source, self.range, parsed.comment_ranges()).collect() } fn tokens(&self) -> &[SimpleToken] { diff --git a/crates/ruff_python_trivia_integration_tests/tests/whitespace.rs b/crates/ruff_python_trivia_integration_tests/tests/whitespace.rs index 709a3a3d18..d73e2052b3 100644 --- a/crates/ruff_python_trivia_integration_tests/tests/whitespace.rs +++ b/crates/ruff_python_trivia_integration_tests/tests/whitespace.rs @@ -1,4 +1,4 @@ -use ruff_python_parser::{parse_suite, ParseError}; +use ruff_python_parser::{parse_module, ParseError}; use ruff_python_trivia::has_trailing_content; use ruff_source_file::Locator; use ruff_text_size::Ranged; @@ -6,26 +6,26 @@ use ruff_text_size::Ranged; #[test] fn trailing_content() -> Result<(), ParseError> { let contents = "x = 1"; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let suite = parse_module(contents)?.into_suite(); + let stmt = suite.first().unwrap(); let locator = Locator::new(contents); assert!(!has_trailing_content(stmt.end(), &locator)); let contents = "x = 1; y = 2"; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let suite = parse_module(contents)?.into_suite(); + let stmt = suite.first().unwrap(); let locator = Locator::new(contents); assert!(has_trailing_content(stmt.end(), &locator)); let contents = "x = 1 "; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let suite = parse_module(contents)?.into_suite(); + let stmt = suite.first().unwrap(); let locator = Locator::new(contents); assert!(!has_trailing_content(stmt.end(), &locator)); let contents = "x = 1 # Comment"; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let suite = parse_module(contents)?.into_suite(); + let stmt = suite.first().unwrap(); let locator = Locator::new(contents); assert!(!has_trailing_content(stmt.end(), &locator)); @@ -34,8 +34,8 @@ x = 1 y = 2 " .trim(); - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let suite = parse_module(contents)?.into_suite(); + let stmt = suite.first().unwrap(); let locator = Locator::new(contents); assert!(!has_trailing_content(stmt.end(), &locator)); diff --git a/crates/ruff_server/src/lint.rs b/crates/ruff_server/src/lint.rs index b984143fa2..de6340d7f0 100644 --- a/crates/ruff_server/src/lint.rs +++ b/crates/ruff_server/src/lint.rs @@ -7,7 +7,7 @@ use ruff_diagnostics::{Applicability, Diagnostic, DiagnosticKind, Edit, Fix}; use ruff_linter::{ directives::{extract_directives, Flags}, generate_noqa_edits, - linter::{check_path, LinterResult, TokenSource}, + linter::{check_path, LinterResult}, packaging::detect_package_root, registry::AsRule, settings::flags, @@ -16,7 +16,6 @@ use ruff_linter::{ use ruff_notebook::Notebook; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; 
-use ruff_python_parser::AsMode; use ruff_source_file::{LineIndex, Locator}; use ruff_text_size::{Ranged, TextRange}; use ruff_workspace::resolver::match_any_exclusion; @@ -95,8 +94,8 @@ pub(crate) fn check(query: &DocumentQuery, encoding: PositionEncoding) -> Diagno let source_type = query.source_type(); - // Tokenize once. - let tokens = ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode()); + // Parse once. + let parsed = ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type); let index = LineIndex::from_source_text(source_kind.source_code()); @@ -104,13 +103,13 @@ pub(crate) fn check(query: &DocumentQuery, encoding: PositionEncoding) -> Diagno let locator = Locator::with_index(source_kind.source_code(), index.clone()); // Detect the current code style (lazily). - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); // Extra indices from the code. - let indexer = Indexer::from_tokens(&tokens, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. - let directives = extract_directives(&tokens, Flags::all(), &locator, &indexer); + let directives = extract_directives(&parsed, Flags::all(), &locator, &indexer); // Generate checks. let LinterResult { data, .. } = check_path( @@ -124,14 +123,14 @@ pub(crate) fn check(query: &DocumentQuery, encoding: PositionEncoding) -> Diagno flags::Noqa::Enabled, &source_kind, source_type, - TokenSource::Tokens(tokens), + &parsed, ); let noqa_edits = generate_noqa_edits( query.virtual_file_path(), data.as_slice(), &locator, - indexer.comment_ranges(), + parsed.comment_ranges(), &linter_settings.external, &directives.noqa_line_for, stylist.line_ending(), diff --git a/crates/ruff_wasm/Cargo.toml b/crates/ruff_wasm/Cargo.toml index 35ba4e102e..c8cdfc9e71 100644 --- a/crates/ruff_wasm/Cargo.toml +++ b/crates/ruff_wasm/Cargo.toml @@ -28,7 +28,6 @@ ruff_python_index = { workspace = true } ruff_python_parser = { workspace = true } ruff_source_file = { workspace = true } ruff_text_size = { workspace = true } -ruff_python_trivia = { workspace = true } ruff_workspace = { workspace = true } console_error_panic_hook = { workspace = true, optional = true } diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs index 56843a82e0..068975fe83 100644 --- a/crates/ruff_wasm/src/lib.rs +++ b/crates/ruff_wasm/src/lib.rs @@ -8,7 +8,7 @@ use ruff_formatter::printer::SourceMapGeneration; use ruff_formatter::{FormatResult, Formatted, IndentStyle}; use ruff_linter::directives; use ruff_linter::line_width::{IndentWidth, LineLength}; -use ruff_linter::linter::{check_path, LinterResult, TokenSource}; +use ruff_linter::linter::{check_path, LinterResult}; use ruff_linter::registry::AsRule; use ruff_linter::settings::types::PythonVersion; use ruff_linter::settings::{flags, DEFAULT_SELECTORS, DUMMY_VARIABLE_RGX}; @@ -16,9 +16,8 @@ use ruff_linter::source_kind::SourceKind; use ruff_python_ast::{Mod, PySourceType}; use ruff_python_codegen::Stylist; use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext, QuoteStyle}; -use ruff_python_index::{CommentRangesBuilder, Indexer}; -use ruff_python_parser::{parse_tokens, tokenize_all, AsMode, Mode, Program}; -use ruff_python_trivia::CommentRanges; +use ruff_python_index::Indexer; +use ruff_python_parser::{parse, parse_unchecked, parse_unchecked_source, Mode, Parsed}; use ruff_source_file::{Locator, 
SourceLocation}; use ruff_text_size::Ranged; use ruff_workspace::configuration::Configuration; @@ -160,21 +159,21 @@ impl Workspace { // TODO(dhruvmanila): Support Jupyter Notebooks let source_kind = SourceKind::Python(contents.to_string()); - // Tokenize once. - let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode()); + // Parse once. + let parsed = parse_unchecked_source(source_kind.source_code(), source_type); // Map row and column locations to byte slices (lazily). let locator = Locator::new(contents); // Detect the current code style (lazily). - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); // Extra indices from the code. - let indexer = Indexer::from_tokens(&tokens, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = - directives::extract_directives(&tokens, directives::Flags::empty(), &locator, &indexer); + directives::extract_directives(&parsed, directives::Flags::empty(), &locator, &indexer); // Generate checks. let LinterResult { @@ -190,7 +189,7 @@ impl Workspace { flags::Noqa::Enabled, &source_kind, source_type, - TokenSource::Tokens(tokens), + &parsed, ); let source_code = locator.to_source_code(); @@ -242,21 +241,25 @@ impl Workspace { pub fn comments(&self, contents: &str) -> Result { let parsed = ParsedModule::from_source(contents)?; - let comments = pretty_comments(&parsed.module, &parsed.comment_ranges, contents); + let comments = pretty_comments( + parsed.parsed.syntax(), + parsed.parsed.comment_ranges(), + contents, + ); Ok(comments) } /// Parses the content and returns its AST pub fn parse(&self, contents: &str) -> Result { - let program = Program::parse_str(contents, Mode::Module); + let parsed = parse_unchecked(contents, Mode::Module); - Ok(format!("{:#?}", program.into_ast())) + Ok(format!("{:#?}", parsed.into_syntax())) } pub fn tokens(&self, contents: &str) -> Result { - let tokens: Vec<_> = ruff_python_parser::lexer::lex(contents, Mode::Module).collect(); + let parsed = parse_unchecked(contents, Mode::Module); - Ok(format!("{tokens:#?}")) + Ok(format!("{:#?}", parsed.tokens())) } } @@ -266,25 +269,14 @@ pub(crate) fn into_error(err: E) -> Error { struct ParsedModule<'a> { source_code: &'a str, - module: Mod, - comment_ranges: CommentRanges, + parsed: Parsed, } impl<'a> ParsedModule<'a> { fn from_source(source_code: &'a str) -> Result { - let tokens: Vec<_> = tokenize_all(source_code, Mode::Module); - let mut comment_ranges = CommentRangesBuilder::default(); - - for (token, range) in tokens.iter().flatten() { - comment_ranges.visit_token(token, *range); - } - let comment_ranges = comment_ranges.finish(); - let module = parse_tokens(tokens, source_code, Mode::Module).map_err(into_error)?; - Ok(Self { source_code, - module, - comment_ranges, + parsed: parse(source_code, Mode::Module).map_err(into_error)?, }) } @@ -295,11 +287,6 @@ impl<'a> ParsedModule<'a> { .to_format_options(PySourceType::default(), self.source_code) .with_source_map_generation(SourceMapGeneration::Enabled); - format_module_ast( - &self.module, - &self.comment_ranges, - self.source_code, - options, - ) + format_module_ast(&self.parsed, self.source_code, options) } } diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 28183f438d..f3e74c176f 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -24,6 +24,7 @@ ruff_python_index = { path = "../crates/ruff_python_index" } ruff_python_parser = { path 
= "../crates/ruff_python_parser" } ruff_source_file = { path = "../crates/ruff_source_file" } ruff_python_formatter = { path = "../crates/ruff_python_formatter"} +ruff_text_size = { path = "../crates/ruff_text_size" } arbitrary = { version = "1.3.0", features = ["derive"] } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false } diff --git a/fuzz/fuzz_targets/ruff_parse_simple.rs b/fuzz/fuzz_targets/ruff_parse_simple.rs index 657e8c1449..805c04cd67 100644 --- a/fuzz/fuzz_targets/ruff_parse_simple.rs +++ b/fuzz/fuzz_targets/ruff_parse_simple.rs @@ -5,8 +5,9 @@ use libfuzzer_sys::{fuzz_target, Corpus}; use ruff_python_codegen::{Generator, Stylist}; -use ruff_python_parser::{lexer, parse_suite, Mode, ParseError}; +use ruff_python_parser::{parse_module, ParseError}; use ruff_source_file::Locator; +use ruff_text_size::Ranged; fn do_fuzz(case: &[u8]) -> Corpus { let Ok(code) = std::str::from_utf8(case) else { @@ -15,8 +16,8 @@ fn do_fuzz(case: &[u8]) -> Corpus { // just round-trip it once to trigger both parse and unparse let locator = Locator::new(code); - let python_ast = match parse_suite(code) { - Ok(stmts) => stmts, + let parsed = match parse_module(code) { + Ok(parsed) => parsed, Err(ParseError { location, .. }) => { let offset = location.start().to_usize(); assert!( @@ -28,38 +29,24 @@ fn do_fuzz(case: &[u8]) -> Corpus { } }; - let tokens: Vec<_> = lexer::lex(code, Mode::Module).collect(); - - for maybe_token in tokens.iter() { - match maybe_token.as_ref() { - Ok((_, range)) => { - let start = range.start().to_usize(); - let end = range.end().to_usize(); - assert!( - code.is_char_boundary(start), - "Invalid start position {} (not at char boundary)", - start - ); - assert!( - code.is_char_boundary(end), - "Invalid end position {} (not at char boundary)", - end - ); - } - Err(err) => { - let offset = err.location().start().to_usize(); - assert!( - code.is_char_boundary(offset), - "Invalid error location {} (not at char boundary)", - offset - ); - } - } + for token in parsed.tokens() { + let start = token.start().to_usize(); + let end = token.end().to_usize(); + assert!( + code.is_char_boundary(start), + "Invalid start position {} (not at char boundary)", + start + ); + assert!( + code.is_char_boundary(end), + "Invalid end position {} (not at char boundary)", + end + ); } - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); let mut generator: Generator = (&stylist).into(); - generator.unparse_suite(&python_ast); + generator.unparse_suite(parsed.suite()); Corpus::Keep }