diff --git a/Cargo.lock b/Cargo.lock index d0018e76e3..b6ca38375b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3349,6 +3349,7 @@ dependencies = [ "compact_str", "get-size2", "insta", + "itertools 0.14.0", "memchr", "ruff_annotate_snippets", "ruff_python_ast", diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml index ae45871866..c527f96e11 100644 --- a/crates/ruff_python_parser/Cargo.toml +++ b/crates/ruff_python_parser/Cargo.toml @@ -35,6 +35,7 @@ ruff_source_file = { workspace = true } anyhow = { workspace = true } insta = { workspace = true, features = ["glob"] } +itertools = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } walkdir = { workspace = true } diff --git a/crates/ruff_python_parser/resources/invalid/re_lexing/ty_1828.py b/crates/ruff_python_parser/resources/invalid/re_lexing/ty_1828.py new file mode 100644 index 0000000000..3e953b541f --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lexing/ty_1828.py @@ -0,0 +1,5 @@ +# Regression test for https://github.com/astral-sh/ty/issues/1828 +(c: int = 1,f"""{d=[ +def a( +class A: + pass diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs index e5755806e3..e22196e00e 100644 --- a/crates/ruff_python_parser/src/token_source.rs +++ b/crates/ruff_python_parser/src/token_source.rs @@ -67,26 +67,59 @@ impl<'src> TokenSource<'src> { /// /// [`re_lex_logical_token`]: Lexer::re_lex_logical_token pub(crate) fn re_lex_logical_token(&mut self) { - let mut non_logical_newline_start = None; - for token in self.tokens.iter().rev() { + let mut non_logical_newline = None; + + #[cfg(debug_assertions)] + let last_non_trivia_end_before = { + self.tokens + .iter() + .rev() + .find(|tok| !tok.kind().is_trivia()) + .map(ruff_text_size::Ranged::end) + }; + + for (index, token) in self.tokens.iter().enumerate().rev() { match token.kind() { TokenKind::NonLogicalNewline => { - non_logical_newline_start = Some(token.start()); + non_logical_newline = Some((index, token.start())); } TokenKind::Comment => continue, _ => break, } } - if self.lexer.re_lex_logical_token(non_logical_newline_start) { - let current_start = self.current_range().start(); - while self - .tokens - .last() - .is_some_and(|last| last.start() >= current_start) - { - self.tokens.pop(); - } + if !self + .lexer + .re_lex_logical_token(non_logical_newline.map(|(_, start)| start)) + { + return; + } + + let non_logical_line_index = non_logical_newline + .expect( + "`re_lex_logical_token` should only return `true` if `non_logical_line` is `Some`", + ) + .0; + + // Trim the already bumped logical line token (and comments coming after it) as it might now have become a logical line token + self.tokens.truncate(non_logical_line_index); + + #[cfg(debug_assertions)] + { + let last_non_trivia_end_now = { + self.tokens + .iter() + .rev() + .find(|tok| !tok.kind().is_trivia()) + .map(ruff_text_size::Ranged::end) + }; + + assert_eq!(last_non_trivia_end_before, last_non_trivia_end_now); + } + + // Ensure `current` is positioned at a non-trivia token. + if self.current_kind().is_trivia() { + self.bump(self.current_kind()); } } diff --git a/crates/ruff_python_parser/tests/fixtures.rs b/crates/ruff_python_parser/tests/fixtures.rs index 2c89ba7aad..a9378eddfe 100644 --- a/crates/ruff_python_parser/tests/fixtures.rs +++ b/crates/ruff_python_parser/tests/fixtures.rs @@ -4,15 +4,16 @@ use std::fmt::{Formatter, Write}; use std::fs; use std::path::Path; +use itertools::Itertools; use ruff_annotate_snippets::{Level, Renderer, Snippet}; -use ruff_python_ast::token::Token; +use ruff_python_ast::token::{Token, Tokens}; use ruff_python_ast::visitor::Visitor; use ruff_python_ast::visitor::source_order::{SourceOrderVisitor, TraversalSignal, walk_module}; use ruff_python_ast::{self as ast, AnyNodeRef, Mod, PythonVersion}; use ruff_python_parser::semantic_errors::{ SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, }; -use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, parse_unchecked}; +use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, Parsed, parse_unchecked}; use ruff_source_file::{LineIndex, OneIndexed, SourceCode}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -81,7 +82,7 @@ fn test_valid_syntax(input_path: &Path) { } validate_tokens(parsed.tokens(), source.text_len(), input_path); - validate_ast(parsed.syntax(), source.text_len(), input_path); + validate_ast(&parsed, source.text_len(), input_path); let mut output = String::new(); writeln!(&mut output, "## AST").unwrap(); @@ -139,7 +140,7 @@ fn test_invalid_syntax(input_path: &Path) { let parsed = parse_unchecked(&source, options.clone()); validate_tokens(parsed.tokens(), source.text_len(), input_path); - validate_ast(parsed.syntax(), source.text_len(), input_path); + validate_ast(&parsed, source.text_len(), input_path); let mut output = String::new(); writeln!(&mut output, "## AST").unwrap(); @@ -402,12 +403,16 @@ Tokens: {tokens:#?} /// * the range of the parent node fully encloses all its child nodes /// * the ranges are strictly increasing when traversing the nodes in pre-order. /// * all ranges are within the length of the source code. -fn validate_ast(root: &Mod, source_len: TextSize, test_path: &Path) { - walk_module(&mut ValidateAstVisitor::new(source_len, test_path), root); +fn validate_ast(parsed: &Parsed, source_len: TextSize, test_path: &Path) { + walk_module( + &mut ValidateAstVisitor::new(parsed.tokens(), source_len, test_path), + parsed.syntax(), + ); } #[derive(Debug)] struct ValidateAstVisitor<'a> { + tokens: std::iter::Peekable>, parents: Vec>, previous: Option>, source_length: TextSize, @@ -415,8 +420,9 @@ struct ValidateAstVisitor<'a> { } impl<'a> ValidateAstVisitor<'a> { - fn new(source_length: TextSize, test_path: &'a Path) -> Self { + fn new(tokens: &'a Tokens, source_length: TextSize, test_path: &'a Path) -> Self { Self { + tokens: tokens.iter().peekable(), parents: Vec::new(), previous: None, source_length, @@ -425,6 +431,47 @@ impl<'a> ValidateAstVisitor<'a> { } } +impl ValidateAstVisitor<'_> { + /// Check that the node's start doesn't fall within a token. + /// Called in `enter_node` before visiting children. + fn assert_start_boundary(&mut self, node: AnyNodeRef<'_>) { + // Skip tokens that end at or before the node starts. + self.tokens + .peeking_take_while(|t| t.end() <= node.start()) + .last(); + + if let Some(next) = self.tokens.peek() { + // At this point, next_token.end() > node.start() + assert!( + next.start() >= node.start(), + "{path}: The start of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}", + path = self.test_path.display(), + root = self.parents.first() + ); + } + } + + /// Check that the node's end doesn't fall within a token. + /// Called in `leave_node` after visiting children, so all tokens + /// within the node have been consumed. + fn assert_end_boundary(&mut self, node: AnyNodeRef<'_>) { + // Skip tokens that end at or before the node ends. + self.tokens + .peeking_take_while(|t| t.end() <= node.end()) + .last(); + + if let Some(next) = self.tokens.peek() { + // At this point, `next_token.end() > node.end()` + assert!( + next.start() >= node.end(), + "{path}: The end of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}", + path = self.test_path.display(), + root = self.parents.first() + ); + } + } +} + impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> { fn enter_node(&mut self, node: AnyNodeRef<'ast>) -> TraversalSignal { assert!( @@ -452,12 +499,16 @@ impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> { ); } + self.assert_start_boundary(node); + self.parents.push(node); TraversalSignal::Traverse } fn leave_node(&mut self, node: AnyNodeRef<'ast>) { + self.assert_end_boundary(node); + self.parents.pop().expect("Expected tree to be balanced"); self.previous = Some(node); diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap index 61f3230855..93b1439a58 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap @@ -296,7 +296,7 @@ Module( test: Call( ExprCall { node_index: NodeIndex(None), - range: 456..472, + range: 456..471, func: Name( ExprName { node_index: NodeIndex(None), @@ -306,7 +306,7 @@ Module( }, ), arguments: Arguments { - range: 460..472, + range: 460..471, node_index: NodeIndex(None), args: [ Name( @@ -581,7 +581,7 @@ Module( test: Call( ExprCall { node_index: NodeIndex(None), - range: 890..906, + range: 890..905, func: Name( ExprName { node_index: NodeIndex(None), @@ -591,7 +591,7 @@ Module( }, ), arguments: Arguments { - range: 894..906, + range: 894..905, node_index: NodeIndex(None), args: [ FString( @@ -832,7 +832,16 @@ Module( | 28 | # The lexer is nested with multiple levels of parentheses 29 | if call(foo, [a, b - | ^ Syntax Error: Expected `]`, found NonLogicalNewline +30 | def bar(): + | ^^^ Syntax Error: Expected `]`, found `def` +31 | pass + | + + + | +28 | # The lexer is nested with multiple levels of parentheses +29 | if call(foo, [a, b + | ^ Syntax Error: Expected `)`, found newline 30 | def bar(): 31 | pass | @@ -857,11 +866,10 @@ Module( | -41 | # test is to make sure it emits a `NonLogicalNewline` token after `b`. 42 | if call(foo, [a, 43 | b - | ^ Syntax Error: Expected `]`, found NonLogicalNewline 44 | ) + | ^ Syntax Error: Expected `]`, found `)` 45 | def bar(): 46 | pass | @@ -898,7 +906,7 @@ Module( | 49 | # F-strings uses normal list parsing, so test those as well 50 | if call(f"hello {x - | ^ Syntax Error: Expected FStringEnd, found NonLogicalNewline + | ^ Syntax Error: Expected `)`, found newline 51 | def bar(): 52 | pass | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_mac_eol.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_mac_eol.py.snap index 5567459c70..2eb9515848 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_mac_eol.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_mac_eol.py.snap @@ -17,7 +17,7 @@ Module( test: Call( ExprCall { node_index: NodeIndex(None), - range: 3..19, + range: 3..18, func: Name( ExprName { node_index: NodeIndex(None), @@ -27,7 +27,7 @@ Module( }, ), arguments: Arguments { - range: 7..19, + range: 7..18, node_index: NodeIndex(None), args: [ Name( @@ -113,5 +113,11 @@ Module( | 1 | if call(foo, [a, b def bar(): pass - | ^ Syntax Error: Expected `]`, found NonLogicalNewline + | ^^^ Syntax Error: Expected `]`, found `def` + | + + + | +1 | if call(foo, [a, b def bar(): pass + | ^ Syntax Error: Expected `)`, found newline | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_windows_eol.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_windows_eol.py.snap index ae03fee095..692755d4e8 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_windows_eol.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_windows_eol.py.snap @@ -17,7 +17,7 @@ Module( test: Call( ExprCall { node_index: NodeIndex(None), - range: 3..20, + range: 3..18, func: Name( ExprName { node_index: NodeIndex(None), @@ -27,7 +27,7 @@ Module( }, ), arguments: Arguments { - range: 7..20, + range: 7..18, node_index: NodeIndex(None), args: [ Name( @@ -113,7 +113,15 @@ Module( | 1 | if call(foo, [a, b - | ^ Syntax Error: Expected `]`, found NonLogicalNewline +2 | def bar(): + | ^^^ Syntax Error: Expected `]`, found `def` +3 | pass + | + + + | +1 | if call(foo, [a, b + | ^ Syntax Error: Expected `)`, found newline 2 | def bar(): 3 | pass | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__ty_1828.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__ty_1828.py.snap new file mode 100644 index 0000000000..49ea0c7d58 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__ty_1828.py.snap @@ -0,0 +1,227 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lexing/ty_1828.py +--- +## AST + +``` +Module( + ModModule { + node_index: NodeIndex(None), + range: 0..112, + body: [ + AnnAssign( + StmtAnnAssign { + node_index: NodeIndex(None), + range: 66..93, + target: Name( + ExprName { + node_index: NodeIndex(None), + range: 67..68, + id: Name("c"), + ctx: Store, + }, + ), + annotation: Name( + ExprName { + node_index: NodeIndex(None), + range: 70..73, + id: Name("int"), + ctx: Load, + }, + ), + value: Some( + Tuple( + ExprTuple { + node_index: NodeIndex(None), + range: 76..93, + elts: [ + NumberLiteral( + ExprNumberLiteral { + node_index: NodeIndex(None), + range: 76..77, + value: Int( + 1, + ), + }, + ), + Subscript( + ExprSubscript { + node_index: NodeIndex(None), + range: 78..90, + value: FString( + ExprFString { + node_index: NodeIndex(None), + range: 78..85, + value: FStringValue { + inner: Single( + FString( + FString { + range: 78..85, + node_index: NodeIndex(None), + elements: [ + Interpolation( + InterpolatedElement { + range: 82..85, + node_index: NodeIndex(None), + expression: Name( + ExprName { + node_index: NodeIndex(None), + range: 83..84, + id: Name("d"), + ctx: Load, + }, + ), + debug_text: Some( + DebugText { + leading: "", + trailing: "=", + }, + ), + conversion: None, + format_spec: None, + }, + ), + ], + flags: FStringFlags { + quote_style: Double, + prefix: Regular, + triple_quoted: true, + unclosed: true, + }, + }, + ), + ), + }, + }, + ), + slice: Slice( + ExprSlice { + node_index: NodeIndex(None), + range: 87..90, + lower: None, + upper: Some( + Name( + ExprName { + node_index: NodeIndex(None), + range: 87..90, + id: Name("def"), + ctx: Load, + }, + ), + ), + step: None, + }, + ), + ctx: Load, + }, + ), + Call( + ExprCall { + node_index: NodeIndex(None), + range: 91..93, + func: Name( + ExprName { + node_index: NodeIndex(None), + range: 91..92, + id: Name("a"), + ctx: Load, + }, + ), + arguments: Arguments { + range: 92..93, + node_index: NodeIndex(None), + args: [], + keywords: [], + }, + }, + ), + ], + ctx: Load, + parenthesized: false, + }, + ), + ), + simple: false, + }, + ), + ], + }, +) +``` +## Errors + + | +1 | # Regression test for https://github.com/astral-sh/ty/issues/1828 +2 | (c: int = 1,f"""{d=[ + | ^ Syntax Error: Expected `)`, found `:` +3 | def a( +4 | class A: + | + + + | +1 | # Regression test for https://github.com/astral-sh/ty/issues/1828 +2 | (c: int = 1,f"""{d=[ + | ^ Syntax Error: f-string: expecting `}` +3 | def a( +4 | class A: + | + + + | +1 | # Regression test for https://github.com/astral-sh/ty/issues/1828 +2 | (c: int = 1,f"""{d=[ +3 | def a( + | ^^^ Syntax Error: Expected `:`, found `def` +4 | class A: +5 | pass + | + + + | +1 | # Regression test for https://github.com/astral-sh/ty/issues/1828 +2 | (c: int = 1,f"""{d=[ +3 | def a( + | ^ Syntax Error: Expected `]`, found name +4 | class A: +5 | pass + | + + + | +1 | # Regression test for https://github.com/astral-sh/ty/issues/1828 +2 | (c: int = 1,f"""{d=[ +3 | def a( + | _______^ +4 | | class A: +5 | | pass + | |_________^ Syntax Error: f-string: unterminated triple-quoted string + | + + + | +2 | (c: int = 1,f"""{d=[ +3 | def a( +4 | class A: + | ^^^^^ Syntax Error: Expected `)`, found `class` +5 | pass + | + + + | +1 | # Regression test for https://github.com/astral-sh/ty/issues/1828 +2 | (c: int = 1,f"""{d=[ +3 | def a( + | _______^ +4 | | class A: +5 | | pass + | |_________^ Syntax Error: Expected a statement + | + + + | +4 | class A: +5 | pass + | ^ Syntax Error: unexpected EOF while parsing + |