Skip over trivia tokens after re-lexing (#21895)

This commit is contained in:
Micha Reiser 2025-12-11 11:45:18 +01:00 committed by GitHub
parent 5c320990f7
commit aa27925e87
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 373 additions and 33 deletions

1
Cargo.lock generated
View File

@ -3349,6 +3349,7 @@ dependencies = [
"compact_str",
"get-size2",
"insta",
"itertools 0.14.0",
"memchr",
"ruff_annotate_snippets",
"ruff_python_ast",

View File

@ -35,6 +35,7 @@ ruff_source_file = { workspace = true }
anyhow = { workspace = true }
insta = { workspace = true, features = ["glob"] }
itertools = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
walkdir = { workspace = true }

View File

@ -0,0 +1,5 @@
# Regression test for https://github.com/astral-sh/ty/issues/1828
(c: int = 1,f"""{d=[
def a(
class A:
pass

View File

@ -67,26 +67,59 @@ impl<'src> TokenSource<'src> {
///
/// [`re_lex_logical_token`]: Lexer::re_lex_logical_token
pub(crate) fn re_lex_logical_token(&mut self) {
let mut non_logical_newline_start = None;
for token in self.tokens.iter().rev() {
let mut non_logical_newline = None;
#[cfg(debug_assertions)]
let last_non_trivia_end_before = {
self.tokens
.iter()
.rev()
.find(|tok| !tok.kind().is_trivia())
.map(ruff_text_size::Ranged::end)
};
for (index, token) in self.tokens.iter().enumerate().rev() {
match token.kind() {
TokenKind::NonLogicalNewline => {
non_logical_newline_start = Some(token.start());
non_logical_newline = Some((index, token.start()));
}
TokenKind::Comment => continue,
_ => break,
}
}
if self.lexer.re_lex_logical_token(non_logical_newline_start) {
let current_start = self.current_range().start();
while self
.tokens
.last()
.is_some_and(|last| last.start() >= current_start)
{
self.tokens.pop();
}
if !self
.lexer
.re_lex_logical_token(non_logical_newline.map(|(_, start)| start))
{
return;
}
let non_logical_line_index = non_logical_newline
.expect(
"`re_lex_logical_token` should only return `true` if `non_logical_line` is `Some`",
)
.0;
// Trim the already bumped logical line token (and comments coming after it) as it might now have become a logical line token
self.tokens.truncate(non_logical_line_index);
#[cfg(debug_assertions)]
{
let last_non_trivia_end_now = {
self.tokens
.iter()
.rev()
.find(|tok| !tok.kind().is_trivia())
.map(ruff_text_size::Ranged::end)
};
assert_eq!(last_non_trivia_end_before, last_non_trivia_end_now);
}
// Ensure `current` is positioned at a non-trivia token.
if self.current_kind().is_trivia() {
self.bump(self.current_kind());
}
}

View File

@ -4,15 +4,16 @@ use std::fmt::{Formatter, Write};
use std::fs;
use std::path::Path;
use itertools::Itertools;
use ruff_annotate_snippets::{Level, Renderer, Snippet};
use ruff_python_ast::token::Token;
use ruff_python_ast::token::{Token, Tokens};
use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::visitor::source_order::{SourceOrderVisitor, TraversalSignal, walk_module};
use ruff_python_ast::{self as ast, AnyNodeRef, Mod, PythonVersion};
use ruff_python_parser::semantic_errors::{
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError,
};
use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, parse_unchecked};
use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, Parsed, parse_unchecked};
use ruff_source_file::{LineIndex, OneIndexed, SourceCode};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
@ -81,7 +82,7 @@ fn test_valid_syntax(input_path: &Path) {
}
validate_tokens(parsed.tokens(), source.text_len(), input_path);
validate_ast(parsed.syntax(), source.text_len(), input_path);
validate_ast(&parsed, source.text_len(), input_path);
let mut output = String::new();
writeln!(&mut output, "## AST").unwrap();
@ -139,7 +140,7 @@ fn test_invalid_syntax(input_path: &Path) {
let parsed = parse_unchecked(&source, options.clone());
validate_tokens(parsed.tokens(), source.text_len(), input_path);
validate_ast(parsed.syntax(), source.text_len(), input_path);
validate_ast(&parsed, source.text_len(), input_path);
let mut output = String::new();
writeln!(&mut output, "## AST").unwrap();
@ -402,12 +403,16 @@ Tokens: {tokens:#?}
/// * the range of the parent node fully encloses all its child nodes
/// * the ranges are strictly increasing when traversing the nodes in pre-order.
/// * all ranges are within the length of the source code.
fn validate_ast(root: &Mod, source_len: TextSize, test_path: &Path) {
walk_module(&mut ValidateAstVisitor::new(source_len, test_path), root);
fn validate_ast(parsed: &Parsed<Mod>, source_len: TextSize, test_path: &Path) {
walk_module(
&mut ValidateAstVisitor::new(parsed.tokens(), source_len, test_path),
parsed.syntax(),
);
}
#[derive(Debug)]
struct ValidateAstVisitor<'a> {
tokens: std::iter::Peekable<std::slice::Iter<'a, Token>>,
parents: Vec<AnyNodeRef<'a>>,
previous: Option<AnyNodeRef<'a>>,
source_length: TextSize,
@ -415,8 +420,9 @@ struct ValidateAstVisitor<'a> {
}
impl<'a> ValidateAstVisitor<'a> {
fn new(source_length: TextSize, test_path: &'a Path) -> Self {
fn new(tokens: &'a Tokens, source_length: TextSize, test_path: &'a Path) -> Self {
Self {
tokens: tokens.iter().peekable(),
parents: Vec::new(),
previous: None,
source_length,
@ -425,6 +431,47 @@ impl<'a> ValidateAstVisitor<'a> {
}
}
impl ValidateAstVisitor<'_> {
/// Check that the node's start doesn't fall within a token.
/// Called in `enter_node` before visiting children.
fn assert_start_boundary(&mut self, node: AnyNodeRef<'_>) {
// Skip tokens that end at or before the node starts.
self.tokens
.peeking_take_while(|t| t.end() <= node.start())
.last();
if let Some(next) = self.tokens.peek() {
// At this point, next_token.end() > node.start()
assert!(
next.start() >= node.start(),
"{path}: The start of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}",
path = self.test_path.display(),
root = self.parents.first()
);
}
}
/// Check that the node's end doesn't fall within a token.
/// Called in `leave_node` after visiting children, so all tokens
/// within the node have been consumed.
fn assert_end_boundary(&mut self, node: AnyNodeRef<'_>) {
// Skip tokens that end at or before the node ends.
self.tokens
.peeking_take_while(|t| t.end() <= node.end())
.last();
if let Some(next) = self.tokens.peek() {
// At this point, `next_token.end() > node.end()`
assert!(
next.start() >= node.end(),
"{path}: The end of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}",
path = self.test_path.display(),
root = self.parents.first()
);
}
}
}
impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> {
fn enter_node(&mut self, node: AnyNodeRef<'ast>) -> TraversalSignal {
assert!(
@ -452,12 +499,16 @@ impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> {
);
}
self.assert_start_boundary(node);
self.parents.push(node);
TraversalSignal::Traverse
}
fn leave_node(&mut self, node: AnyNodeRef<'ast>) {
self.assert_end_boundary(node);
self.parents.pop().expect("Expected tree to be balanced");
self.previous = Some(node);

View File

@ -296,7 +296,7 @@ Module(
test: Call(
ExprCall {
node_index: NodeIndex(None),
range: 456..472,
range: 456..471,
func: Name(
ExprName {
node_index: NodeIndex(None),
@ -306,7 +306,7 @@ Module(
},
),
arguments: Arguments {
range: 460..472,
range: 460..471,
node_index: NodeIndex(None),
args: [
Name(
@ -581,7 +581,7 @@ Module(
test: Call(
ExprCall {
node_index: NodeIndex(None),
range: 890..906,
range: 890..905,
func: Name(
ExprName {
node_index: NodeIndex(None),
@ -591,7 +591,7 @@ Module(
},
),
arguments: Arguments {
range: 894..906,
range: 894..905,
node_index: NodeIndex(None),
args: [
FString(
@ -832,7 +832,16 @@ Module(
|
28 | # The lexer is nested with multiple levels of parentheses
29 | if call(foo, [a, b
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
30 | def bar():
| ^^^ Syntax Error: Expected `]`, found `def`
31 | pass
|
|
28 | # The lexer is nested with multiple levels of parentheses
29 | if call(foo, [a, b
| ^ Syntax Error: Expected `)`, found newline
30 | def bar():
31 | pass
|
@ -857,11 +866,10 @@ Module(
|
41 | # test is to make sure it emits a `NonLogicalNewline` token after `b`.
42 | if call(foo, [a,
43 | b
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
44 | )
| ^ Syntax Error: Expected `]`, found `)`
45 | def bar():
46 | pass
|
@ -898,7 +906,7 @@ Module(
|
49 | # F-strings uses normal list parsing, so test those as well
50 | if call(f"hello {x
| ^ Syntax Error: Expected FStringEnd, found NonLogicalNewline
| ^ Syntax Error: Expected `)`, found newline
51 | def bar():
52 | pass
|

View File

@ -17,7 +17,7 @@ Module(
test: Call(
ExprCall {
node_index: NodeIndex(None),
range: 3..19,
range: 3..18,
func: Name(
ExprName {
node_index: NodeIndex(None),
@ -27,7 +27,7 @@ Module(
},
),
arguments: Arguments {
range: 7..19,
range: 7..18,
node_index: NodeIndex(None),
args: [
Name(
@ -113,5 +113,11 @@ Module(
|
1 | if call(foo, [a, b def bar(): pass
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
| ^^^ Syntax Error: Expected `]`, found `def`
|
|
1 | if call(foo, [a, b def bar(): pass
| ^ Syntax Error: Expected `)`, found newline
|

View File

@ -17,7 +17,7 @@ Module(
test: Call(
ExprCall {
node_index: NodeIndex(None),
range: 3..20,
range: 3..18,
func: Name(
ExprName {
node_index: NodeIndex(None),
@ -27,7 +27,7 @@ Module(
},
),
arguments: Arguments {
range: 7..20,
range: 7..18,
node_index: NodeIndex(None),
args: [
Name(
@ -113,7 +113,15 @@ Module(
|
1 | if call(foo, [a, b
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
2 | def bar():
| ^^^ Syntax Error: Expected `]`, found `def`
3 | pass
|
|
1 | if call(foo, [a, b
| ^ Syntax Error: Expected `)`, found newline
2 | def bar():
3 | pass
|

View File

@ -0,0 +1,227 @@
---
source: crates/ruff_python_parser/tests/fixtures.rs
input_file: crates/ruff_python_parser/resources/invalid/re_lexing/ty_1828.py
---
## AST
```
Module(
ModModule {
node_index: NodeIndex(None),
range: 0..112,
body: [
AnnAssign(
StmtAnnAssign {
node_index: NodeIndex(None),
range: 66..93,
target: Name(
ExprName {
node_index: NodeIndex(None),
range: 67..68,
id: Name("c"),
ctx: Store,
},
),
annotation: Name(
ExprName {
node_index: NodeIndex(None),
range: 70..73,
id: Name("int"),
ctx: Load,
},
),
value: Some(
Tuple(
ExprTuple {
node_index: NodeIndex(None),
range: 76..93,
elts: [
NumberLiteral(
ExprNumberLiteral {
node_index: NodeIndex(None),
range: 76..77,
value: Int(
1,
),
},
),
Subscript(
ExprSubscript {
node_index: NodeIndex(None),
range: 78..90,
value: FString(
ExprFString {
node_index: NodeIndex(None),
range: 78..85,
value: FStringValue {
inner: Single(
FString(
FString {
range: 78..85,
node_index: NodeIndex(None),
elements: [
Interpolation(
InterpolatedElement {
range: 82..85,
node_index: NodeIndex(None),
expression: Name(
ExprName {
node_index: NodeIndex(None),
range: 83..84,
id: Name("d"),
ctx: Load,
},
),
debug_text: Some(
DebugText {
leading: "",
trailing: "=",
},
),
conversion: None,
format_spec: None,
},
),
],
flags: FStringFlags {
quote_style: Double,
prefix: Regular,
triple_quoted: true,
unclosed: true,
},
},
),
),
},
},
),
slice: Slice(
ExprSlice {
node_index: NodeIndex(None),
range: 87..90,
lower: None,
upper: Some(
Name(
ExprName {
node_index: NodeIndex(None),
range: 87..90,
id: Name("def"),
ctx: Load,
},
),
),
step: None,
},
),
ctx: Load,
},
),
Call(
ExprCall {
node_index: NodeIndex(None),
range: 91..93,
func: Name(
ExprName {
node_index: NodeIndex(None),
range: 91..92,
id: Name("a"),
ctx: Load,
},
),
arguments: Arguments {
range: 92..93,
node_index: NodeIndex(None),
args: [],
keywords: [],
},
},
),
],
ctx: Load,
parenthesized: false,
},
),
),
simple: false,
},
),
],
},
)
```
## Errors
|
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
2 | (c: int = 1,f"""{d=[
| ^ Syntax Error: Expected `)`, found `:`
3 | def a(
4 | class A:
|
|
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
2 | (c: int = 1,f"""{d=[
| ^ Syntax Error: f-string: expecting `}`
3 | def a(
4 | class A:
|
|
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
2 | (c: int = 1,f"""{d=[
3 | def a(
| ^^^ Syntax Error: Expected `:`, found `def`
4 | class A:
5 | pass
|
|
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
2 | (c: int = 1,f"""{d=[
3 | def a(
| ^ Syntax Error: Expected `]`, found name
4 | class A:
5 | pass
|
|
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
2 | (c: int = 1,f"""{d=[
3 | def a(
| _______^
4 | | class A:
5 | | pass
| |_________^ Syntax Error: f-string: unterminated triple-quoted string
|
|
2 | (c: int = 1,f"""{d=[
3 | def a(
4 | class A:
| ^^^^^ Syntax Error: Expected `)`, found `class`
5 | pass
|
|
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
2 | (c: int = 1,f"""{d=[
3 | def a(
| _______^
4 | | class A:
5 | | pass
| |_________^ Syntax Error: Expected a statement
|
|
4 | class A:
5 | pass
| ^ Syntax Error: unexpected EOF while parsing
|