mirror of https://github.com/astral-sh/ruff
Skip over trivia tokens after re-lexing
This commit is contained in:
parent
ff7086d9ad
commit
7d47f711c1
|
|
@ -3349,6 +3349,7 @@ dependencies = [
|
|||
"compact_str",
|
||||
"get-size2",
|
||||
"insta",
|
||||
"itertools 0.14.0",
|
||||
"memchr",
|
||||
"ruff_annotate_snippets",
|
||||
"ruff_python_ast",
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ ruff_source_file = { workspace = true }
|
|||
|
||||
anyhow = { workspace = true }
|
||||
insta = { workspace = true, features = ["glob"] }
|
||||
itertools = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
walkdir = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
# Regression test for https://github.com/astral-sh/ty/issues/1828
|
||||
(c: int = 1,f"""{d=[
|
||||
def a(
|
||||
class A:
|
||||
pass
|
||||
|
|
@ -67,26 +67,59 @@ impl<'src> TokenSource<'src> {
|
|||
///
|
||||
/// [`re_lex_logical_token`]: Lexer::re_lex_logical_token
|
||||
pub(crate) fn re_lex_logical_token(&mut self) {
|
||||
let mut non_logical_newline_start = None;
|
||||
for token in self.tokens.iter().rev() {
|
||||
let mut non_logical_line = None;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
let last_non_trivia_end_before = {
|
||||
self.tokens
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|tok| !tok.kind().is_trivia())
|
||||
.map(ruff_text_size::Ranged::end)
|
||||
};
|
||||
|
||||
for (index, token) in self.tokens.iter().enumerate().rev() {
|
||||
match token.kind() {
|
||||
TokenKind::NonLogicalNewline => {
|
||||
non_logical_newline_start = Some(token.start());
|
||||
non_logical_line = Some((index, token.start()));
|
||||
}
|
||||
TokenKind::Comment => continue,
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
if self.lexer.re_lex_logical_token(non_logical_newline_start) {
|
||||
let current_start = self.current_range().start();
|
||||
while self
|
||||
.tokens
|
||||
.last()
|
||||
.is_some_and(|last| last.start() >= current_start)
|
||||
{
|
||||
self.tokens.pop();
|
||||
}
|
||||
if !self
|
||||
.lexer
|
||||
.re_lex_logical_token(non_logical_line.map(|(_, start)| start))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
let non_logical_line_index = non_logical_line
|
||||
.expect(
|
||||
"`re_lex_logical_token` should only return `true` if `non_logical_line` is `Some`",
|
||||
)
|
||||
.0;
|
||||
|
||||
// Trim the already bumped logical line token (and comments coming after it) as it might now have become a logical line token
|
||||
self.tokens.truncate(non_logical_line_index);
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
let last_non_trivia_end_now = {
|
||||
self.tokens
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|tok| !tok.kind().is_trivia())
|
||||
.map(ruff_text_size::Ranged::end)
|
||||
};
|
||||
|
||||
assert_eq!(last_non_trivia_end_before, last_non_trivia_end_now);
|
||||
}
|
||||
|
||||
// Ensure `current` is positioned at a non-trivia token.
|
||||
if self.current_kind().is_trivia() {
|
||||
self.bump(self.current_kind());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,15 +4,16 @@ use std::fmt::{Formatter, Write};
|
|||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use itertools::Itertools;
|
||||
use ruff_annotate_snippets::{Level, Renderer, Snippet};
|
||||
use ruff_python_ast::token::Token;
|
||||
use ruff_python_ast::token::{Token, Tokens};
|
||||
use ruff_python_ast::visitor::Visitor;
|
||||
use ruff_python_ast::visitor::source_order::{SourceOrderVisitor, TraversalSignal, walk_module};
|
||||
use ruff_python_ast::{self as ast, AnyNodeRef, Mod, PythonVersion};
|
||||
use ruff_python_parser::semantic_errors::{
|
||||
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError,
|
||||
};
|
||||
use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, parse_unchecked};
|
||||
use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, Parsed, parse_unchecked};
|
||||
use ruff_source_file::{LineIndex, OneIndexed, SourceCode};
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
|
||||
|
|
@ -81,7 +82,7 @@ fn test_valid_syntax(input_path: &Path) {
|
|||
}
|
||||
|
||||
validate_tokens(parsed.tokens(), source.text_len(), input_path);
|
||||
validate_ast(parsed.syntax(), source.text_len(), input_path);
|
||||
validate_ast(&parsed, source.text_len(), input_path);
|
||||
|
||||
let mut output = String::new();
|
||||
writeln!(&mut output, "## AST").unwrap();
|
||||
|
|
@ -139,7 +140,7 @@ fn test_invalid_syntax(input_path: &Path) {
|
|||
let parsed = parse_unchecked(&source, options.clone());
|
||||
|
||||
validate_tokens(parsed.tokens(), source.text_len(), input_path);
|
||||
validate_ast(parsed.syntax(), source.text_len(), input_path);
|
||||
validate_ast(&parsed, source.text_len(), input_path);
|
||||
|
||||
let mut output = String::new();
|
||||
writeln!(&mut output, "## AST").unwrap();
|
||||
|
|
@ -402,12 +403,16 @@ Tokens: {tokens:#?}
|
|||
/// * the range of the parent node fully encloses all its child nodes
|
||||
/// * the ranges are strictly increasing when traversing the nodes in pre-order.
|
||||
/// * all ranges are within the length of the source code.
|
||||
fn validate_ast(root: &Mod, source_len: TextSize, test_path: &Path) {
|
||||
walk_module(&mut ValidateAstVisitor::new(source_len, test_path), root);
|
||||
fn validate_ast(parsed: &Parsed<Mod>, source_len: TextSize, test_path: &Path) {
|
||||
walk_module(
|
||||
&mut ValidateAstVisitor::new(parsed.tokens(), source_len, test_path),
|
||||
parsed.syntax(),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ValidateAstVisitor<'a> {
|
||||
tokens: std::iter::Peekable<std::slice::Iter<'a, Token>>,
|
||||
parents: Vec<AnyNodeRef<'a>>,
|
||||
previous: Option<AnyNodeRef<'a>>,
|
||||
source_length: TextSize,
|
||||
|
|
@ -415,8 +420,9 @@ struct ValidateAstVisitor<'a> {
|
|||
}
|
||||
|
||||
impl<'a> ValidateAstVisitor<'a> {
|
||||
fn new(source_length: TextSize, test_path: &'a Path) -> Self {
|
||||
fn new(tokens: &'a Tokens, source_length: TextSize, test_path: &'a Path) -> Self {
|
||||
Self {
|
||||
tokens: tokens.iter().peekable(),
|
||||
parents: Vec::new(),
|
||||
previous: None,
|
||||
source_length,
|
||||
|
|
@ -425,6 +431,47 @@ impl<'a> ValidateAstVisitor<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
impl ValidateAstVisitor<'_> {
|
||||
/// Check that the node's start doesn't fall within a token.
|
||||
/// Called in `enter_node` before visiting children.
|
||||
fn assert_start_boundary(&mut self, node: AnyNodeRef<'_>) {
|
||||
// Skip tokens that end at or before the node starts.
|
||||
self.tokens
|
||||
.peeking_take_while(|t| t.end() <= node.start())
|
||||
.last();
|
||||
|
||||
if let Some(next) = self.tokens.peek() {
|
||||
// At this point, next_token.end() > node.start()
|
||||
assert!(
|
||||
next.start() >= node.start(),
|
||||
"{path}: The start of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}",
|
||||
path = self.test_path.display(),
|
||||
root = self.parents.first()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Check that the node's end doesn't fall within a token.
|
||||
/// Called in `leave_node` after visiting children, so all tokens
|
||||
/// within the node have been consumed.
|
||||
fn assert_end_boundary(&mut self, node: AnyNodeRef<'_>) {
|
||||
// Skip tokens that end at or before the node ends.
|
||||
self.tokens
|
||||
.peeking_take_while(|t| t.end() <= node.end())
|
||||
.last();
|
||||
|
||||
if let Some(next) = self.tokens.peek() {
|
||||
// At this point, `next_token.end() > node.end()`
|
||||
assert!(
|
||||
next.start() >= node.end(),
|
||||
"{path}: The end of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}",
|
||||
path = self.test_path.display(),
|
||||
root = self.parents.first()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> {
|
||||
fn enter_node(&mut self, node: AnyNodeRef<'ast>) -> TraversalSignal {
|
||||
assert!(
|
||||
|
|
@ -452,12 +499,16 @@ impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> {
|
|||
);
|
||||
}
|
||||
|
||||
self.assert_start_boundary(node);
|
||||
|
||||
self.parents.push(node);
|
||||
|
||||
TraversalSignal::Traverse
|
||||
}
|
||||
|
||||
fn leave_node(&mut self, node: AnyNodeRef<'ast>) {
|
||||
self.assert_end_boundary(node);
|
||||
|
||||
self.parents.pop().expect("Expected tree to be balanced");
|
||||
|
||||
self.previous = Some(node);
|
||||
|
|
|
|||
|
|
@ -296,7 +296,7 @@ Module(
|
|||
test: Call(
|
||||
ExprCall {
|
||||
node_index: NodeIndex(None),
|
||||
range: 456..472,
|
||||
range: 456..471,
|
||||
func: Name(
|
||||
ExprName {
|
||||
node_index: NodeIndex(None),
|
||||
|
|
@ -306,7 +306,7 @@ Module(
|
|||
},
|
||||
),
|
||||
arguments: Arguments {
|
||||
range: 460..472,
|
||||
range: 460..471,
|
||||
node_index: NodeIndex(None),
|
||||
args: [
|
||||
Name(
|
||||
|
|
@ -581,7 +581,7 @@ Module(
|
|||
test: Call(
|
||||
ExprCall {
|
||||
node_index: NodeIndex(None),
|
||||
range: 890..906,
|
||||
range: 890..905,
|
||||
func: Name(
|
||||
ExprName {
|
||||
node_index: NodeIndex(None),
|
||||
|
|
@ -591,7 +591,7 @@ Module(
|
|||
},
|
||||
),
|
||||
arguments: Arguments {
|
||||
range: 894..906,
|
||||
range: 894..905,
|
||||
node_index: NodeIndex(None),
|
||||
args: [
|
||||
FString(
|
||||
|
|
@ -832,7 +832,16 @@ Module(
|
|||
|
|
||||
28 | # The lexer is nested with multiple levels of parentheses
|
||||
29 | if call(foo, [a, b
|
||||
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
|
||||
30 | def bar():
|
||||
| ^^^ Syntax Error: Expected `]`, found `def`
|
||||
31 | pass
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
28 | # The lexer is nested with multiple levels of parentheses
|
||||
29 | if call(foo, [a, b
|
||||
| ^ Syntax Error: Expected `)`, found newline
|
||||
30 | def bar():
|
||||
31 | pass
|
||||
|
|
||||
|
|
@ -857,11 +866,10 @@ Module(
|
|||
|
||||
|
||||
|
|
||||
41 | # test is to make sure it emits a `NonLogicalNewline` token after `b`.
|
||||
42 | if call(foo, [a,
|
||||
43 | b
|
||||
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
|
||||
44 | )
|
||||
| ^ Syntax Error: Expected `]`, found `)`
|
||||
45 | def bar():
|
||||
46 | pass
|
||||
|
|
||||
|
|
@ -898,7 +906,7 @@ Module(
|
|||
|
|
||||
49 | # F-strings uses normal list parsing, so test those as well
|
||||
50 | if call(f"hello {x
|
||||
| ^ Syntax Error: Expected FStringEnd, found NonLogicalNewline
|
||||
| ^ Syntax Error: Expected `)`, found newline
|
||||
51 | def bar():
|
||||
52 | pass
|
||||
|
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ Module(
|
|||
test: Call(
|
||||
ExprCall {
|
||||
node_index: NodeIndex(None),
|
||||
range: 3..19,
|
||||
range: 3..18,
|
||||
func: Name(
|
||||
ExprName {
|
||||
node_index: NodeIndex(None),
|
||||
|
|
@ -27,7 +27,7 @@ Module(
|
|||
},
|
||||
),
|
||||
arguments: Arguments {
|
||||
range: 7..19,
|
||||
range: 7..18,
|
||||
node_index: NodeIndex(None),
|
||||
args: [
|
||||
Name(
|
||||
|
|
@ -113,5 +113,11 @@ Module(
|
|||
|
||||
|
|
||||
1 | if call(foo, [a, b
def bar():
pass
|
||||
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
|
||||
| ^^^ Syntax Error: Expected `]`, found `def`
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
1 | if call(foo, [a, b
def bar():
pass
|
||||
| ^ Syntax Error: Expected `)`, found newline
|
||||
|
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ Module(
|
|||
test: Call(
|
||||
ExprCall {
|
||||
node_index: NodeIndex(None),
|
||||
range: 3..20,
|
||||
range: 3..18,
|
||||
func: Name(
|
||||
ExprName {
|
||||
node_index: NodeIndex(None),
|
||||
|
|
@ -27,7 +27,7 @@ Module(
|
|||
},
|
||||
),
|
||||
arguments: Arguments {
|
||||
range: 7..20,
|
||||
range: 7..18,
|
||||
node_index: NodeIndex(None),
|
||||
args: [
|
||||
Name(
|
||||
|
|
@ -113,7 +113,15 @@ Module(
|
|||
|
||||
|
|
||||
1 | if call(foo, [a, b
|
||||
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
|
||||
2 | def bar():
|
||||
| ^^^ Syntax Error: Expected `]`, found `def`
|
||||
3 | pass
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
1 | if call(foo, [a, b
|
||||
| ^ Syntax Error: Expected `)`, found newline
|
||||
2 | def bar():
|
||||
3 | pass
|
||||
|
|
||||
|
|
|
|||
|
|
@ -0,0 +1,227 @@
|
|||
---
|
||||
source: crates/ruff_python_parser/tests/fixtures.rs
|
||||
input_file: crates/ruff_python_parser/resources/invalid/re_lexing/ty_1828.py
|
||||
---
|
||||
## AST
|
||||
|
||||
```
|
||||
Module(
|
||||
ModModule {
|
||||
node_index: NodeIndex(None),
|
||||
range: 0..112,
|
||||
body: [
|
||||
AnnAssign(
|
||||
StmtAnnAssign {
|
||||
node_index: NodeIndex(None),
|
||||
range: 66..93,
|
||||
target: Name(
|
||||
ExprName {
|
||||
node_index: NodeIndex(None),
|
||||
range: 67..68,
|
||||
id: Name("c"),
|
||||
ctx: Store,
|
||||
},
|
||||
),
|
||||
annotation: Name(
|
||||
ExprName {
|
||||
node_index: NodeIndex(None),
|
||||
range: 70..73,
|
||||
id: Name("int"),
|
||||
ctx: Load,
|
||||
},
|
||||
),
|
||||
value: Some(
|
||||
Tuple(
|
||||
ExprTuple {
|
||||
node_index: NodeIndex(None),
|
||||
range: 76..93,
|
||||
elts: [
|
||||
NumberLiteral(
|
||||
ExprNumberLiteral {
|
||||
node_index: NodeIndex(None),
|
||||
range: 76..77,
|
||||
value: Int(
|
||||
1,
|
||||
),
|
||||
},
|
||||
),
|
||||
Subscript(
|
||||
ExprSubscript {
|
||||
node_index: NodeIndex(None),
|
||||
range: 78..90,
|
||||
value: FString(
|
||||
ExprFString {
|
||||
node_index: NodeIndex(None),
|
||||
range: 78..85,
|
||||
value: FStringValue {
|
||||
inner: Single(
|
||||
FString(
|
||||
FString {
|
||||
range: 78..85,
|
||||
node_index: NodeIndex(None),
|
||||
elements: [
|
||||
Interpolation(
|
||||
InterpolatedElement {
|
||||
range: 82..85,
|
||||
node_index: NodeIndex(None),
|
||||
expression: Name(
|
||||
ExprName {
|
||||
node_index: NodeIndex(None),
|
||||
range: 83..84,
|
||||
id: Name("d"),
|
||||
ctx: Load,
|
||||
},
|
||||
),
|
||||
debug_text: Some(
|
||||
DebugText {
|
||||
leading: "",
|
||||
trailing: "=",
|
||||
},
|
||||
),
|
||||
conversion: None,
|
||||
format_spec: None,
|
||||
},
|
||||
),
|
||||
],
|
||||
flags: FStringFlags {
|
||||
quote_style: Double,
|
||||
prefix: Regular,
|
||||
triple_quoted: true,
|
||||
unclosed: true,
|
||||
},
|
||||
},
|
||||
),
|
||||
),
|
||||
},
|
||||
},
|
||||
),
|
||||
slice: Slice(
|
||||
ExprSlice {
|
||||
node_index: NodeIndex(None),
|
||||
range: 87..90,
|
||||
lower: None,
|
||||
upper: Some(
|
||||
Name(
|
||||
ExprName {
|
||||
node_index: NodeIndex(None),
|
||||
range: 87..90,
|
||||
id: Name("def"),
|
||||
ctx: Load,
|
||||
},
|
||||
),
|
||||
),
|
||||
step: None,
|
||||
},
|
||||
),
|
||||
ctx: Load,
|
||||
},
|
||||
),
|
||||
Call(
|
||||
ExprCall {
|
||||
node_index: NodeIndex(None),
|
||||
range: 91..93,
|
||||
func: Name(
|
||||
ExprName {
|
||||
node_index: NodeIndex(None),
|
||||
range: 91..92,
|
||||
id: Name("a"),
|
||||
ctx: Load,
|
||||
},
|
||||
),
|
||||
arguments: Arguments {
|
||||
range: 92..93,
|
||||
node_index: NodeIndex(None),
|
||||
args: [],
|
||||
keywords: [],
|
||||
},
|
||||
},
|
||||
),
|
||||
],
|
||||
ctx: Load,
|
||||
parenthesized: false,
|
||||
},
|
||||
),
|
||||
),
|
||||
simple: false,
|
||||
},
|
||||
),
|
||||
],
|
||||
},
|
||||
)
|
||||
```
|
||||
## Errors
|
||||
|
||||
|
|
||||
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
|
||||
2 | (c: int = 1,f"""{d=[
|
||||
| ^ Syntax Error: Expected `)`, found `:`
|
||||
3 | def a(
|
||||
4 | class A:
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
|
||||
2 | (c: int = 1,f"""{d=[
|
||||
| ^ Syntax Error: f-string: expecting `}`
|
||||
3 | def a(
|
||||
4 | class A:
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
|
||||
2 | (c: int = 1,f"""{d=[
|
||||
3 | def a(
|
||||
| ^^^ Syntax Error: Expected `:`, found `def`
|
||||
4 | class A:
|
||||
5 | pass
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
|
||||
2 | (c: int = 1,f"""{d=[
|
||||
3 | def a(
|
||||
| ^ Syntax Error: Expected `]`, found name
|
||||
4 | class A:
|
||||
5 | pass
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
|
||||
2 | (c: int = 1,f"""{d=[
|
||||
3 | def a(
|
||||
| _______^
|
||||
4 | | class A:
|
||||
5 | | pass
|
||||
| |_________^ Syntax Error: f-string: unterminated triple-quoted string
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
2 | (c: int = 1,f"""{d=[
|
||||
3 | def a(
|
||||
4 | class A:
|
||||
| ^^^^^ Syntax Error: Expected `)`, found `class`
|
||||
5 | pass
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
1 | # Regression test for https://github.com/astral-sh/ty/issues/1828
|
||||
2 | (c: int = 1,f"""{d=[
|
||||
3 | def a(
|
||||
| _______^
|
||||
4 | | class A:
|
||||
5 | | pass
|
||||
| |_________^ Syntax Error: Expected a statement
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
4 | class A:
|
||||
5 | pass
|
||||
| ^ Syntax Error: unexpected EOF while parsing
|
||||
|
|
||||
Loading…
Reference in New Issue