mirror of https://github.com/astral-sh/ruff
Add NonLogicalNewline token
This token is completely ignored by the parser, but it's useful for other users of the lexer, such as the Ruff linter. For example, the token is helpful for a "trailing comma" lint. The same idea exists in Python's `tokenize` module - there is a NEWLINE token (logical newline), and a NL token (non-logical newline). Fixes #4385.
This commit is contained in:
parent
4f1e7c6291
commit
674eeec29c
|
|
@ -1075,10 +1075,13 @@ where
|
|||
self.next_char();
|
||||
let tok_end = self.get_pos();
|
||||
|
||||
// Depending on the nesting level, we emit newline or not:
|
||||
// Depending on the nesting level, we emit a logical or
|
||||
// non-logical newline:
|
||||
if self.nesting == 0 {
|
||||
self.at_begin_of_line = true;
|
||||
self.emit((tok_start, Tok::Newline, tok_end));
|
||||
} else {
|
||||
self.emit((tok_start, Tok::NonLogicalNewline, tok_end));
|
||||
}
|
||||
}
|
||||
' ' | '\t' | '\x0C' => {
|
||||
|
|
@ -1464,7 +1467,16 @@ mod tests {
|
|||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = format!("x = [{} 1,2{}]{}", $eol, $eol, $eol);
|
||||
let source = r"x = [
|
||||
|
||||
1,2
|
||||
,(3,
|
||||
4,
|
||||
), {
|
||||
5,
|
||||
6,\
|
||||
7}]
|
||||
".replace("\n", $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
|
|
@ -1474,9 +1486,32 @@ mod tests {
|
|||
},
|
||||
Tok::Equal,
|
||||
Tok::Lsqb,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(1) },
|
||||
Tok::Comma,
|
||||
Tok::Int { value: BigInt::from(2) },
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Comma,
|
||||
Tok::Lpar,
|
||||
Tok::Int { value: BigInt::from(3) },
|
||||
Tok::Comma,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(4) },
|
||||
Tok::Comma,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Rpar,
|
||||
Tok::Comma,
|
||||
Tok::Lbrace,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(5) },
|
||||
Tok::Comma,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(6) },
|
||||
Tok::Comma,
|
||||
// Continuation here - no NonLogicalNewline.
|
||||
Tok::Int { value: BigInt::from(7) },
|
||||
Tok::Rbrace,
|
||||
Tok::Rsqb,
|
||||
Tok::Newline,
|
||||
]
|
||||
|
|
@ -1492,6 +1527,50 @@ mod tests {
|
|||
test_newline_in_brackets_unix_eol: UNIX_EOL,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_non_logical_newline_in_string_continuation() {
|
||||
let source = r"(
|
||||
'a'
|
||||
'b'
|
||||
|
||||
'c' \
|
||||
'd'
|
||||
)";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Lpar,
|
||||
Tok::NonLogicalNewline,
|
||||
stok("a"),
|
||||
Tok::NonLogicalNewline,
|
||||
stok("b"),
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::NonLogicalNewline,
|
||||
stok("c"),
|
||||
stok("d"),
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Rpar,
|
||||
Tok::Newline,
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_logical_newline_line_comment() {
|
||||
let source = "#Hello\n#World";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Comment("#Hello".to_owned()),
|
||||
// tokenize.py does put an NL here...
|
||||
Tok::Comment("#World".to_owned()),
|
||||
// ... and here, but doesn't seem very useful.
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_operators() {
|
||||
let source = "//////=/ /";
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ pub fn parse_located(
|
|||
let marker_token = (Default::default(), mode.to_marker(), Default::default());
|
||||
let tokenizer = iter::once(Ok(marker_token))
|
||||
.chain(lxr)
|
||||
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. }));
|
||||
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
||||
|
||||
python::TopParser::new()
|
||||
.parse(tokenizer)
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ pub enum Tok {
|
|||
triple_quoted: bool,
|
||||
},
|
||||
Newline,
|
||||
NonLogicalNewline,
|
||||
Indent,
|
||||
Dedent,
|
||||
StartModule,
|
||||
|
|
@ -136,6 +137,7 @@ impl fmt::Display for Tok {
|
|||
write!(f, "{kind}{quotes}{value}{quotes}")
|
||||
}
|
||||
Newline => f.write_str("Newline"),
|
||||
NonLogicalNewline => f.write_str("NonLogicalNewline"),
|
||||
Indent => f.write_str("Indent"),
|
||||
Dedent => f.write_str("Dedent"),
|
||||
StartModule => f.write_str("StartProgram"),
|
||||
|
|
|
|||
Loading…
Reference in New Issue