From bfc17fecaa98089eca3aa29ee10a13f09df82c27 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 15 Apr 2025 21:26:12 +0530 Subject: [PATCH] Raise syntax error when `\` is at end of file (#17409) ## Summary This PR fixes a bug in the lexer specifically around the line continuation character at the end of a file. This occurred because the lexer wouldn't check for EOF _after_ consuming the escaped newline but only if the EOF was right after the line continuation character. fixes: #17398 ## Test Plan Add tests for the scenarios where this should occur mainly (a) when the state is `AfterNewline` and (b) when the state is `Other`. --- crates/ruff_python_parser/src/lexer.rs | 56 ++++++++++++++++--- ...inuation_at_eof_after_newline_mac_eol.snap | 22 ++++++++ ...nuation_at_eof_after_newline_unix_eol.snap | 22 ++++++++ ...tion_at_eof_after_newline_windows_eol.snap | 22 ++++++++ ...sts__line_continuation_at_eof_mac_eol.snap | 36 ++++++++++++ ...ts__line_continuation_at_eof_unix_eol.snap | 36 ++++++++++++ ..._line_continuation_at_eof_windows_eol.snap | 36 ++++++++++++ 7 files changed, 223 insertions(+), 7 deletions(-) create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_mac_eol.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_unix_eol.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_windows_eol.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_mac_eol.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_unix_eol.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_windows_eol.snap diff --git 
a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 4b529855aa..c3e52f844c 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -246,17 +246,18 @@ impl<'src> Lexer<'src> { self.cursor.bump(); if self.cursor.eat_char('\r') { self.cursor.eat_char('\n'); - } else if self.cursor.is_eof() { - return Some(self.push_error(LexicalError::new( - LexicalErrorType::Eof, - self.token_range(), - ))); } else if !self.cursor.eat_char('\n') { return Some(self.push_error(LexicalError::new( LexicalErrorType::LineContinuationError, TextRange::at(self.offset() - '\\'.text_len(), '\\'.text_len()), ))); } + if self.cursor.is_eof() { + return Some(self.push_error(LexicalError::new( + LexicalErrorType::Eof, + self.token_range(), + ))); + } indentation = Indentation::root(); } // Form feed @@ -341,14 +342,15 @@ impl<'src> Lexer<'src> { self.cursor.bump(); if self.cursor.eat_char('\r') { self.cursor.eat_char('\n'); - } else if self.cursor.is_eof() { - return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range())); } else if !self.cursor.eat_char('\n') { return Err(LexicalError::new( LexicalErrorType::LineContinuationError, TextRange::at(self.offset() - '\\'.text_len(), '\\'.text_len()), )); } + if self.cursor.is_eof() { + return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range())); + } } // Form feed '\x0C' => { @@ -2212,6 +2214,46 @@ if first: assert_snapshot!(triple_quoted_eol(WINDOWS_EOL)); } + fn line_continuation_at_eof_after_newline(eol: &str) -> LexerOutput { + let source = format!(r"\{eol}"); + lex_invalid(&source, Mode::Module) + } + + #[test] + fn test_line_continuation_at_eof_after_newline_unix_eol() { + assert_snapshot!(line_continuation_at_eof_after_newline(UNIX_EOL)); + } + + #[test] + fn test_line_continuation_at_eof_after_newline_mac_eol() { + assert_snapshot!(line_continuation_at_eof_after_newline(MAC_EOL)); + } + + #[test] + fn 
test_line_continuation_at_eof_after_newline_windows_eol() { + assert_snapshot!(line_continuation_at_eof_after_newline(WINDOWS_EOL)); + } + + fn line_continuation_at_eof(eol: &str) -> LexerOutput { + let source = format!(r"1, \{eol}"); + lex_invalid(&source, Mode::Module) + } + + #[test] + fn test_line_continuation_at_eof_unix_eol() { + assert_snapshot!(line_continuation_at_eof(UNIX_EOL)); + } + + #[test] + fn test_line_continuation_at_eof_mac_eol() { + assert_snapshot!(line_continuation_at_eof(MAC_EOL)); + } + + #[test] + fn test_line_continuation_at_eof_windows_eol() { + assert_snapshot!(line_continuation_at_eof(WINDOWS_EOL)); + } + // This test case is to just make sure that the lexer doesn't go into // infinite loop on invalid input. #[test] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_mac_eol.snap new file mode 100644 index 0000000000..80e6fcfcf0 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_mac_eol.snap @@ -0,0 +1,22 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: line_continuation_at_eof_after_newline(MAC_EOL) +--- +## Tokens +``` +[ + ( + Unknown, + 0..2, + ), +] +``` +## Errors +``` +[ + LexicalError { + error: Eof, + location: 0..2, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_unix_eol.snap new file mode 100644 index 0000000000..d0a13f6859 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_unix_eol.snap @@ -0,0 +1,22 @@ +--- +source: 
crates/ruff_python_parser/src/lexer.rs +expression: line_continuation_at_eof_after_newline(UNIX_EOL) +--- +## Tokens +``` +[ + ( + Unknown, + 0..2, + ), +] +``` +## Errors +``` +[ + LexicalError { + error: Eof, + location: 0..2, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_windows_eol.snap new file mode 100644 index 0000000000..8001fa8ee4 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_after_newline_windows_eol.snap @@ -0,0 +1,22 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: line_continuation_at_eof_after_newline(WINDOWS_EOL) +--- +## Tokens +``` +[ + ( + Unknown, + 0..3, + ), +] +``` +## Errors +``` +[ + LexicalError { + error: Eof, + location: 0..3, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_mac_eol.snap new file mode 100644 index 0000000000..22104ae35b --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_mac_eol.snap @@ -0,0 +1,36 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: line_continuation_at_eof(MAC_EOL) +--- +## Tokens +``` +[ + ( + Int( + 1, + ), + 0..1, + ), + ( + Comma, + 1..2, + ), + ( + Unknown, + 2..5, + ), + ( + Newline, + 5..5, + ), +] +``` +## Errors +``` +[ + LexicalError { + error: Eof, + location: 2..5, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_unix_eol.snap 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_unix_eol.snap new file mode 100644 index 0000000000..8e61cceafe --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_unix_eol.snap @@ -0,0 +1,36 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: line_continuation_at_eof(UNIX_EOL) +--- +## Tokens +``` +[ + ( + Int( + 1, + ), + 0..1, + ), + ( + Comma, + 1..2, + ), + ( + Unknown, + 2..5, + ), + ( + Newline, + 5..5, + ), +] +``` +## Errors +``` +[ + LexicalError { + error: Eof, + location: 2..5, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_windows_eol.snap new file mode 100644 index 0000000000..bca2a7153c --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_continuation_at_eof_windows_eol.snap @@ -0,0 +1,36 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: line_continuation_at_eof(WINDOWS_EOL) +--- +## Tokens +``` +[ + ( + Int( + 1, + ), + 0..1, + ), + ( + Comma, + 1..2, + ), + ( + Unknown, + 2..6, + ), + ( + Newline, + 6..6, + ), +] +``` +## Errors +``` +[ + LexicalError { + error: Eof, + location: 2..6, + }, +] +```