From 53fc0614dac7753eaaccf8e04d89c0a384c51fc1 Mon Sep 17 00:00:00 2001 From: Dylan Date: Sun, 20 Jul 2025 17:04:14 -0500 Subject: [PATCH] Fix `unreachable` panic in parser (#19183) Parsing the (invalid) expression `f"{\t"i}"` caused a panic because the `TStringMiddle` token was "unreachable" due to the way the parser recovered from the line continuation (it ate the t-string start). The cause of the issue is as follows: The parser begins parsing the f-string and expects to see a list of objects, essentially alternating between _interpolated elements_ and ordinary strings. It is happy to see the first left brace, but then there is a lexical error caused by the line-continuation character. So instead of the parser seeing a list of elements with just one member, it sees a list that starts like this: - Interpolated element with an invalid token, stored as a `Name` - Something else built from tokens beginning with `TStringStart` and `TStringMiddle` When it sees the `TStringStart`, error recovery says "that's a list element I don't know what to do with, let's skip it". When it sees `TStringMiddle`, it says "oh, that looks like the middle of _some interpolated string_ so let's try to parse it as one of the literal elements of my `FString`". Unfortunately, the function being used to parse individual list elements thinks (arguably correctly) that it's not possible to have a `TStringMiddle` sitting in your `FString`, and hits `unreachable`. Two potential ways (among many) to solve this issue are: 1. Allow a `TStringMiddle` as a valid "literal" part of an f-string during parsing (with the hope/understanding that this would only occur in an invalid context) 2. Skip the `TStringMiddle` as an "unexpected/invalid list item" in the same way that we skipped `TStringStart`. I have opted for the second approach since it seems somehow more morally correct, even though it loses more information.
To implement this, the recovery context needs to know whether we are in an f-string or t-string - hence the changes to that enum. As a bonus we get slightly more specific error messages in some cases. Closes #18860 --- .../resources/test/fixtures/ruff/RUF027_0.py | 5 ++ .../src/parser/expression.rs | 2 +- crates/ruff_python_parser/src/parser/mod.rs | 63 ++++++++++++------- ..._inner_line_continuation_and_t_string.snap | 10 +++ ...er_line_continuation_newline_t_string.snap | 12 ++++ crates/ruff_python_parser/src/parser/tests.rs | 23 +++++++ crates/ruff_python_parser/src/string.rs | 2 +- ..._string_lambda_without_parentheses.py.snap | 2 +- ...ated_unterminated_string_multiline.py.snap | 2 +- ..._string_lambda_without_parentheses.py.snap | 2 +- 10 files changed, 96 insertions(+), 27 deletions(-) create mode 100644 crates/ruff_python_parser/src/parser/snapshots/ruff_python_parser__parser__tests__fstring_expr_inner_line_continuation_and_t_string.snap create mode 100644 crates/ruff_python_parser/src/parser/snapshots/ruff_python_parser__parser__tests__fstring_expr_inner_line_continuation_newline_t_string.snap diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF027_0.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF027_0.py index e5bef3033f..c863347069 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF027_0.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF027_0.py @@ -79,3 +79,8 @@ def in_type_def(): from typing import cast a = 'int' cast('f"{a}"','11') + +# Regression test for parser bug +# https://github.com/astral-sh/ruff/issues/18860 +def fuzz_bug(): + c('{\t"i}') diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index e1f5f8c124..f953cc367c 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -1527,7 +1527,7 @@ impl<'src> Parser<'src> { self.bump(kind.start_token()); let elements = 
self.parse_interpolated_string_elements( flags, - InterpolatedStringElementsKind::Regular, + InterpolatedStringElementsKind::Regular(kind), kind, ); diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index 904e92df95..ccd44b2592 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -8,6 +8,7 @@ use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::error::UnsupportedSyntaxError; use crate::parser::expression::ExpressionContext; use crate::parser::progress::{ParserProgress, TokenId}; +use crate::string::InterpolatedStringKind; use crate::token::TokenValue; use crate::token_set::TokenSet; use crate::token_source::{TokenSource, TokenSourceCheckpoint}; @@ -799,7 +800,7 @@ impl WithItemKind { } } -#[derive(Debug, PartialEq, Copy, Clone)] +#[derive(Debug, PartialEq, Eq, Copy, Clone)] enum InterpolatedStringElementsKind { /// The regular f-string elements. /// @@ -807,7 +808,7 @@ enum InterpolatedStringElementsKind { /// ```py /// f"hello {x:.2f} world" /// ``` - Regular, + Regular(InterpolatedStringKind), /// The f-string elements are part of the format specifier. 
/// @@ -819,15 +820,13 @@ enum InterpolatedStringElementsKind { } impl InterpolatedStringElementsKind { - const fn list_terminators(self) -> TokenSet { + const fn list_terminator(self) -> TokenKind { match self { - InterpolatedStringElementsKind::Regular => { - TokenSet::new([TokenKind::FStringEnd, TokenKind::TStringEnd]) - } + InterpolatedStringElementsKind::Regular(string_kind) => string_kind.end_token(), // test_ok fstring_format_spec_terminator // f"hello {x:} world" // f"hello {x:.3f} world" - InterpolatedStringElementsKind::FormatSpec => TokenSet::new([TokenKind::Rbrace]), + InterpolatedStringElementsKind::FormatSpec => TokenKind::Rbrace, } } } @@ -1121,7 +1120,7 @@ impl RecoveryContextKind { .then_some(ListTerminatorKind::Regular), }, RecoveryContextKind::InterpolatedStringElements(kind) => { - if p.at_ts(kind.list_terminators()) { + if p.at(kind.list_terminator()) { Some(ListTerminatorKind::Regular) } else { // test_err unterminated_fstring_newline_recovery @@ -1177,13 +1176,23 @@ impl RecoveryContextKind { ) || p.at_name_or_soft_keyword() } RecoveryContextKind::WithItems(_) => p.at_expr(), - RecoveryContextKind::InterpolatedStringElements(_) => matches!( - p.current_token_kind(), - // Literal element - TokenKind::FStringMiddle | TokenKind::TStringMiddle - // Expression element - | TokenKind::Lbrace - ), + RecoveryContextKind::InterpolatedStringElements(elements_kind) => { + match elements_kind { + InterpolatedStringElementsKind::Regular(interpolated_string_kind) => { + p.current_token_kind() == interpolated_string_kind.middle_token() + || p.current_token_kind() == TokenKind::Lbrace + } + InterpolatedStringElementsKind::FormatSpec => { + matches!( + p.current_token_kind(), + // Literal element + TokenKind::FStringMiddle | TokenKind::TStringMiddle + // Expression element + | TokenKind::Lbrace + ) + } + } + } } } @@ -1272,8 +1281,8 @@ impl RecoveryContextKind { ), }, RecoveryContextKind::InterpolatedStringElements(kind) => match kind { - 
InterpolatedStringElementsKind::Regular => ParseErrorType::OtherError( - "Expected an f-string or t-string element or the end of the f-string or t-string".to_string(), + InterpolatedStringElementsKind::Regular(string_kind) => ParseErrorType::OtherError( + format!("Expected an element of or the end of the {string_kind}"), ), InterpolatedStringElementsKind::FormatSpec => ParseErrorType::OtherError( "Expected an f-string or t-string element or a '}'".to_string(), @@ -1316,8 +1325,9 @@ bitflags! { const WITH_ITEMS_PARENTHESIZED = 1 << 25; const WITH_ITEMS_PARENTHESIZED_EXPRESSION = 1 << 26; const WITH_ITEMS_UNPARENTHESIZED = 1 << 28; - const FT_STRING_ELEMENTS = 1 << 29; - const FT_STRING_ELEMENTS_IN_FORMAT_SPEC = 1 << 30; + const F_STRING_ELEMENTS = 1 << 29; + const T_STRING_ELEMENTS = 1 << 30; + const FT_STRING_ELEMENTS_IN_FORMAT_SPEC = 1 << 31; } } @@ -1371,7 +1381,13 @@ impl RecoveryContext { WithItemKind::Unparenthesized => RecoveryContext::WITH_ITEMS_UNPARENTHESIZED, }, RecoveryContextKind::InterpolatedStringElements(kind) => match kind { - InterpolatedStringElementsKind::Regular => RecoveryContext::FT_STRING_ELEMENTS, + InterpolatedStringElementsKind::Regular(InterpolatedStringKind::FString) => { + RecoveryContext::F_STRING_ELEMENTS + } + InterpolatedStringElementsKind::Regular(InterpolatedStringKind::TString) => { + RecoveryContext::T_STRING_ELEMENTS + } + InterpolatedStringElementsKind::FormatSpec => { RecoveryContext::FT_STRING_ELEMENTS_IN_FORMAT_SPEC } @@ -1442,8 +1458,11 @@ impl RecoveryContext { RecoveryContext::WITH_ITEMS_UNPARENTHESIZED => { RecoveryContextKind::WithItems(WithItemKind::Unparenthesized) } - RecoveryContext::FT_STRING_ELEMENTS => RecoveryContextKind::InterpolatedStringElements( - InterpolatedStringElementsKind::Regular, + RecoveryContext::F_STRING_ELEMENTS => RecoveryContextKind::InterpolatedStringElements( + InterpolatedStringElementsKind::Regular(InterpolatedStringKind::FString), + ), + RecoveryContext::T_STRING_ELEMENTS => 
RecoveryContextKind::InterpolatedStringElements( + InterpolatedStringElementsKind::Regular(InterpolatedStringKind::TString), ), RecoveryContext::FT_STRING_ELEMENTS_IN_FORMAT_SPEC => { RecoveryContextKind::InterpolatedStringElements( diff --git a/crates/ruff_python_parser/src/parser/snapshots/ruff_python_parser__parser__tests__fstring_expr_inner_line_continuation_and_t_string.snap b/crates/ruff_python_parser/src/parser/snapshots/ruff_python_parser__parser__tests__fstring_expr_inner_line_continuation_and_t_string.snap new file mode 100644 index 0000000000..490211dd4e --- /dev/null +++ b/crates/ruff_python_parser/src/parser/snapshots/ruff_python_parser__parser__tests__fstring_expr_inner_line_continuation_and_t_string.snap @@ -0,0 +1,10 @@ +--- +source: crates/ruff_python_parser/src/parser/tests.rs +expression: error +--- +ParseError { + error: Lexical( + LineContinuationError, + ), + location: 3..4, +} diff --git a/crates/ruff_python_parser/src/parser/snapshots/ruff_python_parser__parser__tests__fstring_expr_inner_line_continuation_newline_t_string.snap b/crates/ruff_python_parser/src/parser/snapshots/ruff_python_parser__parser__tests__fstring_expr_inner_line_continuation_newline_t_string.snap new file mode 100644 index 0000000000..36cd036382 --- /dev/null +++ b/crates/ruff_python_parser/src/parser/snapshots/ruff_python_parser__parser__tests__fstring_expr_inner_line_continuation_newline_t_string.snap @@ -0,0 +1,12 @@ +--- +source: crates/ruff_python_parser/src/parser/tests.rs +expression: error +--- +ParseError { + error: Lexical( + TStringError( + SingleRbrace, + ), + ), + location: 8..9, +} diff --git a/crates/ruff_python_parser/src/parser/tests.rs b/crates/ruff_python_parser/src/parser/tests.rs index 778637597c..dcb9ac16a0 100644 --- a/crates/ruff_python_parser/src/parser/tests.rs +++ b/crates/ruff_python_parser/src/parser/tests.rs @@ -134,3 +134,26 @@ foo.bar[0].baz[2].egg?? 
.unwrap(); insta::assert_debug_snapshot!(parsed.syntax()); } + +#[test] +fn test_fstring_expr_inner_line_continuation_and_t_string() { + let source = r#"f'{\t"i}'"#; + + let parsed = parse_expression(source); + + let error = parsed.unwrap_err(); + + insta::assert_debug_snapshot!(error); +} + +#[test] +fn test_fstring_expr_inner_line_continuation_newline_t_string() { + let source = r#"f'{\ +t"i}'"#; + + let parsed = parse_expression(source); + + let error = parsed.unwrap_err(); + + insta::assert_debug_snapshot!(error); +} diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs index 8dd9190b90..a3fe1490de 100644 --- a/crates/ruff_python_parser/src/string.rs +++ b/crates/ruff_python_parser/src/string.rs @@ -41,7 +41,7 @@ impl From for Expr { } } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum InterpolatedStringKind { FString, TString, diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_lambda_without_parentheses.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_lambda_without_parentheses.py.snap index 6f98814ce9..3610b8d114 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_lambda_without_parentheses.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_lambda_without_parentheses.py.snap @@ -124,5 +124,5 @@ Module( | 1 | f"{lambda x: x}" - | ^ Syntax Error: Expected an f-string or t-string element or the end of the f-string or t-string + | ^ Syntax Error: Expected an element of or the end of the f-string | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@implicitly_concatenated_unterminated_string_multiline.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@implicitly_concatenated_unterminated_string_multiline.py.snap index da094ee09c..59016cc425 100644 --- 
a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@implicitly_concatenated_unterminated_string_multiline.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@implicitly_concatenated_unterminated_string_multiline.py.snap @@ -221,7 +221,7 @@ Module( 2 | 'hello' 3 | f'world {x} 4 | ) - | ^ Syntax Error: Expected an f-string or t-string element or the end of the f-string or t-string + | ^ Syntax Error: Expected an element of or the end of the f-string 5 | 1 + 1 6 | ( | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@t_string_lambda_without_parentheses.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@t_string_lambda_without_parentheses.py.snap index 121d002f28..d87a72858f 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@t_string_lambda_without_parentheses.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@t_string_lambda_without_parentheses.py.snap @@ -128,5 +128,5 @@ Module( | 1 | # parse_options: {"target-version": "3.14"} 2 | t"{lambda x: x}" - | ^ Syntax Error: Expected an f-string or t-string element or the end of the f-string or t-string + | ^ Syntax Error: Expected an element of or the end of the t-string |