mirror of
https://github.com/astral-sh/ruff
synced 2026-01-20 21:10:48 -05:00
Parsing the (invalid) expression `f"{\t"i}"` caused a panic because the
`TStringMiddle` token was "unreachable" due to the way the parser
recovered from the line continuation (it ate the t-string start).
The cause of the issue is as follows:
The parser begins parsing the f-string and expects to see a list of
objects, essentially alternating between _interpolated elements_ and
ordinary strings. It is happy to see the first left brace, but then
there is a lexical error caused by the line-continuation character. So
instead of the parser seeing a list of elements with just one member, it
sees a list that starts like this:
- Interpolated element with an invalid token, stored as a `Name`
- Something else built from tokens beginning with `TStringStart` and
`TStringMiddle`
When it sees the `TStringStart`, error recovery says "that's a list
element I don't know what to do with, let's skip it". When it sees
`TStringMiddle` it says "oh, that looks like the middle of _some
interpolated string_ so let's try to parse it as one of the literal
elements of my `FString`". Unfortunately, the function being used to
parse individual list elements thinks (arguably correctly) that it's not
possible to have a `TStringMiddle` sitting in your `FString`, and hits
`unreachable`.
Two potential ways (among many) to solve this issue are:
1. Allow a `TStringMiddle` as a valid "literal" part of an f-string
during parsing (with the hope/understanding that this would only occur
in an invalid context)
2. Skip the `TStringMiddle` as an "unexpected/invalid list item" in the
same way that we skipped `TStringStart`.
I have opted for the second approach since it seems somehow more morally
correct, even though it loses more information. To implement this, the
recovery context needs to know whether we are in an f-string or t-string
- hence the changes to that enum. As a bonus we get slightly more
specific error messages in some cases.
Closes #18860
160 lines
2.8 KiB
Rust
160 lines
2.8 KiB
Rust
use crate::{Mode, ParseOptions, parse, parse_expression, parse_module};
|
|
|
|
/// Checks that the same chained-subscript source is accepted both in
/// expression mode and in module mode.
#[test]
fn test_modes() {
    let source = "a[0][1][2][3][4]";

    // Parse as a standalone expression first, then as a whole module.
    let as_expression = parse(source, ParseOptions::from(Mode::Expression));
    assert!(as_expression.is_ok());

    let as_module = parse(source, ParseOptions::from(Mode::Module));
    assert!(as_module.is_ok());
}
|
|
|
|
/// Two names separated by a space must be rejected by `parse_expression`;
/// the resulting error is pinned with a debug snapshot.
#[test]
fn test_expr_mode_invalid_syntax1() {
    let source = "first second";

    let result = parse_expression(source);
    // Panics (failing the test) if parsing unexpectedly succeeded.
    let error = result.unwrap_err();

    insta::assert_debug_snapshot!(error);
}
|
|
|
|
/// Two names separated by a blank line must be rejected by
/// `parse_expression`; the resulting error is pinned with a debug snapshot.
#[test]
fn test_expr_mode_invalid_syntax2() {
    let source = r"first

second
";

    let result = parse_expression(source);
    // Panics (failing the test) if parsing unexpectedly succeeded.
    let error = result.unwrap_err();

    insta::assert_debug_snapshot!(error);
}
|
|
|
|
/// Three names, each separated by a blank line, must be rejected by
/// `parse_expression`; the resulting error is pinned with a debug snapshot.
#[test]
fn test_expr_mode_invalid_syntax3() {
    let source = r"first

second

third
";

    let result = parse_expression(source);
    // Panics (failing the test) if parsing unexpectedly succeeded.
    let error = result.unwrap_err();

    insta::assert_debug_snapshot!(error);
}
|
|
|
|
/// A single name followed by trailing newlines must parse successfully in
/// expression mode; the parsed expression is pinned with a debug snapshot.
#[test]
fn test_expr_mode_valid_syntax() {
    let source = "first

";

    let result = parse_expression(source);
    // Panics (failing the test) if parsing unexpectedly failed.
    let parsed = result.unwrap();

    insta::assert_debug_snapshot!(parsed.expr());
}
|
|
|
|
/// Parses an assignment whose string literal contains a `\N{BACKSPACE}`
/// named escape and snapshots the resulting module suite.
#[test]
fn test_unicode_aliases() {
    // https://github.com/RustPython/RustPython/issues/4566
    let source = r#"x = "\N{BACKSPACE}another cool trick""#;

    let module = parse_module(source).unwrap();
    let suite = module.into_suite();

    insta::assert_debug_snapshot!(suite);
}
|
|
|
|
/// Parses a trimmed source that mixes ordinary Python with IPython escape
/// commands (the literal's own section comments list them: dynamic object
/// info, line magics, shell access, call-transform prefixes, indented
/// escapes, and help-end magics) using `Mode::Ipython`, unwraps the result,
/// and snapshots `parsed.syntax()`.
// NOTE(review): the lone `|` lines and the flattened indentation inside the
// literal below look like page-extraction residue rather than real source —
// confirm against the upstream file before relying on this text.
#[test]
|
|
fn test_ipython_escape_commands() {
|
|
let parsed = parse(
|
|
r"
|
|
# Normal Python code
|
|
(
|
|
a
|
|
%
|
|
b
|
|
)
|
|
|
|
# Dynamic object info
|
|
??a.foo
|
|
?a.foo
|
|
?a.foo?
|
|
??a.foo()??
|
|
|
|
# Line magic
|
|
%timeit a = b
|
|
%timeit foo(b) % 3
|
|
%alias showPath pwd && ls -a
|
|
%timeit a =\
|
|
foo(b); b = 2
|
|
%matplotlib --inline
|
|
%matplotlib \
|
|
--inline
|
|
|
|
# System shell access
|
|
!pwd && ls -a | sed 's/^/\ /'
|
|
!pwd \
|
|
&& ls -a | sed 's/^/\\ /'
|
|
!!cd /Users/foo/Library/Application\ Support/
|
|
|
|
# Let's add some Python code to make sure that earlier escapes were handled
|
|
# correctly and that we didn't consume any of the following code as a result
|
|
# of the escapes.
|
|
def foo():
|
|
return (
|
|
a
|
|
!=
|
|
b
|
|
)
|
|
|
|
# Transforms into `foo(..)`
|
|
/foo 1 2
|
|
;foo 1 2
|
|
,foo 1 2
|
|
|
|
# Indented escape commands
|
|
for a in range(5):
|
|
!ls
|
|
|
|
p1 = !pwd
|
|
p2: str = !pwd
|
|
foo = %foo \
|
|
bar
|
|
|
|
% foo
|
|
foo = %foo # comment
|
|
|
|
# Help end line magics
|
|
foo?
|
|
foo.bar??
|
|
foo.bar.baz?
|
|
foo[0]??
|
|
foo[0][1]?
|
|
foo.bar[0].baz[1]??
|
|
foo.bar[0].baz[2].egg??
|
|
"
|
|
.trim(),
|
|
ParseOptions::from(Mode::Ipython),
|
|
)
|
|
.unwrap();
|
|
insta::assert_debug_snapshot!(parsed.syntax());
|
|
}
|
|
|
|
/// An f-string whose replacement field holds a backslash directly followed
/// by `t"i` must yield a parse error; the error is pinned with a debug
/// snapshot.
#[test]
fn test_fstring_expr_inner_line_continuation_and_t_string() {
    let source = r#"f'{\t"i}'"#;

    // Panics (failing the test) if parsing unexpectedly succeeded.
    let error = parse_expression(source).unwrap_err();

    insta::assert_debug_snapshot!(error);
}
|
|
|
|
/// An f-string whose replacement field holds a backslash-newline followed
/// by `t"i` must yield a parse error; the error is pinned with a debug
/// snapshot.
#[test]
fn test_fstring_expr_inner_line_continuation_newline_t_string() {
    let source = r#"f'{\
t"i}'"#;

    // Panics (failing the test) if parsing unexpectedly succeeded.
    let error = parse_expression(source).unwrap_err();

    insta::assert_debug_snapshot!(error);
}
|