From f2b7c82534909a5eeccd48e20aa5a1bb0df4f635 Mon Sep 17 00:00:00 2001 From: Dylan Date: Thu, 25 Sep 2025 14:33:37 -0500 Subject: [PATCH] Handle t-string prefixes in `SimpleTokenizer` (#20578) The simple tokenizer is meant to skip strings, but it was recording a `Name` token for t-strings (from the `t`). This PR fixes that. --- crates/ruff_python_trivia/src/tokenizer.rs | 10 ++++++++++ .../tests/simple_tokenizer.rs | 16 ++++++++++++++++ .../snapshots/simple_tokenizer__fstring.snap | 14 ++++++++++++++ .../snapshots/simple_tokenizer__tstring.snap | 14 ++++++++++++++ 4 files changed, 54 insertions(+) create mode 100644 crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__fstring.snap create mode 100644 crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__tstring.snap diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs index 8b59197b77..8e508d1049 100644 --- a/crates/ruff_python_trivia/src/tokenizer.rs +++ b/crates/ruff_python_trivia/src/tokenizer.rs @@ -599,6 +599,16 @@ impl<'a> SimpleTokenizer<'a> { | "rb" | "rf" | "u" + | "T" + | "TR" + | "Tr" + | "RT" + | "Rt" + | "t" + | "tR" + | "tr" + | "rT" + | "rt" ) { self.bogus = true; diff --git a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs index b26218732b..26ff34f08e 100644 --- a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs +++ b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs @@ -169,6 +169,22 @@ fn string_with_byte_kind() { // note: not reversible: [other, bogus] vs [bogus, other] } +#[test] +fn fstring() { + let source = "f'foo'"; + + let test_case = tokenize(source); + assert_debug_snapshot!(test_case.tokens()); +} + +#[test] +fn tstring() { + let source = "t'foo'"; + + let test_case = tokenize(source); + assert_debug_snapshot!(test_case.tokens()); +} + #[test] fn string_with_invalid_kind() { let source = "abc'foo'"; diff --git a/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__fstring.snap b/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__fstring.snap new file mode 100644 index 0000000000..4f20942cc6 --- /dev/null +++ b/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__fstring.snap @@ -0,0 +1,14 @@ +--- +source: crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs +expression: test_case.tokens() +--- +[ + SimpleToken { + kind: Other, + range: 0..1, + }, + SimpleToken { + kind: Bogus, + range: 1..6, + }, +] diff --git a/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__tstring.snap b/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__tstring.snap new file mode 100644 index 0000000000..4f20942cc6 --- /dev/null +++ b/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__tstring.snap @@ -0,0 +1,14 @@ +--- +source: crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs +expression: test_case.tokens() +--- +[ + SimpleToken { + kind: Other, + range: 0..1, + }, + SimpleToken { + kind: Bogus, + range: 1..6, + }, +]