From cffd1866ce1ac6da4d6a5bc12435316d2d99755b Mon Sep 17 00:00:00 2001 From: Brent Westbrook <36778786+ntBre@users.noreply.github.com> Date: Thu, 23 Jan 2025 12:12:10 -0500 Subject: [PATCH] Preserve raw string prefix and escapes (#15694) ## Summary Fixes #9663 and also improves the fixes for [RUF055](https://docs.astral.sh/ruff/rules/unnecessary-regular-expression/) since regular expressions are often written as raw strings. This doesn't include raw f-strings. ## Test Plan Existing snapshots for RUF055 and PT009, plus a new `Generator` test and a regression test for the reported `PIE810` issue. --- .../test/fixtures/flake8_pie/PIE810.py | 6 +++++ ...__flake8_pie__tests__PIE810_PIE810.py.snap | 18 +++++++++++++++ ...es__flake8_pytest_style__tests__PT009.snap | 8 +++---- ...f__tests__preview__RUF055_RUF055_0.py.snap | 16 ++++++------- ...f__tests__preview__RUF055_RUF055_1.py.snap | 4 ++-- crates/ruff_python_ast/src/str.rs | 8 +++++++ crates/ruff_python_codegen/src/generator.rs | 23 +++++++++++++++---- 7 files changed, 65 insertions(+), 18 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_pie/PIE810.py b/crates/ruff_linter/resources/test/fixtures/flake8_pie/PIE810.py index 34a02deaf8..39a4a496df 100644 --- a/crates/ruff_linter/resources/test/fixtures/flake8_pie/PIE810.py +++ b/crates/ruff_linter/resources/test/fixtures/flake8_pie/PIE810.py @@ -76,3 +76,9 @@ def func(): if msg.startswith(y) or msg.endswith(x) or msg.startswith("h"): # OK print("yes") + + +def func(): + "Regression test for https://github.com/astral-sh/ruff/issues/9663" + if x.startswith("a") or x.startswith("b") or re.match(r"a\.b", x): + print("yes") diff --git a/crates/ruff_linter/src/rules/flake8_pie/snapshots/ruff_linter__rules__flake8_pie__tests__PIE810_PIE810.py.snap b/crates/ruff_linter/src/rules/flake8_pie/snapshots/ruff_linter__rules__flake8_pie__tests__PIE810_PIE810.py.snap index 8a23e3ea53..929042cc47 100644 --- a/crates/ruff_linter/src/rules/flake8_pie/snapshots/ruff_linter__rules__flake8_pie__tests__PIE810_PIE810.py.snap +++ b/crates/ruff_linter/src/rules/flake8_pie/snapshots/ruff_linter__rules__flake8_pie__tests__PIE810_PIE810.py.snap @@ -142,3 +142,21 @@ PIE810.py:25:8: PIE810 [*] Call `startswith` once with a `tuple` 26 26 | print("yes") 27 27 | 28 28 | # ok + +PIE810.py:83:8: PIE810 [*] Call `startswith` once with a `tuple` + | +81 | def func(): +82 | "Regression test for https://github.com/astral-sh/ruff/issues/9663" +83 | if x.startswith("a") or x.startswith("b") or re.match(r"a\.b", x): + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PIE810 +84 | print("yes") + | + = help: Merge into a single `startswith` call + +ℹ Unsafe fix +80 80 | +81 81 | def func(): +82 82 | "Regression test for https://github.com/astral-sh/ruff/issues/9663" +83 |- if x.startswith("a") or x.startswith("b") or re.match(r"a\.b", x): + 83 |+ if x.startswith(("a", "b")) or re.match(r"a\.b", x): +84 84 | print("yes") diff --git a/crates/ruff_linter/src/rules/flake8_pytest_style/snapshots/ruff_linter__rules__flake8_pytest_style__tests__PT009.snap b/crates/ruff_linter/src/rules/flake8_pytest_style/snapshots/ruff_linter__rules__flake8_pytest_style__tests__PT009.snap index 54aa27fa16..f77df5ab5c 100644 --- a/crates/ruff_linter/src/rules/flake8_pytest_style/snapshots/ruff_linter__rules__flake8_pytest_style__tests__PT009.snap +++ b/crates/ruff_linter/src/rules/flake8_pytest_style/snapshots/ruff_linter__rules__flake8_pytest_style__tests__PT009.snap @@ -498,7 +498,7 @@ PT009.py:73:9: PT009 [*] Use a regular `assert` instead of unittest-style `asser 71 71 | 72 72 | def test_assert_regex(self): 73 |- self.assertRegex("abc", r"def") # Error - 73 |+ assert re.search("def", "abc") # Error + 73 |+ assert re.search(r"def", "abc") # Error 74 74 | 75 75 | def test_assert_not_regex(self): 76 76 | self.assertNotRegex("abc", r"abc") # Error @@ -518,7 +518,7 @@ PT009.py:76:9: PT009 [*] Use a regular `assert` instead of unittest-style `asser 74 74 | 75 75 | def test_assert_not_regex(self): 76 |- self.assertNotRegex("abc", r"abc") # Error - 76 |+ assert not re.search("abc", "abc") # Error + 76 |+ assert not re.search(r"abc", "abc") # Error 77 77 | 78 78 | def test_assert_regexp_matches(self): 79 79 | self.assertRegexpMatches("abc", r"def") # Error @@ -538,7 +538,7 @@ PT009.py:79:9: PT009 [*] Use a regular `assert` instead of unittest-style `asser 77 77 | 78 78 | def test_assert_regexp_matches(self): 79 |- self.assertRegexpMatches("abc", r"def") # Error - 79 |+ assert re.search("def", "abc") # Error + 79 |+ assert re.search(r"def", "abc") # Error 80 80 | 81 81 | def test_assert_not_regexp_matches(self): 82 82 | self.assertNotRegex("abc", r"abc") # Error @@ -558,7 +558,7 @@ PT009.py:82:9: PT009 [*] Use a regular `assert` instead of unittest-style `asser 80 80 | 81 81 | def test_assert_not_regexp_matches(self): 82 |- self.assertNotRegex("abc", r"abc") # Error - 82 |+ assert not re.search("abc", "abc") # Error + 82 |+ assert not re.search(r"abc", "abc") # Error 83 83 | 84 84 | def test_fail_if(self): 85 85 | self.failIf("abc") # Error diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap index f778d0a023..a2a3ee2304 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap @@ -142,14 +142,14 @@ RUF055_0.py:89:1: RUF055 [*] Plain string pattern passed to `re` function 90 | re.sub(r"a", r"\n", "a") 91 | re.sub(r"a", "\a", "a") | - = help: Replace with `"a".replace("a", "\n")` + = help: Replace with `"a".replace(r"a", "\n")` ℹ Safe fix 86 86 | # `re.sub(r"a", r"\n", some_string)` is fixed to `some_string.replace("a", "\n")` 87 87 | # *not* `some_string.replace("a", "\\n")`. 88 88 | # We currently emit diagnostics for some of these without fixing them. 89 |-re.sub(r"a", "\n", "a") - 89 |+"a".replace("a", "\n") + 89 |+"a".replace(r"a", "\n") 90 90 | re.sub(r"a", r"\n", "a") 91 91 | re.sub(r"a", "\a", "a") 92 92 | re.sub(r"a", r"\a", "a") @@ -172,14 +172,14 @@ RUF055_0.py:91:1: RUF055 [*] Plain string pattern passed to `re` function | ^^^^^^^^^^^^^^^^^^^^^^^ RUF055 92 | re.sub(r"a", r"\a", "a") | - = help: Replace with `"a".replace("a", "\x07")` + = help: Replace with `"a".replace(r"a", "\x07")` ℹ Safe fix 88 88 | # We currently emit diagnostics for some of these without fixing them. 89 89 | re.sub(r"a", "\n", "a") 90 90 | re.sub(r"a", r"\n", "a") 91 |-re.sub(r"a", "\a", "a") - 91 |+"a".replace("a", "\x07") + 91 |+"a".replace(r"a", "\x07") 92 92 | re.sub(r"a", r"\a", "a") 93 93 | 94 94 | re.sub(r"a", "\?", "a") @@ -202,14 +202,14 @@ RUF055_0.py:94:1: RUF055 [*] Plain string pattern passed to `re` function | ^^^^^^^^^^^^^^^^^^^^^^^ RUF055 95 | re.sub(r"a", r"\?", "a") | - = help: Replace with `"a".replace("a", "\\?")` + = help: Replace with `"a".replace(r"a", "\\?")` ℹ Safe fix 91 91 | re.sub(r"a", "\a", "a") 92 92 | re.sub(r"a", r"\a", "a") 93 93 | 94 |-re.sub(r"a", "\?", "a") - 94 |+"a".replace("a", "\\?") + 94 |+"a".replace(r"a", "\\?") 95 95 | re.sub(r"a", r"\?", "a") RUF055_0.py:95:1: RUF055 [*] Plain string pattern passed to `re` function @@ -218,11 +218,11 @@ RUF055_0.py:95:1: RUF055 [*] Plain string pattern passed to `re` function 95 | re.sub(r"a", r"\?", "a") | ^^^^^^^^^^^^^^^^^^^^^^^^ RUF055 | - = help: Replace with `"a".replace("a", "\\?")` + = help: Replace with `"a".replace(r"a", r"\?")` ℹ Safe fix 92 92 | re.sub(r"a", r"\a", "a") 93 93 | 94 94 | re.sub(r"a", "\?", "a") 95 |-re.sub(r"a", r"\?", "a") - 95 |+"a".replace("a", "\\?") + 95 |+"a".replace(r"a", r"\?") diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_1.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_1.py.snap index ce46702918..cf97bd8909 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_1.py.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_1.py.snap @@ -29,11 +29,11 @@ RUF055_1.py:17:1: RUF055 [*] Plain string pattern passed to `re` function 17 | re.sub(r"abc", repl, haystack) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055 | - = help: Replace with `haystack.replace("abc", repl)` + = help: Replace with `haystack.replace(r"abc", repl)` ℹ Safe fix 14 14 | 15 15 | # also works for the `repl` argument in sub 16 16 | repl = "new" 17 |-re.sub(r"abc", repl, haystack) - 17 |+haystack.replace("abc", repl) + 17 |+haystack.replace(r"abc", repl) diff --git a/crates/ruff_python_ast/src/str.rs b/crates/ruff_python_ast/src/str.rs index 13a08d2ccc..33a9bf0d0c 100644 --- a/crates/ruff_python_ast/src/str.rs +++ b/crates/ruff_python_ast/src/str.rs @@ -24,6 +24,14 @@ impl Quote { } } + #[inline] + pub const fn as_str(self) -> &'static str { + match self { + Self::Single => "'", + Self::Double => "\"", + } + } + #[must_use] #[inline] pub const fn opposite(self) -> Self { diff --git a/crates/ruff_python_codegen/src/generator.rs b/crates/ruff_python_codegen/src/generator.rs index a937130809..847038ff1c 100644 --- a/crates/ruff_python_codegen/src/generator.rs +++ b/crates/ruff_python_codegen/src/generator.rs @@ -1284,10 +1284,19 @@ impl<'a> Generator<'a> { fn unparse_string_literal(&mut self, string_literal: &ast::StringLiteral) { let ast::StringLiteral { value, flags, .. } = string_literal; - if flags.prefix().is_unicode() { - self.p("u"); + // for raw strings, we don't want to perform the UnicodeEscape in `p_str_repr`, so build the + // replacement here + if flags.prefix().is_raw() { + self.p(flags.prefix().as_str()); + self.p(self.quote.as_str()); + self.p(value); + self.p(self.quote.as_str()); + } else { + if flags.prefix().is_unicode() { + self.p("u"); + } + self.p_str_repr(value); } - self.p_str_repr(value); } fn unparse_string_literal_value(&mut self, value: &ast::StringLiteralValue) { @@ -1712,7 +1721,7 @@ class Foo: assert_eq!(round_trip(r#""hello""#), r#""hello""#); assert_eq!(round_trip(r"'hello'"), r#""hello""#); assert_eq!(round_trip(r"u'hello'"), r#"u"hello""#); - assert_eq!(round_trip(r"r'hello'"), r#""hello""#); + assert_eq!(round_trip(r"r'hello'"), r#"r"hello""#); assert_eq!(round_trip(r"b'hello'"), r#"b"hello""#); assert_eq!(round_trip(r#"("abc" "def" "ghi")"#), r#""abc" "def" "ghi""#); assert_eq!(round_trip(r#""he\"llo""#), r#"'he"llo'"#); @@ -1720,6 +1729,12 @@ class Foo: assert_eq!(round_trip(r#"f'abc{"def"}{1}'"#), r#"f"abc{'def'}{1}""#); } + #[test] + fn raw() { + assert_round_trip!(r#"r"a\.b""#); // https://github.com/astral-sh/ruff/issues/9663 + assert_round_trip!(r#"R"a\.b""#); + } + #[test] fn self_documenting_fstring() { assert_round_trip!(r#"f"{ chr(65) = }""#);