Update the `invalid-escape-sequence` rule (#5359)

Just a couple of small tweaks based on reading the rule with fresh eyes and newer best practices.
2023-06-25 18:20:31 -04:00 · 2023-06-25 18:20:31 -04:00 · 1fe4073b56
parent b233763156
commit 1fe4073b56
1 changed file with 100 additions and 88 deletions
--- a/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs
@ -1,10 +1,9 @@
-use anyhow::{bail, Result};
-use log::error;
 use ruff_text_size::{TextLen, TextRange, TextSize};

 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::str::{leading_quote, trailing_quote};

 /// ## What it does
 /// Checks for invalid escape sequences.
@ -21,6 +20,9 @@ use ruff_python_ast::source_code::Locator;
 /// ```python
 /// regex = r"\.png$"
 /// ```
+///
+/// ## References
+/// - [Python documentation: String and Bytes literals](https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals)
 #[violation]
 pub struct InvalidEscapeSequence(char);

@ -36,24 +38,6 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence {
    }
 }

-// See: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
-const VALID_ESCAPE_SEQUENCES: &[char; 23] = &[
-    '\n', '\\', '\'', '"', 'a', 'b', 'f', 'n', 'r', 't', 'v', '0', '1', '2', '3', '4', '5', '6',
-    '7', 'x', // Escape sequences only recognized in string literals
-    'N', 'u', 'U',
-];
-
-/// Return the quotation markers used for a String token.
-fn extract_quote(text: &str) -> Result<&str> {
-    for quote in ["'''", "\"\"\"", "'", "\""] {
-        if text.ends_with(quote) {
-            return Ok(quote);
-        }
-    }
-
-    bail!("Unable to find quotation mark for String token")
-}
-
 /// W605
 pub(crate) fn invalid_escape_sequence(
    locator: &Locator,
@ -65,17 +49,19 @@ pub(crate) fn invalid_escape_sequence(
    let text = locator.slice(range);

    // Determine whether the string is single- or triple-quoted.
-    let Ok(quote) = extract_quote(text) else {
-        error!("Unable to find quotation mark for string token");
+    let Some(leading_quote) = leading_quote(text) else {
        return diagnostics;
    };
-    let quote_pos = text.find(quote).unwrap();
-    let prefix = &text[..quote_pos];
-    let body = &text[quote_pos + quote.len()..text.len() - quote.len()];
+    let Some(trailing_quote) = trailing_quote(text) else {
+        return diagnostics;
+    };
+    let body = &text[leading_quote.len()..text.len() - trailing_quote.len()];

-    if !prefix.contains(['r', 'R']) {
-        let start_offset =
-            range.start() + TextSize::try_from(quote_pos).unwrap() + quote.text_len();
+    if leading_quote.contains(['r', 'R']) {
+        return diagnostics;
+    }
+
+    let start_offset = range.start() + TextSize::try_from(leading_quote.len()).unwrap();

    let mut chars_iter = body.char_indices().peekable();

@ -102,7 +88,34 @@ pub(crate) fn invalid_escape_sequence(
        }

        // If the next character is a valid escape sequence, skip.
-            if VALID_ESCAPE_SEQUENCES.contains(next_char) {
+        // See: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals.
+        if matches!(
+            next_char,
+            '\n'
+            | '\\'
+            | '\''
+            | '"'
+            | 'a'
+            | 'b'
+            | 'f'
+            | 'n'
+            | 'r'
+            | 't'
+            | 'v'
+            | '0'
+            | '1'
+            | '2'
+            | '3'
+            | '4'
+            | '5'
+            | '6'
+            | '7'
+            | 'x'
+            // Escape sequences only recognized in string literals
+            | 'N'
+            | 'u'
+            | 'U'
+        ) {
            contains_valid_escape_sequence = true;
            continue;
        }
@ -140,12 +153,11 @@ pub(crate) fn invalid_escape_sequence(
                    } else {
                        "r".to_string()
                    },
-                        range.start() + TextSize::try_from(quote_pos).unwrap(),
+                    range.start(),
                )));
            }
        }
    }
-    }

    diagnostics
 }