From ca99e9e2f0fe7043bb2a145860229866f269152d Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Mon, 13 May 2024 21:43:06 +0530
Subject: [PATCH] Move `W605` to the AST checker (#11402)

## Summary

This PR moves the `W605` rule to the AST checker.

This is part of #11401

## Test Plan

`cargo test`
---
 .../src/checkers/ast/analyze/string_like.rs   |   5 +-
 crates/ruff_linter/src/checkers/tokens.rs     |  12 --
 crates/ruff_linter/src/registry.rs            |   1 -
 .../rules/invalid_escape_sequence.rs          | 122 ++++++++++++------
 crates/ruff_python_ast/src/expression.rs      |  14 +-
 5 files changed, 98 insertions(+), 56 deletions(-)

diff --git a/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs b/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs
index 6a7f723bc7..88d040acfb 100644
--- a/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs
+++ b/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs
@@ -2,7 +2,7 @@ use ruff_python_ast::StringLike;
 
 use crate::checkers::ast::Checker;
 use crate::codes::Rule;
-use crate::rules::{flake8_bandit, flake8_pyi, flake8_quotes, ruff};
+use crate::rules::{flake8_bandit, flake8_pyi, flake8_quotes, pycodestyle, ruff};
 
 /// Run lint rules over a [`StringLike`] syntax nodes.
 pub(crate) fn string_like(string_like: StringLike, checker: &mut Checker) {
@@ -36,4 +36,7 @@ pub(crate) fn string_like(string_like: StringLike, checker: &mut Checker) {
     if checker.enabled(Rule::AvoidableEscapedQuote) && checker.settings.flake8_quotes.avoid_escape {
         flake8_quotes::rules::avoidable_escaped_quote(checker, string_like);
     }
+    if checker.enabled(Rule::InvalidEscapeSequence) {
+        pycodestyle::rules::invalid_escape_sequence(checker, string_like);
+    }
 }
diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs
index ca08b27f5b..66148b2273 100644
--- a/crates/ruff_linter/src/checkers/tokens.rs
+++ b/crates/ruff_linter/src/checkers/tokens.rs
@@ -75,18 +75,6 @@ pub(crate) fn check_tokens(
         pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, indexer);
     }
 
-    if settings.rules.enabled(Rule::InvalidEscapeSequence) {
-        for (tok, range) in tokens.iter().flatten() {
-            pycodestyle::rules::invalid_escape_sequence(
-                &mut diagnostics,
-                locator,
-                indexer,
-                tok,
-                *range,
-            );
-        }
-    }
-
     if settings.rules.enabled(Rule::TabIndentation) {
         pycodestyle::rules::tab_indentation(&mut diagnostics, locator, indexer);
     }
diff --git a/crates/ruff_linter/src/registry.rs b/crates/ruff_linter/src/registry.rs
index 77b5d97df5..fa6bdee458 100644
--- a/crates/ruff_linter/src/registry.rs
+++ b/crates/ruff_linter/src/registry.rs
@@ -270,7 +270,6 @@ impl Rule {
             | Rule::InvalidCharacterNul
             | Rule::InvalidCharacterSub
             | Rule::InvalidCharacterZeroWidthSpace
-            | Rule::InvalidEscapeSequence
             | Rule::InvalidTodoCapitalization
             | Rule::InvalidTodoTag
             | Rule::LineContainsFixme
diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs
index 8e7c25852c..3f3b5bce60 100644
--- a/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs
@@ -2,11 +2,11 @@ use memchr::memchr_iter;
 
 use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_index::Indexer;
-use ruff_python_parser::Tok;
+use ruff_python_ast::{AnyStringFlags, FStringElement, StringLike, StringLikePart};
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
 
+use crate::checkers::ast::Checker;
 use crate::fix::edits::pad_start;
 
 /// ## What it does
@@ -59,38 +59,83 @@ impl AlwaysFixableViolation for InvalidEscapeSequence {
 }
 
 /// W605
-pub(crate) fn invalid_escape_sequence(
+pub(crate) fn invalid_escape_sequence(checker: &mut Checker, string_like: StringLike) {
+    let locator = checker.locator();
+
+    for part in string_like.parts() {
+        if part.flags().is_raw_string() {
+            continue;
+        }
+        match part {
+            StringLikePart::String(string_literal) => {
+                check(
+                    &mut checker.diagnostics,
+                    locator,
+                    string_literal.start(),
+                    string_literal.range(),
+                    AnyStringFlags::from(string_literal.flags),
+                );
+            }
+            StringLikePart::Bytes(bytes_literal) => {
+                check(
+                    &mut checker.diagnostics,
+                    locator,
+                    bytes_literal.start(),
+                    bytes_literal.range(),
+                    AnyStringFlags::from(bytes_literal.flags),
+                );
+            }
+            StringLikePart::FString(f_string) => {
+                let flags = AnyStringFlags::from(f_string.flags);
+                for element in f_string.elements.iter() {
+                    match element {
+                        FStringElement::Literal(literal) => {
+                            check(
+                                &mut checker.diagnostics,
+                                locator,
+                                f_string.start(),
+                                literal.range(),
+                                flags,
+                            );
+                        }
+                        FStringElement::Expression(expression) => {
+                            let Some(format_spec) = expression.format_spec.as_ref() else {
+                                continue;
+                            };
+                            for literal in format_spec.elements.literals() {
+                                check(
+                                    &mut checker.diagnostics,
+                                    locator,
+                                    f_string.start(),
+                                    literal.range(),
+                                    flags,
+                                );
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+fn check(
     diagnostics: &mut Vec<Diagnostic>,
     locator: &Locator,
-    indexer: &Indexer,
-    token: &Tok,
-    token_range: TextRange,
+    // Start position of the expression that contains the source range. This is used to generate
+    // the fix when the source range is part of the expression like in f-string which contains
+    // other f-string literal elements.
+    expr_start: TextSize,
+    // Range in the source code to perform the check on.
+    source_range: TextRange,
+    flags: AnyStringFlags,
 ) {
-    let (token_source_code, string_start_location, flags) = match token {
-        Tok::FStringMiddle { flags, .. } => {
-            if flags.is_raw_string() {
-                return;
-            }
-            let Some(f_string_range) = indexer.fstring_ranges().innermost(token_range.start())
-            else {
-                return;
-            };
-            (locator.slice(token_range), f_string_range.start(), flags)
-        }
-        Tok::String { flags, .. } => {
-            if flags.is_raw_string() {
-                return;
-            }
-            (locator.slice(token_range), token_range.start(), flags)
-        }
-        _ => return,
-    };
-
+    let source = locator.slice(source_range);
     let mut contains_valid_escape_sequence = false;
     let mut invalid_escape_chars = Vec::new();
 
     let mut prev = None;
-    let bytes = token_source_code.as_bytes();
+    let bytes = source.as_bytes();
     for i in memchr_iter(b'\\', bytes) {
         // If the previous character was also a backslash, skip.
         if prev.is_some_and(|prev| prev == i - 1) {
@@ -100,9 +145,9 @@ pub(crate) fn invalid_escape_sequence(
 
         prev = Some(i);
 
-        let next_char = match token_source_code[i + 1..].chars().next() {
+        let next_char = match source[i + 1..].chars().next() {
             Some(next_char) => next_char,
-            None if token.is_f_string_middle() => {
+            None if flags.is_f_string() => {
                 // If we're at the end of a f-string middle token, the next character
                 // is actually emitted as a different token. For example,
                 //
@@ -124,7 +169,7 @@ pub(crate) fn invalid_escape_sequence(
                 // f-string. This means that if there's a `FStringMiddle` token and we
                 // encounter a `\` character, then the next character is always going to
                 // be part of the f-string.
-                if let Some(next_char) = locator.after(token_range.end()).chars().next() {
+                if let Some(next_char) = locator.after(source_range.end()).chars().next() {
                     next_char
                 } else {
                     continue;
@@ -172,7 +217,7 @@ pub(crate) fn invalid_escape_sequence(
             continue;
         }
 
-        let location = token_range.start() + TextSize::try_from(i).unwrap();
+        let location = source_range.start() + TextSize::try_from(i).unwrap();
         let range = TextRange::at(location, next_char.text_len() + TextSize::from(1));
         invalid_escape_chars.push(InvalidEscapeChar {
             ch: next_char,
@@ -180,7 +225,6 @@ pub(crate) fn invalid_escape_sequence(
         });
     }
 
-    let mut invalid_escape_sequence = Vec::new();
     if contains_valid_escape_sequence {
         // Escape with backslash.
         for invalid_escape_char in &invalid_escape_chars {
@@ -195,7 +239,7 @@ pub(crate) fn invalid_escape_sequence(
                 r"\".to_string(),
                 invalid_escape_char.start() + TextSize::from(1),
             )));
-            invalid_escape_sequence.push(diagnostic);
+            diagnostics.push(diagnostic);
         }
     } else {
         // Turn into raw string.
@@ -212,8 +256,8 @@ pub(crate) fn invalid_escape_sequence(
                 // Replace the Unicode prefix with `r`.
                 diagnostic.set_fix(Fix::safe_edit(Edit::replacement(
                     "r".to_string(),
-                    string_start_location,
-                    string_start_location + TextSize::from(1),
+                    expr_start,
+                    expr_start + TextSize::from(1),
                 )));
             } else {
                 // Insert the `r` prefix.
@@ -222,17 +266,15 @@ pub(crate) fn invalid_escape_sequence(
                     // `assert`, etc.) and the string. For example, `return"foo"` is valid, but
                     // `returnr"foo"` is not.
                     Fix::safe_edit(Edit::insertion(
-                        pad_start("r".to_string(), string_start_location, locator),
-                        string_start_location,
+                        pad_start("r".to_string(), expr_start, locator),
+                        expr_start,
                     )),
                 );
             }
 
-            invalid_escape_sequence.push(diagnostic);
+            diagnostics.push(diagnostic);
         }
     }
-
-    diagnostics.extend(invalid_escape_sequence);
 }
 
 #[derive(Debug, PartialEq, Eq)]
diff --git a/crates/ruff_python_ast/src/expression.rs b/crates/ruff_python_ast/src/expression.rs
index 3e19d5b3fd..b3bd0a6701 100644
--- a/crates/ruff_python_ast/src/expression.rs
+++ b/crates/ruff_python_ast/src/expression.rs
@@ -2,8 +2,7 @@ use std::iter::FusedIterator;
 
 use ruff_text_size::{Ranged, TextRange};
 
-use crate::AnyNodeRef;
-use crate::{self as ast, Expr};
+use crate::{self as ast, AnyNodeRef, AnyStringFlags, Expr};
 
 /// Unowned pendant to [`ast::Expr`] that stores a reference instead of a owned value.
 #[derive(Copy, Clone, Debug, PartialEq)]
@@ -452,6 +451,17 @@ pub enum StringLikePart<'a> {
     FString(&'a ast::FString),
 }
 
+impl StringLikePart<'_> {
+    /// Returns the [`AnyStringFlags`] for the current string-like part.
+    pub fn flags(&self) -> AnyStringFlags {
+        match self {
+            StringLikePart::String(string) => AnyStringFlags::from(string.flags),
+            StringLikePart::Bytes(bytes) => AnyStringFlags::from(bytes.flags),
+            StringLikePart::FString(f_string) => AnyStringFlags::from(f_string.flags),
+        }
+    }
+}
+
 impl<'a> From<&'a ast::StringLiteral> for StringLikePart<'a> {
     fn from(value: &'a ast::StringLiteral) -> Self {
         StringLikePart::String(value)