Add `Checker::context` method, deduplicate Unicode checks (#19609)

Summary -- This PR adds a `Checker::context` method that returns the underlying `LintContext` to unify `Candidate::into_diagnostic` and `Candidate::report_diagnostic` in our ambiguous Unicode character checks. This avoids some duplication and also avoids collecting a `Vec` of `Candidate`s only to iterate over it later. Test Plan -- Existing tests
2025-07-29 16:07:55 -04:00 · 2025-07-29 16:07:55 -04:00 · 864196b988
parent ae26fa020c
commit 864196b988
2 changed files with 29 additions and 59 deletions
--- a/crates/ruff_linter/src/checkers/ast/mod.rs
+++ b/crates/ruff_linter/src/checkers/ast/mod.rs
@ -590,6 +590,16 @@ impl<'a> Checker<'a> {
            member,
        })
    }
+
+    /// Return the [`LintContext`] for the current analysis.
+    ///
+    /// Note that you should always prefer calling methods like `settings`, `report_diagnostic`, or
+    /// `is_rule_enabled` directly on [`Checker`] when possible. This method exists only for the
+    /// rare cases where rules or helper functions need to be accessed by both a `Checker` and a
+    /// `LintContext` in different analysis phases.
+    pub(crate) const fn context(&self) -> &'a LintContext<'a> {
+        self.context
+    }
 }

 pub(crate) struct TypingImporter<'a, 'b> {
--- a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs
+++ b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs
@ -12,7 +12,6 @@ use crate::checkers::ast::{Checker, LintContext};
 use crate::preview::is_unicode_to_unicode_confusables_enabled;
 use crate::rules::ruff::rules::Context;
 use crate::rules::ruff::rules::confusables::confusable;
-use crate::settings::LinterSettings;

 /// ## What it does
 /// Checks for ambiguous Unicode characters in strings.
@ -180,9 +179,7 @@ pub(crate) fn ambiguous_unicode_character_comment(
    range: TextRange,
 ) {
    let text = locator.slice(range);
-    for candidate in ambiguous_unicode_character(text, range, context.settings()) {
-        candidate.into_diagnostic(Context::Comment, context);
-    }
+    ambiguous_unicode_character(text, range, Context::Comment, context);
 }

 /// RUF001, RUF002
@ -203,22 +200,19 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like:
        match part {
            ast::StringLikePart::String(string_literal) => {
                let text = checker.locator().slice(string_literal);
-                for candidate in
-                    ambiguous_unicode_character(text, string_literal.range(), checker.settings())
-                {
-                    candidate.report_diagnostic(checker, context);
-                }
+                ambiguous_unicode_character(
+                    text,
+                    string_literal.range(),
+                    context,
+                    checker.context(),
+                );
            }
            ast::StringLikePart::Bytes(_) => {}
            ast::StringLikePart::FString(FString { elements, .. })
            | ast::StringLikePart::TString(TString { elements, .. }) => {
                for literal in elements.literals() {
                    let text = checker.locator().slice(literal);
-                    for candidate in
-                        ambiguous_unicode_character(text, literal.range(), checker.settings())
-                    {
-                        candidate.report_diagnostic(checker, context);
-                    }
+                    ambiguous_unicode_character(text, literal.range(), context, checker.context());
                }
            }
        }
@ -228,13 +222,12 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like:
 fn ambiguous_unicode_character(
    text: &str,
    range: TextRange,
-    settings: &LinterSettings,
-) -> Vec<Candidate> {
-    let mut candidates = Vec::new();
-
+    context: Context,
+    lint_context: &LintContext,
+) {
    // Most of the time, we don't need to check for ambiguous unicode characters at all.
    if text.is_ascii() {
-        return candidates;
+        return;
    }

    // Iterate over the "words" in the text.
@ -246,7 +239,7 @@ fn ambiguous_unicode_character(
            if !word_candidates.is_empty() {
                if word_flags.is_candidate_word() {
                    for candidate in word_candidates.drain(..) {
-                        candidates.push(candidate);
+                        candidate.into_diagnostic(context, lint_context);
                    }
                }
                word_candidates.clear();
@ -257,21 +250,23 @@ fn ambiguous_unicode_character(
            // case, it's always included as a diagnostic.
            if !current_char.is_ascii() {
                if let Some(representant) = confusable(current_char as u32).filter(|representant| {
-                    is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii()
+                    is_unicode_to_unicode_confusables_enabled(lint_context.settings())
+                        || representant.is_ascii()
                }) {
                    let candidate = Candidate::new(
                        TextSize::try_from(relative_offset).unwrap() + range.start(),
                        current_char,
                        representant,
                    );
-                    candidates.push(candidate);
+                    candidate.into_diagnostic(context, lint_context);
                }
            }
        } else if current_char.is_ascii() {
            // The current word contains at least one ASCII character.
            word_flags |= WordFlags::ASCII;
        } else if let Some(representant) = confusable(current_char as u32).filter(|representant| {
-            is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii()
+            is_unicode_to_unicode_confusables_enabled(lint_context.settings())
+                || representant.is_ascii()
        }) {
            // The current word contains an ambiguous unicode character.
            word_candidates.push(Candidate::new(
@ -289,13 +284,11 @@ fn ambiguous_unicode_character(
    if !word_candidates.is_empty() {
        if word_flags.is_candidate_word() {
            for candidate in word_candidates.drain(..) {
-                candidates.push(candidate);
+                candidate.into_diagnostic(context, lint_context);
            }
        }
        word_candidates.clear();
    }
-
-    candidates
 }

 bitflags! {
@ -373,39 +366,6 @@ impl Candidate {
            };
        }
    }
-
-    fn report_diagnostic(self, checker: &Checker, context: Context) {
-        if !checker
-            .settings()
-            .allowed_confusables
-            .contains(&self.confusable)
-        {
-            let char_range = TextRange::at(self.offset, self.confusable.text_len());
-            match context {
-                Context::String => checker.report_diagnostic_if_enabled(
-                    AmbiguousUnicodeCharacterString {
-                        confusable: self.confusable,
-                        representant: self.representant,
-                    },
-                    char_range,
-                ),
-                Context::Docstring => checker.report_diagnostic_if_enabled(
-                    AmbiguousUnicodeCharacterDocstring {
-                        confusable: self.confusable,
-                        representant: self.representant,
-                    },
-                    char_range,
-                ),
-                Context::Comment => checker.report_diagnostic_if_enabled(
-                    AmbiguousUnicodeCharacterComment {
-                        confusable: self.confusable,
-                        representant: self.representant,
-                    },
-                    char_range,
-                ),
-            };
-        }
-    }
 }

 struct NamedUnicode(char);