From 864196b988a4f1d783eb40477fac4e2cea8334a2 Mon Sep 17 00:00:00 2001 From: Brent Westbrook <36778786+ntBre@users.noreply.github.com> Date: Tue, 29 Jul 2025 16:07:55 -0400 Subject: [PATCH] Add `Checker::context` method, deduplicate Unicode checks (#19609) Summary -- This PR adds a `Checker::context` method that returns the underlying `LintContext` to unify `Candidate::into_diagnostic` and `Candidate::report_diagnostic` in our ambiguous Unicode character checks. This avoids some duplication and also avoids collecting a `Vec` of `Candidate`s only to iterate over it later. Test Plan -- Existing tests --- crates/ruff_linter/src/checkers/ast/mod.rs | 10 +++ .../ruff/rules/ambiguous_unicode_character.rs | 78 +++++-------------- 2 files changed, 29 insertions(+), 59 deletions(-) diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index f2587a6597..8f9e5511fc 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -590,6 +590,16 @@ impl<'a> Checker<'a> { member, }) } + + /// Return the [`LintContext`] for the current analysis. + /// + /// Note that you should always prefer calling methods like `settings`, `report_diagnostic`, or + /// `is_rule_enabled` directly on [`Checker`] when possible. This method exists only for the + /// rare cases where rules or helper functions need to be accessed by both a `Checker` and a + /// `LintContext` in different analysis phases. + pub(crate) const fn context(&self) -> &'a LintContext<'a> { + self.context + } } pub(crate) struct TypingImporter<'a, 'b> { diff --git a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs index 4d599d40f7..9ee9352c64 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs @@ -12,7 +12,6 @@ use crate::checkers::ast::{Checker, LintContext}; use crate::preview::is_unicode_to_unicode_confusables_enabled; use crate::rules::ruff::rules::Context; use crate::rules::ruff::rules::confusables::confusable; -use crate::settings::LinterSettings; /// ## What it does /// Checks for ambiguous Unicode characters in strings. @@ -180,9 +179,7 @@ pub(crate) fn ambiguous_unicode_character_comment( range: TextRange, ) { let text = locator.slice(range); - for candidate in ambiguous_unicode_character(text, range, context.settings()) { - candidate.into_diagnostic(Context::Comment, context); - } + ambiguous_unicode_character(text, range, Context::Comment, context); } /// RUF001, RUF002 @@ -203,22 +200,19 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like: match part { ast::StringLikePart::String(string_literal) => { let text = checker.locator().slice(string_literal); - for candidate in - ambiguous_unicode_character(text, string_literal.range(), checker.settings()) - { - candidate.report_diagnostic(checker, context); - } + ambiguous_unicode_character( + text, + string_literal.range(), + context, + checker.context(), + ); } ast::StringLikePart::Bytes(_) => {} ast::StringLikePart::FString(FString { elements, .. }) | ast::StringLikePart::TString(TString { elements, .. }) => { for literal in elements.literals() { let text = checker.locator().slice(literal); - for candidate in - ambiguous_unicode_character(text, literal.range(), checker.settings()) - { - candidate.report_diagnostic(checker, context); - } + ambiguous_unicode_character(text, literal.range(), context, checker.context()); } } } @@ -228,13 +222,12 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like: fn ambiguous_unicode_character( text: &str, range: TextRange, - settings: &LinterSettings, -) -> Vec { - let mut candidates = Vec::new(); - + context: Context, + lint_context: &LintContext, +) { // Most of the time, we don't need to check for ambiguous unicode characters at all. if text.is_ascii() { - return candidates; + return; } // Iterate over the "words" in the text. @@ -246,7 +239,7 @@ fn ambiguous_unicode_character( if !word_candidates.is_empty() { if word_flags.is_candidate_word() { for candidate in word_candidates.drain(..) { - candidates.push(candidate); + candidate.into_diagnostic(context, lint_context); } } word_candidates.clear(); @@ -257,21 +250,23 @@ fn ambiguous_unicode_character( // case, it's always included as a diagnostic. if !current_char.is_ascii() { if let Some(representant) = confusable(current_char as u32).filter(|representant| { - is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii() + is_unicode_to_unicode_confusables_enabled(lint_context.settings()) + || representant.is_ascii() }) { let candidate = Candidate::new( TextSize::try_from(relative_offset).unwrap() + range.start(), current_char, representant, ); - candidates.push(candidate); + candidate.into_diagnostic(context, lint_context); } } } else if current_char.is_ascii() { // The current word contains at least one ASCII character. word_flags |= WordFlags::ASCII; } else if let Some(representant) = confusable(current_char as u32).filter(|representant| { - is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii() + is_unicode_to_unicode_confusables_enabled(lint_context.settings()) + || representant.is_ascii() }) { // The current word contains an ambiguous unicode character. word_candidates.push(Candidate::new( @@ -289,13 +284,11 @@ fn ambiguous_unicode_character( if !word_candidates.is_empty() { if word_flags.is_candidate_word() { for candidate in word_candidates.drain(..) { - candidates.push(candidate); + candidate.into_diagnostic(context, lint_context); } } word_candidates.clear(); } - - candidates } bitflags! { @@ -373,39 +366,6 @@ impl Candidate { }; } } - - fn report_diagnostic(self, checker: &Checker, context: Context) { - if !checker - .settings() - .allowed_confusables - .contains(&self.confusable) - { - let char_range = TextRange::at(self.offset, self.confusable.text_len()); - match context { - Context::String => checker.report_diagnostic_if_enabled( - AmbiguousUnicodeCharacterString { - confusable: self.confusable, - representant: self.representant, - }, - char_range, - ), - Context::Docstring => checker.report_diagnostic_if_enabled( - AmbiguousUnicodeCharacterDocstring { - confusable: self.confusable, - representant: self.representant, - }, - char_range, - ), - Context::Comment => checker.report_diagnostic_if_enabled( - AmbiguousUnicodeCharacterComment { - confusable: self.confusable, - representant: self.representant, - }, - char_range, - ), - }; - } - } } struct NamedUnicode(char);