Add `Checker::context` method, deduplicate Unicode checks (#19609)

Summary
--

This PR adds a `Checker::context` method that returns the underlying
`LintContext` to unify `Candidate::into_diagnostic` and
`Candidate::report_diagnostic` in our ambiguous Unicode character
checks. This avoids some duplication and also avoids collecting a `Vec`
of `Candidate`s only to iterate over it later.

Test Plan
--

Existing tests
This commit is contained in:
Brent Westbrook 2025-07-29 16:07:55 -04:00 committed by GitHub
parent ae26fa020c
commit 864196b988
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 29 additions and 59 deletions

View File

@ -590,6 +590,16 @@ impl<'a> Checker<'a> {
member, member,
}) })
} }
/// Return the [`LintContext`] for the current analysis.
///
/// Note that you should always prefer calling methods like `settings`, `report_diagnostic`, or
/// `is_rule_enabled` directly on [`Checker`] when possible. This method exists only for the
/// rare cases where rules or helper functions need to be accessed by both a `Checker` and a
/// `LintContext` in different analysis phases.
pub(crate) const fn context(&self) -> &'a LintContext<'a> {
self.context
}
} }
pub(crate) struct TypingImporter<'a, 'b> { pub(crate) struct TypingImporter<'a, 'b> {

View File

@ -12,7 +12,6 @@ use crate::checkers::ast::{Checker, LintContext};
use crate::preview::is_unicode_to_unicode_confusables_enabled; use crate::preview::is_unicode_to_unicode_confusables_enabled;
use crate::rules::ruff::rules::Context; use crate::rules::ruff::rules::Context;
use crate::rules::ruff::rules::confusables::confusable; use crate::rules::ruff::rules::confusables::confusable;
use crate::settings::LinterSettings;
/// ## What it does /// ## What it does
/// Checks for ambiguous Unicode characters in strings. /// Checks for ambiguous Unicode characters in strings.
@ -180,9 +179,7 @@ pub(crate) fn ambiguous_unicode_character_comment(
range: TextRange, range: TextRange,
) { ) {
let text = locator.slice(range); let text = locator.slice(range);
for candidate in ambiguous_unicode_character(text, range, context.settings()) { ambiguous_unicode_character(text, range, Context::Comment, context);
candidate.into_diagnostic(Context::Comment, context);
}
} }
/// RUF001, RUF002 /// RUF001, RUF002
@ -203,22 +200,19 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like:
match part { match part {
ast::StringLikePart::String(string_literal) => { ast::StringLikePart::String(string_literal) => {
let text = checker.locator().slice(string_literal); let text = checker.locator().slice(string_literal);
for candidate in ambiguous_unicode_character(
ambiguous_unicode_character(text, string_literal.range(), checker.settings()) text,
{ string_literal.range(),
candidate.report_diagnostic(checker, context); context,
} checker.context(),
);
} }
ast::StringLikePart::Bytes(_) => {} ast::StringLikePart::Bytes(_) => {}
ast::StringLikePart::FString(FString { elements, .. }) ast::StringLikePart::FString(FString { elements, .. })
| ast::StringLikePart::TString(TString { elements, .. }) => { | ast::StringLikePart::TString(TString { elements, .. }) => {
for literal in elements.literals() { for literal in elements.literals() {
let text = checker.locator().slice(literal); let text = checker.locator().slice(literal);
for candidate in ambiguous_unicode_character(text, literal.range(), context, checker.context());
ambiguous_unicode_character(text, literal.range(), checker.settings())
{
candidate.report_diagnostic(checker, context);
}
} }
} }
} }
@ -228,13 +222,12 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like:
fn ambiguous_unicode_character( fn ambiguous_unicode_character(
text: &str, text: &str,
range: TextRange, range: TextRange,
settings: &LinterSettings, context: Context,
) -> Vec<Candidate> { lint_context: &LintContext,
let mut candidates = Vec::new(); ) {
// Most of the time, we don't need to check for ambiguous unicode characters at all. // Most of the time, we don't need to check for ambiguous unicode characters at all.
if text.is_ascii() { if text.is_ascii() {
return candidates; return;
} }
// Iterate over the "words" in the text. // Iterate over the "words" in the text.
@ -246,7 +239,7 @@ fn ambiguous_unicode_character(
if !word_candidates.is_empty() { if !word_candidates.is_empty() {
if word_flags.is_candidate_word() { if word_flags.is_candidate_word() {
for candidate in word_candidates.drain(..) { for candidate in word_candidates.drain(..) {
candidates.push(candidate); candidate.into_diagnostic(context, lint_context);
} }
} }
word_candidates.clear(); word_candidates.clear();
@ -257,21 +250,23 @@ fn ambiguous_unicode_character(
// case, it's always included as a diagnostic. // case, it's always included as a diagnostic.
if !current_char.is_ascii() { if !current_char.is_ascii() {
if let Some(representant) = confusable(current_char as u32).filter(|representant| { if let Some(representant) = confusable(current_char as u32).filter(|representant| {
is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii() is_unicode_to_unicode_confusables_enabled(lint_context.settings())
|| representant.is_ascii()
}) { }) {
let candidate = Candidate::new( let candidate = Candidate::new(
TextSize::try_from(relative_offset).unwrap() + range.start(), TextSize::try_from(relative_offset).unwrap() + range.start(),
current_char, current_char,
representant, representant,
); );
candidates.push(candidate); candidate.into_diagnostic(context, lint_context);
} }
} }
} else if current_char.is_ascii() { } else if current_char.is_ascii() {
// The current word contains at least one ASCII character. // The current word contains at least one ASCII character.
word_flags |= WordFlags::ASCII; word_flags |= WordFlags::ASCII;
} else if let Some(representant) = confusable(current_char as u32).filter(|representant| { } else if let Some(representant) = confusable(current_char as u32).filter(|representant| {
is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii() is_unicode_to_unicode_confusables_enabled(lint_context.settings())
|| representant.is_ascii()
}) { }) {
// The current word contains an ambiguous unicode character. // The current word contains an ambiguous unicode character.
word_candidates.push(Candidate::new( word_candidates.push(Candidate::new(
@ -289,13 +284,11 @@ fn ambiguous_unicode_character(
if !word_candidates.is_empty() { if !word_candidates.is_empty() {
if word_flags.is_candidate_word() { if word_flags.is_candidate_word() {
for candidate in word_candidates.drain(..) { for candidate in word_candidates.drain(..) {
candidates.push(candidate); candidate.into_diagnostic(context, lint_context);
} }
} }
word_candidates.clear(); word_candidates.clear();
} }
candidates
} }
bitflags! { bitflags! {
@ -373,39 +366,6 @@ impl Candidate {
}; };
} }
} }
fn report_diagnostic(self, checker: &Checker, context: Context) {
if !checker
.settings()
.allowed_confusables
.contains(&self.confusable)
{
let char_range = TextRange::at(self.offset, self.confusable.text_len());
match context {
Context::String => checker.report_diagnostic_if_enabled(
AmbiguousUnicodeCharacterString {
confusable: self.confusable,
representant: self.representant,
},
char_range,
),
Context::Docstring => checker.report_diagnostic_if_enabled(
AmbiguousUnicodeCharacterDocstring {
confusable: self.confusable,
representant: self.representant,
},
char_range,
),
Context::Comment => checker.report_diagnostic_if_enabled(
AmbiguousUnicodeCharacterComment {
confusable: self.confusable,
representant: self.representant,
},
char_range,
),
};
}
}
} }
struct NamedUnicode(char); struct NamedUnicode(char);