Add `Checker::context` method, deduplicate Unicode checks (#19609)

Summary
--

This PR adds a `Checker::context` method that returns the underlying
`LintContext` to unify `Candidate::into_diagnostic` and
`Candidate::report_diagnostic` in our ambiguous Unicode character
checks. This avoids some duplication and also avoids collecting a `Vec`
of `Candidate`s only to iterate over it later.

Test Plan
--

Existing tests
This commit is contained in:
Brent Westbrook 2025-07-29 16:07:55 -04:00 committed by GitHub
parent ae26fa020c
commit 864196b988
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 29 additions and 59 deletions

View File

@ -590,6 +590,16 @@ impl<'a> Checker<'a> {
member,
})
}
/// Return the [`LintContext`] for the current analysis.
///
/// Note that you should always prefer calling methods like `settings`, `report_diagnostic`, or
/// `is_rule_enabled` directly on [`Checker`] when possible. This method exists only for the
/// rare cases where rules or helper functions need to be accessed by both a `Checker` and a
/// `LintContext` in different analysis phases.
pub(crate) const fn context(&self) -> &'a LintContext<'a> {
self.context
}
}
pub(crate) struct TypingImporter<'a, 'b> {

View File

@ -12,7 +12,6 @@ use crate::checkers::ast::{Checker, LintContext};
use crate::preview::is_unicode_to_unicode_confusables_enabled;
use crate::rules::ruff::rules::Context;
use crate::rules::ruff::rules::confusables::confusable;
use crate::settings::LinterSettings;
/// ## What it does
/// Checks for ambiguous Unicode characters in strings.
@ -180,9 +179,7 @@ pub(crate) fn ambiguous_unicode_character_comment(
range: TextRange,
) {
let text = locator.slice(range);
for candidate in ambiguous_unicode_character(text, range, context.settings()) {
candidate.into_diagnostic(Context::Comment, context);
}
ambiguous_unicode_character(text, range, Context::Comment, context);
}
/// RUF001, RUF002
@ -203,22 +200,19 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like:
match part {
ast::StringLikePart::String(string_literal) => {
let text = checker.locator().slice(string_literal);
for candidate in
ambiguous_unicode_character(text, string_literal.range(), checker.settings())
{
candidate.report_diagnostic(checker, context);
}
ambiguous_unicode_character(
text,
string_literal.range(),
context,
checker.context(),
);
}
ast::StringLikePart::Bytes(_) => {}
ast::StringLikePart::FString(FString { elements, .. })
| ast::StringLikePart::TString(TString { elements, .. }) => {
for literal in elements.literals() {
let text = checker.locator().slice(literal);
for candidate in
ambiguous_unicode_character(text, literal.range(), checker.settings())
{
candidate.report_diagnostic(checker, context);
}
ambiguous_unicode_character(text, literal.range(), context, checker.context());
}
}
}
@ -228,13 +222,12 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like:
fn ambiguous_unicode_character(
text: &str,
range: TextRange,
settings: &LinterSettings,
) -> Vec<Candidate> {
let mut candidates = Vec::new();
context: Context,
lint_context: &LintContext,
) {
// Most of the time, we don't need to check for ambiguous unicode characters at all.
if text.is_ascii() {
return candidates;
return;
}
// Iterate over the "words" in the text.
@ -246,7 +239,7 @@ fn ambiguous_unicode_character(
if !word_candidates.is_empty() {
if word_flags.is_candidate_word() {
for candidate in word_candidates.drain(..) {
candidates.push(candidate);
candidate.into_diagnostic(context, lint_context);
}
}
word_candidates.clear();
@ -257,21 +250,23 @@ fn ambiguous_unicode_character(
// case, it's always included as a diagnostic.
if !current_char.is_ascii() {
if let Some(representant) = confusable(current_char as u32).filter(|representant| {
is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii()
is_unicode_to_unicode_confusables_enabled(lint_context.settings())
|| representant.is_ascii()
}) {
let candidate = Candidate::new(
TextSize::try_from(relative_offset).unwrap() + range.start(),
current_char,
representant,
);
candidates.push(candidate);
candidate.into_diagnostic(context, lint_context);
}
}
} else if current_char.is_ascii() {
// The current word contains at least one ASCII character.
word_flags |= WordFlags::ASCII;
} else if let Some(representant) = confusable(current_char as u32).filter(|representant| {
is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii()
is_unicode_to_unicode_confusables_enabled(lint_context.settings())
|| representant.is_ascii()
}) {
// The current word contains an ambiguous unicode character.
word_candidates.push(Candidate::new(
@ -289,13 +284,11 @@ fn ambiguous_unicode_character(
if !word_candidates.is_empty() {
if word_flags.is_candidate_word() {
for candidate in word_candidates.drain(..) {
candidates.push(candidate);
candidate.into_diagnostic(context, lint_context);
}
}
word_candidates.clear();
}
candidates
}
bitflags! {
@ -373,39 +366,6 @@ impl Candidate {
};
}
}
fn report_diagnostic(self, checker: &Checker, context: Context) {
if !checker
.settings()
.allowed_confusables
.contains(&self.confusable)
{
let char_range = TextRange::at(self.offset, self.confusable.text_len());
match context {
Context::String => checker.report_diagnostic_if_enabled(
AmbiguousUnicodeCharacterString {
confusable: self.confusable,
representant: self.representant,
},
char_range,
),
Context::Docstring => checker.report_diagnostic_if_enabled(
AmbiguousUnicodeCharacterDocstring {
confusable: self.confusable,
representant: self.representant,
},
char_range,
),
Context::Comment => checker.report_diagnostic_if_enabled(
AmbiguousUnicodeCharacterComment {
confusable: self.confusable,
representant: self.representant,
},
char_range,
),
};
}
}
}
struct NamedUnicode(char);