From 7c3d387abdded7583205d74dc626fa9bbcc449d2 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 7 Nov 2022 21:16:34 -0500 Subject: [PATCH] Implement confusing unicode character detection for comments (#653) --- README.md | 1 + resources/test/fixtures/RUF001.py | 3 ++- resources/test/fixtures/RUF002.py | 3 ++- resources/test/fixtures/RUF003.py | 7 ++++++ src/check_tokens.rs | 18 ++++++++++--- src/checks.rs | 14 ++++++++++- src/checks_gen.rs | 9 ++++--- src/linter.rs | 1 + src/rules/checks.rs | 23 +++++++++++++---- ...ruff__linter__tests__RUF002_RUF002.py.snap | 8 +++--- ...ruff__linter__tests__RUF003_RUF003.py.snap | 25 +++++++++++++++++++ 11 files changed, 93 insertions(+), 19 deletions(-) create mode 100644 resources/test/fixtures/RUF003.py create mode 100644 src/snapshots/ruff__linter__tests__RUF003_RUF003.py.snap diff --git a/README.md b/README.md index 2b1f1d7985..da7ab484cd 100644 --- a/README.md +++ b/README.md @@ -552,6 +552,7 @@ For more, see [flake8-annotations](https://pypi.org/project/flake8-annotations/2 | ---- | ---- | ------- | --- | | RUF001 | AmbiguousUnicodeCharacterString | String contains ambiguous unicode character '𝐁' (did you mean 'B'?) | 🛠 | | RUF002 | AmbiguousUnicodeCharacterDocstring | Docstring contains ambiguous unicode character '𝐁' (did you mean 'B'?) | 🛠 | +| RUF003 | AmbiguousUnicodeCharacterComment | Comment contains ambiguous unicode character '𝐁' (did you mean 'B'?) | | ### Meta rules diff --git a/resources/test/fixtures/RUF001.py b/resources/test/fixtures/RUF001.py index 63e76990c1..cf488a9250 100644 --- a/resources/test/fixtures/RUF001.py +++ b/resources/test/fixtures/RUF001.py @@ -2,5 +2,6 @@ x = "𝐁ad string" def f(): - """Here's a comment with an unusual parenthesis: )""" + """Here's a docstring with an unusual parenthesis: )""" + # And here's a comment with an unusual punctuation mark: ᜵ ... diff --git a/resources/test/fixtures/RUF002.py b/resources/test/fixtures/RUF002.py index 63e76990c1..cf488a9250 100644 --- a/resources/test/fixtures/RUF002.py +++ b/resources/test/fixtures/RUF002.py @@ -2,5 +2,6 @@ x = "𝐁ad string" def f(): - """Here's a comment with an unusual parenthesis: )""" + """Here's a docstring with an unusual parenthesis: )""" + # And here's a comment with an unusual punctuation mark: ᜵ ... diff --git a/resources/test/fixtures/RUF003.py b/resources/test/fixtures/RUF003.py new file mode 100644 index 0000000000..cf488a9250 --- /dev/null +++ b/resources/test/fixtures/RUF003.py @@ -0,0 +1,7 @@ +x = "𝐁ad string" + + +def f(): + """Here's a docstring with an unusual parenthesis: )""" + # And here's a comment with an unusual punctuation mark: ᜵ + ... diff --git a/src/check_tokens.rs b/src/check_tokens.rs index 9ae1f8a762..a8d068ddf4 100644 --- a/src/check_tokens.rs +++ b/src/check_tokens.rs @@ -5,6 +5,7 @@ use rustpython_parser::lexer::{LexResult, Tok}; use crate::autofix::fixer; use crate::checks::{Check, CheckCode}; use crate::lex::docstring_detection::StateMachine; +use crate::rules::checks::Context; use crate::source_code_locator::SourceCodeLocator; use crate::{flake8_quotes, pycodestyle, rules, Settings}; @@ -16,7 +17,8 @@ pub fn check_tokens( autofix: &fixer::Mode, ) { let enforce_ambiguous_unicode_character = settings.enabled.contains(&CheckCode::RUF001) - || settings.enabled.contains(&CheckCode::RUF002); + || settings.enabled.contains(&CheckCode::RUF002) + || settings.enabled.contains(&CheckCode::RUF003); let enforce_quotes = settings.enabled.contains(&CheckCode::Q000) || settings.enabled.contains(&CheckCode::Q001) || settings.enabled.contains(&CheckCode::Q002) @@ -31,14 +33,22 @@ pub fn check_tokens( false }; - // RUF001, RUF002 + // RUF001, RUF002, RUF003 if enforce_ambiguous_unicode_character { - if matches!(tok, Tok::String { .. }) { + if matches!(tok, Tok::String { .. } | Tok::Comment) { for check in rules::checks::ambiguous_unicode_character( locator, start, end, - is_docstring, + if matches!(tok, Tok::String { .. }) { + if is_docstring { + Context::Docstring + } else { + Context::String + } + } else { + Context::Comment + }, autofix.patch(), ) { if settings.enabled.contains(check.kind.code()) { diff --git a/src/checks.rs b/src/checks.rs index 569dfe37b2..e4f312f42f 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -202,6 +202,7 @@ pub enum CheckCode { // Ruff RUF001, RUF002, + RUF003, // Meta M001, } @@ -462,6 +463,7 @@ pub enum CheckKind { // Ruff AmbiguousUnicodeCharacterString(char, char), AmbiguousUnicodeCharacterDocstring(char, char), + AmbiguousUnicodeCharacterComment(char, char), // Meta UnusedNOQA(Option>), } @@ -480,7 +482,8 @@ impl CheckCode { | CheckCode::Q003 | CheckCode::W605 | CheckCode::RUF001 - | CheckCode::RUF002 => &LintSource::Tokens, + | CheckCode::RUF002 + | CheckCode::RUF003 => &LintSource::Tokens, CheckCode::E902 => &LintSource::FileSystem, _ => &LintSource::AST, } @@ -702,6 +705,7 @@ impl CheckCode { // Ruff CheckCode::RUF001 => CheckKind::AmbiguousUnicodeCharacterString('𝐁', 'B'), CheckCode::RUF002 => CheckKind::AmbiguousUnicodeCharacterDocstring('𝐁', 'B'), + CheckCode::RUF003 => CheckKind::AmbiguousUnicodeCharacterComment('𝐁', 'B'), // Meta CheckCode::M001 => CheckKind::UnusedNOQA(None), } @@ -873,6 +877,7 @@ impl CheckCode { CheckCode::N818 => CheckCategory::PEP8Naming, CheckCode::RUF001 => CheckCategory::Ruff, CheckCode::RUF002 => CheckCategory::Ruff, + CheckCode::RUF003 => CheckCategory::Ruff, CheckCode::M001 => CheckCategory::Meta, } } @@ -1058,6 +1063,7 @@ impl CheckKind { // Ruff CheckKind::AmbiguousUnicodeCharacterString(..) => &CheckCode::RUF001, CheckKind::AmbiguousUnicodeCharacterDocstring(..) => &CheckCode::RUF002, + CheckKind::AmbiguousUnicodeCharacterComment(..) => &CheckCode::RUF003, // Meta CheckKind::UnusedNOQA(_) => &CheckCode::M001, } @@ -1606,6 +1612,12 @@ impl CheckKind { '{representant}'?)" ) } + CheckKind::AmbiguousUnicodeCharacterComment(confusable, representant) => { + format!( + "Comment contains ambiguous unicode character '{confusable}' (did you mean \ + '{representant}'?)" + ) + } // Meta CheckKind::UnusedNOQA(codes) => match codes { None => "Unused `noqa` directive".to_string(), diff --git a/src/checks_gen.rs b/src/checks_gen.rs index 848dd8c2a3..9661b568a2 100644 --- a/src/checks_gen.rs +++ b/src/checks_gen.rs @@ -233,6 +233,7 @@ pub enum CheckCodePrefix { RUF00, RUF001, RUF002, + RUF003, T, T2, T20, @@ -926,11 +927,12 @@ impl CheckCodePrefix { CheckCodePrefix::Q001 => vec![CheckCode::Q001], CheckCodePrefix::Q002 => vec![CheckCode::Q002], CheckCodePrefix::Q003 => vec![CheckCode::Q003], - CheckCodePrefix::RUF => vec![CheckCode::RUF001, CheckCode::RUF002], - CheckCodePrefix::RUF0 => vec![CheckCode::RUF001, CheckCode::RUF002], - CheckCodePrefix::RUF00 => vec![CheckCode::RUF001, CheckCode::RUF002], + CheckCodePrefix::RUF => vec![CheckCode::RUF001, CheckCode::RUF002, CheckCode::RUF003], + CheckCodePrefix::RUF0 => vec![CheckCode::RUF001, CheckCode::RUF002, CheckCode::RUF003], + CheckCodePrefix::RUF00 => vec![CheckCode::RUF001, CheckCode::RUF002, CheckCode::RUF003], CheckCodePrefix::RUF001 => vec![CheckCode::RUF001], CheckCodePrefix::RUF002 => vec![CheckCode::RUF002], + CheckCodePrefix::RUF003 => vec![CheckCode::RUF003], CheckCodePrefix::T => vec![CheckCode::T201, CheckCode::T203], CheckCodePrefix::T2 => vec![CheckCode::T201, CheckCode::T203], CheckCodePrefix::T20 => vec![CheckCode::T201, CheckCode::T203], @@ -1218,6 +1220,7 @@ impl CheckCodePrefix { CheckCodePrefix::RUF00 => PrefixSpecificity::Tens, CheckCodePrefix::RUF001 => PrefixSpecificity::Explicit, CheckCodePrefix::RUF002 => PrefixSpecificity::Explicit, + CheckCodePrefix::RUF003 => PrefixSpecificity::Explicit, CheckCodePrefix::T => PrefixSpecificity::Category, CheckCodePrefix::T2 => PrefixSpecificity::Hundreds, CheckCodePrefix::T20 => PrefixSpecificity::Tens, diff --git a/src/linter.rs b/src/linter.rs index 72d1a6a1b2..4c1568e463 100644 --- a/src/linter.rs +++ b/src/linter.rs @@ -452,6 +452,7 @@ mod tests { #[test_case(CheckCode::W605, Path::new("W605_1.py"); "W605_1")] #[test_case(CheckCode::RUF001, Path::new("RUF001.py"); "RUF001")] #[test_case(CheckCode::RUF002, Path::new("RUF002.py"); "RUF002")] + #[test_case(CheckCode::RUF003, Path::new("RUF003.py"); "RUF003")] fn checks(check_code: CheckCode, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", check_code.as_ref(), path.to_string_lossy()); let mut checks = check_path( diff --git a/src/rules/checks.rs b/src/rules/checks.rs index 038bc39745..5d3f6acdfc 100644 --- a/src/rules/checks.rs +++ b/src/rules/checks.rs @@ -1596,11 +1596,17 @@ static CONFUSABLES: Lazy> = Lazy::new(|| { ]) }); +pub enum Context { + String, + Docstring, + Comment, +} + pub fn ambiguous_unicode_character( locator: &SourceCodeLocator, start: &Location, end: &Location, - is_docstring: bool, + context: Context, fix: bool, ) -> Vec { let mut checks = vec![]; @@ -1623,10 +1629,17 @@ pub fn ambiguous_unicode_character( }; let end_location = Location::new(location.row(), location.column() + 1); let mut check = Check::new( - if is_docstring { - CheckKind::AmbiguousUnicodeCharacterDocstring(current_char, representant) - } else { - CheckKind::AmbiguousUnicodeCharacterString(current_char, representant) + match context { + Context::String => { + CheckKind::AmbiguousUnicodeCharacterString(current_char, representant) + } + Context::Docstring => CheckKind::AmbiguousUnicodeCharacterDocstring( + current_char, + representant, + ), + Context::Comment => { + CheckKind::AmbiguousUnicodeCharacterComment(current_char, representant) + } }, Range { location, diff --git a/src/snapshots/ruff__linter__tests__RUF002_RUF002.py.snap b/src/snapshots/ruff__linter__tests__RUF002_RUF002.py.snap index 90a05b35f3..0eb1a7c45c 100644 --- a/src/snapshots/ruff__linter__tests__RUF002_RUF002.py.snap +++ b/src/snapshots/ruff__linter__tests__RUF002_RUF002.py.snap @@ -8,18 +8,18 @@ expression: checks - ) location: row: 5 - column: 53 + column: 55 end_location: row: 5 - column: 54 + column: 56 fix: patch: content: ) location: row: 5 - column: 53 + column: 55 end_location: row: 5 - column: 54 + column: 56 applied: false diff --git a/src/snapshots/ruff__linter__tests__RUF003_RUF003.py.snap b/src/snapshots/ruff__linter__tests__RUF003_RUF003.py.snap new file mode 100644 index 0000000000..90f5a71bbd --- /dev/null +++ b/src/snapshots/ruff__linter__tests__RUF003_RUF003.py.snap @@ -0,0 +1,25 @@ +--- +source: src/linter.rs +expression: checks +--- +- kind: + AmbiguousUnicodeCharacterComment: + - ᜵ + - / + location: + row: 6 + column: 61 + end_location: + row: 6 + column: 62 + fix: + patch: + content: / + location: + row: 6 + column: 61 + end_location: + row: 6 + column: 62 + applied: false +