Implement confusing unicode character detection for comments (#653)

Charlie Marsh 2022-11-07 21:16:34 -05:00 committed by GitHub
parent d600650214
commit 7c3d387abd
11 changed files with 93 additions and 19 deletions
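
RUF003 reuses the detection strategy already used for strings (RUF001) and docstrings (RUF002): walk the text of a token character by character and flag any character found in a confusables table that maps visually similar Unicode code points to their ASCII counterparts. The sketch below illustrates that idea in isolation; the table entries, function names, and the use of the character offset as the "column" are illustrative simplifications, not ruff's actual `CONFUSABLES` implementation.

```rust
use std::collections::BTreeMap;

/// Tiny stand-in for ruff's confusables table: maps a lookalike code point
/// to the ASCII character it is usually mistaken for (illustrative entries).
fn confusables() -> BTreeMap<u32, u32> {
    BTreeMap::from([
        (0x1D401, 'B' as u32), // 𝐁 MATHEMATICAL BOLD CAPITAL B -> B
        (0xFF09, ')' as u32),  // ） FULLWIDTH RIGHT PARENTHESIS -> )
        (0x2215, '/' as u32),  // ∕ DIVISION SLASH -> /
    ])
}

/// Scan a token's text and collect (offset, confusable, suggestion) triples.
/// Using the character offset as the column is a simplification.
fn ambiguous_unicode_characters(text: &str) -> Vec<(usize, char, char)> {
    let map = confusables();
    text.chars()
        .enumerate()
        .filter_map(|(offset, c)| {
            map.get(&(c as u32))
                .and_then(|&ascii| char::from_u32(ascii))
                .map(|ascii| (offset, c, ascii))
        })
        .collect()
}

fn main() {
    // A comment containing a DIVISION SLASH instead of a plain '/'.
    for (offset, confusable, suggestion) in ambiguous_unicode_characters("# a∕b is a ratio") {
        println!("offset {offset}: '{confusable}' (did you mean '{suggestion}'?)");
    }
}
```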


@@ -552,6 +552,7 @@ For more, see [flake8-annotations](https://pypi.org/project/flake8-annotations/2
 | ---- | ---- | ------- | --- |
 | RUF001 | AmbiguousUnicodeCharacterString | String contains ambiguous unicode character '𝐁' (did you mean 'B'?) | 🛠 |
 | RUF002 | AmbiguousUnicodeCharacterDocstring | Docstring contains ambiguous unicode character '𝐁' (did you mean 'B'?) | 🛠 |
+| RUF003 | AmbiguousUnicodeCharacterComment | Comment contains ambiguous unicode character '𝐁' (did you mean 'B'?) | |
 ### Meta rules


@@ -2,5 +2,6 @@ x = "𝐁ad string"
 
 
 def f():
-    """Here's a comment with an unusual parenthesis: """
+    """Here's a docstring with an unusual parenthesis: """
+    # And here's a comment with an unusual punctuation mark:
     ...


@@ -2,5 +2,6 @@ x = "𝐁ad string"
 
 
 def f():
-    """Here's a comment with an unusual parenthesis: """
+    """Here's a docstring with an unusual parenthesis: """
+    # And here's a comment with an unusual punctuation mark:
     ...

resources/test/fixtures/RUF003.py vendored Normal file

@@ -0,0 +1,7 @@
+x = "𝐁ad string"
+
+
+def f():
+    """Here's a docstring with an unusual parenthesis: """
+    # And here's a comment with an unusual punctuation mark:
+    ...


@@ -5,6 +5,7 @@ use rustpython_parser::lexer::{LexResult, Tok};
 use crate::autofix::fixer;
 use crate::checks::{Check, CheckCode};
 use crate::lex::docstring_detection::StateMachine;
+use crate::rules::checks::Context;
 use crate::source_code_locator::SourceCodeLocator;
 use crate::{flake8_quotes, pycodestyle, rules, Settings};
@@ -16,7 +17,8 @@ pub fn check_tokens(
     autofix: &fixer::Mode,
 ) {
     let enforce_ambiguous_unicode_character = settings.enabled.contains(&CheckCode::RUF001)
-        || settings.enabled.contains(&CheckCode::RUF002);
+        || settings.enabled.contains(&CheckCode::RUF002)
+        || settings.enabled.contains(&CheckCode::RUF003);
     let enforce_quotes = settings.enabled.contains(&CheckCode::Q000)
         || settings.enabled.contains(&CheckCode::Q001)
         || settings.enabled.contains(&CheckCode::Q002)
@@ -31,14 +33,22 @@ pub fn check_tokens(
             false
         };
 
-        // RUF001, RUF002
+        // RUF001, RUF002, RUF003
         if enforce_ambiguous_unicode_character {
-            if matches!(tok, Tok::String { .. }) {
+            if matches!(tok, Tok::String { .. } | Tok::Comment) {
                 for check in rules::checks::ambiguous_unicode_character(
                     locator,
                     start,
                     end,
-                    is_docstring,
+                    if matches!(tok, Tok::String { .. }) {
+                        if is_docstring {
+                            Context::Docstring
+                        } else {
+                            Context::String
+                        }
+                    } else {
+                        Context::Comment
+                    },
                     autofix.patch(),
                 ) {
                     if settings.enabled.contains(check.kind.code()) {
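
The nested conditional passed as the context argument above is the heart of the change: comment tokens always report under `Context::Comment`, while string tokens report as `Docstring` or `String` depending on the docstring-detection state machine. A minimal restatement of that selection, using a hypothetical `TokenKind` enum as a stand-in for rustpython's `Tok`:

```rust
/// Hypothetical stand-ins for the relevant token kinds (rustpython's `Tok`
/// has many more variants; only these two matter for this check).
enum TokenKind {
    String,
    Comment,
}

enum Context {
    String,
    Docstring,
    Comment,
}

/// Mirror of the selection above: comments always report as RUF003; string
/// tokens report as RUF002 only when the docstring state machine says so.
fn context_for(tok: &TokenKind, is_docstring: bool) -> Context {
    match tok {
        TokenKind::Comment => Context::Comment,
        TokenKind::String if is_docstring => Context::Docstring,
        TokenKind::String => Context::String,
    }
}

fn main() {
    // A comment maps to the comment context regardless of docstring state.
    assert!(matches!(context_for(&TokenKind::Comment, true), Context::Comment));
    assert!(matches!(context_for(&TokenKind::String, true), Context::Docstring));
    assert!(matches!(context_for(&TokenKind::String, false), Context::String));
}
```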


@@ -202,6 +202,7 @@ pub enum CheckCode {
     // Ruff
     RUF001,
     RUF002,
+    RUF003,
     // Meta
     M001,
 }
@@ -462,6 +463,7 @@ pub enum CheckKind {
     // Ruff
     AmbiguousUnicodeCharacterString(char, char),
     AmbiguousUnicodeCharacterDocstring(char, char),
+    AmbiguousUnicodeCharacterComment(char, char),
     // Meta
     UnusedNOQA(Option<Vec<String>>),
 }
@@ -480,7 +482,8 @@ impl CheckCode {
             | CheckCode::Q003
             | CheckCode::W605
             | CheckCode::RUF001
-            | CheckCode::RUF002 => &LintSource::Tokens,
+            | CheckCode::RUF002
+            | CheckCode::RUF003 => &LintSource::Tokens,
             CheckCode::E902 => &LintSource::FileSystem,
             _ => &LintSource::AST,
         }
@@ -702,6 +705,7 @@ impl CheckCode {
             // Ruff
             CheckCode::RUF001 => CheckKind::AmbiguousUnicodeCharacterString('𝐁', 'B'),
             CheckCode::RUF002 => CheckKind::AmbiguousUnicodeCharacterDocstring('𝐁', 'B'),
+            CheckCode::RUF003 => CheckKind::AmbiguousUnicodeCharacterComment('𝐁', 'B'),
             // Meta
             CheckCode::M001 => CheckKind::UnusedNOQA(None),
         }
@@ -873,6 +877,7 @@ impl CheckCode {
             CheckCode::N818 => CheckCategory::PEP8Naming,
             CheckCode::RUF001 => CheckCategory::Ruff,
             CheckCode::RUF002 => CheckCategory::Ruff,
+            CheckCode::RUF003 => CheckCategory::Ruff,
             CheckCode::M001 => CheckCategory::Meta,
         }
     }
@@ -1058,6 +1063,7 @@ impl CheckKind {
             // Ruff
             CheckKind::AmbiguousUnicodeCharacterString(..) => &CheckCode::RUF001,
             CheckKind::AmbiguousUnicodeCharacterDocstring(..) => &CheckCode::RUF002,
+            CheckKind::AmbiguousUnicodeCharacterComment(..) => &CheckCode::RUF003,
             // Meta
             CheckKind::UnusedNOQA(_) => &CheckCode::M001,
         }
@@ -1606,6 +1612,12 @@ impl CheckKind {
                      '{representant}'?)"
                 )
             }
+            CheckKind::AmbiguousUnicodeCharacterComment(confusable, representant) => {
+                format!(
+                    "Comment contains ambiguous unicode character '{confusable}' (did you mean \
+                     '{representant}'?)"
+                )
+            }
             // Meta
             CheckKind::UnusedNOQA(codes) => match codes {
                 None => "Unused `noqa` directive".to_string(),


@@ -233,6 +233,7 @@ pub enum CheckCodePrefix {
     RUF00,
     RUF001,
     RUF002,
+    RUF003,
     T,
     T2,
     T20,
@@ -926,11 +927,12 @@ impl CheckCodePrefix {
             CheckCodePrefix::Q001 => vec![CheckCode::Q001],
             CheckCodePrefix::Q002 => vec![CheckCode::Q002],
             CheckCodePrefix::Q003 => vec![CheckCode::Q003],
-            CheckCodePrefix::RUF => vec![CheckCode::RUF001, CheckCode::RUF002],
-            CheckCodePrefix::RUF0 => vec![CheckCode::RUF001, CheckCode::RUF002],
-            CheckCodePrefix::RUF00 => vec![CheckCode::RUF001, CheckCode::RUF002],
+            CheckCodePrefix::RUF => vec![CheckCode::RUF001, CheckCode::RUF002, CheckCode::RUF003],
+            CheckCodePrefix::RUF0 => vec![CheckCode::RUF001, CheckCode::RUF002, CheckCode::RUF003],
+            CheckCodePrefix::RUF00 => vec![CheckCode::RUF001, CheckCode::RUF002, CheckCode::RUF003],
             CheckCodePrefix::RUF001 => vec![CheckCode::RUF001],
             CheckCodePrefix::RUF002 => vec![CheckCode::RUF002],
+            CheckCodePrefix::RUF003 => vec![CheckCode::RUF003],
             CheckCodePrefix::T => vec![CheckCode::T201, CheckCode::T203],
             CheckCodePrefix::T2 => vec![CheckCode::T201, CheckCode::T203],
             CheckCodePrefix::T20 => vec![CheckCode::T201, CheckCode::T203],
@@ -1218,6 +1220,7 @@ impl CheckCodePrefix {
             CheckCodePrefix::RUF00 => PrefixSpecificity::Tens,
             CheckCodePrefix::RUF001 => PrefixSpecificity::Explicit,
             CheckCodePrefix::RUF002 => PrefixSpecificity::Explicit,
+            CheckCodePrefix::RUF003 => PrefixSpecificity::Explicit,
             CheckCodePrefix::T => PrefixSpecificity::Category,
             CheckCodePrefix::T2 => PrefixSpecificity::Hundreds,
             CheckCodePrefix::T20 => PrefixSpecificity::Tens,


@@ -452,6 +452,7 @@ mod tests {
     #[test_case(CheckCode::W605, Path::new("W605_1.py"); "W605_1")]
     #[test_case(CheckCode::RUF001, Path::new("RUF001.py"); "RUF001")]
     #[test_case(CheckCode::RUF002, Path::new("RUF002.py"); "RUF002")]
+    #[test_case(CheckCode::RUF003, Path::new("RUF003.py"); "RUF003")]
     fn checks(check_code: CheckCode, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", check_code.as_ref(), path.to_string_lossy());
         let mut checks = check_path(


@@ -1596,11 +1596,17 @@ static CONFUSABLES: Lazy<BTreeMap<u32, u32>> = Lazy::new(|| {
     ])
 });
 
+pub enum Context {
+    String,
+    Docstring,
+    Comment,
+}
+
 pub fn ambiguous_unicode_character(
     locator: &SourceCodeLocator,
     start: &Location,
     end: &Location,
-    is_docstring: bool,
+    context: Context,
     fix: bool,
 ) -> Vec<Check> {
     let mut checks = vec![];
@@ -1623,10 +1629,17 @@ pub fn ambiguous_unicode_character(
         };
         let end_location = Location::new(location.row(), location.column() + 1);
         let mut check = Check::new(
-            if is_docstring {
-                CheckKind::AmbiguousUnicodeCharacterDocstring(current_char, representant)
-            } else {
-                CheckKind::AmbiguousUnicodeCharacterString(current_char, representant)
+            match context {
+                Context::String => {
+                    CheckKind::AmbiguousUnicodeCharacterString(current_char, representant)
+                }
+                Context::Docstring => CheckKind::AmbiguousUnicodeCharacterDocstring(
+                    current_char,
+                    representant,
+                ),
+                Context::Comment => {
+                    CheckKind::AmbiguousUnicodeCharacterComment(current_char, representant)
+                }
             },
             Range {
                 location,
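
Each confusable is flagged over a one-character range (the end location is the start location with the column advanced by one), and the `Context` decides which check code the hit is reported under. A small self-contained sketch with stand-in `Location` and `Range` types rather than ruff's real ones, using the row/column values from the RUF003 snapshot further below:

```rust
#[derive(Clone, Copy)]
struct Location {
    row: usize,
    column: usize,
}

struct Range {
    location: Location,
    end_location: Location,
}

enum Context {
    String,
    Docstring,
    Comment,
}

/// The check code a hit is reported under, by context.
fn check_code(context: &Context) -> &'static str {
    match context {
        Context::String => "RUF001",
        Context::Docstring => "RUF002",
        Context::Comment => "RUF003",
    }
}

/// A confusable character is flagged over exactly one column:
/// the end location is the start location shifted right by one.
fn one_char_range(location: Location) -> Range {
    Range {
        location,
        end_location: Location {
            row: location.row,
            column: location.column + 1,
        },
    }
}

fn main() {
    // These values mirror the RUF003 snapshot in this commit: row 6, column 61.
    let range = one_char_range(Location { row: 6, column: 61 });
    println!(
        "{} at {}:{}-{}",
        check_code(&Context::Comment),
        range.location.row,
        range.location.column,
        range.end_location.column,
    );
}
```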


@@ -8,18 +8,18 @@ expression: checks
       - )
   location:
     row: 5
-    column: 53
+    column: 55
   end_location:
     row: 5
-    column: 54
+    column: 56
   fix:
     patch:
       content: )
       location:
         row: 5
-        column: 53
+        column: 55
       end_location:
         row: 5
-        column: 54
+        column: 56
     applied: false


@@ -0,0 +1,25 @@
+---
+source: src/linter.rs
+expression: checks
+---
+- kind:
+    AmbiguousUnicodeCharacterComment:
+      -
+      - /
+  location:
+    row: 6
+    column: 61
+  end_location:
+    row: 6
+    column: 62
+  fix:
+    patch:
+      content: /
+      location:
+        row: 6
+        column: 61
+      end_location:
+        row: 6
+        column: 62
+    applied: false