mirror of https://github.com/astral-sh/ruff
Implement confusing unicode character detection for comments (#653)
This commit is contained in:
parent
d600650214
commit
7c3d387abd
|
|
@ -552,6 +552,7 @@ For more, see [flake8-annotations](https://pypi.org/project/flake8-annotations/2
|
||||||
| ---- | ---- | ------- | --- |
|
| ---- | ---- | ------- | --- |
|
||||||
| RUF001 | AmbiguousUnicodeCharacterString | String contains ambiguous unicode character '𝐁' (did you mean 'B'?) | 🛠 |
|
| RUF001 | AmbiguousUnicodeCharacterString | String contains ambiguous unicode character '𝐁' (did you mean 'B'?) | 🛠 |
|
||||||
| RUF002 | AmbiguousUnicodeCharacterDocstring | Docstring contains ambiguous unicode character '𝐁' (did you mean 'B'?) | 🛠 |
|
| RUF002 | AmbiguousUnicodeCharacterDocstring | Docstring contains ambiguous unicode character '𝐁' (did you mean 'B'?) | 🛠 |
|
||||||
|
| RUF003 | AmbiguousUnicodeCharacterComment | Comment contains ambiguous unicode character '𝐁' (did you mean 'B'?) | |
|
||||||
|
|
||||||
### Meta rules
|
### Meta rules
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,5 +2,6 @@ x = "𝐁ad string"
|
||||||
|
|
||||||
|
|
||||||
def f():
|
def f():
|
||||||
"""Here's a comment with an unusual parenthesis: )"""
|
"""Here's a docstring with an unusual parenthesis: )"""
|
||||||
|
# And here's a comment with an unusual punctuation mark: ᜵
|
||||||
...
|
...
|
||||||
|
|
|
||||||
|
|
@ -2,5 +2,6 @@ x = "𝐁ad string"
|
||||||
|
|
||||||
|
|
||||||
def f():
|
def f():
|
||||||
"""Here's a comment with an unusual parenthesis: )"""
|
"""Here's a docstring with an unusual parenthesis: )"""
|
||||||
|
# And here's a comment with an unusual punctuation mark: ᜵
|
||||||
...
|
...
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
x = "𝐁ad string"
|
||||||
|
|
||||||
|
|
||||||
|
def f():
|
||||||
|
"""Here's a docstring with an unusual parenthesis: )"""
|
||||||
|
# And here's a comment with an unusual punctuation mark: ᜵
|
||||||
|
...
|
||||||
|
|
@ -5,6 +5,7 @@ use rustpython_parser::lexer::{LexResult, Tok};
|
||||||
use crate::autofix::fixer;
|
use crate::autofix::fixer;
|
||||||
use crate::checks::{Check, CheckCode};
|
use crate::checks::{Check, CheckCode};
|
||||||
use crate::lex::docstring_detection::StateMachine;
|
use crate::lex::docstring_detection::StateMachine;
|
||||||
|
use crate::rules::checks::Context;
|
||||||
use crate::source_code_locator::SourceCodeLocator;
|
use crate::source_code_locator::SourceCodeLocator;
|
||||||
use crate::{flake8_quotes, pycodestyle, rules, Settings};
|
use crate::{flake8_quotes, pycodestyle, rules, Settings};
|
||||||
|
|
||||||
|
|
@ -16,7 +17,8 @@ pub fn check_tokens(
|
||||||
autofix: &fixer::Mode,
|
autofix: &fixer::Mode,
|
||||||
) {
|
) {
|
||||||
let enforce_ambiguous_unicode_character = settings.enabled.contains(&CheckCode::RUF001)
|
let enforce_ambiguous_unicode_character = settings.enabled.contains(&CheckCode::RUF001)
|
||||||
|| settings.enabled.contains(&CheckCode::RUF002);
|
|| settings.enabled.contains(&CheckCode::RUF002)
|
||||||
|
|| settings.enabled.contains(&CheckCode::RUF003);
|
||||||
let enforce_quotes = settings.enabled.contains(&CheckCode::Q000)
|
let enforce_quotes = settings.enabled.contains(&CheckCode::Q000)
|
||||||
|| settings.enabled.contains(&CheckCode::Q001)
|
|| settings.enabled.contains(&CheckCode::Q001)
|
||||||
|| settings.enabled.contains(&CheckCode::Q002)
|
|| settings.enabled.contains(&CheckCode::Q002)
|
||||||
|
|
@ -31,14 +33,22 @@ pub fn check_tokens(
|
||||||
false
|
false
|
||||||
};
|
};
|
||||||
|
|
||||||
// RUF001, RUF002
|
// RUF001, RUF002, RUF003
|
||||||
if enforce_ambiguous_unicode_character {
|
if enforce_ambiguous_unicode_character {
|
||||||
if matches!(tok, Tok::String { .. }) {
|
if matches!(tok, Tok::String { .. } | Tok::Comment) {
|
||||||
for check in rules::checks::ambiguous_unicode_character(
|
for check in rules::checks::ambiguous_unicode_character(
|
||||||
locator,
|
locator,
|
||||||
start,
|
start,
|
||||||
end,
|
end,
|
||||||
is_docstring,
|
if matches!(tok, Tok::String { .. }) {
|
||||||
|
if is_docstring {
|
||||||
|
Context::Docstring
|
||||||
|
} else {
|
||||||
|
Context::String
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Context::Comment
|
||||||
|
},
|
||||||
autofix.patch(),
|
autofix.patch(),
|
||||||
) {
|
) {
|
||||||
if settings.enabled.contains(check.kind.code()) {
|
if settings.enabled.contains(check.kind.code()) {
|
||||||
|
|
|
||||||
|
|
@ -202,6 +202,7 @@ pub enum CheckCode {
|
||||||
// Ruff
|
// Ruff
|
||||||
RUF001,
|
RUF001,
|
||||||
RUF002,
|
RUF002,
|
||||||
|
RUF003,
|
||||||
// Meta
|
// Meta
|
||||||
M001,
|
M001,
|
||||||
}
|
}
|
||||||
|
|
@ -462,6 +463,7 @@ pub enum CheckKind {
|
||||||
// Ruff
|
// Ruff
|
||||||
AmbiguousUnicodeCharacterString(char, char),
|
AmbiguousUnicodeCharacterString(char, char),
|
||||||
AmbiguousUnicodeCharacterDocstring(char, char),
|
AmbiguousUnicodeCharacterDocstring(char, char),
|
||||||
|
AmbiguousUnicodeCharacterComment(char, char),
|
||||||
// Meta
|
// Meta
|
||||||
UnusedNOQA(Option<Vec<String>>),
|
UnusedNOQA(Option<Vec<String>>),
|
||||||
}
|
}
|
||||||
|
|
@ -480,7 +482,8 @@ impl CheckCode {
|
||||||
| CheckCode::Q003
|
| CheckCode::Q003
|
||||||
| CheckCode::W605
|
| CheckCode::W605
|
||||||
| CheckCode::RUF001
|
| CheckCode::RUF001
|
||||||
| CheckCode::RUF002 => &LintSource::Tokens,
|
| CheckCode::RUF002
|
||||||
|
| CheckCode::RUF003 => &LintSource::Tokens,
|
||||||
CheckCode::E902 => &LintSource::FileSystem,
|
CheckCode::E902 => &LintSource::FileSystem,
|
||||||
_ => &LintSource::AST,
|
_ => &LintSource::AST,
|
||||||
}
|
}
|
||||||
|
|
@ -702,6 +705,7 @@ impl CheckCode {
|
||||||
// Ruff
|
// Ruff
|
||||||
CheckCode::RUF001 => CheckKind::AmbiguousUnicodeCharacterString('𝐁', 'B'),
|
CheckCode::RUF001 => CheckKind::AmbiguousUnicodeCharacterString('𝐁', 'B'),
|
||||||
CheckCode::RUF002 => CheckKind::AmbiguousUnicodeCharacterDocstring('𝐁', 'B'),
|
CheckCode::RUF002 => CheckKind::AmbiguousUnicodeCharacterDocstring('𝐁', 'B'),
|
||||||
|
CheckCode::RUF003 => CheckKind::AmbiguousUnicodeCharacterComment('𝐁', 'B'),
|
||||||
// Meta
|
// Meta
|
||||||
CheckCode::M001 => CheckKind::UnusedNOQA(None),
|
CheckCode::M001 => CheckKind::UnusedNOQA(None),
|
||||||
}
|
}
|
||||||
|
|
@ -873,6 +877,7 @@ impl CheckCode {
|
||||||
CheckCode::N818 => CheckCategory::PEP8Naming,
|
CheckCode::N818 => CheckCategory::PEP8Naming,
|
||||||
CheckCode::RUF001 => CheckCategory::Ruff,
|
CheckCode::RUF001 => CheckCategory::Ruff,
|
||||||
CheckCode::RUF002 => CheckCategory::Ruff,
|
CheckCode::RUF002 => CheckCategory::Ruff,
|
||||||
|
CheckCode::RUF003 => CheckCategory::Ruff,
|
||||||
CheckCode::M001 => CheckCategory::Meta,
|
CheckCode::M001 => CheckCategory::Meta,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1058,6 +1063,7 @@ impl CheckKind {
|
||||||
// Ruff
|
// Ruff
|
||||||
CheckKind::AmbiguousUnicodeCharacterString(..) => &CheckCode::RUF001,
|
CheckKind::AmbiguousUnicodeCharacterString(..) => &CheckCode::RUF001,
|
||||||
CheckKind::AmbiguousUnicodeCharacterDocstring(..) => &CheckCode::RUF002,
|
CheckKind::AmbiguousUnicodeCharacterDocstring(..) => &CheckCode::RUF002,
|
||||||
|
CheckKind::AmbiguousUnicodeCharacterComment(..) => &CheckCode::RUF003,
|
||||||
// Meta
|
// Meta
|
||||||
CheckKind::UnusedNOQA(_) => &CheckCode::M001,
|
CheckKind::UnusedNOQA(_) => &CheckCode::M001,
|
||||||
}
|
}
|
||||||
|
|
@ -1606,6 +1612,12 @@ impl CheckKind {
|
||||||
'{representant}'?)"
|
'{representant}'?)"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
CheckKind::AmbiguousUnicodeCharacterComment(confusable, representant) => {
|
||||||
|
format!(
|
||||||
|
"Comment contains ambiguous unicode character '{confusable}' (did you mean \
|
||||||
|
'{representant}'?)"
|
||||||
|
)
|
||||||
|
}
|
||||||
// Meta
|
// Meta
|
||||||
CheckKind::UnusedNOQA(codes) => match codes {
|
CheckKind::UnusedNOQA(codes) => match codes {
|
||||||
None => "Unused `noqa` directive".to_string(),
|
None => "Unused `noqa` directive".to_string(),
|
||||||
|
|
|
||||||
|
|
@ -233,6 +233,7 @@ pub enum CheckCodePrefix {
|
||||||
RUF00,
|
RUF00,
|
||||||
RUF001,
|
RUF001,
|
||||||
RUF002,
|
RUF002,
|
||||||
|
RUF003,
|
||||||
T,
|
T,
|
||||||
T2,
|
T2,
|
||||||
T20,
|
T20,
|
||||||
|
|
@ -926,11 +927,12 @@ impl CheckCodePrefix {
|
||||||
CheckCodePrefix::Q001 => vec![CheckCode::Q001],
|
CheckCodePrefix::Q001 => vec![CheckCode::Q001],
|
||||||
CheckCodePrefix::Q002 => vec![CheckCode::Q002],
|
CheckCodePrefix::Q002 => vec![CheckCode::Q002],
|
||||||
CheckCodePrefix::Q003 => vec![CheckCode::Q003],
|
CheckCodePrefix::Q003 => vec![CheckCode::Q003],
|
||||||
CheckCodePrefix::RUF => vec![CheckCode::RUF001, CheckCode::RUF002],
|
CheckCodePrefix::RUF => vec![CheckCode::RUF001, CheckCode::RUF002, CheckCode::RUF003],
|
||||||
CheckCodePrefix::RUF0 => vec![CheckCode::RUF001, CheckCode::RUF002],
|
CheckCodePrefix::RUF0 => vec![CheckCode::RUF001, CheckCode::RUF002, CheckCode::RUF003],
|
||||||
CheckCodePrefix::RUF00 => vec![CheckCode::RUF001, CheckCode::RUF002],
|
CheckCodePrefix::RUF00 => vec![CheckCode::RUF001, CheckCode::RUF002, CheckCode::RUF003],
|
||||||
CheckCodePrefix::RUF001 => vec![CheckCode::RUF001],
|
CheckCodePrefix::RUF001 => vec![CheckCode::RUF001],
|
||||||
CheckCodePrefix::RUF002 => vec![CheckCode::RUF002],
|
CheckCodePrefix::RUF002 => vec![CheckCode::RUF002],
|
||||||
|
CheckCodePrefix::RUF003 => vec![CheckCode::RUF003],
|
||||||
CheckCodePrefix::T => vec![CheckCode::T201, CheckCode::T203],
|
CheckCodePrefix::T => vec![CheckCode::T201, CheckCode::T203],
|
||||||
CheckCodePrefix::T2 => vec![CheckCode::T201, CheckCode::T203],
|
CheckCodePrefix::T2 => vec![CheckCode::T201, CheckCode::T203],
|
||||||
CheckCodePrefix::T20 => vec![CheckCode::T201, CheckCode::T203],
|
CheckCodePrefix::T20 => vec![CheckCode::T201, CheckCode::T203],
|
||||||
|
|
@ -1218,6 +1220,7 @@ impl CheckCodePrefix {
|
||||||
CheckCodePrefix::RUF00 => PrefixSpecificity::Tens,
|
CheckCodePrefix::RUF00 => PrefixSpecificity::Tens,
|
||||||
CheckCodePrefix::RUF001 => PrefixSpecificity::Explicit,
|
CheckCodePrefix::RUF001 => PrefixSpecificity::Explicit,
|
||||||
CheckCodePrefix::RUF002 => PrefixSpecificity::Explicit,
|
CheckCodePrefix::RUF002 => PrefixSpecificity::Explicit,
|
||||||
|
CheckCodePrefix::RUF003 => PrefixSpecificity::Explicit,
|
||||||
CheckCodePrefix::T => PrefixSpecificity::Category,
|
CheckCodePrefix::T => PrefixSpecificity::Category,
|
||||||
CheckCodePrefix::T2 => PrefixSpecificity::Hundreds,
|
CheckCodePrefix::T2 => PrefixSpecificity::Hundreds,
|
||||||
CheckCodePrefix::T20 => PrefixSpecificity::Tens,
|
CheckCodePrefix::T20 => PrefixSpecificity::Tens,
|
||||||
|
|
|
||||||
|
|
@ -452,6 +452,7 @@ mod tests {
|
||||||
#[test_case(CheckCode::W605, Path::new("W605_1.py"); "W605_1")]
|
#[test_case(CheckCode::W605, Path::new("W605_1.py"); "W605_1")]
|
||||||
#[test_case(CheckCode::RUF001, Path::new("RUF001.py"); "RUF001")]
|
#[test_case(CheckCode::RUF001, Path::new("RUF001.py"); "RUF001")]
|
||||||
#[test_case(CheckCode::RUF002, Path::new("RUF002.py"); "RUF002")]
|
#[test_case(CheckCode::RUF002, Path::new("RUF002.py"); "RUF002")]
|
||||||
|
#[test_case(CheckCode::RUF003, Path::new("RUF003.py"); "RUF003")]
|
||||||
fn checks(check_code: CheckCode, path: &Path) -> Result<()> {
|
fn checks(check_code: CheckCode, path: &Path) -> Result<()> {
|
||||||
let snapshot = format!("{}_{}", check_code.as_ref(), path.to_string_lossy());
|
let snapshot = format!("{}_{}", check_code.as_ref(), path.to_string_lossy());
|
||||||
let mut checks = check_path(
|
let mut checks = check_path(
|
||||||
|
|
|
||||||
|
|
@ -1596,11 +1596,17 @@ static CONFUSABLES: Lazy<BTreeMap<u32, u32>> = Lazy::new(|| {
|
||||||
])
|
])
|
||||||
});
|
});
|
||||||
|
|
||||||
|
pub enum Context {
|
||||||
|
String,
|
||||||
|
Docstring,
|
||||||
|
Comment,
|
||||||
|
}
|
||||||
|
|
||||||
pub fn ambiguous_unicode_character(
|
pub fn ambiguous_unicode_character(
|
||||||
locator: &SourceCodeLocator,
|
locator: &SourceCodeLocator,
|
||||||
start: &Location,
|
start: &Location,
|
||||||
end: &Location,
|
end: &Location,
|
||||||
is_docstring: bool,
|
context: Context,
|
||||||
fix: bool,
|
fix: bool,
|
||||||
) -> Vec<Check> {
|
) -> Vec<Check> {
|
||||||
let mut checks = vec![];
|
let mut checks = vec![];
|
||||||
|
|
@ -1623,10 +1629,17 @@ pub fn ambiguous_unicode_character(
|
||||||
};
|
};
|
||||||
let end_location = Location::new(location.row(), location.column() + 1);
|
let end_location = Location::new(location.row(), location.column() + 1);
|
||||||
let mut check = Check::new(
|
let mut check = Check::new(
|
||||||
if is_docstring {
|
match context {
|
||||||
CheckKind::AmbiguousUnicodeCharacterDocstring(current_char, representant)
|
Context::String => {
|
||||||
} else {
|
CheckKind::AmbiguousUnicodeCharacterString(current_char, representant)
|
||||||
CheckKind::AmbiguousUnicodeCharacterString(current_char, representant)
|
}
|
||||||
|
Context::Docstring => CheckKind::AmbiguousUnicodeCharacterDocstring(
|
||||||
|
current_char,
|
||||||
|
representant,
|
||||||
|
),
|
||||||
|
Context::Comment => {
|
||||||
|
CheckKind::AmbiguousUnicodeCharacterComment(current_char, representant)
|
||||||
|
}
|
||||||
},
|
},
|
||||||
Range {
|
Range {
|
||||||
location,
|
location,
|
||||||
|
|
|
||||||
|
|
@ -8,18 +8,18 @@ expression: checks
|
||||||
- )
|
- )
|
||||||
location:
|
location:
|
||||||
row: 5
|
row: 5
|
||||||
column: 53
|
column: 55
|
||||||
end_location:
|
end_location:
|
||||||
row: 5
|
row: 5
|
||||||
column: 54
|
column: 56
|
||||||
fix:
|
fix:
|
||||||
patch:
|
patch:
|
||||||
content: )
|
content: )
|
||||||
location:
|
location:
|
||||||
row: 5
|
row: 5
|
||||||
column: 53
|
column: 55
|
||||||
end_location:
|
end_location:
|
||||||
row: 5
|
row: 5
|
||||||
column: 54
|
column: 56
|
||||||
applied: false
|
applied: false
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
---
|
||||||
|
source: src/linter.rs
|
||||||
|
expression: checks
|
||||||
|
---
|
||||||
|
- kind:
|
||||||
|
AmbiguousUnicodeCharacterComment:
|
||||||
|
- ᜵
|
||||||
|
- /
|
||||||
|
location:
|
||||||
|
row: 6
|
||||||
|
column: 61
|
||||||
|
end_location:
|
||||||
|
row: 6
|
||||||
|
column: 62
|
||||||
|
fix:
|
||||||
|
patch:
|
||||||
|
content: /
|
||||||
|
location:
|
||||||
|
row: 6
|
||||||
|
column: 61
|
||||||
|
end_location:
|
||||||
|
row: 6
|
||||||
|
column: 62
|
||||||
|
applied: false
|
||||||
|
|
||||||
Loading…
Reference in New Issue