Allow named Unicode escapes in bidirectional escape check (#2710)

This commit is contained in:
Charlie Marsh 2023-02-10 16:59:28 -05:00 committed by GitHub
parent e5f5142e3e
commit 6eb9268675
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 83 additions and 81 deletions

View File

@ -1368,7 +1368,7 @@ For more, see [Pylint](https://pypi.org/project/pylint/) on PyPI.
| PLE1142 | await-outside-async | `await` should be used within an async function | | | PLE1142 | await-outside-async | `await` should be used within an async function | |
| PLE1307 | [bad-string-format-type](https://github.com/charliermarsh/ruff/blob/main/docs/rules/bad-string-format-type.md) | Format type does not match argument type | | | PLE1307 | [bad-string-format-type](https://github.com/charliermarsh/ruff/blob/main/docs/rules/bad-string-format-type.md) | Format type does not match argument type | |
| PLE1310 | bad-str-strip-call | String `{strip}` call contains duplicate characters (did you mean `{removal}`?) | | | PLE1310 | bad-str-strip-call | String `{strip}` call contains duplicate characters (did you mean `{removal}`?) | |
| PLE2502 | bidirectional-unicode | Avoid using bidirectional unicode | | | PLE2502 | bidirectional-unicode | Contains control characters that can permit obfuscated code | |
#### Refactor (PLR) #### Refactor (PLR)

View File

@ -1,6 +1,3 @@
# E2502
print("\u202B\u202E\u05e9\u05DC\u05D5\u05DD\u202C")
# E2502 # E2502
print("שלום‬") print("שלום‬")
@ -20,5 +17,12 @@ def subtract_funds(account: str, amount: int):
return return
# OK
print("\u202B\u202E\u05e9\u05DC\u05D5\u05DD\u202C")
# OK
print("\N{RIGHT-TO-LEFT MARK}")
# OK # OK
print("Hello World") print("Hello World")

View File

@ -3155,9 +3155,6 @@ where
if self.settings.rules.enabled(&Rule::RewriteUnicodeLiteral) { if self.settings.rules.enabled(&Rule::RewriteUnicodeLiteral) {
pyupgrade::rules::rewrite_unicode_literal(self, expr, kind.as_deref()); pyupgrade::rules::rewrite_unicode_literal(self, expr, kind.as_deref());
} }
if self.settings.rules.enabled(&Rule::BidirectionalUnicode) {
pylint::rules::bidirectional_unicode(self, expr, value);
}
} }
ExprKind::Lambda { args, body, .. } => { ExprKind::Lambda { args, body, .. } => {
if self.settings.rules.enabled(&Rule::PreferListBuiltin) { if self.settings.rules.enabled(&Rule::PreferListBuiltin) {

View File

@ -11,6 +11,7 @@ use crate::rules::pycodestyle::rules::{
doc_line_too_long, line_too_long, mixed_spaces_and_tabs, no_newline_at_end_of_file, doc_line_too_long, line_too_long, mixed_spaces_and_tabs, no_newline_at_end_of_file,
}; };
use crate::rules::pygrep_hooks::rules::{blanket_noqa, blanket_type_ignore}; use crate::rules::pygrep_hooks::rules::{blanket_noqa, blanket_type_ignore};
use crate::rules::pylint;
use crate::rules::pyupgrade::rules::unnecessary_coding_comment; use crate::rules::pyupgrade::rules::unnecessary_coding_comment;
use crate::settings::{flags, Settings}; use crate::settings::{flags, Settings};
use crate::source_code::Stylist; use crate::source_code::Stylist;
@ -41,6 +42,7 @@ pub fn check_physical_lines(
.rules .rules
.enabled(&Rule::PEP3120UnnecessaryCodingComment); .enabled(&Rule::PEP3120UnnecessaryCodingComment);
let enforce_mixed_spaces_and_tabs = settings.rules.enabled(&Rule::MixedSpacesAndTabs); let enforce_mixed_spaces_and_tabs = settings.rules.enabled(&Rule::MixedSpacesAndTabs);
let enforce_bidirectional_unicode = settings.rules.enabled(&Rule::BidirectionalUnicode);
let fix_unnecessary_coding_comment = matches!(autofix, flags::Autofix::Enabled) let fix_unnecessary_coding_comment = matches!(autofix, flags::Autofix::Enabled)
&& settings && settings
@ -137,6 +139,10 @@ pub fn check_physical_lines(
diagnostics.push(diagnostic); diagnostics.push(diagnostic);
} }
} }
if enforce_bidirectional_unicode {
diagnostics.extend(pylint::rules::bidirectional_unicode(index, line));
}
} }
if enforce_no_newline_at_end_of_file { if enforce_no_newline_at_end_of_file {

View File

@ -58,10 +58,8 @@ pub fn check_tokens(
|| settings.rules.enabled(&Rule::TrailingCommaProhibited); || settings.rules.enabled(&Rule::TrailingCommaProhibited);
let enforce_extraneous_parenthesis = settings.rules.enabled(&Rule::ExtraneousParentheses); let enforce_extraneous_parenthesis = settings.rules.enabled(&Rule::ExtraneousParentheses);
if enforce_ambiguous_unicode_character // RUF001, RUF002, RUF003
|| enforce_commented_out_code if enforce_ambiguous_unicode_character {
|| enforce_invalid_escape_sequence
{
let mut state_machine = StateMachine::default(); let mut state_machine = StateMachine::default();
for &(start, ref tok, end) in tokens.iter().flatten() { for &(start, ref tok, end) in tokens.iter().flatten() {
let is_docstring = if enforce_ambiguous_unicode_character { let is_docstring = if enforce_ambiguous_unicode_character {
@ -70,8 +68,6 @@ pub fn check_tokens(
false false
}; };
// RUF001, RUF002, RUF003
if enforce_ambiguous_unicode_character {
if matches!(tok, Tok::String { .. } | Tok::Comment(_)) { if matches!(tok, Tok::String { .. } | Tok::Comment(_)) {
diagnostics.extend(ruff::rules::ambiguous_unicode_character( diagnostics.extend(ruff::rules::ambiguous_unicode_character(
locator, locator,
@ -91,32 +87,35 @@ pub fn check_tokens(
)); ));
} }
} }
}
// eradicate // ERA001
if enforce_commented_out_code { if enforce_commented_out_code {
for (start, tok, end) in tokens.iter().flatten() {
if matches!(tok, Tok::Comment(_)) { if matches!(tok, Tok::Comment(_)) {
if let Some(diagnostic) = if let Some(diagnostic) =
eradicate::rules::commented_out_code(locator, start, end, settings, autofix) eradicate::rules::commented_out_code(locator, *start, *end, settings, autofix)
{ {
diagnostics.push(diagnostic); diagnostics.push(diagnostic);
} }
} }
} }
}
// W605 // W605
if enforce_invalid_escape_sequence { if enforce_invalid_escape_sequence {
for (start, tok, end) in tokens.iter().flatten() {
if matches!(tok, Tok::String { .. }) { if matches!(tok, Tok::String { .. }) {
diagnostics.extend(pycodestyle::rules::invalid_escape_sequence( diagnostics.extend(pycodestyle::rules::invalid_escape_sequence(
locator, locator,
start, *start,
end, *end,
matches!(autofix, flags::Autofix::Enabled) matches!(autofix, flags::Autofix::Enabled)
&& settings.rules.should_fix(&Rule::InvalidEscapeSequence), && settings.rules.should_fix(&Rule::InvalidEscapeSequence),
)); ));
} }
} }
} }
}
// E701, E702, E703, E704 // E701, E702, E703, E704
if enforce_compound_statements { if enforce_compound_statements {

View File

@ -755,6 +755,7 @@ impl Rule {
| Rule::ShebangMissingExecutableFile | Rule::ShebangMissingExecutableFile
| Rule::ShebangNotExecutable | Rule::ShebangNotExecutable
| Rule::ShebangNewline | Rule::ShebangNewline
| Rule::BidirectionalUnicode
| Rule::ShebangPython | Rule::ShebangPython
| Rule::ShebangWhitespace => &LintSource::PhysicalLines, | Rule::ShebangWhitespace => &LintSource::PhysicalLines,
Rule::AmbiguousUnicodeCharacterComment Rule::AmbiguousUnicodeCharacterComment

View File

@ -1,8 +1,8 @@
use rustpython_parser::ast::Location;
use ruff_macros::{define_violation, derive_message_formats}; use ruff_macros::{define_violation, derive_message_formats};
use rustpython_parser::ast::Expr;
use crate::ast::types::Range; use crate::ast::types::Range;
use crate::checkers::ast::Checker;
use crate::registry::Diagnostic; use crate::registry::Diagnostic;
use crate::violation::Violation; use crate::violation::Violation;
@ -32,16 +32,21 @@ define_violation!(
impl Violation for BidirectionalUnicode { impl Violation for BidirectionalUnicode {
#[derive_message_formats] #[derive_message_formats]
fn message(&self) -> String { fn message(&self) -> String {
format!("Avoid using bidirectional unicode") format!("Contains control characters that can permit obfuscated code")
} }
} }
/// PLE2502 /// PLE2502
pub fn bidirectional_unicode(checker: &mut Checker, expr: &Expr, value: &str) { pub fn bidirectional_unicode(lineno: usize, line: &str) -> Vec<Diagnostic> {
if value.contains(BIDI_UNICODE) { let mut diagnostics = Vec::new();
checker.diagnostics.push(Diagnostic::new( if line.contains(BIDI_UNICODE) {
diagnostics.push(Diagnostic::new(
BidirectionalUnicode, BidirectionalUnicode,
Range::from_located(expr), Range::new(
Location::new(lineno + 1, 0),
Location::new((lineno + 1) + 1, 0),
),
)); ));
} }
diagnostics
} }

View File

@ -6,50 +6,40 @@ expression: diagnostics
BidirectionalUnicode: ~ BidirectionalUnicode: ~
location: location:
row: 2 row: 2
column: 6 column: 0
end_location: end_location:
row: 2 row: 3
column: 50 column: 0
fix: ~ fix: ~
parent: ~ parent: ~
- kind: - kind:
BidirectionalUnicode: ~ BidirectionalUnicode: ~
location: location:
row: 5 row: 5
column: 6 column: 0
end_location: end_location:
row: 5 row: 6
column: 13 column: 0
fix: ~ fix: ~
parent: ~ parent: ~
- kind: - kind:
BidirectionalUnicode: ~ BidirectionalUnicode: ~
location: location:
row: 8 row: 8
column: 10 column: 0
end_location: end_location:
row: 8 row: 9
column: 14 column: 0
fix: ~ fix: ~
parent: ~ parent: ~
- kind: - kind:
BidirectionalUnicode: ~ BidirectionalUnicode: ~
location: location:
row: 11 row: 14
column: 19 column: 0
end_location: end_location:
row: 11 row: 15
column: 27 column: 0
fix: ~
parent: ~
- kind:
BidirectionalUnicode: ~
location:
row: 17
column: 4
end_location:
row: 17
column: 49
fix: ~ fix: ~
parent: ~ parent: ~