From 627229318016013c6c23f69f11afcebe2318fe57 Mon Sep 17 00:00:00 2001 From: Colin Delahunty <72827203+colin99d@users.noreply.github.com> Date: Mon, 6 Feb 2023 15:34:37 -0500 Subject: [PATCH] [`pylint`]: bad-str-strip-call (With Autofix) (#2570) --- README.md | 1 + .../fixtures/pylint/bad_str_strip_call.py | 73 +++++ crates/ruff/src/checkers/ast.rs | 3 + crates/ruff/src/registry.rs | 1 + crates/ruff/src/rules/pylint/mod.rs | 1 + .../rules/pylint/rules/bad_str_strip_call.rs | 194 ++++++++++++ crates/ruff/src/rules/pylint/rules/mod.rs | 2 + ..._tests__PLE1310_bad_str_strip_call.py.snap | 277 ++++++++++++++++++ ruff.schema.json | 3 + 9 files changed, 555 insertions(+) create mode 100644 crates/ruff/resources/test/fixtures/pylint/bad_str_strip_call.py create mode 100644 crates/ruff/src/rules/pylint/rules/bad_str_strip_call.rs create mode 100644 crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1310_bad_str_strip_call.py.snap diff --git a/README.md b/README.md index c0842ad2f3..0bbb6d8437 100644 --- a/README.md +++ b/README.md @@ -1336,6 +1336,7 @@ For more, see [Pylint](https://pypi.org/project/pylint/) on PyPI. | PLE0604 | invalid-all-object | Invalid object in `__all__`, must contain only strings | | | PLE0605 | invalid-all-format | Invalid format for `__all__`, must be `tuple` or `list` | | | PLE1142 | await-outside-async | `await` should be used within an async function | | +| PLE1310 | bad-str-strip-call | String `{kind}` call contains duplicate characters | 🛠 | #### Refactor (PLR) diff --git a/crates/ruff/resources/test/fixtures/pylint/bad_str_strip_call.py b/crates/ruff/resources/test/fixtures/pylint/bad_str_strip_call.py new file mode 100644 index 0000000000..fafc233398 --- /dev/null +++ b/crates/ruff/resources/test/fixtures/pylint/bad_str_strip_call.py @@ -0,0 +1,73 @@ +# PLE1310 +"Hello World".strip("Hello") + +# PLE1310 +"Hello World".strip("Hello") + +# PLE1310 +"Hello World".strip(u"Hello") + +# PLE1310 +"Hello World".strip(r"Hello") + +# PLE1310 +"Hello World".strip("Hel\tlo") + +# PLE1310 +"Hello World".strip(r"He\tllo") + +# PLE1310 +"Hello World".strip("Hel\\lo") + +# PLE1310 +"Hello World".strip(r"He\\llo") + +# PLE1310 +"Hello World".strip("🤣🤣🤣🤣🙃👀😀") + +# PLE1310 +"Hello World".strip( + """ +there are a lot of characters to strip +""" +) + +# PLE1310 +"Hello World".strip("can we get a long " \ + "string of characters to strip " \ + "please?") + +# PLE1310 +"Hello World".strip( + "can we get a long " + "string of characters to strip " + "please?" +) + +# PLE1310 +"Hello World".strip( + "can \t we get a long" + "string \t of characters to strip" + "please?" +) + +# PLE1310 +"Hello World".strip( + "abc def" + "ghi" +) + +# PLE1310 +u''.strip('http://') + +# PLE1310 +u''.lstrip('http://') + +# PLE1310 +b''.rstrip('http://') + +# OK +''.strip('Hi') + +# OK +''.strip() diff --git a/crates/ruff/src/checkers/ast.rs b/crates/ruff/src/checkers/ast.rs index 16c5926dd6..4d92e1df84 100644 --- a/crates/ruff/src/checkers/ast.rs +++ b/crates/ruff/src/checkers/ast.rs @@ -2637,6 +2637,9 @@ where if self.settings.rules.enabled(&Rule::ConsiderUsingSysExit) { pylint::rules::consider_using_sys_exit(self, func); } + if self.settings.rules.enabled(&Rule::BadStrStripCall) { + pylint::rules::bad_str_strip_call(self, func, args); + } // flake8-pytest-style if self.settings.rules.enabled(&Rule::PatchWithLambda) { diff --git a/crates/ruff/src/registry.rs b/crates/ruff/src/registry.rs index c75d1ab3b7..2e40ca2f69 100644 --- a/crates/ruff/src/registry.rs +++ b/crates/ruff/src/registry.rs @@ -107,6 +107,7 @@ ruff_macros::define_rule_mapping!( // pylint PLE0604 => rules::pylint::rules::InvalidAllObject, PLE0605 => rules::pylint::rules::InvalidAllFormat, + PLE1310 => rules::pylint::rules::BadStrStripCall, PLC0414 => rules::pylint::rules::UselessImportAlias, PLC3002 => rules::pylint::rules::UnnecessaryDirectLambdaCall, PLE0117 => rules::pylint::rules::NonlocalWithoutBinding, diff --git a/crates/ruff/src/rules/pylint/mod.rs b/crates/ruff/src/rules/pylint/mod.rs index 6b421cb84b..17fbb7b6bd 100644 --- a/crates/ruff/src/rules/pylint/mod.rs +++ b/crates/ruff/src/rules/pylint/mod.rs @@ -40,6 +40,7 @@ mod tests { #[test_case(Rule::TooManyArguments, Path::new("too_many_arguments.py"); "PLR0913")] #[test_case(Rule::TooManyBranches, Path::new("too_many_branches.py"); "PLR0912")] #[test_case(Rule::TooManyStatements, Path::new("too_many_statements.py"); "PLR0915")] + #[test_case(Rule::BadStrStripCall, Path::new("bad_str_strip_call.py"); "PLE01310")] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff/src/rules/pylint/rules/bad_str_strip_call.rs b/crates/ruff/src/rules/pylint/rules/bad_str_strip_call.rs new file mode 100644 index 0000000000..c3ef0b47f9 --- /dev/null +++ b/crates/ruff/src/rules/pylint/rules/bad_str_strip_call.rs @@ -0,0 +1,194 @@ +use std::fmt; + +use rustc_hash::FxHashSet; +use rustpython_ast::{Constant, Expr, ExprKind}; +use rustpython_parser::lexer; +use rustpython_parser::lexer::Tok; +use serde::{Deserialize, Serialize}; + +use ruff_macros::derive_message_formats; + +use crate::ast::types::Range; +use crate::checkers::ast::Checker; +use crate::define_violation; +use crate::fix::Fix; +use crate::registry::Diagnostic; +use crate::rules::pydocstyle::helpers::{leading_quote, trailing_quote}; +use crate::violation::AlwaysAutofixableViolation; + +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum StripKind { + Strip, + LStrip, + RStrip, +} + +impl StripKind { + pub fn from_str(s: &str) -> Option { + match s { + "strip" => Some(Self::Strip), + "lstrip" => Some(Self::LStrip), + "rstrip" => Some(Self::RStrip), + _ => None, + } + } +} + +impl fmt::Display for StripKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let representation = match self { + Self::Strip => "strip", + Self::LStrip => "lstrip", + Self::RStrip => "rstrip", + }; + write!(f, "{representation}") + } +} + +define_violation!( + pub struct BadStrStripCall { + kind: StripKind, + } +); +impl AlwaysAutofixableViolation for BadStrStripCall { + #[derive_message_formats] + fn message(&self) -> String { + let Self { kind } = self; + format!("String `{kind}` call contains duplicate characters") + } + + fn autofix_title(&self) -> String { + "Remove duplicate characters".to_string() + } +} + +/// Remove duplicate characters from an escaped string. +fn deduplicate_escaped(s: &str) -> String { + let mut result = String::new(); + let mut escaped = false; + let mut seen = FxHashSet::default(); + for ch in s.chars() { + if escaped { + escaped = false; + let pair = format!("\\{}", ch); + if !seen.insert(pair) { + continue; + } + } else if ch == '\\' { + escaped = true; + } else if !seen.insert(ch.to_string()) { + continue; + } + result.push(ch); + } + result +} + +/// Remove duplicate characters from a raw string. +fn deduplicate_raw(s: &str) -> String { + let mut result = String::new(); + let mut seen = FxHashSet::default(); + for ch in s.chars() { + if !seen.insert(ch) { + continue; + } + result.push(ch); + } + result +} + +/// Return `true` if a string contains duplicate characters, taking into account escapes. +fn has_duplicates(s: &str) -> bool { + let mut escaped = false; + let mut seen = FxHashSet::default(); + for ch in s.chars() { + if escaped { + escaped = false; + let pair = format!("\\{}", ch); + if !seen.insert(pair) { + return true; + } + } else if ch == '\\' { + escaped = true; + } else if !seen.insert(ch.to_string()) { + return true; + } + } + false +} + +/// PLE1310 +pub fn bad_str_strip_call(checker: &mut Checker, func: &Expr, args: &[Expr]) { + if let ExprKind::Attribute { value, attr, .. } = &func.node { + if matches!( + value.node, + ExprKind::Constant { + value: Constant::Str(_) | Constant::Bytes(_), + .. + } + ) { + if let Some(kind) = StripKind::from_str(attr.as_str()) { + if let Some(arg) = args.get(0) { + if let ExprKind::Constant { + value: Constant::Str(value), + .. + } = &arg.node + { + let is_multiline = arg.location.row() != arg.end_location.unwrap().row(); + + let module_text = checker + .locator + .slice_source_code_range(&Range::from_located(arg)); + + if !is_multiline + && lexer::make_tokenizer_located(module_text, arg.location) + .flatten() + .filter(|(_, tok, _)| matches!(tok, Tok::String { .. })) + .nth(1) + .is_none() + { + // If we have a single string (no implicit concatenation), fix it. + let Some(leading_quote) = leading_quote(module_text) else { + return; + }; + let Some(trailing_quote) = trailing_quote(module_text) else { + return; + }; + let content = &module_text + [leading_quote.len()..module_text.len() - trailing_quote.len()]; + + let deduplicated = + if leading_quote.contains('r') || leading_quote.contains('R') { + deduplicate_raw(content) + } else { + deduplicate_escaped(content) + }; + if content != deduplicated { + let mut diagnostic = Diagnostic::new( + BadStrStripCall { kind }, + Range::from_located(arg), + ); + if checker.patch(diagnostic.kind.rule()) { + diagnostic.amend(Fix::replacement( + format!("{leading_quote}{deduplicated}{trailing_quote}"), + arg.location, + arg.end_location.unwrap(), + )); + }; + checker.diagnostics.push(diagnostic); + } + } else { + // Otherwise, let's just look for duplicates. + if has_duplicates(value) { + checker.diagnostics.push(Diagnostic::new( + BadStrStripCall { kind }, + Range::from_located(arg), + )); + } + } + } + } + } + } + } +} diff --git a/crates/ruff/src/rules/pylint/rules/mod.rs b/crates/ruff/src/rules/pylint/rules/mod.rs index 07dfb9cefd..3a9bc19369 100644 --- a/crates/ruff/src/rules/pylint/rules/mod.rs +++ b/crates/ruff/src/rules/pylint/rules/mod.rs @@ -1,4 +1,5 @@ pub use await_outside_async::{await_outside_async, AwaitOutsideAsync}; +pub use bad_str_strip_call::{bad_str_strip_call, BadStrStripCall}; pub use comparison_of_constant::{comparison_of_constant, ComparisonOfConstant}; pub use consider_using_sys_exit::{consider_using_sys_exit, ConsiderUsingSysExit}; pub use global_variable_not_assigned::GlobalVariableNotAssigned; @@ -23,6 +24,7 @@ pub use useless_else_on_loop::{useless_else_on_loop, UselessElseOnLoop}; pub use useless_import_alias::{useless_import_alias, UselessImportAlias}; mod await_outside_async; +mod bad_str_strip_call; mod comparison_of_constant; mod consider_using_sys_exit; mod global_variable_not_assigned; diff --git a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1310_bad_str_strip_call.py.snap b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1310_bad_str_strip_call.py.snap new file mode 100644 index 0000000000..954d345465 --- /dev/null +++ b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1310_bad_str_strip_call.py.snap @@ -0,0 +1,277 @@ +--- +source: crates/ruff/src/rules/pylint/mod.rs +expression: diagnostics +--- +- kind: + BadStrStripCall: + kind: Strip + location: + row: 2 + column: 20 + end_location: + row: 2 + column: 27 + fix: + content: + - "\"Helo\"" + location: + row: 2 + column: 20 + end_location: + row: 2 + column: 27 + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 5 + column: 20 + end_location: + row: 5 + column: 27 + fix: + content: + - "\"Helo\"" + location: + row: 5 + column: 20 + end_location: + row: 5 + column: 27 + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 8 + column: 20 + end_location: + row: 8 + column: 28 + fix: + content: + - "u\"Helo\"" + location: + row: 8 + column: 20 + end_location: + row: 8 + column: 28 + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 11 + column: 20 + end_location: + row: 11 + column: 28 + fix: + content: + - "r\"Helo\"" + location: + row: 11 + column: 20 + end_location: + row: 11 + column: 28 + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 14 + column: 20 + end_location: + row: 14 + column: 29 + fix: + content: + - "\"Hel\\to\"" + location: + row: 14 + column: 20 + end_location: + row: 14 + column: 29 + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 17 + column: 20 + end_location: + row: 17 + column: 30 + fix: + content: + - "r\"He\\tlo\"" + location: + row: 17 + column: 20 + end_location: + row: 17 + column: 30 + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 20 + column: 20 + end_location: + row: 20 + column: 29 + fix: + content: + - "\"Hel\\\\o\"" + location: + row: 20 + column: 20 + end_location: + row: 20 + column: 29 + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 23 + column: 20 + end_location: + row: 23 + column: 30 + fix: + content: + - "r\"He\\lo\"" + location: + row: 23 + column: 20 + end_location: + row: 23 + column: 30 + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 26 + column: 20 + end_location: + row: 26 + column: 29 + fix: + content: + - "\"🤣🙃👀😀\"" + location: + row: 26 + column: 20 + end_location: + row: 26 + column: 29 + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 30 + column: 4 + end_location: + row: 32 + column: 3 + fix: ~ + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 36 + column: 20 + end_location: + row: 38 + column: 29 + fix: ~ + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 42 + column: 4 + end_location: + row: 44 + column: 13 + fix: ~ + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 49 + column: 4 + end_location: + row: 51 + column: 13 + fix: ~ + parent: ~ +- kind: + BadStrStripCall: + kind: Strip + location: + row: 61 + column: 10 + end_location: + row: 61 + column: 19 + fix: + content: + - "'htp:/'" + location: + row: 61 + column: 10 + end_location: + row: 61 + column: 19 + parent: ~ +- kind: + BadStrStripCall: + kind: LStrip + location: + row: 64 + column: 11 + end_location: + row: 64 + column: 20 + fix: + content: + - "'htp:/'" + location: + row: 64 + column: 11 + end_location: + row: 64 + column: 20 + parent: ~ +- kind: + BadStrStripCall: + kind: RStrip + location: + row: 67 + column: 11 + end_location: + row: 67 + column: 20 + fix: + content: + - "'htp:/'" + location: + row: 67 + column: 11 + end_location: + row: 67 + column: 20 + parent: ~ + diff --git a/ruff.schema.json b/ruff.schema.json index e18e7b2722..139e724fac 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -1674,6 +1674,9 @@ "PLE11", "PLE114", "PLE1142", + "PLE13", + "PLE131", + "PLE1310", "PLR", "PLR0", "PLR01",