From 2b03bd18f4fc8090fb2342befaa6e6c4bc0c12b4 Mon Sep 17 00:00:00 2001 From: Tom Kuson Date: Thu, 13 Jul 2023 02:32:34 +0100 Subject: [PATCH] Implement Pylint `consider-using-in` (#5193) ## Summary Implement Pylint rule [`consider-using-in` (`R1714`)](https://pylint.pycqa.org/en/latest/user_guide/messages/refactor/consider-using-in.html) as `repeated-equality-comparison-target` (`PLR1714`). This rule checks for expressions that can be re-written as a membership test for better readability and performance. For example, ```python foo == "bar" or foo == "baz" or foo == "qux" ``` should be rewritten as ```python foo in {"bar", "baz", "qux"} ``` Related to #970. Includes documentation. ### Implementation quirks The implementation does not work with Yoda conditions (e.g., `"a" == foo` instead of `foo == "a"`). The Pylint version does. I couldn't find a way of supporting Yoda-style conditions without it being inefficient, so didn't (I don't think people write Yoda conditions any way). ## Test Plan Added fixture. `cargo test` --- .../repeated_equality_comparison_target.py | 34 ++++ crates/ruff/src/checkers/ast/mod.rs | 15 +- crates/ruff/src/codes.rs | 1 + crates/ruff/src/rules/pylint/mod.rs | 4 + crates/ruff/src/rules/pylint/rules/mod.rs | 2 + .../repeated_equality_comparison_target.rs | 151 ++++++++++++++++++ ...epeated_equality_comparison_target.py.snap | 53 ++++++ .../rules/ruff/rules/invalid_index_type.rs | 5 +- ruff.schema.json | 1 + 9 files changed, 260 insertions(+), 6 deletions(-) create mode 100644 crates/ruff/resources/test/fixtures/pylint/repeated_equality_comparison_target.py create mode 100644 crates/ruff/src/rules/pylint/rules/repeated_equality_comparison_target.rs create mode 100644 crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLR1714_repeated_equality_comparison_target.py.snap diff --git a/crates/ruff/resources/test/fixtures/pylint/repeated_equality_comparison_target.py b/crates/ruff/resources/test/fixtures/pylint/repeated_equality_comparison_target.py new file mode 100644 index 0000000000..f82e19a761 --- /dev/null +++ b/crates/ruff/resources/test/fixtures/pylint/repeated_equality_comparison_target.py @@ -0,0 +1,34 @@ +# Errors. +foo == "a" or foo == "b" + +foo != "a" and foo != "b" + +foo == "a" or foo == "b" or foo == "c" + +foo != "a" and foo != "b" and foo != "c" + +foo == a or foo == "b" or foo == 3 # Mixed types. + +# False negatives (the current implementation doesn't support Yoda conditions). +"a" == foo or "b" == foo or "c" == foo + +"a" != foo and "b" != foo and "c" != foo + +"a" == foo or foo == "b" or "c" == foo + +# OK +foo == "a" and foo == "b" and foo == "c" # `and` mixed with `==`. + +foo != "a" or foo != "b" or foo != "c" # `or` mixed with `!=`. + +foo == a or foo == b() or foo == c # Call expression. + +foo != a or foo() != b or foo != c # Call expression. + +foo in {"a", "b", "c"} # Uses membership test already. + +foo not in {"a", "b", "c"} # Uses membership test already. + +foo == "a" # Single comparison. + +foo != "a" # Single comparison. diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index 9fac7dde22..2a92755808 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -3484,11 +3484,13 @@ where } } } - Expr::BoolOp(ast::ExprBoolOp { - op, - values, - range: _, - }) => { + Expr::BoolOp( + bool_op @ ast::ExprBoolOp { + op, + values, + range: _, + }, + ) => { if self.enabled(Rule::RepeatedIsinstanceCalls) { pylint::rules::repeated_isinstance_calls(self, expr, *op, values); } @@ -3513,6 +3515,9 @@ where if self.enabled(Rule::ExprAndFalse) { flake8_simplify::rules::expr_and_false(self, expr); } + if self.enabled(Rule::RepeatedEqualityComparisonTarget) { + pylint::rules::repeated_equality_comparison_target(self, bool_op); + } } _ => {} }; diff --git a/crates/ruff/src/codes.rs b/crates/ruff/src/codes.rs index 704deb1437..27d20325d7 100644 --- a/crates/ruff/src/codes.rs +++ b/crates/ruff/src/codes.rs @@ -209,6 +209,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "R0915") => (RuleGroup::Unspecified, rules::pylint::rules::TooManyStatements), (Pylint, "R1701") => (RuleGroup::Unspecified, rules::pylint::rules::RepeatedIsinstanceCalls), (Pylint, "R1711") => (RuleGroup::Unspecified, rules::pylint::rules::UselessReturn), + (Pylint, "R1714") => (RuleGroup::Unspecified, rules::pylint::rules::RepeatedEqualityComparisonTarget), (Pylint, "R1722") => (RuleGroup::Unspecified, rules::pylint::rules::SysExitAlias), (Pylint, "R2004") => (RuleGroup::Unspecified, rules::pylint::rules::MagicValueComparison), (Pylint, "R5501") => (RuleGroup::Unspecified, rules::pylint::rules::CollapsibleElseIf), diff --git a/crates/ruff/src/rules/pylint/mod.rs b/crates/ruff/src/rules/pylint/mod.rs index f6f5330d06..c182a14b23 100644 --- a/crates/ruff/src/rules/pylint/mod.rs +++ b/crates/ruff/src/rules/pylint/mod.rs @@ -112,6 +112,10 @@ mod tests { )] #[test_case(Rule::YieldInInit, Path::new("yield_in_init.py"))] #[test_case(Rule::NestedMinMax, Path::new("nested_min_max.py"))] + #[test_case( + Rule::RepeatedEqualityComparisonTarget, + Path::new("repeated_equality_comparison_target.py") + )] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff/src/rules/pylint/rules/mod.rs b/crates/ruff/src/rules/pylint/rules/mod.rs index 551abb1ada..829c9e6916 100644 --- a/crates/ruff/src/rules/pylint/rules/mod.rs +++ b/crates/ruff/src/rules/pylint/rules/mod.rs @@ -29,6 +29,7 @@ pub(crate) use nested_min_max::*; pub(crate) use nonlocal_without_binding::*; pub(crate) use property_with_parameters::*; pub(crate) use redefined_loop_name::*; +pub(crate) use repeated_equality_comparison_target::*; pub(crate) use repeated_isinstance_calls::*; pub(crate) use return_in_init::*; pub(crate) use single_string_slots::*; @@ -79,6 +80,7 @@ mod nested_min_max; mod nonlocal_without_binding; mod property_with_parameters; mod redefined_loop_name; +mod repeated_equality_comparison_target; mod repeated_isinstance_calls; mod return_in_init; mod single_string_slots; diff --git a/crates/ruff/src/rules/pylint/rules/repeated_equality_comparison_target.rs b/crates/ruff/src/rules/pylint/rules/repeated_equality_comparison_target.rs new file mode 100644 index 0000000000..78608cd005 --- /dev/null +++ b/crates/ruff/src/rules/pylint/rules/repeated_equality_comparison_target.rs @@ -0,0 +1,151 @@ +use std::hash::BuildHasherDefault; +use std::ops::Deref; + +use itertools::{any, Itertools}; +use rustc_hash::FxHashMap; +use rustpython_parser::ast::{BoolOp, CmpOp, Expr, ExprBoolOp, ExprCompare, Ranged}; + +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::hashable::HashableExpr; +use ruff_python_ast::source_code::Locator; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for repeated equality comparisons that can rewritten as a membership +/// test. +/// +/// ## Why is this bad? +/// To check if a variable is equal to one of many values, it is common to +/// write a series of equality comparisons (e.g., +/// `foo == "bar" or foo == "baz"). +/// +/// Instead, prefer to combine the values into a collection and use the `in` +/// operator to check for membership, which is more performant and succinct. +/// If the items are hashable, use a `set` for efficiency; otherwise, use a +/// `tuple`. +/// +/// ## Example +/// ```python +/// foo == "bar" or foo == "baz" or foo == "qux" +/// ``` +/// +/// Use instead: +/// ```python +/// foo in {"bar", "baz", "qux"} +/// ``` +/// +/// ## References +/// - [Python documentation: Comparisons](https://docs.python.org/3/reference/expressions.html#comparisons) +/// - [Python documentation: Membership test operations](https://docs.python.org/3/reference/expressions.html#membership-test-operations) +/// - [Python documentation: `set`](https://docs.python.org/3/library/stdtypes.html#set) +#[violation] +pub struct RepeatedEqualityComparisonTarget { + expr: String, +} + +impl Violation for RepeatedEqualityComparisonTarget { + #[derive_message_formats] + fn message(&self) -> String { + let RepeatedEqualityComparisonTarget { expr } = self; + format!( + "Consider merging multiple comparisons: `{expr}`. Use a `set` if the elements are hashable." + ) + } +} + +/// PLR1714 +pub(crate) fn repeated_equality_comparison_target(checker: &mut Checker, bool_op: &ExprBoolOp) { + if bool_op + .values + .iter() + .any(|value| !is_allowed_value(bool_op.op, value)) + { + return; + } + + let mut left_to_comparators: FxHashMap)> = + FxHashMap::with_capacity_and_hasher(bool_op.values.len(), BuildHasherDefault::default()); + for value in &bool_op.values { + if let Expr::Compare(ExprCompare { + left, comparators, .. + }) = value + { + let (count, matches) = left_to_comparators + .entry(left.deref().into()) + .or_insert_with(|| (0, Vec::new())); + *count += 1; + matches.extend(comparators); + } + } + + for (left, (count, comparators)) in left_to_comparators { + if count > 1 { + checker.diagnostics.push(Diagnostic::new( + RepeatedEqualityComparisonTarget { + expr: merged_membership_test( + left.as_expr(), + bool_op.op, + &comparators, + checker.locator, + ), + }, + bool_op.range(), + )); + } + } +} + +/// Return `true` if the given expression is compatible with a membership test. +/// E.g., `==` operators can be joined with `or` and `!=` operators can be +/// joined with `and`. +fn is_allowed_value(bool_op: BoolOp, value: &Expr) -> bool { + let Expr::Compare(ExprCompare { + left, + ops, + comparators, + .. + }) = value + else { + return false; + }; + + ops.iter().all(|op| { + if match bool_op { + BoolOp::Or => !matches!(op, CmpOp::Eq), + BoolOp::And => !matches!(op, CmpOp::NotEq), + } { + return false; + } + + if left.is_call_expr() { + return false; + } + + if any(comparators.iter(), Expr::is_call_expr) { + return false; + } + + true + }) +} + +/// Generate a string like `obj in (a, b, c)` or `obj not in (a, b, c)`. +fn merged_membership_test( + left: &Expr, + op: BoolOp, + comparators: &[&Expr], + locator: &Locator, +) -> String { + let op = match op { + BoolOp::Or => "in", + BoolOp::And => "not in", + }; + let left = locator.slice(left.range()); + let members = comparators + .iter() + .map(|comparator| locator.slice(comparator.range())) + .join(", "); + format!("{left} {op} ({members})",) +} diff --git a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLR1714_repeated_equality_comparison_target.py.snap b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLR1714_repeated_equality_comparison_target.py.snap new file mode 100644 index 0000000000..9fa0ddc667 --- /dev/null +++ b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLR1714_repeated_equality_comparison_target.py.snap @@ -0,0 +1,53 @@ +--- +source: crates/ruff/src/rules/pylint/mod.rs +--- +repeated_equality_comparison_target.py:2:1: PLR1714 Consider merging multiple comparisons: `foo in ("a", "b")`. Use a `set` if the elements are hashable. + | +1 | # Errors. +2 | foo == "a" or foo == "b" + | ^^^^^^^^^^^^^^^^^^^^^^^^ PLR1714 +3 | +4 | foo != "a" and foo != "b" + | + +repeated_equality_comparison_target.py:4:1: PLR1714 Consider merging multiple comparisons: `foo not in ("a", "b")`. Use a `set` if the elements are hashable. + | +2 | foo == "a" or foo == "b" +3 | +4 | foo != "a" and foo != "b" + | ^^^^^^^^^^^^^^^^^^^^^^^^^ PLR1714 +5 | +6 | foo == "a" or foo == "b" or foo == "c" + | + +repeated_equality_comparison_target.py:6:1: PLR1714 Consider merging multiple comparisons: `foo in ("a", "b", "c")`. Use a `set` if the elements are hashable. + | +4 | foo != "a" and foo != "b" +5 | +6 | foo == "a" or foo == "b" or foo == "c" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLR1714 +7 | +8 | foo != "a" and foo != "b" and foo != "c" + | + +repeated_equality_comparison_target.py:8:1: PLR1714 Consider merging multiple comparisons: `foo not in ("a", "b", "c")`. Use a `set` if the elements are hashable. + | + 6 | foo == "a" or foo == "b" or foo == "c" + 7 | + 8 | foo != "a" and foo != "b" and foo != "c" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLR1714 + 9 | +10 | foo == a or foo == "b" or foo == 3 # Mixed types. + | + +repeated_equality_comparison_target.py:10:1: PLR1714 Consider merging multiple comparisons: `foo in (a, "b", 3)`. Use a `set` if the elements are hashable. + | + 8 | foo != "a" and foo != "b" and foo != "c" + 9 | +10 | foo == a or foo == "b" or foo == 3 # Mixed types. + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLR1714 +11 | +12 | # False negatives (the current implementation doesn't support Yoda conditions). + | + + diff --git a/crates/ruff/src/rules/ruff/rules/invalid_index_type.rs b/crates/ruff/src/rules/ruff/rules/invalid_index_type.rs index 399a77dbab..b8e832a62e 100644 --- a/crates/ruff/src/rules/ruff/rules/invalid_index_type.rs +++ b/crates/ruff/src/rules/ruff/rules/invalid_index_type.rs @@ -74,7 +74,10 @@ pub(crate) fn invalid_index_type(checker: &mut Checker, expr: &ExprSubscript) { // The value types supported by this rule should always be checkable let Some(value_type) = CheckableExprType::try_from(value) else { - debug_assert!(false, "Index value must be a checkable type to generate a violation message."); + debug_assert!( + false, + "Index value must be a checkable type to generate a violation message." + ); return; }; diff --git a/ruff.schema.json b/ruff.schema.json index 2fc94daff7..696a7d9cca 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2222,6 +2222,7 @@ "PLR1701", "PLR171", "PLR1711", + "PLR1714", "PLR172", "PLR1722", "PLR2",