From b9139a31d5654498e1b70937768458d2ca4f4aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20Lepp=C3=A4nen?= Date: Sun, 28 Jan 2024 21:29:04 +0200 Subject: [PATCH] [`flake8-pie`] Omit bound tuples passed to `.startswith` or `.endswith` (#9661) ## Summary This review contains a fix for [PIE810](https://docs.astral.sh/ruff/rules/multiple-starts-ends-with/) (multiple-starts-ends-with) The problem is that ruff suggests combining multiple startswith/endswith calls into a single call even though there might be a call with tuple of strs. This leads to calling startswith/endswith with tuple of tuple of strs which is incorrect and violates startswith/endswith conctract and results in runtime failure. However the following will be valid and fixed correctly => ```python x = ("hello", "world") y = "h" z = "w" msg = "hello world" if msg.startswith(x) or msg.startswith(y) or msg.startswith(z) : sys.exit(1) ``` ``` ruff --fix --select PIE810 --unsafe-fixes ``` => ```python if msg.startswith(x) or msg.startswith((y,z)): sys.exit(1) ``` See: https://github.com/astral-sh/ruff/issues/8906 ## Test Plan ```bash cargo test ``` --- .../test/fixtures/flake8_pie/PIE810.py | 59 +++++++++++++++++++ .../rules/multiple_starts_ends_with.rs | 39 +++++++++++- ...__flake8_pie__tests__PIE810_PIE810.py.snap | 46 ++++++++++++++- 3 files changed, 139 insertions(+), 5 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_pie/PIE810.py b/crates/ruff_linter/resources/test/fixtures/flake8_pie/PIE810.py index ba64f2a44d..34a02deaf8 100644 --- a/crates/ruff_linter/resources/test/fixtures/flake8_pie/PIE810.py +++ b/crates/ruff_linter/resources/test/fixtures/flake8_pie/PIE810.py @@ -9,6 +9,22 @@ obj.startswith(foo) or obj.startswith("foo") # error obj.endswith(foo) or obj.startswith(foo) or obj.startswith("foo") +def func(): + msg = "hello world" + + x = "h" + y = ("h", "e", "l", "l", "o") + z = "w" + + if msg.startswith(x) or msg.startswith(y) or msg.startswith(z): # Error + print("yes") + +def func(): + msg = "hello world" + + if msg.startswith(("h", "e", "l", "l", "o")) or msg.startswith("h") or msg.startswith("w"): # Error + print("yes") + # ok obj.startswith(("foo", "bar")) # ok @@ -17,3 +33,46 @@ obj.endswith(("foo", "bar")) obj.startswith("foo") or obj.endswith("bar") # ok obj.startswith("foo") or abc.startswith("bar") + +def func(): + msg = "hello world" + + x = "h" + y = ("h", "e", "l", "l", "o") + + if msg.startswith(x) or msg.startswith(y): # OK + print("yes") + +def func(): + msg = "hello world" + + y = ("h", "e", "l", "l", "o") + + if msg.startswith(y): # OK + print("yes") + +def func(): + msg = "hello world" + + y = ("h", "e", "l", "l", "o") + + if msg.startswith(y) or msg.startswith(y): # OK + print("yes") + +def func(): + msg = "hello world" + + y = ("h", "e", "l", "l", "o") + x = ("w", "o", "r", "l", "d") + + if msg.startswith(y) or msg.startswith(x) or msg.startswith("h"): # OK + print("yes") + +def func(): + msg = "hello world" + + y = ("h", "e", "l", "l", "o") + x = ("w", "o", "r", "l", "d") + + if msg.startswith(y) or msg.endswith(x) or msg.startswith("h"): # OK + print("yes") diff --git a/crates/ruff_linter/src/rules/flake8_pie/rules/multiple_starts_ends_with.rs b/crates/ruff_linter/src/rules/flake8_pie/rules/multiple_starts_ends_with.rs index 8c95a77556..ea2e45230b 100644 --- a/crates/ruff_linter/src/rules/flake8_pie/rules/multiple_starts_ends_with.rs +++ b/crates/ruff_linter/src/rules/flake8_pie/rules/multiple_starts_ends_with.rs @@ -3,6 +3,7 @@ use std::iter; use itertools::Either::{Left, Right}; +use ruff_python_semantic::{analyze, SemanticModel}; use ruff_text_size::{Ranged, TextRange}; use ruff_python_ast::{self as ast, Arguments, BoolOp, Expr, ExprContext, Identifier}; @@ -36,6 +37,14 @@ use crate::checkers::ast::Checker; /// print("Greetings!") /// ``` /// +/// ## Fix safety +/// This rule's fix is unsafe, as in some cases, it will be unable to determine +/// whether the argument to an existing `.startswith` or `.endswith` call is a +/// tuple. For example, given `msg.startswith(x) or msg.startswith(y)`, if `x` +/// or `y` is a tuple, and the semantic model is unable to detect it as such, +/// the rule will suggest `msg.startswith((x, y))`, which will error at +/// runtime. +/// /// ## References /// - [Python documentation: `str.startswith`](https://docs.python.org/3/library/stdtypes.html#str.startswith) /// - [Python documentation: `str.endswith`](https://docs.python.org/3/library/stdtypes.html#str.endswith) @@ -84,10 +93,14 @@ pub(crate) fn multiple_starts_ends_with(checker: &mut Checker, expr: &Expr) { continue; }; - if !(args.len() == 1 && keywords.is_empty()) { + if !keywords.is_empty() { continue; } + let [arg] = args.as_slice() else { + continue; + }; + let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() else { continue; }; @@ -99,6 +112,13 @@ pub(crate) fn multiple_starts_ends_with(checker: &mut Checker, expr: &Expr) { continue; }; + // If the argument is bound to a tuple, skip it, since we don't want to suggest + // `startswith((x, y))` where `x` or `y` are tuples. (Tuple literals are okay, since we + // inline them below.) + if is_bound_to_tuple(arg, checker.semantic()) { + continue; + } + duplicates .entry((attr.as_str(), arg_name.as_str())) .or_insert_with(Vec::new) @@ -149,7 +169,7 @@ pub(crate) fn multiple_starts_ends_with(checker: &mut Checker, expr: &Expr) { Right(iter::once(*value)) } }) - .map(Clone::clone) + .cloned() .collect(), ctx: ExprContext::Load, range: TextRange::default(), @@ -202,3 +222,18 @@ pub(crate) fn multiple_starts_ends_with(checker: &mut Checker, expr: &Expr) { } } } + +/// Returns `true` if the expression definitively resolves to a tuple (e.g., `x` in `x = (1, 2)`). +fn is_bound_to_tuple(arg: &Expr, semantic: &SemanticModel) -> bool { + let Expr::Name(ast::ExprName { id, .. }) = arg else { + return false; + }; + + let Some(binding_id) = semantic.lookup_symbol(id.as_str()) else { + return false; + }; + + let binding = semantic.binding(binding_id); + + analyze::typing::is_tuple(binding, semantic) +} diff --git a/crates/ruff_linter/src/rules/flake8_pie/snapshots/ruff_linter__rules__flake8_pie__tests__PIE810_PIE810.py.snap b/crates/ruff_linter/src/rules/flake8_pie/snapshots/ruff_linter__rules__flake8_pie__tests__PIE810_PIE810.py.snap index f871e12428..d2d29d693f 100644 --- a/crates/ruff_linter/src/rules/flake8_pie/snapshots/ruff_linter__rules__flake8_pie__tests__PIE810_PIE810.py.snap +++ b/crates/ruff_linter/src/rules/flake8_pie/snapshots/ruff_linter__rules__flake8_pie__tests__PIE810_PIE810.py.snap @@ -89,7 +89,7 @@ PIE810.py:10:1: PIE810 [*] Call `startswith` once with a `tuple` 10 | obj.endswith(foo) or obj.startswith(foo) or obj.startswith("foo") | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PIE810 11 | -12 | # ok +12 | def func(): | = help: Merge into a single `startswith` call @@ -100,7 +100,47 @@ PIE810.py:10:1: PIE810 [*] Call `startswith` once with a `tuple` 10 |-obj.endswith(foo) or obj.startswith(foo) or obj.startswith("foo") 10 |+obj.endswith(foo) or obj.startswith((foo, "foo")) 11 11 | -12 12 | # ok -13 13 | obj.startswith(("foo", "bar")) +12 12 | def func(): +13 13 | msg = "hello world" + +PIE810.py:19:8: PIE810 [*] Call `startswith` once with a `tuple` + | +17 | z = "w" +18 | +19 | if msg.startswith(x) or msg.startswith(y) or msg.startswith(z): # Error + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PIE810 +20 | print("yes") + | + = help: Merge into a single `startswith` call + +ℹ Unsafe fix +16 16 | y = ("h", "e", "l", "l", "o") +17 17 | z = "w" +18 18 | +19 |- if msg.startswith(x) or msg.startswith(y) or msg.startswith(z): # Error + 19 |+ if msg.startswith((x, z)) or msg.startswith(y): # Error +20 20 | print("yes") +21 21 | +22 22 | def func(): + +PIE810.py:25:8: PIE810 [*] Call `startswith` once with a `tuple` + | +23 | msg = "hello world" +24 | +25 | if msg.startswith(("h", "e", "l", "l", "o")) or msg.startswith("h") or msg.startswith("w"): # Error + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PIE810 +26 | print("yes") + | + = help: Merge into a single `startswith` call + +ℹ Unsafe fix +22 22 | def func(): +23 23 | msg = "hello world" +24 24 | +25 |- if msg.startswith(("h", "e", "l", "l", "o")) or msg.startswith("h") or msg.startswith("w"): # Error + 25 |+ if msg.startswith(("h", "e", "l", "l", "o", "h", "w")): # Error +26 26 | print("yes") +27 27 | +28 28 | # ok