[refurb] Count codepoints not bytes for `slice-to-remove-prefix-or-suffix (FURB188)` (#13631)

This commit is contained in:
Dylan 2024-10-07 09:13:28 -05:00 committed by GitHub
parent 27ac34d683
commit 14ee5dbfde
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 114 additions and 7 deletions

View File

@ -170,3 +170,31 @@ def ignore_step():
if text.startswith("!"): if text.startswith("!"):
text = text[1::2] text = text[1::2]
print(text) print(text)
def handle_unicode():
# should be skipped: "ř" is a single codepoint, so `text[2:]` drops more than the prefix
text = "řetězec"
if text.startswith("ř"):
text = text[2:]
# should be linted
# with fix `text = text.removeprefix("ř")`
text = "řetězec"
if text.startswith("ř"):
text = text[1:]
def handle_surrogates():
# should be linted
text = "\ud800\udc00heythere"
if text.startswith("\ud800\udc00"):
text = text[2:]
text = "\U00010000heythere"
if text.startswith("\U00010000"):
text = text[1:]
# should not be linted
text = "\ud800\udc00heythere"
if text.startswith("\ud800\udc00"):
text = text[1:]  # bound 1 differs from the bound 2 that is linted for this same prefix above

View File

@ -4,7 +4,7 @@ use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast; use ruff_python_ast as ast;
use ruff_python_semantic::SemanticModel; use ruff_python_semantic::SemanticModel;
use ruff_source_file::Locator; use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen}; use ruff_text_size::Ranged;
/// ## What it does /// ## What it does
/// Checks for the removal of a prefix or suffix from a string by assigning /// Checks for the removal of a prefix or suffix from a string by assigning
@ -334,8 +334,9 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) -
}), }),
) => num ) => num
.as_int() .as_int()
.and_then(ast::Int::as_u32) // Only support prefix removal for size at most `u32::MAX` // Only support prefix removal for size at most `usize::MAX`
.is_some_and(|x| x == string_val.to_str().text_len().to_u32()), .and_then(ast::Int::as_usize)
.is_some_and(|x| x == string_val.chars().count()),
( (
AffixKind::StartsWith, AffixKind::StartsWith,
ast::Expr::Call(ast::ExprCall { ast::Expr::Call(ast::ExprCall {
@ -369,8 +370,8 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) -
// Only support prefix removal for size at most `u32::MAX` // Only support prefix removal for size at most `u32::MAX`
value value
.as_int() .as_int()
.and_then(ast::Int::as_u32) .and_then(ast::Int::as_usize)
.is_some_and(|x| x == string_val.to_str().text_len().to_u32()) .is_some_and(|x| x == string_val.chars().count())
}, },
), ),
( (

View File

@ -251,3 +251,73 @@ FURB188.py:162:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing
164 163 | print(text) 164 163 | print(text)
165 164 | 165 164 |
166 165 | 166 165 |
FURB188.py:183:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
|
181 | # with fix `text = text.removeprefix("ř")`
182 | text = "řetězec"
183 | if text.startswith("ř"):
| _____^
184 | | text = text[1:]
| |_______________________^ FURB188
|
= help: Use removeprefix instead of assignment conditional upon startswith.
Safe fix
180 180 | # should be linted
181 181 | # with fix `text = text.removeprefix("ř")`
182 182 | text = "řetězec"
183 |- if text.startswith("ř"):
184 |- text = text[1:]
183 |+ text = text.removeprefix("ř")
185 184 |
186 185 |
187 186 | def handle_surrogates():
FURB188.py:190:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
|
188 | # should be linted
189 | text = "\ud800\udc00heythere"
190 | if text.startswith("\ud800\udc00"):
| _____^
191 | | text = text[2:]
| |_______________________^ FURB188
192 | text = "\U00010000heythere"
193 | if text.startswith("\U00010000"):
|
= help: Use removeprefix instead of assignment conditional upon startswith.
Safe fix
187 187 | def handle_surrogates():
188 188 | # should be linted
189 189 | text = "\ud800\udc00heythere"
190 |- if text.startswith("\ud800\udc00"):
191 |- text = text[2:]
190 |+ text = text.removeprefix("\ud800\udc00")
192 191 | text = "\U00010000heythere"
193 192 | if text.startswith("\U00010000"):
194 193 | text = text[1:]
FURB188.py:193:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
|
191 | text = text[2:]
192 | text = "\U00010000heythere"
193 | if text.startswith("\U00010000"):
| _____^
194 | | text = text[1:]
| |_______________________^ FURB188
195 |
196 | # should not be linted
|
= help: Use removeprefix instead of assignment conditional upon startswith.
Safe fix
190 190 | if text.startswith("\ud800\udc00"):
191 191 | text = text[2:]
192 192 | text = "\U00010000heythere"
193 |- if text.startswith("\U00010000"):
194 |- text = text[1:]
193 |+ text = text.removeprefix("\U00010000")
195 194 |
196 195 | # should not be linted
197 196 | text = "\ud800\udc00heythere"

View File

@ -96,6 +96,14 @@ impl Int {
} }
} }
/// Return the [`Int`] as a `usize`, if it can be represented as that data type.
///
/// Yields `None` when the value is stored as `Number::Big`, or when
/// `usize::try_from` rejects the small value.
pub fn as_usize(&self) -> Option<usize> {
    if let Number::Small(value) = &self.0 {
        usize::try_from(*value).ok()
    } else {
        None
    }
}
/// Return the [`Int`] as an i8, if it can be represented as that data type. /// Return the [`Int`] as an i8, if it can be represented as that data type.
pub fn as_i8(&self) -> Option<i8> { pub fn as_i8(&self) -> Option<i8> {
match &self.0 { match &self.0 {