[refurb] Count codepoints not bytes for `slice-to-remove-prefix-or-suffix (FURB188)` (#13631)

This commit is contained in:
Dylan 2024-10-07 09:13:28 -05:00 committed by GitHub
parent 27ac34d683
commit 14ee5dbfde
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 114 additions and 7 deletions

View File

@ -169,4 +169,32 @@ def ignore_step():
text = "!x!y!z"
if text.startswith("!"):
text = text[1::2]
print(text)
print(text)
def handle_unicode():
# Fixture: the slice bound has to equal the prefix length counted in code
# points, not in UTF-8 bytes, for the rule to fire.
# should be skipped!
# ("ř" is a single code point, so a slice bound of 2 does not match its length)
text = "řetězec"
if text.startswith("ř"):
text = text[2:]
# should be linted
# with fix `text = text.removeprefix("ř")`
# (slice bound 1 equals the one-code-point length of "ř")
text = "řetězec"
if text.startswith("ř"):
text = text[1:]
def handle_surrogates():
# Fixture: escapes are measured by the code points they denote.
# should be linted
# ("\ud800\udc00" is two separate surrogate code points in a Python str,
# so a slice bound of 2 matches the prefix length)
text = "\ud800\udc00heythere"
if text.startswith("\ud800\udc00"):
text = text[2:]
# also linted: "\U00010000" is a single code point, so the matching bound is 1
text = "\U00010000heythere"
if text.startswith("\U00010000"):
text = text[1:]
# should not be linted
# (the prefix is two code points long but only one is sliced off)
text = "\ud800\udc00heythere"
if text.startswith("\ud800\udc00"):
text = text[1:]

View File

@ -4,7 +4,7 @@ use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_python_semantic::SemanticModel;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen};
use ruff_text_size::Ranged;
/// ## What it does
/// Checks for the removal of a prefix or suffix from a string by assigning
@ -334,8 +334,9 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) -
}),
) => num
.as_int()
.and_then(ast::Int::as_u32) // Only support prefix removal for size at most `u32::MAX`
.is_some_and(|x| x == string_val.to_str().text_len().to_u32()),
// Only support prefix removal for size at most `usize::MAX`
.and_then(ast::Int::as_usize)
.is_some_and(|x| x == string_val.chars().count()),
(
AffixKind::StartsWith,
ast::Expr::Call(ast::ExprCall {
@ -369,8 +370,8 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) -
// Only support prefix removal for size at most `usize::MAX`
value
.as_int()
.and_then(ast::Int::as_u32)
.is_some_and(|x| x == string_val.to_str().text_len().to_u32())
.and_then(ast::Int::as_usize)
.is_some_and(|x| x == string_val.chars().count())
},
),
(

View File

@ -250,4 +250,74 @@ FURB188.py:162:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing
162 |+ text = text.removeprefix("!")
164 163 | print(text)
165 164 |
166 165 |
166 165 |
FURB188.py:183:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
|
181 | # with fix `text = text.removeprefix("ř")`
182 | text = "řetězec"
183 | if text.startswith("ř"):
| _____^
184 | | text = text[1:]
| |_______________________^ FURB188
|
= help: Use removeprefix instead of assignment conditional upon startswith.
Safe fix
180 180 | # should be linted
181 181 | # with fix `text = text.removeprefix("ř")`
182 182 | text = "řetězec"
183 |- if text.startswith("ř"):
184 |- text = text[1:]
183 |+ text = text.removeprefix("ř")
185 184 |
186 185 |
187 186 | def handle_surrogates():
FURB188.py:190:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
|
188 | # should be linted
189 | text = "\ud800\udc00heythere"
190 | if text.startswith("\ud800\udc00"):
| _____^
191 | | text = text[2:]
| |_______________________^ FURB188
192 | text = "\U00010000heythere"
193 | if text.startswith("\U00010000"):
|
= help: Use removeprefix instead of assignment conditional upon startswith.
Safe fix
187 187 | def handle_surrogates():
188 188 | # should be linted
189 189 | text = "\ud800\udc00heythere"
190 |- if text.startswith("\ud800\udc00"):
191 |- text = text[2:]
190 |+ text = text.removeprefix("\ud800\udc00")
192 191 | text = "\U00010000heythere"
193 192 | if text.startswith("\U00010000"):
194 193 | text = text[1:]
FURB188.py:193:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
|
191 | text = text[2:]
192 | text = "\U00010000heythere"
193 | if text.startswith("\U00010000"):
| _____^
194 | | text = text[1:]
| |_______________________^ FURB188
195 |
196 | # should not be linted
|
= help: Use removeprefix instead of assignment conditional upon startswith.
Safe fix
190 190 | if text.startswith("\ud800\udc00"):
191 191 | text = text[2:]
192 192 | text = "\U00010000heythere"
193 |- if text.startswith("\U00010000"):
194 |- text = text[1:]
193 |+ text = text.removeprefix("\U00010000")
195 194 |
196 195 | # should not be linted
197 196 | text = "\ud800\udc00heythere"

View File

@ -96,6 +96,14 @@ impl Int {
}
}
/// Return the [`Int`] as a usize, if it can be represented as that data type.
///
/// Yields `None` for `Number::Big` values, and for `Number::Small` values
/// that `usize::try_from` rejects.
pub fn as_usize(&self) -> Option<usize> {
match &self.0 {
Number::Small(small) => usize::try_from(*small).ok(),
Number::Big(_) => None,
}
}
/// Return the [`Int`] as an i8, if it can be represented as that data type.
pub fn as_i8(&self) -> Option<i8> {
match &self.0 {