mirror of https://github.com/astral-sh/ruff
Update the `invalid-escape-sequence` rule (#5359)
Just a couple of small tweaks based on reading the rule with fresh eyes and new best practices.
This commit is contained in:
parent
b233763156
commit
1fe4073b56
|
|
@ -1,10 +1,9 @@
|
|||
use anyhow::{bail, Result};
|
||||
use log::error;
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
|
||||
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_ast::source_code::Locator;
|
||||
use ruff_python_ast::str::{leading_quote, trailing_quote};
|
||||
|
||||
/// ## What it does
|
||||
/// Checks for invalid escape sequences.
|
||||
|
|
@ -21,6 +20,9 @@ use ruff_python_ast::source_code::Locator;
|
|||
/// ```python
|
||||
/// regex = r"\.png$"
|
||||
/// ```
|
||||
///
|
||||
/// ## References
|
||||
/// - [Python documentation: String and Bytes literals](https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals)
|
||||
#[violation]
|
||||
pub struct InvalidEscapeSequence(char);
|
||||
|
||||
|
|
@ -36,24 +38,6 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence {
|
|||
}
|
||||
}
|
||||
|
||||
// See: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
|
||||
const VALID_ESCAPE_SEQUENCES: &[char; 23] = &[
|
||||
'\n', '\\', '\'', '"', 'a', 'b', 'f', 'n', 'r', 't', 'v', '0', '1', '2', '3', '4', '5', '6',
|
||||
'7', 'x', // Escape sequences only recognized in string literals
|
||||
'N', 'u', 'U',
|
||||
];
|
||||
|
||||
/// Return the quotation markers used for a String token.
|
||||
fn extract_quote(text: &str) -> Result<&str> {
|
||||
for quote in ["'''", "\"\"\"", "'", "\""] {
|
||||
if text.ends_with(quote) {
|
||||
return Ok(quote);
|
||||
}
|
||||
}
|
||||
|
||||
bail!("Unable to find quotation mark for String token")
|
||||
}
|
||||
|
||||
/// W605
|
||||
pub(crate) fn invalid_escape_sequence(
|
||||
locator: &Locator,
|
||||
|
|
@ -65,17 +49,19 @@ pub(crate) fn invalid_escape_sequence(
|
|||
let text = locator.slice(range);
|
||||
|
||||
// Determine whether the string is single- or triple-quoted.
|
||||
let Ok(quote) = extract_quote(text) else {
|
||||
error!("Unable to find quotation mark for string token");
|
||||
let Some(leading_quote) = leading_quote(text) else {
|
||||
return diagnostics;
|
||||
};
|
||||
let quote_pos = text.find(quote).unwrap();
|
||||
let prefix = &text[..quote_pos];
|
||||
let body = &text[quote_pos + quote.len()..text.len() - quote.len()];
|
||||
let Some(trailing_quote) = trailing_quote(text) else {
|
||||
return diagnostics;
|
||||
};
|
||||
let body = &text[leading_quote.len()..text.len() - trailing_quote.len()];
|
||||
|
||||
if !prefix.contains(['r', 'R']) {
|
||||
let start_offset =
|
||||
range.start() + TextSize::try_from(quote_pos).unwrap() + quote.text_len();
|
||||
if leading_quote.contains(['r', 'R']) {
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
let start_offset = range.start() + TextSize::try_from(leading_quote.len()).unwrap();
|
||||
|
||||
let mut chars_iter = body.char_indices().peekable();
|
||||
|
||||
|
|
@ -102,7 +88,34 @@ pub(crate) fn invalid_escape_sequence(
|
|||
}
|
||||
|
||||
// If the next character is a valid escape sequence, skip.
|
||||
if VALID_ESCAPE_SEQUENCES.contains(next_char) {
|
||||
// See: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals.
|
||||
if matches!(
|
||||
next_char,
|
||||
'\n'
|
||||
| '\\'
|
||||
| '\''
|
||||
| '"'
|
||||
| 'a'
|
||||
| 'b'
|
||||
| 'f'
|
||||
| 'n'
|
||||
| 'r'
|
||||
| 't'
|
||||
| 'v'
|
||||
| '0'
|
||||
| '1'
|
||||
| '2'
|
||||
| '3'
|
||||
| '4'
|
||||
| '5'
|
||||
| '6'
|
||||
| '7'
|
||||
| 'x'
|
||||
// Escape sequences only recognized in string literals
|
||||
| 'N'
|
||||
| 'u'
|
||||
| 'U'
|
||||
) {
|
||||
contains_valid_escape_sequence = true;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -140,12 +153,11 @@ pub(crate) fn invalid_escape_sequence(
|
|||
} else {
|
||||
"r".to_string()
|
||||
},
|
||||
range.start() + TextSize::try_from(quote_pos).unwrap(),
|
||||
range.start(),
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
diagnostics
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue