Update the `invalid-escape-sequence` rule (#5359)

Just a couple of small tweaks based on rereading the rule with fresh eyes and
newer best practices.
This commit is contained in:
Charlie Marsh 2023-06-25 18:20:31 -04:00 committed by GitHub
parent b233763156
commit 1fe4073b56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 100 additions and 88 deletions

View File

@ -1,10 +1,9 @@
use anyhow::{bail, Result};
use log::error;
use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator; use ruff_python_ast::source_code::Locator;
use ruff_python_ast::str::{leading_quote, trailing_quote};
/// ## What it does /// ## What it does
/// Checks for invalid escape sequences. /// Checks for invalid escape sequences.
@ -21,6 +20,9 @@ use ruff_python_ast::source_code::Locator;
/// ```python /// ```python
/// regex = r"\.png$" /// regex = r"\.png$"
/// ``` /// ```
///
/// ## References
/// - [Python documentation: String and Bytes literals](https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals)
#[violation] #[violation]
pub struct InvalidEscapeSequence(char); pub struct InvalidEscapeSequence(char);
@ -36,24 +38,6 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence {
} }
} }
/// Characters that, when they directly follow a backslash, form a valid
/// escape sequence; the rule skips these instead of reporting them.
///
/// See: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
const VALID_ESCAPE_SEQUENCES: &[char; 23] = &[
'\n', '\\', '\'', '"', 'a', 'b', 'f', 'n', 'r', 't', 'v', '0', '1', '2', '3', '4', '5', '6',
'7', 'x', // Escape sequences only recognized in string literals
'N', 'u', 'U',
];
/// Identify the closing quotation marker of a String token.
///
/// The token text is tested against each supported marker and the first one
/// it ends with is returned.
///
/// # Errors
///
/// Fails when `text` ends with none of the supported markers.
fn extract_quote(text: &str) -> Result<&str> {
    // Triple-quoted markers are listed first so that a string ending in `'''`
    // or `"""` is not mistaken for one ending in a single quote character.
    const CANDIDATES: [&str; 4] = ["'''", "\"\"\"", "'", "\""];

    match CANDIDATES
        .iter()
        .copied()
        .find(|candidate| text.ends_with(*candidate))
    {
        Some(found) => Ok(found),
        None => bail!("Unable to find quotation mark for String token"),
    }
}
/// W605 /// W605
pub(crate) fn invalid_escape_sequence( pub(crate) fn invalid_escape_sequence(
locator: &Locator, locator: &Locator,
@ -65,17 +49,19 @@ pub(crate) fn invalid_escape_sequence(
let text = locator.slice(range); let text = locator.slice(range);
// Determine whether the string is single- or triple-quoted. // Determine whether the string is single- or triple-quoted.
let Ok(quote) = extract_quote(text) else { let Some(leading_quote) = leading_quote(text) else {
error!("Unable to find quotation mark for string token");
return diagnostics; return diagnostics;
}; };
let quote_pos = text.find(quote).unwrap(); let Some(trailing_quote) = trailing_quote(text) else {
let prefix = &text[..quote_pos]; return diagnostics;
let body = &text[quote_pos + quote.len()..text.len() - quote.len()]; };
let body = &text[leading_quote.len()..text.len() - trailing_quote.len()];
if !prefix.contains(['r', 'R']) { if leading_quote.contains(['r', 'R']) {
let start_offset = return diagnostics;
range.start() + TextSize::try_from(quote_pos).unwrap() + quote.text_len(); }
let start_offset = range.start() + TextSize::try_from(leading_quote.len()).unwrap();
let mut chars_iter = body.char_indices().peekable(); let mut chars_iter = body.char_indices().peekable();
@ -102,7 +88,34 @@ pub(crate) fn invalid_escape_sequence(
} }
// If the next character is a valid escape sequence, skip. // If the next character is a valid escape sequence, skip.
if VALID_ESCAPE_SEQUENCES.contains(next_char) { // See: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals.
if matches!(
next_char,
'\n'
| '\\'
| '\''
| '"'
| 'a'
| 'b'
| 'f'
| 'n'
| 'r'
| 't'
| 'v'
| '0'
| '1'
| '2'
| '3'
| '4'
| '5'
| '6'
| '7'
| 'x'
// Escape sequences only recognized in string literals
| 'N'
| 'u'
| 'U'
) {
contains_valid_escape_sequence = true; contains_valid_escape_sequence = true;
continue; continue;
} }
@ -140,12 +153,11 @@ pub(crate) fn invalid_escape_sequence(
} else { } else {
"r".to_string() "r".to_string()
}, },
range.start() + TextSize::try_from(quote_pos).unwrap(), range.start(),
))); )));
} }
} }
} }
}
diagnostics diagnostics
} }