mirror of https://github.com/astral-sh/ruff
Update the `invalid-escape-sequence` rule (#5359)
Just a couple of small tweaks based on reading the rule with fresh eyes and new best practices.
This commit is contained in:
parent
b233763156
commit
1fe4073b56
|
|
@ -1,10 +1,9 @@
|
||||||
use anyhow::{bail, Result};
|
|
||||||
use log::error;
|
|
||||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||||
|
|
||||||
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
|
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
|
||||||
use ruff_macros::{derive_message_formats, violation};
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
use ruff_python_ast::source_code::Locator;
|
use ruff_python_ast::source_code::Locator;
|
||||||
|
use ruff_python_ast::str::{leading_quote, trailing_quote};
|
||||||
|
|
||||||
/// ## What it does
|
/// ## What it does
|
||||||
/// Checks for invalid escape sequences.
|
/// Checks for invalid escape sequences.
|
||||||
|
|
@ -21,6 +20,9 @@ use ruff_python_ast::source_code::Locator;
|
||||||
/// ```python
|
/// ```python
|
||||||
/// regex = r"\.png$"
|
/// regex = r"\.png$"
|
||||||
/// ```
|
/// ```
|
||||||
|
///
|
||||||
|
/// ## References
|
||||||
|
/// - [Python documentation: String and Bytes literals](https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals)
|
||||||
#[violation]
|
#[violation]
|
||||||
pub struct InvalidEscapeSequence(char);
|
pub struct InvalidEscapeSequence(char);
|
||||||
|
|
||||||
|
|
@ -36,24 +38,6 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Characters that this rule accepts as the second character of an escape
/// sequence, i.e. the character immediately following a backslash.
///
/// See: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
const VALID_ESCAPE_SEQUENCES: &[char; 23] = &[
    '\n', '\\', '\'', '"', 'a', 'b', 'f', 'n', 'r', 't', 'v', '0', '1', '2', '3', '4', '5', '6',
    '7', 'x',
    // Escape sequences only recognized in string literals
    'N', 'u', 'U',
];
|
|
||||||
|
|
||||||
/// Return the quotation markers used for a String token.
|
|
||||||
fn extract_quote(text: &str) -> Result<&str> {
|
|
||||||
for quote in ["'''", "\"\"\"", "'", "\""] {
|
|
||||||
if text.ends_with(quote) {
|
|
||||||
return Ok(quote);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bail!("Unable to find quotation mark for String token")
|
|
||||||
}
|
|
||||||
|
|
||||||
/// W605
|
/// W605
|
||||||
pub(crate) fn invalid_escape_sequence(
|
pub(crate) fn invalid_escape_sequence(
|
||||||
locator: &Locator,
|
locator: &Locator,
|
||||||
|
|
@ -65,84 +49,112 @@ pub(crate) fn invalid_escape_sequence(
|
||||||
let text = locator.slice(range);
|
let text = locator.slice(range);
|
||||||
|
|
||||||
// Determine whether the string is single- or triple-quoted.
|
// Determine whether the string is single- or triple-quoted.
|
||||||
let Ok(quote) = extract_quote(text) else {
|
let Some(leading_quote) = leading_quote(text) else {
|
||||||
error!("Unable to find quotation mark for string token");
|
|
||||||
return diagnostics;
|
return diagnostics;
|
||||||
};
|
};
|
||||||
let quote_pos = text.find(quote).unwrap();
|
let Some(trailing_quote) = trailing_quote(text) else {
|
||||||
let prefix = &text[..quote_pos];
|
return diagnostics;
|
||||||
let body = &text[quote_pos + quote.len()..text.len() - quote.len()];
|
};
|
||||||
|
let body = &text[leading_quote.len()..text.len() - trailing_quote.len()];
|
||||||
|
|
||||||
if !prefix.contains(['r', 'R']) {
|
if leading_quote.contains(['r', 'R']) {
|
||||||
let start_offset =
|
return diagnostics;
|
||||||
range.start() + TextSize::try_from(quote_pos).unwrap() + quote.text_len();
|
}
|
||||||
|
|
||||||
let mut chars_iter = body.char_indices().peekable();
|
let start_offset = range.start() + TextSize::try_from(leading_quote.len()).unwrap();
|
||||||
|
|
||||||
let mut contains_valid_escape_sequence = false;
|
let mut chars_iter = body.char_indices().peekable();
|
||||||
|
|
||||||
while let Some((i, c)) = chars_iter.next() {
|
let mut contains_valid_escape_sequence = false;
|
||||||
if c != '\\' {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the previous character was also a backslash, skip.
|
while let Some((i, c)) = chars_iter.next() {
|
||||||
if i > 0 && body.as_bytes()[i - 1] == b'\\' {
|
if c != '\\' {
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
// If we're at the end of the file, skip.
|
|
||||||
let Some((_, next_char)) = chars_iter.peek() else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
// If we're at the end of the line, skip
|
|
||||||
if matches!(next_char, '\n' | '\r') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the next character is a valid escape sequence, skip.
|
|
||||||
if VALID_ESCAPE_SEQUENCES.contains(next_char) {
|
|
||||||
contains_valid_escape_sequence = true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let location = start_offset + TextSize::try_from(i).unwrap();
|
|
||||||
let range = TextRange::at(location, next_char.text_len() + TextSize::from(1));
|
|
||||||
let diagnostic = Diagnostic::new(InvalidEscapeSequence(*next_char), range);
|
|
||||||
diagnostics.push(diagnostic);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if autofix {
|
// If the previous character was also a backslash, skip.
|
||||||
if contains_valid_escape_sequence {
|
if i > 0 && body.as_bytes()[i - 1] == b'\\' {
|
||||||
// Escape with backslash.
|
continue;
|
||||||
for diagnostic in &mut diagnostics {
|
}
|
||||||
diagnostic.set_fix(Fix::automatic(Edit::insertion(
|
|
||||||
r"\".to_string(),
|
|
||||||
diagnostic.range().start() + TextSize::from(1),
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Turn into raw string.
|
|
||||||
for diagnostic in &mut diagnostics {
|
|
||||||
// If necessary, add a space between any leading keyword (`return`, `yield`,
|
|
||||||
// `assert`, etc.) and the string. For example, `return"foo"` is valid, but
|
|
||||||
// `returnr"foo"` is not.
|
|
||||||
let requires_space = locator
|
|
||||||
.slice(TextRange::up_to(range.start()))
|
|
||||||
.chars()
|
|
||||||
.last()
|
|
||||||
.map_or(false, |char| char.is_ascii_alphabetic());
|
|
||||||
|
|
||||||
diagnostic.set_fix(Fix::automatic(Edit::insertion(
|
// If we're at the end of the file, skip.
|
||||||
if requires_space {
|
let Some((_, next_char)) = chars_iter.peek() else {
|
||||||
" r".to_string()
|
continue;
|
||||||
} else {
|
};
|
||||||
"r".to_string()
|
|
||||||
},
|
// If we're at the end of the line, skip
|
||||||
range.start() + TextSize::try_from(quote_pos).unwrap(),
|
if matches!(next_char, '\n' | '\r') {
|
||||||
)));
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the next character is a valid escape sequence, skip.
|
||||||
|
// See: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals.
|
||||||
|
if matches!(
|
||||||
|
next_char,
|
||||||
|
'\n'
|
||||||
|
| '\\'
|
||||||
|
| '\''
|
||||||
|
| '"'
|
||||||
|
| 'a'
|
||||||
|
| 'b'
|
||||||
|
| 'f'
|
||||||
|
| 'n'
|
||||||
|
| 'r'
|
||||||
|
| 't'
|
||||||
|
| 'v'
|
||||||
|
| '0'
|
||||||
|
| '1'
|
||||||
|
| '2'
|
||||||
|
| '3'
|
||||||
|
| '4'
|
||||||
|
| '5'
|
||||||
|
| '6'
|
||||||
|
| '7'
|
||||||
|
| 'x'
|
||||||
|
// Escape sequences only recognized in string literals
|
||||||
|
| 'N'
|
||||||
|
| 'u'
|
||||||
|
| 'U'
|
||||||
|
) {
|
||||||
|
contains_valid_escape_sequence = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let location = start_offset + TextSize::try_from(i).unwrap();
|
||||||
|
let range = TextRange::at(location, next_char.text_len() + TextSize::from(1));
|
||||||
|
let diagnostic = Diagnostic::new(InvalidEscapeSequence(*next_char), range);
|
||||||
|
diagnostics.push(diagnostic);
|
||||||
|
}
|
||||||
|
|
||||||
|
if autofix {
|
||||||
|
if contains_valid_escape_sequence {
|
||||||
|
// Escape with backslash.
|
||||||
|
for diagnostic in &mut diagnostics {
|
||||||
|
diagnostic.set_fix(Fix::automatic(Edit::insertion(
|
||||||
|
r"\".to_string(),
|
||||||
|
diagnostic.range().start() + TextSize::from(1),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Turn into raw string.
|
||||||
|
for diagnostic in &mut diagnostics {
|
||||||
|
// If necessary, add a space between any leading keyword (`return`, `yield`,
|
||||||
|
// `assert`, etc.) and the string. For example, `return"foo"` is valid, but
|
||||||
|
// `returnr"foo"` is not.
|
||||||
|
let requires_space = locator
|
||||||
|
.slice(TextRange::up_to(range.start()))
|
||||||
|
.chars()
|
||||||
|
.last()
|
||||||
|
.map_or(false, |char| char.is_ascii_alphabetic());
|
||||||
|
|
||||||
|
diagnostic.set_fix(Fix::automatic(Edit::insertion(
|
||||||
|
if requires_space {
|
||||||
|
" r".to_string()
|
||||||
|
} else {
|
||||||
|
"r".to_string()
|
||||||
|
},
|
||||||
|
range.start(),
|
||||||
|
)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue