ruff/crates/ruff_python_formatter/src/string/normalize.rs

1098 lines
39 KiB
Rust

use std::borrow::Cow;
use std::cmp::Ordering;
use std::iter::FusedIterator;
use ruff_formatter::FormatContext;
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_ast::{
str::{Quote, TripleQuotes},
AnyStringFlags, BytesLiteral, FString, FStringElement, FStringElements, FStringFlags,
StringFlags, StringLikePart, StringLiteral,
};
use ruff_text_size::{Ranged, TextRange, TextSlice};
use crate::context::FStringState;
use crate::prelude::*;
use crate::string::StringQuotes;
use crate::QuoteStyle;
/// Chooses the quotes for a string-like part and normalizes its content.
///
/// Holds the formatting context plus an optional quote style that, when set,
/// takes precedence over the quote style configured in the context's options.
pub(crate) struct StringNormalizer<'a, 'src> {
/// Overrides the configured quote style when `Some`.
preferred_quote_style: Option<QuoteStyle>,
/// Formatting context; used to read the options, the source text and the
/// f-string / docstring state.
context: &'a PyFormatContext<'src>,
}
impl<'a, 'src> StringNormalizer<'a, 'src> {
/// Creates a normalizer for `context` without a quote style override, so the
/// quote style configured in the context's options is used.
pub(crate) fn from_context(context: &'a PyFormatContext<'src>) -> Self {
Self {
preferred_quote_style: None,
context,
}
}
/// Sets the quote style to prefer instead of the one configured in the options
/// and returns the updated normalizer (builder style).
pub(crate) fn with_preferred_quote_style(mut self, quote_style: QuoteStyle) -> Self {
self.preferred_quote_style = Some(quote_style);
self
}
/// Returns the quote style that should be tried first for `string`.
///
/// The result is only a preference: the formatter should deviate from it when the
/// string contains the preferred quotes OR when using them leads to more escaping.
///
/// Note: If you add more cases here where we return `QuoteStyle::Preserve`,
/// make sure to also add them to [`FormatImplicitConcatenatedStringFlat::new`].
pub(super) fn preferred_quote_style(&self, string: StringLikePart) -> QuoteStyle {
    let options = self.context.options();
    let configured_style = self
        .preferred_quote_style
        .unwrap_or(options.quote_style());
    let supports_pep_701 = options.target_version().supports_pep_701();

    // Strings nested inside an f-string expression alternate their quotes with the
    // enclosing f-string, unless the outer string is triple quoted and the inner isn't.
    if let FStringState::InsideExpressionElement(parent_context) = self.context.f_string_state()
    {
        let parent_flags = parent_context.f_string().flags();
        let may_alternate = !parent_flags.is_triple_quoted() || string.flags().is_triple_quoted();

        // Alternating is required even with `preserve` when the target version predates
        // PEP 701: joining two f-strings with different quote styles would otherwise
        // produce a syntax error, e.g.
        // `string = "this is my string with " f'"{params.get("mine")}"'`
        let must_alternate = !(configured_style.is_preserve() && supports_pep_701);

        if may_alternate && must_alternate {
            return QuoteStyle::from(parent_flags.quote_style().opposite());
        }
    }

    // Every other string keeps its quotes when the style is `preserve`.
    if configured_style.is_preserve() {
        return QuoteStyle::Preserve;
    }

    // Some f-strings must keep their quotes because flipping them would make them invalid.
    if let StringLikePart::FString(fstring) = string {
        // Only relevant before Python 3.12:
        // * a debug expression containing a quote character is emitted **exactly** as
        //   written in the source, and
        // * an expression may contain a triple quoted literal that itself contains a quote.
        let invalid_before_pep_701 = !supports_pep_701
            && (is_fstring_with_quoted_debug_expression(fstring, self.context)
                || is_fstring_with_triple_quoted_literal_expression_containing_quotes(
                    fstring,
                    self.context,
                ));

        // Relevant for every target version: a debug expression whose format spec has a
        // literal element with a quote character.
        if invalid_before_pep_701
            || is_fstring_with_quoted_format_spec_and_debug(fstring, self.context)
        {
            return QuoteStyle::Preserve;
        }
    }

    if !string.flags().is_triple_quoted() {
        return configured_style;
    }

    // Per PEP 8, triple-quoted strings always prefer double quotes ... except for code
    // snippets inside a docstring, where the quote style of the surrounding docstring is
    // inverted to avoid writing out invalid Python.
    //
    // This can end up slightly out of sync with PEP 8. Consider:
    //
    //     def foo():
    //         '''
    //         Something.
    //
    //         >>> """tricksy"""
    //         '''
    //         pass
    //
    // The ideal output would use `"""` for the docstring and `'''` for the snippet, but
    // the original quoting is preserved instead: the quotes of the outer docstring are
    // chosen by looking at its content, which contains `"""`, so `'''` is picked to avoid
    // escaping. That the `"""` belongs to a snippet which could itself be requoted isn't
    // taken into account.
    //
    // Fixing this would require finding the code snippets before normalizing the docstring
    // and choosing the quotes holistically, which is a substantial change for a corner
    // case. Inverting whatever the parent decided upon always works.
    match self.context.docstring() {
        Some(quote) => QuoteStyle::from(quote.opposite()),
        None => QuoteStyle::Double,
    }
}
/// Picks the quotes to use for `string`.
///
/// Starts from the preferred quote style and, when the content contains quotes or
/// escapes, lets [`QuoteMetadata`] decide which quote is actually used. The returned
/// selection also records the offset of the first character that [`normalize_string`]
/// needs to inspect, if there is one.
pub(crate) fn choose_quotes(&self, string: StringLikePart) -> QuoteSelection {
    let content = &self.context.source()[string.content_range()];
    // Byte offset of the first backslash, quote or carriage return.
    let first_special_offset = content
        .bytes()
        .position(|byte| matches!(byte, b'\\' | b'"' | b'\'' | b'\r'));
    let source_flags = string.flags();

    let flags = match Quote::try_from(self.preferred_quote_style(string)) {
        // The preferred quote style is to preserve the quotes: keep the flags as they are.
        Err(()) => source_flags,
        Ok(preferred_quote) => {
            let quote = match first_special_offset {
                // No quotes in the content, the preferred quote is always safe.
                None => preferred_quote,
                // The content contains a quote or another character that may need
                // escaping: inspect it to find the best quote.
                Some(offset) => {
                    let metadata = if string.is_fstring() {
                        QuoteMetadata::from_part(string, self.context, preferred_quote)
                    } else {
                        QuoteMetadata::from_str(&content[offset..], string.flags(), preferred_quote)
                    };
                    metadata.choose(preferred_quote)
                }
            };
            source_flags.with_quote_style(quote)
        }
    };

    QuoteSelection {
        flags,
        first_quote_or_normalized_char_offset: first_special_offset,
    }
}
/// Chooses the quotes for `string` and normalizes its content for those quotes.
///
/// The source text is borrowed unchanged when it contains no character that
/// [`normalize_string`] would have to look at.
pub(crate) fn normalize(&self, string: StringLikePart) -> NormalizedString<'src> {
    let source_text = &self.context.source()[string.content_range()];
    let selection = self.choose_quotes(string);

    let text = match selection.first_quote_or_normalized_char_offset {
        Some(offset) => normalize_string(source_text, offset, selection.flags, false),
        None => Cow::Borrowed(source_text),
    };

    NormalizedString {
        flags: selection.flags,
        content_range: string.content_range(),
        text,
    }
}
}
/// The result of choosing the quotes for a string-like part.
#[derive(Debug)]
pub(crate) struct QuoteSelection {
/// The string's flags, with the quote style replaced by the chosen one
/// (or unchanged when the quotes are preserved).
flags: AnyStringFlags,
/// Offset to the first quote character or character that needs special handling in [`normalize_string`].
/// `None` if the content contains no such character.
first_quote_or_normalized_char_offset: Option<usize>,
}
impl QuoteSelection {
/// Returns the flags carrying the chosen quote style.
pub(crate) fn flags(&self) -> AnyStringFlags {
self.flags
}
}
/// Summary of the quotes used inside a string's content together with the
/// quote style the string has in the source.
#[derive(Clone, Debug)]
pub(crate) struct QuoteMetadata {
/// What is known about the quotes in the content; the variant depends on
/// whether the string is raw, triple quoted or neither.
kind: QuoteMetadataKind,
/// The quote style in the source.
source_style: Quote,
}
/// Tracks information about the used quotes in a string which is used
/// to choose the quotes for a part.
impl QuoteMetadata {
/// Builds the quote metadata for a single string-like `part`.
///
/// String and bytes literals are scanned as a whole. For f-strings, the metadata
/// starts out empty and the f-string's elements are merged into it one by one
/// (see `merge_fstring_elements`).
pub(crate) fn from_part(
    part: StringLikePart,
    context: &PyFormatContext,
    preferred_quote: Quote,
) -> Self {
    match part {
        StringLikePart::FString(fstring) => {
            // An empty literal yields metadata of the right kind without any quotes.
            Self::from_str("", part.flags(), preferred_quote).merge_fstring_elements(
                &fstring.elements,
                fstring.flags,
                context,
                preferred_quote,
            )
        }
        StringLikePart::String(_) | StringLikePart::Bytes(_) => {
            let content = &context.source()[part.content_range()];
            Self::from_str(content, part.flags(), preferred_quote)
        }
    }
}
/// Builds the quote metadata for the string content `text`.
///
/// `flags` decides how the content is analyzed: raw strings take precedence over
/// triple quoted strings, everything else is treated as a regular string. The quote
/// style in `flags` is recorded as the source style.
pub(crate) fn from_str(text: &str, flags: AnyStringFlags, preferred_quote: Quote) -> Self {
    let kind = match (flags.is_raw_string(), flags.is_triple_quoted()) {
        (true, _) => QuoteMetadataKind::raw(text, preferred_quote, flags.triple_quotes()),
        (false, true) => QuoteMetadataKind::triple_quoted(text, preferred_quote),
        (false, false) => QuoteMetadataKind::regular(text),
    };

    Self {
        kind,
        source_style: flags.quote_style(),
    }
}
/// Picks the quote to use given the collected metadata.
///
/// * Raw and triple quoted strings use `preferred_quote` unless the content makes
///   that impossible, in which case the quotes from the source are kept.
/// * Regular strings use whichever quote occurs less often in the content and fall
///   back to `preferred_quote` on a tie.
pub(super) fn choose(&self, preferred_quote: Quote) -> Quote {
    match self.kind {
        QuoteMetadataKind::Raw { contains_preferred }
        | QuoteMetadataKind::Triple { contains_preferred } => {
            if contains_preferred {
                self.source_style
            } else {
                preferred_quote
            }
        }
        QuoteMetadataKind::Regular {
            single_quotes,
            double_quotes,
        } => {
            if single_quotes < double_quotes {
                Quote::Single
            } else if single_quotes > double_quotes {
                Quote::Double
            } else {
                preferred_quote
            }
        }
    }
}
/// Merges the quotes metadata of different literals.
///
/// Returns `None` if the two metadata are of different kinds. The source style of
/// `self` is kept.
///
/// ## Raw and triple quoted strings
/// Merging raw and triple quoted strings is only correct if all literals are from the same part.
/// E.g. it's okay to merge the literals of a single `FString` part, but merging raw or
/// triple quoted strings from different parts of an implicit concatenated string
/// isn't safe because it may lead to incorrect results.
pub(super) fn merge(self, other: &QuoteMetadata) -> Option<QuoteMetadata> {
    let kind = match (self.kind, other.kind) {
        // Regular strings: add up the quote counts.
        (
            QuoteMetadataKind::Regular {
                single_quotes: lhs_single,
                double_quotes: lhs_double,
            },
            QuoteMetadataKind::Regular {
                single_quotes: rhs_single,
                double_quotes: rhs_double,
            },
        ) => QuoteMetadataKind::Regular {
            single_quotes: lhs_single + rhs_single,
            double_quotes: lhs_double + rhs_double,
        },
        // Raw strings: the preferred quote is unusable as soon as either side contains it.
        (
            QuoteMetadataKind::Raw {
                contains_preferred: lhs,
            },
            QuoteMetadataKind::Raw {
                contains_preferred: rhs,
            },
        ) => QuoteMetadataKind::Raw {
            contains_preferred: lhs || rhs,
        },
        // Triple quoted strings: same rule as for raw strings.
        (
            QuoteMetadataKind::Triple {
                contains_preferred: lhs,
            },
            QuoteMetadataKind::Triple {
                contains_preferred: rhs,
            },
        ) => QuoteMetadataKind::Triple {
            contains_preferred: lhs || rhs,
        },
        // Metadata of different kinds can't be combined.
        _ => return None,
    };

    Some(Self {
        kind,
        source_style: self.source_style,
    })
}
/// Folds the quotes of an f-string's `elements` into `self`.
///
/// Only quotes inside literal elements are counted; quotes inside expressions are
/// ignored, except for the literals of a format spec (which are skipped as well when
/// the expression is a debug expression). This allows both the outer and the nested
/// literals to make the optimal local choice.
/// This doesn't require any pre 312 special handling because an expression
/// can never contain the outer quote character, not even escaped:
/// ```python
/// f"{'escaping a quote like this \" is a syntax error pre 312'}"
/// ```
fn merge_fstring_elements(
    self,
    elements: &FStringElements,
    flags: FStringFlags,
    context: &PyFormatContext,
    preferred_quote: Quote,
) -> Self {
    elements
        .iter()
        .fold(self, |merged, element| match element {
            FStringElement::Literal(literal) => merged
                .merge(&QuoteMetadata::from_str(
                    context.source().slice(literal),
                    flags.into(),
                    preferred_quote,
                ))
                .expect("Merge to succeed because all parts have the same flags"),
            FStringElement::Expression(expression) => {
                match expression.format_spec.as_deref() {
                    Some(spec) if expression.debug_text.is_none() => merged
                        .merge_fstring_elements(&spec.elements, flags, context, preferred_quote),
                    _ => merged,
                }
            }
        })
}
}
/// What is known about the quotes inside a string's content, by kind of string.
#[derive(Copy, Clone, Debug)]
enum QuoteMetadataKind {
/// A raw string.
///
/// For raw strings it's only possible to change the quotes if the preferred quote style
/// isn't used inside the string.
Raw { contains_preferred: bool },
/// Regular (non raw) triple quoted string.
///
/// For triple quoted strings it's only possible to change the quotes if no
/// triple of the preferred quotes is used inside the string.
Triple { contains_preferred: bool },
/// A single quoted string that uses either double or single quotes.
///
/// For regular strings it's desired to pick the quote style that requires the least escaping.
/// E.g. pick single quotes for `'A "dog"'` because using double quotes would require escaping
/// the two `"`.
Regular {
/// Number of `'` characters in the content.
single_quotes: u32,
/// Number of `"` characters in the content.
double_quotes: u32,
},
}
impl QuoteMetadataKind {
/// For triple quoted strings, the preferred quote style can't be used if the string contains
/// a triple of the quote character (e.g., if double quotes are preferred, double quotes will be
/// used unless the string contains `"""`).
///
/// Content that ends in one or two preferred quote characters is treated the same way,
/// because those quotes would run into the closing triple quote.
fn triple_quoted(content: &str, preferred_quote: Quote) -> Self {
// True if the string contains a triple quote sequence of the configured quote style.
let mut uses_triple_quotes = false;
let mut chars = content.chars().peekable();
while let Some(c) = chars.next() {
let preferred_quote_char = preferred_quote.as_char();
match c {
// Skip an escaped `"` or an escaped backslash so that it isn't inspected below.
// NOTE(review): an escaped `'` is not skipped here, so with single quotes preferred
// `\'` followed by `''` is reported as a triple quote. That is conservative (the
// source quotes are kept); confirm whether this is intended.
'\\' => {
if matches!(chars.peek(), Some('"' | '\\')) {
chars.next();
}
}
// `"` or `'`
c if c == preferred_quote_char => {
match chars.peek().copied() {
Some(c) if c == preferred_quote_char => {
// `""` or `''`
chars.next();
match chars.peek().copied() {
Some(c) if c == preferred_quote_char => {
// `"""` or `'''`
chars.next();
uses_triple_quotes = true;
break;
}
// Two quotes followed by another character, this is ok
Some(_) => {}
None => {
// Handle `''' ""'''`. At this point we have consumed both
// double quotes, so on the next iteration the iterator is empty
// and we'd miss the string ending with a preferred quote
uses_triple_quotes = true;
break;
}
}
}
Some(_) => {
// A single quote char, this is ok
}
None => {
// Trailing quote at the end of the string
uses_triple_quotes = true;
break;
}
}
}
_ => continue,
}
}
Self::Triple {
contains_preferred: uses_triple_quotes,
}
}
/// Counts the single and double quote characters in `text`.
///
/// For single quoted strings, the preferred quote style is used unless the alternative
/// quote style would require fewer escapes; the counts collected here are what that
/// decision is based on.
fn regular(text: &str) -> Self {
    let (single_quotes, double_quotes) =
        text.chars()
            .fold((0u32, 0u32), |(single, double), c| match c {
                '\'' => (single + 1, double),
                '"' => (single, double + 1),
                _ => (single, double),
            });

    Self::Regular {
        single_quotes,
        double_quotes,
    }
}
/// Computes if a raw string uses the preferred quote. If it does, then it's not possible
/// to change the quote style because it would require escaping which isn't possible in raw strings.
///
/// For raw strings that aren't triple quoted, any preferred quote that doesn't follow a
/// backslash counts. For triple quoted raw strings, only a run of three preferred quotes,
/// or one or two preferred quotes at the very end of the content, count.
fn raw(text: &str, preferred: Quote, triple_quotes: TripleQuotes) -> Self {
let mut chars = text.chars().peekable();
let preferred_quote_char = preferred.as_char();
let contains_unescaped_configured_quotes = loop {
match chars.next() {
Some('\\') => {
// Ignore escaped characters
chars.next();
}
// `"` or `'`
Some(c) if c == preferred_quote_char => {
// Without triple quotes, a single preferred quote is already too much.
if triple_quotes.is_no() {
break true;
}
match chars.peek() {
// We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
// about where the closing triple quotes start
None => break true,
Some(next) if *next == preferred_quote_char => {
// `""` or `''`
chars.next();
// We can't turn `r'''""'''` into `r""""""""`, nor can we have
// `"""` or `'''` respectively inside the string
if chars.peek().is_none() || chars.peek() == Some(&preferred_quote_char)
{
break true;
}
}
// A lone preferred quote inside a triple quoted string is fine.
_ => {}
}
}
Some(_) => continue,
// Reached the end without finding a problematic quote.
None => break false,
}
};
Self::Raw {
contains_preferred: contains_unescaped_configured_quotes,
}
}
}
/// A string-like part after its quotes have been chosen and its content normalized.
#[derive(Debug)]
pub(crate) struct NormalizedString<'a> {
/// Holds data about the quotes and prefix of the string
flags: AnyStringFlags,
/// The range of the string's content in the source (minus prefix and quotes).
content_range: TextRange,
/// The normalized text. Borrowed if the source text didn't need to change.
text: Cow<'a, str>,
}
impl<'a> NormalizedString<'a> {
/// Returns the normalized content (without prefix and quotes).
pub(crate) fn text(&self) -> &Cow<'a, str> {
&self.text
}
/// Returns the flags describing the prefix and the chosen quotes.
pub(crate) fn flags(&self) -> AnyStringFlags {
self.flags
}
}
impl Ranged for NormalizedString<'_> {
/// Returns the range of the string's content in the source (minus prefix and quotes).
fn range(&self) -> TextRange {
self.content_range
}
}
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
    /// Writes the prefix, the opening quotes, the content and the closing quotes.
    ///
    /// Unchanged content is written as a slice of the source text, normalized
    /// content is written from the owned string.
    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
        let quotes = StringQuotes::from(self.flags);
        ruff_formatter::write!(f, [self.flags.prefix(), quotes])?;

        if let Cow::Owned(normalized) = &self.text {
            text(normalized).fmt(f)?;
        } else {
            source_text_slice(self.range()).fmt(f)?;
        }

        quotes.fmt(f)
    }
}
/// Normalizes the content of a string literal for the quotes and prefix in `new_flags`.
///
/// Scanning starts at byte `start_offset` of `input`; everything before it is copied unchanged.
/// The following rewrites are applied:
///
/// * `\r` and `\r\n` become `\n`.
/// * If `escape_braces` is `true`, `{` and `}` are doubled.
/// * Unless `new_flags` describes a raw string:
///   * `\x`, `\u`, `\U` and `\N{..}` escapes are case-normalized (`\u`, `\U` and `\N` only
///     if the string isn't a byte string).
///   * Unless triple quoted: escapes of the opposite quote are removed and unescaped
///     occurrences of the new quote are escaped.
///
/// Returns `Cow::Borrowed(input)` if nothing had to be changed.
pub(crate) fn normalize_string(
input: &str,
start_offset: usize,
new_flags: AnyStringFlags,
escape_braces: bool,
) -> Cow<str> {
// The normalized string if `input` is not yet normalized.
// `output` must remain empty if `input` is already normalized.
let mut output = String::new();
// Tracks the last index of `input` that has been written to `output`.
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
let mut last_index = 0;
let quote = new_flags.quote_style();
let preferred_quote = quote.as_char();
let opposite_quote = quote.opposite().as_char();
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
let is_raw = new_flags.is_raw_string();
while let Some((index, c)) = chars.next() {
if matches!(c, '{' | '}') {
if escape_braces {
// Escape `{` and `}` when converting a regular string literal to an f-string literal.
// The inclusive range copies the brace itself, the `push` below doubles it.
output.push_str(&input[last_index..=index]);
output.push(c);
last_index = index + c.len_utf8();
continue;
}
}
if c == '\r' {
output.push_str(&input[last_index..index]);
// Skip over the '\r' character, keep the `\n`
if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
chars.next();
}
// Replace the `\r` with a `\n`
else {
output.push('\n');
}
last_index = index + '\r'.len_utf8();
} else if !is_raw {
if c == '\\' {
// Look at the character after the backslash without consuming it.
if let Some((_, next)) = chars.clone().next() {
if next == '\\' {
// Skip over escaped backslashes
chars.next();
} else {
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
if let Some(normalised) =
UnicodeEscape::new(next, !new_flags.is_byte_string()).and_then(
|escape| escape.normalize(&input[index + escape_start_len..]),
)
{
let escape_start_offset = index + escape_start_len;
// Only an owned result means that the escape's casing changed.
if let Cow::Owned(normalised) = &normalised {
output.push_str(&input[last_index..escape_start_offset]);
output.push_str(normalised);
last_index = escape_start_offset + normalised.len();
};
// Move the `chars` iterator past the escape sequence.
// Simply reassigning `chars` doesn't work because the indices would
// then be off.
// The byte lengths are used as character counts here, which works because
// the escape character and the normalized sequence only contain ASCII.
for _ in 0..next.len_utf8() + normalised.len() {
chars.next();
}
}
}
if !new_flags.is_triple_quoted() {
#[allow(clippy::if_same_then_else)]
if next == opposite_quote {
// Remove the escape by ending before the backslash and starting again with the quote
chars.next();
output.push_str(&input[last_index..index]);
last_index = index + '\\'.len_utf8();
} else if next == preferred_quote {
// Quote is already escaped, skip over it.
chars.next();
}
}
}
} else if !new_flags.is_triple_quoted() && c == preferred_quote {
// Escape the quote
output.push_str(&input[last_index..index]);
output.push('\\');
output.push(c);
last_index = index + preferred_quote.len_utf8();
}
}
}
if last_index == 0 {
// Nothing was written to `output`, the input is already normalized.
Cow::Borrowed(input)
} else {
// Copy whatever follows the last rewritten position.
output.push_str(&input[last_index..]);
Cow::Owned(output)
}
}
/// Like [`str::char_indices`], but starts iterating at a byte offset into the string
/// while still yielding indices that are relative to the start of the whole string.
#[derive(Clone, Debug)]
struct CharIndicesWithOffset<'str> {
    /// The characters that haven't been yielded yet.
    remaining: std::str::Chars<'str>,
    /// Byte index (into the full input) of the next character in `remaining`.
    position: usize,
}

impl<'str> CharIndicesWithOffset<'str> {
    /// Creates an iterator over `input[start_offset..]`.
    ///
    /// Panics if `start_offset` isn't on a character boundary of `input`.
    fn new(input: &'str str, start_offset: usize) -> Self {
        let tail = &input[start_offset..];
        Self {
            remaining: tail.chars(),
            position: start_offset,
        }
    }
}

impl Iterator for CharIndicesWithOffset<'_> {
    type Item = (usize, char);

    fn next(&mut self) -> Option<Self::Item> {
        let c = self.remaining.next()?;
        let index = self.position;
        self.position = index + c.len_utf8();
        Some((index, c))
    }
}

// Once the underlying `Chars` is exhausted it keeps returning `None`.
impl FusedIterator for CharIndicesWithOffset<'_> {}
/// The kinds of escape sequences whose casing gets normalized.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum UnicodeEscape {
    /// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
    Hex(usize),
    /// An escaped unicode name (`\N{name}`)
    CharacterName,
}

impl UnicodeEscape {
    /// Maps the character following a backslash to the escape kind it introduces.
    ///
    /// `\x` is always recognized; `\u`, `\U` and `\N` only if `allow_unicode` is `true`.
    /// Returns `None` for every other character.
    fn new(first: char, allow_unicode: bool) -> Option<UnicodeEscape> {
        match (first, allow_unicode) {
            ('x', _) => Some(UnicodeEscape::Hex(2)),
            ('u', true) => Some(UnicodeEscape::Hex(4)),
            ('U', true) => Some(UnicodeEscape::Hex(8)),
            ('N', true) => Some(UnicodeEscape::CharacterName),
            _ => None,
        }
    }

    /// Normalises `\u..`, `\U..`, `\x..` and `\N{..}` escape sequences to:
    ///
    /// * `\u`, `\U` and `\x`: To use lower case for the characters `a-f`.
    /// * `\N`: To use uppercase letters
    ///
    /// `input` is the text following the escape character (e.g. what comes after `\x`).
    /// Returns `None` if `input` doesn't start with a valid sequence for this kind of
    /// escape. Otherwise returns the sequence, borrowed from `input` if its casing was
    /// already normalized and owned if it had to be changed.
    fn normalize(self, input: &str) -> Option<Cow<str>> {
        // Stays `None` until the first character that needs a different casing is found.
        let mut rewritten: Option<String> = None;

        let sequence_len = match self {
            UnicodeEscape::Hex(digits) => {
                // It's not a valid escape sequence if the input has fewer characters
                // left than the escape sequence requires.
                if input.len() < digits {
                    return None;
                }

                for (offset, ch) in input.char_indices().take(digits) {
                    if !ch.is_ascii_hexdigit() {
                        // not a valid escape sequence
                        return None;
                    }

                    let lowered = ch.to_ascii_lowercase();
                    if let Some(buffer) = rewritten.as_mut() {
                        buffer.push(lowered);
                    } else if lowered != ch {
                        // First upper case digit: copy everything seen so far.
                        let mut buffer = String::with_capacity(digits);
                        buffer.push_str(&input[..offset]);
                        buffer.push(lowered);
                        rewritten = Some(buffer);
                    }
                }

                digits
            }
            UnicodeEscape::CharacterName => {
                let mut rest = input.char_indices();
                if rest.next().map(|(_, ch)| ch) != Some('{') {
                    return None;
                }

                loop {
                    // Running out of input means the sequence is unterminated.
                    let (offset, ch) = rest.next()?;
                    match ch {
                        '}' => {
                            // Name must be at least two characters long.
                            if offset < 3 {
                                return None;
                            }
                            if let Some(buffer) = rewritten.as_mut() {
                                buffer.push('}');
                            }
                            break offset + '}'.len_utf8();
                        }
                        'a'..='z' => {
                            let upper = ch.to_ascii_uppercase();
                            if let Some(buffer) = rewritten.as_mut() {
                                buffer.push(upper);
                            } else {
                                // First lower case letter: copy everything seen so far.
                                let mut buffer = String::from(&input[..offset]);
                                buffer.push(upper);
                                rewritten = Some(buffer);
                            }
                        }
                        '0'..='9' | 'A'..='Z' | ' ' | '-' => {
                            if let Some(buffer) = rewritten.as_mut() {
                                buffer.push(ch);
                            }
                        }
                        // Seems like an invalid escape sequence, don't normalise it.
                        _ => return None,
                    }
                }
            }
        };

        Some(match rewritten {
            Some(owned) => Cow::Owned(owned),
            None => Cow::Borrowed(&input[..sequence_len]),
        })
    }
}
/// Returns `true` if `fstring` has a debug expression (`{expr=}`) whose source text
/// contains the quote opposite to the f-string's own quotes.
///
/// The caller uses this for targets before Python 3.12, where the quotes of such
/// f-strings can't be changed and where we can't join f-strings like:
///
/// ```python
/// f"{10 + len('bar')=}"
/// f'{10 + len("bar")=}'
/// f"""{10 + len('''bar''')=}"""
/// ```
pub(super) fn is_fstring_with_quoted_debug_expression(
    fstring: &FString,
    context: &PyFormatContext,
) -> bool {
    fstring.elements.expressions().any(|expression| {
        expression.debug_text.is_some()
            && contains_opposite_quote(
                context.source().slice(expression),
                fstring.flags.into(),
            )
    })
}
/// Returns `true` if `fstring` has an expression element with a format spec where a
/// literal of that format spec (direct or nested) contains the opposite quote and
/// the literal is inside a debug expression. It's important to preserve the quote style
/// for those f-strings because changing the quote style would result in invalid syntax.
///
/// ```python
/// f'{1=: "abcd \'\'}'
/// f'{x=:a{y:"abcd"}}'
/// f'{x=:a{y:{z:"abcd"}}}'
/// ```
pub(super) fn is_fstring_with_quoted_format_spec_and_debug(
    fstring: &FString,
    context: &PyFormatContext,
) -> bool {
    /// Walks the elements of a format spec. `in_debug` is `true` once any enclosing
    /// expression is a debug expression.
    fn spec_contains_opposite_quote(
        elements: &FStringElements,
        flags: FStringFlags,
        context: &PyFormatContext,
        in_debug: bool,
    ) -> bool {
        for element in elements {
            let found = match element {
                FStringElement::Literal(literal) => {
                    in_debug
                        && contains_opposite_quote(context.source().slice(literal), flags.into())
                }
                FStringElement::Expression(expression) => {
                    match expression.format_spec.as_deref() {
                        Some(spec) => spec_contains_opposite_quote(
                            &spec.elements,
                            flags,
                            context,
                            in_debug || expression.debug_text.is_some(),
                        ),
                        None => false,
                    }
                }
            };

            if found {
                return true;
            }
        }

        false
    }

    fstring.elements.expressions().any(|expression| {
        expression.format_spec.as_deref().is_some_and(|spec| {
            spec_contains_opposite_quote(
                &spec.elements,
                fstring.flags,
                context,
                expression.debug_text.is_some(),
            )
        })
    })
}
/// Tests if the `fstring` contains any triple quoted string, byte, or f-string literal
/// whose content contains the quote character that the literal itself is quoted with.
///
/// ```python
/// f'{"""other " """}'
/// ```
///
/// We can't flip the quote of the outer f-string because it would result in invalid syntax:
/// ```python
/// f"{'''other " '''}"
/// ```
pub(super) fn is_fstring_with_triple_quoted_literal_expression_containing_quotes(
    fstring: &FString,
    context: &PyFormatContext,
) -> bool {
    /// Visits every string-like part inside the f-string and remembers whether a
    /// matching one was found.
    struct Visitor<'a> {
        context: &'a PyFormatContext<'a>,
        found: bool,
    }

    impl Visitor<'_> {
        fn visit_string_like_part(&mut self, part: StringLikePart) {
            // Only triple quoted parts are of interest, and one match is enough.
            if self.found || !part.flags().is_triple_quoted() {
                return;
            }

            self.found = match part {
                // For f-strings only the literal elements are inspected.
                StringLikePart::FString(fstring) => fstring
                    .elements
                    .literals()
                    .any(|literal| self.contains_quote(literal.range(), fstring.flags.into())),
                StringLikePart::String(_) | StringLikePart::Bytes(_) => {
                    self.contains_quote(part.content_range(), part.flags())
                }
            };
        }

        /// Returns `true` if the source text in `range` contains the quote character of `flags`.
        fn contains_quote(&self, range: TextRange, flags: AnyStringFlags) -> bool {
            let quote = flags.quote_style().as_char();
            self.context.source()[range].contains(quote)
        }
    }

    impl SourceOrderVisitor<'_> for Visitor<'_> {
        fn visit_f_string(&mut self, f_string: &FString) {
            self.visit_string_like_part(StringLikePart::FString(f_string));
        }

        fn visit_string_literal(&mut self, string_literal: &StringLiteral) {
            self.visit_string_like_part(StringLikePart::String(string_literal));
        }

        fn visit_bytes_literal(&mut self, bytes_literal: &BytesLiteral) {
            self.visit_string_like_part(StringLikePart::Bytes(bytes_literal));
        }
    }

    let mut visitor = Visitor {
        context,
        found: false,
    };
    ruff_python_ast::visitor::source_order::walk_f_string(&mut visitor, fstring);
    visitor.found
}
/// Returns `true` if `content` contains the quote opposite to the quote style in `flags`.
///
/// * Triple quoted: only a run of three opposite quotes counts.
/// * Raw (not triple quoted): any opposite quote counts because quotes in raw strings
///   can't be escaped.
/// * Otherwise: an opposite quote counts if it is preceded by an even number of
///   backslashes, i.e. if it isn't escaped.
fn contains_opposite_quote(content: &str, flags: AnyStringFlags) -> bool {
    if flags.is_triple_quoted() {
        return match flags.quote_style() {
            Quote::Single => content.contains("\"\"\""),
            Quote::Double => content.contains("'''"),
        };
    }

    let opposite = flags.quote_style().opposite().as_char();

    if flags.is_raw_string() {
        return content.contains(opposite);
    }

    // Number of consecutive backslashes directly in front of the current character.
    let mut preceding_backslashes = 0usize;
    for c in content.chars() {
        if c == opposite && preceding_backslashes % 2 == 0 {
            return true;
        }

        preceding_backslashes = if c == '\\' {
            preceding_backslashes + 1
        } else {
            0
        };
    }

    false
}
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use ruff_python_ast::{
str::{Quote, TripleQuotes},
str_prefix::{AnyStringPrefix, ByteStringPrefix},
AnyStringFlags,
};
use crate::string::normalize_string;
use super::UnicodeEscape;
// An 8 digit `\U` escape with upper case hex digits is lower-cased.
#[test]
fn normalize_32_escape() {
let escape_sequence = UnicodeEscape::new('U', true).unwrap();
assert_eq!(
Some(Cow::Owned("0001f60e".to_string())),
escape_sequence.normalize("0001F60E")
);
}
// `\x` escapes are normalized in byte strings too; the upper case digits in
// `\x4E`, `\x0D`, `\x0A` and `\x1A` are expected to be lower-cased.
#[test]
fn normalize_hex_in_byte_string() {
let input = r"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
let normalized = normalize_string(
input,
0,
AnyStringFlags::new(
AnyStringPrefix::Bytes(ByteStringPrefix::Regular),
Quote::Double,
TripleQuotes::No,
),
false,
);
assert_eq!(r"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", &normalized);
}
}