From 6f9c128d7737f02d3cad5d7c020ec643e2f3afa2 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 13 Feb 2024 18:14:56 +0530 Subject: [PATCH] Separate `StringNormalizer` from `StringPart` (#9954) ## Summary This PR is a small refactor to extract out the logic for normalizing string in the formatter from the `StringPart` struct. It also separates the quote selection into a separate method on the new `StringNormalizer`. Both of these will help in the f-string formatting to use `StringPart` and `choose_quotes` irrespective of normalization. The reason for having separate quote selection and normalization step is so that the f-string formatting can perform quote selection on its own. Unlike string and byte literals, the f-string formatting would require that the normalization happens only for the literal elements of it i.e., the "foo" and "bar" in `f"foo {x + y} bar"`. This will automatically be handled by the already separate `normalize_string` function. Another use-case in the f-string formatting is to extract out the relevant information from the `StringPart` like quotes and prefix which is to be passed as context while formatting each element of an f-string. ## Test Plan Ensure that clippy is happy and all tests pass. --- .../src/other/bytes_literal.rs | 14 +-- .../src/other/f_string.rs | 12 +- .../src/other/string_literal.rs | 17 ++- .../ruff_python_formatter/src/string/mod.rs | 112 ++++++++++++------ 4 files changed, 96 insertions(+), 59 deletions(-) diff --git a/crates/ruff_python_formatter/src/other/bytes_literal.rs b/crates/ruff_python_formatter/src/other/bytes_literal.rs index 542928ced3..7055e93b36 100644 --- a/crates/ruff_python_formatter/src/other/bytes_literal.rs +++ b/crates/ruff_python_formatter/src/other/bytes_literal.rs @@ -2,8 +2,7 @@ use ruff_python_ast::BytesLiteral; use ruff_text_size::Ranged; use crate::prelude::*; -use crate::preview::is_hex_codes_in_unicode_sequences_enabled; -use crate::string::{Quoting, StringPart}; +use crate::string::{StringNormalizer, StringPart}; #[derive(Default)] pub struct FormatBytesLiteral; @@ -12,14 +11,9 @@ impl FormatNodeRule for FormatBytesLiteral { fn fmt_fields(&self, item: &BytesLiteral, f: &mut PyFormatter) -> FormatResult<()> { let locator = f.context().locator(); - StringPart::from_source(item.range(), &locator) - .normalize( - Quoting::CanChange, - &locator, - f.options().quote_style(), - f.context().docstring(), - is_hex_codes_in_unicode_sequences_enabled(f.context()), - ) + StringNormalizer::from_context(f.context()) + .with_preferred_quote_style(f.options().quote_style()) + .normalize(&StringPart::from_source(item.range(), &locator), &locator) .fmt(f) } } diff --git a/crates/ruff_python_formatter/src/other/f_string.rs b/crates/ruff_python_formatter/src/other/f_string.rs index c3e8ac4ebf..50c973a679 100644 --- a/crates/ruff_python_formatter/src/other/f_string.rs +++ b/crates/ruff_python_formatter/src/other/f_string.rs @@ -2,8 +2,7 @@ use ruff_python_ast::FString; use ruff_text_size::Ranged; use crate::prelude::*; -use crate::preview::is_hex_codes_in_unicode_sequences_enabled; -use crate::string::{Quoting, StringPart}; +use crate::string::{Quoting, StringNormalizer, StringPart}; /// Formats an f-string which is part of a larger f-string expression. /// @@ -26,13 +25,12 @@ impl Format> for FormatFString<'_> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { let locator = f.context().locator(); - let result = StringPart::from_source(self.value.range(), &locator) + let result = StringNormalizer::from_context(f.context()) + .with_quoting(self.quoting) + .with_preferred_quote_style(f.options().quote_style()) .normalize( - self.quoting, + &StringPart::from_source(self.value.range(), &locator), &locator, - f.options().quote_style(), - f.context().docstring(), - is_hex_codes_in_unicode_sequences_enabled(f.context()), ) .fmt(f); diff --git a/crates/ruff_python_formatter/src/other/string_literal.rs b/crates/ruff_python_formatter/src/other/string_literal.rs index 73044f84f8..a64c61ed17 100644 --- a/crates/ruff_python_formatter/src/other/string_literal.rs +++ b/crates/ruff_python_formatter/src/other/string_literal.rs @@ -2,8 +2,7 @@ use ruff_python_ast::StringLiteral; use ruff_text_size::Ranged; use crate::prelude::*; -use crate::preview::is_hex_codes_in_unicode_sequences_enabled; -use crate::string::{docstring, Quoting, StringPart}; +use crate::string::{docstring, Quoting, StringNormalizer, StringPart}; use crate::QuoteStyle; pub(crate) struct FormatStringLiteral<'a> { @@ -59,13 +58,13 @@ impl Format> for FormatStringLiteral<'_> { quote_style }; - let normalized = StringPart::from_source(self.value.range(), &locator).normalize( - self.layout.quoting(), - &locator, - quote_style, - f.context().docstring(), - is_hex_codes_in_unicode_sequences_enabled(f.context()), - ); + let normalized = StringNormalizer::from_context(f.context()) + .with_quoting(self.layout.quoting()) + .with_preferred_quote_style(quote_style) + .normalize( + &StringPart::from_source(self.value.range(), &locator), + &locator, + ); if self.layout.is_docstring() { docstring::format(&normalized, f) diff --git a/crates/ruff_python_formatter/src/string/mod.rs b/crates/ruff_python_formatter/src/string/mod.rs index b00d3c09ff..047ae7cd36 100644 --- a/crates/ruff_python_formatter/src/string/mod.rs +++ b/crates/ruff_python_formatter/src/string/mod.rs @@ -18,6 +18,7 @@ use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space use crate::other::f_string::FormatFString; use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind}; use crate::prelude::*; +use crate::preview::is_hex_codes_in_unicode_sequences_enabled; use crate::QuoteStyle; pub(crate) mod docstring; @@ -291,23 +292,54 @@ impl StringPart { } } - /// Computes the strings preferred quotes and normalizes its content. - /// - /// The parent docstring quote style should be set when formatting a code - /// snippet within the docstring. The quote style should correspond to the - /// style of quotes used by said docstring. Normalization will ensure the - /// quoting styles don't conflict. - pub(crate) fn normalize<'a>( - self, - quoting: Quoting, - locator: &'a Locator, - configured_style: QuoteStyle, - parent_docstring_quote_char: Option, - normalize_hex: bool, - ) -> NormalizedString<'a> { + /// Returns the prefix of the string part. + pub(crate) const fn prefix(&self) -> StringPrefix { + self.prefix + } + + /// Returns the surrounding quotes of the string part. + pub(crate) const fn quotes(&self) -> StringQuotes { + self.quotes + } + + /// Returns the range of the string's content in the source (minus prefix and quotes). + pub(crate) const fn content_range(&self) -> TextRange { + self.content_range + } +} + +pub(crate) struct StringNormalizer { + quoting: Quoting, + preferred_quote_style: QuoteStyle, + parent_docstring_quote_char: Option, + normalize_hex: bool, +} + +impl StringNormalizer { + pub(crate) fn from_context(context: &PyFormatContext<'_>) -> Self { + Self { + quoting: Quoting::default(), + preferred_quote_style: QuoteStyle::default(), + parent_docstring_quote_char: context.docstring(), + normalize_hex: is_hex_codes_in_unicode_sequences_enabled(context), + } + } + + pub(crate) fn with_preferred_quote_style(mut self, quote_style: QuoteStyle) -> Self { + self.preferred_quote_style = quote_style; + self + } + + pub(crate) fn with_quoting(mut self, quoting: Quoting) -> Self { + self.quoting = quoting; + self + } + + /// Computes the strings preferred quotes. + pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes { // Per PEP 8, always prefer double quotes for triple-quoted strings. // Except when using quote-style-preserve. - let preferred_style = if self.quotes.triple { + let preferred_style = if string.quotes().triple { // ... unless we're formatting a code snippet inside a docstring, // then we specifically want to invert our quote style to avoid // writing out invalid Python. @@ -353,39 +385,49 @@ impl StringPart { // Overall this is a bit of a corner case and just inverting the // style from what the parent ultimately decided upon works, even // if it doesn't have perfect alignment with PEP8. - if let Some(quote) = parent_docstring_quote_char { + if let Some(quote) = self.parent_docstring_quote_char { QuoteStyle::from(quote.invert()) - } else if configured_style.is_preserve() { + } else if self.preferred_quote_style.is_preserve() { QuoteStyle::Preserve } else { QuoteStyle::Double } } else { - configured_style + self.preferred_quote_style }; - let raw_content = &locator.slice(self.content_range); - - let quotes = match quoting { - Quoting::Preserve => self.quotes, + match self.quoting { + Quoting::Preserve => string.quotes(), Quoting::CanChange => { if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) { - if self.prefix.is_raw_string() { - choose_quotes_raw(raw_content, self.quotes, preferred_quote) + let raw_content = locator.slice(string.content_range()); + if string.prefix().is_raw_string() { + choose_quotes_for_raw_string(raw_content, string.quotes(), preferred_quote) } else { - choose_quotes(raw_content, self.quotes, preferred_quote) + choose_quotes_impl(raw_content, string.quotes(), preferred_quote) } } else { - self.quotes + string.quotes() } } - }; + } + } - let normalized = normalize_string(raw_content, quotes, self.prefix, normalize_hex); + /// Computes the strings preferred quotes and normalizes its content. + pub(crate) fn normalize<'a>( + &self, + string: &StringPart, + locator: &'a Locator, + ) -> NormalizedString<'a> { + let raw_content = locator.slice(string.content_range()); + + let quotes = self.choose_quotes(string, locator); + + let normalized = normalize_string(raw_content, quotes, string.prefix(), self.normalize_hex); NormalizedString { - prefix: self.prefix, - content_range: self.content_range, + prefix: string.prefix(), + content_range: string.content_range(), text: normalized, quotes, } @@ -512,7 +554,7 @@ impl Format> for StringPrefix { /// The preferred quote style is chosen unless the string contains unescaped quotes of the /// preferred style. For example, `r"foo"` is chosen over `r'foo'` if the preferred quote /// style is double quotes. -fn choose_quotes_raw( +fn choose_quotes_for_raw_string( input: &str, quotes: StringQuotes, preferred_quote: QuoteChar, @@ -571,7 +613,11 @@ fn choose_quotes_raw( /// For triple quoted strings, the preferred quote style is always used, unless the string contains /// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be /// used unless the string contains `"""`). -fn choose_quotes(input: &str, quotes: StringQuotes, preferred_quote: QuoteChar) -> StringQuotes { +fn choose_quotes_impl( + input: &str, + quotes: StringQuotes, + preferred_quote: QuoteChar, +) -> StringQuotes { let quote = if quotes.triple { // True if the string contains a triple quote sequence of the configured quote style. let mut uses_triple_quotes = false; @@ -780,7 +826,7 @@ impl TryFrom for QuoteChar { /// with the provided [`StringQuotes`] style. /// /// Returns the normalized string and whether it contains new lines. -fn normalize_string( +pub(crate) fn normalize_string( input: &str, quotes: StringQuotes, prefix: StringPrefix,