diff --git a/crates/ruff_python_formatter/generate.py b/crates/ruff_python_formatter/generate.py index 16fddd11fb..bf89ac1a4b 100755 --- a/crates/ruff_python_formatter/generate.py +++ b/crates/ruff_python_formatter/generate.py @@ -33,9 +33,15 @@ node_lines = ( nodes = [] for node_line in node_lines: node = node_line.split("(")[1].split(")")[0].split("::")[-1].split("<")[0] - # These nodes aren't used in the formatter as the formatting of them is handled - # in one of the other nodes containing them. - if node in ("FStringLiteralElement", "FStringExpressionElement"): + # `FString` and `StringLiteral` has a custom implementation while the formatting for + # `FStringLiteralElement` and `FStringExpressionElement` are handled by the `FString` + # implementation. + if node in ( + "FString", + "StringLiteral", + "FStringLiteralElement", + "FStringExpressionElement", + ): continue nodes.append(node) print(nodes) diff --git a/crates/ruff_python_formatter/src/context.rs b/crates/ruff_python_formatter/src/context.rs index 8407e7cdc4..b5dc85fcb3 100644 --- a/crates/ruff_python_formatter/src/context.rs +++ b/crates/ruff_python_formatter/src/context.rs @@ -1,5 +1,5 @@ use crate::comments::Comments; -use crate::expression::string::QuoteChar; +use crate::string::QuoteChar; use crate::PyFormatOptions; use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode}; use ruff_source_file::Locator; diff --git a/crates/ruff_python_formatter/src/expression/binary_like.rs b/crates/ruff_python_formatter/src/expression/binary_like.rs index 9a9feea584..3e153ad8c2 100644 --- a/crates/ruff_python_formatter/src/expression/binary_like.rs +++ b/crates/ruff_python_formatter/src/expression/binary_like.rs @@ -18,10 +18,10 @@ use crate::expression::parentheses::{ is_expression_parenthesized, write_in_parentheses_only_group_end_tag, write_in_parentheses_only_group_start_tag, Parentheses, }; -use crate::expression::string::{AnyString, FormatString, StringLayout}; use crate::expression::OperatorPrecedence; use crate::prelude::*; use crate::preview::is_fix_power_op_line_length_enabled; +use crate::string::{AnyString, FormatStringContinuation}; #[derive(Copy, Clone, Debug)] pub(super) enum BinaryLike<'a> { @@ -395,9 +395,10 @@ impl Format> for BinaryLike<'_> { [ operand.leading_binary_comments().map(leading_comments), leading_comments(comments.leading(&string_constant)), - FormatString::new(&string_constant).with_layout( - StringLayout::ImplicitConcatenatedStringInBinaryLike, - ), + // Call `FormatStringContinuation` directly to avoid formatting + // the implicitly concatenated string with the enclosing group + // because the group is added by the binary like formatting. + FormatStringContinuation::new(&string_constant), trailing_comments(comments.trailing(&string_constant)), operand.trailing_binary_comments().map(trailing_comments), line_suffix_boundary(), @@ -413,9 +414,10 @@ impl Format> for BinaryLike<'_> { f, [ leading_comments(comments.leading(&string_constant)), - FormatString::new(&string_constant).with_layout( - StringLayout::ImplicitConcatenatedStringInBinaryLike - ), + // Call `FormatStringContinuation` directly to avoid formatting + // the implicitly concatenated string with the enclosing group + // because the group is added by the binary like formatting. + FormatStringContinuation::new(&string_constant), trailing_comments(comments.trailing(&string_constant)), ] )?; diff --git a/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs b/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs index 2fc0cd474c..4869a2d536 100644 --- a/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs +++ b/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs @@ -3,16 +3,24 @@ use ruff_python_ast::ExprBytesLiteral; use crate::comments::SourceComment; use crate::expression::expr_string_literal::is_multiline_string; -use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; -use crate::expression::string::{AnyString, FormatString}; +use crate::expression::parentheses::{ + in_parentheses_only_group, NeedsParentheses, OptionalParentheses, +}; use crate::prelude::*; +use crate::string::{AnyString, FormatStringContinuation}; #[derive(Default)] pub struct FormatExprBytesLiteral; impl FormatNodeRule for FormatExprBytesLiteral { fn fmt_fields(&self, item: &ExprBytesLiteral, f: &mut PyFormatter) -> FormatResult<()> { - FormatString::new(&AnyString::Bytes(item)).fmt(f) + let ExprBytesLiteral { value, .. } = item; + + match value.as_slice() { + [bytes_literal] => bytes_literal.format().fmt(f), + _ => in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::Bytes(item))) + .fmt(f), + } } fn fmt_dangling_comments( diff --git a/crates/ruff_python_formatter/src/expression/expr_f_string.rs b/crates/ruff_python_formatter/src/expression/expr_f_string.rs index 12e112ecc1..8a8ac81d35 100644 --- a/crates/ruff_python_formatter/src/expression/expr_f_string.rs +++ b/crates/ruff_python_formatter/src/expression/expr_f_string.rs @@ -1,21 +1,35 @@ use memchr::memchr2; +use ruff_python_ast::{AnyNodeRef, ExprFString}; +use ruff_source_file::Locator; +use ruff_text_size::Ranged; + use crate::comments::SourceComment; -use ruff_formatter::FormatResult; -use ruff_python_ast::AnyNodeRef; -use ruff_python_ast::ExprFString; - -use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; +use crate::expression::parentheses::{ + in_parentheses_only_group, NeedsParentheses, OptionalParentheses, +}; +use crate::other::f_string_part::FormatFStringPart; use crate::prelude::*; - -use super::string::{AnyString, FormatString}; +use crate::string::{AnyString, FormatStringContinuation, Quoting}; #[derive(Default)] pub struct FormatExprFString; impl FormatNodeRule for FormatExprFString { fn fmt_fields(&self, item: &ExprFString, f: &mut PyFormatter) -> FormatResult<()> { - FormatString::new(&AnyString::FString(item)).fmt(f) + let ExprFString { value, .. } = item; + + match value.as_slice() { + [f_string_part] => FormatFStringPart::new( + f_string_part, + f_string_quoting(item, &f.context().locator()), + ) + .fmt(f), + _ => { + in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::FString(item))) + .fmt(f) + } + } } fn fmt_dangling_comments( @@ -43,3 +57,28 @@ impl NeedsParentheses for ExprFString { } } } + +pub(crate) fn f_string_quoting(f_string: &ExprFString, locator: &Locator) -> Quoting { + let unprefixed = locator + .slice(f_string.range()) + .trim_start_matches(|c| c != '"' && c != '\''); + let triple_quoted = unprefixed.starts_with(r#"""""#) || unprefixed.starts_with(r"'''"); + + if f_string + .value + .elements() + .filter_map(|element| element.as_expression()) + .any(|expression| { + let string_content = locator.slice(expression.range()); + if triple_quoted { + string_content.contains(r#"""""#) || string_content.contains("'''") + } else { + string_content.contains(['"', '\'']) + } + }) + { + Quoting::Preserve + } else { + Quoting::CanChange + } +} diff --git a/crates/ruff_python_formatter/src/expression/expr_string_literal.rs b/crates/ruff_python_formatter/src/expression/expr_string_literal.rs index 199fb740ef..442081886d 100644 --- a/crates/ruff_python_formatter/src/expression/expr_string_literal.rs +++ b/crates/ruff_python_formatter/src/expression/expr_string_literal.rs @@ -1,34 +1,66 @@ use ruff_formatter::FormatRuleWithOptions; -use ruff_python_ast::AnyNodeRef; -use ruff_python_ast::ExprStringLiteral; +use ruff_python_ast::{AnyNodeRef, ExprStringLiteral}; use ruff_text_size::{Ranged, TextLen, TextRange}; use crate::comments::SourceComment; -use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; -use crate::expression::string::{ - AnyString, FormatString, StringLayout, StringPrefix, StringQuotes, +use crate::expression::parentheses::{ + in_parentheses_only_group, NeedsParentheses, OptionalParentheses, }; +use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind}; use crate::prelude::*; +use crate::string::{AnyString, FormatStringContinuation, StringPrefix, StringQuotes}; #[derive(Default)] pub struct FormatExprStringLiteral { - layout: StringLayout, + kind: ExprStringLiteralKind, +} + +#[derive(Default, Copy, Clone, Debug)] +pub enum ExprStringLiteralKind { + #[default] + String, + Docstring, +} + +impl ExprStringLiteralKind { + const fn string_literal_kind(self) -> StringLiteralKind { + match self { + ExprStringLiteralKind::String => StringLiteralKind::String, + ExprStringLiteralKind::Docstring => StringLiteralKind::Docstring, + } + } + + const fn is_docstring(self) -> bool { + matches!(self, ExprStringLiteralKind::Docstring) + } } impl FormatRuleWithOptions> for FormatExprStringLiteral { - type Options = StringLayout; + type Options = ExprStringLiteralKind; fn with_options(mut self, options: Self::Options) -> Self { - self.layout = options; + self.kind = options; self } } impl FormatNodeRule for FormatExprStringLiteral { fn fmt_fields(&self, item: &ExprStringLiteral, f: &mut PyFormatter) -> FormatResult<()> { - FormatString::new(&AnyString::String(item)) - .with_layout(self.layout) - .fmt(f) + let ExprStringLiteral { value, .. } = item; + + match value.as_slice() { + [string_literal] => { + FormatStringLiteral::new(string_literal, self.kind.string_literal_kind()).fmt(f) + } + _ => { + // This is just a sanity check because [`DocstringStmt::try_from_statement`] + // ensures that the docstring is a *single* string literal. + assert!(!self.kind.is_docstring()); + + in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::String(item))) + } + .fmt(f), + } } fn fmt_dangling_comments( diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs index f2b020af49..335941316c 100644 --- a/crates/ruff_python_formatter/src/expression/mod.rs +++ b/crates/ruff_python_formatter/src/expression/mod.rs @@ -58,7 +58,6 @@ pub(crate) mod expr_yield; pub(crate) mod expr_yield_from; mod operator; pub(crate) mod parentheses; -pub(crate) mod string; #[derive(Copy, Clone, PartialEq, Eq, Default)] pub struct FormatExpr { diff --git a/crates/ruff_python_formatter/src/generated.rs b/crates/ruff_python_formatter/src/generated.rs index 5b01cd16f8..a5217a11d1 100644 --- a/crates/ruff_python_formatter/src/generated.rs +++ b/crates/ruff_python_formatter/src/generated.rs @@ -2943,70 +2943,6 @@ impl<'ast> IntoFormat> for ast::TypeParamParamSpec { } } -impl FormatRule> for crate::other::f_string::FormatFString { - #[inline] - fn fmt(&self, node: &ast::FString, f: &mut PyFormatter) -> FormatResult<()> { - FormatNodeRule::::fmt(self, node, f) - } -} -impl<'ast> AsFormat> for ast::FString { - type Format<'a> = FormatRefWithRule< - 'a, - ast::FString, - crate::other::f_string::FormatFString, - PyFormatContext<'ast>, - >; - fn format(&self) -> Self::Format<'_> { - FormatRefWithRule::new(self, crate::other::f_string::FormatFString::default()) - } -} -impl<'ast> IntoFormat> for ast::FString { - type Format = FormatOwnedWithRule< - ast::FString, - crate::other::f_string::FormatFString, - PyFormatContext<'ast>, - >; - fn into_format(self) -> Self::Format { - FormatOwnedWithRule::new(self, crate::other::f_string::FormatFString::default()) - } -} - -impl FormatRule> - for crate::other::string_literal::FormatStringLiteral -{ - #[inline] - fn fmt(&self, node: &ast::StringLiteral, f: &mut PyFormatter) -> FormatResult<()> { - FormatNodeRule::::fmt(self, node, f) - } -} -impl<'ast> AsFormat> for ast::StringLiteral { - type Format<'a> = FormatRefWithRule< - 'a, - ast::StringLiteral, - crate::other::string_literal::FormatStringLiteral, - PyFormatContext<'ast>, - >; - fn format(&self) -> Self::Format<'_> { - FormatRefWithRule::new( - self, - crate::other::string_literal::FormatStringLiteral::default(), - ) - } -} -impl<'ast> IntoFormat> for ast::StringLiteral { - type Format = FormatOwnedWithRule< - ast::StringLiteral, - crate::other::string_literal::FormatStringLiteral, - PyFormatContext<'ast>, - >; - fn into_format(self) -> Self::Format { - FormatOwnedWithRule::new( - self, - crate::other::string_literal::FormatStringLiteral::default(), - ) - } -} - impl FormatRule> for crate::other::bytes_literal::FormatBytesLiteral { diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index beaa4070c0..05f122606b 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -36,6 +36,7 @@ mod prelude; mod preview; mod shared_traits; pub(crate) mod statement; +pub(crate) mod string; pub(crate) mod type_param; mod verbatim; diff --git a/crates/ruff_python_formatter/src/other/bytes_literal.rs b/crates/ruff_python_formatter/src/other/bytes_literal.rs index 55117241f8..c6445c8d6a 100644 --- a/crates/ruff_python_formatter/src/other/bytes_literal.rs +++ b/crates/ruff_python_formatter/src/other/bytes_literal.rs @@ -1,12 +1,23 @@ use ruff_python_ast::BytesLiteral; +use ruff_text_size::Ranged; use crate::prelude::*; +use crate::string::{Quoting, StringPart}; #[derive(Default)] pub struct FormatBytesLiteral; impl FormatNodeRule for FormatBytesLiteral { - fn fmt_fields(&self, _item: &BytesLiteral, _f: &mut PyFormatter) -> FormatResult<()> { - unreachable!("Handled inside of `FormatExprBytesLiteral`"); + fn fmt_fields(&self, item: &BytesLiteral, f: &mut PyFormatter) -> FormatResult<()> { + let locator = f.context().locator(); + + StringPart::from_source(item.range(), &locator) + .normalize( + Quoting::CanChange, + &locator, + f.options().quote_style(), + f.context().docstring(), + ) + .fmt(f) } } diff --git a/crates/ruff_python_formatter/src/other/f_string.rs b/crates/ruff_python_formatter/src/other/f_string.rs index e08254aba7..da81162c2e 100644 --- a/crates/ruff_python_formatter/src/other/f_string.rs +++ b/crates/ruff_python_formatter/src/other/f_string.rs @@ -1,12 +1,49 @@ use ruff_python_ast::FString; +use ruff_text_size::Ranged; use crate::prelude::*; +use crate::string::{Quoting, StringPart}; -#[derive(Default)] -pub struct FormatFString; +/// Formats an f-string which is part of a larger f-string expression. +/// +/// For example, this would be used to format the f-string part in `"foo" f"bar {x}"` +/// or the standalone f-string in `f"foo {x} bar"`. +pub(crate) struct FormatFString<'a> { + value: &'a FString, + /// The quoting of an f-string. This is determined by the parent node + /// (f-string expression) and is required to format an f-string correctly. + quoting: Quoting, +} -impl FormatNodeRule for FormatFString { - fn fmt_fields(&self, _item: &FString, _f: &mut PyFormatter) -> FormatResult<()> { - unreachable!("Handled inside of `FormatExprFString`"); +impl<'a> FormatFString<'a> { + pub(crate) fn new(value: &'a FString, quoting: Quoting) -> Self { + Self { value, quoting } + } +} + +impl Format> for FormatFString<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + let locator = f.context().locator(); + + let result = StringPart::from_source(self.value.range(), &locator) + .normalize( + self.quoting, + &locator, + f.options().quote_style(), + f.context().docstring(), + ) + .fmt(f); + + // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings. + // This is to mark all of those comments as formatted but we need to + // figure out how to handle them. Note that this needs to be done only + // after the f-string is formatted, so only for all the non-formatted + // comments. + let comments = f.context().comments(); + self.value.elements.iter().for_each(|value| { + comments.mark_verbatim_node_comments_formatted(value.into()); + }); + + result } } diff --git a/crates/ruff_python_formatter/src/other/f_string_part.rs b/crates/ruff_python_formatter/src/other/f_string_part.rs new file mode 100644 index 0000000000..c471b5fc8c --- /dev/null +++ b/crates/ruff_python_formatter/src/other/f_string_part.rs @@ -0,0 +1,39 @@ +use ruff_python_ast::FStringPart; + +use crate::other::f_string::FormatFString; +use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind}; +use crate::prelude::*; +use crate::string::Quoting; + +/// Formats an f-string part which is either a string literal or an f-string. +/// +/// This delegates the actual formatting to the appropriate formatter. +pub(crate) struct FormatFStringPart<'a> { + part: &'a FStringPart, + /// The quoting to be used for all the f-string parts. This is determined by + /// the parent node (f-string expression) and is required to format all parts + /// correctly. + quoting: Quoting, +} + +impl<'a> FormatFStringPart<'a> { + pub(crate) fn new(part: &'a FStringPart, quoting: Quoting) -> Self { + Self { part, quoting } + } +} + +impl Format> for FormatFStringPart<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + match self.part { + FStringPart::Literal(string_literal) => FormatStringLiteral::new( + string_literal, + // If an f-string part is a string literal, the f-string is always + // implicitly concatenated e.g., `"foo" f"bar {x}"`. A standalone + // string literal would be a string expression, not an f-string. + StringLiteralKind::InImplicitlyConcatenatedFString(self.quoting), + ) + .fmt(f), + FStringPart::FString(f_string) => FormatFString::new(f_string, self.quoting).fmt(f), + } + } +} diff --git a/crates/ruff_python_formatter/src/other/mod.rs b/crates/ruff_python_formatter/src/other/mod.rs index c980a14c0f..d07339f717 100644 --- a/crates/ruff_python_formatter/src/other/mod.rs +++ b/crates/ruff_python_formatter/src/other/mod.rs @@ -7,6 +7,7 @@ pub(crate) mod decorator; pub(crate) mod elif_else_clause; pub(crate) mod except_handler_except_handler; pub(crate) mod f_string; +pub(crate) mod f_string_part; pub(crate) mod identifier; pub(crate) mod keyword; pub(crate) mod match_case; diff --git a/crates/ruff_python_formatter/src/other/string_literal.rs b/crates/ruff_python_formatter/src/other/string_literal.rs index 291552db73..e23db85707 100644 --- a/crates/ruff_python_formatter/src/other/string_literal.rs +++ b/crates/ruff_python_formatter/src/other/string_literal.rs @@ -1,12 +1,72 @@ use ruff_python_ast::StringLiteral; +use ruff_text_size::Ranged; use crate::prelude::*; +use crate::string::{docstring, Quoting, StringPart}; +use crate::QuoteStyle; -#[derive(Default)] -pub struct FormatStringLiteral; +pub(crate) struct FormatStringLiteral<'a> { + value: &'a StringLiteral, + layout: StringLiteralKind, +} -impl FormatNodeRule for FormatStringLiteral { - fn fmt_fields(&self, _item: &StringLiteral, _f: &mut PyFormatter) -> FormatResult<()> { - unreachable!("Handled inside of `FormatExprStringLiteral`"); +impl<'a> FormatStringLiteral<'a> { + pub(crate) fn new(value: &'a StringLiteral, layout: StringLiteralKind) -> Self { + Self { value, layout } + } +} + +/// The kind of a string literal. +#[derive(Copy, Clone, Debug, Default)] +pub(crate) enum StringLiteralKind { + /// A normal string literal e.g., `"foo"`. + #[default] + String, + /// A string literal used as a docstring. + Docstring, + /// A string literal that is implicitly concatenated with an f-string. This + /// makes the overall expression an f-string whose quoting detection comes + /// from the parent node (f-string expression). + InImplicitlyConcatenatedFString(Quoting), +} + +impl StringLiteralKind { + /// Checks if this string literal is a docstring. + pub(crate) const fn is_docstring(self) -> bool { + matches!(self, StringLiteralKind::Docstring) + } + + /// Returns the quoting to be used for this string literal. + fn quoting(self) -> Quoting { + match self { + StringLiteralKind::String | StringLiteralKind::Docstring => Quoting::CanChange, + StringLiteralKind::InImplicitlyConcatenatedFString(quoting) => quoting, + } + } +} + +impl Format> for FormatStringLiteral<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + let locator = f.context().locator(); + + let quote_style = if self.layout.is_docstring() { + // Per PEP 8 and PEP 257, always prefer double quotes for docstrings + QuoteStyle::Double + } else { + f.options().quote_style() + }; + + let normalized = StringPart::from_source(self.value.range(), &locator).normalize( + self.layout.quoting(), + &locator, + quote_style, + f.context().docstring(), + ); + + if self.layout.is_docstring() { + docstring::format(&normalized, f) + } else { + normalized.fmt(f) + } } } diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 1b33dbd41a..8b3d9e1a5e 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -9,7 +9,7 @@ use crate::comments::{ leading_comments, trailing_comments, Comments, LeadingDanglingTrailingComments, }; use crate::context::{NodeLevel, TopLevelStatementPosition, WithIndentLevel, WithNodeLevel}; -use crate::expression::string::StringLayout; +use crate::expression::expr_string_literal::ExprStringLiteralKind; use crate::prelude::*; use crate::statement::stmt_expr::FormatStmtExpr; use crate::verbatim::{ @@ -609,7 +609,7 @@ impl Format> for DocstringStmt<'_> { leading_comments(node_comments.leading), string_literal .format() - .with_options(StringLayout::DocString), + .with_options(ExprStringLiteralKind::Docstring), ] )?; diff --git a/crates/ruff_python_formatter/src/expression/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs similarity index 99% rename from crates/ruff_python_formatter/src/expression/string/docstring.rs rename to crates/ruff_python_formatter/src/string/docstring.rs index 9037edcd91..51fee063ca 100644 --- a/crates/ruff_python_formatter/src/expression/string/docstring.rs +++ b/crates/ruff_python_formatter/src/string/docstring.rs @@ -102,7 +102,7 @@ use super::{NormalizedString, QuoteChar}; /// line c /// """ /// ``` -pub(super) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> { +pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> { let docstring = &normalized.text; // Black doesn't change the indentation of docstrings that contain an escaped newline diff --git a/crates/ruff_python_formatter/src/expression/string/mod.rs b/crates/ruff_python_formatter/src/string/mod.rs similarity index 79% rename from crates/ruff_python_formatter/src/expression/string/mod.rs rename to crates/ruff_python_formatter/src/string/mod.rs index 135594be3a..57c11cd622 100644 --- a/crates/ruff_python_formatter/src/expression/string/mod.rs +++ b/crates/ruff_python_formatter/src/string/mod.rs @@ -5,35 +5,41 @@ use bitflags::bitflags; use ruff_formatter::{format_args, write}; use ruff_python_ast::AnyNodeRef; use ruff_python_ast::{ - self as ast, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef, + self as ast, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef, }; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::comments::{leading_comments, trailing_comments}; -use crate::expression::parentheses::{ - in_parentheses_only_group, in_parentheses_only_soft_line_break_or_space, -}; -use crate::expression::Expr; +use crate::expression::expr_f_string::f_string_quoting; +use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space; +use crate::other::f_string::FormatFString; +use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind}; use crate::prelude::*; use crate::QuoteStyle; -mod docstring; +pub(crate) mod docstring; -#[derive(Copy, Clone, Debug)] -enum Quoting { +#[derive(Copy, Clone, Debug, Default)] +pub(crate) enum Quoting { + #[default] CanChange, Preserve, } +/// Represents any kind of string expression. This could be either a string, +/// bytes or f-string. #[derive(Clone, Debug)] -pub(super) enum AnyString<'a> { +pub(crate) enum AnyString<'a> { String(&'a ExprStringLiteral), Bytes(&'a ExprBytesLiteral), FString(&'a ExprFString), } impl<'a> AnyString<'a> { + /// Creates a new [`AnyString`] from the given [`Expr`]. + /// + /// Returns `None` if the expression is not either a string, bytes or f-string. pub(crate) fn from_expression(expression: &'a Expr) -> Option> { match expression { Expr::StringLiteral(string) => Some(AnyString::String(string)), @@ -43,39 +49,8 @@ impl<'a> AnyString<'a> { } } - fn quoting(&self, locator: &Locator) -> Quoting { - match self { - Self::String(_) | Self::Bytes(_) => Quoting::CanChange, - Self::FString(f_string) => { - let unprefixed = locator - .slice(f_string.range) - .trim_start_matches(|c| c != '"' && c != '\''); - let triple_quoted = - unprefixed.starts_with(r#"""""#) || unprefixed.starts_with(r"'''"); - if f_string.value.elements().any(|element| match element { - ast::FStringElement::Expression(ast::FStringExpressionElement { - range, - .. - }) => { - let string_content = locator.slice(*range); - if triple_quoted { - string_content.contains(r#"""""#) || string_content.contains("'''") - } else { - string_content.contains(['"', '\'']) - } - } - ast::FStringElement::Literal(_) => false, - }) { - Quoting::Preserve - } else { - Quoting::CanChange - } - } - } - } - /// Returns `true` if the string is implicitly concatenated. - pub(super) fn is_implicit_concatenated(&self) -> bool { + pub(crate) fn is_implicit_concatenated(&self) -> bool { match self { Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(), Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(), @@ -83,21 +58,38 @@ impl<'a> AnyString<'a> { } } - fn parts(&self) -> Vec> { + /// Returns the quoting to be used for this string. + fn quoting(&self, locator: &Locator<'_>) -> Quoting { match self { - Self::String(ExprStringLiteral { value, .. }) => { - value.iter().map(AnyStringPart::String).collect() - } + Self::String(_) | Self::Bytes(_) => Quoting::CanChange, + Self::FString(f_string) => f_string_quoting(f_string, locator), + } + } + + /// Returns a vector of all the [`AnyStringPart`] of this string. + fn parts(&self, quoting: Quoting) -> Vec> { + match self { + Self::String(ExprStringLiteral { value, .. }) => value + .iter() + .map(|part| AnyStringPart::String { + part, + layout: StringLiteralKind::String, + }) + .collect(), Self::Bytes(ExprBytesLiteral { value, .. }) => { value.iter().map(AnyStringPart::Bytes).collect() } Self::FString(ExprFString { value, .. }) => value .iter() .map(|f_string_part| match f_string_part { - ast::FStringPart::Literal(string_literal) => { - AnyStringPart::String(string_literal) - } - ast::FStringPart::FString(f_string) => AnyStringPart::FString(f_string), + ast::FStringPart::Literal(string_literal) => AnyStringPart::String { + part: string_literal, + layout: StringLiteralKind::InImplicitlyConcatenatedFString(quoting), + }, + ast::FStringPart::FString(f_string) => AnyStringPart::FString { + part: f_string, + quoting, + }, }) .collect(), } @@ -134,19 +126,29 @@ impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> { } } +/// Represents any kind of string which is part of an implicitly concatenated +/// string. This could be either a string, bytes or f-string. +/// +/// This is constructed from the [`AnyString::parts`] method on [`AnyString`]. #[derive(Clone, Debug)] enum AnyStringPart<'a> { - String(&'a ast::StringLiteral), + String { + part: &'a ast::StringLiteral, + layout: StringLiteralKind, + }, Bytes(&'a ast::BytesLiteral), - FString(&'a ast::FString), + FString { + part: &'a ast::FString, + quoting: Quoting, + }, } impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> { fn from(value: &AnyStringPart<'a>) -> Self { match value { - AnyStringPart::String(part) => AnyNodeRef::StringLiteral(part), + AnyStringPart::String { part, .. } => AnyNodeRef::StringLiteral(part), AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part), - AnyStringPart::FString(part) => AnyNodeRef::FString(part), + AnyStringPart::FString { part, .. } => AnyNodeRef::FString(part), } } } @@ -154,99 +156,33 @@ impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> { impl Ranged for AnyStringPart<'_> { fn range(&self) -> TextRange { match self { - Self::String(part) => part.range(), + Self::String { part, .. } => part.range(), Self::Bytes(part) => part.range(), - Self::FString(part) => part.range(), + Self::FString { part, .. } => part.range(), } } } -pub(super) struct FormatString<'a> { - string: &'a AnyString<'a>, - layout: StringLayout, -} - -#[derive(Default, Copy, Clone, Debug)] -pub enum StringLayout { - #[default] - Default, - DocString, - /// An implicit concatenated string in a binary like (e.g. `a + b` or `a < b`) expression. - /// - /// Formats the implicit concatenated string parts without the enclosing group because the group - /// is added by the binary like formatting. - ImplicitConcatenatedStringInBinaryLike, -} - -impl<'a> FormatString<'a> { - pub(super) fn new(string: &'a AnyString<'a>) -> Self { - Self { - string, - layout: StringLayout::Default, - } - } - - pub(super) fn with_layout(mut self, layout: StringLayout) -> Self { - self.layout = layout; - self - } -} - -impl<'a> Format> for FormatString<'a> { +impl Format> for AnyStringPart<'_> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { - let parent_docstring_quote_style = f.context().docstring(); - let locator = f.context().locator(); - let result = match self.layout { - StringLayout::Default => { - if self.string.is_implicit_concatenated() { - in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f) - } else { - StringPart::from_source(self.string.range(), &locator) - .normalize( - self.string.quoting(&locator), - &locator, - f.options().quote_style(), - parent_docstring_quote_style, - ) - .fmt(f) - } + match self { + AnyStringPart::String { part, layout } => { + FormatStringLiteral::new(part, *layout).fmt(f) } - StringLayout::DocString => { - let string_part = StringPart::from_source(self.string.range(), &locator); - let normalized = string_part.normalize( - Quoting::CanChange, - &locator, - // Per PEP 8 and PEP 257, always prefer double quotes for docstrings - QuoteStyle::Double, - parent_docstring_quote_style, - ); - docstring::format(&normalized, f) - } - StringLayout::ImplicitConcatenatedStringInBinaryLike => { - FormatStringContinuation::new(self.string).fmt(f) - } - }; - // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings. - // This is to mark all of those comments as formatted but we need to - // figure out how to handle them. Note that this needs to be done only - // after the f-string is formatted, so only for all the non-formatted - // comments. - if let AnyString::FString(fstring) = self.string { - let comments = f.context().comments(); - fstring.value.elements().for_each(|value| { - comments.mark_verbatim_node_comments_formatted(value.into()); - }); + AnyStringPart::Bytes(bytes_literal) => bytes_literal.format().fmt(f), + AnyStringPart::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f), } - result } } -struct FormatStringContinuation<'a> { +/// Formats any implicitly concatenated string. This could be any valid combination +/// of string, bytes or f-string literals. +pub(crate) struct FormatStringContinuation<'a> { string: &'a AnyString<'a>, } impl<'a> FormatStringContinuation<'a> { - fn new(string: &'a AnyString<'a>) -> Self { + pub(crate) fn new(string: &'a AnyString<'a>) -> Self { Self { string } } } @@ -254,24 +190,15 @@ impl<'a> FormatStringContinuation<'a> { impl Format> for FormatStringContinuation<'_> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { let comments = f.context().comments().clone(); - let locator = f.context().locator(); - let in_docstring = f.context().docstring(); - let quote_style = f.options().quote_style(); + let quoting = self.string.quoting(&f.context().locator()); let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space()); - for part in self.string.parts() { - let normalized = StringPart::from_source(part.range(), &locator).normalize( - self.string.quoting(&locator), - &locator, - quote_style, - in_docstring, - ); - + for part in self.string.parts(quoting) { joiner.entry(&format_args![ line_suffix_boundary(), leading_comments(comments.leading(&part)), - normalized, + part, trailing_comments(comments.trailing(&part)) ]); } @@ -281,7 +208,7 @@ impl Format> for FormatStringContinuation<'_> { } #[derive(Debug)] -struct StringPart { +pub(crate) struct StringPart { /// The prefix. prefix: StringPrefix, @@ -293,7 +220,7 @@ struct StringPart { } impl StringPart { - fn from_source(range: TextRange, locator: &Locator) -> Self { + pub(crate) fn from_source(range: TextRange, locator: &Locator) -> Self { let string_content = locator.slice(range); let prefix = StringPrefix::parse(string_content); @@ -320,7 +247,7 @@ impl StringPart { /// snippet within the docstring. The quote style should correspond to the /// style of quotes used by said docstring. Normalization will ensure the /// quoting styles don't conflict. - fn normalize<'a>( + pub(crate) fn normalize<'a>( self, quoting: Quoting, locator: &'a Locator, @@ -412,7 +339,7 @@ impl StringPart { } #[derive(Debug)] -struct NormalizedString<'a> { +pub(crate) struct NormalizedString<'a> { prefix: StringPrefix, /// The quotes of the normalized string (preferred quotes) @@ -448,7 +375,7 @@ impl Format> for NormalizedString<'_> { bitflags! { #[derive(Copy, Clone, Debug, PartialEq, Eq)] - pub(super) struct StringPrefix: u8 { + pub(crate) struct StringPrefix: u8 { const UNICODE = 0b0000_0001; /// `r"test"` const RAW = 0b0000_0010; @@ -460,7 +387,7 @@ bitflags! { } impl StringPrefix { - pub(super) fn parse(input: &str) -> StringPrefix { + pub(crate) fn parse(input: &str) -> StringPrefix { let chars = input.chars(); let mut prefix = StringPrefix::empty(); @@ -485,7 +412,7 @@ impl StringPrefix { prefix } - pub(super) const fn text_len(self) -> TextSize { + pub(crate) const fn text_len(self) -> TextSize { TextSize::new(self.bits().count_ones()) } @@ -688,13 +615,13 @@ fn choose_quotes(input: &str, quotes: StringQuotes, preferred_quote: QuoteChar) } #[derive(Copy, Clone, Debug)] -pub(super) struct StringQuotes { +pub(crate) struct StringQuotes { triple: bool, quote_char: QuoteChar, } impl StringQuotes { - pub(super) fn parse(input: &str) -> Option { + pub(crate) fn parse(input: &str) -> Option { let mut chars = input.chars(); let quote_char = chars.next()?; @@ -708,7 +635,7 @@ impl StringQuotes { }) } - pub(super) const fn is_triple(self) -> bool { + pub(crate) const fn is_triple(self) -> bool { self.triple }