Split string formatting to individual nodes (#9058)

This PR splits the string formatting code in the formatter to be handled
by the respective nodes.

Previously, the string formatting was done through a single
`FormatString` interface. Now, the nodes themselves are responsible for
formatting.

The following changes were made:
1. Remove `StringLayout::ImplicitConcatenatedStringInBinaryLike` and
inline the call to `FormatStringContinuation`. Previously, the
binary-like formatting would delegate to `FormatString`, which would then
delegate to `FormatStringContinuation`. This removes the intermediary
steps.
2. Add formatter implementation for `FStringPart` which delegates it to
the respective string literal or f-string node.
3. Add `ExprStringLiteralKind` which is either `String` or `Docstring`.
If it's a docstring variant, then the string expression would not be
implicitly concatenated. This is guaranteed by the
`DocstringStmt::try_from_expression` constructor.
4. Add `StringLiteralKind` which is one of `String`, `Docstring`, or
`InImplicitlyConcatenatedFString`. The last variant is for when the
string literal is implicitly concatenated with an f-string (`"foo" f"bar
{x}"`).
5. Remove `FormatString`.
6. Extract the f-string quote detection as a standalone function which
is public to the crate. This is used to detect the quote to be used for
an f-string at the expression level (`ExprFString` or
`FormatStringContinuation`).


### Formatter ecosystem result

**This PR**

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |

**main**

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |
This commit is contained in:
Dhruv Manilawala 2023-12-14 12:55:10 -06:00 committed by GitHub
parent 28b1aa201b
commit 189e947808
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 364 additions and 266 deletions

View File

@ -33,9 +33,15 @@ node_lines = (
nodes = [] nodes = []
for node_line in node_lines: for node_line in node_lines:
node = node_line.split("(")[1].split(")")[0].split("::")[-1].split("<")[0] node = node_line.split("(")[1].split(")")[0].split("::")[-1].split("<")[0]
# These nodes aren't used in the formatter as the formatting of them is handled # `FString` and `StringLiteral` has a custom implementation while the formatting for
# in one of the other nodes containing them. # `FStringLiteralElement` and `FStringExpressionElement` are handled by the `FString`
if node in ("FStringLiteralElement", "FStringExpressionElement"): # implementation.
if node in (
"FString",
"StringLiteral",
"FStringLiteralElement",
"FStringExpressionElement",
):
continue continue
nodes.append(node) nodes.append(node)
print(nodes) print(nodes)

View File

@ -1,5 +1,5 @@
use crate::comments::Comments; use crate::comments::Comments;
use crate::expression::string::QuoteChar; use crate::string::QuoteChar;
use crate::PyFormatOptions; use crate::PyFormatOptions;
use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode}; use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode};
use ruff_source_file::Locator; use ruff_source_file::Locator;

View File

@ -18,10 +18,10 @@ use crate::expression::parentheses::{
is_expression_parenthesized, write_in_parentheses_only_group_end_tag, is_expression_parenthesized, write_in_parentheses_only_group_end_tag,
write_in_parentheses_only_group_start_tag, Parentheses, write_in_parentheses_only_group_start_tag, Parentheses,
}; };
use crate::expression::string::{AnyString, FormatString, StringLayout};
use crate::expression::OperatorPrecedence; use crate::expression::OperatorPrecedence;
use crate::prelude::*; use crate::prelude::*;
use crate::preview::is_fix_power_op_line_length_enabled; use crate::preview::is_fix_power_op_line_length_enabled;
use crate::string::{AnyString, FormatStringContinuation};
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub(super) enum BinaryLike<'a> { pub(super) enum BinaryLike<'a> {
@ -395,9 +395,10 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
[ [
operand.leading_binary_comments().map(leading_comments), operand.leading_binary_comments().map(leading_comments),
leading_comments(comments.leading(&string_constant)), leading_comments(comments.leading(&string_constant)),
FormatString::new(&string_constant).with_layout( // Call `FormatStringContinuation` directly to avoid formatting
StringLayout::ImplicitConcatenatedStringInBinaryLike, // the implicitly concatenated string with the enclosing group
), // because the group is added by the binary like formatting.
FormatStringContinuation::new(&string_constant),
trailing_comments(comments.trailing(&string_constant)), trailing_comments(comments.trailing(&string_constant)),
operand.trailing_binary_comments().map(trailing_comments), operand.trailing_binary_comments().map(trailing_comments),
line_suffix_boundary(), line_suffix_boundary(),
@ -413,9 +414,10 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
f, f,
[ [
leading_comments(comments.leading(&string_constant)), leading_comments(comments.leading(&string_constant)),
FormatString::new(&string_constant).with_layout( // Call `FormatStringContinuation` directly to avoid formatting
StringLayout::ImplicitConcatenatedStringInBinaryLike // the implicitly concatenated string with the enclosing group
), // because the group is added by the binary like formatting.
FormatStringContinuation::new(&string_constant),
trailing_comments(comments.trailing(&string_constant)), trailing_comments(comments.trailing(&string_constant)),
] ]
)?; )?;

View File

@ -3,16 +3,24 @@ use ruff_python_ast::ExprBytesLiteral;
use crate::comments::SourceComment; use crate::comments::SourceComment;
use crate::expression::expr_string_literal::is_multiline_string; use crate::expression::expr_string_literal::is_multiline_string;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; use crate::expression::parentheses::{
use crate::expression::string::{AnyString, FormatString}; in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
};
use crate::prelude::*; use crate::prelude::*;
use crate::string::{AnyString, FormatStringContinuation};
#[derive(Default)] #[derive(Default)]
pub struct FormatExprBytesLiteral; pub struct FormatExprBytesLiteral;
impl FormatNodeRule<ExprBytesLiteral> for FormatExprBytesLiteral { impl FormatNodeRule<ExprBytesLiteral> for FormatExprBytesLiteral {
fn fmt_fields(&self, item: &ExprBytesLiteral, f: &mut PyFormatter) -> FormatResult<()> { fn fmt_fields(&self, item: &ExprBytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
FormatString::new(&AnyString::Bytes(item)).fmt(f) let ExprBytesLiteral { value, .. } = item;
match value.as_slice() {
[bytes_literal] => bytes_literal.format().fmt(f),
_ => in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::Bytes(item)))
.fmt(f),
}
} }
fn fmt_dangling_comments( fn fmt_dangling_comments(

View File

@ -1,21 +1,35 @@
use memchr::memchr2; use memchr::memchr2;
use ruff_python_ast::{AnyNodeRef, ExprFString};
use ruff_source_file::Locator;
use ruff_text_size::Ranged;
use crate::comments::SourceComment; use crate::comments::SourceComment;
use ruff_formatter::FormatResult; use crate::expression::parentheses::{
use ruff_python_ast::AnyNodeRef; in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
use ruff_python_ast::ExprFString; };
use crate::other::f_string_part::FormatFStringPart;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::prelude::*; use crate::prelude::*;
use crate::string::{AnyString, FormatStringContinuation, Quoting};
use super::string::{AnyString, FormatString};
#[derive(Default)] #[derive(Default)]
pub struct FormatExprFString; pub struct FormatExprFString;
impl FormatNodeRule<ExprFString> for FormatExprFString { impl FormatNodeRule<ExprFString> for FormatExprFString {
fn fmt_fields(&self, item: &ExprFString, f: &mut PyFormatter) -> FormatResult<()> { fn fmt_fields(&self, item: &ExprFString, f: &mut PyFormatter) -> FormatResult<()> {
FormatString::new(&AnyString::FString(item)).fmt(f) let ExprFString { value, .. } = item;
match value.as_slice() {
[f_string_part] => FormatFStringPart::new(
f_string_part,
f_string_quoting(item, &f.context().locator()),
)
.fmt(f),
_ => {
in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::FString(item)))
.fmt(f)
}
}
} }
fn fmt_dangling_comments( fn fmt_dangling_comments(
@ -43,3 +57,28 @@ impl NeedsParentheses for ExprFString {
} }
} }
} }
/// Decides whether the quotes of an f-string expression may be changed.
///
/// The source text of `f_string` is inspected to find out whether it is
/// triple quoted (any prefix characters before the first quote are skipped).
/// Then the source text of every expression element is checked:
///
/// * for a triple-quoted f-string, an expression containing `"""` or `'''`
///   forces [`Quoting::Preserve`];
/// * otherwise, an expression containing any `"` or `'` forces
///   [`Quoting::Preserve`].
///
/// If no expression element triggers either rule, [`Quoting::CanChange`] is
/// returned.
pub(crate) fn f_string_quoting(f_string: &ExprFString, locator: &Locator) -> Quoting {
    // Drop the string prefix (everything before the first quote character).
    let source = locator.slice(f_string.range());
    let after_prefix = source.trim_start_matches(|c| c != '"' && c != '\'');
    let is_triple_quoted = after_prefix.starts_with(r#"""""#) || after_prefix.starts_with(r"'''");

    // Does the given expression text contain quotes that forbid re-quoting?
    let has_conflicting_quotes = |text: &str| {
        if is_triple_quoted {
            text.contains(r#"""""#) || text.contains("'''")
        } else {
            text.contains(['"', '\''])
        }
    };

    let must_preserve = f_string
        .value
        .elements()
        .filter_map(|element| element.as_expression())
        .any(|expression| has_conflicting_quotes(locator.slice(expression.range())));

    if must_preserve {
        Quoting::Preserve
    } else {
        Quoting::CanChange
    }
}

View File

@ -1,34 +1,66 @@
use ruff_formatter::FormatRuleWithOptions; use ruff_formatter::FormatRuleWithOptions;
use ruff_python_ast::AnyNodeRef; use ruff_python_ast::{AnyNodeRef, ExprStringLiteral};
use ruff_python_ast::ExprStringLiteral;
use ruff_text_size::{Ranged, TextLen, TextRange}; use ruff_text_size::{Ranged, TextLen, TextRange};
use crate::comments::SourceComment; use crate::comments::SourceComment;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; use crate::expression::parentheses::{
use crate::expression::string::{ in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
AnyString, FormatString, StringLayout, StringPrefix, StringQuotes,
}; };
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*; use crate::prelude::*;
use crate::string::{AnyString, FormatStringContinuation, StringPrefix, StringQuotes};
#[derive(Default)] #[derive(Default)]
pub struct FormatExprStringLiteral { pub struct FormatExprStringLiteral {
layout: StringLayout, kind: ExprStringLiteralKind,
}
/// The kind of string literal expression being formatted.
///
/// This is the option type accepted by `FormatExprStringLiteral::with_options`.
#[derive(Default, Copy, Clone, Debug)]
pub enum ExprStringLiteralKind {
/// A regular string literal expression. This is the default.
#[default]
String,
/// A string literal expression used as a docstring. The formatter asserts
/// that such an expression is never implicitly concatenated.
Docstring,
}
impl ExprStringLiteralKind {
const fn string_literal_kind(self) -> StringLiteralKind {
match self {
ExprStringLiteralKind::String => StringLiteralKind::String,
ExprStringLiteralKind::Docstring => StringLiteralKind::Docstring,
}
}
const fn is_docstring(self) -> bool {
matches!(self, ExprStringLiteralKind::Docstring)
}
} }
impl FormatRuleWithOptions<ExprStringLiteral, PyFormatContext<'_>> for FormatExprStringLiteral { impl FormatRuleWithOptions<ExprStringLiteral, PyFormatContext<'_>> for FormatExprStringLiteral {
type Options = StringLayout; type Options = ExprStringLiteralKind;
fn with_options(mut self, options: Self::Options) -> Self { fn with_options(mut self, options: Self::Options) -> Self {
self.layout = options; self.kind = options;
self self
} }
} }
impl FormatNodeRule<ExprStringLiteral> for FormatExprStringLiteral { impl FormatNodeRule<ExprStringLiteral> for FormatExprStringLiteral {
fn fmt_fields(&self, item: &ExprStringLiteral, f: &mut PyFormatter) -> FormatResult<()> { fn fmt_fields(&self, item: &ExprStringLiteral, f: &mut PyFormatter) -> FormatResult<()> {
FormatString::new(&AnyString::String(item)) let ExprStringLiteral { value, .. } = item;
.with_layout(self.layout)
.fmt(f) match value.as_slice() {
[string_literal] => {
FormatStringLiteral::new(string_literal, self.kind.string_literal_kind()).fmt(f)
}
_ => {
// This is just a sanity check because [`DocstringStmt::try_from_statement`]
// ensures that the docstring is a *single* string literal.
assert!(!self.kind.is_docstring());
in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::String(item)))
}
.fmt(f),
}
} }
fn fmt_dangling_comments( fn fmt_dangling_comments(

View File

@ -58,7 +58,6 @@ pub(crate) mod expr_yield;
pub(crate) mod expr_yield_from; pub(crate) mod expr_yield_from;
mod operator; mod operator;
pub(crate) mod parentheses; pub(crate) mod parentheses;
pub(crate) mod string;
#[derive(Copy, Clone, PartialEq, Eq, Default)] #[derive(Copy, Clone, PartialEq, Eq, Default)]
pub struct FormatExpr { pub struct FormatExpr {

View File

@ -2943,70 +2943,6 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::TypeParamParamSpec {
} }
} }
impl FormatRule<ast::FString, PyFormatContext<'_>> for crate::other::f_string::FormatFString {
#[inline]
fn fmt(&self, node: &ast::FString, f: &mut PyFormatter) -> FormatResult<()> {
FormatNodeRule::<ast::FString>::fmt(self, node, f)
}
}
impl<'ast> AsFormat<PyFormatContext<'ast>> for ast::FString {
type Format<'a> = FormatRefWithRule<
'a,
ast::FString,
crate::other::f_string::FormatFString,
PyFormatContext<'ast>,
>;
fn format(&self) -> Self::Format<'_> {
FormatRefWithRule::new(self, crate::other::f_string::FormatFString::default())
}
}
impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::FString {
type Format = FormatOwnedWithRule<
ast::FString,
crate::other::f_string::FormatFString,
PyFormatContext<'ast>,
>;
fn into_format(self) -> Self::Format {
FormatOwnedWithRule::new(self, crate::other::f_string::FormatFString::default())
}
}
impl FormatRule<ast::StringLiteral, PyFormatContext<'_>>
for crate::other::string_literal::FormatStringLiteral
{
#[inline]
fn fmt(&self, node: &ast::StringLiteral, f: &mut PyFormatter) -> FormatResult<()> {
FormatNodeRule::<ast::StringLiteral>::fmt(self, node, f)
}
}
impl<'ast> AsFormat<PyFormatContext<'ast>> for ast::StringLiteral {
type Format<'a> = FormatRefWithRule<
'a,
ast::StringLiteral,
crate::other::string_literal::FormatStringLiteral,
PyFormatContext<'ast>,
>;
fn format(&self) -> Self::Format<'_> {
FormatRefWithRule::new(
self,
crate::other::string_literal::FormatStringLiteral::default(),
)
}
}
impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::StringLiteral {
type Format = FormatOwnedWithRule<
ast::StringLiteral,
crate::other::string_literal::FormatStringLiteral,
PyFormatContext<'ast>,
>;
fn into_format(self) -> Self::Format {
FormatOwnedWithRule::new(
self,
crate::other::string_literal::FormatStringLiteral::default(),
)
}
}
impl FormatRule<ast::BytesLiteral, PyFormatContext<'_>> impl FormatRule<ast::BytesLiteral, PyFormatContext<'_>>
for crate::other::bytes_literal::FormatBytesLiteral for crate::other::bytes_literal::FormatBytesLiteral
{ {

View File

@ -36,6 +36,7 @@ mod prelude;
mod preview; mod preview;
mod shared_traits; mod shared_traits;
pub(crate) mod statement; pub(crate) mod statement;
pub(crate) mod string;
pub(crate) mod type_param; pub(crate) mod type_param;
mod verbatim; mod verbatim;

View File

@ -1,12 +1,23 @@
use ruff_python_ast::BytesLiteral; use ruff_python_ast::BytesLiteral;
use ruff_text_size::Ranged;
use crate::prelude::*; use crate::prelude::*;
use crate::string::{Quoting, StringPart};
#[derive(Default)] #[derive(Default)]
pub struct FormatBytesLiteral; pub struct FormatBytesLiteral;
impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral { impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral {
fn fmt_fields(&self, _item: &BytesLiteral, _f: &mut PyFormatter) -> FormatResult<()> { fn fmt_fields(&self, item: &BytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
unreachable!("Handled inside of `FormatExprBytesLiteral`"); let locator = f.context().locator();
StringPart::from_source(item.range(), &locator)
.normalize(
Quoting::CanChange,
&locator,
f.options().quote_style(),
f.context().docstring(),
)
.fmt(f)
} }
} }

View File

@ -1,12 +1,49 @@
use ruff_python_ast::FString; use ruff_python_ast::FString;
use ruff_text_size::Ranged;
use crate::prelude::*; use crate::prelude::*;
use crate::string::{Quoting, StringPart};
#[derive(Default)] /// Formats an f-string which is part of a larger f-string expression.
pub struct FormatFString; ///
/// For example, this would be used to format the f-string part in `"foo" f"bar {x}"`
/// or the standalone f-string in `f"foo {x} bar"`.
pub(crate) struct FormatFString<'a> {
value: &'a FString,
/// The quoting of an f-string. This is determined by the parent node
/// (f-string expression) and is required to format an f-string correctly.
quoting: Quoting,
}
impl FormatNodeRule<FString> for FormatFString { impl<'a> FormatFString<'a> {
fn fmt_fields(&self, _item: &FString, _f: &mut PyFormatter) -> FormatResult<()> { pub(crate) fn new(value: &'a FString, quoting: Quoting) -> Self {
unreachable!("Handled inside of `FormatExprFString`"); Self { value, quoting }
}
}
impl Format<PyFormatContext<'_>> for FormatFString<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let locator = f.context().locator();
let result = StringPart::from_source(self.value.range(), &locator)
.normalize(
self.quoting,
&locator,
f.options().quote_style(),
f.context().docstring(),
)
.fmt(f);
// TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
// This is to mark all of those comments as formatted but we need to
// figure out how to handle them. Note that this needs to be done only
// after the f-string is formatted, so only for all the non-formatted
// comments.
let comments = f.context().comments();
self.value.elements.iter().for_each(|value| {
comments.mark_verbatim_node_comments_formatted(value.into());
});
result
} }
} }

View File

@ -0,0 +1,39 @@
use ruff_python_ast::FStringPart;
use crate::other::f_string::FormatFString;
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*;
use crate::string::Quoting;
/// Formatter for one [`FStringPart`], i.e. either a plain string literal or
/// an f-string that belongs to an f-string expression.
///
/// No formatting happens here directly; the work is forwarded to
/// [`FormatStringLiteral`] or [`FormatFString`] depending on the variant.
pub(crate) struct FormatFStringPart<'a> {
    part: &'a FStringPart,
    /// Quoting shared by every part of the f-string expression. The parent
    /// node (the f-string expression) computes it and hands it down so that
    /// all parts are formatted consistently.
    quoting: Quoting,
}

impl<'a> FormatFStringPart<'a> {
    /// Creates a formatter for `part` using the `quoting` chosen by the parent.
    pub(crate) fn new(part: &'a FStringPart, quoting: Quoting) -> Self {
        FormatFStringPart { part, quoting }
    }
}

impl Format<PyFormatContext<'_>> for FormatFStringPart<'_> {
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let quoting = self.quoting;

        match self.part {
            FStringPart::FString(f_string) => FormatFString::new(f_string, quoting).fmt(f),
            FStringPart::Literal(string_literal) => {
                // A string literal can only show up as an f-string part when
                // the f-string is implicitly concatenated, e.g.
                // `"foo" f"bar {x}"`. On its own it would be a string
                // expression rather than an f-string.
                let kind = StringLiteralKind::InImplicitlyConcatenatedFString(quoting);
                FormatStringLiteral::new(string_literal, kind).fmt(f)
            }
        }
    }
}

View File

@ -7,6 +7,7 @@ pub(crate) mod decorator;
pub(crate) mod elif_else_clause; pub(crate) mod elif_else_clause;
pub(crate) mod except_handler_except_handler; pub(crate) mod except_handler_except_handler;
pub(crate) mod f_string; pub(crate) mod f_string;
pub(crate) mod f_string_part;
pub(crate) mod identifier; pub(crate) mod identifier;
pub(crate) mod keyword; pub(crate) mod keyword;
pub(crate) mod match_case; pub(crate) mod match_case;

View File

@ -1,12 +1,72 @@
use ruff_python_ast::StringLiteral; use ruff_python_ast::StringLiteral;
use ruff_text_size::Ranged;
use crate::prelude::*; use crate::prelude::*;
use crate::string::{docstring, Quoting, StringPart};
use crate::QuoteStyle;
#[derive(Default)] pub(crate) struct FormatStringLiteral<'a> {
pub struct FormatStringLiteral; value: &'a StringLiteral,
layout: StringLiteralKind,
}
impl FormatNodeRule<StringLiteral> for FormatStringLiteral { impl<'a> FormatStringLiteral<'a> {
fn fmt_fields(&self, _item: &StringLiteral, _f: &mut PyFormatter) -> FormatResult<()> { pub(crate) fn new(value: &'a StringLiteral, layout: StringLiteralKind) -> Self {
unreachable!("Handled inside of `FormatExprStringLiteral`"); Self { value, layout }
}
}
/// The kind of a string literal.
#[derive(Copy, Clone, Debug, Default)]
pub(crate) enum StringLiteralKind {
/// A normal string literal e.g., `"foo"`.
#[default]
String,
/// A string literal used as a docstring.
Docstring,
/// A string literal that is implicitly concatenated with an f-string. This
/// makes the overall expression an f-string whose quoting detection comes
/// from the parent node (f-string expression).
InImplicitlyConcatenatedFString(Quoting),
}
impl StringLiteralKind {
    /// Returns `true` only for the [`StringLiteralKind::Docstring`] variant.
    pub(crate) const fn is_docstring(self) -> bool {
        match self {
            Self::Docstring => true,
            Self::String | Self::InImplicitlyConcatenatedFString(_) => false,
        }
    }

    /// Returns the quoting that applies to this string literal.
    ///
    /// A literal inside an implicitly concatenated f-string uses the quoting
    /// it carries; every other kind may have its quotes changed.
    fn quoting(self) -> Quoting {
        match self {
            Self::InImplicitlyConcatenatedFString(inherited) => inherited,
            Self::String | Self::Docstring => Quoting::CanChange,
        }
    }
}
impl Format<PyFormatContext<'_>> for FormatStringLiteral<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let locator = f.context().locator();
let quote_style = if self.layout.is_docstring() {
// Per PEP 8 and PEP 257, always prefer double quotes for docstrings
QuoteStyle::Double
} else {
f.options().quote_style()
};
let normalized = StringPart::from_source(self.value.range(), &locator).normalize(
self.layout.quoting(),
&locator,
quote_style,
f.context().docstring(),
);
if self.layout.is_docstring() {
docstring::format(&normalized, f)
} else {
normalized.fmt(f)
}
} }
} }

View File

@ -9,7 +9,7 @@ use crate::comments::{
leading_comments, trailing_comments, Comments, LeadingDanglingTrailingComments, leading_comments, trailing_comments, Comments, LeadingDanglingTrailingComments,
}; };
use crate::context::{NodeLevel, TopLevelStatementPosition, WithIndentLevel, WithNodeLevel}; use crate::context::{NodeLevel, TopLevelStatementPosition, WithIndentLevel, WithNodeLevel};
use crate::expression::string::StringLayout; use crate::expression::expr_string_literal::ExprStringLiteralKind;
use crate::prelude::*; use crate::prelude::*;
use crate::statement::stmt_expr::FormatStmtExpr; use crate::statement::stmt_expr::FormatStmtExpr;
use crate::verbatim::{ use crate::verbatim::{
@ -609,7 +609,7 @@ impl Format<PyFormatContext<'_>> for DocstringStmt<'_> {
leading_comments(node_comments.leading), leading_comments(node_comments.leading),
string_literal string_literal
.format() .format()
.with_options(StringLayout::DocString), .with_options(ExprStringLiteralKind::Docstring),
] ]
)?; )?;

View File

@ -102,7 +102,7 @@ use super::{NormalizedString, QuoteChar};
/// line c /// line c
/// """ /// """
/// ``` /// ```
pub(super) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> { pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
let docstring = &normalized.text; let docstring = &normalized.text;
// Black doesn't change the indentation of docstrings that contain an escaped newline // Black doesn't change the indentation of docstrings that contain an escaped newline

View File

@ -5,35 +5,41 @@ use bitflags::bitflags;
use ruff_formatter::{format_args, write}; use ruff_formatter::{format_args, write};
use ruff_python_ast::AnyNodeRef; use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::{ use ruff_python_ast::{
self as ast, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef, self as ast, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
}; };
use ruff_source_file::Locator; use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::comments::{leading_comments, trailing_comments}; use crate::comments::{leading_comments, trailing_comments};
use crate::expression::parentheses::{ use crate::expression::expr_f_string::f_string_quoting;
in_parentheses_only_group, in_parentheses_only_soft_line_break_or_space, use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space;
}; use crate::other::f_string::FormatFString;
use crate::expression::Expr; use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*; use crate::prelude::*;
use crate::QuoteStyle; use crate::QuoteStyle;
mod docstring; pub(crate) mod docstring;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug, Default)]
enum Quoting { pub(crate) enum Quoting {
#[default]
CanChange, CanChange,
Preserve, Preserve,
} }
/// Represents any kind of string expression. This could be either a string,
/// bytes or f-string.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub(super) enum AnyString<'a> { pub(crate) enum AnyString<'a> {
String(&'a ExprStringLiteral), String(&'a ExprStringLiteral),
Bytes(&'a ExprBytesLiteral), Bytes(&'a ExprBytesLiteral),
FString(&'a ExprFString), FString(&'a ExprFString),
} }
impl<'a> AnyString<'a> { impl<'a> AnyString<'a> {
/// Creates a new [`AnyString`] from the given [`Expr`].
///
/// Returns `None` if the expression is not either a string, bytes or f-string.
pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> { pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
match expression { match expression {
Expr::StringLiteral(string) => Some(AnyString::String(string)), Expr::StringLiteral(string) => Some(AnyString::String(string)),
@ -43,39 +49,8 @@ impl<'a> AnyString<'a> {
} }
} }
fn quoting(&self, locator: &Locator) -> Quoting {
match self {
Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
Self::FString(f_string) => {
let unprefixed = locator
.slice(f_string.range)
.trim_start_matches(|c| c != '"' && c != '\'');
let triple_quoted =
unprefixed.starts_with(r#"""""#) || unprefixed.starts_with(r"'''");
if f_string.value.elements().any(|element| match element {
ast::FStringElement::Expression(ast::FStringExpressionElement {
range,
..
}) => {
let string_content = locator.slice(*range);
if triple_quoted {
string_content.contains(r#"""""#) || string_content.contains("'''")
} else {
string_content.contains(['"', '\''])
}
}
ast::FStringElement::Literal(_) => false,
}) {
Quoting::Preserve
} else {
Quoting::CanChange
}
}
}
}
/// Returns `true` if the string is implicitly concatenated. /// Returns `true` if the string is implicitly concatenated.
pub(super) fn is_implicit_concatenated(&self) -> bool { pub(crate) fn is_implicit_concatenated(&self) -> bool {
match self { match self {
Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(), Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(), Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
@ -83,21 +58,38 @@ impl<'a> AnyString<'a> {
} }
} }
fn parts(&self) -> Vec<AnyStringPart<'a>> { /// Returns the quoting to be used for this string.
fn quoting(&self, locator: &Locator<'_>) -> Quoting {
match self { match self {
Self::String(ExprStringLiteral { value, .. }) => { Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
value.iter().map(AnyStringPart::String).collect() Self::FString(f_string) => f_string_quoting(f_string, locator),
} }
}
/// Returns a vector of all the [`AnyStringPart`] of this string.
fn parts(&self, quoting: Quoting) -> Vec<AnyStringPart<'a>> {
match self {
Self::String(ExprStringLiteral { value, .. }) => value
.iter()
.map(|part| AnyStringPart::String {
part,
layout: StringLiteralKind::String,
})
.collect(),
Self::Bytes(ExprBytesLiteral { value, .. }) => { Self::Bytes(ExprBytesLiteral { value, .. }) => {
value.iter().map(AnyStringPart::Bytes).collect() value.iter().map(AnyStringPart::Bytes).collect()
} }
Self::FString(ExprFString { value, .. }) => value Self::FString(ExprFString { value, .. }) => value
.iter() .iter()
.map(|f_string_part| match f_string_part { .map(|f_string_part| match f_string_part {
ast::FStringPart::Literal(string_literal) => { ast::FStringPart::Literal(string_literal) => AnyStringPart::String {
AnyStringPart::String(string_literal) part: string_literal,
} layout: StringLiteralKind::InImplicitlyConcatenatedFString(quoting),
ast::FStringPart::FString(f_string) => AnyStringPart::FString(f_string), },
ast::FStringPart::FString(f_string) => AnyStringPart::FString {
part: f_string,
quoting,
},
}) })
.collect(), .collect(),
} }
@ -134,19 +126,29 @@ impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
} }
} }
/// Represents any kind of string which is part of an implicitly concatenated
/// string. This could be either a string, bytes or f-string.
///
/// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
enum AnyStringPart<'a> { enum AnyStringPart<'a> {
String(&'a ast::StringLiteral), String {
part: &'a ast::StringLiteral,
layout: StringLiteralKind,
},
Bytes(&'a ast::BytesLiteral), Bytes(&'a ast::BytesLiteral),
FString(&'a ast::FString), FString {
part: &'a ast::FString,
quoting: Quoting,
},
} }
impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> { impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
fn from(value: &AnyStringPart<'a>) -> Self { fn from(value: &AnyStringPart<'a>) -> Self {
match value { match value {
AnyStringPart::String(part) => AnyNodeRef::StringLiteral(part), AnyStringPart::String { part, .. } => AnyNodeRef::StringLiteral(part),
AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part), AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part),
AnyStringPart::FString(part) => AnyNodeRef::FString(part), AnyStringPart::FString { part, .. } => AnyNodeRef::FString(part),
} }
} }
} }
@ -154,99 +156,33 @@ impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
impl Ranged for AnyStringPart<'_> { impl Ranged for AnyStringPart<'_> {
fn range(&self) -> TextRange { fn range(&self) -> TextRange {
match self { match self {
Self::String(part) => part.range(), Self::String { part, .. } => part.range(),
Self::Bytes(part) => part.range(), Self::Bytes(part) => part.range(),
Self::FString(part) => part.range(), Self::FString { part, .. } => part.range(),
} }
} }
} }
pub(super) struct FormatString<'a> { impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
string: &'a AnyString<'a>,
layout: StringLayout,
}
#[derive(Default, Copy, Clone, Debug)]
pub enum StringLayout {
#[default]
Default,
DocString,
/// An implicit concatenated string in a binary like (e.g. `a + b` or `a < b`) expression.
///
/// Formats the implicit concatenated string parts without the enclosing group because the group
/// is added by the binary like formatting.
ImplicitConcatenatedStringInBinaryLike,
}
impl<'a> FormatString<'a> {
pub(super) fn new(string: &'a AnyString<'a>) -> Self {
Self {
string,
layout: StringLayout::Default,
}
}
pub(super) fn with_layout(mut self, layout: StringLayout) -> Self {
self.layout = layout;
self
}
}
impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let parent_docstring_quote_style = f.context().docstring(); match self {
let locator = f.context().locator(); AnyStringPart::String { part, layout } => {
let result = match self.layout { FormatStringLiteral::new(part, *layout).fmt(f)
StringLayout::Default => {
if self.string.is_implicit_concatenated() {
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
} else {
StringPart::from_source(self.string.range(), &locator)
.normalize(
self.string.quoting(&locator),
&locator,
f.options().quote_style(),
parent_docstring_quote_style,
)
.fmt(f)
} }
AnyStringPart::Bytes(bytes_literal) => bytes_literal.format().fmt(f),
AnyStringPart::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f),
} }
StringLayout::DocString => {
let string_part = StringPart::from_source(self.string.range(), &locator);
let normalized = string_part.normalize(
Quoting::CanChange,
&locator,
// Per PEP 8 and PEP 257, always prefer double quotes for docstrings
QuoteStyle::Double,
parent_docstring_quote_style,
);
docstring::format(&normalized, f)
}
StringLayout::ImplicitConcatenatedStringInBinaryLike => {
FormatStringContinuation::new(self.string).fmt(f)
}
};
// TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
// This is to mark all of those comments as formatted but we need to
// figure out how to handle them. Note that this needs to be done only
// after the f-string is formatted, so only for all the non-formatted
// comments.
if let AnyString::FString(fstring) = self.string {
let comments = f.context().comments();
fstring.value.elements().for_each(|value| {
comments.mark_verbatim_node_comments_formatted(value.into());
});
}
result
} }
} }
struct FormatStringContinuation<'a> { /// Formats any implicitly concatenated string. This could be any valid combination
/// of string, bytes or f-string literals.
pub(crate) struct FormatStringContinuation<'a> {
string: &'a AnyString<'a>, string: &'a AnyString<'a>,
} }
impl<'a> FormatStringContinuation<'a> { impl<'a> FormatStringContinuation<'a> {
fn new(string: &'a AnyString<'a>) -> Self { pub(crate) fn new(string: &'a AnyString<'a>) -> Self {
Self { string } Self { string }
} }
} }
@ -254,24 +190,15 @@ impl<'a> FormatStringContinuation<'a> {
impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> { impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let comments = f.context().comments().clone(); let comments = f.context().comments().clone();
let locator = f.context().locator(); let quoting = self.string.quoting(&f.context().locator());
let in_docstring = f.context().docstring();
let quote_style = f.options().quote_style();
let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space()); let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
for part in self.string.parts() { for part in self.string.parts(quoting) {
let normalized = StringPart::from_source(part.range(), &locator).normalize(
self.string.quoting(&locator),
&locator,
quote_style,
in_docstring,
);
joiner.entry(&format_args![ joiner.entry(&format_args![
line_suffix_boundary(), line_suffix_boundary(),
leading_comments(comments.leading(&part)), leading_comments(comments.leading(&part)),
normalized, part,
trailing_comments(comments.trailing(&part)) trailing_comments(comments.trailing(&part))
]); ]);
} }
@ -281,7 +208,7 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
} }
#[derive(Debug)] #[derive(Debug)]
struct StringPart { pub(crate) struct StringPart {
/// The prefix. /// The prefix.
prefix: StringPrefix, prefix: StringPrefix,
@ -293,7 +220,7 @@ struct StringPart {
} }
impl StringPart { impl StringPart {
fn from_source(range: TextRange, locator: &Locator) -> Self { pub(crate) fn from_source(range: TextRange, locator: &Locator) -> Self {
let string_content = locator.slice(range); let string_content = locator.slice(range);
let prefix = StringPrefix::parse(string_content); let prefix = StringPrefix::parse(string_content);
@ -320,7 +247,7 @@ impl StringPart {
/// snippet within the docstring. The quote style should correspond to the /// snippet within the docstring. The quote style should correspond to the
/// style of quotes used by said docstring. Normalization will ensure the /// style of quotes used by said docstring. Normalization will ensure the
/// quoting styles don't conflict. /// quoting styles don't conflict.
fn normalize<'a>( pub(crate) fn normalize<'a>(
self, self,
quoting: Quoting, quoting: Quoting,
locator: &'a Locator, locator: &'a Locator,
@ -412,7 +339,7 @@ impl StringPart {
} }
#[derive(Debug)] #[derive(Debug)]
struct NormalizedString<'a> { pub(crate) struct NormalizedString<'a> {
prefix: StringPrefix, prefix: StringPrefix,
/// The quotes of the normalized string (preferred quotes) /// The quotes of the normalized string (preferred quotes)
@ -448,7 +375,7 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
bitflags! { bitflags! {
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(super) struct StringPrefix: u8 { pub(crate) struct StringPrefix: u8 {
const UNICODE = 0b0000_0001; const UNICODE = 0b0000_0001;
/// `r"test"` /// `r"test"`
const RAW = 0b0000_0010; const RAW = 0b0000_0010;
@ -460,7 +387,7 @@ bitflags! {
} }
impl StringPrefix { impl StringPrefix {
pub(super) fn parse(input: &str) -> StringPrefix { pub(crate) fn parse(input: &str) -> StringPrefix {
let chars = input.chars(); let chars = input.chars();
let mut prefix = StringPrefix::empty(); let mut prefix = StringPrefix::empty();
@ -485,7 +412,7 @@ impl StringPrefix {
prefix prefix
} }
pub(super) const fn text_len(self) -> TextSize { pub(crate) const fn text_len(self) -> TextSize {
TextSize::new(self.bits().count_ones()) TextSize::new(self.bits().count_ones())
} }
@ -688,13 +615,13 @@ fn choose_quotes(input: &str, quotes: StringQuotes, preferred_quote: QuoteChar)
} }
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub(super) struct StringQuotes { pub(crate) struct StringQuotes {
triple: bool, triple: bool,
quote_char: QuoteChar, quote_char: QuoteChar,
} }
impl StringQuotes { impl StringQuotes {
pub(super) fn parse(input: &str) -> Option<StringQuotes> { pub(crate) fn parse(input: &str) -> Option<StringQuotes> {
let mut chars = input.chars(); let mut chars = input.chars();
let quote_char = chars.next()?; let quote_char = chars.next()?;
@ -708,7 +635,7 @@ impl StringQuotes {
}) })
} }
pub(super) const fn is_triple(self) -> bool { pub(crate) const fn is_triple(self) -> bool {
self.triple self.triple
} }