From 474e8fbcd4c8304ce8aa54139343376448b80aa4 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 24 Aug 2023 23:50:56 -0400 Subject: [PATCH] Format all attribute dot comments manually (#6825) ## Summary This PR modifies our formatting of comments around the `.` in an attribute. Specifically, the goal here is to avoid _reordering_ comments, and the net effect is that we generally leave comments where-they-are when dealing with comments between around the dot (which you can also think of as comments between attributes). All comments around the dot are now treated as dangling and formatted manually, with the exception of end-of-line or parenthesized comments on the value, like those marked as trailing here, which remain trailing: ```python ( ( a # trailing end-of-line # trailing own-line ) # dangling before dot end-of-line .b # trailing end-of-line ) ``` Closes https://github.com/astral-sh/ruff/issues/6823. ## Test Plan `cargo test` Before: | project | similarity index | |--------------|------------------| | cpython | 0.76050 | | django | 0.99820 | | transformers | 0.99800 | | twine | 0.99876 | | typeshed | 0.99953 | | warehouse | 0.99615 | | zulip | 0.99729 | After: | project | similarity index | |--------------|------------------| | cpython | 0.76050 | | django | 0.99820 | | transformers | 0.99800 | | twine | 0.99876 | | typeshed | 0.99953 | | warehouse | 0.99615 | | zulip | 0.99729 | --- .../fixtures/ruff/expression/attribute.py | 16 +++ .../src/comments/placement.rs | 128 +++++++++++------- .../src/expression/expr_attribute.rs | 114 +++++++--------- .../format@expression__attribute.py.snap | 68 ++++++++-- crates/ruff_python_trivia/src/tokenizer.rs | 18 +++ 5 files changed, 216 insertions(+), 128 deletions(-) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/attribute.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/attribute.py index 63fb3954dc..0219429b29 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/attribute.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/attribute.py @@ -109,3 +109,19 @@ x6 = ( # regression: https://github.com/astral-sh/ruff/issues/6181 (# ()).a + +( + ( + a # trailing end-of-line + # trailing own-line + ) # dangling before dot end-of-line + .b # trailing end-of-line +) + +( + ( + a + ) + # dangling before dot own-line + .b +) diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index 6d96c942a2..02cb2cc72e 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -3,7 +3,9 @@ use std::cmp::Ordering; use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::whitespace::indentation; use ruff_python_ast::{self as ast, Comprehension, Expr, MatchCase, Parameters, Ranged}; -use ruff_python_trivia::{indentation_at_offset, SimpleToken, SimpleTokenKind, SimpleTokenizer}; +use ruff_python_trivia::{ + find_only_token_in_range, indentation_at_offset, SimpleToken, SimpleTokenKind, SimpleTokenizer, +}; use ruff_source_file::Locator; use ruff_text_size::{TextLen, TextRange}; @@ -177,7 +179,9 @@ fn handle_enclosed_comment<'a>( AnyNodeRef::Comprehension(comprehension) => { handle_comprehension_comment(comment, comprehension, locator) } - AnyNodeRef::ExprAttribute(attribute) => handle_attribute_comment(comment, attribute), + AnyNodeRef::ExprAttribute(attribute) => { + handle_attribute_comment(comment, attribute, locator) + } AnyNodeRef::ExprBinOp(binary_expression) => { handle_trailing_binary_expression_left_or_operator_comment( comment, @@ -354,7 +358,7 @@ fn is_first_statement_in_body(statement: AnyNodeRef, has_body: AnyNodeRef) -> bo | AnyNodeRef::ExceptHandlerExceptHandler(ast::ExceptHandlerExceptHandler { body, .. }) - | AnyNodeRef::MatchCase(ast::MatchCase { body, .. }) + | AnyNodeRef::MatchCase(MatchCase { body, .. }) | AnyNodeRef::StmtFunctionDef(ast::StmtFunctionDef { body, .. }) | AnyNodeRef::StmtClassDef(ast::StmtClassDef { body, .. }) => { are_same_optional(statement, body.first()) @@ -973,47 +977,88 @@ fn handle_dict_unpacking_comment<'a>( /// Own line comments coming after the node are always dangling comments /// ```python /// ( -/// a -/// # trailing a comment -/// . # dangling comment -/// # or this +/// a # trailing comment on `a` +/// # dangling comment on the attribute +/// . # dangling comment on the attribute +/// # dangling comment on the attribute /// b /// ) /// ``` fn handle_attribute_comment<'a>( comment: DecoratedComment<'a>, attribute: &'a ast::ExprAttribute, + locator: &Locator, ) -> CommentPlacement<'a> { if comment.preceding_node().is_none() { // ```text // ( value) . attr // ^^^^ we're in this range // ``` - return CommentPlacement::leading(attribute.value.as_ref(), comment); } - // ```text - // value . attr - // ^^^^^^^ we're in this range + // If the comment is parenthesized, use the parentheses to either attach it as a trailing + // comment on the value or a dangling comment on the attribute. + // For example, treat this as trailing: + // ```python + // ( + // ( + // value + // # comment + // ) + // .attribute + // ) // ``` - debug_assert!( - TextRange::new(attribute.value.end(), attribute.attr.start()) - .contains(comment.slice().start()) - ); - if comment.line_position().is_end_of_line() { - // Attach as trailing comment to a. The specific placement is only relevant for fluent style - // ```python - // x322 = ( - // a - // . # end-of-line dot comment 2 - // b - // ) - // ``` - CommentPlacement::trailing(attribute.value.as_ref(), comment) - } else { - CommentPlacement::dangling(attribute, comment) + // + // However, treat this as dangling: + // ```python + // ( + // (value) + // # comment + // .attribute + // ) + // ``` + if let Some(right_paren) = SimpleTokenizer::starts_at(attribute.value.end(), locator.contents()) + .skip_trivia() + .take_while(|token| token.kind == SimpleTokenKind::RParen) + .last() + { + return if comment.start() < right_paren.start() { + CommentPlacement::trailing(attribute.value.as_ref(), comment) + } else { + CommentPlacement::dangling(comment.enclosing_node(), comment) + }; } + + // If the comment precedes the `.`, treat it as trailing _if_ it's on the same line as the + // value. For example, treat this as trailing: + // ```python + // ( + // value # comment + // .attribute + // ) + // ``` + // + // However, treat this as dangling: + // ```python + // ( + // value + // # comment + // .attribute + // ) + // ``` + if comment.line_position().is_end_of_line() { + let dot_token = find_only_token_in_range( + TextRange::new(attribute.value.end(), attribute.attr.start()), + SimpleTokenKind::Dot, + locator.contents(), + ); + if comment.end() < dot_token.start() { + return CommentPlacement::trailing(attribute.value.as_ref(), comment); + } + } + + CommentPlacement::dangling(comment.enclosing_node(), comment) } /// Assign comments between `if` and `test` and `else` and `orelse` as leading to the respective @@ -1050,7 +1095,7 @@ fn handle_expr_if_comment<'a>( let if_token = find_only_token_in_range( TextRange::new(body.end(), test.start()), SimpleTokenKind::If, - locator, + locator.contents(), ); // Between `if` and `test` if if_token.range.start() < comment.slice().start() && comment.slice().start() < test.start() { @@ -1060,7 +1105,7 @@ fn handle_expr_if_comment<'a>( let else_token = find_only_token_in_range( TextRange::new(test.end(), orelse.start()), SimpleTokenKind::Else, - locator, + locator.contents(), ); // Between `else` and `orelse` if else_token.range.start() < comment.slice().start() @@ -1132,7 +1177,7 @@ fn handle_with_item_comment<'a>( let as_token = find_only_token_in_range( TextRange::new(context_expr.end(), optional_vars.start()), SimpleTokenKind::As, - locator, + locator.contents(), ); if comment.end() < as_token.start() { @@ -1231,7 +1276,7 @@ fn handle_named_expr_comment<'a>( let colon_equal = find_only_token_in_range( TextRange::new(target.end(), value.start()), SimpleTokenKind::ColonEqual, - locator, + locator.contents(), ); if comment.end() < colon_equal.start() { @@ -1244,23 +1289,6 @@ fn handle_named_expr_comment<'a>( } } -/// Looks for a token in the range that contains no other tokens except for parentheses outside -/// the expression ranges -fn find_only_token_in_range( - range: TextRange, - token_kind: SimpleTokenKind, - locator: &Locator, -) -> SimpleToken { - let mut tokens = SimpleTokenizer::new(locator.contents(), range) - .skip_trivia() - .skip_while(|token| token.kind == SimpleTokenKind::RParen); - let token = tokens.next().expect("Expected a token"); - debug_assert_eq!(token.kind(), token_kind); - let mut tokens = tokens.skip_while(|token| token.kind == SimpleTokenKind::LParen); - debug_assert_eq!(tokens.next(), None); - token -} - /// Attach an end-of-line comment immediately following an open bracket as a dangling comment on /// enclosing node. /// @@ -1433,7 +1461,7 @@ fn handle_comprehension_comment<'a>( let in_token = find_only_token_in_range( TextRange::new(comprehension.target.end(), comprehension.iter.start()), SimpleTokenKind::In, - locator, + locator.contents(), ); // Comments between the target and the `in` @@ -1496,7 +1524,7 @@ fn handle_comprehension_comment<'a>( let if_token = find_only_token_in_range( TextRange::new(last_end, if_node.start()), SimpleTokenKind::If, - locator, + locator.contents(), ); if is_own_line { if last_end < comment.slice().start() && comment.slice().start() < if_token.start() { diff --git a/crates/ruff_python_formatter/src/expression/expr_attribute.rs b/crates/ruff_python_formatter/src/expression/expr_attribute.rs index a19c567fab..b21b25701b 100644 --- a/crates/ruff_python_formatter/src/expression/expr_attribute.rs +++ b/crates/ruff_python_formatter/src/expression/expr_attribute.rs @@ -1,8 +1,10 @@ use ruff_formatter::{write, FormatRuleWithOptions}; use ruff_python_ast::node::AnyNodeRef; -use ruff_python_ast::{Constant, Expr, ExprAttribute, ExprConstant}; +use ruff_python_ast::{Constant, Expr, ExprAttribute, ExprConstant, Ranged}; +use ruff_python_trivia::{find_only_token_in_range, SimpleTokenKind}; +use ruff_text_size::TextRange; -use crate::comments::{leading_comments, trailing_comments, SourceComment}; +use crate::comments::{dangling_comments, trailing_comments, SourceComment}; use crate::expression::parentheses::{ is_expression_parenthesized, NeedsParentheses, OptionalParentheses, Parentheses, }; @@ -44,13 +46,6 @@ impl FormatNodeRule for FormatExprAttribute { }) ); - let comments = f.context().comments().clone(); - let dangling_comments = comments.dangling(item); - let leading_attribute_comments_start = dangling_comments - .partition_point(|comment| comment.line_position().is_end_of_line()); - let (trailing_dot_comments, leading_attribute_comments) = - dangling_comments.split_at(leading_attribute_comments_start); - if needs_parentheses { value.format().with_options(Parentheses::Always).fmt(f)?; } else if call_chain_layout == CallChainLayout::Fluent { @@ -88,55 +83,54 @@ impl FormatNodeRule for FormatExprAttribute { value.format().fmt(f)?; } - if comments.has_trailing_own_line(value.as_ref()) { - hard_line_break().fmt(f)?; - } - - if call_chain_layout == CallChainLayout::Fluent { - // Fluent style has line breaks before the dot - // ```python - // blogs3 = ( - // Blog.objects.filter( - // entry__headline__contains="Lennon", - // ) - // .filter( - // entry__pub_date__year=2008, - // ) - // .filter( - // entry__pub_date__year=2008, - // ) - // ) - // ``` - write!( - f, - [ - (!leading_attribute_comments.is_empty()).then_some(hard_line_break()), - leading_comments(leading_attribute_comments), - text("."), - trailing_comments(trailing_dot_comments), - attr.format() - ] - ) + // Identify dangling comments before and after the dot: + // ```python + // ( + // ( + // a + // ) # `before_dot_end_of_line` + // # `before_dot_own_line` + // . # `after_dot_end_of_line` + // # `after_dot_own_line` + // b + // ) + // ``` + let comments = f.context().comments().clone(); + let dangling = comments.dangling(item); + let (before_dot, after_dot) = if dangling.is_empty() { + (dangling, dangling) } else { - // Regular style - // ```python - // blogs2 = Blog.objects.filter( - // entry__headline__contains="Lennon", - // ).filter( - // entry__pub_date__year=2008, - // ) - // ``` - write!( - f, - [ - text("."), - trailing_comments(trailing_dot_comments), - (!leading_attribute_comments.is_empty()).then_some(hard_line_break()), - leading_comments(leading_attribute_comments), - attr.format() - ] + let dot_token = find_only_token_in_range( + TextRange::new(item.value.end(), item.attr.start()), + SimpleTokenKind::Dot, + f.context().source(), + ); + dangling.split_at( + dangling.partition_point(|comment| comment.start() < dot_token.start()), ) - } + }; + + let (before_dot_end_of_line, before_dot_own_line) = before_dot.split_at( + before_dot.partition_point(|comment| comment.line_position().is_end_of_line()), + ); + + let (after_dot_end_of_line, after_dot_own_line) = after_dot.split_at( + after_dot.partition_point(|comment| comment.line_position().is_end_of_line()), + ); + + write!( + f, + [ + trailing_comments(before_dot_end_of_line), + (!before_dot.is_empty()).then_some(hard_line_break()), + dangling_comments(before_dot_own_line), + text("."), + trailing_comments(after_dot_end_of_line), + (!after_dot.is_empty()).then_some(hard_line_break()), + dangling_comments(after_dot_own_line), + attr.format() + ] + ) }); let is_call_chain_root = self.call_chain_layout == CallChainLayout::Default @@ -169,13 +163,7 @@ impl NeedsParentheses for ExprAttribute { == CallChainLayout::Fluent { OptionalParentheses::Multiline - } else if context - .comments() - .dangling(self) - .iter() - .any(|comment| comment.line_position().is_own_line()) - || context.comments().has_trailing_own_line(self) - { + } else if context.comments().has_dangling(self) { OptionalParentheses::Always } else { self.value.needs_parentheses(self.into(), context) diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__attribute.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__attribute.py.snap index 59623f33ac..14f17fc663 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__attribute.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__attribute.py.snap @@ -115,6 +115,22 @@ x6 = ( # regression: https://github.com/astral-sh/ruff/issues/6181 (# ()).a + +( + ( + a # trailing end-of-line + # trailing own-line + ) # dangling before dot end-of-line + .b # trailing end-of-line +) + +( + ( + a + ) + # dangling before dot own-line + .b +) ``` ## Output @@ -124,27 +140,28 @@ from argparse import Namespace a = Namespace() ( - a. + a # comment - b # trailing comment + .b # trailing comment ) ( - a. + a # comment - b # trailing dot comment # trailing identifier comment + .b # trailing dot comment # trailing identifier comment ) ( - a. + a # comment - b # trailing identifier comment + .b # trailing identifier comment ) ( - a. # trailing dot comment + a # comment + . # trailing dot comment # in between b # trailing identifier comment ) @@ -157,8 +174,9 @@ a.aaaaaaaaaaaaaaaaaaaaa.lllllllllllllllllllllllllllloooooooooong.chaaaaaaaaaaaaa # chain if and only if we need them, that is if there are own line comments inside # the chain. x1 = ( - a.b. # comment 2 + a.b # comment 1 + . # comment 2 # comment 3 c.d ) @@ -169,27 +187,33 @@ x21 = ( a.b # trailing name end-of-line ) x22 = ( - a. + a # outermost leading own line - b # outermost trailing end-of-line + .b # outermost trailing end-of-line ) x31 = ( - a. + a # own line between nodes 1 + .b +) +x321 = ( + a. # end-of-line dot comment b ) -x321 = a.b # end-of-line dot comment -x322 = a.b.c # end-of-line dot comment 2 +x322 = ( + a. # end-of-line dot comment 2 + b.c +) x331 = ( a. # own line between nodes 3 b ) x332 = ( - "". + "" # own line between nodes - find + .find ) x8 = (a + a).b @@ -207,6 +231,20 @@ x6 = ( ( # () ).a + +( + ( + a # trailing end-of-line + # trailing own-line + ) # dangling before dot end-of-line + .b # trailing end-of-line +) + +( + (a) + # dangling before dot own-line + .b +) ``` diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs index d767e1d59e..d0632abe4c 100644 --- a/crates/ruff_python_trivia/src/tokenizer.rs +++ b/crates/ruff_python_trivia/src/tokenizer.rs @@ -19,6 +19,24 @@ pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option SimpleToken { + let mut tokens = SimpleTokenizer::new(code, range) + .skip_trivia() + .skip_while(|token| token.kind == SimpleTokenKind::RParen); + let token = tokens.next().expect("Expected a token"); + debug_assert_eq!(token.kind(), token_kind); + let mut tokens = tokens.skip_while(|token| token.kind == SimpleTokenKind::LParen); + debug_assert_eq!(tokens.next(), None); + token +} + /// Returns the number of newlines between `offset` and the first non whitespace character in the source code. pub fn lines_before(offset: TextSize, code: &str) -> u32 { let mut cursor = Cursor::new(&code[TextRange::up_to(offset)]);