From 0cd453bdf096ca76d7b7c11611e0947360430b67 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 30 May 2023 11:28:01 +0200 Subject: [PATCH] Generic "comment to node" association logic (#4642) --- Cargo.lock | 1 - Cargo.toml | 1 + crates/ruff_formatter/src/source_code.rs | 4 + crates/ruff_python_ast/Cargo.toml | 2 +- .../src/source_code/comment_ranges.rs | 50 ++ .../src/source_code/indexer.rs | 15 +- crates/ruff_python_ast/src/source_code/mod.rs | 2 + crates/ruff_python_ast/src/whitespace.rs | 11 + crates/ruff_python_formatter/Cargo.toml | 1 - .../src/comments/debug.rs | 80 +- .../ruff_python_formatter/src/comments/map.rs | 7 +- .../ruff_python_formatter/src/comments/mod.rs | 295 +++++++- .../src/comments/placement.rs | 13 + ...matter__comments__debug__tests__debug.snap | 25 +- ...formatter__comments__tests__base_test.snap | 66 ++ ...er__comments__tests__dangling_comment.snap | 21 + ...ormatter__comments__tests__empty_file.snap | 5 + ...__comments__tests__leading_most_outer.snap | 21 + ...atter__comments__tests__only_comments.snap | 26 + ...ents__tests__parenthesized_expression.snap | 36 + ...tests__parenthesized_trailing_comment.snap | 21 + ...comments__tests__trailing_after_comma.snap | 21 + ...nts__tests__trailing_function_comment.snap | 21 + ..._comments__tests__trailing_most_outer.snap | 36 + ...ts__tests__trailing_most_outer_nested.snap | 36 + .../src/comments/visitor.rs | 687 ++++++++++++++++++ crates/ruff_python_formatter/src/context.rs | 42 +- crates/ruff_python_formatter/src/lib.rs | 91 ++- crates/ruff_python_formatter/src/main.rs | 2 +- 29 files changed, 1574 insertions(+), 65 deletions(-) create mode 100644 crates/ruff_python_ast/src/source_code/comment_ranges.rs create mode 100644 crates/ruff_python_formatter/src/comments/placement.rs create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__base_test.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__dangling_comment.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__empty_file.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__leading_most_outer.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__only_comments.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__parenthesized_expression.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__parenthesized_trailing_comment.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_after_comma.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_function_comment.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_most_outer.snap create mode 100644 crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_most_outer_nested.snap create mode 100644 crates/ruff_python_formatter/src/comments/visitor.rs diff --git a/Cargo.lock b/Cargo.lock index 861fce0cf1..160e132bc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1997,7 +1997,6 @@ dependencies = [ "once_cell", "ruff_formatter", "ruff_python_ast", - "ruff_rustpython", "ruff_testing_macros", "ruff_text_size", "rustc-hash", diff --git a/Cargo.toml b/Cargo.toml index 28993fdee2..433c22748b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ is-macro = { version = "0.2.2" } itertools = { version = "0.10.5" } libcst = { git = "https://github.com/charliermarsh/LibCST", rev = "80e4c1399f95e5beb532fdd1e209ad2dbb470438" } log = { version = "0.4.17" } +memchr = "2.5.0" nohash-hasher = { version = "0.2.0" } once_cell = { version = "1.17.1" } path-absolutize = { version = "3.0.14" } diff --git a/crates/ruff_formatter/src/source_code.rs b/crates/ruff_formatter/src/source_code.rs index 77bf35fab5..91dc47401c 100644 --- a/crates/ruff_formatter/src/source_code.rs +++ b/crates/ruff_formatter/src/source_code.rs @@ -38,6 +38,10 @@ impl<'a> SourceCode<'a> { text: String::from(&self.text[range]).into_boxed_str(), } } + + pub fn as_str(&self) -> &'a str { + self.text + } } impl Debug for SourceCode<'_> { diff --git a/crates/ruff_python_ast/Cargo.toml b/crates/ruff_python_ast/Cargo.toml index cd7817bb18..047d985440 100644 --- a/crates/ruff_python_ast/Cargo.toml +++ b/crates/ruff_python_ast/Cargo.toml @@ -15,7 +15,7 @@ bitflags = { workspace = true } is-macro = { workspace = true } itertools = { workspace = true } log = { workspace = true } -memchr = "2.5.0" +memchr = { workspace = true } num-bigint = { version = "0.4.3" } num-traits = { version = "0.2.15" } once_cell = { workspace = true } diff --git a/crates/ruff_python_ast/src/source_code/comment_ranges.rs b/crates/ruff_python_ast/src/source_code/comment_ranges.rs new file mode 100644 index 0000000000..75deeb1b75 --- /dev/null +++ b/crates/ruff_python_ast/src/source_code/comment_ranges.rs @@ -0,0 +1,50 @@ +use ruff_text_size::TextRange; +use rustpython_parser::Tok; +use std::fmt::{Debug, Formatter}; +use std::ops::Deref; + +/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping. +#[derive(Clone)] +pub struct CommentRanges { + raw: Vec, +} + +impl Deref for CommentRanges { + type Target = [TextRange]; + + fn deref(&self) -> &Self::Target { + self.raw.as_slice() + } +} + +impl Debug for CommentRanges { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("CommentRanges").field(&self.raw).finish() + } +} + +impl<'a> IntoIterator for &'a CommentRanges { + type Item = &'a TextRange; + type IntoIter = std::slice::Iter<'a, TextRange>; + + fn into_iter(self) -> Self::IntoIter { + self.raw.iter() + } +} + +#[derive(Debug, Clone, Default)] +pub struct CommentRangesBuilder { + ranges: Vec, +} + +impl CommentRangesBuilder { + pub fn visit_token(&mut self, token: &Tok, range: TextRange) { + if token.is_comment() { + self.ranges.push(range); + } + } + + pub fn finish(self) -> CommentRanges { + CommentRanges { raw: self.ranges } + } +} diff --git a/crates/ruff_python_ast/src/source_code/indexer.rs b/crates/ruff_python_ast/src/source_code/indexer.rs index 16c9ab68c9..f134640c86 100644 --- a/crates/ruff_python_ast/src/source_code/indexer.rs +++ b/crates/ruff_python_ast/src/source_code/indexer.rs @@ -1,6 +1,7 @@ //! Struct used to index source code, to enable efficient lookup of tokens that //! are omitted from the AST (e.g., commented lines). +use crate::source_code::comment_ranges::{CommentRanges, CommentRangesBuilder}; use ruff_text_size::{TextRange, TextSize}; use rustpython_parser::lexer::LexResult; use rustpython_parser::{StringKind, Tok}; @@ -8,8 +9,7 @@ use rustpython_parser::{StringKind, Tok}; use crate::source_code::Locator; pub struct Indexer { - /// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping. - comment_ranges: Vec, + comment_ranges: CommentRanges, /// Stores the start offset of continuation lines. continuation_lines: Vec, @@ -27,7 +27,7 @@ impl Indexer { pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self { assert!(TextSize::try_from(locator.contents().len()).is_ok()); - let mut comment_ranges = Vec::new(); + let mut comment_ranges_builder = CommentRangesBuilder::default(); let mut continuation_lines = Vec::new(); let mut triple_quoted_string_ranges = Vec::new(); let mut f_string_ranges = Vec::new(); @@ -63,10 +63,9 @@ impl Indexer { } } + comment_ranges_builder.visit_token(tok, *range); + match tok { - Tok::Comment(..) => { - comment_ranges.push(*range); - } Tok::Newline | Tok::NonLogicalNewline => { line_start = range.end(); } @@ -89,7 +88,7 @@ impl Indexer { prev_end = range.end(); } Self { - comment_ranges, + comment_ranges: comment_ranges_builder.finish(), continuation_lines, triple_quoted_string_ranges, f_string_ranges, @@ -97,7 +96,7 @@ impl Indexer { } /// Returns the byte offset ranges of comments - pub fn comment_ranges(&self) -> &[TextRange] { + pub fn comment_ranges(&self) -> &CommentRanges { &self.comment_ranges } diff --git a/crates/ruff_python_ast/src/source_code/mod.rs b/crates/ruff_python_ast/src/source_code/mod.rs index c4fdcc718f..cce0a7636a 100644 --- a/crates/ruff_python_ast/src/source_code/mod.rs +++ b/crates/ruff_python_ast/src/source_code/mod.rs @@ -14,7 +14,9 @@ pub use locator::Locator; pub use stylist::{Quote, Stylist}; pub use crate::source_code::line_index::{LineIndex, OneIndexed}; +pub use comment_ranges::{CommentRanges, CommentRangesBuilder}; +mod comment_ranges; mod generator; mod indexer; mod line_index; diff --git a/crates/ruff_python_ast/src/whitespace.rs b/crates/ruff_python_ast/src/whitespace.rs index a3ee25c29b..cf4de111f1 100644 --- a/crates/ruff_python_ast/src/whitespace.rs +++ b/crates/ruff_python_ast/src/whitespace.rs @@ -38,3 +38,14 @@ pub fn clean(indentation: &str) -> String { .map(|char| if char.is_whitespace() { char } else { ' ' }) .collect() } + +/// Returns `true` for [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens) +/// or new-line characters. +pub const fn is_python_whitespace(c: char) -> bool { + matches!( + c, + ' ' | '\n' | '\t' | '\r' | + // Form-feed + '\x0C' + ) +} diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml index 1cbea49df9..b07d4e9a31 100644 --- a/crates/ruff_python_formatter/Cargo.toml +++ b/crates/ruff_python_formatter/Cargo.toml @@ -8,7 +8,6 @@ rust-version = { workspace = true } [dependencies] ruff_formatter = { path = "../ruff_formatter" } ruff_python_ast = { path = "../ruff_python_ast" } -ruff_rustpython = { path = "../ruff_rustpython" } ruff_text_size = { workspace = true } anyhow = { workspace = true } diff --git a/crates/ruff_python_formatter/src/comments/debug.rs b/crates/ruff_python_formatter/src/comments/debug.rs index 2f3ce07059..4ea7a1ef1a 100644 --- a/crates/ruff_python_formatter/src/comments/debug.rs +++ b/crates/ruff_python_formatter/src/comments/debug.rs @@ -1,7 +1,8 @@ use crate::comments::node_key::NodeRefEqualityKey; use crate::comments::{CommentsMap, SourceComment}; use ruff_formatter::SourceCode; -use std::fmt::{Debug, Formatter}; +use ruff_python_ast::prelude::*; +use std::fmt::{Debug, Formatter, Write}; /// Prints a debug representation of [`SourceComment`] that includes the comment's text pub(crate) struct DebugComment<'a> { @@ -22,7 +23,9 @@ impl Debug for DebugComment<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let mut strut = f.debug_struct("SourceComment"); - strut.field("text", &self.comment.slice.text(self.source_code)); + strut + .field("text", &self.comment.slice.text(self.source_code)) + .field("position", &self.comment.position); #[cfg(debug_assertions)] strut.field("formatted", &self.comment.formatted.get()); @@ -52,7 +55,10 @@ impl Debug for DebugComments<'_> { for node in self.comments.keys() { map.entry( - &node, + &NodeKindWithSource { + key: *node, + source: self.source_code, + }, &DebugNodeComments { comments: self.comments, source_code: self.source_code, @@ -65,6 +71,57 @@ impl Debug for DebugComments<'_> { } } +/// Prints the source code up to the first new line character. Truncates the text if it exceeds 40 characters. +struct NodeKindWithSource<'a> { + key: NodeRefEqualityKey<'a>, + source: SourceCode<'a>, +} + +impl Debug for NodeKindWithSource<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + struct TruncatedSource<'a>(&'a str); + + impl Debug for TruncatedSource<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_char('`')?; + let first_line = if let Some(line_end_pos) = self.0.find(['\n', '\r']) { + &self.0[..line_end_pos] + } else { + self.0 + }; + + if first_line.len() > 40 { + let (head, rest) = first_line.split_at(27); + + f.write_str(head)?; + f.write_str("...")?; + + // Take the last 10 characters + let tail = &rest[rest.len().saturating_sub(10)..]; + f.write_str(tail)?; + } else { + f.write_str(first_line)?; + } + + if first_line.len() < self.0.len() { + f.write_str("\u{23ce}")?; + } + + f.write_char('`') + } + } + + let kind = self.key.node().kind(); + let source = self.source.slice(self.key.node().range()).text(self.source); + + f.debug_struct("Node") + .field("kind", &kind) + .field("range", &self.key.node().range()) + .field("source", &TruncatedSource(source)) + .finish() + } +} + struct DebugNodeComments<'a> { comments: &'a CommentsMap<'a>, source_code: SourceCode<'a>, @@ -120,9 +177,11 @@ impl Debug for DebugNodeCommentSlice<'_> { mod tests { use crate::comments::map::MultiMap; use crate::comments::node_key::NodeRefEqualityKey; - use crate::comments::{node_key, Comments, CommentsData}; - use crate::comments::{CommentsMap, SourceComment}; - use insta::assert_debug_snapshot; + use crate::comments::{ + CommentTextPosition, Comments, CommentsData, CommentsMap, SourceComment, + }; + use insta::_macro_support::assert_snapshot; + use insta::{assert_debug_snapshot, assert_snapshot}; use ruff_formatter::SourceCode; use ruff_python_ast::node::AnyNode; use ruff_python_ast::source_code; @@ -156,6 +215,7 @@ break; SourceComment { slice: source_code.slice(TextRange::at(TextSize::new(0), TextSize::new(17))), formatted: Cell::new(false), + position: CommentTextPosition::OwnLine, }, ); @@ -164,6 +224,7 @@ break; SourceComment { slice: source_code.slice(TextRange::at(TextSize::new(28), TextSize::new(10))), formatted: Cell::new(false), + position: CommentTextPosition::EndOfLine, }, ); @@ -172,14 +233,11 @@ break; SourceComment { slice: source_code.slice(TextRange::at(TextSize::new(39), TextSize::new(15))), formatted: Cell::new(false), + position: CommentTextPosition::OwnLine, }, ); - let comments = Comments { - data: Rc::new(CommentsData { - comments: comments_map, - }), - }; + let comments = Comments::new(comments_map); assert_debug_snapshot!(comments.debug(source_code)); } diff --git a/crates/ruff_python_formatter/src/comments/map.rs b/crates/ruff_python_formatter/src/comments/map.rs index b47fa5a86f..eea4bb50a7 100644 --- a/crates/ruff_python_formatter/src/comments/map.rs +++ b/crates/ruff_python_formatter/src/comments/map.rs @@ -42,6 +42,7 @@ use std::ops::Range; /// * 1 byte for the end sequence, e.g. `\n` /// /// Meaning, the upper bound for comments is `u32::MAX / 2`. +#[derive(Clone)] pub(super) struct MultiMap { /// Lookup table to retrieve the entry for a key. index: FxHashMap, @@ -519,7 +520,7 @@ impl ExactSizeIterator for PartsIterator<'_, V> {} impl FusedIterator for PartsIterator<'_, V> {} -#[derive(Debug)] +#[derive(Clone, Debug)] enum Entry { InOrder(InOrderEntry), OutOfOrder(OutOfOrderEntry), @@ -586,7 +587,7 @@ where } } -#[derive(Debug)] +#[derive(Clone, Debug)] struct InOrderEntry { /// Index into the [MultiMap::parts] vector where the leading parts of this entry start leading_start: PartIndex, @@ -709,7 +710,7 @@ impl InOrderEntry { } } -#[derive(Debug)] +#[derive(Clone, Debug)] struct OutOfOrderEntry { /// Index into the [MultiMap::out_of_order] vector at which offset the leaading vec is stored. leading_index: usize, diff --git a/crates/ruff_python_formatter/src/comments/mod.rs b/crates/ruff_python_formatter/src/comments/mod.rs index a4a3271d8d..f0713c58eb 100644 --- a/crates/ruff_python_formatter/src/comments/mod.rs +++ b/crates/ruff_python_formatter/src/comments/mod.rs @@ -89,6 +89,7 @@ //! //! It is possible to add an additional optional label to [`SourceComment`] If ever the need arises to distinguish two *dangling comments* in the formatting logic, +use rustpython_parser::ast::Mod; use std::cell::Cell; use std::fmt::{Debug, Formatter}; use std::rc::Rc; @@ -96,22 +97,28 @@ use std::rc::Rc; mod debug; mod map; mod node_key; +mod placement; +mod visitor; use crate::comments::debug::{DebugComment, DebugComments}; use crate::comments::map::MultiMap; use crate::comments::node_key::NodeRefEqualityKey; +use crate::comments::visitor::CommentsVisitor; use ruff_formatter::{SourceCode, SourceCodeSlice}; use ruff_python_ast::node::AnyNodeRef; +use ruff_python_ast::source_code::CommentRanges; /// A comment in the source document. #[derive(Debug, Clone)] pub(crate) struct SourceComment { /// The location of the comment in the source document. - pub(super) slice: SourceCodeSlice, + slice: SourceCodeSlice, /// Whether the comment has been formatted or not. #[cfg(debug_assertions)] - pub(super) formatted: Cell, + formatted: Cell, + + position: CommentTextPosition, } impl SourceComment { @@ -121,6 +128,10 @@ impl SourceComment { &self.slice } + pub(crate) fn position(&self) -> CommentTextPosition { + self.position + } + #[cfg(not(debug_assertions))] #[inline(always)] pub fn mark_formatted(&self) {} @@ -138,6 +149,49 @@ impl SourceComment { DebugComment::new(self, source_code) } } + +/// The position of a comment in the source text. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub(crate) enum CommentTextPosition { + /// A comment that is on the same line as the preceding token and is separated by at least one line break from the following token. + /// + /// # Examples + /// + /// ## End of line + /// + /// ```python + /// a; # comment + /// b; + /// ``` + /// + /// `# comment` is an end of line comments because it is separated by at least one line break from the following token `b`. + /// Comments that not only end, but also start on a new line are [`OwnLine`](CommentTextPosition::OwnLine) comments. + EndOfLine, + + /// A Comment that is separated by at least one line break from the preceding token. + /// + /// # Examples + /// + /// ```python + /// a; + /// # comment + /// b; + /// ``` + /// + /// `# comment` line comments because they are separated by one line break from the preceding token `a`. + OwnLine, +} + +impl CommentTextPosition { + pub(crate) const fn is_own_line(self) -> bool { + matches!(self, CommentTextPosition::OwnLine) + } + + pub(crate) const fn is_end_of_line(self) -> bool { + matches!(self, CommentTextPosition::EndOfLine) + } +} + type CommentsMap<'a> = MultiMap, SourceComment>; /// The comments of a syntax tree stored by node. @@ -171,6 +225,27 @@ pub(crate) struct Comments<'a> { } impl<'a> Comments<'a> { + fn new(comments: CommentsMap<'a>) -> Self { + Self { + data: Rc::new(CommentsData { comments }), + } + } + + /// Extracts the comments from the AST. + pub(crate) fn from_ast( + root: &'a Mod, + source_code: SourceCode<'a>, + comment_ranges: &'a CommentRanges, + ) -> Self { + let map = if comment_ranges.is_empty() { + CommentsMap::new() + } else { + CommentsVisitor::new(source_code, comment_ranges).visit(root) + }; + + Self::new(map) + } + #[inline] pub(crate) fn has_comments(&self, node: AnyNodeRef) -> bool { self.data.comments.has(&NodeRefEqualityKey::from_ref(node)) @@ -272,3 +347,219 @@ impl<'a> Comments<'a> { struct CommentsData<'a> { comments: CommentsMap<'a>, } + +#[cfg(test)] +mod tests { + use crate::comments::Comments; + use insta::assert_debug_snapshot; + use ruff_formatter::SourceCode; + use ruff_python_ast::prelude::*; + use ruff_python_ast::source_code::{CommentRanges, CommentRangesBuilder}; + use rustpython_parser::lexer::lex; + use rustpython_parser::{parse_tokens, Mode}; + + struct CommentsTestCase<'a> { + module: Mod, + comment_ranges: CommentRanges, + source_code: SourceCode<'a>, + } + + impl<'a> CommentsTestCase<'a> { + fn from_code(code: &'a str) -> Self { + let source_code = SourceCode::new(code); + let tokens: Vec<_> = lex(code, Mode::Module).collect(); + + let mut comment_ranges = CommentRangesBuilder::default(); + + for (token, range) in tokens.iter().flatten() { + comment_ranges.visit_token(token, *range); + } + + let comment_ranges = comment_ranges.finish(); + + let parsed = parse_tokens(tokens.into_iter(), Mode::Module, "test.py") + .expect("Expect source to be valid Python"); + + CommentsTestCase { + source_code, + module: parsed, + comment_ranges, + } + } + + fn to_comments(&self) -> Comments { + Comments::from_ast(&self.module, self.source_code, &self.comment_ranges) + } + } + + #[test] + fn base_test() { + let source = r#" +# Function Leading comment +def test(x, y): + if x == y: # if statement end of line comment + print("Equal") + + # Leading comment + elif x < y: + print("Less") + else: + print("Greater") + +# own line comment + +test(10, 20) +"#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + #[test] + fn only_comments() { + let source = r#" +# Some comment + +# another comment +"#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + #[test] + fn empty_file() { + let source = r#""#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + #[test] + fn dangling_comment() { + let source = r#" +def test( + # Some comment + ): + pass +"#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + #[test] + fn parenthesized_expression() { + let source = r#" +a = ( # Trailing comment + 10 + # More comments + 3 + ) +"#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + #[test] + fn parenthesized_trailing_comment() { + let source = r#"( + a + # comment +) +"#; + + let test_case = CommentsTestCase::from_code(source); + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + #[test] + fn trailing_function_comment() { + let source = r#" +def test(x, y): + if x == y: + pass + elif x < y: + print("Less") + else: + print("Greater") + + # Trailing comment + +test(10, 20) +"#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + #[test] + fn leading_most_outer() { + let source = r#" +# leading comment +x +"#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + // Comment should be attached to the statement + #[test] + fn trailing_most_outer() { + let source = r#" +x # trailing comment +y # trailing last node +"#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + #[test] + fn trailing_most_outer_nested() { + let source = r#" +x + ( + 3 # trailing comment +) # outer +"#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } + + #[test] + fn trailing_after_comma() { + let source = r#" +def test( + a, # Trailing comment for argument `a` + b, +): pass +"#; + let test_case = CommentsTestCase::from_code(source); + + let comments = test_case.to_comments(); + + assert_debug_snapshot!(comments.debug(test_case.source_code)); + } +} diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs new file mode 100644 index 0000000000..ee23283161 --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -0,0 +1,13 @@ +use crate::comments::visitor::{CommentPlacement, DecoratedComment}; +use crate::comments::{CommentTextPosition, SourceComment}; +use ruff_formatter::{SourceCode, SourceCodeSlice}; +use ruff_python_ast::node::AnyNodeRef; +use std::cell::Cell; + +/// Implements the custom comment placement logic. +pub(super) fn place_comment<'a>( + comment: DecoratedComment<'a>, + _source_code: SourceCode, +) -> CommentPlacement<'a> { + CommentPlacement::Default(comment) +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__debug__tests__debug.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__debug__tests__debug.snap index e22980944f..f4eaabf199 100644 --- a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__debug__tests__debug.snap +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__debug__tests__debug.snap @@ -1,16 +1,17 @@ --- source: crates/ruff_python_formatter/src/comments/debug.rs -expression: formatted +expression: comments.debug(source_code) --- { - StmtContinue( - StmtContinue { - range: 0..0, - }, - ): { + Node { + kind: StmtContinue, + range: 0..0, + source: ``, + }: { "leading": [ SourceComment { text: "# leading comment", + position: OwnLine, formatted: false, }, ], @@ -18,18 +19,20 @@ expression: formatted "trailing": [ SourceComment { text: "# trailing", + position: EndOfLine, formatted: false, }, ], }, - StmtBreak( - StmtBreak { - range: 0..0, - }, - ): { + Node { + kind: StmtBreak, + range: 0..0, + source: ``, + }: { "leading": [ SourceComment { text: "# break leading", + position: OwnLine, formatted: false, }, ], diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__base_test.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__base_test.snap new file mode 100644 index 0000000000..51d2b14f09 --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__base_test.snap @@ -0,0 +1,66 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: StmtFunctionDef, + range: 28..212, + source: `def test(x, y):⏎`, + }: { + "leading": [ + SourceComment { + text: "# Function Leading comment", + position: OwnLine, + formatted: false, + }, + ], + "dangling": [], + "trailing": [], + }, + Node { + kind: ExprCompare, + range: 51..57, + source: `x == y`, + }: { + "leading": [], + "dangling": [], + "trailing": [ + SourceComment { + text: "# if statement end of line comment", + position: EndOfLine, + formatted: false, + }, + ], + }, + Node { + kind: StmtIf, + range: 144..212, + source: `elif x < y:⏎`, + }: { + "leading": [ + SourceComment { + text: "# Leading comment", + position: OwnLine, + formatted: false, + }, + ], + "dangling": [], + "trailing": [], + }, + Node { + kind: StmtExpr, + range: 234..246, + source: `test(10, 20)`, + }: { + "leading": [ + SourceComment { + text: "# own line comment", + position: OwnLine, + formatted: false, + }, + ], + "dangling": [], + "trailing": [], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__dangling_comment.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__dangling_comment.snap new file mode 100644 index 0000000000..58138c9524 --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__dangling_comment.snap @@ -0,0 +1,21 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: Arguments, + range: 9..39, + source: `(⏎`, + }: { + "leading": [], + "dangling": [ + SourceComment { + text: "# Some comment", + position: OwnLine, + formatted: false, + }, + ], + "trailing": [], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__empty_file.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__empty_file.snap new file mode 100644 index 0000000000..7ceb71b7a5 --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__empty_file.snap @@ -0,0 +1,5 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__leading_most_outer.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__leading_most_outer.snap new file mode 100644 index 0000000000..9bef07f421 --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__leading_most_outer.snap @@ -0,0 +1,21 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: StmtExpr, + range: 19..20, + source: `x`, + }: { + "leading": [ + SourceComment { + text: "# leading comment", + position: OwnLine, + formatted: false, + }, + ], + "dangling": [], + "trailing": [], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__only_comments.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__only_comments.snap new file mode 100644 index 0000000000..c471113557 --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__only_comments.snap @@ -0,0 +1,26 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: ModModule, + range: 0..0, + source: ``, + }: { + "leading": [], + "dangling": [ + SourceComment { + text: "# Some comment", + position: OwnLine, + formatted: false, + }, + SourceComment { + text: "# another comment", + position: OwnLine, + formatted: false, + }, + ], + "trailing": [], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__parenthesized_expression.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__parenthesized_expression.snap new file mode 100644 index 0000000000..e9dbfbf221 --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__parenthesized_expression.snap @@ -0,0 +1,36 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: ExprName, + range: 1..2, + source: `a`, + }: { + "leading": [], + "dangling": [], + "trailing": [ + SourceComment { + text: "# Trailing comment", + position: EndOfLine, + formatted: false, + }, + ], + }, + Node { + kind: ExprConstant, + range: 30..32, + source: `10`, + }: { + "leading": [], + "dangling": [], + "trailing": [ + SourceComment { + text: "# More comments", + position: EndOfLine, + formatted: false, + }, + ], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__parenthesized_trailing_comment.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__parenthesized_trailing_comment.snap new file mode 100644 index 0000000000..73aa8104d9 --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__parenthesized_trailing_comment.snap @@ -0,0 +1,21 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: ExprName, + range: 6..7, + source: `a`, + }: { + "leading": [], + "dangling": [], + "trailing": [ + SourceComment { + text: "# comment", + position: OwnLine, + formatted: false, + }, + ], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_after_comma.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_after_comma.snap new file mode 100644 index 0000000000..030b63f38b --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_after_comma.snap @@ -0,0 +1,21 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: Arg, + range: 15..16, + source: `a`, + }: { + "leading": [], + "dangling": [], + "trailing": [ + SourceComment { + text: "# Trailing comment for argument `a`", + position: EndOfLine, + formatted: false, + }, + ], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_function_comment.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_function_comment.snap new file mode 100644 index 0000000000..4ca22d68ee --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_function_comment.snap @@ -0,0 +1,21 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: StmtExpr, + range: 143..155, + source: `test(10, 20)`, + }: { + "leading": [ + SourceComment { + text: "# Trailing comment", + position: OwnLine, + formatted: false, + }, + ], + "dangling": [], + "trailing": [], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_most_outer.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_most_outer.snap new file mode 100644 index 0000000000..bb9a65c480 --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_most_outer.snap @@ -0,0 +1,36 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: StmtExpr, + range: 1..2, + source: `x`, + }: { + "leading": [], + "dangling": [], + "trailing": [ + SourceComment { + text: "# trailing comment", + position: EndOfLine, + formatted: false, + }, + ], + }, + Node { + kind: StmtExpr, + range: 22..23, + source: `y`, + }: { + "leading": [], + "dangling": [], + "trailing": [ + SourceComment { + text: "# trailing last node", + position: EndOfLine, + formatted: false, + }, + ], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_most_outer_nested.snap b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_most_outer_nested.snap new file mode 100644 index 0000000000..bc1b444b3d --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/snapshots/ruff_python_formatter__comments__tests__trailing_most_outer_nested.snap @@ -0,0 +1,36 @@ +--- +source: crates/ruff_python_formatter/src/comments/mod.rs +expression: comments.debug(test_case.source_code) +--- +{ + Node { + kind: ExprConstant, + range: 11..12, + source: `3`, + }: { + "leading": [], + "dangling": [], + "trailing": [ + SourceComment { + text: "# trailing comment", + position: EndOfLine, + formatted: false, + }, + ], + }, + Node { + kind: StmtExpr, + range: 1..33, + source: `x + (⏎`, + }: { + "leading": [], + "dangling": [], + "trailing": [ + SourceComment { + text: "# outer", + position: EndOfLine, + formatted: false, + }, + ], + }, +} diff --git a/crates/ruff_python_formatter/src/comments/visitor.rs b/crates/ruff_python_formatter/src/comments/visitor.rs new file mode 100644 index 0000000000..d107c8366f --- /dev/null +++ b/crates/ruff_python_formatter/src/comments/visitor.rs @@ -0,0 +1,687 @@ +use crate::comments::map::MultiMap; +use crate::comments::node_key::NodeRefEqualityKey; +use crate::comments::placement::place_comment; +use crate::comments::{CommentTextPosition, CommentsMap, SourceComment}; +use ruff_formatter::{SourceCode, SourceCodeSlice}; +use ruff_python_ast::node::AnyNodeRef; +use ruff_python_ast::prelude::*; +use ruff_python_ast::source_code::CommentRanges; +use std::cell::Cell; +// The interface is designed to only export the members relevant for iterating nodes in +// pre-order. +#[allow(clippy::wildcard_imports)] +use ruff_python_ast::visitor::preorder::*; +use ruff_python_ast::whitespace::is_python_whitespace; +use ruff_text_size::TextRange; +use std::cmp::Ordering; +use std::iter::Peekable; + +/// Visitor extracting the comments from an AST. +#[derive(Debug, Clone)] +pub(crate) struct CommentsVisitor<'a> { + builder: CommentsBuilder<'a>, + source_code: SourceCode<'a>, + parents: Vec>, + preceding_node: Option>, + comment_ranges: Peekable>, +} + +impl<'a> CommentsVisitor<'a> { + pub(crate) fn new(source_code: SourceCode<'a>, comment_ranges: &'a CommentRanges) -> Self { + Self { + builder: CommentsBuilder::default(), + source_code, + parents: Vec::new(), + preceding_node: None, + comment_ranges: comment_ranges.iter().peekable(), + } + } + + pub(super) fn visit(mut self, root: &'a Mod) -> CommentsMap<'a> { + self.visit_mod(root); + + self.finish() + } + + fn start_node(&mut self, node: N) -> TraversalSignal + where + N: Into>, + { + self.start_node_impl(node.into()) + } + + fn start_node_impl(&mut self, node: AnyNodeRef<'a>) -> TraversalSignal { + let node_range = node.range(); + + let enclosing_node = self.parents.last().copied().unwrap_or(node); + + // Process all remaining comments that end before this node's start position. + // If the `preceding` node is set, then it process all comments ending after the `preceding` node + // and ending before this node's start position + while let Some(comment_range) = self.comment_ranges.peek().copied() { + // Exit if the comment is enclosed by this node or comes after it + if comment_range.end() > node_range.start() { + break; + } + + let comment = DecoratedComment { + enclosing: enclosing_node, + preceding: self.preceding_node, + following: Some(node), + text_position: text_position(*comment_range, self.source_code), + slice: self.source_code.slice(*comment_range), + }; + + self.builder + .add_comment(place_comment(comment, self.source_code)); + self.comment_ranges.next(); + } + + // From here on, we're inside of `node`, meaning, we're passed the preceding node. + self.preceding_node = None; + self.parents.push(node); + + // Try to skip the subtree if + // * there are no comments + // * if the next comment comes after this node (meaning, this nodes subtree contains no comments) + self.comment_ranges + .peek() + .map_or(TraversalSignal::Skip, |next_comment| { + if node.range().contains(next_comment.start()) { + TraversalSignal::Traverse + } else { + TraversalSignal::Skip + } + }) + } + + fn finish_node(&mut self, node: N) + where + N: Into>, + { + self.finish_node_impl(node.into()); + } + + fn finish_node_impl(&mut self, node: AnyNodeRef<'a>) { + // We are leaving this node, pop it from the parent stack. + self.parents.pop(); + + let node_end = node.end(); + let is_root = self.parents.is_empty(); + + // Process all comments that start after the `preceding` node and end before this node's end. + while let Some(comment_range) = self.comment_ranges.peek().copied() { + // If the comment starts after this node, break. + // If this is the root node and there are comments after the node, attach them to the root node + // anyway because there's no other node we can attach the comments to (RustPython should include the comments in the node's range) + if comment_range.start() >= node_end && !is_root { + break; + } + + let comment = DecoratedComment { + enclosing: node, + preceding: self.preceding_node, + following: None, + text_position: text_position(*comment_range, self.source_code), + slice: self.source_code.slice(*comment_range), + }; + + self.builder + .add_comment(place_comment(comment, self.source_code)); + + self.comment_ranges.next(); + } + + self.preceding_node = Some(node); + } + + fn finish(mut self) -> CommentsMap<'a> { + self.builder.finish() + } +} + +impl<'ast> PreorderVisitor<'ast> for CommentsVisitor<'ast> { + fn visit_mod(&mut self, module: &'ast Mod) { + if self.start_node(module).is_traverse() { + walk_module(self, module); + } + self.finish_node(module); + } + + fn visit_stmt(&mut self, stmt: &'ast Stmt) { + if self.start_node(stmt).is_traverse() { + walk_stmt(self, stmt); + } + self.finish_node(stmt); + } + + fn visit_annotation(&mut self, expr: &'ast Expr) { + if self.start_node(expr).is_traverse() { + walk_expr(self, expr); + } + self.finish_node(expr); + } + + fn visit_expr(&mut self, expr: &'ast Expr) { + if self.start_node(expr).is_traverse() { + walk_expr(self, expr); + } + self.finish_node(expr); + } + + fn visit_comprehension(&mut self, comprehension: &'ast Comprehension) { + if self.start_node(comprehension).is_traverse() { + walk_comprehension(self, comprehension); + } + self.finish_node(comprehension); + } + + fn visit_excepthandler(&mut self, excepthandler: &'ast Excepthandler) { + if self.start_node(excepthandler).is_traverse() { + walk_excepthandler(self, excepthandler); + } + self.finish_node(excepthandler); + } + + fn visit_format_spec(&mut self, format_spec: &'ast Expr) { + if self.start_node(format_spec).is_traverse() { + walk_expr(self, format_spec); + } + self.finish_node(format_spec); + } + + fn visit_arguments(&mut self, arguments: &'ast Arguments) { + if self.start_node(arguments).is_traverse() { + walk_arguments(self, arguments); + } + self.finish_node(arguments); + } + + fn visit_arg(&mut self, arg: &'ast Arg) { + if self.start_node(arg).is_traverse() { + walk_arg(self, arg); + } + self.finish_node(arg); + } + + fn visit_keyword(&mut self, keyword: &'ast Keyword) { + if self.start_node(keyword).is_traverse() { + walk_keyword(self, keyword); + } + self.finish_node(keyword); + } + + fn visit_alias(&mut self, alias: &'ast Alias) { + if self.start_node(alias).is_traverse() { + walk_alias(self, alias); + } + self.finish_node(alias); + } + + fn visit_withitem(&mut self, withitem: &'ast Withitem) { + if self.start_node(withitem).is_traverse() { + walk_withitem(self, withitem); + } + + self.finish_node(withitem); + } + fn visit_match_case(&mut self, match_case: &'ast MatchCase) { + if self.start_node(match_case).is_traverse() { + walk_match_case(self, match_case); + } + self.finish_node(match_case); + } + + fn visit_pattern(&mut self, pattern: &'ast Pattern) { + if self.start_node(pattern).is_traverse() { + walk_pattern(self, pattern); + } + self.finish_node(pattern); + } +} + +fn text_position(comment_range: TextRange, source_code: SourceCode) -> CommentTextPosition { + let before = &source_code.as_str()[TextRange::up_to(comment_range.start())]; + + for c in before.chars().rev() { + match c { + '\n' | '\r' => { + break; + } + c if is_python_whitespace(c) => continue, + _ => return CommentTextPosition::EndOfLine, + } + } + + CommentTextPosition::OwnLine +} + +/// A comment decorated with additional information about its surrounding context in the source document. +/// +/// Used by [`CommentStyle::place_comment`] to determine if this should become a [leading](self#leading-comments), [dangling](self#dangling-comments), or [trailing](self#trailing-comments) comment. +#[derive(Debug, Clone)] +pub(super) struct DecoratedComment<'a> { + enclosing: AnyNodeRef<'a>, + preceding: Option>, + following: Option>, + text_position: CommentTextPosition, + slice: SourceCodeSlice, +} + +impl<'a> DecoratedComment<'a> { + /// The closest parent node that fully encloses the comment. + /// + /// A node encloses a comment when the comment is between two of its direct children (ignoring lists). + /// + /// # Examples + /// + /// ```python + /// [ + /// a, + /// # comment + /// b + /// ] + /// ``` + /// + /// The enclosing node is the list expression and not the name `b` because + /// `a` and `b` are children of the list expression and `comment` is between the two nodes. + pub(super) fn enclosing_node(&self) -> AnyNodeRef<'a> { + self.enclosing + } + + /// Returns the slice into the source code. + pub(super) fn slice(&self) -> &SourceCodeSlice { + &self.slice + } + + /// Returns the comment's preceding node. + /// + /// The direct child node (ignoring lists) of the [`enclosing_node`](DecoratedComment::enclosing_node) that precedes this comment. + /// + /// Returns [None] if the [`enclosing_node`](DecoratedComment::enclosing_node) only consists of tokens or if + /// all preceding children of the [`enclosing_node`](DecoratedComment::enclosing_node) have been tokens. + /// + /// The Preceding node is guaranteed to be a sibling of [`following_node`](DecoratedComment::following_node). + /// + /// # Examples + /// + /// ## Preceding tokens only + /// + /// ```python + /// [ + /// # comment + /// ] + /// ``` + /// Returns [None] because the comment has no preceding node, only a preceding `[` token. + /// + /// ## Preceding node + /// + /// ```python + /// a # comment + /// b + /// ``` + /// + /// Returns `Some(a)` because `a` directly precedes the comment. + /// + /// ## Preceding token and node + /// + /// ```python + /// [ + /// a, # comment + /// b + /// ] + /// ``` + /// + /// Returns `Some(a)` because `a` is the preceding node of `comment`. The presence of the `,` token + /// doesn't change that. + pub(super) fn preceding_node(&self) -> Option> { + self.preceding + } + + /// Returns the node following the comment. + /// + /// The direct child node (ignoring lists) of the [`enclosing_node`](DecoratedComment::enclosing_node) that follows this comment. + /// + /// Returns [None] if the [`enclosing_node`](DecoratedComment::enclosing_node) only consists of tokens or if + /// all children children of the [`enclosing_node`](DecoratedComment::enclosing_node) following this comment are tokens. + /// + /// The following node is guaranteed to be a sibling of [`preceding_node`](DecoratedComment::preceding_node). + /// + /// # Examples + /// + /// ## Following tokens only + /// + /// ```python + /// [ + /// # comment + /// ] + /// ``` + /// + /// Returns [None] because there's no node following the comment, only the `]` token. + /// + /// ## Following node + /// + /// ```python + /// [ # comment + /// a + /// ] + /// ``` + /// + /// Returns `Some(a)` because `a` is the node directly following the comment. + /// + /// ## Following token and node + /// + /// ```python + /// [ + /// a # comment + /// , b + /// ] + /// ``` + /// + /// Returns `Some(b)` because the `b` identifier is the first node following `comment`. + /// + /// ## Following parenthesized expression + /// + /// ```python + /// ( + /// a + /// # comment + /// ) + /// b + /// ``` + /// + /// Returns `None` because `comment` is enclosed inside the parenthesized expression and it has no children + /// following `# comment`. + pub(super) fn following_node(&self) -> Option> { + self.following + } + + /// The position of the comment in the text. + pub(super) fn text_position(&self) -> CommentTextPosition { + self.text_position + } +} + +impl From> for SourceComment { + fn from(decorated: DecoratedComment) -> Self { + Self { + slice: decorated.slice, + position: decorated.text_position, + #[cfg(debug_assertions)] + formatted: Cell::new(false), + } + } +} + +#[derive(Debug)] +pub(super) enum CommentPlacement<'a> { + /// Makes `comment` a [leading comment](self#leading-comments) of `node`. + Leading { + node: AnyNodeRef<'a>, + comment: SourceComment, + }, + /// Makes `comment` a [trailing comment](self#trailing-comments) of `node`. + Trailing { + node: AnyNodeRef<'a>, + comment: SourceComment, + }, + + /// Makes `comment` a [dangling comment](self#dangling-comments) of `node`. + Dangling { + node: AnyNodeRef<'a>, + comment: SourceComment, + }, + + /// Uses the default heuristic to determine the placement of the comment. + /// + /// # End of line comments + /// + /// Makes the comment a... + /// + /// * [trailing comment] of the [`preceding_node`] if both the [`following_node`] and [`preceding_node`] are not [None] + /// and the comment and [`preceding_node`] are only separated by a space (there's no token between the comment and [`preceding_node`]). + /// * [leading comment] of the [`following_node`] if the [`following_node`] is not [None] + /// * [trailing comment] of the [`preceding_node`] if the [`preceding_node`] is not [None] + /// * [dangling comment] of the [`enclosing_node`]. + /// + /// ## Examples + /// ### Comment with preceding and following nodes + /// + /// ```python + /// [ + /// a, # comment + /// b + /// ] + /// ``` + /// + /// The comment becomes a [trailing comment] of the node `a`. + /// + /// ### Comment with preceding node only + /// + /// ```python + /// [ + /// a # comment + /// ] + /// ``` + /// + /// The comment becomes a [trailing comment] of the node `a`. + /// + /// ### Comment with following node only + /// + /// ```python + /// [ # comment + /// b + /// ] + /// ``` + /// + /// The comment becomes a [leading comment] of the node `b`. + /// + /// ### Dangling comment + /// + /// ```python + /// [ # comment + /// ] + /// ``` + /// + /// The comment becomes a [dangling comment] of the enclosing list expression because both the [`preceding_node`] and [`following_node`] are [None]. + /// + /// # Own line comments + /// + /// Makes the comment a... + /// + /// * [leading comment] of the [`following_node`] if the [`following_node`] is not [None] + /// * or a [trailing comment] of the [`preceding_node`] if the [`preceding_node`] is not [None] + /// * or a [dangling comment] of the [`enclosing_node`]. + /// + /// ## Examples + /// + /// ### Comment with leading and preceding nodes + /// + /// ```python + /// [ + /// a, + /// # comment + /// b + /// ] + /// ``` + /// + /// The comment becomes a [leading comment] of the node `b`. + /// + /// ### Comment with preceding node only + /// + /// ```python + /// [ + /// a + /// # comment + /// ] + /// ``` + /// + /// The comment becomes a [trailing comment] of the node `a`. + /// + /// ### Comment with following node only + /// + /// ```python + /// [ + /// # comment + /// b + /// ] + /// ``` + /// + /// The comment becomes a [leading comment] of the node `b`. + /// + /// ### Dangling comment + /// + /// ```python + /// [ + /// # comment + /// ] + /// ``` + /// + /// The comment becomes a [dangling comment] of the list expression because both [`preceding_node`] and [`following_node`] are [None]. + /// + /// [`preceding_node`]: DecoratedComment::preceding_node + /// [`following_node`]: DecoratedComment::following_node + /// [`enclosing_node`]: DecoratedComment::enclosing_node_id + /// [trailing comment]: self#trailing-comments + /// [leading comment]: self#leading-comments + /// [dangling comment]: self#dangling-comments + Default(DecoratedComment<'a>), +} + +impl<'a> CommentPlacement<'a> { + /// Makes `comment` a [leading comment](self#leading-comments) of `node`. + #[inline] + pub(super) fn leading(node: AnyNodeRef<'a>, comment: impl Into) -> Self { + Self::Leading { + node, + comment: comment.into(), + } + } + + /// Makes `comment` a [dangling comment](self::dangling-comments) of `node`. + pub(super) fn dangling(node: AnyNodeRef<'a>, comment: impl Into) -> Self { + Self::Dangling { + node, + comment: comment.into(), + } + } + + /// Makes `comment` a [trailing comment](self::trailing-comments) of `node`. + #[inline] + pub(super) fn trailing(node: AnyNodeRef<'a>, comment: impl Into) -> Self { + Self::Trailing { + node, + comment: comment.into(), + } + } + + /// Returns the placement if it isn't [`CommentPlacement::Default`], otherwise calls `f` and returns the result. + #[inline] + pub(super) fn or_else(self, f: F) -> Self + where + F: FnOnce(DecoratedComment<'a>) -> CommentPlacement<'a>, + { + match self { + CommentPlacement::Default(comment) => f(comment), + placement => placement, + } + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +enum TraversalSignal { + Traverse, + Skip, +} + +impl TraversalSignal { + const fn is_traverse(self) -> bool { + matches!(self, TraversalSignal::Traverse) + } +} + +#[derive(Clone, Debug, Default)] +struct CommentsBuilder<'a> { + comments: CommentsMap<'a>, +} + +impl<'a> CommentsBuilder<'a> { + fn add_comment(&mut self, placement: CommentPlacement<'a>) { + match placement { + CommentPlacement::Leading { node, comment } => { + self.push_leading_comment(node, comment); + } + CommentPlacement::Trailing { node, comment } => { + self.push_trailing_comment(node, comment); + } + CommentPlacement::Dangling { node, comment } => { + self.push_dangling_comment(node, comment); + } + CommentPlacement::Default(comment) => { + match comment.text_position() { + CommentTextPosition::EndOfLine => { + match (comment.preceding_node(), comment.following_node()) { + (Some(preceding), Some(_)) => { + // Attach comments with both preceding and following node to the preceding + // because there's a line break separating it from the following node. + // ```python + // a; # comment + // b + // ``` + self.push_trailing_comment(preceding, comment); + } + (Some(preceding), None) => { + self.push_trailing_comment(preceding, comment); + } + (None, Some(following)) => { + self.push_leading_comment(following, comment); + } + (None, None) => { + self.push_dangling_comment(comment.enclosing_node(), comment); + } + } + } + CommentTextPosition::OwnLine => { + match (comment.preceding_node(), comment.following_node()) { + // Following always wins for a leading comment + // ```python + // a + // // python + // b + // ``` + // attach the comment to the `b` expression statement + (_, Some(following)) => { + self.push_leading_comment(following, comment); + } + (Some(preceding), None) => { + self.push_trailing_comment(preceding, comment); + } + (None, None) => { + self.push_dangling_comment(comment.enclosing_node(), comment); + } + } + } + } + } + } + } + + fn finish(self) -> CommentsMap<'a> { + self.comments + } + + fn push_leading_comment(&mut self, node: AnyNodeRef<'a>, comment: impl Into) { + self.comments + .push_leading(NodeRefEqualityKey::from_ref(node), comment.into()); + } + + fn push_dangling_comment(&mut self, node: AnyNodeRef<'a>, comment: impl Into) { + self.comments + .push_dangling(NodeRefEqualityKey::from_ref(node), comment.into()); + } + + fn push_trailing_comment(&mut self, node: AnyNodeRef<'a>, comment: impl Into) { + self.comments + .push_trailing(NodeRefEqualityKey::from_ref(node), comment.into()); + } +} diff --git a/crates/ruff_python_formatter/src/context.rs b/crates/ruff_python_formatter/src/context.rs index 301dbff052..affe9d7835 100644 --- a/crates/ruff_python_formatter/src/context.rs +++ b/crates/ruff_python_formatter/src/context.rs @@ -1,24 +1,40 @@ +use crate::comments::Comments; use ruff_formatter::{FormatContext, SimpleFormatOptions, SourceCode}; use ruff_python_ast::source_code::Locator; +use std::fmt::{Debug, Formatter}; -#[derive(Clone, Debug)] -pub struct ASTFormatContext<'source> { +#[derive(Clone)] +pub struct ASTFormatContext<'a> { options: SimpleFormatOptions, - contents: &'source str, + contents: &'a str, + comments: Comments<'a>, } -impl<'source> ASTFormatContext<'source> { - pub fn new(options: SimpleFormatOptions, contents: &'source str) -> Self { - Self { options, contents } +impl<'a> ASTFormatContext<'a> { + pub(crate) fn new( + options: SimpleFormatOptions, + contents: &'a str, + comments: Comments<'a>, + ) -> Self { + Self { + options, + contents, + comments, + } } - pub fn contents(&self) -> &'source str { + pub fn contents(&self) -> &'a str { self.contents } - pub fn locator(&self) -> Locator<'source> { + pub fn locator(&self) -> Locator<'a> { Locator::new(self.contents) } + + #[allow(unused)] + pub(crate) fn comments(&self) -> &Comments<'a> { + &self.comments + } } impl FormatContext for ASTFormatContext<'_> { @@ -32,3 +48,13 @@ impl FormatContext for ASTFormatContext<'_> { SourceCode::new(self.contents) } } + +impl Debug for ASTFormatContext<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ASTFormatContext") + .field("options", &self.options) + .field("comments", &self.comments.debug(self.source_code())) + .field("source", &self.contents) + .finish() + } +} diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index e52c7300c1..2a0b24d139 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -1,14 +1,20 @@ -use anyhow::Result; -use rustpython_parser::lexer::LexResult; +use anyhow::{anyhow, Context, Result}; +use rustpython_parser::ast::Mod; +use rustpython_parser::lexer::lex; +use rustpython_parser::{parse_tokens, Mode}; -use ruff_formatter::{format, Formatted, IndentStyle, SimpleFormatOptions}; -use ruff_python_ast::source_code::Locator; +use ruff_formatter::{ + format, FormatResult, Formatted, IndentStyle, Printed, SimpleFormatOptions, SourceCode, +}; +use ruff_python_ast::source_code::{CommentRanges, CommentRangesBuilder, Locator}; use crate::attachment::attach; +use crate::comments::Comments; use crate::context::ASTFormatContext; use crate::cst::Stmt; use crate::newlines::normalize_newlines; use crate::parentheses::normalize_parentheses; +use crate::trivia::TriviaToken; mod attachment; pub mod cli; @@ -23,22 +29,52 @@ mod trivia; include!("../../ruff_formatter/shared_traits.rs"); -pub fn fmt(contents: &str) -> Result> { - // Create a reusable locator. - let locator = Locator::new(contents); +pub fn fmt(contents: &str) -> Result { + // Tokenize once + let mut tokens = Vec::new(); + let mut comment_ranges = CommentRangesBuilder::default(); - // Tokenize once. - let tokens: Vec = ruff_rustpython::tokenize(contents); + for result in lex(contents, Mode::Module) { + let (token, range) = match result { + Ok((token, range)) => (token, range), + Err(err) => return Err(anyhow!("Source contains syntax errors {err:?}")), + }; + + comment_ranges.visit_token(&token, range); + tokens.push(Ok((token, range))); + } + + let comment_ranges = comment_ranges.finish(); - // Extract trivia. let trivia = trivia::extract_trivia_tokens(&tokens); // Parse the AST. - let python_ast = ruff_rustpython::parse_program_tokens(tokens, "")?; + let python_ast = parse_tokens(tokens, Mode::Module, "").unwrap(); + + let formatted = format_node(&python_ast, &comment_ranges, contents, trivia)?; + + formatted + .print() + .with_context(|| "Failed to print the formatter IR") +} + +pub(crate) fn format_node<'a>( + root: &'a Mod, + comment_ranges: &'a CommentRanges, + source: &'a str, + trivia: Vec, +) -> FormatResult>> { + let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges); + + let module = root.as_module().unwrap(); + + let locator = Locator::new(source); // Convert to a CST. - let mut python_cst: Vec = python_ast - .into_iter() + let mut python_cst: Vec = module + .body + .iter() + .cloned() .map(|stmt| (stmt, &locator).into()) .collect(); @@ -54,10 +90,10 @@ pub fn fmt(contents: &str) -> Result> { line_width: 88.try_into().unwrap(), }, locator.contents(), + comments ), [format::builders::statements(&python_cst)] ) - .map_err(Into::into) } #[cfg(test)] @@ -68,11 +104,14 @@ mod tests { use anyhow::Result; use insta::assert_snapshot; + use rustpython_parser::lexer::lex; + use rustpython_parser::{parse_tokens, Mode}; + use ruff_python_ast::source_code::CommentRangesBuilder; use ruff_testing_macros::fixture; use similar::TextDiff; - use crate::fmt; + use crate::{fmt, format_node, trivia}; #[fixture( pattern = "resources/test/fixtures/black/**/*.py", @@ -85,13 +124,12 @@ mod tests { fn black_test(input_path: &Path) -> Result<()> { let content = fs::read_to_string(input_path)?; - let formatted = fmt(&content)?; + let printed = fmt(&content)?; let expected_path = input_path.with_extension("py.expect"); let expected_output = fs::read_to_string(&expected_path) .unwrap_or_else(|_| panic!("Expected Black output file '{expected_path:?}' to exist")); - let printed = formatted.print()?; let formatted_code = printed.as_code(); if formatted_code == expected_output { @@ -162,7 +200,24 @@ mod tests { k: v for k, v in a_very_long_variable_name_that_exceeds_the_line_length_by_far_keep_going } "#; - let formatted = fmt(src).unwrap(); + // Tokenize once + let mut tokens = Vec::new(); + let mut comment_ranges = CommentRangesBuilder::default(); + + for result in lex(src, Mode::Module) { + let (token, range) = result.unwrap(); + comment_ranges.visit_token(&token, range); + tokens.push(Ok((token, range))); + } + + let comment_ranges = comment_ranges.finish(); + + let trivia = trivia::extract_trivia_tokens(&tokens); + + // Parse the AST. + let python_ast = parse_tokens(tokens, Mode::Module, "").unwrap(); + + let formatted = format_node(&python_ast, &comment_ranges, src, trivia).unwrap(); // Uncomment the `dbg` to print the IR. // Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR diff --git a/crates/ruff_python_formatter/src/main.rs b/crates/ruff_python_formatter/src/main.rs index 1f38d0c59e..a67938b0a3 100644 --- a/crates/ruff_python_formatter/src/main.rs +++ b/crates/ruff_python_formatter/src/main.rs @@ -11,7 +11,7 @@ fn main() -> Result<()> { let contents = fs::read_to_string(cli.file)?; #[allow(clippy::print_stdout)] { - println!("{}", fmt(&contents)?.print()?.as_code()); + println!("{}", fmt(&contents)?.as_code()); } Ok(()) }