From 533d2f6f2bdb7e604abc93b093265258c7c042e6 Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Wed, 21 Jun 2023 23:16:18 -0500 Subject: [PATCH] formatter: new approach for indentation that doesn't depend on original formatting (#2764) --- common/formatter/config/rule_config.cpp | 14 +- common/formatter/config/rule_config.h | 14 +- common/formatter/formatter.cpp | 47 ++- common/formatter/formatter_tree.cpp | 61 +--- common/formatter/formatter_tree.h | 20 -- common/formatter/formatting_rules.cpp | 317 ++++++++++++------ common/formatter/formatting_rules.h | 137 +++----- test/common/formatter/corpus/comments.test.gc | 12 +- .../formatter/corpus/constant-pairs.test.gc | 67 ++-- test/common/formatter/corpus/default.test.gc | 64 ---- .../formatter/corpus/indent-flow.test.gc | 60 ++++ .../formatter/corpus/indent-hang.test.gc | 0 test/common/formatter/corpus/inner.test.gc | 19 -- 13 files changed, 427 insertions(+), 405 deletions(-) delete mode 100644 test/common/formatter/corpus/default.test.gc create mode 100644 test/common/formatter/corpus/indent-flow.test.gc create mode 100644 test/common/formatter/corpus/indent-hang.test.gc delete mode 100644 test/common/formatter/corpus/inner.test.gc diff --git a/common/formatter/config/rule_config.cpp b/common/formatter/config/rule_config.cpp index 4cf20ecf20..4b6e29d1d9 100644 --- a/common/formatter/config/rule_config.cpp +++ b/common/formatter/config/rule_config.cpp @@ -1,14 +1,10 @@ #include "rule_config.h" -namespace formatter { - -// TODO - these are eventually going to need a refactor too, my depth is based on the true -// tree-depth, in reality we want it to be relative to the forms defined here because imagine -// multiple `let`s in the same form, their depths will be different but you want them relatively -// formatted the same +namespace formatter_rules { +namespace config { // TODO - populate these more -const std::unordered_map>> - opengoal_indentation_rules = {{"defun", {std::make_shared(1)}}}; -} // namespace formatter +std::unordered_map opengoal_form_config = {}; +} // namespace config +} // namespace formatter_rules diff --git a/common/formatter/config/rule_config.h b/common/formatter/config/rule_config.h index b495353528..da1d1b19ec 100644 --- a/common/formatter/config/rule_config.h +++ b/common/formatter/config/rule_config.h @@ -7,7 +7,13 @@ #include "common/formatter/formatting_rules.h" -namespace formatter { -extern const std::unordered_map>> - opengoal_indentation_rules; -} +namespace formatter_rules { +namespace config { +struct FormConfiguration { + bool force_hang = false; + int start_hang_at_index = 0; +}; + +extern std::unordered_map opengoal_form_config; +} // namespace config +} // namespace formatter_rules diff --git a/common/formatter/formatter.cpp b/common/formatter/formatter.cpp index 1fa42974a9..ce0a41e143 100644 --- a/common/formatter/formatter.cpp +++ b/common/formatter/formatter.cpp @@ -24,9 +24,6 @@ std::string apply_formatting(const FormatterTreeNode& curr_node, std::string curr_form = ""; // Print the token if (curr_node.token) { - // TODO - perhaps unneeded - curr_node.get_formatting_rule(tree_depth, -1) - ->indent_token(curr_form, curr_node, curr_node, tree_depth, -1); curr_form += curr_node.token.value(); return curr_form; } @@ -34,29 +31,51 @@ std::string apply_formatting(const FormatterTreeNode& curr_node, curr_form += "("; } // Iterate the form + + bool inline_form = false; + // Also check if the form should be constant-paired + const bool constant_pair_form = + formatter_rules::constant_pairs::form_should_be_constant_paired(curr_node); + if (!constant_pair_form) { + // Determine if the form should be inlined or hung/flowed + // TODO - this isn't entirely accurate, needs current cursor positioning (which is tricky + // because recursion!) + inline_form = formatter_rules::indent::form_can_be_inlined(curr_form, curr_node); + } for (int i = 0; i < curr_node.refs.size(); i++) { const auto& ref = curr_node.refs.at(i); // Append a newline if needed - curr_node.get_formatting_rule(tree_depth, i) - ->append_newline(curr_form, ref, curr_node, tree_depth, i); + if (!inline_form) { + formatter_rules::indent::append_newline(curr_form, ref, curr_node, tree_depth, i, + constant_pair_form); + } // Either print the element's token, or recursively format it as well if (ref.token) { - curr_node.get_formatting_rule(tree_depth, i) - ->indent_token(curr_form, ref, curr_node, 1, - i); // TODO depth hard-coded to 1, i think this can be removed, since - // forms are always done bottom-top recursively, they always act - // independently as if it was the shallowest depth - curr_form += ref.token.value(); + // TODO depth hard-coded to 1, i think this can be removed, since + // forms are always done bottom-top recursively, they always act + // independently as if it was the shallowest depth + if (!inline_form) { + formatter_rules::indent::flow_line(curr_form, ref, curr_node, 1, i); + } + if (ref.metadata.node_type == "comment" && ref.metadata.is_inline) { + curr_form += " " + ref.token.value(); + } else if (ref.metadata.node_type == "block_comment") { + curr_form += formatter_rules::comments::format_block_comment(ref.token.value()); + } else { + curr_form += ref.token.value(); + } if (!curr_node.metadata.is_top_level) { curr_form += " "; } } else { auto formatted_form = apply_formatting(ref, "", tree_depth + 1); - if (!curr_node.metadata.is_top_level) { - curr_node.get_formatting_rule(tree_depth, i) - ->align_form_lines(formatted_form, ref, curr_node); + if (!curr_node.metadata.is_top_level && !inline_form) { + formatter_rules::indent::hang_lines(formatted_form, ref, curr_node, constant_pair_form); } curr_form += formatted_form; + if (!curr_node.metadata.is_top_level) { + curr_form += " "; + } } // Handle blank lines at the top level, skip if it's the final element formatter_rules::blank_lines::separate_by_newline(curr_form, curr_node, ref, i); diff --git a/common/formatter/formatter_tree.cpp b/common/formatter/formatter_tree.cpp index b41ef1cba3..8d96e8b0bf 100644 --- a/common/formatter/formatter_tree.cpp +++ b/common/formatter/formatter_tree.cpp @@ -6,10 +6,6 @@ #include "third-party/fmt/core.h" -namespace formatter { -const std::shared_ptr default_rule = std::make_shared(); -} - std::string get_source_code(const std::string& source, const TSNode& node) { uint32_t start = ts_node_start_byte(node); uint32_t end = ts_node_end_byte(node); @@ -47,52 +43,13 @@ bool node_preceeded_by_only_whitespace(const std::string& source, const TSNode& FormatterTreeNode::FormatterTreeNode(const std::string& source, const TSNode& node) : token(get_source_code(source, node)) { metadata.node_type = ts_node_type(node); - metadata.is_comment = str_util::starts_with(str_util::ltrim(token.value()), ";"); - // Do some formatting on block-comments text - // TODO - this should go into a formatting rule - if (str_util::starts_with(str_util::ltrim(token.value()), "#|")) { - metadata.is_comment = true; - // Normalize block comments, remove any trailing or leading whitespace - // Only allow annotations on the first line, like #|@file - // Don't mess with internal indentation as the user might intend it to be a certain way. - std::string new_token = ""; - std::string comment_contents = ""; - bool seek_until_whitespace = str_util::starts_with(token.value(), "#|@"); - int chars_seeked = 0; - for (const auto& c : token.value()) { - if (c == '\n' || (seek_until_whitespace && (c == ' ' || c == '\t')) || - (!seek_until_whitespace && (c != '#' && c != '|'))) { - break; - } - chars_seeked++; - new_token += c; - } - // Remove the first line content and any leading whitespace - comment_contents = str_util::ltrim_newlines(token.value().substr(chars_seeked)); - // Remove trailing whitespace - comment_contents = str_util::rtrim(comment_contents); - // remove |# - comment_contents.pop_back(); - comment_contents.pop_back(); - comment_contents = str_util::rtrim(comment_contents); - new_token += fmt::format("\n{}\n|#", comment_contents); - token = new_token; - } - + metadata.is_comment = str_util::starts_with(str_util::ltrim(token.value()), ";") || + str_util::starts_with(str_util::ltrim(token.value()), "#|"); // Set any metadata based on the value of the token metadata.num_blank_lines_following = num_blank_lines_following_node(source, node); metadata.is_inline = !node_preceeded_by_only_whitespace(source, node); }; -std::shared_ptr FormatterTreeNode::get_formatting_rule(const int depth, - const int index) const { - // TODO - really lazy for now - if (!rules.empty()) { - return rules.at(0); - } - return formatter::default_rule; -} - // Check if the original source only has whitespace up to a new-line after it's token bool node_followed_by_only_whitespace(const std::string& source, const TSNode& node) { uint32_t pos = ts_node_end_byte(node); @@ -150,21 +107,7 @@ void FormatterTree::construct_formatter_tree_recursive(const std::string& source continue; } if (curr_node_type == "list_lit") { - // Check to see if the first line of the form has more than 1 element - if (i == 1) { - list_node.metadata.multiple_elements_first_line = - !node_followed_by_only_whitespace(source, child_node); - // Peek at the first element of the list to determine formatting rules - if (formatter::opengoal_indentation_rules.find(contents) != - formatter::opengoal_indentation_rules.end()) { - list_node.rules = formatter::opengoal_indentation_rules.at(contents); - } - } construct_formatter_tree_recursive(source, child_node, list_node); - // Check if the node that was recursively added to the list was on the same line - auto& new_node = list_node.refs.at(list_node.refs.size() - 1); - new_node.metadata.was_on_first_line_of_form = - nodes_on_same_line(source, curr_node, child_node); } else { construct_formatter_tree_recursive(source, child_node, tree_node); } diff --git a/common/formatter/formatter_tree.h b/common/formatter/formatter_tree.h index 02d7b7036b..d0165a45df 100644 --- a/common/formatter/formatter_tree.h +++ b/common/formatter/formatter_tree.h @@ -5,12 +5,8 @@ #include #include -#include "formatting_rules.h" - #include "tree_sitter/api.h" -class IndentationRule; - // Treesitter is fantastic for validating and parsing our code into a structured tree format without // whitespace so we can do that ourselves (formatting) However, the treesitter AST is a bit too // detailed for purposes of formatting. @@ -27,10 +23,6 @@ class IndentationRule; // Pass 1 - convert the AST into a simplified FormatterTree // Pass 2 - use the simplified tree to output the final code -namespace formatter { -extern const std::shared_ptr default_indentation_rule; -} - class FormatterTreeNode { public: struct Metadata { @@ -39,28 +31,16 @@ class FormatterTreeNode { bool is_comment = false; bool is_inline = false; int num_blank_lines_following = 0; - // Whether the form had more than 1 element on the first line - // (println - // "test") - // vs - // (println "test") - bool multiple_elements_first_line = false; - bool was_on_first_line_of_form = false; }; std::vector refs; Metadata metadata; // The token is optional because list nodes do not contain a token, they just contain a bunch of // eventually token node refs std::optional token; - std::vector> rules = {}; FormatterTreeNode() = default; FormatterTreeNode(const std::string& source, const TSNode& node); FormatterTreeNode(const Metadata& _metadata) : metadata(_metadata){}; - - // Considers the input to determine the most relevant formatting rule for the given node - // if none are applicable, returns `formatter::default_rule` - std::shared_ptr get_formatting_rule(const int depth, const int index) const; }; // A FormatterTree has a very simple and crude tree structure where: diff --git a/common/formatter/formatting_rules.cpp b/common/formatter/formatting_rules.cpp index fd33d019ed..4171d31fa3 100644 --- a/common/formatter/formatting_rules.cpp +++ b/common/formatter/formatting_rules.cpp @@ -4,10 +4,19 @@ #include "common/util/string_util.h" -void formatter_rules::blank_lines::separate_by_newline(std::string& curr_text, - const FormatterTreeNode& containing_node, - const FormatterTreeNode& node, - const int index) { +#include "third-party/fmt/core.h" + +namespace formatter_rules { + +// TODO - probably need to include quoted literals as well, though the grammar currently does not +// differentiate between a quoted symbol and a quoted form +const std::set constant_types = {"kwd_lit", "num_lit", "str_lit", + "char_lit", "null_lit", "bool_lit"}; +namespace blank_lines { +void separate_by_newline(std::string& curr_text, + const FormatterTreeNode& containing_node, + const FormatterTreeNode& node, + const int index) { // We only are concerned with top level forms or elements // Skip the last element, no trailing new-lines (let the editors handle this!) // Also peek ahead to see if there was a comment on this line, if so don't separate things! @@ -24,144 +33,240 @@ void formatter_rules::blank_lines::separate_by_newline(std::string& curr_text, // Otherwise, add only 1 blank line curr_text += "\n"; } +} // namespace blank_lines -// TODO - probably need to include quoted literals as well, though the grammar currently does not -// differentiate between a quoted symbol and a quoted form -const std::set constant_pair_types = {"kwd_lit", "num_lit", "str_lit", "char_lit", - "null_lit", "bool_lit", "sym_lit"}; +namespace comments { +std::string format_block_comment(const std::string& comment) { + // Normalize block comments, remove any trailing or leading whitespace + // Only allow annotations on the first line, like #|@file + // Don't mess with internal indentation as the user might intend it to be a certain way. + std::string new_comment = ""; + std::string comment_contents = ""; + bool seek_until_whitespace = str_util::starts_with(comment, "#|@"); + int chars_seeked = 0; + for (const auto& c : comment) { + if (c == '\n' || (seek_until_whitespace && (c == ' ' || c == '\t')) || + (!seek_until_whitespace && (c != '#' && c != '|'))) { + break; + } + chars_seeked++; + new_comment += c; + } + // Remove the first line content and any leading whitespace + comment_contents = str_util::ltrim_newlines(comment.substr(chars_seeked)); + // Remove trailing whitespace + comment_contents = str_util::rtrim(comment_contents); + // remove |# + // TODO - check suffix + comment_contents.pop_back(); + comment_contents.pop_back(); + comment_contents = str_util::rtrim(comment_contents); + new_comment += fmt::format("\n{}\n|#", comment_contents); + return new_comment; +} +} // namespace comments -bool formatter_rules::constant_pairs::is_element_second_in_constant_pair( - const FormatterTreeNode& containing_node, - const FormatterTreeNode& node, - const int index) { +namespace constant_pairs { + +bool is_element_second_in_constant_pair(const FormatterTreeNode& containing_node, + const FormatterTreeNode& node, + const int index) { if (containing_node.refs.empty() || index == 0) { return false; } // Ensure that a keyword came before hand if (containing_node.refs.at(index - 1).metadata.node_type != "kwd_lit") { return false; + } else if (node.metadata.node_type == "kwd_lit") { + // NOTE - there is ambiugity here which cannot be totally solved (i think?) + // if the element itself is also a keyword, assume this is two adjacent keywords and they should + // not be paired + return false; } // Check the type of the element - if (constant_pair_types.find(node.metadata.node_type) != constant_pair_types.end()) { + if (constant_types.find(node.metadata.node_type) != constant_types.end()) { return true; } return false; } -void IndentationRule::append_newline(std::string& curr_text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node, - const int depth, - const int index) { - if (index <= 0 && !containing_node.metadata.multiple_elements_first_line || - index <= 1 && containing_node.metadata.multiple_elements_first_line || - containing_node.metadata.is_top_level || +bool form_should_be_constant_paired(const FormatterTreeNode& node) { + // Criteria for a list to be constant paired: + // - needs to start with a non-symbol + // - needs atleast the minimum amount of pairs, so 2 pairs can still be inlined + if (node.refs.empty()) { + return false; + } + int num_pairs = 0; + for (int i = 0; i < node.refs.size() - 1; i++) { + const auto& ref = node.refs.at(i); + const auto& next_ref = node.refs.at(i + 1); + if (ref.token && next_ref.token) { + // If the first element a keyword and the following item is a constant, it's a pair + // move forward one extra index + if (ref.metadata.node_type == "kwd_lit" && + constant_types.find(next_ref.metadata.node_type) != constant_types.end()) { + num_pairs++; + i++; + } + } + } + return num_pairs >= min_pair_amount; +} + +} // namespace constant_pairs + +namespace indent { + +int cursor_pos(const std::string& curr_text) { + if (curr_text.empty()) { + return 0; + } + // Get the last line of the text (which is also the line we are on!) + int pos = 0; + for (int i = curr_text.size() - 1; i >= 0; i--) { + const auto& c = curr_text.at(i); + if (c == '\n') { + break; + } + pos++; + } + return pos; +} + +int compute_form_width_after_index(const FormatterTreeNode& node, + const int index, + const int depth = 0) { + if (node.refs.empty()) { + if (node.token) { + return node.token->size(); + } else { + return 0; + } + } + int form_width = 0; + for (int i = 0; i < node.refs.size(); i++) { + const auto& ref = node.refs.at(i); + if (depth == 0 && i < index) { + continue; + } + if (ref.token) { + form_width += ref.token->size() + 1; + } else { + form_width += compute_form_width_after_index(ref, index, depth + 1) + 1; + } + } + return form_width; +} + +bool form_exceed_line_width(const std::string& curr_text, + const FormatterTreeNode& containing_node, + const int index) { + // Compute length from the current cursor position on the line as this check is done for every + // element of the form and not in advance + // + // This is for a good reason, intermediate nodes may override this styling and force to be + // formatted inline + // + // We early out as soon as we exceed the width + int curr_line_pos = cursor_pos(curr_text); + if (curr_line_pos >= line_width_target) { + return true; + } + int remaining_width_required = compute_form_width_after_index(containing_node, index); + if (curr_line_pos + remaining_width_required >= line_width_target) { + return true; + } + return false; +} + +bool form_contains_comment(const FormatterTreeNode& node) { + if (node.metadata.is_comment) { + return true; + } + for (const auto& ref : node.refs) { + if (ref.metadata.is_comment) { + return true; + } else if (!node.refs.empty()) { + if (form_contains_comment(ref)) { + return true; + } + } + } + return false; +} + +bool form_can_be_inlined(std::string& curr_text, const FormatterTreeNode& node) { + // Two main checks: + // - first, is the form too long to fit on a line TODO - increase accuracy here + if (form_exceed_line_width(curr_text, node, 0)) { + return false; + } + // - second, are there any comments? (inlined or not, doesn't matter) + if (form_contains_comment(node)) { + return false; + } + return true; +} + +void append_newline(std::string& curr_text, + const FormatterTreeNode& node, + const FormatterTreeNode& containing_node, + const int depth, + const int index, + const bool constant_pair_form) { + if (index <= 0 || containing_node.metadata.is_top_level || (node.metadata.is_comment && node.metadata.is_inline)) { return; } // Check if it's a constant pair - if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node, - index)) { + if (constant_pair_form && + constant_pairs::is_element_second_in_constant_pair(containing_node, node, index)) { return; } curr_text = str_util::rtrim(curr_text) + "\n"; } -void IndentationRule::indent_token(std::string& curr_text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node, - const int depth, - const int index) { - if (node.metadata.is_top_level) { +void flow_line(std::string& curr_text, + const FormatterTreeNode& node, + const FormatterTreeNode& containing_node, + const int depth, + const int index) { + if (node.metadata.is_top_level || (node.metadata.is_inline && node.metadata.is_comment)) { return; } // If the element is the second element in a constant pair, that means we did not append a // new-line before hand so we require no indentation (it's inline with the previous element) - if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node, - index)) { + if (constant_pairs::is_element_second_in_constant_pair(containing_node, node, index)) { return; } - if (containing_node.metadata.multiple_elements_first_line) { - if (index > 1) { - // Only apply indentation if we are about to print a normal text token - // TODO - unsafe - if (node.token.has_value()) { - curr_text += str_util::repeat(containing_node.refs.at(0).token.value().length() + 2, " "); - } - } - } else { - if (index > 0) { + if (index > 0) { + // If the first element in the list is a constant, we only indent with 1 space instead + if (constant_types.find(containing_node.refs.at(0).metadata.node_type) != + constant_types.end()) { curr_text += str_util::repeat(depth, " "); + } else { + curr_text += str_util::repeat(depth, " "); } } } -void IndentationRule::align_form_lines(std::string& text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node) { - const auto lines = str_util::split(text); - // TODO - unsafe (breaks on a list of lists) - int alignment_width = 1; - if (containing_node.metadata.multiple_elements_first_line) { - alignment_width = containing_node.refs.at(0).token.value().length() + 2; - } - std::string aligned_form = ""; - for (int i = 0; i < lines.size(); i++) { - aligned_form += str_util::repeat(alignment_width, " ") + lines.at(i); - if (i != lines.size() - 1) { - aligned_form += "\n"; - } - } - text = aligned_form; -} - -void InnerIndentationRule::append_newline(std::string& curr_text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node, - const int depth, - const int index) { - if (index < 1 || (m_depth != depth || m_index && m_index.value() != index)) { - return; - } - // Check if it's a constant pair - if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node, - index)) { - return; - } - if (!node.metadata.was_on_first_line_of_form) { - curr_text = str_util::rtrim(curr_text) + "\n"; - } -} - -void InnerIndentationRule::indent_token(std::string& curr_text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node, - const int depth, - const int index) { - if (index < 1 || (m_depth != depth || m_index && m_index.value() != index)) { - return; - } - // If the element is the second element in a constant pair, that means we did not append a - // new-line before hand so we require no indentation (it's inline with the previous element) - if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node, - index)) { - return; - } - // We only new-line elements if they were not originally on the first line - if (!node.metadata.was_on_first_line_of_form) { - curr_text += str_util::repeat(depth * 2, " "); - } -} - -void InnerIndentationRule::align_form_lines(std::string& text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node) { - if (node.metadata.was_on_first_line_of_form) { - return; - } +void hang_lines(std::string& text, + const FormatterTreeNode& node, + const FormatterTreeNode& containing_node, + const bool constant_pair_form) { const auto lines = str_util::split(text); // TODO - unsafe (breaks on a list of lists) int alignment_width = 2; + if (constant_pair_form && + constant_types.find(containing_node.refs.at(0).metadata.node_type) != constant_types.end()) { + alignment_width = 3; + } + // TODO - implement hanging + // always hang unless flowing is "better" (this is the hard part) + /*else if (containing_node.metadata.multiple_elements_first_line) { + alignment_width = containing_node.refs.at(0).token.value().length() + 2; + }*/ std::string aligned_form = ""; for (int i = 0; i < lines.size(); i++) { aligned_form += str_util::repeat(alignment_width, " ") + lines.at(i); @@ -170,5 +275,7 @@ void InnerIndentationRule::align_form_lines(std::string& text, } } text = aligned_form; - return; } +} // namespace indent + +} // namespace formatter_rules diff --git a/common/formatter/formatting_rules.h b/common/formatter/formatting_rules.h index ad120bb3b4..26bd06a7ea 100644 --- a/common/formatter/formatting_rules.h +++ b/common/formatter/formatting_rules.h @@ -4,11 +4,9 @@ #include "formatter_tree.h" -class FormatterTreeNode; - namespace formatter_rules { // The formatter will try to collapse as much space as possible in the top-level, this means -// separating forms by a single empty blank line +// separating forms by a single empty blank line // // The exception are comments, top level comments will retain their following blank lines from the // original source @@ -26,12 +24,14 @@ void separate_by_newline(std::string& curr_text, const int index); } -// TODO - nothing here yet, in the future: +// TODO: // - align consecutive comment lines // - if/when the formatter is concerned with line length, there are implications here // // Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/comments.md -namespace comments {} +namespace comments { +std::string format_block_comment(const std::string& comment); +} // Paired elements in a list will be kept in-line rather than the default new-line indentation // For example: @@ -53,6 +53,8 @@ namespace comments {} // // Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/constantpairs.md namespace constant_pairs { +const static int min_pair_amount = 4; + // Determines if the given element is the second element in a constant pair, if it is then we would // usually want to elide the new-line in whatever code that applies it // @@ -64,86 +66,49 @@ namespace constant_pairs { bool is_element_second_in_constant_pair(const FormatterTreeNode& containing_node, const FormatterTreeNode& node, const int index); +bool form_should_be_constant_paired(const FormatterTreeNode& node); } // namespace constant_pairs + +// There are two main types of indentations "flow"s and "hang"s +// (this is +// a +// hang) +// (this +// is +// a +// flow) +// +// `flow` is the default indentation mode and right now the determination between the two is down to +// manual configuration based on the head of the form. In general, we only `flow` or `hang` if the +// form cannot fit on the current line. +// +// Additionally, if the head of the form is a constant we `flow` with an indent of `1` instead of +// `2` +// +// TODO: - incorporate more heuristics here, explore both a hang and flow approach to see which is +// better +// +// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/indent.md +namespace indent { +const static int line_width_target = 120; + +bool form_can_be_inlined(std::string& curr_text, const FormatterTreeNode& node); + +void append_newline(std::string& curr_text, + const FormatterTreeNode& node, + const FormatterTreeNode& containing_node, + const int depth, + const int index, + const bool constant_pair_form); +void flow_line(std::string& curr_text, + const FormatterTreeNode& node, + const FormatterTreeNode& containing_node, + const int depth, + const int index); +void hang_lines(std::string& text, + const FormatterTreeNode& node, + const FormatterTreeNode& containing_node, + const bool constant_pair_form); + +} // namespace indent } // namespace formatter_rules - -// Indentation rules are heavily inspired by the descriptions here -// https://github.com/weavejester/cljfmt/blob/master/docs/INDENTS.md -// -// This style of formatting assumes the code coming in is already reasonably formatted, aka was -// written by an actual human and isn't just a single line of minified code -// -// If it _is_ though, the formatting rules will still be able to do a somewhat decent job, as -// certain forms are configured to format a specific way, but it probably won't be broadly -// consistent with code written normally -// -// cljfmt observations: -// - a form that starts on the first line but spans multiple lines (it doesn't really handle this -// well) ex. (println (hello -// world) ye) ;; you'd expect the 'ye' to be aligned with `(h...` -// - vector lists are treated differently from paren lists (seems to leave them inline or default -// indent them if they span multiple lines) ex. [hello world -// what] - -// The default rule that is used if no other rule applies to the given form -// -// For lists this will format like so: -// - If the only element on the first line is the head of the form, every subsequent element is -// indented with 1 space on a new line -// (println ; <= one or fewer elements on first line -// "hello" -// "world") -// - otherwise, every element after the 2nd is on a new line and aligned to that 1st list arg. -// (println "hello" ; <= more than one element on first line -// "world") -class IndentationRule { - public: - virtual ~IndentationRule() = default; - virtual void append_newline(std::string& curr_text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node, - const int depth, - const int index); - virtual void indent_token(std::string& curr_text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node, - const int depth, - const int index); - virtual void align_form_lines(std::string& text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node); -}; - -// Inner indentation always indents by 2 spaces for every line after the first regardless of the -// number of elements `depth` defines at what depth the rule should be applied, and optionally -// `index` narrows this down further to a given index at that depth -// -// Some simple examples: -// (defn greet [name] -// (println "Hello" name)) -// -// (defn dismiss -// [name] -// (println "Goodbye" name)) -class InnerIndentationRule : public IndentationRule { - private: - int m_depth; - std::optional m_index; - - public: - InnerIndentationRule(int depth) : m_depth(depth){}; - InnerIndentationRule(int depth, int index) : m_depth(depth), m_index(index){}; - virtual void append_newline(std::string& curr_text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node, - const int depth, - const int index) override; - virtual void indent_token(std::string& curr_text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node, - const int depth, - const int index) override; - void align_form_lines(std::string& text, - const FormatterTreeNode& node, - const FormatterTreeNode& containing_node) override; -}; diff --git a/test/common/formatter/corpus/comments.test.gc b/test/common/formatter/corpus/comments.test.gc index 8a6c933854..be7c8aa087 100644 --- a/test/common/formatter/corpus/comments.test.gc +++ b/test/common/formatter/corpus/comments.test.gc @@ -15,14 +15,14 @@ Comment - Within Form === (println - ;; test - "test") + ;; test + "test") --- (println - ;; test - "test") + ;; test + "test") === @@ -117,8 +117,8 @@ Block Comment - In Form --- (println - #| + #| block comment test |# - "test") + "test") diff --git a/test/common/formatter/corpus/constant-pairs.test.gc b/test/common/formatter/corpus/constant-pairs.test.gc index e2b4ab63ff..9f175c86d3 100644 --- a/test/common/formatter/corpus/constant-pairs.test.gc +++ b/test/common/formatter/corpus/constant-pairs.test.gc @@ -10,42 +10,71 @@ One Pair (:hello "world") +=== +Three Pairs +=== + +(:hello + +"world" :world 123 +:test 456) + +--- + +(:hello "world" :world 123 :test 456) + +=== +Four Pairs +=== + +(:hello + +"world" :world 123 +:test 456 +:doit 789) + +--- + +(:hello "world" + :world 123 + :test 456 + :doit 789) + === Not a Valid Constant === (:hello -(println "hello world")) - ---- - -(:hello - (println "hello world")) - -=== -Two Pairs -=== - -(:hello - -"world" :test 123) +"world" :world 123 +:test 456 +:not (println "hello world") +:doit 789) --- (:hello "world" - :test 123) + :world 123 + :test 456 + :not + (println "hello world") + :doit 789) === -Pair Mixture +Amibiguous List === (:hello -"world" "not-a-pair" :test 123) +"world" :world 123 +:test 456 +:not-paired +:doit 789) --- (:hello "world" - "not-a-pair" - :test 123) + :world 123 + :test 456 + :not-paired + :doit 789) diff --git a/test/common/formatter/corpus/default.test.gc b/test/common/formatter/corpus/default.test.gc deleted file mode 100644 index 016bef6f33..0000000000 --- a/test/common/formatter/corpus/default.test.gc +++ /dev/null @@ -1,64 +0,0 @@ -=== -All Alignment -=== - -(println "hello" (println "world" "world2")) - ---- - -(println "hello" - (println "world" - "world2")) - -=== -Multiple Top Level Forms -=== - -(println "hello" "world")(println "hello" "world") - ---- - -(println "hello" - "world") - -(println "hello" - "world") - -=== -All Indented -=== - -(println -"hello" (println -"world")) - ---- - -(println - "hello" - (println - "world")) - -=== -Mixed -=== - -(println -"hello" (println "world")) - ---- - -(println - "hello" - (println "world")) - -=== -Single Item Form -=== - -(println) - ---- - -(println) - diff --git a/test/common/formatter/corpus/indent-flow.test.gc b/test/common/formatter/corpus/indent-flow.test.gc new file mode 100644 index 0000000000..5b73c504de --- /dev/null +++ b/test/common/formatter/corpus/indent-flow.test.gc @@ -0,0 +1,60 @@ +=== +Basic Nested Form +=== + +(println "hello" (println "world" "world2" "very-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-form")) + +--- + +(println + "hello" + (println + "world" + "world2" + "very-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-form")) + +=== +Multiple Top Level Forms +=== + +(println "hello" "world")(println "hello" "world") + +--- + +(println "hello" "world") + +(println "hello" "world") + +=== +All Indented +=== + +(println +"hello" (println +"world")) + +--- + +(println "hello" (println "world")) + +=== +Mixed +=== + +(println +"hello" (println "world")) + +--- + +(println "hello" (println "world")) + +=== +Single Item Form +=== + +(println) + +--- + +(println) + diff --git a/test/common/formatter/corpus/indent-hang.test.gc b/test/common/formatter/corpus/indent-hang.test.gc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/common/formatter/corpus/inner.test.gc b/test/common/formatter/corpus/inner.test.gc deleted file mode 100644 index 0f050963ee..0000000000 --- a/test/common/formatter/corpus/inner.test.gc +++ /dev/null @@ -1,19 +0,0 @@ -=== -Basic Inner Indentation -=== - -(defun greet (name string) -(println "Hello" name) - (println "Hello" name) - (println "Hello" name) -) - ---- - -(defun greet (name string) - (println "Hello" - name) - (println "Hello" - name) - (println "Hello" - name))