formatter: new approach for indentation that doesn't depend on original formatting (#2764)

This commit is contained in:
Tyler Wilding
2023-06-21 23:16:18 -05:00
committed by GitHub
parent f980c733b2
commit 533d2f6f2b
13 changed files with 427 additions and 405 deletions
+5 -9
View File
@@ -1,14 +1,10 @@
#include "rule_config.h"
namespace formatter {
// TODO - these are eventually going to need a refactor too, my depth is based on the true
// tree-depth, in reality we want it to be relative to the forms defined here because imagine
// multiple `let`s in the same form, their depths will be different but you want them relatively
// formatted the same
namespace formatter_rules {
namespace config {
// TODO - populate these more
const std::unordered_map<std::string, std::vector<std::shared_ptr<IndentationRule>>>
opengoal_indentation_rules = {{"defun", {std::make_shared<InnerIndentationRule>(1)}}};
} // namespace formatter
std::unordered_map<std::string, FormConfiguration> opengoal_form_config = {};
} // namespace config
} // namespace formatter_rules
+10 -4
View File
@@ -7,7 +7,13 @@
#include "common/formatter/formatting_rules.h"
namespace formatter {
extern const std::unordered_map<std::string, std::vector<std::shared_ptr<IndentationRule>>>
opengoal_indentation_rules;
}
namespace formatter_rules {
namespace config {
struct FormConfiguration {
bool force_hang = false;
int start_hang_at_index = 0;
};
extern std::unordered_map<std::string, FormConfiguration> opengoal_form_config;
} // namespace config
} // namespace formatter_rules
+33 -14
View File
@@ -24,9 +24,6 @@ std::string apply_formatting(const FormatterTreeNode& curr_node,
std::string curr_form = "";
// Print the token
if (curr_node.token) {
// TODO - perhaps unneeded
curr_node.get_formatting_rule(tree_depth, -1)
->indent_token(curr_form, curr_node, curr_node, tree_depth, -1);
curr_form += curr_node.token.value();
return curr_form;
}
@@ -34,29 +31,51 @@ std::string apply_formatting(const FormatterTreeNode& curr_node,
curr_form += "(";
}
// Iterate the form
bool inline_form = false;
// Also check if the form should be constant-paired
const bool constant_pair_form =
formatter_rules::constant_pairs::form_should_be_constant_paired(curr_node);
if (!constant_pair_form) {
// Determine if the form should be inlined or hung/flowed
// TODO - this isn't entirely accurate, needs current cursor positioning (which is tricky
// because recursion!)
inline_form = formatter_rules::indent::form_can_be_inlined(curr_form, curr_node);
}
for (int i = 0; i < curr_node.refs.size(); i++) {
const auto& ref = curr_node.refs.at(i);
// Append a newline if needed
curr_node.get_formatting_rule(tree_depth, i)
->append_newline(curr_form, ref, curr_node, tree_depth, i);
if (!inline_form) {
formatter_rules::indent::append_newline(curr_form, ref, curr_node, tree_depth, i,
constant_pair_form);
}
// Either print the element's token, or recursively format it as well
if (ref.token) {
curr_node.get_formatting_rule(tree_depth, i)
->indent_token(curr_form, ref, curr_node, 1,
i); // TODO depth hard-coded to 1, i think this can be removed, since
// forms are always done bottom-top recursively, they always act
// independently as if it was the shallowest depth
curr_form += ref.token.value();
// TODO depth hard-coded to 1, i think this can be removed, since
// forms are always done bottom-top recursively, they always act
// independently as if it was the shallowest depth
if (!inline_form) {
formatter_rules::indent::flow_line(curr_form, ref, curr_node, 1, i);
}
if (ref.metadata.node_type == "comment" && ref.metadata.is_inline) {
curr_form += " " + ref.token.value();
} else if (ref.metadata.node_type == "block_comment") {
curr_form += formatter_rules::comments::format_block_comment(ref.token.value());
} else {
curr_form += ref.token.value();
}
if (!curr_node.metadata.is_top_level) {
curr_form += " ";
}
} else {
auto formatted_form = apply_formatting(ref, "", tree_depth + 1);
if (!curr_node.metadata.is_top_level) {
curr_node.get_formatting_rule(tree_depth, i)
->align_form_lines(formatted_form, ref, curr_node);
if (!curr_node.metadata.is_top_level && !inline_form) {
formatter_rules::indent::hang_lines(formatted_form, ref, curr_node, constant_pair_form);
}
curr_form += formatted_form;
if (!curr_node.metadata.is_top_level) {
curr_form += " ";
}
}
// Handle blank lines at the top level, skip if it's the final element
formatter_rules::blank_lines::separate_by_newline(curr_form, curr_node, ref, i);
+2 -59
View File
@@ -6,10 +6,6 @@
#include "third-party/fmt/core.h"
namespace formatter {
const std::shared_ptr<IndentationRule> default_rule = std::make_shared<IndentationRule>();
}
std::string get_source_code(const std::string& source, const TSNode& node) {
uint32_t start = ts_node_start_byte(node);
uint32_t end = ts_node_end_byte(node);
@@ -47,52 +43,13 @@ bool node_preceeded_by_only_whitespace(const std::string& source, const TSNode&
FormatterTreeNode::FormatterTreeNode(const std::string& source, const TSNode& node)
: token(get_source_code(source, node)) {
metadata.node_type = ts_node_type(node);
metadata.is_comment = str_util::starts_with(str_util::ltrim(token.value()), ";");
// Do some formatting on block-comments text
// TODO - this should go into a formatting rule
if (str_util::starts_with(str_util::ltrim(token.value()), "#|")) {
metadata.is_comment = true;
// Normalize block comments, remove any trailing or leading whitespace
// Only allow annotations on the first line, like #|@file
// Don't mess with internal indentation as the user might intend it to be a certain way.
std::string new_token = "";
std::string comment_contents = "";
bool seek_until_whitespace = str_util::starts_with(token.value(), "#|@");
int chars_seeked = 0;
for (const auto& c : token.value()) {
if (c == '\n' || (seek_until_whitespace && (c == ' ' || c == '\t')) ||
(!seek_until_whitespace && (c != '#' && c != '|'))) {
break;
}
chars_seeked++;
new_token += c;
}
// Remove the first line content and any leading whitespace
comment_contents = str_util::ltrim_newlines(token.value().substr(chars_seeked));
// Remove trailing whitespace
comment_contents = str_util::rtrim(comment_contents);
// remove |#
comment_contents.pop_back();
comment_contents.pop_back();
comment_contents = str_util::rtrim(comment_contents);
new_token += fmt::format("\n{}\n|#", comment_contents);
token = new_token;
}
metadata.is_comment = str_util::starts_with(str_util::ltrim(token.value()), ";") ||
str_util::starts_with(str_util::ltrim(token.value()), "#|");
// Set any metadata based on the value of the token
metadata.num_blank_lines_following = num_blank_lines_following_node(source, node);
metadata.is_inline = !node_preceeded_by_only_whitespace(source, node);
};
std::shared_ptr<IndentationRule> FormatterTreeNode::get_formatting_rule(const int depth,
const int index) const {
// TODO - really lazy for now
if (!rules.empty()) {
return rules.at(0);
}
return formatter::default_rule;
}
// Check if the original source only has whitespace up to a new-line after it's token
bool node_followed_by_only_whitespace(const std::string& source, const TSNode& node) {
uint32_t pos = ts_node_end_byte(node);
@@ -150,21 +107,7 @@ void FormatterTree::construct_formatter_tree_recursive(const std::string& source
continue;
}
if (curr_node_type == "list_lit") {
// Check to see if the first line of the form has more than 1 element
if (i == 1) {
list_node.metadata.multiple_elements_first_line =
!node_followed_by_only_whitespace(source, child_node);
// Peek at the first element of the list to determine formatting rules
if (formatter::opengoal_indentation_rules.find(contents) !=
formatter::opengoal_indentation_rules.end()) {
list_node.rules = formatter::opengoal_indentation_rules.at(contents);
}
}
construct_formatter_tree_recursive(source, child_node, list_node);
// Check if the node that was recursively added to the list was on the same line
auto& new_node = list_node.refs.at(list_node.refs.size() - 1);
new_node.metadata.was_on_first_line_of_form =
nodes_on_same_line(source, curr_node, child_node);
} else {
construct_formatter_tree_recursive(source, child_node, tree_node);
}
-20
View File
@@ -5,12 +5,8 @@
#include <string>
#include <vector>
#include "formatting_rules.h"
#include "tree_sitter/api.h"
class IndentationRule;
// Treesitter is fantastic for validating and parsing our code into a structured tree format without
// whitespace so we can do that ourselves (formatting) However, the treesitter AST is a bit too
// detailed for purposes of formatting.
@@ -27,10 +23,6 @@ class IndentationRule;
// Pass 1 - convert the AST into a simplified FormatterTree
// Pass 2 - use the simplified tree to output the final code
namespace formatter {
extern const std::shared_ptr<IndentationRule> default_indentation_rule;
}
class FormatterTreeNode {
public:
struct Metadata {
@@ -39,28 +31,16 @@ class FormatterTreeNode {
bool is_comment = false;
bool is_inline = false;
int num_blank_lines_following = 0;
// Whether the form had more than 1 element on the first line
// (println
// "test")
// vs
// (println "test")
bool multiple_elements_first_line = false;
bool was_on_first_line_of_form = false;
};
std::vector<FormatterTreeNode> refs;
Metadata metadata;
// The token is optional because list nodes do not contain a token, they just contain a bunch of
// eventually token node refs
std::optional<std::string> token;
std::vector<std::shared_ptr<IndentationRule>> rules = {};
FormatterTreeNode() = default;
FormatterTreeNode(const std::string& source, const TSNode& node);
FormatterTreeNode(const Metadata& _metadata) : metadata(_metadata){};
// Considers the input to determine the most relevant formatting rule for the given node
// if none are applicable, returns `formatter::default_rule`
std::shared_ptr<IndentationRule> get_formatting_rule(const int depth, const int index) const;
};
// A FormatterTree has a very simple and crude tree structure where:
+212 -105
View File
@@ -4,10 +4,19 @@
#include "common/util/string_util.h"
void formatter_rules::blank_lines::separate_by_newline(std::string& curr_text,
const FormatterTreeNode& containing_node,
const FormatterTreeNode& node,
const int index) {
#include "third-party/fmt/core.h"
namespace formatter_rules {
// TODO - probably need to include quoted literals as well, though the grammar currently does not
// differentiate between a quoted symbol and a quoted form
const std::set<std::string> constant_types = {"kwd_lit", "num_lit", "str_lit",
"char_lit", "null_lit", "bool_lit"};
namespace blank_lines {
void separate_by_newline(std::string& curr_text,
const FormatterTreeNode& containing_node,
const FormatterTreeNode& node,
const int index) {
// We only are concerned with top level forms or elements
// Skip the last element, no trailing new-lines (let the editors handle this!)
// Also peek ahead to see if there was a comment on this line, if so don't separate things!
@@ -24,144 +33,240 @@ void formatter_rules::blank_lines::separate_by_newline(std::string& curr_text,
// Otherwise, add only 1 blank line
curr_text += "\n";
}
} // namespace blank_lines
// TODO - probably need to include quoted literals as well, though the grammar currently does not
// differentiate between a quoted symbol and a quoted form
const std::set<std::string> constant_pair_types = {"kwd_lit", "num_lit", "str_lit", "char_lit",
"null_lit", "bool_lit", "sym_lit"};
namespace comments {
std::string format_block_comment(const std::string& comment) {
// Normalize block comments, remove any trailing or leading whitespace
// Only allow annotations on the first line, like #|@file
// Don't mess with internal indentation as the user might intend it to be a certain way.
std::string new_comment = "";
std::string comment_contents = "";
bool seek_until_whitespace = str_util::starts_with(comment, "#|@");
int chars_seeked = 0;
for (const auto& c : comment) {
if (c == '\n' || (seek_until_whitespace && (c == ' ' || c == '\t')) ||
(!seek_until_whitespace && (c != '#' && c != '|'))) {
break;
}
chars_seeked++;
new_comment += c;
}
// Remove the first line content and any leading whitespace
comment_contents = str_util::ltrim_newlines(comment.substr(chars_seeked));
// Remove trailing whitespace
comment_contents = str_util::rtrim(comment_contents);
// remove |#
// TODO - check suffix
comment_contents.pop_back();
comment_contents.pop_back();
comment_contents = str_util::rtrim(comment_contents);
new_comment += fmt::format("\n{}\n|#", comment_contents);
return new_comment;
}
} // namespace comments
bool formatter_rules::constant_pairs::is_element_second_in_constant_pair(
const FormatterTreeNode& containing_node,
const FormatterTreeNode& node,
const int index) {
namespace constant_pairs {
bool is_element_second_in_constant_pair(const FormatterTreeNode& containing_node,
const FormatterTreeNode& node,
const int index) {
if (containing_node.refs.empty() || index == 0) {
return false;
}
// Ensure that a keyword came before hand
if (containing_node.refs.at(index - 1).metadata.node_type != "kwd_lit") {
return false;
} else if (node.metadata.node_type == "kwd_lit") {
// NOTE - there is ambiugity here which cannot be totally solved (i think?)
// if the element itself is also a keyword, assume this is two adjacent keywords and they should
// not be paired
return false;
}
// Check the type of the element
if (constant_pair_types.find(node.metadata.node_type) != constant_pair_types.end()) {
if (constant_types.find(node.metadata.node_type) != constant_types.end()) {
return true;
}
return false;
}
void IndentationRule::append_newline(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index) {
if (index <= 0 && !containing_node.metadata.multiple_elements_first_line ||
index <= 1 && containing_node.metadata.multiple_elements_first_line ||
containing_node.metadata.is_top_level ||
bool form_should_be_constant_paired(const FormatterTreeNode& node) {
// Criteria for a list to be constant paired:
// - needs to start with a non-symbol
// - needs atleast the minimum amount of pairs, so 2 pairs can still be inlined
if (node.refs.empty()) {
return false;
}
int num_pairs = 0;
for (int i = 0; i < node.refs.size() - 1; i++) {
const auto& ref = node.refs.at(i);
const auto& next_ref = node.refs.at(i + 1);
if (ref.token && next_ref.token) {
// If the first element a keyword and the following item is a constant, it's a pair
// move forward one extra index
if (ref.metadata.node_type == "kwd_lit" &&
constant_types.find(next_ref.metadata.node_type) != constant_types.end()) {
num_pairs++;
i++;
}
}
}
return num_pairs >= min_pair_amount;
}
} // namespace constant_pairs
namespace indent {
int cursor_pos(const std::string& curr_text) {
if (curr_text.empty()) {
return 0;
}
// Get the last line of the text (which is also the line we are on!)
int pos = 0;
for (int i = curr_text.size() - 1; i >= 0; i--) {
const auto& c = curr_text.at(i);
if (c == '\n') {
break;
}
pos++;
}
return pos;
}
int compute_form_width_after_index(const FormatterTreeNode& node,
const int index,
const int depth = 0) {
if (node.refs.empty()) {
if (node.token) {
return node.token->size();
} else {
return 0;
}
}
int form_width = 0;
for (int i = 0; i < node.refs.size(); i++) {
const auto& ref = node.refs.at(i);
if (depth == 0 && i < index) {
continue;
}
if (ref.token) {
form_width += ref.token->size() + 1;
} else {
form_width += compute_form_width_after_index(ref, index, depth + 1) + 1;
}
}
return form_width;
}
bool form_exceed_line_width(const std::string& curr_text,
const FormatterTreeNode& containing_node,
const int index) {
// Compute length from the current cursor position on the line as this check is done for every
// element of the form and not in advance
//
// This is for a good reason, intermediate nodes may override this styling and force to be
// formatted inline
//
// We early out as soon as we exceed the width
int curr_line_pos = cursor_pos(curr_text);
if (curr_line_pos >= line_width_target) {
return true;
}
int remaining_width_required = compute_form_width_after_index(containing_node, index);
if (curr_line_pos + remaining_width_required >= line_width_target) {
return true;
}
return false;
}
bool form_contains_comment(const FormatterTreeNode& node) {
if (node.metadata.is_comment) {
return true;
}
for (const auto& ref : node.refs) {
if (ref.metadata.is_comment) {
return true;
} else if (!node.refs.empty()) {
if (form_contains_comment(ref)) {
return true;
}
}
}
return false;
}
bool form_can_be_inlined(std::string& curr_text, const FormatterTreeNode& node) {
// Two main checks:
// - first, is the form too long to fit on a line TODO - increase accuracy here
if (form_exceed_line_width(curr_text, node, 0)) {
return false;
}
// - second, are there any comments? (inlined or not, doesn't matter)
if (form_contains_comment(node)) {
return false;
}
return true;
}
void append_newline(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index,
const bool constant_pair_form) {
if (index <= 0 || containing_node.metadata.is_top_level ||
(node.metadata.is_comment && node.metadata.is_inline)) {
return;
}
// Check if it's a constant pair
if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node,
index)) {
if (constant_pair_form &&
constant_pairs::is_element_second_in_constant_pair(containing_node, node, index)) {
return;
}
curr_text = str_util::rtrim(curr_text) + "\n";
}
void IndentationRule::indent_token(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index) {
if (node.metadata.is_top_level) {
void flow_line(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index) {
if (node.metadata.is_top_level || (node.metadata.is_inline && node.metadata.is_comment)) {
return;
}
// If the element is the second element in a constant pair, that means we did not append a
// new-line before hand so we require no indentation (it's inline with the previous element)
if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node,
index)) {
if (constant_pairs::is_element_second_in_constant_pair(containing_node, node, index)) {
return;
}
if (containing_node.metadata.multiple_elements_first_line) {
if (index > 1) {
// Only apply indentation if we are about to print a normal text token
// TODO - unsafe
if (node.token.has_value()) {
curr_text += str_util::repeat(containing_node.refs.at(0).token.value().length() + 2, " ");
}
}
} else {
if (index > 0) {
if (index > 0) {
// If the first element in the list is a constant, we only indent with 1 space instead
if (constant_types.find(containing_node.refs.at(0).metadata.node_type) !=
constant_types.end()) {
curr_text += str_util::repeat(depth, " ");
} else {
curr_text += str_util::repeat(depth, " ");
}
}
}
void IndentationRule::align_form_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node) {
const auto lines = str_util::split(text);
// TODO - unsafe (breaks on a list of lists)
int alignment_width = 1;
if (containing_node.metadata.multiple_elements_first_line) {
alignment_width = containing_node.refs.at(0).token.value().length() + 2;
}
std::string aligned_form = "";
for (int i = 0; i < lines.size(); i++) {
aligned_form += str_util::repeat(alignment_width, " ") + lines.at(i);
if (i != lines.size() - 1) {
aligned_form += "\n";
}
}
text = aligned_form;
}
void InnerIndentationRule::append_newline(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index) {
if (index < 1 || (m_depth != depth || m_index && m_index.value() != index)) {
return;
}
// Check if it's a constant pair
if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node,
index)) {
return;
}
if (!node.metadata.was_on_first_line_of_form) {
curr_text = str_util::rtrim(curr_text) + "\n";
}
}
void InnerIndentationRule::indent_token(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index) {
if (index < 1 || (m_depth != depth || m_index && m_index.value() != index)) {
return;
}
// If the element is the second element in a constant pair, that means we did not append a
// new-line before hand so we require no indentation (it's inline with the previous element)
if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node,
index)) {
return;
}
// We only new-line elements if they were not originally on the first line
if (!node.metadata.was_on_first_line_of_form) {
curr_text += str_util::repeat(depth * 2, " ");
}
}
void InnerIndentationRule::align_form_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node) {
if (node.metadata.was_on_first_line_of_form) {
return;
}
void hang_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const bool constant_pair_form) {
const auto lines = str_util::split(text);
// TODO - unsafe (breaks on a list of lists)
int alignment_width = 2;
if (constant_pair_form &&
constant_types.find(containing_node.refs.at(0).metadata.node_type) != constant_types.end()) {
alignment_width = 3;
}
// TODO - implement hanging
// always hang unless flowing is "better" (this is the hard part)
/*else if (containing_node.metadata.multiple_elements_first_line) {
alignment_width = containing_node.refs.at(0).token.value().length() + 2;
}*/
std::string aligned_form = "";
for (int i = 0; i < lines.size(); i++) {
aligned_form += str_util::repeat(alignment_width, " ") + lines.at(i);
@@ -170,5 +275,7 @@ void InnerIndentationRule::align_form_lines(std::string& text,
}
}
text = aligned_form;
return;
}
} // namespace indent
} // namespace formatter_rules
+51 -86
View File
@@ -4,11 +4,9 @@
#include "formatter_tree.h"
class FormatterTreeNode;
namespace formatter_rules {
// The formatter will try to collapse as much space as possible in the top-level, this means
// separating forms by a single empty blank line
// separating forms by a single empty blank line
//
// The exception are comments, top level comments will retain their following blank lines from the
// original source
@@ -26,12 +24,14 @@ void separate_by_newline(std::string& curr_text,
const int index);
}
// TODO - nothing here yet, in the future:
// TODO:
// - align consecutive comment lines
// - if/when the formatter is concerned with line length, there are implications here
//
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/comments.md
namespace comments {}
namespace comments {
std::string format_block_comment(const std::string& comment);
}
// Paired elements in a list will be kept in-line rather than the default new-line indentation
// For example:
@@ -53,6 +53,8 @@ namespace comments {}
//
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/constantpairs.md
namespace constant_pairs {
const static int min_pair_amount = 4;
// Determines if the given element is the second element in a constant pair, if it is then we would
// usually want to elide the new-line in whatever code that applies it
//
@@ -64,86 +66,49 @@ namespace constant_pairs {
bool is_element_second_in_constant_pair(const FormatterTreeNode& containing_node,
const FormatterTreeNode& node,
const int index);
bool form_should_be_constant_paired(const FormatterTreeNode& node);
} // namespace constant_pairs
// There are two main types of indentations "flow"s and "hang"s
// (this is
// a
// hang)
// (this
// is
// a
// flow)
//
// `flow` is the default indentation mode and right now the determination between the two is down to
// manual configuration based on the head of the form. In general, we only `flow` or `hang` if the
// form cannot fit on the current line.
//
// Additionally, if the head of the form is a constant we `flow` with an indent of `1` instead of
// `2`
//
// TODO: - incorporate more heuristics here, explore both a hang and flow approach to see which is
// better
//
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/indent.md
namespace indent {
const static int line_width_target = 120;
bool form_can_be_inlined(std::string& curr_text, const FormatterTreeNode& node);
void append_newline(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index,
const bool constant_pair_form);
void flow_line(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index);
void hang_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const bool constant_pair_form);
} // namespace indent
} // namespace formatter_rules
// Indentation rules are heavily inspired by the descriptions here
// https://github.com/weavejester/cljfmt/blob/master/docs/INDENTS.md
//
// This style of formatting assumes the code coming in is already reasonably formatted, aka was
// written by an actual human and isn't just a single line of minified code
//
// If it _is_ though, the formatting rules will still be able to do a somewhat decent job, as
// certain forms are configured to format a specific way, but it probably won't be broadly
// consistent with code written normally
//
// cljfmt observations:
// - a form that starts on the first line but spans multiple lines (it doesn't really handle this
// well) ex. (println (hello
// world) ye) ;; you'd expect the 'ye' to be aligned with `(h...`
// - vector lists are treated differently from paren lists (seems to leave them inline or default
// indent them if they span multiple lines) ex. [hello world
// what]
// The default rule that is used if no other rule applies to the given form
//
// For lists this will format like so:
// - If the only element on the first line is the head of the form, every subsequent element is
// indented with 1 space on a new line
// (println ; <= one or fewer elements on first line
// "hello"
// "world")
// - otherwise, every element after the 2nd is on a new line and aligned to that 1st list arg.
// (println "hello" ; <= more than one element on first line
// "world")
class IndentationRule {
public:
virtual ~IndentationRule() = default;
virtual void append_newline(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index);
virtual void indent_token(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index);
virtual void align_form_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node);
};
// Inner indentation always indents by 2 spaces for every line after the first regardless of the
// number of elements `depth` defines at what depth the rule should be applied, and optionally
// `index` narrows this down further to a given index at that depth
//
// Some simple examples:
// (defn greet [name]
// (println "Hello" name))
//
// (defn dismiss
// [name]
// (println "Goodbye" name))
class InnerIndentationRule : public IndentationRule {
private:
int m_depth;
std::optional<int> m_index;
public:
InnerIndentationRule(int depth) : m_depth(depth){};
InnerIndentationRule(int depth, int index) : m_depth(depth), m_index(index){};
virtual void append_newline(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index) override;
virtual void indent_token(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index) override;
void align_form_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node) override;
};
@@ -15,14 +15,14 @@ Comment - Within Form
===
(println
;; test
"test")
;; test
"test")
---
(println
;; test
"test")
;; test
"test")
===
@@ -117,8 +117,8 @@ Block Comment - In Form
---
(println
#|
#|
block comment
test
|#
"test")
"test")
@@ -10,42 +10,71 @@ One Pair
(:hello "world")
===
Three Pairs
===
(:hello
"world" :world 123
:test 456)
---
(:hello "world" :world 123 :test 456)
===
Four Pairs
===
(:hello
"world" :world 123
:test 456
:doit 789)
---
(:hello "world"
:world 123
:test 456
:doit 789)
===
Not a Valid Constant
===
(:hello
(println "hello world"))
---
(:hello
(println "hello world"))
===
Two Pairs
===
(:hello
"world" :test 123)
"world" :world 123
:test 456
:not (println "hello world")
:doit 789)
---
(:hello "world"
:test 123)
:world 123
:test 456
:not
(println "hello world")
:doit 789)
===
Pair Mixture
Amibiguous List
===
(:hello
"world" "not-a-pair" :test 123)
"world" :world 123
:test 456
:not-paired
:doit 789)
---
(:hello "world"
"not-a-pair"
:test 123)
:world 123
:test 456
:not-paired
:doit 789)
@@ -1,64 +0,0 @@
===
All Alignment
===
(println "hello" (println "world" "world2"))
---
(println "hello"
(println "world"
"world2"))
===
Multiple Top Level Forms
===
(println "hello" "world")(println "hello" "world")
---
(println "hello"
"world")
(println "hello"
"world")
===
All Indented
===
(println
"hello" (println
"world"))
---
(println
"hello"
(println
"world"))
===
Mixed
===
(println
"hello" (println "world"))
---
(println
"hello"
(println "world"))
===
Single Item Form
===
(println)
---
(println)
@@ -0,0 +1,60 @@
===
Basic Nested Form
===
(println "hello" (println "world" "world2" "very-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-form"))
---
(println
"hello"
(println
"world"
"world2"
"very-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-form"))
===
Multiple Top Level Forms
===
(println "hello" "world")(println "hello" "world")
---
(println "hello" "world")
(println "hello" "world")
===
All Indented
===
(println
"hello" (println
"world"))
---
(println "hello" (println "world"))
===
Mixed
===
(println
"hello" (println "world"))
---
(println "hello" (println "world"))
===
Single Item Form
===
(println)
---
(println)
@@ -1,19 +0,0 @@
===
Basic Inner Indentation
===
(defun greet (name string)
(println "Hello" name)
(println "Hello" name)
(println "Hello" name)
)
---
(defun greet (name string)
(println "Hello"
name)
(println "Hello"
name)
(println "Hello"
name))