mirror of
https://github.com/open-goal/jak-project
synced 2026-06-29 11:41:11 -04:00
formatter: new approach for indentation that doesn't depend on original formatting (#2764)
This commit is contained in:
@@ -1,14 +1,10 @@
|
||||
#include "rule_config.h"
|
||||
|
||||
namespace formatter {
|
||||
|
||||
// TODO - these are eventually going to need a refactor too, my depth is based on the true
|
||||
// tree-depth, in reality we want it to be relative to the forms defined here because imagine
|
||||
// multiple `let`s in the same form, their depths will be different but you want them relatively
|
||||
// formatted the same
|
||||
namespace formatter_rules {
|
||||
namespace config {
|
||||
|
||||
// TODO - populate these more
|
||||
|
||||
const std::unordered_map<std::string, std::vector<std::shared_ptr<IndentationRule>>>
|
||||
opengoal_indentation_rules = {{"defun", {std::make_shared<InnerIndentationRule>(1)}}};
|
||||
} // namespace formatter
|
||||
std::unordered_map<std::string, FormConfiguration> opengoal_form_config = {};
|
||||
} // namespace config
|
||||
} // namespace formatter_rules
|
||||
|
||||
@@ -7,7 +7,13 @@
|
||||
|
||||
#include "common/formatter/formatting_rules.h"
|
||||
|
||||
namespace formatter {
|
||||
extern const std::unordered_map<std::string, std::vector<std::shared_ptr<IndentationRule>>>
|
||||
opengoal_indentation_rules;
|
||||
}
|
||||
namespace formatter_rules {
|
||||
namespace config {
|
||||
struct FormConfiguration {
|
||||
bool force_hang = false;
|
||||
int start_hang_at_index = 0;
|
||||
};
|
||||
|
||||
extern std::unordered_map<std::string, FormConfiguration> opengoal_form_config;
|
||||
} // namespace config
|
||||
} // namespace formatter_rules
|
||||
|
||||
@@ -24,9 +24,6 @@ std::string apply_formatting(const FormatterTreeNode& curr_node,
|
||||
std::string curr_form = "";
|
||||
// Print the token
|
||||
if (curr_node.token) {
|
||||
// TODO - perhaps unneeded
|
||||
curr_node.get_formatting_rule(tree_depth, -1)
|
||||
->indent_token(curr_form, curr_node, curr_node, tree_depth, -1);
|
||||
curr_form += curr_node.token.value();
|
||||
return curr_form;
|
||||
}
|
||||
@@ -34,29 +31,51 @@ std::string apply_formatting(const FormatterTreeNode& curr_node,
|
||||
curr_form += "(";
|
||||
}
|
||||
// Iterate the form
|
||||
|
||||
bool inline_form = false;
|
||||
// Also check if the form should be constant-paired
|
||||
const bool constant_pair_form =
|
||||
formatter_rules::constant_pairs::form_should_be_constant_paired(curr_node);
|
||||
if (!constant_pair_form) {
|
||||
// Determine if the form should be inlined or hung/flowed
|
||||
// TODO - this isn't entirely accurate, needs current cursor positioning (which is tricky
|
||||
// because recursion!)
|
||||
inline_form = formatter_rules::indent::form_can_be_inlined(curr_form, curr_node);
|
||||
}
|
||||
for (int i = 0; i < curr_node.refs.size(); i++) {
|
||||
const auto& ref = curr_node.refs.at(i);
|
||||
// Append a newline if needed
|
||||
curr_node.get_formatting_rule(tree_depth, i)
|
||||
->append_newline(curr_form, ref, curr_node, tree_depth, i);
|
||||
if (!inline_form) {
|
||||
formatter_rules::indent::append_newline(curr_form, ref, curr_node, tree_depth, i,
|
||||
constant_pair_form);
|
||||
}
|
||||
// Either print the element's token, or recursively format it as well
|
||||
if (ref.token) {
|
||||
curr_node.get_formatting_rule(tree_depth, i)
|
||||
->indent_token(curr_form, ref, curr_node, 1,
|
||||
i); // TODO depth hard-coded to 1, i think this can be removed, since
|
||||
// forms are always done bottom-top recursively, they always act
|
||||
// independently as if it was the shallowest depth
|
||||
curr_form += ref.token.value();
|
||||
// TODO depth hard-coded to 1, i think this can be removed, since
|
||||
// forms are always done bottom-top recursively, they always act
|
||||
// independently as if it was the shallowest depth
|
||||
if (!inline_form) {
|
||||
formatter_rules::indent::flow_line(curr_form, ref, curr_node, 1, i);
|
||||
}
|
||||
if (ref.metadata.node_type == "comment" && ref.metadata.is_inline) {
|
||||
curr_form += " " + ref.token.value();
|
||||
} else if (ref.metadata.node_type == "block_comment") {
|
||||
curr_form += formatter_rules::comments::format_block_comment(ref.token.value());
|
||||
} else {
|
||||
curr_form += ref.token.value();
|
||||
}
|
||||
if (!curr_node.metadata.is_top_level) {
|
||||
curr_form += " ";
|
||||
}
|
||||
} else {
|
||||
auto formatted_form = apply_formatting(ref, "", tree_depth + 1);
|
||||
if (!curr_node.metadata.is_top_level) {
|
||||
curr_node.get_formatting_rule(tree_depth, i)
|
||||
->align_form_lines(formatted_form, ref, curr_node);
|
||||
if (!curr_node.metadata.is_top_level && !inline_form) {
|
||||
formatter_rules::indent::hang_lines(formatted_form, ref, curr_node, constant_pair_form);
|
||||
}
|
||||
curr_form += formatted_form;
|
||||
if (!curr_node.metadata.is_top_level) {
|
||||
curr_form += " ";
|
||||
}
|
||||
}
|
||||
// Handle blank lines at the top level, skip if it's the final element
|
||||
formatter_rules::blank_lines::separate_by_newline(curr_form, curr_node, ref, i);
|
||||
|
||||
@@ -6,10 +6,6 @@
|
||||
|
||||
#include "third-party/fmt/core.h"
|
||||
|
||||
namespace formatter {
|
||||
const std::shared_ptr<IndentationRule> default_rule = std::make_shared<IndentationRule>();
|
||||
}
|
||||
|
||||
std::string get_source_code(const std::string& source, const TSNode& node) {
|
||||
uint32_t start = ts_node_start_byte(node);
|
||||
uint32_t end = ts_node_end_byte(node);
|
||||
@@ -47,52 +43,13 @@ bool node_preceeded_by_only_whitespace(const std::string& source, const TSNode&
|
||||
FormatterTreeNode::FormatterTreeNode(const std::string& source, const TSNode& node)
|
||||
: token(get_source_code(source, node)) {
|
||||
metadata.node_type = ts_node_type(node);
|
||||
metadata.is_comment = str_util::starts_with(str_util::ltrim(token.value()), ";");
|
||||
// Do some formatting on block-comments text
|
||||
// TODO - this should go into a formatting rule
|
||||
if (str_util::starts_with(str_util::ltrim(token.value()), "#|")) {
|
||||
metadata.is_comment = true;
|
||||
// Normalize block comments, remove any trailing or leading whitespace
|
||||
// Only allow annotations on the first line, like #|@file
|
||||
// Don't mess with internal indentation as the user might intend it to be a certain way.
|
||||
std::string new_token = "";
|
||||
std::string comment_contents = "";
|
||||
bool seek_until_whitespace = str_util::starts_with(token.value(), "#|@");
|
||||
int chars_seeked = 0;
|
||||
for (const auto& c : token.value()) {
|
||||
if (c == '\n' || (seek_until_whitespace && (c == ' ' || c == '\t')) ||
|
||||
(!seek_until_whitespace && (c != '#' && c != '|'))) {
|
||||
break;
|
||||
}
|
||||
chars_seeked++;
|
||||
new_token += c;
|
||||
}
|
||||
// Remove the first line content and any leading whitespace
|
||||
comment_contents = str_util::ltrim_newlines(token.value().substr(chars_seeked));
|
||||
// Remove trailing whitespace
|
||||
comment_contents = str_util::rtrim(comment_contents);
|
||||
// remove |#
|
||||
comment_contents.pop_back();
|
||||
comment_contents.pop_back();
|
||||
comment_contents = str_util::rtrim(comment_contents);
|
||||
new_token += fmt::format("\n{}\n|#", comment_contents);
|
||||
token = new_token;
|
||||
}
|
||||
|
||||
metadata.is_comment = str_util::starts_with(str_util::ltrim(token.value()), ";") ||
|
||||
str_util::starts_with(str_util::ltrim(token.value()), "#|");
|
||||
// Set any metadata based on the value of the token
|
||||
metadata.num_blank_lines_following = num_blank_lines_following_node(source, node);
|
||||
metadata.is_inline = !node_preceeded_by_only_whitespace(source, node);
|
||||
};
|
||||
|
||||
std::shared_ptr<IndentationRule> FormatterTreeNode::get_formatting_rule(const int depth,
|
||||
const int index) const {
|
||||
// TODO - really lazy for now
|
||||
if (!rules.empty()) {
|
||||
return rules.at(0);
|
||||
}
|
||||
return formatter::default_rule;
|
||||
}
|
||||
|
||||
// Check if the original source only has whitespace up to a new-line after it's token
|
||||
bool node_followed_by_only_whitespace(const std::string& source, const TSNode& node) {
|
||||
uint32_t pos = ts_node_end_byte(node);
|
||||
@@ -150,21 +107,7 @@ void FormatterTree::construct_formatter_tree_recursive(const std::string& source
|
||||
continue;
|
||||
}
|
||||
if (curr_node_type == "list_lit") {
|
||||
// Check to see if the first line of the form has more than 1 element
|
||||
if (i == 1) {
|
||||
list_node.metadata.multiple_elements_first_line =
|
||||
!node_followed_by_only_whitespace(source, child_node);
|
||||
// Peek at the first element of the list to determine formatting rules
|
||||
if (formatter::opengoal_indentation_rules.find(contents) !=
|
||||
formatter::opengoal_indentation_rules.end()) {
|
||||
list_node.rules = formatter::opengoal_indentation_rules.at(contents);
|
||||
}
|
||||
}
|
||||
construct_formatter_tree_recursive(source, child_node, list_node);
|
||||
// Check if the node that was recursively added to the list was on the same line
|
||||
auto& new_node = list_node.refs.at(list_node.refs.size() - 1);
|
||||
new_node.metadata.was_on_first_line_of_form =
|
||||
nodes_on_same_line(source, curr_node, child_node);
|
||||
} else {
|
||||
construct_formatter_tree_recursive(source, child_node, tree_node);
|
||||
}
|
||||
|
||||
@@ -5,12 +5,8 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "formatting_rules.h"
|
||||
|
||||
#include "tree_sitter/api.h"
|
||||
|
||||
class IndentationRule;
|
||||
|
||||
// Treesitter is fantastic for validating and parsing our code into a structured tree format without
|
||||
// whitespace so we can do that ourselves (formatting) However, the treesitter AST is a bit too
|
||||
// detailed for purposes of formatting.
|
||||
@@ -27,10 +23,6 @@ class IndentationRule;
|
||||
// Pass 1 - convert the AST into a simplified FormatterTree
|
||||
// Pass 2 - use the simplified tree to output the final code
|
||||
|
||||
namespace formatter {
|
||||
extern const std::shared_ptr<IndentationRule> default_indentation_rule;
|
||||
}
|
||||
|
||||
class FormatterTreeNode {
|
||||
public:
|
||||
struct Metadata {
|
||||
@@ -39,28 +31,16 @@ class FormatterTreeNode {
|
||||
bool is_comment = false;
|
||||
bool is_inline = false;
|
||||
int num_blank_lines_following = 0;
|
||||
// Whether the form had more than 1 element on the first line
|
||||
// (println
|
||||
// "test")
|
||||
// vs
|
||||
// (println "test")
|
||||
bool multiple_elements_first_line = false;
|
||||
bool was_on_first_line_of_form = false;
|
||||
};
|
||||
std::vector<FormatterTreeNode> refs;
|
||||
Metadata metadata;
|
||||
// The token is optional because list nodes do not contain a token, they just contain a bunch of
|
||||
// eventually token node refs
|
||||
std::optional<std::string> token;
|
||||
std::vector<std::shared_ptr<IndentationRule>> rules = {};
|
||||
|
||||
FormatterTreeNode() = default;
|
||||
FormatterTreeNode(const std::string& source, const TSNode& node);
|
||||
FormatterTreeNode(const Metadata& _metadata) : metadata(_metadata){};
|
||||
|
||||
// Considers the input to determine the most relevant formatting rule for the given node
|
||||
// if none are applicable, returns `formatter::default_rule`
|
||||
std::shared_ptr<IndentationRule> get_formatting_rule(const int depth, const int index) const;
|
||||
};
|
||||
|
||||
// A FormatterTree has a very simple and crude tree structure where:
|
||||
|
||||
@@ -4,10 +4,19 @@
|
||||
|
||||
#include "common/util/string_util.h"
|
||||
|
||||
void formatter_rules::blank_lines::separate_by_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const FormatterTreeNode& node,
|
||||
const int index) {
|
||||
#include "third-party/fmt/core.h"
|
||||
|
||||
namespace formatter_rules {
|
||||
|
||||
// TODO - probably need to include quoted literals as well, though the grammar currently does not
|
||||
// differentiate between a quoted symbol and a quoted form
|
||||
const std::set<std::string> constant_types = {"kwd_lit", "num_lit", "str_lit",
|
||||
"char_lit", "null_lit", "bool_lit"};
|
||||
namespace blank_lines {
|
||||
void separate_by_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const FormatterTreeNode& node,
|
||||
const int index) {
|
||||
// We only are concerned with top level forms or elements
|
||||
// Skip the last element, no trailing new-lines (let the editors handle this!)
|
||||
// Also peek ahead to see if there was a comment on this line, if so don't separate things!
|
||||
@@ -24,144 +33,240 @@ void formatter_rules::blank_lines::separate_by_newline(std::string& curr_text,
|
||||
// Otherwise, add only 1 blank line
|
||||
curr_text += "\n";
|
||||
}
|
||||
} // namespace blank_lines
|
||||
|
||||
// TODO - probably need to include quoted literals as well, though the grammar currently does not
|
||||
// differentiate between a quoted symbol and a quoted form
|
||||
const std::set<std::string> constant_pair_types = {"kwd_lit", "num_lit", "str_lit", "char_lit",
|
||||
"null_lit", "bool_lit", "sym_lit"};
|
||||
namespace comments {
|
||||
std::string format_block_comment(const std::string& comment) {
|
||||
// Normalize block comments, remove any trailing or leading whitespace
|
||||
// Only allow annotations on the first line, like #|@file
|
||||
// Don't mess with internal indentation as the user might intend it to be a certain way.
|
||||
std::string new_comment = "";
|
||||
std::string comment_contents = "";
|
||||
bool seek_until_whitespace = str_util::starts_with(comment, "#|@");
|
||||
int chars_seeked = 0;
|
||||
for (const auto& c : comment) {
|
||||
if (c == '\n' || (seek_until_whitespace && (c == ' ' || c == '\t')) ||
|
||||
(!seek_until_whitespace && (c != '#' && c != '|'))) {
|
||||
break;
|
||||
}
|
||||
chars_seeked++;
|
||||
new_comment += c;
|
||||
}
|
||||
// Remove the first line content and any leading whitespace
|
||||
comment_contents = str_util::ltrim_newlines(comment.substr(chars_seeked));
|
||||
// Remove trailing whitespace
|
||||
comment_contents = str_util::rtrim(comment_contents);
|
||||
// remove |#
|
||||
// TODO - check suffix
|
||||
comment_contents.pop_back();
|
||||
comment_contents.pop_back();
|
||||
comment_contents = str_util::rtrim(comment_contents);
|
||||
new_comment += fmt::format("\n{}\n|#", comment_contents);
|
||||
return new_comment;
|
||||
}
|
||||
} // namespace comments
|
||||
|
||||
bool formatter_rules::constant_pairs::is_element_second_in_constant_pair(
|
||||
const FormatterTreeNode& containing_node,
|
||||
const FormatterTreeNode& node,
|
||||
const int index) {
|
||||
namespace constant_pairs {
|
||||
|
||||
bool is_element_second_in_constant_pair(const FormatterTreeNode& containing_node,
|
||||
const FormatterTreeNode& node,
|
||||
const int index) {
|
||||
if (containing_node.refs.empty() || index == 0) {
|
||||
return false;
|
||||
}
|
||||
// Ensure that a keyword came before hand
|
||||
if (containing_node.refs.at(index - 1).metadata.node_type != "kwd_lit") {
|
||||
return false;
|
||||
} else if (node.metadata.node_type == "kwd_lit") {
|
||||
// NOTE - there is ambiugity here which cannot be totally solved (i think?)
|
||||
// if the element itself is also a keyword, assume this is two adjacent keywords and they should
|
||||
// not be paired
|
||||
return false;
|
||||
}
|
||||
// Check the type of the element
|
||||
if (constant_pair_types.find(node.metadata.node_type) != constant_pair_types.end()) {
|
||||
if (constant_types.find(node.metadata.node_type) != constant_types.end()) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void IndentationRule::append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (index <= 0 && !containing_node.metadata.multiple_elements_first_line ||
|
||||
index <= 1 && containing_node.metadata.multiple_elements_first_line ||
|
||||
containing_node.metadata.is_top_level ||
|
||||
bool form_should_be_constant_paired(const FormatterTreeNode& node) {
|
||||
// Criteria for a list to be constant paired:
|
||||
// - needs to start with a non-symbol
|
||||
// - needs atleast the minimum amount of pairs, so 2 pairs can still be inlined
|
||||
if (node.refs.empty()) {
|
||||
return false;
|
||||
}
|
||||
int num_pairs = 0;
|
||||
for (int i = 0; i < node.refs.size() - 1; i++) {
|
||||
const auto& ref = node.refs.at(i);
|
||||
const auto& next_ref = node.refs.at(i + 1);
|
||||
if (ref.token && next_ref.token) {
|
||||
// If the first element a keyword and the following item is a constant, it's a pair
|
||||
// move forward one extra index
|
||||
if (ref.metadata.node_type == "kwd_lit" &&
|
||||
constant_types.find(next_ref.metadata.node_type) != constant_types.end()) {
|
||||
num_pairs++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return num_pairs >= min_pair_amount;
|
||||
}
|
||||
|
||||
} // namespace constant_pairs
|
||||
|
||||
namespace indent {
|
||||
|
||||
int cursor_pos(const std::string& curr_text) {
|
||||
if (curr_text.empty()) {
|
||||
return 0;
|
||||
}
|
||||
// Get the last line of the text (which is also the line we are on!)
|
||||
int pos = 0;
|
||||
for (int i = curr_text.size() - 1; i >= 0; i--) {
|
||||
const auto& c = curr_text.at(i);
|
||||
if (c == '\n') {
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
int compute_form_width_after_index(const FormatterTreeNode& node,
|
||||
const int index,
|
||||
const int depth = 0) {
|
||||
if (node.refs.empty()) {
|
||||
if (node.token) {
|
||||
return node.token->size();
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
int form_width = 0;
|
||||
for (int i = 0; i < node.refs.size(); i++) {
|
||||
const auto& ref = node.refs.at(i);
|
||||
if (depth == 0 && i < index) {
|
||||
continue;
|
||||
}
|
||||
if (ref.token) {
|
||||
form_width += ref.token->size() + 1;
|
||||
} else {
|
||||
form_width += compute_form_width_after_index(ref, index, depth + 1) + 1;
|
||||
}
|
||||
}
|
||||
return form_width;
|
||||
}
|
||||
|
||||
bool form_exceed_line_width(const std::string& curr_text,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int index) {
|
||||
// Compute length from the current cursor position on the line as this check is done for every
|
||||
// element of the form and not in advance
|
||||
//
|
||||
// This is for a good reason, intermediate nodes may override this styling and force to be
|
||||
// formatted inline
|
||||
//
|
||||
// We early out as soon as we exceed the width
|
||||
int curr_line_pos = cursor_pos(curr_text);
|
||||
if (curr_line_pos >= line_width_target) {
|
||||
return true;
|
||||
}
|
||||
int remaining_width_required = compute_form_width_after_index(containing_node, index);
|
||||
if (curr_line_pos + remaining_width_required >= line_width_target) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool form_contains_comment(const FormatterTreeNode& node) {
|
||||
if (node.metadata.is_comment) {
|
||||
return true;
|
||||
}
|
||||
for (const auto& ref : node.refs) {
|
||||
if (ref.metadata.is_comment) {
|
||||
return true;
|
||||
} else if (!node.refs.empty()) {
|
||||
if (form_contains_comment(ref)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool form_can_be_inlined(std::string& curr_text, const FormatterTreeNode& node) {
|
||||
// Two main checks:
|
||||
// - first, is the form too long to fit on a line TODO - increase accuracy here
|
||||
if (form_exceed_line_width(curr_text, node, 0)) {
|
||||
return false;
|
||||
}
|
||||
// - second, are there any comments? (inlined or not, doesn't matter)
|
||||
if (form_contains_comment(node)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index,
|
||||
const bool constant_pair_form) {
|
||||
if (index <= 0 || containing_node.metadata.is_top_level ||
|
||||
(node.metadata.is_comment && node.metadata.is_inline)) {
|
||||
return;
|
||||
}
|
||||
// Check if it's a constant pair
|
||||
if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node,
|
||||
index)) {
|
||||
if (constant_pair_form &&
|
||||
constant_pairs::is_element_second_in_constant_pair(containing_node, node, index)) {
|
||||
return;
|
||||
}
|
||||
curr_text = str_util::rtrim(curr_text) + "\n";
|
||||
}
|
||||
|
||||
void IndentationRule::indent_token(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (node.metadata.is_top_level) {
|
||||
void flow_line(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (node.metadata.is_top_level || (node.metadata.is_inline && node.metadata.is_comment)) {
|
||||
return;
|
||||
}
|
||||
// If the element is the second element in a constant pair, that means we did not append a
|
||||
// new-line before hand so we require no indentation (it's inline with the previous element)
|
||||
if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node,
|
||||
index)) {
|
||||
if (constant_pairs::is_element_second_in_constant_pair(containing_node, node, index)) {
|
||||
return;
|
||||
}
|
||||
if (containing_node.metadata.multiple_elements_first_line) {
|
||||
if (index > 1) {
|
||||
// Only apply indentation if we are about to print a normal text token
|
||||
// TODO - unsafe
|
||||
if (node.token.has_value()) {
|
||||
curr_text += str_util::repeat(containing_node.refs.at(0).token.value().length() + 2, " ");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (index > 0) {
|
||||
if (index > 0) {
|
||||
// If the first element in the list is a constant, we only indent with 1 space instead
|
||||
if (constant_types.find(containing_node.refs.at(0).metadata.node_type) !=
|
||||
constant_types.end()) {
|
||||
curr_text += str_util::repeat(depth, " ");
|
||||
} else {
|
||||
curr_text += str_util::repeat(depth, " ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IndentationRule::align_form_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node) {
|
||||
const auto lines = str_util::split(text);
|
||||
// TODO - unsafe (breaks on a list of lists)
|
||||
int alignment_width = 1;
|
||||
if (containing_node.metadata.multiple_elements_first_line) {
|
||||
alignment_width = containing_node.refs.at(0).token.value().length() + 2;
|
||||
}
|
||||
std::string aligned_form = "";
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
aligned_form += str_util::repeat(alignment_width, " ") + lines.at(i);
|
||||
if (i != lines.size() - 1) {
|
||||
aligned_form += "\n";
|
||||
}
|
||||
}
|
||||
text = aligned_form;
|
||||
}
|
||||
|
||||
void InnerIndentationRule::append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (index < 1 || (m_depth != depth || m_index && m_index.value() != index)) {
|
||||
return;
|
||||
}
|
||||
// Check if it's a constant pair
|
||||
if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node,
|
||||
index)) {
|
||||
return;
|
||||
}
|
||||
if (!node.metadata.was_on_first_line_of_form) {
|
||||
curr_text = str_util::rtrim(curr_text) + "\n";
|
||||
}
|
||||
}
|
||||
|
||||
void InnerIndentationRule::indent_token(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (index < 1 || (m_depth != depth || m_index && m_index.value() != index)) {
|
||||
return;
|
||||
}
|
||||
// If the element is the second element in a constant pair, that means we did not append a
|
||||
// new-line before hand so we require no indentation (it's inline with the previous element)
|
||||
if (formatter_rules::constant_pairs::is_element_second_in_constant_pair(containing_node, node,
|
||||
index)) {
|
||||
return;
|
||||
}
|
||||
// We only new-line elements if they were not originally on the first line
|
||||
if (!node.metadata.was_on_first_line_of_form) {
|
||||
curr_text += str_util::repeat(depth * 2, " ");
|
||||
}
|
||||
}
|
||||
|
||||
void InnerIndentationRule::align_form_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node) {
|
||||
if (node.metadata.was_on_first_line_of_form) {
|
||||
return;
|
||||
}
|
||||
void hang_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const bool constant_pair_form) {
|
||||
const auto lines = str_util::split(text);
|
||||
// TODO - unsafe (breaks on a list of lists)
|
||||
int alignment_width = 2;
|
||||
if (constant_pair_form &&
|
||||
constant_types.find(containing_node.refs.at(0).metadata.node_type) != constant_types.end()) {
|
||||
alignment_width = 3;
|
||||
}
|
||||
// TODO - implement hanging
|
||||
// always hang unless flowing is "better" (this is the hard part)
|
||||
/*else if (containing_node.metadata.multiple_elements_first_line) {
|
||||
alignment_width = containing_node.refs.at(0).token.value().length() + 2;
|
||||
}*/
|
||||
std::string aligned_form = "";
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
aligned_form += str_util::repeat(alignment_width, " ") + lines.at(i);
|
||||
@@ -170,5 +275,7 @@ void InnerIndentationRule::align_form_lines(std::string& text,
|
||||
}
|
||||
}
|
||||
text = aligned_form;
|
||||
return;
|
||||
}
|
||||
} // namespace indent
|
||||
|
||||
} // namespace formatter_rules
|
||||
|
||||
@@ -4,11 +4,9 @@
|
||||
|
||||
#include "formatter_tree.h"
|
||||
|
||||
class FormatterTreeNode;
|
||||
|
||||
namespace formatter_rules {
|
||||
// The formatter will try to collapse as much space as possible in the top-level, this means
|
||||
// separating forms by a single empty blank line
|
||||
// separating forms by a single empty blank line
|
||||
//
|
||||
// The exception are comments, top level comments will retain their following blank lines from the
|
||||
// original source
|
||||
@@ -26,12 +24,14 @@ void separate_by_newline(std::string& curr_text,
|
||||
const int index);
|
||||
}
|
||||
|
||||
// TODO - nothing here yet, in the future:
|
||||
// TODO:
|
||||
// - align consecutive comment lines
|
||||
// - if/when the formatter is concerned with line length, there are implications here
|
||||
//
|
||||
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/comments.md
|
||||
namespace comments {}
|
||||
namespace comments {
|
||||
std::string format_block_comment(const std::string& comment);
|
||||
}
|
||||
|
||||
// Paired elements in a list will be kept in-line rather than the default new-line indentation
|
||||
// For example:
|
||||
@@ -53,6 +53,8 @@ namespace comments {}
|
||||
//
|
||||
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/constantpairs.md
|
||||
namespace constant_pairs {
|
||||
const static int min_pair_amount = 4;
|
||||
|
||||
// Determines if the given element is the second element in a constant pair, if it is then we would
|
||||
// usually want to elide the new-line in whatever code that applies it
|
||||
//
|
||||
@@ -64,86 +66,49 @@ namespace constant_pairs {
|
||||
bool is_element_second_in_constant_pair(const FormatterTreeNode& containing_node,
|
||||
const FormatterTreeNode& node,
|
||||
const int index);
|
||||
bool form_should_be_constant_paired(const FormatterTreeNode& node);
|
||||
} // namespace constant_pairs
|
||||
|
||||
// There are two main types of indentations "flow"s and "hang"s
|
||||
// (this is
|
||||
// a
|
||||
// hang)
|
||||
// (this
|
||||
// is
|
||||
// a
|
||||
// flow)
|
||||
//
|
||||
// `flow` is the default indentation mode and right now the determination between the two is down to
|
||||
// manual configuration based on the head of the form. In general, we only `flow` or `hang` if the
|
||||
// form cannot fit on the current line.
|
||||
//
|
||||
// Additionally, if the head of the form is a constant we `flow` with an indent of `1` instead of
|
||||
// `2`
|
||||
//
|
||||
// TODO: - incorporate more heuristics here, explore both a hang and flow approach to see which is
|
||||
// better
|
||||
//
|
||||
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/indent.md
|
||||
namespace indent {
|
||||
const static int line_width_target = 120;
|
||||
|
||||
bool form_can_be_inlined(std::string& curr_text, const FormatterTreeNode& node);
|
||||
|
||||
void append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index,
|
||||
const bool constant_pair_form);
|
||||
void flow_line(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index);
|
||||
void hang_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const bool constant_pair_form);
|
||||
|
||||
} // namespace indent
|
||||
} // namespace formatter_rules
|
||||
|
||||
// Indentation rules are heavily inspired by the descriptions here
|
||||
// https://github.com/weavejester/cljfmt/blob/master/docs/INDENTS.md
|
||||
//
|
||||
// This style of formatting assumes the code coming in is already reasonably formatted, aka was
|
||||
// written by an actual human and isn't just a single line of minified code
|
||||
//
|
||||
// If it _is_ though, the formatting rules will still be able to do a somewhat decent job, as
|
||||
// certain forms are configured to format a specific way, but it probably won't be broadly
|
||||
// consistent with code written normally
|
||||
//
|
||||
// cljfmt observations:
|
||||
// - a form that starts on the first line but spans multiple lines (it doesn't really handle this
|
||||
// well) ex. (println (hello
|
||||
// world) ye) ;; you'd expect the 'ye' to be aligned with `(h...`
|
||||
// - vector lists are treated differently from paren lists (seems to leave them inline or default
|
||||
// indent them if they span multiple lines) ex. [hello world
|
||||
// what]
|
||||
|
||||
// The default rule that is used if no other rule applies to the given form
|
||||
//
|
||||
// For lists this will format like so:
|
||||
// - If the only element on the first line is the head of the form, every subsequent element is
|
||||
// indented with 1 space on a new line
|
||||
// (println ; <= one or fewer elements on first line
|
||||
// "hello"
|
||||
// "world")
|
||||
// - otherwise, every element after the 2nd is on a new line and aligned to that 1st list arg.
|
||||
// (println "hello" ; <= more than one element on first line
|
||||
// "world")
|
||||
class IndentationRule {
|
||||
public:
|
||||
virtual ~IndentationRule() = default;
|
||||
virtual void append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index);
|
||||
virtual void indent_token(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index);
|
||||
virtual void align_form_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node);
|
||||
};
|
||||
|
||||
// Inner indentation always indents by 2 spaces for every line after the first regardless of the
|
||||
// number of elements `depth` defines at what depth the rule should be applied, and optionally
|
||||
// `index` narrows this down further to a given index at that depth
|
||||
//
|
||||
// Some simple examples:
|
||||
// (defn greet [name]
|
||||
// (println "Hello" name))
|
||||
//
|
||||
// (defn dismiss
|
||||
// [name]
|
||||
// (println "Goodbye" name))
|
||||
class InnerIndentationRule : public IndentationRule {
|
||||
private:
|
||||
int m_depth;
|
||||
std::optional<int> m_index;
|
||||
|
||||
public:
|
||||
InnerIndentationRule(int depth) : m_depth(depth){};
|
||||
InnerIndentationRule(int depth, int index) : m_depth(depth), m_index(index){};
|
||||
virtual void append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) override;
|
||||
virtual void indent_token(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) override;
|
||||
void align_form_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node) override;
|
||||
};
|
||||
|
||||
@@ -15,14 +15,14 @@ Comment - Within Form
|
||||
===
|
||||
|
||||
(println
|
||||
;; test
|
||||
"test")
|
||||
;; test
|
||||
"test")
|
||||
|
||||
---
|
||||
|
||||
(println
|
||||
;; test
|
||||
"test")
|
||||
;; test
|
||||
"test")
|
||||
|
||||
|
||||
===
|
||||
@@ -117,8 +117,8 @@ Block Comment - In Form
|
||||
---
|
||||
|
||||
(println
|
||||
#|
|
||||
#|
|
||||
block comment
|
||||
test
|
||||
|#
|
||||
"test")
|
||||
"test")
|
||||
|
||||
@@ -10,42 +10,71 @@ One Pair
|
||||
|
||||
(:hello "world")
|
||||
|
||||
===
|
||||
Three Pairs
|
||||
===
|
||||
|
||||
(:hello
|
||||
|
||||
"world" :world 123
|
||||
:test 456)
|
||||
|
||||
---
|
||||
|
||||
(:hello "world" :world 123 :test 456)
|
||||
|
||||
===
|
||||
Four Pairs
|
||||
===
|
||||
|
||||
(:hello
|
||||
|
||||
"world" :world 123
|
||||
:test 456
|
||||
:doit 789)
|
||||
|
||||
---
|
||||
|
||||
(:hello "world"
|
||||
:world 123
|
||||
:test 456
|
||||
:doit 789)
|
||||
|
||||
===
|
||||
Not a Valid Constant
|
||||
===
|
||||
|
||||
(:hello
|
||||
|
||||
(println "hello world"))
|
||||
|
||||
---
|
||||
|
||||
(:hello
|
||||
(println "hello world"))
|
||||
|
||||
===
|
||||
Two Pairs
|
||||
===
|
||||
|
||||
(:hello
|
||||
|
||||
"world" :test 123)
|
||||
"world" :world 123
|
||||
:test 456
|
||||
:not (println "hello world")
|
||||
:doit 789)
|
||||
|
||||
---
|
||||
|
||||
(:hello "world"
|
||||
:test 123)
|
||||
:world 123
|
||||
:test 456
|
||||
:not
|
||||
(println "hello world")
|
||||
:doit 789)
|
||||
|
||||
===
|
||||
Pair Mixture
|
||||
Amibiguous List
|
||||
===
|
||||
|
||||
(:hello
|
||||
|
||||
"world" "not-a-pair" :test 123)
|
||||
"world" :world 123
|
||||
:test 456
|
||||
:not-paired
|
||||
:doit 789)
|
||||
|
||||
---
|
||||
|
||||
(:hello "world"
|
||||
"not-a-pair"
|
||||
:test 123)
|
||||
:world 123
|
||||
:test 456
|
||||
:not-paired
|
||||
:doit 789)
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
===
|
||||
All Alignment
|
||||
===
|
||||
|
||||
(println "hello" (println "world" "world2"))
|
||||
|
||||
---
|
||||
|
||||
(println "hello"
|
||||
(println "world"
|
||||
"world2"))
|
||||
|
||||
===
|
||||
Multiple Top Level Forms
|
||||
===
|
||||
|
||||
(println "hello" "world")(println "hello" "world")
|
||||
|
||||
---
|
||||
|
||||
(println "hello"
|
||||
"world")
|
||||
|
||||
(println "hello"
|
||||
"world")
|
||||
|
||||
===
|
||||
All Indented
|
||||
===
|
||||
|
||||
(println
|
||||
"hello" (println
|
||||
"world"))
|
||||
|
||||
---
|
||||
|
||||
(println
|
||||
"hello"
|
||||
(println
|
||||
"world"))
|
||||
|
||||
===
|
||||
Mixed
|
||||
===
|
||||
|
||||
(println
|
||||
"hello" (println "world"))
|
||||
|
||||
---
|
||||
|
||||
(println
|
||||
"hello"
|
||||
(println "world"))
|
||||
|
||||
===
|
||||
Single Item Form
|
||||
===
|
||||
|
||||
(println)
|
||||
|
||||
---
|
||||
|
||||
(println)
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
===
|
||||
Basic Nested Form
|
||||
===
|
||||
|
||||
(println "hello" (println "world" "world2" "very-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-form"))
|
||||
|
||||
---
|
||||
|
||||
(println
|
||||
"hello"
|
||||
(println
|
||||
"world"
|
||||
"world2"
|
||||
"very-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-form"))
|
||||
|
||||
===
|
||||
Multiple Top Level Forms
|
||||
===
|
||||
|
||||
(println "hello" "world")(println "hello" "world")
|
||||
|
||||
---
|
||||
|
||||
(println "hello" "world")
|
||||
|
||||
(println "hello" "world")
|
||||
|
||||
===
|
||||
All Indented
|
||||
===
|
||||
|
||||
(println
|
||||
"hello" (println
|
||||
"world"))
|
||||
|
||||
---
|
||||
|
||||
(println "hello" (println "world"))
|
||||
|
||||
===
|
||||
Mixed
|
||||
===
|
||||
|
||||
(println
|
||||
"hello" (println "world"))
|
||||
|
||||
---
|
||||
|
||||
(println "hello" (println "world"))
|
||||
|
||||
===
|
||||
Single Item Form
|
||||
===
|
||||
|
||||
(println)
|
||||
|
||||
---
|
||||
|
||||
(println)
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
===
|
||||
Basic Inner Indentation
|
||||
===
|
||||
|
||||
(defun greet (name string)
|
||||
(println "Hello" name)
|
||||
(println "Hello" name)
|
||||
(println "Hello" name)
|
||||
)
|
||||
|
||||
---
|
||||
|
||||
(defun greet (name string)
|
||||
(println "Hello"
|
||||
name)
|
||||
(println "Hello"
|
||||
name)
|
||||
(println "Hello"
|
||||
name))
|
||||
Reference in New Issue
Block a user