formatter: support for a few more forms and fix some bugs, update tree-sitter grammar (#3317)

This commit is contained in:
Tyler Wilding
2024-01-18 20:09:40 -05:00
committed by GitHub
parent 85039fe2d6
commit c3e4baf697
13 changed files with 315 additions and 64 deletions
+27 -5
View File
@@ -67,8 +67,8 @@ void apply_formatting_config(
//
// Otherwise, we always default to a hang.
//
// NOTE - any modifications here to child elements could be superseeded later in the recursion
// in order to maintain your sanity, only modify things here that _arent_ touched by default
// NOTE - any modifications here to child elements could be superseded later in the recursion!
// In order to maintain your sanity, only modify things here that _aren't_ touched by default
// configurations. These are explicitly prepended with `parent_mutable_`
if (!predefined_config) {
if (curr_node.metadata.is_top_level) {
@@ -104,6 +104,8 @@ void apply_formatting_config(
}
// If we are hanging, let's determine the indentation width since it is based on the form itself
if (curr_node.formatting_config.hang_forms) {
// TODO - this isn't being calculated for a pre-defined config
// TODO - another idea is to do this during consolidation
curr_node.formatting_config.indentation_width = hang_indentation_width(curr_node);
}
// iterate through the refs
@@ -123,6 +125,7 @@ void apply_formatting_config(
}
}
// TODO - this doesn't account for paren's width contribution!
int get_total_form_inlined_width(const FormatterTreeNode& curr_node) {
if (curr_node.token) {
return curr_node.token->length();
@@ -196,12 +199,15 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
using namespace formatter_rules;
if (!curr_node.token && curr_node.refs.empty()) {
// special case to handle an empty list
if (curr_node.node_prefix) {
return {fmt::format("{}()", curr_node.node_prefix.value())};
}
return {"()"};
}
// If it's a token, just print the token and move on
if (curr_node.token) {
return {curr_node.token.value()};
return {curr_node.token_str()};
}
bool inline_form = can_node_be_inlined(curr_node, cursor_pos);
@@ -220,13 +226,19 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
// Add new line entry
if (ref.token) {
// Cleanup block-comments
std::string val = ref.token.value();
std::string val = ref.token_str();
if (ref.metadata.node_type == "block_comment") {
// TODO - change this sanitization to return a list of lines instead of a single new-lined
// line
val = comments::format_block_comment(ref.token.value());
val = comments::format_block_comment(ref.token_str());
}
form_lines.push_back(val);
if (!curr_node.metadata.is_top_level && i == curr_node.refs.size() - 1 &&
(ref.metadata.is_comment)) {
// if there's an inline comment at the end of a form, we have to force the paren to the next
// line and do a new-line paren. This is ugly, but we have no choice!
form_lines.push_back("");
}
} else {
// If it's not a token, we have to recursively build up the form
// TODO - add the cursor_pos here
@@ -250,6 +262,9 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
if ((next_ref.metadata.node_type == "comment" && next_ref.metadata.is_inline) ||
(curr_node.formatting_config.has_constant_pairs &&
constant_pairs::is_element_second_in_constant_pair(curr_node, next_ref, i + 1))) {
// TODO
// has issues with not consolidating first lines, this should probably just be moved to
// outside this loop for simplicity, do it later
if (next_ref.token) {
form_lines.at(form_lines.size() - 1) += fmt::format(" {}", next_ref.token.value());
i++;
@@ -260,6 +275,10 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
}
i++;
}
if (!curr_node.metadata.is_top_level && next_ref.metadata.node_type == "comment" &&
(i + 1) == (int)curr_node.refs.size()) {
form_lines.push_back("");
}
}
}
// If we are at the top level, potentially separate with a new line
@@ -288,6 +307,9 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
// Apply necessary indentation to each line and add parens
if (!curr_node.metadata.is_top_level) {
std::string form_surround_start = "(";
if (curr_node.node_prefix) {
form_surround_start = fmt::format("{}(", curr_node.node_prefix.value());
}
std::string form_surround_end = ")";
form_lines[0] = fmt::format("{}{}", form_surround_start, form_lines[0]);
form_lines[form_lines.size() - 1] =
+35 -9
View File
@@ -78,16 +78,24 @@ FormatterTree::FormatterTree(const std::string& source, const TSNode& root_node)
construct_formatter_tree_recursive(source, root_node, root);
}
// Maps a tree-sitter node type to the literal child contents that are skipped while the formatter
// tree is built. When a node of the given type is visited, any child whose source text equals one
// of the listed strings is not recursed into: the parens of a list, and the leading marker
// character of the quote-like literals (which is carried forward as a node prefix instead).
const std::unordered_map<std::string, std::vector<std::string>> node_type_ignorable_contents = {
{"list_lit", {"(", ")"}},
{"quoting_lit", {"'"}},
{"unquoting_lit", {","}},
{"quasi_quoting_lit", {"`"}}};
// TODO make an imperative version eventually
void FormatterTree::construct_formatter_tree_recursive(const std::string& source,
TSNode curr_node,
FormatterTreeNode& tree_node) {
FormatterTreeNode& tree_node,
std::optional<std::string> node_prefix) {
if (ts_node_child_count(curr_node) == 0) {
tree_node.refs.push_back(FormatterTreeNode(source, curr_node));
return;
}
const std::string curr_node_type = ts_node_type(curr_node);
FormatterTreeNode list_node;
std::optional<std::string> next_node_prefix;
if (curr_node_type == "list_lit") {
list_node = FormatterTreeNode();
} else if (curr_node_type == "str_lit") {
@@ -97,22 +105,40 @@ void FormatterTree::construct_formatter_tree_recursive(const std::string& source
tree_node.refs.push_back(FormatterTreeNode(source, curr_node));
return;
} else if (curr_node_type == "quoting_lit") {
// same story for quoted symbols
// TODO - expect to have to add more here
tree_node.refs.push_back(FormatterTreeNode(source, curr_node));
return;
next_node_prefix = "'";
} else if (curr_node_type == "unquoting_lit") {
next_node_prefix = ",";
} else if (curr_node_type == "quasi_quoting_lit") {
next_node_prefix = "`";
}
std::vector<std::string> skippable_nodes = {};
if (node_type_ignorable_contents.find(curr_node_type) != node_type_ignorable_contents.end()) {
skippable_nodes = node_type_ignorable_contents.at(curr_node_type);
}
for (size_t i = 0; i < ts_node_child_count(curr_node); i++) {
const auto child_node = ts_node_child(curr_node, i);
// We skip parens
const auto contents = get_source_code(source, child_node);
if (contents == "(" || contents == ")") {
bool skip_node = false;
for (const auto& skippable_content : skippable_nodes) {
if (skippable_content == contents) {
skip_node = true;
break;
}
}
if (skip_node) {
continue;
}
if (curr_node_type == "list_lit") {
construct_formatter_tree_recursive(source, child_node, list_node);
construct_formatter_tree_recursive(source, child_node, list_node, next_node_prefix);
if (node_prefix) {
list_node.node_prefix = node_prefix;
}
} else {
construct_formatter_tree_recursive(source, child_node, tree_node);
construct_formatter_tree_recursive(source, child_node, tree_node, next_node_prefix);
// TODO - im not sure if this is correct
if (node_prefix && !tree_node.refs.empty()) {
tree_node.refs.at(tree_node.refs.size() - 1).node_prefix = node_prefix;
}
}
}
if (curr_node_type == "list_lit") {
+16 -4
View File
@@ -18,8 +18,9 @@
// we really care about is:
// - getting all the text tokens for the source code
// - having them in a proper, nested format
// The treesitter format is complicated and highly nested, leading to some very hard to understand
// code. So my solution is a 2-pass format.
//
// TLDR - The treesitter format is complicated and highly nested, leading to some very hard to
// understand code. So my solution is at least a 2-pass format.
//
// Pass 1 - convert the AST into a simplified FormatterTree
// Pass 2 - use the simplified tree to output the final code
@@ -37,8 +38,9 @@ class FormatterTreeNode {
std::vector<FormatterTreeNode> refs;
Metadata metadata;
// The token is optional because list nodes do not contain a token, they just contain a bunch of
// eventually token node refs
// eventually-containing token node refs
std::optional<std::string> token;
std::optional<std::string> node_prefix;
formatter_rules::config::FormFormattingConfig formatting_config;
@@ -47,6 +49,15 @@ class FormatterTreeNode {
FormatterTreeNode(const Metadata& _metadata) : metadata(_metadata){};
bool is_list() const { return !token.has_value(); }
// Returns the printable text of this node's token, with `node_prefix` prepended when one is set.
// Nodes without a token yield an empty string.
std::string token_str() const {
  if (!token.has_value()) {
    return "";
  }
  // An absent prefix contributes nothing, so the result is just the token itself.
  std::string text = node_prefix.value_or("");
  text += *token;
  return text;
}
};
// A FormatterTree has a very simple and crude tree structure where:
@@ -62,5 +73,6 @@ class FormatterTree {
private:
void construct_formatter_tree_recursive(const std::string& source,
TSNode curr_node,
FormatterTreeNode& tree_node);
FormatterTreeNode& tree_node,
std::optional<std::string> node_prefix = {});
};
+12
View File
@@ -6,6 +6,13 @@ namespace formatter_rules {
namespace config {
// TODO - this could be greatly simplified with C++20's designated initialization
// Builds a config that does not hang its forms and combines the first two lines of the form.
// Every other field keeps whatever a default-constructed FormFormattingConfig provides.
FormFormattingConfig new_permissive_flow_rule() {
  FormFormattingConfig permissive_cfg;
  permissive_cfg.combine_first_two_lines = true;
  permissive_cfg.hang_forms = false;
  return permissive_cfg;
}
FormFormattingConfig new_flow_rule(int start_index) {
FormFormattingConfig cfg;
cfg.hang_forms = false;
@@ -80,6 +87,11 @@ const std::unordered_map<std::string, FormFormattingConfig> opengoal_form_config
{"defmethod", new_flow_rule(3)},
{"deftype", new_flow_rule_prevent_inlining_indexes(3, {3, 4, 5})},
{"defun", new_flow_rule(3)},
{"defbehavior", new_flow_rule(4)},
{"if", new_permissive_flow_rule()},
{"define", new_permissive_flow_rule()},
{"define-extern", new_permissive_flow_rule()},
{"defmacro", new_flow_rule(3)},
{"dotimes", new_flow_rule(2)},
{"let", new_binding_rule()},
{"when", new_flow_rule(2)},
+48 -1
View File
@@ -105,7 +105,7 @@ Block Comment - Allow Annotations
(println "test")
===
Block Comment - In Form
Block Comment - In Form - TODO Improve
===
(println
@@ -122,3 +122,50 @@ Block Comment - In Form
test
|#
"test")
===
At the end of a form
===
(println
"hello world"
;; this is a comment, don't forget the paren!
)
---
(println "hello world"
;; this is a comment, don't forget the paren!
)
===
Block at the end of a form - TODO Improve
===
(println
"hello world"
#| wow look at that block comment |#
)
---
(println "hello world"
#|
wow look at that block comment
|#
)
===
Inline at the end of a form - TODO-A handle hanging in this instance better
===
(println
"hello world" ;; this is a comment
)
---
(println
"hello world" ;; this is a comment
)
@@ -0,0 +1,27 @@
===
Inlinable If
===
(if arg1
arg1
(symbol->string (-> arg0 type symbol))
)
---
(if arg1 arg1 (symbol->string (-> arg0 type symbol)))
===
Non-Inlinable If
===
(if arg1
(symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol))
(symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol))
)
---
(if arg1
(symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol))
(symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol)))
@@ -0,0 +1,21 @@
===
Define with docstring - inlinable
===
(define enable-level-text-file-loading "Disables [[*level-text-file-load-flag*]]" (function none))
---
(define enable-level-text-file-loading "Disables [[*level-text-file-load-flag*]]" (function none))
===
Define with docstring - not inlinable
===
(define enable-level-text-file-loading "Disables [[*level-text-file-load-flag*]] lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar" (function none))
---
(define enable-level-text-file-loading
"Disables [[*level-text-file-load-flag*]] lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar"
(function none))
@@ -1,3 +1,29 @@
===
defun - No Docstring
===
(defun test-function ((arg0 string))
(println arg0))
---
(defun test-function ((arg0 string))
(println arg0))
===
defun - Docstring
===
(defun test-function ((arg0 string))
"hello world"
(println arg0))
---
(defun test-function ((arg0 string))
"hello world"
(println arg0))
===
Decent size and nesting
===
@@ -34,3 +60,25 @@ Decent size and nesting
arg2
(new 'static 'sound-id))))
(= (get-status *gui-control* a1-3) (gui-status active))))
===
Basic behavior
===
(defbehavior camera-teleport-to-entity process ((arg0 entity-actor))
(let ((gp-0 (new 'stack 'transformq)))
(set! (-> gp-0 trans quad) (-> arg0 extra trans quad))
(quaternion-copy! (-> gp-0 quat) (-> arg0 quat))
(vector-identity! (-> gp-0 scale))
(the-as symbol (send-event *camera* 'teleport-to-transformq gp-0))
)
)
---
(defbehavior camera-teleport-to-entity process ((arg0 entity-actor))
(let ((gp-0 (new 'stack 'transformq)))
(set! (-> gp-0 trans quad) (-> arg0 extra trans quad))
(quaternion-copy! (-> gp-0 quat) (-> arg0 quat))
(vector-identity! (-> gp-0 scale))
(the-as symbol (send-event *camera* 'teleport-to-transformq gp-0))))
@@ -68,15 +68,3 @@ Single Item Form
---
(println)
===
defun - No Docstring
===
(defun test-function ((arg0 string))
(println arg0))
---
(defun test-function ((arg0 string))
(println arg0))
@@ -0,0 +1,17 @@
===
With Docstring
===
(defmacro set-on-less-than (destination source-1 source-2)
"destination = source-1 < source-2 ? 1 : 0 -- Compare as Signed Integers"
`(if (< (the int ,source-1) (the int ,source-2))
(set! ,destination 1)
(set! ,destination 0)
)
)
---
(defmacro set-on-less-than (destination source-1 source-2)
"destination = source-1 < source-2 ? 1 : 0 -- Compare as Signed Integers"
`(if (< (the int ,source-1) (the int ,source-2)) (set! ,destination 1) (set! ,destination 0)))
+1 -1
View File
@@ -38,7 +38,7 @@ const COMMENT =
token(/(;)[^\n]*/);
const BLOCK_COMMENT =
token(seq('#|', repeat1(/[^#|]/), '|#'));
token(seq('#|', repeat(choice(/[^|#]/, seq('#', /[^|]/), seq('|', /[^#]/))), '|#'));
const DIGIT =
/[0-9]/;
+34 -3
View File
@@ -61,10 +61,41 @@
"value": "#|"
},
{
"type": "REPEAT1",
"type": "REPEAT",
"content": {
"type": "PATTERN",
"value": "[^#|]"
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^|#]"
},
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "#"
},
{
"type": "PATTERN",
"value": "[^|]"
}
]
},
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "|"
},
{
"type": "PATTERN",
"value": "[^#]"
}
]
}
]
}
},
{
+29 -29
View File
@@ -849,14 +849,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
if (lookahead == ';') ADVANCE(59);
if (lookahead == '?') ADVANCE(61);
if (lookahead == '@') ADVANCE(38);
if (lookahead == 'N') ADVANCE(7);
if (lookahead == 'N') ADVANCE(8);
if (lookahead == 'V') ADVANCE(34);
if (lookahead == '^') ADVANCE(47);
if (lookahead == '_') ADVANCE(54);
if (lookahead == 'v') ADVANCE(33);
if (lookahead == '|') ADVANCE(45);
if (lookahead == '~') ADVANCE(42);
if (('+' <= lookahead && lookahead <= '-')) ADVANCE(4);
if (('+' <= lookahead && lookahead <= '-')) ADVANCE(5);
if (lookahead == '<' ||
lookahead == '>') ADVANCE(58);
if (lookahead == 'A' ||
@@ -879,51 +879,52 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE();
case 2:
if (lookahead == '"') ADVANCE(64);
if (lookahead == '\\') ADVANCE(19);
if (lookahead == '\\') ADVANCE(18);
if (lookahead == '~') ADVANCE(42);
if (lookahead != 0) ADVANCE(65);
END_STATE();
case 3:
if (lookahead == '#') ADVANCE(25);
if (lookahead == '#') ADVANCE(20);
if (lookahead == '|') ADVANCE(4);
if (lookahead != 0) ADVANCE(3);
END_STATE();
case 4:
if (lookahead == '#') ADVANCE(6);
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(26);
if (lookahead == '#') ADVANCE(25);
if (lookahead != 0) ADVANCE(3);
END_STATE();
case 5:
if (lookahead == '\\') ADVANCE(20);
if (lookahead == '#') ADVANCE(7);
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(26);
END_STATE();
case 6:
if (lookahead == '\\') ADVANCE(19);
if (lookahead == 'b') ADVANCE(14);
if (lookahead == 'f' ||
lookahead == 't') ADVANCE(70);
if (lookahead == 'x') ADVANCE(15);
if (lookahead == '|') ADVANCE(17);
if (lookahead == '|') ADVANCE(3);
END_STATE();
case 6:
case 7:
if (lookahead == 'b') ADVANCE(14);
if (lookahead == 'x') ADVANCE(15);
END_STATE();
case 7:
if (lookahead == 'e') ADVANCE(12);
END_STATE();
case 8:
if (lookahead == 'e') ADVANCE(62);
if (lookahead == 'e') ADVANCE(13);
END_STATE();
case 9:
if (lookahead == 'i') ADVANCE(11);
if (lookahead == 'e') ADVANCE(62);
END_STATE();
case 10:
if (lookahead == 'l') ADVANCE(9);
if (lookahead == 'i') ADVANCE(12);
END_STATE();
case 11:
if (lookahead == 'n') ADVANCE(8);
if (lookahead == 'l') ADVANCE(10);
END_STATE();
case 12:
if (lookahead == 'w') ADVANCE(10);
if (lookahead == 'n') ADVANCE(9);
END_STATE();
case 13:
if (lookahead == '|') ADVANCE(3);
if (lookahead != 0 &&
lookahead != '#') ADVANCE(13);
if (lookahead == 'w') ADVANCE(11);
END_STATE();
case 14:
if (lookahead == '0' ||
@@ -939,26 +940,25 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE();
case 17:
if (lookahead != 0 &&
lookahead != '#' &&
lookahead != '|') ADVANCE(13);
lookahead != '\n') ADVANCE(32);
END_STATE();
case 18:
if (lookahead != 0 &&
lookahead != '\n') ADVANCE(32);
END_STATE();
case 19:
if (lookahead != 0 &&
lookahead != '\n') ADVANCE(66);
END_STATE();
case 20:
case 19:
if (lookahead != 0 &&
lookahead != '\\') ADVANCE(67);
if (lookahead == '\\') ADVANCE(68);
END_STATE();
case 20:
if (lookahead != 0 &&
lookahead != '|') ADVANCE(3);
END_STATE();
case 21:
if (eof) ADVANCE(22);
if (lookahead == '"') ADVANCE(64);
if (lookahead == '#') ADVANCE(5);
if (lookahead == '#') ADVANCE(6);
if (lookahead == '\'') ADVANCE(31);
if (lookahead == '(') ADVANCE(80);
if (lookahead == ')') ADVANCE(81);
@@ -1289,7 +1289,7 @@ static const TSLexMode ts_lex_modes[STATE_COUNT] = {
[55] = {.lex_state = 2},
[56] = {.lex_state = 2},
[57] = {.lex_state = 2},
[58] = {.lex_state = 18},
[58] = {.lex_state = 17},
[59] = {.lex_state = 0},
};