diff --git a/common/formatter/formatter.cpp b/common/formatter/formatter.cpp index 24dd9c8e94..e4eaf82456 100644 --- a/common/formatter/formatter.cpp +++ b/common/formatter/formatter.cpp @@ -67,8 +67,8 @@ void apply_formatting_config( // // Otherwise, we always default to a hang. // - // NOTE - any modifications here to child elements could be superseeded later in the recursion - // in order to maintain your sanity, only modify things here that _arent_ touched by default + // NOTE - any modifications here to child elements could be superseeded later in the recursion! + // In order to maintain your sanity, only modify things here that _arent_ touched by default // configurations. These are explicitly prepended with `parent_mutable_` if (!predefined_config) { if (curr_node.metadata.is_top_level) { @@ -104,6 +104,8 @@ void apply_formatting_config( } // If we are hanging, lets determine the indentation width since it is based on the form itself if (curr_node.formatting_config.hang_forms) { + // TODO - this isn't being calculated for a pre-defined config + // TODO - another idea is to do this during consolidation curr_node.formatting_config.indentation_width = hang_indentation_width(curr_node); } // iterate through the refs @@ -123,6 +125,7 @@ void apply_formatting_config( } } +// TODO - this doesn't account for paren's width contribution! int get_total_form_inlined_width(const FormatterTreeNode& curr_node) { if (curr_node.token) { return curr_node.token->length(); @@ -196,12 +199,15 @@ std::vector apply_formatting(const FormatterTreeNode& curr_node, using namespace formatter_rules; if (!curr_node.token && curr_node.refs.empty()) { // special case to handle an empty list + if (curr_node.node_prefix) { + return {fmt::format("{}()", curr_node.node_prefix.value())}; + } return {"()"}; } // If its a token, just print the token and move on if (curr_node.token) { - return {curr_node.token.value()}; + return {curr_node.token_str()}; } bool inline_form = can_node_be_inlined(curr_node, cursor_pos); @@ -220,13 +226,19 @@ std::vector apply_formatting(const FormatterTreeNode& curr_node, // Add new line entry if (ref.token) { // Cleanup block-comments - std::string val = ref.token.value(); + std::string val = ref.token_str(); if (ref.metadata.node_type == "block_comment") { // TODO - change this sanitization to return a list of lines instead of a single new-lined // line - val = comments::format_block_comment(ref.token.value()); + val = comments::format_block_comment(ref.token_str()); } form_lines.push_back(val); + if (!curr_node.metadata.is_top_level && i == curr_node.refs.size() - 1 && + (ref.metadata.is_comment)) { + // if there's an inline comment at the end of a form, we have to force the paren to the next + // line and do a new-line paren this is ugly, but we have no choice! + form_lines.push_back(""); + } } else { // If it's not a token, we have to recursively build up the form // TODO - add the cursor_pos here @@ -250,6 +262,9 @@ std::vector apply_formatting(const FormatterTreeNode& curr_node, if ((next_ref.metadata.node_type == "comment" && next_ref.metadata.is_inline) || (curr_node.formatting_config.has_constant_pairs && constant_pairs::is_element_second_in_constant_pair(curr_node, next_ref, i + 1))) { + // TODO + // has issues with not consolidating first lines, this should probably just be moved to + // outside this loop for simplicity, do it later if (next_ref.token) { form_lines.at(form_lines.size() - 1) += fmt::format(" {}", next_ref.token.value()); i++; @@ -260,6 +275,10 @@ std::vector apply_formatting(const FormatterTreeNode& curr_node, } i++; } + if (!curr_node.metadata.is_top_level && next_ref.metadata.node_type == "comment" && + (i + 1) == (int)curr_node.refs.size()) { + form_lines.push_back(""); + } } } // If we are at the top level, potential separate with a new line @@ -288,6 +307,9 @@ std::vector apply_formatting(const FormatterTreeNode& curr_node, // Apply necessary indentation to each line and add parens if (!curr_node.metadata.is_top_level) { std::string form_surround_start = "("; + if (curr_node.node_prefix) { + form_surround_start = fmt::format("{}(", curr_node.node_prefix.value()); + } std::string form_surround_end = ")"; form_lines[0] = fmt::format("{}{}", form_surround_start, form_lines[0]); form_lines[form_lines.size() - 1] = diff --git a/common/formatter/formatter_tree.cpp b/common/formatter/formatter_tree.cpp index 8df85f9a08..6e797c2607 100644 --- a/common/formatter/formatter_tree.cpp +++ b/common/formatter/formatter_tree.cpp @@ -78,16 +78,24 @@ FormatterTree::FormatterTree(const std::string& source, const TSNode& root_node) construct_formatter_tree_recursive(source, root_node, root); } +const std::unordered_map> node_type_ignorable_contents = { + {"list_lit", {"(", ")"}}, + {"quoting_lit", {"'"}}, + {"unquoting_lit", {","}}, + {"quasi_quoting_lit", {"`"}}}; + // TODO make an imperative version eventually void FormatterTree::construct_formatter_tree_recursive(const std::string& source, TSNode curr_node, - FormatterTreeNode& tree_node) { + FormatterTreeNode& tree_node, + std::optional node_prefix) { if (ts_node_child_count(curr_node) == 0) { tree_node.refs.push_back(FormatterTreeNode(source, curr_node)); return; } const std::string curr_node_type = ts_node_type(curr_node); FormatterTreeNode list_node; + std::optional next_node_prefix; if (curr_node_type == "list_lit") { list_node = FormatterTreeNode(); } else if (curr_node_type == "str_lit") { @@ -97,22 +105,40 @@ void FormatterTree::construct_formatter_tree_recursive(const std::string& source tree_node.refs.push_back(FormatterTreeNode(source, curr_node)); return; } else if (curr_node_type == "quoting_lit") { - // same story for quoted symbols - // TODO - expect to have to add more here - tree_node.refs.push_back(FormatterTreeNode(source, curr_node)); - return; + next_node_prefix = "'"; + } else if (curr_node_type == "unquoting_lit") { + next_node_prefix = ","; + } else if (curr_node_type == "quasi_quoting_lit") { + next_node_prefix = "`"; + } + std::vector skippable_nodes = {}; + if (node_type_ignorable_contents.find(curr_node_type) != node_type_ignorable_contents.end()) { + skippable_nodes = node_type_ignorable_contents.at(curr_node_type); } for (size_t i = 0; i < ts_node_child_count(curr_node); i++) { const auto child_node = ts_node_child(curr_node, i); - // We skip parens const auto contents = get_source_code(source, child_node); - if (contents == "(" || contents == ")") { + bool skip_node = false; + for (const auto& skippable_content : skippable_nodes) { + if (skippable_content == contents) { + skip_node = true; + break; + } + } + if (skip_node) { continue; } if (curr_node_type == "list_lit") { - construct_formatter_tree_recursive(source, child_node, list_node); + construct_formatter_tree_recursive(source, child_node, list_node, next_node_prefix); + if (node_prefix) { + list_node.node_prefix = node_prefix; + } } else { - construct_formatter_tree_recursive(source, child_node, tree_node); + construct_formatter_tree_recursive(source, child_node, tree_node, next_node_prefix); + // TODO - im not sure if this is correct + if (node_prefix && !tree_node.refs.empty()) { + tree_node.refs.at(tree_node.refs.size() - 1).node_prefix = node_prefix; + } } } if (curr_node_type == "list_lit") { diff --git a/common/formatter/formatter_tree.h b/common/formatter/formatter_tree.h index df0ef13b0d..40fc908430 100644 --- a/common/formatter/formatter_tree.h +++ b/common/formatter/formatter_tree.h @@ -18,8 +18,9 @@ // we really care about is: // - getting all the text tokens for the source code // - having them in a proper, nested format -// The treesitter format is complicated and highly nested, leading to some very hard to understand -// code. So my solution is a 2-pass format. +// +// TLDR - The treesitter format is complicated and highly nested, leading to some very hard to +// understand code. So my solution is atleast a 2-pass format. // // Pass 1 - convert the AST into a simplified FormatterTree // Pass 2 - use the simplified tree to output the final code @@ -37,8 +38,9 @@ class FormatterTreeNode { std::vector refs; Metadata metadata; // The token is optional because list nodes do not contain a token, they just contain a bunch of - // eventually token node refs + // eventually-containing token node refs std::optional token; + std::optional node_prefix; formatter_rules::config::FormFormattingConfig formatting_config; @@ -47,6 +49,15 @@ class FormatterTreeNode { FormatterTreeNode(const Metadata& _metadata) : metadata(_metadata){}; bool is_list() const { return !token.has_value(); } + std::string token_str() const { + if (node_prefix && token) { + return node_prefix.value() + token.value(); + } + if (token) { + return token.value(); + } + return ""; + } }; // A FormatterTree has a very simple and crude tree structure where: @@ -62,5 +73,6 @@ class FormatterTree { private: void construct_formatter_tree_recursive(const std::string& source, TSNode curr_node, - FormatterTreeNode& tree_node); + FormatterTreeNode& tree_node, + std::optional node_prefix = {}); }; diff --git a/common/formatter/rules/rule_config.cpp b/common/formatter/rules/rule_config.cpp index e8aa1e15dc..c8a7662d0d 100644 --- a/common/formatter/rules/rule_config.cpp +++ b/common/formatter/rules/rule_config.cpp @@ -6,6 +6,13 @@ namespace formatter_rules { namespace config { // TODO - this could be greatly simplified with C++20's designated initialization +FormFormattingConfig new_permissive_flow_rule() { + FormFormattingConfig cfg; + cfg.hang_forms = false; + cfg.combine_first_two_lines = true; + return cfg; +} + FormFormattingConfig new_flow_rule(int start_index) { FormFormattingConfig cfg; cfg.hang_forms = false; @@ -80,6 +87,11 @@ const std::unordered_map opengoal_form_config {"defmethod", new_flow_rule(3)}, {"deftype", new_flow_rule_prevent_inlining_indexes(3, {3, 4, 5})}, {"defun", new_flow_rule(3)}, + {"defbehavior", new_flow_rule(4)}, + {"if", new_permissive_flow_rule()}, + {"define", new_permissive_flow_rule()}, + {"define-extern", new_permissive_flow_rule()}, + {"defmacro", new_flow_rule(3)}, {"dotimes", new_flow_rule(2)}, {"let", new_binding_rule()}, {"when", new_flow_rule(2)}, diff --git a/test/common/formatter/corpus/comments.test.gc b/test/common/formatter/corpus/comments.test.gc index be7c8aa087..54de0a0777 100644 --- a/test/common/formatter/corpus/comments.test.gc +++ b/test/common/formatter/corpus/comments.test.gc @@ -105,7 +105,7 @@ Block Comment - Allow Annotations (println "test") === -Block Comment - In Form +Block Comment - In Form - TODO Improve === (println @@ -122,3 +122,50 @@ Block Comment - In Form test |# "test") + +=== +At the end of a form +=== + +(println + "hello world" + ;; this is a comment, don't forget the paren! + ) + +--- + +(println "hello world" + ;; this is a comment, don't forget the paren! + ) + +=== +Block at the end of a form - TODO Improve +=== + +(println + "hello world" + #| wow look at that block comment |# + ) + +--- + +(println "hello world" + #| + wow look at that block comment +|# + ) + +=== +Inline at the end of a form - TODO-A handle hanging in this instance better +=== + +(println + "hello world" ;; this is a comment + ) + +--- + +(println + "hello world" ;; this is a comment + ) + diff --git a/test/common/formatter/corpus/conditions.test.gc b/test/common/formatter/corpus/conditions.test.gc new file mode 100644 index 0000000000..a29617a5ad --- /dev/null +++ b/test/common/formatter/corpus/conditions.test.gc @@ -0,0 +1,27 @@ +=== +Inlinable If +=== + +(if arg1 + arg1 + (symbol->string (-> arg0 type symbol)) + ) + +--- + +(if arg1 arg1 (symbol->string (-> arg0 type symbol))) + +=== +Non-Inlinable If +=== + +(if arg1 + (symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol)) + (symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol)) + ) + +--- + +(if arg1 + (symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol)) + (symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol))) diff --git a/test/common/formatter/corpus/definitions.test.gc b/test/common/formatter/corpus/definitions.test.gc new file mode 100644 index 0000000000..67dcd94eeb --- /dev/null +++ b/test/common/formatter/corpus/definitions.test.gc @@ -0,0 +1,21 @@ +=== +Define with docstring - inlinable +=== + +(define enable-level-text-file-loading "Disables [[*level-text-file-load-flag*]]" (function none)) + +--- + +(define enable-level-text-file-loading "Disables [[*level-text-file-load-flag*]]" (function none)) + +=== +Define with docstring - not inlinable +=== + +(define enable-level-text-file-loading "Disables [[*level-text-file-load-flag*]] lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar" (function none)) + +--- + +(define enable-level-text-file-loading + "Disables [[*level-text-file-load-flag*]] lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar" + (function none)) diff --git a/test/common/formatter/corpus/functions.test.gc b/test/common/formatter/corpus/functions.test.gc index 8bbff533df..3a0a98a9a8 100644 --- a/test/common/formatter/corpus/functions.test.gc +++ b/test/common/formatter/corpus/functions.test.gc @@ -1,3 +1,29 @@ +=== +defun - No Docstring +=== + +(defun test-function ((arg0 string)) + (println arg0)) + +--- + +(defun test-function ((arg0 string)) + (println arg0)) + +=== +defun - Docstring +=== + +(defun test-function ((arg0 string)) + "hello world" + (println arg0)) + +--- + +(defun test-function ((arg0 string)) + "hello world" + (println arg0)) + === Decent size and nesting === @@ -34,3 +60,25 @@ Decent size and nesting arg2 (new 'static 'sound-id)))) (= (get-status *gui-control* a1-3) (gui-status active)))) + +=== +Basic behavior +=== + +(defbehavior camera-teleport-to-entity process ((arg0 entity-actor)) + (let ((gp-0 (new 'stack 'transformq))) + (set! (-> gp-0 trans quad) (-> arg0 extra trans quad)) + (quaternion-copy! (-> gp-0 quat) (-> arg0 quat)) + (vector-identity! (-> gp-0 scale)) + (the-as symbol (send-event *camera* 'teleport-to-transformq gp-0)) + ) + ) + +--- + +(defbehavior camera-teleport-to-entity process ((arg0 entity-actor)) + (let ((gp-0 (new 'stack 'transformq))) + (set! (-> gp-0 trans quad) (-> arg0 extra trans quad)) + (quaternion-copy! (-> gp-0 quat) (-> arg0 quat)) + (vector-identity! (-> gp-0 scale)) + (the-as symbol (send-event *camera* 'teleport-to-transformq gp-0)))) diff --git a/test/common/formatter/corpus/indent.test.gc b/test/common/formatter/corpus/indent.test.gc index 676b26b4a9..69eeb81de0 100644 --- a/test/common/formatter/corpus/indent.test.gc +++ b/test/common/formatter/corpus/indent.test.gc @@ -68,15 +68,3 @@ Single Item Form --- (println) - -=== -defun - No Docstring -=== - -(defun test-function ((arg0 string)) - (println arg0)) - ---- - -(defun test-function ((arg0 string)) - (println arg0)) diff --git a/test/common/formatter/corpus/macros.test.gc b/test/common/formatter/corpus/macros.test.gc new file mode 100644 index 0000000000..2c9c0b4988 --- /dev/null +++ b/test/common/formatter/corpus/macros.test.gc @@ -0,0 +1,17 @@ +=== +With Docstring +=== + +(defmacro set-on-less-than (destination source-1 source-2) + "destination = source-1 < source-2 ? 1 : 0 -- Compare as Signed Integers" + `(if (< (the int ,source-1) (the int ,source-2)) + (set! ,destination 1) + (set! ,destination 0) + ) + ) + +--- + +(defmacro set-on-less-than (destination source-1 source-2) + "destination = source-1 < source-2 ? 1 : 0 -- Compare as Signed Integers" + `(if (< (the int ,source-1) (the int ,source-2)) (set! ,destination 1) (set! ,destination 0))) diff --git a/third-party/tree-sitter/tree-sitter-opengoal/grammar.js b/third-party/tree-sitter/tree-sitter-opengoal/grammar.js index 71b1387e3b..c02bfb8d60 100644 --- a/third-party/tree-sitter/tree-sitter-opengoal/grammar.js +++ b/third-party/tree-sitter/tree-sitter-opengoal/grammar.js @@ -38,7 +38,7 @@ const COMMENT = token(/(;)[^\n]*/); const BLOCK_COMMENT = - token(seq('#|', repeat1(/[^#|]/), '|#')); + token(seq('#|', repeat(choice(/[^|#]/, seq('#', /[^|]/), seq('|', /[^#]/))), '|#')); const DIGIT = /[0-9]/; diff --git a/third-party/tree-sitter/tree-sitter-opengoal/grammar.json b/third-party/tree-sitter/tree-sitter-opengoal/grammar.json index a7ac90f8a9..8761f4247e 100644 --- a/third-party/tree-sitter/tree-sitter-opengoal/grammar.json +++ b/third-party/tree-sitter/tree-sitter-opengoal/grammar.json @@ -61,10 +61,41 @@ "value": "#|" }, { - "type": "REPEAT1", + "type": "REPEAT", "content": { - "type": "PATTERN", - "value": "[^#|]" + "type": "CHOICE", + "members": [ + { + "type": "PATTERN", + "value": "[^|#]" + }, + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "#" + }, + { + "type": "PATTERN", + "value": "[^|]" + } + ] + }, + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "|" + }, + { + "type": "PATTERN", + "value": "[^#]" + } + ] + } + ] } }, { diff --git a/third-party/tree-sitter/tree-sitter-opengoal/parser.c b/third-party/tree-sitter/tree-sitter-opengoal/parser.c index a36a65454e..92ff867666 100644 --- a/third-party/tree-sitter/tree-sitter-opengoal/parser.c +++ b/third-party/tree-sitter/tree-sitter-opengoal/parser.c @@ -849,14 +849,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { if (lookahead == ';') ADVANCE(59); if (lookahead == '?') ADVANCE(61); if (lookahead == '@') ADVANCE(38); - if (lookahead == 'N') ADVANCE(7); + if (lookahead == 'N') ADVANCE(8); if (lookahead == 'V') ADVANCE(34); if (lookahead == '^') ADVANCE(47); if (lookahead == '_') ADVANCE(54); if (lookahead == 'v') ADVANCE(33); if (lookahead == '|') ADVANCE(45); if (lookahead == '~') ADVANCE(42); - if (('+' <= lookahead && lookahead <= '-')) ADVANCE(4); + if (('+' <= lookahead && lookahead <= '-')) ADVANCE(5); if (lookahead == '<' || lookahead == '>') ADVANCE(58); if (lookahead == 'A' || @@ -879,51 +879,52 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { END_STATE(); case 2: if (lookahead == '"') ADVANCE(64); - if (lookahead == '\\') ADVANCE(19); + if (lookahead == '\\') ADVANCE(18); if (lookahead == '~') ADVANCE(42); if (lookahead != 0) ADVANCE(65); END_STATE(); case 3: - if (lookahead == '#') ADVANCE(25); + if (lookahead == '#') ADVANCE(20); + if (lookahead == '|') ADVANCE(4); + if (lookahead != 0) ADVANCE(3); END_STATE(); case 4: - if (lookahead == '#') ADVANCE(6); - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(26); + if (lookahead == '#') ADVANCE(25); + if (lookahead != 0) ADVANCE(3); END_STATE(); case 5: - if (lookahead == '\\') ADVANCE(20); + if (lookahead == '#') ADVANCE(7); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(26); + END_STATE(); + case 6: + if (lookahead == '\\') ADVANCE(19); if (lookahead == 'b') ADVANCE(14); if (lookahead == 'f' || lookahead == 't') ADVANCE(70); if (lookahead == 'x') ADVANCE(15); - if (lookahead == '|') ADVANCE(17); + if (lookahead == '|') ADVANCE(3); END_STATE(); - case 6: + case 7: if (lookahead == 'b') ADVANCE(14); if (lookahead == 'x') ADVANCE(15); END_STATE(); - case 7: - if (lookahead == 'e') ADVANCE(12); - END_STATE(); case 8: - if (lookahead == 'e') ADVANCE(62); + if (lookahead == 'e') ADVANCE(13); END_STATE(); case 9: - if (lookahead == 'i') ADVANCE(11); + if (lookahead == 'e') ADVANCE(62); END_STATE(); case 10: - if (lookahead == 'l') ADVANCE(9); + if (lookahead == 'i') ADVANCE(12); END_STATE(); case 11: - if (lookahead == 'n') ADVANCE(8); + if (lookahead == 'l') ADVANCE(10); END_STATE(); case 12: - if (lookahead == 'w') ADVANCE(10); + if (lookahead == 'n') ADVANCE(9); END_STATE(); case 13: - if (lookahead == '|') ADVANCE(3); - if (lookahead != 0 && - lookahead != '#') ADVANCE(13); + if (lookahead == 'w') ADVANCE(11); END_STATE(); case 14: if (lookahead == '0' || @@ -939,26 +940,25 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { END_STATE(); case 17: if (lookahead != 0 && - lookahead != '#' && - lookahead != '|') ADVANCE(13); + lookahead != '\n') ADVANCE(32); END_STATE(); case 18: - if (lookahead != 0 && - lookahead != '\n') ADVANCE(32); - END_STATE(); - case 19: if (lookahead != 0 && lookahead != '\n') ADVANCE(66); END_STATE(); - case 20: + case 19: if (lookahead != 0 && lookahead != '\\') ADVANCE(67); if (lookahead == '\\') ADVANCE(68); END_STATE(); + case 20: + if (lookahead != 0 && + lookahead != '|') ADVANCE(3); + END_STATE(); case 21: if (eof) ADVANCE(22); if (lookahead == '"') ADVANCE(64); - if (lookahead == '#') ADVANCE(5); + if (lookahead == '#') ADVANCE(6); if (lookahead == '\'') ADVANCE(31); if (lookahead == '(') ADVANCE(80); if (lookahead == ')') ADVANCE(81); @@ -1289,7 +1289,7 @@ static const TSLexMode ts_lex_modes[STATE_COUNT] = { [55] = {.lex_state = 2}, [56] = {.lex_state = 2}, [57] = {.lex_state = 2}, - [58] = {.lex_state = 18}, + [58] = {.lex_state = 17}, [59] = {.lex_state = 0}, };