mirror of
https://github.com/open-goal/jak-project
synced 2026-06-04 02:47:17 -04:00
formatter: handle top level blank lines and better handle comments (#2702)
This commit is contained in:
@@ -9,6 +9,6 @@ namespace formatter {
|
||||
|
||||
// TODO - populate these more
|
||||
|
||||
const std::unordered_map<std::string, std::vector<std::shared_ptr<FormattingRule>>> opengoal_rules =
|
||||
{{"defun", {std::make_shared<InnerFormattingRule>(1)}}};
|
||||
const std::unordered_map<std::string, std::vector<std::shared_ptr<IndentationRule>>>
|
||||
opengoal_indentation_rules = {{"defun", {std::make_shared<InnerIndentationRule>(1)}}};
|
||||
} // namespace formatter
|
||||
|
||||
@@ -8,6 +8,6 @@
|
||||
#include "common/formatter/formatting_rules.h"
|
||||
|
||||
namespace formatter {
|
||||
extern const std::unordered_map<std::string, std::vector<std::shared_ptr<FormattingRule>>>
|
||||
opengoal_rules;
|
||||
extern const std::unordered_map<std::string, std::vector<std::shared_ptr<IndentationRule>>>
|
||||
opengoal_indentation_rules;
|
||||
}
|
||||
|
||||
@@ -39,17 +39,15 @@ std::string apply_formatting(const FormatterTreeNode& curr_node,
|
||||
return curr_form;
|
||||
}
|
||||
// TODO - this might have some issues for non-list top level elements (ie. comments)
|
||||
if (!curr_node.metadata.is_root) {
|
||||
if (!curr_node.metadata.is_top_level) {
|
||||
curr_form += "(";
|
||||
}
|
||||
// Iterate the form
|
||||
for (int i = 0; i < curr_node.refs.size(); i++) {
|
||||
const auto& ref = curr_node.refs.at(i);
|
||||
// Append a newline if relevant
|
||||
if (!curr_node.metadata.is_root) {
|
||||
curr_node.get_formatting_rule(tree_depth, i)
|
||||
->append_newline(curr_form, ref, curr_node, tree_depth, i);
|
||||
}
|
||||
// Append a newline if needed
|
||||
curr_node.get_formatting_rule(tree_depth, i)
|
||||
->append_newline(curr_form, ref, curr_node, tree_depth, i);
|
||||
// Either print the element's token, or recursively format it as well
|
||||
if (ref.token) {
|
||||
curr_node.get_formatting_rule(tree_depth, i)
|
||||
@@ -58,30 +56,21 @@ std::string apply_formatting(const FormatterTreeNode& curr_node,
|
||||
// forms are always done bottom-top recursively, they always act
|
||||
// independently as if it was the shallowest depth
|
||||
curr_form += ref.token.value();
|
||||
if (!curr_node.metadata.is_root) {
|
||||
if (!curr_node.metadata.is_top_level) {
|
||||
curr_form += " ";
|
||||
}
|
||||
} else {
|
||||
auto formatted_form = apply_formatting(ref, "", tree_depth + 1);
|
||||
if (!curr_node.metadata.is_root) {
|
||||
if (!curr_node.metadata.is_top_level) {
|
||||
curr_node.get_formatting_rule(tree_depth, i)
|
||||
->align_form_lines(formatted_form, ref, curr_node);
|
||||
}
|
||||
curr_form += formatted_form;
|
||||
}
|
||||
// Separate top-level elements with a new-line
|
||||
// TODO - move this into a top-level rule
|
||||
// TODO - temporary hack for top level comments, but need a better strategy for leaving them
|
||||
// where they were originally
|
||||
if (curr_node.metadata.is_root && i < curr_node.refs.size() - 1) {
|
||||
curr_form += "\n";
|
||||
if (!ref.token || !str_util::starts_with(ref.token.value(), ";")) {
|
||||
curr_form += "\n";
|
||||
}
|
||||
}
|
||||
// Handle blank lines at the top level, skip if it's the final element
|
||||
formatter_rules::blank_lines::separate_by_newline(curr_form, curr_node, ref, i);
|
||||
}
|
||||
// TODO - similar fear to issues as above
|
||||
if (!curr_node.metadata.is_root) {
|
||||
if (!curr_node.metadata.is_top_level) {
|
||||
curr_form = str_util::rtrim(curr_form) + ")";
|
||||
}
|
||||
return curr_form;
|
||||
|
||||
@@ -3,8 +3,12 @@
|
||||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
#include "formatting_rules.h"
|
||||
|
||||
#include "tree_sitter/api.h"
|
||||
|
||||
// TODO:
|
||||
// - Considering _eventually_ adding line-length heuristics
|
||||
namespace formatter {
|
||||
|
||||
struct TreeSitterParserDeleter {
|
||||
|
||||
@@ -5,11 +5,52 @@
|
||||
#include "config/rule_config.h"
|
||||
|
||||
namespace formatter {
|
||||
const std::shared_ptr<FormattingRule> default_rule = std::make_shared<FormattingRule>();
|
||||
const std::shared_ptr<IndentationRule> default_rule = std::make_shared<IndentationRule>();
|
||||
}
|
||||
|
||||
std::shared_ptr<FormattingRule> FormatterTreeNode::get_formatting_rule(const int depth,
|
||||
const int index) const {
|
||||
std::string get_source_code(const std::string& source, const TSNode& node) {
|
||||
uint32_t start = ts_node_start_byte(node);
|
||||
uint32_t end = ts_node_end_byte(node);
|
||||
// TODO - comments end with a \n, this is likely a tree-sitter grammar problem
|
||||
return str_util::rtrim(source.substr(start, end - start));
|
||||
}
|
||||
|
||||
int num_blank_lines_following_node(const std::string& source, const TSNode& node) {
|
||||
int num_lines = 0;
|
||||
uint32_t cursor = ts_node_end_byte(node);
|
||||
while (cursor < source.length() && source.at(cursor) == '\n') {
|
||||
num_lines++;
|
||||
cursor++;
|
||||
}
|
||||
return num_lines;
|
||||
}
|
||||
|
||||
// Check if the original source only has whitespace up to a new-line after it's token
|
||||
bool node_preceeded_by_only_whitespace(const std::string& source, const TSNode& node) {
|
||||
uint32_t pos = ts_node_start_byte(node);
|
||||
while (pos > 0) {
|
||||
const auto& c = source.at(pos);
|
||||
if (c == '\n') {
|
||||
return true;
|
||||
} else if (c == ' ' || c == '\t') {
|
||||
pos--;
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
FormatterTreeNode::FormatterTreeNode(const std::string& source, const TSNode& node)
|
||||
: token(get_source_code(source, node)) {
|
||||
// Set any metadata based on the value of the token
|
||||
metadata.is_comment = str_util::starts_with(str_util::ltrim(token.value()), ";");
|
||||
metadata.num_blank_lines_following = num_blank_lines_following_node(source, node);
|
||||
metadata.is_inline = !node_preceeded_by_only_whitespace(source, node);
|
||||
};
|
||||
|
||||
std::shared_ptr<IndentationRule> FormatterTreeNode::get_formatting_rule(const int depth,
|
||||
const int index) const {
|
||||
// TODO - really lazy for now
|
||||
if (!rules.empty()) {
|
||||
return rules.at(0);
|
||||
@@ -41,16 +82,9 @@ bool nodes_on_same_line(const std::string& source, const TSNode& n1, const TSNod
|
||||
return !str_util::contains(code_between, "\n");
|
||||
}
|
||||
|
||||
std::string get_source_code(const std::string& source, const TSNode& node) {
|
||||
uint32_t start = ts_node_start_byte(node);
|
||||
uint32_t end = ts_node_end_byte(node);
|
||||
// TODO - comments end with a \n, this is likely a tree-sitter grammar problem
|
||||
return str_util::rtrim(source.substr(start, end - start));
|
||||
}
|
||||
|
||||
FormatterTree::FormatterTree(const std::string& source, const TSNode& root_node) {
|
||||
root = FormatterTreeNode();
|
||||
root.metadata.is_root = true;
|
||||
root.metadata.is_top_level = true;
|
||||
construct_formatter_tree_recursive(source, root_node, root);
|
||||
}
|
||||
|
||||
@@ -59,7 +93,7 @@ void FormatterTree::construct_formatter_tree_recursive(const std::string& source
|
||||
TSNode curr_node,
|
||||
FormatterTreeNode& tree_node) {
|
||||
if (ts_node_child_count(curr_node) == 0) {
|
||||
tree_node.refs.push_back(FormatterTreeNode(get_source_code(source, curr_node)));
|
||||
tree_node.refs.push_back(FormatterTreeNode(source, curr_node));
|
||||
return;
|
||||
}
|
||||
const std::string curr_node_type = ts_node_type(curr_node);
|
||||
@@ -80,8 +114,9 @@ void FormatterTree::construct_formatter_tree_recursive(const std::string& source
|
||||
list_node.metadata.multiple_elements_first_line =
|
||||
!node_followed_by_only_whitespace(source, child_node);
|
||||
// Peek at the first element of the list to determine formatting rules
|
||||
if (formatter::opengoal_rules.find(contents) != formatter::opengoal_rules.end()) {
|
||||
list_node.rules = formatter::opengoal_rules.at(contents);
|
||||
if (formatter::opengoal_indentation_rules.find(contents) !=
|
||||
formatter::opengoal_indentation_rules.end()) {
|
||||
list_node.rules = formatter::opengoal_indentation_rules.at(contents);
|
||||
}
|
||||
}
|
||||
construct_formatter_tree_recursive(source, child_node, list_node);
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
#include "tree_sitter/api.h"
|
||||
|
||||
class FormattingRule;
|
||||
class IndentationRule;
|
||||
|
||||
// Treesitter is fantastic for validating and parsing our code into a structured tree format without
|
||||
// whitespace so we can do that ourselves (formatting) However, the treesitter AST is a bit too
|
||||
@@ -28,13 +28,16 @@ class FormattingRule;
|
||||
// Pass 2 - use the simplified tree to output the final code
|
||||
|
||||
namespace formatter {
|
||||
extern const std::shared_ptr<FormattingRule> default_rule;
|
||||
extern const std::shared_ptr<IndentationRule> default_indentation_rule;
|
||||
}
|
||||
|
||||
class FormatterTreeNode {
|
||||
public:
|
||||
struct Metadata {
|
||||
bool is_root = false;
|
||||
bool is_top_level = false;
|
||||
bool is_comment = false;
|
||||
bool is_inline = false;
|
||||
int num_blank_lines_following = 0;
|
||||
// Whether the form had more than 1 element on the first line
|
||||
// (println
|
||||
// "test")
|
||||
@@ -48,15 +51,15 @@ class FormatterTreeNode {
|
||||
// The token is optional because list nodes do not contain a token, they just contain a bunch of
|
||||
// eventually token node refs
|
||||
std::optional<std::string> token;
|
||||
std::vector<std::shared_ptr<FormattingRule>> rules = {};
|
||||
std::vector<std::shared_ptr<IndentationRule>> rules = {};
|
||||
|
||||
FormatterTreeNode() = default;
|
||||
FormatterTreeNode(const std::string& _token) : token(_token){};
|
||||
FormatterTreeNode(const std::string& source, const TSNode& node);
|
||||
FormatterTreeNode(const Metadata& _metadata) : metadata(_metadata){};
|
||||
|
||||
// Considers the input to determine the most relevant formatting rule for the given node
|
||||
// if none are applicable, returns `formatter::default_rule`
|
||||
std::shared_ptr<FormattingRule> get_formatting_rule(const int depth, const int index) const;
|
||||
std::shared_ptr<IndentationRule> get_formatting_rule(const int depth, const int index) const;
|
||||
};
|
||||
|
||||
// A FormatterTree has a very simple and crude tree structure where:
|
||||
|
||||
@@ -2,24 +2,47 @@
|
||||
|
||||
#include "common/util/string_util.h"
|
||||
|
||||
void FormattingRule::append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
void formatter_rules::blank_lines::separate_by_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const FormatterTreeNode& node,
|
||||
const int index) {
|
||||
// We only are concerned with top level forms or elements
|
||||
// Skip the last element, no trailing new-lines (let the editors handle this!)
|
||||
// Also peek ahead to see if there was a comment on this line, if so don't separate things!
|
||||
if (!containing_node.metadata.is_top_level || index >= containing_node.refs.size() - 1 ||
|
||||
(containing_node.refs.at(index + 1).metadata.is_comment &&
|
||||
containing_node.refs.at(index + 1).metadata.is_inline)) {
|
||||
return;
|
||||
}
|
||||
curr_text += "\n";
|
||||
// If it's a comment, but has no following blank lines, dont insert a blank line
|
||||
if (node.metadata.is_comment && node.metadata.num_blank_lines_following == 0) {
|
||||
return;
|
||||
}
|
||||
// Otherwise, add only 1 blank line
|
||||
curr_text += "\n";
|
||||
}
|
||||
|
||||
void IndentationRule::append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (index <= 0 && !containing_node.metadata.multiple_elements_first_line ||
|
||||
index <= 1 && containing_node.metadata.multiple_elements_first_line) {
|
||||
index <= 1 && containing_node.metadata.multiple_elements_first_line ||
|
||||
containing_node.metadata.is_top_level ||
|
||||
(node.metadata.is_comment && node.metadata.is_inline)) {
|
||||
return;
|
||||
}
|
||||
curr_text = str_util::rtrim(curr_text) + "\n";
|
||||
}
|
||||
|
||||
void FormattingRule::indent_token(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (node.metadata.is_root) {
|
||||
void IndentationRule::indent_token(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (node.metadata.is_top_level) {
|
||||
return;
|
||||
}
|
||||
if (containing_node.metadata.multiple_elements_first_line) {
|
||||
@@ -37,9 +60,9 @@ void FormattingRule::indent_token(std::string& curr_text,
|
||||
}
|
||||
}
|
||||
|
||||
void FormattingRule::align_form_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node) {
|
||||
void IndentationRule::align_form_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node) {
|
||||
const auto lines = str_util::split(text);
|
||||
// TODO - unsafe (breaks on a list of lists)
|
||||
int alignment_width = 1;
|
||||
@@ -56,11 +79,11 @@ void FormattingRule::align_form_lines(std::string& text,
|
||||
text = aligned_form;
|
||||
}
|
||||
|
||||
void InnerFormattingRule::append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
void InnerIndentationRule::append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (index < 1 || (m_depth != depth || m_index && m_index.value() != index)) {
|
||||
return;
|
||||
}
|
||||
@@ -69,11 +92,11 @@ void InnerFormattingRule::append_newline(std::string& curr_text,
|
||||
}
|
||||
}
|
||||
|
||||
void InnerFormattingRule::indent_token(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
void InnerIndentationRule::indent_token(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const int depth,
|
||||
const int index) {
|
||||
if (index < 1 || (m_depth != depth || m_index && m_index.value() != index)) {
|
||||
return;
|
||||
}
|
||||
@@ -83,9 +106,9 @@ void InnerFormattingRule::indent_token(std::string& curr_text,
|
||||
}
|
||||
}
|
||||
|
||||
void InnerFormattingRule::align_form_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node) {
|
||||
void InnerIndentationRule::align_form_lines(std::string& text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node) {
|
||||
if (node.metadata.was_on_first_line_of_form) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,35 @@
|
||||
|
||||
class FormatterTreeNode;
|
||||
|
||||
namespace formatter_rules {
|
||||
// The formatter will try to collapse as much space as possible in the top-level, this means
|
||||
// separating forms by a single empty blank line
|
||||
//
|
||||
// The exception is comments, top level comments will retain their following blank lines from the
|
||||
// original source
|
||||
// - this could be none, in the case where a comment is directly next to a form (like this one!)
|
||||
// - you don't want them to be separated!
|
||||
// - or this could be top level comments / comment blocks documenting things but not being
|
||||
// associated with a form
|
||||
// - in this case, you want them to remain separated
|
||||
//
|
||||
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/blank.md
|
||||
namespace blank_lines {
|
||||
void separate_by_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& containing_node,
|
||||
const FormatterTreeNode& node,
|
||||
const int index);
|
||||
}
|
||||
|
||||
// TODO - nothing here yet, in the future:
|
||||
// - if/when the formatter is concerned with line length, there are implications here
|
||||
// - align consecutive comment lines
|
||||
//
|
||||
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/comments.md
|
||||
namespace comments {}
|
||||
|
||||
} // namespace formatter_rules
|
||||
|
||||
// Indentation rules are heavily inspired by the descriptions here
|
||||
// https://github.com/weavejester/cljfmt/blob/master/docs/INDENTS.md
|
||||
//
|
||||
@@ -35,9 +64,9 @@ class FormatterTreeNode;
|
||||
// - otherwise, every element after the 2nd is on a new line and aligned to that 1st list arg.
|
||||
// (println "hello" ; <= more than one element on first line
|
||||
// "world")
|
||||
class FormattingRule {
|
||||
class IndentationRule {
|
||||
public:
|
||||
virtual ~FormattingRule() = default;
|
||||
virtual ~IndentationRule() = default;
|
||||
virtual void append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
@@ -64,14 +93,14 @@ class FormattingRule {
|
||||
// (defn dismiss
|
||||
// [name]
|
||||
// (println "Goodbye" name))
|
||||
class InnerFormattingRule : public FormattingRule {
|
||||
class InnerIndentationRule : public IndentationRule {
|
||||
private:
|
||||
int m_depth;
|
||||
std::optional<int> m_index;
|
||||
|
||||
public:
|
||||
InnerFormattingRule(int depth) : m_depth(depth){};
|
||||
InnerFormattingRule(int depth, int index) : m_depth(depth), m_index(index){};
|
||||
InnerIndentationRule(int depth) : m_depth(depth){};
|
||||
InnerIndentationRule(int depth, int index) : m_depth(depth), m_index(index){};
|
||||
virtual void append_newline(std::string& curr_text,
|
||||
const FormatterTreeNode& node,
|
||||
const FormatterTreeNode& containing_node,
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
===
|
||||
Separate Forms
|
||||
===
|
||||
|
||||
(println "test")
|
||||
(println "test") (println "test")
|
||||
|
||||
---
|
||||
|
||||
(println "test")
|
||||
|
||||
(println "test")
|
||||
|
||||
(println "test")
|
||||
@@ -11,16 +11,14 @@ Top-Level Comment
|
||||
(println "test")
|
||||
|
||||
===
|
||||
TODO - Inline Comment
|
||||
Inline Comment
|
||||
===
|
||||
|
||||
(println "test") ;; test
|
||||
|
||||
---
|
||||
|
||||
(println "test")
|
||||
|
||||
;; test
|
||||
(println "test") ;; test
|
||||
|
||||
===
|
||||
TODO - Block Comment
|
||||
|
||||
@@ -35,7 +35,7 @@ struct TestDefinition {
|
||||
std::string output;
|
||||
};
|
||||
|
||||
bool run_tests(const fs::path& file_path, const bool only_important_tests) {
|
||||
std::vector<TestDefinition> get_test_definitions(const fs::path& file_path) {
|
||||
// Read in the file, and run the test
|
||||
const auto contents = str_util::split(file_util::read_text_file(file_path));
|
||||
std::vector<TestDefinition> tests;
|
||||
@@ -74,6 +74,21 @@ bool run_tests(const fs::path& file_path, const bool only_important_tests) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return tests;
|
||||
}
|
||||
|
||||
bool has_important_tests(const fs::path& file_path) {
|
||||
const auto& tests = get_test_definitions(file_path);
|
||||
for (const auto& test : tests) {
|
||||
if (str_util::starts_with(test.name, "!")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool run_tests(const fs::path& file_path, const bool only_important_tests) {
|
||||
const auto& tests = get_test_definitions(file_path);
|
||||
// Run the tests, report successes and failures
|
||||
bool test_failed = false;
|
||||
fmt::print("{}:\n", fmt::styled(file_util::base_name(file_path.string()),
|
||||
@@ -103,11 +118,19 @@ bool run_tests(const fs::path& file_path, const bool only_important_tests) {
|
||||
return test_failed;
|
||||
}
|
||||
|
||||
bool find_and_run_tests(const bool only_important_tests) {
|
||||
bool find_and_run_tests() {
|
||||
// Enumerate test files
|
||||
const auto test_files = file_util::find_files_recursively(
|
||||
file_util::get_file_path({"test/common/formatter/corpus"}), std::regex("^.*\.test.gc$"));
|
||||
bool failed = false;
|
||||
// First do a pass to see if any tests are meant to be prioritized for debugging
|
||||
bool only_important_tests = false;
|
||||
for (const auto& file : test_files) {
|
||||
only_important_tests = has_important_tests(file);
|
||||
if (only_important_tests) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (const auto& file : test_files) {
|
||||
// don't fail fast, but any failure means we return false
|
||||
if (failed) {
|
||||
@@ -120,9 +143,5 @@ bool find_and_run_tests(const bool only_important_tests) {
|
||||
}
|
||||
|
||||
TEST(Formatter, FormatterTests) {
|
||||
// TODO - when i get annoyed enough, make this not a manual change and pre-scan the tests to see
|
||||
// if there are any flagged tests
|
||||
//
|
||||
// This is to make it easier to run an individual test when debugging
|
||||
EXPECT_TRUE(find_and_run_tests(false));
|
||||
EXPECT_TRUE(find_and_run_tests());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user