mirror of
https://github.com/open-goal/jak-project
synced 2026-05-26 07:39:12 -04:00
93afb02cf4
This includes all the collision stuff needed to spawn `target`, decompiles the sparticle code and adds some of the PC hacks needed for merc to run (it doesn't work quite right and looks bad, likely due to a combination of code copied from Jak 2 and the time of day hacks). There are a bunch of temporary hacks (see commits) in place to prevent the game from crashing quite as much, but it is still extremely prone to doing so due to lots of missing functions/potentially bad decomp. --------- Co-authored-by: water <awaterford111445@gmail.com>
447 lines
19 KiB
C++
447 lines
19 KiB
C++
#include "formatter.h"
|
|
|
|
#include "formatter_tree.h"
|
|
|
|
#include "common/formatter/rules/formatting_rules.h"
|
|
#include "common/formatter/rules/rule_config.h"
|
|
#include "common/log/log.h"
|
|
#include "common/util/FileUtil.h"
|
|
#include "common/util/ast_util.h"
|
|
#include "common/util/string_util.h"
|
|
|
|
#include "tree_sitter/api.h"
|
|
|
|
#include "fmt/core.h"
|
|
|
|
// Declare the `tree_sitter_opengoal` function, which is
|
|
// implemented by the `tree-sitter-opengoal` library.
|
|
extern "C" {
|
|
extern const TSLanguage* tree_sitter_opengoal();
|
|
}
|
|
|
|
int hang_indentation_width(const FormatterTreeNode& curr_node) {
|
|
if (curr_node.token || curr_node.refs.empty()) {
|
|
return 0;
|
|
}
|
|
// Get the first element of the form
|
|
const auto& first_elt = curr_node.refs.at(0);
|
|
if (first_elt.token) {
|
|
return first_elt.token->length() +
|
|
2; // +2 because the opening paren and then the following space
|
|
}
|
|
// Otherwise, continue nesting
|
|
return 1 + hang_indentation_width(first_elt);
|
|
}
|
|
|
|
// TODO - this doesn't account for paren's width contribution!
|
|
int get_total_form_inlined_width(const FormatterTreeNode& curr_node) {
|
|
if (curr_node.token) {
|
|
return curr_node.token->length();
|
|
}
|
|
int width = 1;
|
|
for (const auto& ref : curr_node.refs) {
|
|
width += get_total_form_inlined_width(ref);
|
|
}
|
|
return width + 1;
|
|
}
|
|
|
|
// TODO - compute length of each node and store it AOT
|
|
void apply_formatting_config(
|
|
FormatterTreeNode& curr_node,
|
|
std::optional<std::shared_ptr<formatter_rules::config::FormFormattingConfig>>
|
|
config_from_parent = {}) {
|
|
using namespace formatter_rules;
|
|
// node is empty, base-case
|
|
if (curr_node.token || curr_node.refs.empty()) {
|
|
return;
|
|
}
|
|
// first, check to see if this form already has a predefined formatting configuration
|
|
// if it does, that simplifies things because there is only 1 way of formatting the form
|
|
std::optional<formatter_rules::config::FormFormattingConfig> predefined_config;
|
|
if (!config_from_parent && !curr_node.refs.empty() && curr_node.refs.at(0).token) {
|
|
const auto& form_head = curr_node.refs.at(0).token;
|
|
if (form_head && config::opengoal_form_config.find(form_head.value()) !=
|
|
config::opengoal_form_config.end()) {
|
|
predefined_config = config::opengoal_form_config.at(form_head.value());
|
|
curr_node.formatting_config = predefined_config.value();
|
|
}
|
|
} else if (config_from_parent) {
|
|
// TODO - doesn't merge just replaces, a bit inflexible
|
|
predefined_config = *config_from_parent.value();
|
|
curr_node.formatting_config = predefined_config.value();
|
|
}
|
|
// In order to keep things simple, as well as because its ineffectual in lisp code (you can only
|
|
// enforce it so much without making things unreadable), line width will not matter for deciding
|
|
// whether or not to hang or flow the form
|
|
//
|
|
// This means that a hang would ALWAYS win, because it's 1 less line break. Therefore this
|
|
// simplifies our approach there is no need to explore both braches to see which one would be
|
|
// preferred.
|
|
//
|
|
// Instead, we either use the predefined configuration (obviously) or we do some checks for some
|
|
// outlier conditions to see if things should be formatted differently
|
|
//
|
|
// Otherwise, we always default to a hang.
|
|
//
|
|
// NOTE - any modifications here to child elements could be superseeded later in the recursion!
|
|
// In order to maintain your sanity, only modify things here that _arent_ touched by default
|
|
// configurations. These are explicitly prepended with `parent_mutable_`
|
|
if (!predefined_config && !curr_node.formatting_config.config_set) {
|
|
if (curr_node.metadata.is_top_level) {
|
|
curr_node.formatting_config.indentation_width = 0;
|
|
curr_node.formatting_config.hang_forms = false;
|
|
} else if (constant_list::is_constant_list(curr_node)) {
|
|
// - Check if the form is a constant list (ie. a list of numbers)
|
|
curr_node.formatting_config.indentation_width = 1;
|
|
curr_node.formatting_config.hang_forms = false;
|
|
curr_node.formatting_config.has_constant_pairs =
|
|
constant_pairs::form_should_be_constant_paired(curr_node);
|
|
// If applicable, iterate through the constant pairs, since we can potentially pair up
|
|
// non-constant second elements in a pair (like a function call), there is the potential that
|
|
// they need to spill to the next line and get indented in extra. This is an exceptional
|
|
// circumstance, we do NOT do this sort of thing when formatting normal forms (cond/case pairs
|
|
// are another similar situation)
|
|
if (curr_node.formatting_config.has_constant_pairs) {
|
|
for (int i = 0; i < (int)curr_node.refs.size(); i++) {
|
|
auto& child_ref = curr_node.refs.at(i);
|
|
const auto type = child_ref.metadata.node_type;
|
|
if (constant_types.find(type) == constant_types.end() &&
|
|
constant_pairs::is_element_second_in_constant_pair(curr_node, child_ref, i)) {
|
|
child_ref.formatting_config.parent_mutable_extra_indent = 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
} else if (curr_node.formatting_config.hang_forms && curr_node.refs.size() > 1 &&
|
|
curr_node.refs.at(1).metadata.is_comment) {
|
|
// - Check if the second argument is a comment, it looks better if we flow instead
|
|
curr_node.formatting_config.hang_forms = false;
|
|
}
|
|
}
|
|
// If we are hanging, lets determine the indentation width since it is based on the form itself
|
|
if (curr_node.formatting_config.hang_forms) {
|
|
// TODO - this isn't being calculated for a pre-defined config
|
|
// TODO - another idea is to do this during consolidation
|
|
curr_node.formatting_config.indentation_width = hang_indentation_width(curr_node);
|
|
}
|
|
// iterate through the refs
|
|
for (int i = 0; i < (int)curr_node.refs.size(); i++) {
|
|
auto& ref = curr_node.refs.at(i);
|
|
if (!ref.token) {
|
|
// If the child has a pre-defined configuration at that index, we pass it along
|
|
if (predefined_config &&
|
|
predefined_config->index_configs.find(i) != predefined_config->index_configs.end()) {
|
|
apply_formatting_config(ref, predefined_config->index_configs.at(i));
|
|
} else if (predefined_config && predefined_config->default_index_config) {
|
|
apply_formatting_config(ref, predefined_config->default_index_config);
|
|
} else {
|
|
apply_formatting_config(ref);
|
|
}
|
|
}
|
|
}
|
|
// Precompute the column widths for things like deftype fields
|
|
if (curr_node.formatting_config.determine_column_widths_for_list_elements) {
|
|
// iterate through each ref and find the max length of each index (may be a token, may not be!)
|
|
// then store that info in each list element's `list_element_column_widths` to be used when
|
|
// printing out the tokens (pad width)
|
|
// Find the maximum number of columns
|
|
int max_columns = 0;
|
|
for (const auto& field : curr_node.refs) {
|
|
if ((int)field.refs.size() > max_columns) {
|
|
max_columns = field.refs.size();
|
|
}
|
|
}
|
|
// Now find the column max widths
|
|
std::vector<int> column_max_widths = {};
|
|
for (int col = 0; col < max_columns; col++) {
|
|
column_max_widths.push_back(0);
|
|
for (const auto& field : curr_node.refs) {
|
|
if ((int)field.refs.size() > col) {
|
|
const auto width = get_total_form_inlined_width(field.refs.at(col));
|
|
if (width > column_max_widths.at(col)) {
|
|
column_max_widths[col] = width;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Apply column info to every list
|
|
for (auto& field : curr_node.refs) {
|
|
field.formatting_config.list_element_column_widths = column_max_widths;
|
|
field.formatting_config.config_set = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool form_contains_comment(const FormatterTreeNode& curr_node) {
|
|
if (curr_node.metadata.is_comment) {
|
|
return true;
|
|
}
|
|
for (const auto& ref : curr_node.refs) {
|
|
const auto contains_comment = form_contains_comment(ref);
|
|
if (contains_comment) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool form_contains_node_that_prevents_inlining(const FormatterTreeNode& curr_node) {
|
|
if (curr_node.formatting_config.should_prevent_inlining(curr_node.formatting_config,
|
|
curr_node.refs.size())) {
|
|
return true;
|
|
}
|
|
for (const auto& ref : curr_node.refs) {
|
|
const auto prevents_inlining = form_contains_node_that_prevents_inlining(ref);
|
|
if (prevents_inlining) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool can_node_be_inlined(const FormatterTreeNode& curr_node, int cursor_pos) {
|
|
using namespace formatter_rules;
|
|
if (curr_node.formatting_config.force_inline) {
|
|
return true;
|
|
}
|
|
// First off, we cannot inline the top level
|
|
if (curr_node.metadata.is_top_level) {
|
|
return false;
|
|
}
|
|
// If the config explicitly prevents inlining, or it contains a sub-node that prevents inlining
|
|
if (curr_node.formatting_config.prevent_inlining ||
|
|
form_contains_node_that_prevents_inlining(curr_node)) {
|
|
return false;
|
|
}
|
|
// nor can we inline something that contains a comment in the middle
|
|
if (form_contains_comment(curr_node)) {
|
|
return false;
|
|
}
|
|
// constant pairs are not inlined!
|
|
if (curr_node.formatting_config.has_constant_pairs) {
|
|
return false;
|
|
}
|
|
// If this is set in the config, then the form is intended to be partially inlined
|
|
if (curr_node.formatting_config.inline_until_index({})) {
|
|
return false;
|
|
}
|
|
// let's see if we can inline the form all on one line to do that, we recursively explore
|
|
// the form to find the total width
|
|
int line_width = cursor_pos + get_total_form_inlined_width(curr_node);
|
|
return line_width <= indent::line_width_target; // TODO - comments
|
|
}
|
|
|
|
std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
|
|
std::vector<std::string> /*output*/ = {},
|
|
int cursor_pos = 0) {
|
|
using namespace formatter_rules;
|
|
if (!curr_node.token && curr_node.refs.empty()) {
|
|
// special case to handle an empty list
|
|
if (curr_node.node_prefix) {
|
|
return {fmt::format("{}()", curr_node.node_prefix.value())};
|
|
}
|
|
return {"()"};
|
|
}
|
|
|
|
// If its a token, just print the token and move on
|
|
if (curr_node.token) {
|
|
return {curr_node.token_str()};
|
|
}
|
|
|
|
bool inline_form = can_node_be_inlined(curr_node, cursor_pos);
|
|
// TODO - also if the form is inlinable, we can skip all the complication below and just...inline
|
|
// it!
|
|
// TODO - should figure out the inlining here as well, instead of the bool above
|
|
|
|
// Iterate the form, building up a list of the final lines but don't worry about indentation
|
|
// at this stage. Once the lines are finalized, it's easy to add the indentation later
|
|
//
|
|
// This means we may combine elements onto the same line in this step.
|
|
std::vector<std::string> form_lines = {};
|
|
|
|
for (int i = 0; i < (int)curr_node.refs.size(); i++) {
|
|
const auto& ref = curr_node.refs.at(i);
|
|
// Add new line entry
|
|
if (ref.token) {
|
|
// Cleanup block-comments
|
|
std::string val = ref.token_str();
|
|
if (ref.metadata.node_type == "block_comment") {
|
|
// TODO - change this sanitization to return a list of lines instead of a single new-lined
|
|
// line
|
|
val = comments::format_block_comment(ref.token_str());
|
|
}
|
|
form_lines.push_back(val);
|
|
if (!curr_node.metadata.is_top_level && i == (int)curr_node.refs.size() - 1 &&
|
|
(ref.metadata.is_comment)) {
|
|
// if there's an inline comment at the end of a form, we have to force the paren to the next
|
|
// line and do a new-line paren this is ugly, but we have no choice!
|
|
form_lines.push_back("");
|
|
}
|
|
} else {
|
|
// If it's not a token, we have to recursively build up the form
|
|
// TODO - add the cursor_pos here
|
|
const auto& lines = apply_formatting(ref, {}, cursor_pos);
|
|
for (int i = 0; i < (int)lines.size(); i++) {
|
|
const auto& line = lines.at(i);
|
|
form_lines.push_back(fmt::format(
|
|
"{}{}", str_util::repeat(ref.formatting_config.parent_mutable_extra_indent, " "),
|
|
line));
|
|
}
|
|
}
|
|
// If we are hanging forms, combine the first two forms onto the same line
|
|
if (i == (int)curr_node.refs.size() - 1 && form_lines.size() > 1 &&
|
|
(curr_node.formatting_config.hang_forms ||
|
|
curr_node.formatting_config.combine_first_two_lines)) {
|
|
form_lines.at(0) += fmt::format(" {}", form_lines.at(1));
|
|
form_lines.erase(form_lines.begin() + 1);
|
|
} else if ((i + 1) < (int)curr_node.refs.size()) {
|
|
const auto& next_ref = curr_node.refs.at(i + 1);
|
|
// combine the next inline comment or constant pair
|
|
if ((next_ref.metadata.node_type == "comment" && next_ref.metadata.is_inline) ||
|
|
(curr_node.formatting_config.has_constant_pairs &&
|
|
constant_pairs::is_element_second_in_constant_pair(curr_node, next_ref, i + 1))) {
|
|
// TODO
|
|
// has issues with not consolidating first lines, this should probably just be moved to
|
|
// outside this loop for simplicity, do it later
|
|
if (next_ref.token) {
|
|
form_lines.at(form_lines.size() - 1) += fmt::format(" {}", next_ref.token.value());
|
|
i++;
|
|
} else if (can_node_be_inlined(next_ref, cursor_pos)) {
|
|
const auto& lines = apply_formatting(next_ref, {}, cursor_pos); // TODO - cursor pos
|
|
for (const auto& line : lines) {
|
|
form_lines.at(form_lines.size() - 1) += fmt::format(" {}", line);
|
|
}
|
|
i++;
|
|
}
|
|
if (!curr_node.metadata.is_top_level && next_ref.metadata.node_type == "comment" &&
|
|
(i + 1) == (int)curr_node.refs.size()) {
|
|
form_lines.push_back("");
|
|
}
|
|
}
|
|
}
|
|
// If we are at the top level, potential separate with a new line
|
|
if (blank_lines::should_insert_blank_line(curr_node, ref, i)) {
|
|
form_lines.at(form_lines.size() - 1) += "\n";
|
|
}
|
|
}
|
|
|
|
// Consolidate any lines if the configuration requires it
|
|
// TODO there is a hack here so that multi-line forms that are consolidated still line up properly
|
|
// i have to make consolidate a more first-class feature of the config
|
|
if (curr_node.formatting_config.inline_until_index(form_lines)) {
|
|
std::vector<std::string> new_form_lines = {};
|
|
const auto original_form_head_width = str_util::split(form_lines.at(0), '\n').at(0).length();
|
|
bool consolidating_lines = true;
|
|
for (int i = 0; i < (int)form_lines.size(); i++) {
|
|
if (i < curr_node.formatting_config.inline_until_index(form_lines)) {
|
|
if (new_form_lines.empty()) {
|
|
new_form_lines.push_back(form_lines.at(i));
|
|
} else {
|
|
new_form_lines.at(0) += fmt::format(" {}", form_lines.at(i));
|
|
}
|
|
} else {
|
|
if (str_util::starts_with(form_lines.at(i), " ") && consolidating_lines) {
|
|
new_form_lines.push_back(fmt::format(
|
|
"{}{}", str_util::repeat(original_form_head_width, " "), form_lines.at(i)));
|
|
} else {
|
|
consolidating_lines = false;
|
|
new_form_lines.push_back(form_lines.at(i));
|
|
}
|
|
}
|
|
}
|
|
form_lines = new_form_lines;
|
|
}
|
|
|
|
// Add any column padding
|
|
if (!curr_node.formatting_config.list_element_column_widths.empty()) {
|
|
for (int i = 0; i < (int)form_lines.size(); i++) {
|
|
const auto& token = form_lines.at(i);
|
|
if (i < (int)form_lines.size() - 1) {
|
|
form_lines[i] = str_util::pad_right(
|
|
token, curr_node.formatting_config.list_element_column_widths.at(i), ' ');
|
|
}
|
|
}
|
|
}
|
|
|
|
// Apply necessary indentation to each line and add parens
|
|
if (!curr_node.metadata.is_top_level) {
|
|
std::string form_surround_start = "(";
|
|
if (curr_node.node_prefix) {
|
|
form_surround_start = fmt::format("{}(", curr_node.node_prefix.value());
|
|
}
|
|
std::string form_surround_end = ")";
|
|
form_lines[0] = fmt::format("{}{}", form_surround_start, form_lines[0]);
|
|
form_lines[form_lines.size() - 1] =
|
|
fmt::format("{}{}", form_lines[form_lines.size() - 1], form_surround_end);
|
|
}
|
|
std::string curr_form = "";
|
|
if (curr_node.formatting_config.parent_mutable_extra_indent > 0) {
|
|
curr_form += str_util::repeat(curr_node.formatting_config.parent_mutable_extra_indent, " ");
|
|
}
|
|
if (inline_form) {
|
|
form_lines = {fmt::format("{}", fmt::join(form_lines, " "))};
|
|
} else {
|
|
for (int i = 0; i < (int)form_lines.size(); i++) {
|
|
if (i > 0) {
|
|
auto& line = form_lines.at(i);
|
|
line = fmt::format("{}{}",
|
|
str_util::repeat(curr_node.formatting_config.indentation_width_for_index(
|
|
curr_node.formatting_config, i),
|
|
" "),
|
|
line);
|
|
}
|
|
}
|
|
}
|
|
return form_lines;
|
|
}
|
|
|
|
std::string join_formatted_lines(const std::vector<std::string>& lines,
|
|
const std::string& line_ending) {
|
|
return fmt::format("{}", fmt::join(lines, line_ending));
|
|
}
|
|
|
|
std::optional<std::string> formatter::format_code(const std::string& source) {
|
|
// Create a parser.
|
|
std::shared_ptr<TSParser> parser(ts_parser_new(), TreeSitterParserDeleter());
|
|
ts_parser_set_language(parser.get(), tree_sitter_opengoal());
|
|
|
|
// Build a syntax tree based on source code stored in a string.
|
|
std::shared_ptr<TSTree> tree(
|
|
ts_parser_parse_string(parser.get(), NULL, source.c_str(), source.length()),
|
|
TreeSitterTreeDeleter());
|
|
|
|
// Get the root node of the syntax tree.
|
|
TSNode root_node = ts_tree_root_node(tree.get());
|
|
if (ts_node_is_null(root_node)) {
|
|
lg::error("null root node");
|
|
return std::nullopt;
|
|
} else if (ts_node_has_error(root_node)) {
|
|
lg::error("grammar parsing error, go figure it out!");
|
|
return std::nullopt;
|
|
}
|
|
|
|
try {
|
|
// There are three phases of formatting
|
|
// 1. Simplify the AST down to something that is easier to work on from a formatting perspective
|
|
// this also gathers basic metadata that can be done at this stage, like if the token is a
|
|
// comment or if the form is on the top-level
|
|
auto formatting_tree = FormatterTree(source, root_node);
|
|
// 2. Recursively iterate through this simplified FormatterTree and figure out what rules
|
|
// need to be applied to produce an optimal result
|
|
apply_formatting_config(formatting_tree.root);
|
|
// 3. Use this updated FormatterTree to print out the final source-code, while doing so
|
|
// we may deviate from the optimal result to produce something even more optimal by inlining
|
|
// forms that can fit within the line width.
|
|
const auto formatted_lines = apply_formatting(formatting_tree.root);
|
|
// 4. Now we joint he lines together, it's easier when formatting to leave all lines independent
|
|
// so adding indentation is easier
|
|
const auto formatted_source =
|
|
join_formatted_lines(formatted_lines, file_util::get_majority_file_line_endings(source));
|
|
return formatted_source;
|
|
} catch (std::exception& e) {
|
|
lg::error("Unable to format code - {}", e.what());
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|