Add a `dump_function_metadata` decompiler option and a `disasm` task that enables it.

When the option is set, write_disassembly() also writes func_metadata.json into the
output directory: for every object file that gets disassembled, a map of
function name -> XXH64 hash (as a decimal string) of that function's disassembly text.

Review notes:
- Some pre-existing context lines reached this patch with their <...> text missing: the
  three bare `#include` lines in ObjectFileDB.cpp and the obj_file_name_map_file `.get()`
  call in config.cpp. Nothing in this patch shows what they were, so they are left
  untouched and must be restored from the target tree before the patch will apply.
- Blank context lines and indentation were reconstructed from the hunk line counts; verify
  against the target tree.
- The `index` lines still carry the blob hashes of the original revision of this patch.

diff --git a/Taskfile.yml b/Taskfile.yml
index c6ff83d3f1..4d6a448a32 100644
--- a/Taskfile.yml
+++ b/Taskfile.yml
@@ -126,6 +126,9 @@ tasks:
     cmds:
       - "{{.FORMATTER_BIN_RELEASE_DIR}}/formatter --write --file '{{.FILE}}'"
   # DECOMPILING
+  disasm:
+    cmds:
+      - '{{.DECOMP_BIN_RELEASE_DIR}}/decompiler "./decompiler/config/{{.DECOMP_CONFIG}}" "./iso_data" "./decompiler_out" --version "{{.DECOMP_CONFIG_VERSION}}" --config-override ''{"disassemble_code": true, "dump_function_metadata": true, "levels_extract": false}'''
   decomp:
     cmds:
       - '{{.DECOMP_BIN_RELEASE_DIR}}/decompiler "./decompiler/config/{{.DECOMP_CONFIG}}" "./iso_data" "./decompiler_out" --version "{{.DECOMP_CONFIG_VERSION}}" --config-override ''{"decompile_code": true, "levels_extract": false}'''
diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp
index 3b50f7e7b1..22150faeff 100644
--- a/decompiler/ObjectFile/LinkedObjectFile.cpp
+++ b/decompiler/ObjectFile/LinkedObjectFile.cpp
@@ -18,6 +18,7 @@
 
 #include "fmt/format.h"
 #include "third-party/json.hpp"
+#include "third-party/zstd/lib/common/xxhash.h"
 
 namespace decompiler {
 /*!
@@ -710,6 +711,26 @@ std::string LinkedObjectFile::print_asm_function_disassembly(const std::string&
   return result;
 }
 
+/*!
+ * Hash the disassembly text of every function in this object file and store the result in map,
+ * keyed by function name. Comparing the hashes between games makes it easy to see whether a
+ * function is identical.
+ * Names already present in map keep their existing value (emplace does not overwrite).
+ */
+void LinkedObjectFile::dump_asm_function_metadata(
+    std::unordered_map<std::string, std::string>& map) {
+  ASSERT(segments <= 3);
+  // walk the segments from last to first
+  for (int seg = segments; seg-- > 0;) {
+    for (auto& func : functions_by_seg.at(seg)) {
+      const auto& function_rep = print_function_disassembly(func, seg, false, "");
+      const auto func_hash = XXH64(function_rep.data(), function_rep.size(), 0);
+      // the hash is stored as its decimal string
+      map.emplace(func.name(), fmt::format("{}", func_hash));
+    }
+  }
+}
+
 /*!
  * Print disassembled functions and data segments.
  */
diff --git a/decompiler/ObjectFile/LinkedObjectFile.h b/decompiler/ObjectFile/LinkedObjectFile.h
index dedefc077f..892e11e51c 100644
--- a/decompiler/ObjectFile/LinkedObjectFile.h
+++ b/decompiler/ObjectFile/LinkedObjectFile.h
@@ -67,6 +67,7 @@ class LinkedObjectFile {
                                          bool write_hex,
                                          const std::string& extra_name);
   std::string print_asm_function_disassembly(const std::string& my_name);
+  void dump_asm_function_metadata(std::unordered_map<std::string, std::string>& map);
 
   u32 read_data_word(const DecompilerLabel& label);
   const DecompilerLabel& get_label_by_name(const std::string& name) const;
diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp
index 897325f700..0fc3b6dd46 100644
--- a/decompiler/ObjectFile/ObjectFileDB.cpp
+++ b/decompiler/ObjectFile/ObjectFileDB.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include <unordered_map>
 
 #include "LinkedObjectFileCreation.h"
 
@@ -34,6 +35,7 @@
 #include "decompiler/data/game_text.h"
 #include "decompiler/data/tpage.h"
 
+#include "third-party/json.hpp"
 #include "third-party/xdelta3/xdelta3.h"
 
 namespace decompiler {
@@ -630,18 +632,26 @@ void ObjectFileDB::write_object_file_words(const fs::path& output_dir,
 void ObjectFileDB::write_disassembly(const fs::path& output_dir,
                                      bool disassemble_data,
                                      bool disassemble_code,
-                                     bool print_hex) {
+                                     bool print_hex,
+                                     bool dump_function_metadata) {
   lg::info("- Writing functions...");
   Timer timer;
   uint32_t total_bytes = 0, total_files = 0;
 
   std::string asm_functions;
 
+  // object file unique name -> (function name -> hash of the function's disassembly)
+  std::unordered_map<std::string, std::unordered_map<std::string, std::string>>
+      file_func_metadata_map;
   for_each_obj([&](ObjectFileData& obj) {
     if (((obj.obj_version == 3 || (obj.obj_version == 5 && obj.linked_data.has_any_functions())) &&
          disassemble_code) ||
         (obj.obj_version != 3 && disassemble_data)) {
       auto file_text = obj.linked_data.print_disassembly(print_hex);
+      if (dump_function_metadata) {
+        // operator[] creates the per-file map on first use
+        obj.linked_data.dump_asm_function_metadata(file_func_metadata_map[obj.to_unique_name()]);
+      }
       asm_functions += obj.linked_data.print_asm_function_disassembly(obj.to_unique_name());
       auto file_name = output_dir / (obj.to_unique_name() + ".asm");
 
@@ -655,6 +665,11 @@ void ObjectFileDB::write_disassembly(const fs::path& output_dir,
   total_files++;
   file_util::write_text_file(output_dir / "asm_functions.func", asm_functions);
 
+  if (dump_function_metadata) {
+    nlohmann::json data = file_func_metadata_map;
+    file_util::write_text_file(output_dir / "func_metadata.json", data.dump(2));
+  }
+
   lg::info("Wrote functions dumps:");
   lg::info(" Total {} files", total_files);
   lg::info(" Total {} MB", total_bytes / ((float)(1u << 20u)));
diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h
index 835f57c846..a4e537ab7d 100644
--- a/decompiler/ObjectFile/ObjectFileDB.h
+++ b/decompiler/ObjectFile/ObjectFileDB.h
@@ -196,7 +196,8 @@ class ObjectFileDB {
   void write_disassembly(const fs::path& output_dir,
                          bool disassemble_data,
                          bool disassemble_code,
-                         bool print_hex);
+                         bool print_hex,
+                         bool dump_function_metadata = false);
 
   void process_object_file_data(
       ObjectFileData& data,
diff --git a/decompiler/config.cpp b/decompiler/config.cpp
index 44af5fe861..55322cd027 100644
--- a/decompiler/config.cpp
+++ b/decompiler/config.cpp
@@ -102,6 +102,9 @@ Config make_config_via_json(nlohmann::json& json) {
     config.obj_file_name_map_file = json.at("obj_file_name_map_file").get();
   }
   config.disassemble_code = json.at("disassemble_code").get<bool>();
+  if (json.contains("dump_function_metadata")) {
+    config.dump_function_metadata = json.at("dump_function_metadata").get<bool>();
+  }
   config.decompile_code = json.at("decompile_code").get<bool>();
   if (json.contains("format_code")) {
     config.format_code = json.at("format_code").get<bool>();
diff --git a/decompiler/config.h b/decompiler/config.h
index d7d870be6e..6282d798e7 100644
--- a/decompiler/config.h
+++ b/decompiler/config.h
@@ -108,6 +108,7 @@ struct Config {
   std::string all_types_file;
 
   bool disassemble_code = false;
+  bool dump_function_metadata = false;
   bool decompile_code = false;
   bool format_code = false;
   bool write_scripts = false;
diff --git a/decompiler/config/jak3/all-types.gc b/decompiler/config/jak3/all-types.gc
index 370345f540..96dd083d4d 100644
--- a/decompiler/config/jak3/all-types.gc
+++ b/decompiler/config/jak3/all-types.gc
@@ -1,7 +1,5 @@
 ;; All Types
 
-;; TODO - some of these are likely not needed / copied from jak 2
-
 ;; type system setup
 (define-extern object type)
 (define-extern type type)
diff --git a/decompiler/decompilation_process.cpp b/decompiler/decompilation_process.cpp
index 2b3ad20778..540058ee1d 100644
--- a/decompiler/decompilation_process.cpp
+++ b/decompiler/decompilation_process.cpp
@@ -130,7 +130,7 @@ int run_decompilation_process(decompiler::Config config,
   // print disassembly
   if (config.disassemble_code || config.disassemble_data) {
     db.write_disassembly(out_folder, config.disassemble_data, config.disassemble_code,
-                         config.write_hex_near_instructions);
+                         config.write_hex_near_instructions, config.dump_function_metadata);
   }
 
   if (config.process_art_groups) {