mirror of
https://github.com/open-goal/jak-project
synced 2026-05-23 06:54:31 -04:00
Make decompiler naming consistent (#94)
* use a fixed object file naming by default, option to allow new map file creation
* fix prints
* fixing up edge cases
* update json config
This commit is contained in:
@@ -21,24 +21,29 @@
|
||||
#include "decompiler/IR/BasicOpBuilder.h"
|
||||
#include "decompiler/IR/CfgBuilder.h"
|
||||
#include "third-party/spdlog/include/spdlog/spdlog.h"
|
||||
#include "third-party/json.hpp"
|
||||
|
||||
/*!
|
||||
* Get a unique name for this object file.
|
||||
*/
|
||||
std::string ObjectFileRecord::to_unique_name() const {
|
||||
return name + "-v" + std::to_string(version);
|
||||
namespace {
|
||||
/*!
 * Remove a trailing ".CGO"/".cgo"/".DGO"/".dgo" extension from a name.
 * Only these four exact spellings are recognized; any other name (including one
 * that is too short to carry an extension) is returned unchanged.
 */
std::string strip_dgo_extension(const std::string& x) {
  constexpr std::string::size_type kExtLength = 4;

  // Guard first: for short names x.length() - 4 would wrap around and substr
  // would throw std::out_of_range.
  if (x.length() < kExtLength) {
    return x;
  }

  const auto ext = x.substr(x.length() - kExtLength);
  if (ext == ".CGO" || ext == ".cgo" || ext == ".DGO" || ext == ".dgo") {
    return x.substr(0, x.length() - kExtLength);
  }
  return x;
}
|
||||
} // namespace
|
||||
|
||||
std::string ObjectFileData::to_unique_name() const {
|
||||
if (!name_from_map.empty()) {
|
||||
return name_from_map;
|
||||
}
|
||||
|
||||
if (has_multiple_versions) {
|
||||
std::string result = record.name + "-";
|
||||
auto dgo_names_sorted = dgo_names;
|
||||
std::sort(dgo_names_sorted.begin(), dgo_names_sorted.end());
|
||||
for (auto x : dgo_names_sorted) {
|
||||
auto ext = x.substr(x.length() - 4, 4);
|
||||
if (ext == ".CGO" || ext == ".cgo" || ext == ".DGO" || ext == ".dgo") {
|
||||
x = x.substr(0, x.length() - 4);
|
||||
}
|
||||
x = strip_dgo_extension(x);
|
||||
result += x + "-";
|
||||
}
|
||||
result.pop_back();
|
||||
@@ -47,7 +52,7 @@ std::string ObjectFileData::to_unique_name() const {
|
||||
return record.name;
|
||||
}
|
||||
}
|
||||
ObjectFileData& ObjectFileDB::lookup_record(ObjectFileRecord rec) {
|
||||
ObjectFileData& ObjectFileDB::lookup_record(const ObjectFileRecord& rec) {
|
||||
ObjectFileData* result = nullptr;
|
||||
|
||||
for (auto& x : obj_files_by_name[rec.name]) {
|
||||
@@ -65,12 +70,23 @@ ObjectFileData& ObjectFileDB::lookup_record(ObjectFileRecord rec) {
|
||||
/*!
|
||||
* Build an object file DB for the given list of DGOs.
|
||||
*/
|
||||
ObjectFileDB::ObjectFileDB(const std::vector<std::string>& _dgos) {
|
||||
ObjectFileDB::ObjectFileDB(const std::vector<std::string>& _dgos,
|
||||
const std::string& obj_file_name_map_file) {
|
||||
Timer timer;
|
||||
|
||||
spdlog::info("-Loading types...");
|
||||
dts.parse_type_defs({"decompiler", "config", "all-types.gc"});
|
||||
|
||||
if (!obj_file_name_map_file.empty()) {
|
||||
spdlog::info("-Loading obj name map file...");
|
||||
load_map_file(file_util::read_text_file(file_util::get_file_path({obj_file_name_map_file})));
|
||||
} else {
|
||||
spdlog::warn(
|
||||
"Not using an obj name map file! The decompiler will automatically generate object file "
|
||||
"names and write them to out/objs.txt. It is recommended to reuse this map file to get "
|
||||
"consistent naming when doing a partial decompilation.");
|
||||
}
|
||||
|
||||
spdlog::info("-Initializing ObjectFileDB...");
|
||||
for (auto& dgo : _dgos) {
|
||||
get_objs_from_dgo(dgo);
|
||||
@@ -82,11 +98,36 @@ ObjectFileDB::ObjectFileDB(const std::vector<std::string>& _dgos) {
|
||||
spdlog::info("Total objs: {}", stats.total_obj_files);
|
||||
spdlog::info("Unique objs: {}", stats.unique_obj_files);
|
||||
spdlog::info("Unique data: {} bytes", stats.unique_obj_bytes);
|
||||
spdlog::info("Total {} ms ({:3f} MB/sec, {} obj/sec", timer.getMs(),
|
||||
spdlog::info("Total {:.2f} ms ({:.3f} MB/sec, {:.2f} obj/sec)", timer.getMs(),
|
||||
stats.total_dgo_bytes / ((1u << 20u) * timer.getSeconds()),
|
||||
stats.total_obj_files / timer.getSeconds());
|
||||
}
|
||||
|
||||
void ObjectFileDB::load_map_file(const std::string& map_data) {
|
||||
auto j = nlohmann::json::parse(map_data, nullptr, true, true);
|
||||
|
||||
for (auto& x : j) {
|
||||
auto mapped_name = x[0].get<std::string>();
|
||||
auto game_name = x[1].get<std::string>();
|
||||
auto dgo_names = x[3].get<std::vector<std::string>>();
|
||||
bool is_ag = mapped_name.find("-ag") != std::string::npos;
|
||||
auto game_name_with_ag = game_name;
|
||||
if (is_ag) {
|
||||
game_name_with_ag += "-ag";
|
||||
}
|
||||
|
||||
// add dgo
|
||||
for (auto& dgo : dgo_names) {
|
||||
auto kv = dgo_obj_name_map[dgo].find(game_name_with_ag);
|
||||
if (kv != dgo_obj_name_map[dgo].end()) {
|
||||
spdlog::error("Object {} in dgo {} occurs more than one time.", game_name_with_ag, dgo);
|
||||
assert(false);
|
||||
}
|
||||
dgo_obj_name_map[dgo][game_name_with_ag] = mapped_name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Header for a DGO file
|
||||
struct DgoHeader {
|
||||
uint32_t size;
|
||||
@@ -215,6 +256,14 @@ void ObjectFileDB::get_objs_from_dgo(const std::string& filename) {
|
||||
assert(reader.bytes_left() >= obj_header.size);
|
||||
assert_string_empty_after(obj_header.name, 60);
|
||||
|
||||
if (std::string(obj_header.name).find("-ag") != std::string::npos) {
|
||||
spdlog::error(
|
||||
"Object file {} has \"-ag\" in its name. This will break any tools which use this to "
|
||||
"detect an art group",
|
||||
obj_header.name);
|
||||
assert(false);
|
||||
}
|
||||
|
||||
auto name = get_object_file_name(obj_header.name, reader.here(), obj_header.size);
|
||||
|
||||
add_obj_from_dgo(name, obj_header.name, reader.here(), obj_header.size, dgo_base_name);
|
||||
@@ -276,6 +325,21 @@ void ObjectFileDB::add_obj_from_dgo(const std::string& obj_name,
|
||||
data.record.version = obj_files_by_name[obj_name].size();
|
||||
data.name_in_dgo = name_in_dgo;
|
||||
data.obj_version = version;
|
||||
if (!dgo_obj_name_map.empty()) {
|
||||
auto dgo_kv = dgo_obj_name_map.find(strip_dgo_extension(dgo_name));
|
||||
if (dgo_kv == dgo_obj_name_map.end()) {
|
||||
spdlog::error("Object {} is from DGO {}, but this DGO wasn't in the map.", obj_name,
|
||||
dgo_name);
|
||||
assert(false);
|
||||
}
|
||||
|
||||
auto name_kv = dgo_kv->second.find(obj_name);
|
||||
if (name_kv == dgo_kv->second.end()) {
|
||||
spdlog::error("Object {} from DGO {} wasn't found in the name map.", obj_name, dgo_name);
|
||||
assert(false);
|
||||
}
|
||||
data.name_from_map = name_kv->second;
|
||||
}
|
||||
obj_files_by_dgo[dgo_name].push_back(data.record);
|
||||
obj_files_by_name[obj_name].emplace_back(std::move(data));
|
||||
stats.unique_obj_files++;
|
||||
@@ -421,7 +485,7 @@ void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool d
|
||||
for_each_obj([&](ObjectFileData& obj) {
|
||||
if (obj.linked_data.segments == 3 || !dump_v3_only) {
|
||||
auto file_text = obj.linked_data.print_words();
|
||||
auto file_name = combine_path(output_dir, obj.record.to_unique_name() + ".txt");
|
||||
auto file_name = combine_path(output_dir, obj.to_unique_name() + ".txt");
|
||||
total_bytes += file_text.size();
|
||||
file_util::write_text_file(file_name, file_text);
|
||||
total_files++;
|
||||
@@ -430,8 +494,8 @@ void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool d
|
||||
|
||||
spdlog::info("Wrote object file dumps:");
|
||||
spdlog::info(" Total {} files", total_files);
|
||||
spdlog::info(" Total {:3f} MB", total_bytes / ((float)(1u << 20u)));
|
||||
spdlog::info(" Total {} ms ({:3f} MB/sec)", timer.getMs(),
|
||||
spdlog::info(" Total {:.3f} MB", total_bytes / ((float)(1u << 20u)));
|
||||
spdlog::info(" Total {} ms ({:.3f} MB/sec)", timer.getMs(),
|
||||
total_bytes / ((1u << 20u) * timer.getSeconds()));
|
||||
// printf("\n");
|
||||
}
|
||||
@@ -445,12 +509,15 @@ void ObjectFileDB::write_disassembly(const std::string& output_dir,
|
||||
Timer timer;
|
||||
uint32_t total_bytes = 0, total_files = 0;
|
||||
|
||||
std::string asm_functions;
|
||||
|
||||
for_each_obj([&](ObjectFileData& obj) {
|
||||
if (obj.linked_data.has_any_functions() || disassemble_objects_without_functions) {
|
||||
auto file_text = obj.linked_data.print_disassembly();
|
||||
auto file_name = combine_path(output_dir, obj.record.to_unique_name() + ".func");
|
||||
asm_functions += obj.linked_data.print_asm_function_disassembly(obj.to_unique_name());
|
||||
auto file_name = combine_path(output_dir, obj.to_unique_name() + ".func");
|
||||
|
||||
auto json_asm_text = obj.linked_data.to_asm_json();
|
||||
auto json_asm_text = obj.linked_data.to_asm_json(obj.to_unique_name());
|
||||
auto json_asm_file_name = combine_path(output_dir, obj.to_unique_name() + "_asm.json");
|
||||
file_util::write_text_file(json_asm_file_name, json_asm_text);
|
||||
|
||||
@@ -460,12 +527,15 @@ void ObjectFileDB::write_disassembly(const std::string& output_dir,
|
||||
}
|
||||
});
|
||||
|
||||
total_bytes += asm_functions.size();
|
||||
total_files++;
|
||||
file_util::write_text_file(combine_path(output_dir, "asm_functions.func"), asm_functions);
|
||||
|
||||
spdlog::info("Wrote functions dumps:");
|
||||
spdlog::info(" Total {} files", total_files);
|
||||
spdlog::info(" Total {} MB", total_bytes / ((float)(1u << 20u)));
|
||||
spdlog::info(" Total {} ms ({:3f} MB/sec)", timer.getMs(),
|
||||
spdlog::info(" Total {} ms ({:.3f} MB/sec)", timer.getMs(),
|
||||
total_bytes / ((1u << 20u) * timer.getSeconds()));
|
||||
// printf("\n");
|
||||
}
|
||||
|
||||
/*!
|
||||
@@ -482,31 +552,31 @@ void ObjectFileDB::find_code() {
|
||||
obj.linked_data.find_functions();
|
||||
obj.linked_data.disassemble_functions();
|
||||
|
||||
if (get_config().game_version == 1 || obj.record.to_unique_name() != "effect-control-v0") {
|
||||
if (get_config().game_version == 1 || obj.to_unique_name() != "effect-control-v0") {
|
||||
obj.linked_data.process_fp_relative_links();
|
||||
} else {
|
||||
spdlog::warn("Skipping process_fp_relative_links in {}", obj.record.to_unique_name().c_str());
|
||||
spdlog::warn("Skipping process_fp_relative_links in {}", obj.to_unique_name().c_str());
|
||||
}
|
||||
|
||||
auto& obj_stats = obj.linked_data.stats;
|
||||
if (obj_stats.code_bytes / 4 > obj_stats.decoded_ops) {
|
||||
spdlog::warn("Failed to decode all in {} ({} / {})", obj.record.to_unique_name().c_str(),
|
||||
spdlog::warn("Failed to decode all in {} ({} / {})", obj.to_unique_name().c_str(),
|
||||
obj_stats.decoded_ops, obj_stats.code_bytes / 4);
|
||||
}
|
||||
combined_stats.add(obj.linked_data.stats);
|
||||
});
|
||||
|
||||
spdlog::info("Found code:");
|
||||
spdlog::info(" Code {} MB", combined_stats.code_bytes / (float)(1 << 20));
|
||||
spdlog::info(" Data {} MB", combined_stats.data_bytes / (float)(1 << 20));
|
||||
spdlog::info(" Code {:.3f} MB", combined_stats.code_bytes / (float)(1 << 20));
|
||||
spdlog::info(" Data {:.3f} MB", combined_stats.data_bytes / (float)(1 << 20));
|
||||
spdlog::info(" Functions: {}", combined_stats.function_count);
|
||||
spdlog::info(" fp uses resolved: {} / {} ({} %)", combined_stats.n_fp_reg_use_resolved,
|
||||
spdlog::info(" fp uses resolved: {} / {} ({:.3f} %)", combined_stats.n_fp_reg_use_resolved,
|
||||
combined_stats.n_fp_reg_use,
|
||||
100.f * (float)combined_stats.n_fp_reg_use_resolved / combined_stats.n_fp_reg_use);
|
||||
auto total_ops = combined_stats.code_bytes / 4;
|
||||
spdlog::info(" Decoded {} / {} ({} %)", combined_stats.decoded_ops, total_ops,
|
||||
spdlog::info(" Decoded {} / {} ({:.3f} %)", combined_stats.decoded_ops, total_ops,
|
||||
100.f * (float)combined_stats.decoded_ops / total_ops);
|
||||
spdlog::info(" Total {} ms", timer.getMs());
|
||||
spdlog::info(" Total {:.3f} ms", timer.getMs());
|
||||
// printf("\n");
|
||||
}
|
||||
|
||||
@@ -523,7 +593,7 @@ void ObjectFileDB::find_and_write_scripts(const std::string& output_dir) {
|
||||
auto scripts = obj.linked_data.print_scripts();
|
||||
if (!scripts.empty()) {
|
||||
all_scripts += ";--------------------------------------\n";
|
||||
all_scripts += "; " + obj.record.to_unique_name() + "\n";
|
||||
all_scripts += "; " + obj.to_unique_name() + "\n";
|
||||
all_scripts += ";---------------------------------------\n";
|
||||
all_scripts += scripts;
|
||||
}
|
||||
@@ -533,7 +603,7 @@ void ObjectFileDB::find_and_write_scripts(const std::string& output_dir) {
|
||||
file_util::write_text_file(file_name, all_scripts);
|
||||
|
||||
spdlog::info("Found scripts:");
|
||||
spdlog::info(" Total {} ms\n", timer.getMs());
|
||||
spdlog::info(" Total {:.3f} ms\n", timer.getMs());
|
||||
}
|
||||
|
||||
void ObjectFileDB::analyze_functions() {
|
||||
@@ -564,41 +634,46 @@ void ObjectFileDB::analyze_functions() {
|
||||
std::unordered_map<std::string, std::unordered_set<std::string>> duplicated_functions;
|
||||
|
||||
int uid = 1;
|
||||
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||
(void)segment_id;
|
||||
func.guessed_name.unique_id = uid++;
|
||||
auto name = func.guessed_name.to_string();
|
||||
if (func.guessed_name.expected_unique()) {
|
||||
if (unique_names.find(name) != unique_names.end()) {
|
||||
duplicated_functions[name].insert(data.record.to_unique_name());
|
||||
for_each_obj([&](ObjectFileData& data) {
|
||||
int func_in_obj = 0;
|
||||
for (int segment_id = 0; segment_id < int(data.linked_data.segments); segment_id++) {
|
||||
for (auto& func : data.linked_data.functions_by_seg.at(segment_id)) {
|
||||
func.guessed_name.unique_id = uid++;
|
||||
func.guessed_name.id_in_object = func_in_obj++;
|
||||
func.guessed_name.object_name = data.to_unique_name();
|
||||
auto name = func.guessed_name.to_string();
|
||||
|
||||
if (unique_names.find(name) != unique_names.end()) {
|
||||
duplicated_functions[name].insert(data.to_unique_name());
|
||||
}
|
||||
|
||||
unique_names.insert(name);
|
||||
|
||||
if (config.asm_functions_by_name.find(name) != config.asm_functions_by_name.end()) {
|
||||
func.warnings += "flagged as asm by config\n";
|
||||
func.suspected_asm = true;
|
||||
}
|
||||
}
|
||||
|
||||
unique_names.insert(name);
|
||||
}
|
||||
|
||||
if (config.asm_functions_by_name.find(name) != config.asm_functions_by_name.end()) {
|
||||
func.warnings += "flagged as asm by config\n";
|
||||
func.suspected_asm = true;
|
||||
}
|
||||
});
|
||||
|
||||
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||
(void)segment_id;
|
||||
auto name = func.guessed_name.to_string();
|
||||
if (func.guessed_name.expected_unique()) {
|
||||
if (duplicated_functions.find(name) != duplicated_functions.end()) {
|
||||
duplicated_functions[name].insert(data.record.to_unique_name());
|
||||
func.warnings += "this function exists in multiple non-identical object files";
|
||||
}
|
||||
|
||||
if (duplicated_functions.find(name) != duplicated_functions.end()) {
|
||||
duplicated_functions[name].insert(data.to_unique_name());
|
||||
func.warnings += "this function exists in multiple non-identical object files";
|
||||
}
|
||||
});
|
||||
|
||||
// for(const auto& kv : duplicated_functions) {
|
||||
// printf("Function %s is found in non-identical object files:\n", kv.first.c_str());
|
||||
// for(const auto& obj : kv.second) {
|
||||
// printf(" %s\n", obj.c_str());
|
||||
// }
|
||||
// }
|
||||
/*
|
||||
for (const auto& kv : duplicated_functions) {
|
||||
printf("Function %s is found in non-identical object files:\n", kv.first.c_str());
|
||||
for (const auto& obj : kv.second) {
|
||||
printf(" %s\n", obj.c_str());
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
int total_trivial_cfg_functions = 0;
|
||||
@@ -616,17 +691,20 @@ void ObjectFileDB::analyze_functions() {
|
||||
timer.start();
|
||||
int total_basic_blocks = 0;
|
||||
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||
// printf("in %s\n", func.guessed_name.to_string().c_str());
|
||||
// printf("in %s from %s\n", func.guessed_name.to_string().c_str(),
|
||||
// data.to_unique_name().c_str());
|
||||
auto blocks = find_blocks_in_function(data.linked_data, segment_id, func);
|
||||
total_basic_blocks += blocks.size();
|
||||
func.basic_blocks = blocks;
|
||||
|
||||
total_functions++;
|
||||
if (!func.suspected_asm) {
|
||||
// run analysis
|
||||
|
||||
// first, find the prologue/epilogue
|
||||
func.analyze_prologue(data.linked_data);
|
||||
}
|
||||
|
||||
if (!func.suspected_asm) {
|
||||
// run analysis
|
||||
|
||||
// build a control flow graph
|
||||
func.cfg = build_cfg(data.linked_data, segment_id, func);
|
||||
@@ -649,6 +727,9 @@ void ObjectFileDB::analyze_functions() {
|
||||
|
||||
if (func.cfg->is_fully_resolved()) {
|
||||
resolved_cfg_functions++;
|
||||
} else {
|
||||
spdlog::warn("Function {} from {} failed cfg ir", func.guessed_name.to_string(),
|
||||
data.to_unique_name());
|
||||
}
|
||||
|
||||
// type analysis
|
||||
@@ -663,9 +744,11 @@ void ObjectFileDB::analyze_functions() {
|
||||
}
|
||||
// GOOD!
|
||||
func.type = kv->second;
|
||||
/*
|
||||
spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(),
|
||||
kv->second.print());
|
||||
func.run_type_analysis(kv->second, dts, data.linked_data);
|
||||
*/
|
||||
if (func.has_typemaps()) {
|
||||
successful_type_analysis++;
|
||||
}
|
||||
@@ -690,24 +773,20 @@ void ObjectFileDB::analyze_functions() {
|
||||
if (!func.guessed_name.empty()) {
|
||||
total_named_functions++;
|
||||
}
|
||||
|
||||
// if (func.guessed_name.to_string() == "inspect") {
|
||||
// assert(false);
|
||||
// }
|
||||
});
|
||||
|
||||
spdlog::info("Found {} functions ({} with no control flow)", total_functions,
|
||||
total_trivial_cfg_functions);
|
||||
spdlog::info("Named {}/{} functions ({}%)", total_named_functions, total_functions,
|
||||
spdlog::info("Named {}/{} functions ({:.3f}%)", total_named_functions, total_functions,
|
||||
100.f * float(total_named_functions) / float(total_functions));
|
||||
spdlog::info("Excluding {} asm functions", asm_funcs);
|
||||
spdlog::info("Found {} basic blocks in {} ms", total_basic_blocks, timer.getMs());
|
||||
spdlog::info(" {}/{} functions passed cfg analysis stage ({}%)", resolved_cfg_functions,
|
||||
spdlog::info("Found {} basic blocks in {:.3f} ms", total_basic_blocks, timer.getMs());
|
||||
spdlog::info(" {}/{} functions passed cfg analysis stage ({:.3f}%)", resolved_cfg_functions,
|
||||
non_asm_funcs, 100.f * float(resolved_cfg_functions) / float(non_asm_funcs));
|
||||
int successful_basic_ops = total_basic_ops - total_failed_basic_ops;
|
||||
spdlog::info(" {}/{} basic ops converted successfully ({}%)", successful_basic_ops,
|
||||
spdlog::info(" {}/{} basic ops converted successfully ({:.3f}%)", successful_basic_ops,
|
||||
total_basic_ops, 100.f * float(successful_basic_ops) / float(total_basic_ops));
|
||||
spdlog::info(" {}/{} cfgs converted to ir ({}%)", successful_cfg_irs, non_asm_funcs,
|
||||
spdlog::info(" {}/{} cfgs converted to ir ({:.3f}%)", successful_cfg_irs, non_asm_funcs,
|
||||
100.f * float(successful_cfg_irs) / float(non_asm_funcs));
|
||||
spdlog::info(" {}/{} functions passed type analysis ({:.2f}%)\n", successful_type_analysis,
|
||||
non_asm_funcs, 100.f * float(successful_type_analysis) / float(non_asm_funcs));
|
||||
|
||||
Reference in New Issue
Block a user