diff --git a/decompiler/IR/BasicOpBuilder.cpp b/decompiler/IR/BasicOpBuilder.cpp index 60369114e0..7f243428d1 100644 --- a/decompiler/IR/BasicOpBuilder.cpp +++ b/decompiler/IR/BasicOpBuilder.cpp @@ -108,6 +108,9 @@ std::shared_ptr instr_atom_to_ir(const InstructionAtom& ia, int idx) { return std::make_shared(IR_AsmReg::VU_ACC); case InstructionAtom::IMM: return make_int(ia.get_imm()); + case InstructionAtom::VF_FIELD: + // not supported by IR1 + return std::make_shared(); default: assert(false); return nullptr; @@ -123,6 +126,10 @@ std::shared_ptr instr_atom_to_ir(const InstructionAtom& ia, int idx) { */ std::shared_ptr to_asm_automatic(const std::string& str, Instruction& instr, int idx) { auto result = std::make_shared(str); + if (instr.n_src >= 4) { + // not supported by IR1 + return std::make_shared(); + } assert(instr.n_dst < 2); assert(instr.n_src < 4); if (instr.n_dst >= 1) { diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index 9a0d0344f7..3013d4d981 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -529,74 +529,6 @@ void LinkedObjectFile::process_fp_relative_links() { } } -std::string LinkedObjectFile::to_asm_json(const std::string& obj_file_name) { - nlohmann::json data; - std::vector functions; - - std::unordered_map functions_seen; - for (int seg = segments; seg-- > 0;) { - for (size_t fi = functions_by_seg.at(seg).size(); fi--;) { - auto& func = functions_by_seg.at(seg).at(fi); - auto fname = func.guessed_name.to_string(); - if (functions_seen.find(fname) != functions_seen.end()) { - lg::warn( - "Function {} appears multiple times in the same object file {} - it cannot be uniquely " - "referenced from config", - func.guessed_name.to_string(), obj_file_name); - functions_seen[fname]++; - fname += "-v" + std::to_string(functions_seen[fname]); - } else { - functions_seen[fname] = 0; - } - - nlohmann::json::object_t f; - f["name"] = fname; - 
f["type"] = func.type.print(); - f["segment"] = seg; - f["warnings"] = func.warnings.get_warning_text(false); - f["parent_object"] = obj_file_name; - std::vector ops; - - for (int i = 1; i < func.end_word - func.start_word; i++) { - nlohmann::json::object_t op; - auto label_id = get_label_at(seg, (func.start_word + i) * 4); - if (label_id != -1) { - op["label"] = labels.at(label_id).name; - } - auto& instr = func.instructions.at(i); - op["id"] = i; - op["asm_op"] = instr.to_string(labels); - - if (func.has_basic_ops() && func.instr_starts_basic_op(i)) { - op["basic_op"] = func.get_basic_op_at_instr(i)->print(*this); - // if (func.has_typemaps()) { - // auto& tm = func.get_typemap_by_instr_idx(i); - // auto& json_type_map = op["type_map"]; - // for (auto& kv : tm) { - // json_type_map[kv.first.to_charp()] = kv.second.print(); - // } - // } - } - - for (int iidx = 0; iidx < instr.n_src; iidx++) { - if (instr.get_src(iidx).is_label()) { - auto lab = labels.at(instr.get_src(iidx).get_label()); - if (is_string(lab.target_segment, lab.offset)) { - op["referenced_string"] = get_goal_string(lab.target_segment, lab.offset / 4 - 1); - } - } - } - - ops.push_back(op); - } - f["asm"] = ops; - functions.push_back(f); - } - } - data["functions"] = functions; - return data.dump(); -} - std::string LinkedObjectFile::print_function_disassembly(Function& func, int seg, bool write_hex, diff --git a/decompiler/ObjectFile/LinkedObjectFile.h b/decompiler/ObjectFile/LinkedObjectFile.h index 0dcb3be34d..9b574cc124 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.h +++ b/decompiler/ObjectFile/LinkedObjectFile.h @@ -54,7 +54,6 @@ class LinkedObjectFile { std::string print_disassembly(); bool has_any_functions(); void append_word_to_string(std::string& dest, const LinkedWord& word) const; - std::string to_asm_json(const std::string& obj_file_name); std::string print_function_disassembly(Function& func, int seg, bool write_hex, diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp 
b/decompiler/ObjectFile/ObjectFileDB.cpp index 532cca1b07..f74ee7bf7d 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -423,18 +423,17 @@ void ObjectFileDB::process_labels() { /*! * Dump object files and their linking data to text files for debugging */ -void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool dump_v3_only) { - if (dump_v3_only) { - lg::info("- Writing object file dumps (v3 only)..."); - } else { - lg::info("- Writing object file dumps (all)..."); - } +void ObjectFileDB::write_object_file_words(const std::string& output_dir, + bool dump_data, + bool dump_code) { + lg::info("- Writing object file dumps (code? {} data? {})...", dump_code, dump_data); Timer timer; uint32_t total_bytes = 0, total_files = 0; for_each_obj([&](ObjectFileData& obj) { - if (obj.linked_data.segments == 3 || !dump_v3_only) { + if ((obj.linked_data.segments == 3 && dump_code) || + (obj.linked_data.segments != 3 && dump_data)) { auto file_text = obj.linked_data.print_words(); auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + ".txt"); total_bytes += file_text.size(); @@ -448,16 +447,14 @@ void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool d lg::info(" Total {:.3f} MB", total_bytes / ((float)(1u << 20u))); lg::info(" Total {} ms ({:.3f} MB/sec)", timer.getMs(), total_bytes / ((1u << 20u) * timer.getSeconds())); - // printf("\n"); } /*! * Dump disassembly for object files containing code. Data zones will also be dumped. 
*/ void ObjectFileDB::write_disassembly(const std::string& output_dir, - bool disassemble_objects_without_functions, - bool write_json, - const std::string& file_suffix) { + bool disassemble_data, + bool disassemble_code) { lg::info("- Writing functions..."); Timer timer; uint32_t total_bytes = 0, total_files = 0; @@ -465,20 +462,10 @@ void ObjectFileDB::write_disassembly(const std::string& output_dir, std::string asm_functions; for_each_obj([&](ObjectFileData& obj) { - if (obj.linked_data.has_any_functions() || disassemble_objects_without_functions) { + if ((obj.obj_version == 3 && disassemble_code) || (obj.obj_version != 3 && disassemble_data)) { auto file_text = obj.linked_data.print_disassembly(); asm_functions += obj.linked_data.print_asm_function_disassembly(obj.to_unique_name()); - auto file_name = - file_util::combine_path(output_dir, obj.to_unique_name() + file_suffix + ".asm"); - - if (get_config().analyze_functions && write_json) { - auto json_asm_text = obj.linked_data.to_asm_json(obj.to_unique_name()); - auto json_asm_file_name = - file_util::combine_path(output_dir, obj.to_unique_name() + "_asm.json"); - file_util::write_text_file(json_asm_file_name, json_asm_text); - total_files++; - total_bytes += json_asm_text.size(); - } + auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + ".asm"); total_bytes += file_text.size(); file_util::write_text_file(file_name, file_text); diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index befcd36cb7..05afc27fab 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -58,11 +58,10 @@ class ObjectFileDB { void find_and_write_scripts(const std::string& output_dir); void dump_raw_objects(const std::string& output_dir); - void write_object_file_words(const std::string& output_dir, bool dump_v3_only); + void write_object_file_words(const std::string& output_dir, bool dump_data, bool dump_code); void 
write_disassembly(const std::string& output_dir, - bool disassemble_objects_without_functions, - bool write_json, - const std::string& file_suffix = ""); + bool disassemble_data, + bool disassemble_code); void analyze_functions_ir1(); void analyze_functions_ir2(const std::string& output_dir); diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index e5976a92d9..3fddfece64 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -47,20 +47,19 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) { ir2_variable_pass(); lg::info("Initial structuring..."); ir2_cfg_build_pass(); - if (get_config().analyze_expressions) { - lg::info("Storing temporary form result..."); - ir2_store_current_forms(); - lg::info("Expression building..."); - ir2_build_expressions(); - lg::info("Re-writing inline asm instructions..."); - ir2_rewrite_inline_asm_instructions(); - if (get_config().insert_lets) { - lg::info("Inserting lets..."); - ir2_insert_lets(); - } - lg::info("Inserting anonymous function definitions..."); - ir2_insert_anonymous_functions(); - } + + lg::info("Storing temporary form result..."); + ir2_store_current_forms(); + lg::info("Expression building..."); + ir2_build_expressions(); + lg::info("Re-writing inline asm instructions..."); + ir2_rewrite_inline_asm_instructions(); + + lg::info("Inserting lets..."); + ir2_insert_lets(); + + lg::info("Inserting anonymous function definitions..."); + ir2_insert_anonymous_functions(); if (!output_dir.empty()) { lg::info("Writing results..."); @@ -213,14 +212,6 @@ void ObjectFileDB::ir2_basic_block_pass() { func.guessed_name.to_string(), data.to_unique_name()); failed_to_build_cfg++; } - - // if we got an inspect method, inspect it. 
- if (func.is_inspect_method) { - auto result = inspect_inspect_method(func, func.method_of_type, dts, data.linked_data); - all_type_defs += ";; " + data.to_unique_name() + "\n"; - all_type_defs += result.print_as_deftype() + "\n"; - inspect_methods++; - } } if (func.suspected_asm) { diff --git a/decompiler/config.cpp b/decompiler/config.cpp index 5d715f5b1f..c0e901d6c5 100644 --- a/decompiler/config.cpp +++ b/decompiler/config.cpp @@ -38,23 +38,18 @@ void set_config(const std::string& path_to_config_file) { if (cfg.contains("obj_file_name_map_file")) { gConfig.obj_file_name_map_file = cfg.at("obj_file_name_map_file").get(); } - gConfig.write_disassembly = cfg.at("write_disassembly").get(); - gConfig.write_hexdump = cfg.at("write_hexdump").get(); - gConfig.write_scripts = cfg.at("write_scripts").get(); - gConfig.write_hexdump_on_v3_only = cfg.at("write_hexdump_on_v3_only").get(); - gConfig.disassemble_objects_without_functions = - cfg.at("disassemble_objects_without_functions").get(); + gConfig.disassemble_code = cfg.at("disassemble_code").get(); + gConfig.decompile_code = cfg.at("decompile_code").get(); + gConfig.regenerate_all_types = cfg.at("regenerate_all_types").get(); gConfig.write_hex_near_instructions = cfg.at("write_hex_near_instructions").get(); - gConfig.analyze_functions = cfg.at("analyze_functions").get(); + gConfig.write_scripts = cfg.at("write_scripts").get(); + gConfig.disassemble_data = cfg.at("disassemble_data").get(); gConfig.process_tpages = cfg.at("process_tpages").get(); gConfig.process_game_text = cfg.at("process_game_text").get(); gConfig.process_game_count = cfg.at("process_game_count").get(); + gConfig.hexdump_code = cfg.at("hexdump_code").get(); + gConfig.hexdump_data = cfg.at("hexdump_data").get(); gConfig.dump_objs = cfg.at("dump_objs").get(); - gConfig.write_func_json = cfg.at("write_func_json").get(); - gConfig.function_type_prop = cfg.at("function_type_prop").get(); - gConfig.analyze_expressions = cfg.at("analyze_expressions").get(); 
- gConfig.run_ir2 = cfg.at("run_ir2").get(); - gConfig.insert_lets = cfg.at("insert_lets").get(); gConfig.hint_inline_assembly_functions = cfg.at("inline_asm_hint").get>(); diff --git a/decompiler/config.h b/decompiler/config.h index 3d298b1d8b..9fa04bb350 100644 --- a/decompiler/config.h +++ b/decompiler/config.h @@ -49,21 +49,21 @@ struct Config { std::vector str_file_names; std::unordered_set bad_inspect_types; std::string obj_file_name_map_file; - bool write_disassembly = false; - bool write_hexdump = false; + + bool disassemble_code = false; + bool decompile_code = false; bool write_scripts = false; - bool write_hexdump_on_v3_only = false; - bool disassemble_objects_without_functions = false; - bool write_hex_near_instructions = false; - bool analyze_functions = false; + bool disassemble_data = false; bool process_tpages = false; bool process_game_text = false; bool process_game_count = false; + + bool regenerate_all_types = false; + bool write_hex_near_instructions = false; + bool hexdump_code = false; + bool hexdump_data = false; bool dump_objs = false; - bool write_func_json = false; - bool function_type_prop = false; - bool analyze_expressions = false; - bool insert_lets = false; + std::unordered_set asm_functions_by_name; std::unordered_set pair_functions_by_name; std::unordered_set no_type_analysis_functions_by_name; @@ -77,7 +77,6 @@ struct Config { function_var_overrides; std::unordered_map> label_types; std::unordered_map> stack_var_hints_by_function; - bool run_ir2 = false; std::unordered_set hint_inline_assembly_functions; }; diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index 7748dda5bf..e08c994feb 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -2,20 +2,25 @@ { "game_version":1, - // the order here matters (not sure that this is true any more...). 
KERNEL and GAME should go first - "dgo_names":["CGO/KERNEL.CGO","CGO/GAME.CGO", - "CGO/ENGINE.CGO" - , "CGO/ART.CGO", "DGO/BEA.DGO", "DGO/CIT.DGO", "CGO/COMMON.CGO", "DGO/DAR.DGO", "DGO/DEM.DGO", + + + ////////////////////// + // INPUT FILES + ////////////////////// + + // input is GOAL object files, possibly in containers. + // most objects are part of CGO/DGO files (both go in dgo_names). This includes levels and the engine + // the DGOs will be processed in this order. Usually it's best to have KERNEL, ENGINE, then the levels when + // you want to run on the entire game. + "dgo_names":["CGO/KERNEL.CGO","CGO/ENGINE.CGO", "CGO/GAME.CGO", + "CGO/ART.CGO", "DGO/BEA.DGO", "DGO/CIT.DGO", "CGO/COMMON.CGO", "DGO/DAR.DGO", "DGO/DEM.DGO", "DGO/FIN.DGO", "DGO/INT.DGO", "DGO/JUB.DGO", "DGO/JUN.DGO", "CGO/JUNGLE.CGO", "CGO/L1.CGO", "DGO/FIC.DGO", "DGO/LAV.DGO", "DGO/MAI.DGO", "CGO/MAINCAVE.CGO", "DGO/MIS.DGO", "DGO/OGR.DGO", "CGO/RACERP.CGO", "DGO/ROB.DGO", "DGO/ROL.DGO", "DGO/SNO.DGO", "DGO/SUB.DGO", "DGO/SUN.DGO", "CGO/SUNKEN.CGO", "DGO/SWA.DGO", "DGO/TIT.DGO", "DGO/TRA.DGO", "DGO/VI1.DGO", "DGO/VI2.DGO", "DGO/VI3.DGO", "CGO/VILLAGEP.CGO", "CGO/WATER-AN.CGO" ], - "dgo_names_":["CGO/KERNEL.CGO"], - - "object_file_names":["TEXT/0COMMON.TXT", "TEXT/1COMMON.TXT", "TEXT/2COMMON.TXT", "TEXT/3COMMON.TXT", "TEXT/4COMMON.TXT", - "TEXT/5COMMON.TXT", "TEXT/6COMMON.TXT"], + // some objects are part of STR files (streaming data). 
In Jak 1 this is just animations "str_file_names":["STR/BAFCELL.STR", "STR/SWTE4.STR", "STR/SWTE3.STR", "STR/SWTE2.STR", "STR/SWTE1.STR", "STR/SNRBSBFC.STR", "STR/SNRBIPFC.STR", "STR/SNRBICFC.STR", "STR/ORR3.STR", "STR/ORR2.STR", "STR/MICANNON.STR", "STR/BECANNON.STR", "STR/SWTS4.STR", "STR/SWTS3.STR", "STR/SWTS2.STR", "STR/SW4.STR", "STR/SW3.STR", "STR/SW2.STR", @@ -50,8 +55,67 @@ "STR/SAISA.STR","STR/SIHISC.STR","STR/MIIORBS.STR","STR/WAINTROD.STR","STR/SAISD2.STR","STR/GRSOPREB.STR", "STR/GRSOBBB.STR","STR/SA3INTRO.STR" ], - "str_file_names_":[], - "allowed_objects":["gstate"], + + // some objects are directly stored as files on the DVD. This is just text files. + "object_file_names":["TEXT/0COMMON.TXT", "TEXT/1COMMON.TXT", "TEXT/2COMMON.TXT", "TEXT/3COMMON.TXT", "TEXT/4COMMON.TXT", + "TEXT/5COMMON.TXT", "TEXT/6COMMON.TXT"], + + // if you want to filter to only some object names. + // it will make the decompiler much faster. + "allowed_objects":[], + + //////////////////////////// + // CODE ANALYSIS OPTIONS + //////////////////////////// + + // set to true to generate plain .asm files with MIPS disassembly, with no fancy decompilation. + // this is fast and should succeed 100% of the time. + "disassemble_code":false, + + // Run the decompiler + "decompile_code":true, + + //////////////////////////// + // DATA ANALYSIS OPTIONS + //////////////////////////// + + // set to true to generate plain .asm files for data files. + // this will display most data as hex, but will add labels/references/type pointers/strings + // this generates a huge amount of output if you run it on the entire game. 
+ "disassemble_data":false, + + // unpack textures to assets folder + "process_tpages":true, + // unpack game text to assets folder + "process_game_text":true, + // unpack game count to assets folder + "process_game_count":true, + + /////////////////////////// + // WEIRD OPTIONS + /////////////////////////// + + // these options are used rarely and should usually be left at false + + // output a file type_defs.gc which is used for the types part of all-types.gc + "regenerate_all_types":false, + + // debug option for instruction decoder + "write_hex_near_instructions":false, + + // experimental tool to extract linked lists used for region scripting in Jak 2 and Jak 3. + "write_scripts":false, + + // hex dump of code/data files. + "hexdump_code":false, + "hexdump_data":false, + // dump raw obj files + "dump_objs":false, + + + //////////////////////////// + // CONFIG FILES + //////////////////////////// "type_casts_file":"decompiler/config/jak1_ntsc_black_label/type_casts.jsonc", "anonymous_function_types_file":"decompiler/config/jak1_ntsc_black_label/anonymous_function_types.jsonc", @@ -59,36 +123,15 @@ "label_types_file":"decompiler/config/jak1_ntsc_black_label/label_types.jsonc", "stack_vars_file":"decompiler/config/jak1_ntsc_black_label/stack_vars.jsonc", - "analyze_functions":true, - "analyze_expressions":true, - "function_type_prop":true, - "insert_lets":true, - "write_disassembly":true, - "write_hex_near_instructions":false, - - "run_ir2":true, - - // if false, skips printing disassembly of object with functions, as these are usually large (~1 GB) and not interesting yet. - "disassemble_objects_without_functions":false, - - "process_tpages":true, - "process_game_text":true, - "process_game_count":true, - "dump_objs":false, - "write_func_json":false, - - // to write out data of each object file - "write_hexdump":false, - // to write out hexdump on the v3 only, to avoid the huge level data files. Only if write_hexdump is true. 
- "write_hexdump_on_v3_only":true, - - // to write out "scripts", which are currently just all the linked lists found. mostly a jak 2/3 thing - "write_scripts":false, - - // optional: a predetermined object file name map from a file. Useful if you want to run only on some DGOs but have consistent names + // optional: a predetermined object file name map from a file. + // this will make decompilation naming consistent even if you only run on some objects. "obj_file_name_map_file":"goal_src/build/all_objs.json", + //////////////////////////// + // HACKS and ASM FUNCTIONS + //////////////////////////// + "types_with_bad_inspect_methods":[ "engine", "bsp-header", diff --git a/decompiler/main.cpp b/decompiler/main.cpp index b3d4285b94..c91c533676 100644 --- a/decompiler/main.cpp +++ b/decompiler/main.cpp @@ -47,49 +47,56 @@ int main(int argc, char** argv) { // build file database lg::info("Setting up object file DB..."); ObjectFileDB db(dgos, get_config().obj_file_name_map_file, objs, strs); + + // write out DGO file info file_util::write_text_file(file_util::combine_path(out_folder, "dgo.txt"), db.generate_dgo_listing()); + // write out object file map (used for future decompilations, if desired) file_util::write_text_file(file_util::combine_path(out_folder, "obj.txt"), db.generate_obj_listing()); + // dump raw objs if (get_config().dump_objs) { auto path = file_util::combine_path(out_folder, "raw_obj"); file_util::create_dir_if_needed(path); db.dump_raw_objects(path); } - // process files (basic) + // process files (required for all analysis) db.process_link_data(); db.find_code(); db.process_labels(); - // IR1 or IR2 function analysis - if (get_config().run_ir2) { - db.analyze_functions_ir2(out_folder); - } else { - if (get_config().analyze_functions) { - db.analyze_functions_ir1(); - } - - if (get_config().write_disassembly) { - db.write_disassembly(out_folder, get_config().disassemble_objects_without_functions, - get_config().write_func_json); - } + // print 
disassembly + if (get_config().disassemble_code || get_config().disassemble_data) { + db.write_disassembly(out_folder, get_config().disassemble_data, get_config().disassemble_code); } - // common IR1 and IR2 function stuff: + // regenerate all-types if needed + if (get_config().regenerate_all_types) { + db.analyze_functions_ir1(); + file_util::write_text_file(file_util::combine_path(out_folder, "type_defs.gc"), + db.all_type_defs); + } + + // main decompile. + if (get_config().decompile_code) { + db.analyze_functions_ir2(out_folder); + } + + // write out all symbols TODO - organize by file file_util::write_text_file(file_util::combine_path(out_folder, "all-syms.gc"), db.dts.dump_symbol_types()); + if (get_config().hexdump_code || get_config().hexdump_data) { + db.write_object_file_words(out_folder, get_config().hexdump_data, get_config().hexdump_code); + } + // data stuff if (get_config().write_scripts) { db.find_and_write_scripts(out_folder); } - if (get_config().write_hexdump) { - db.write_object_file_words(out_folder, get_config().write_hexdump_on_v3_only); - } - if (get_config().process_game_text) { auto result = db.process_game_text_files(); file_util::write_text_file(file_util::get_file_path({"assets", "game_text.txt"}), result); @@ -104,9 +111,6 @@ int main(int argc, char** argv) { file_util::write_text_file(file_util::get_file_path({"assets", "game_count.txt"}), result); } - // todo print type summary - // printf("%s\n", get_type_info().get_summary().c_str()); - lg::info("Disassembly has completed successfully."); return 0; }