From 3a1c9eaf7587b0b01be76e8ba8fefeaa107231ba Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Tue, 12 Oct 2021 20:33:26 -0400 Subject: [PATCH] [decompiler] clean up offline test program, reorganize decomp order (#895) * make a new offline test * finish up offline test and fix crash bugs --- common/CMakeLists.txt | 1 + common/util/diff.cpp | 308 +++++++ common/util/diff.h | 8 + decompiler/CMakeLists.txt | 6 + decompiler/ObjectFile/ObjectFileDB.h | 64 +- decompiler/ObjectFile/ObjectFileDB_IR2.cpp | 387 +++----- decompiler/analysis/static_refs.cpp | 2 +- decompiler/config/jak1_ntsc_black_label.jsonc | 13 +- decompiler/main.cpp | 2 +- test/offline/CMakeLists.txt | 3 +- test/offline/config.json | 114 +++ test/offline/offline_test_main.cpp | 825 +++++++----------- 12 files changed, 924 insertions(+), 809 deletions(-) create mode 100644 common/util/diff.cpp create mode 100644 common/util/diff.h create mode 100644 test/offline/config.json diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 032f3a5df8..f8c4f59676 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -24,6 +24,7 @@ add_library(common util/dgo_util.cpp util/DgoReader.cpp util/DgoWriter.cpp + util/diff.cpp util/FileUtil.cpp util/json_util.cpp util/Timer.cpp diff --git a/common/util/diff.cpp b/common/util/diff.cpp new file mode 100644 index 0000000000..ee5d87e709 --- /dev/null +++ b/common/util/diff.cpp @@ -0,0 +1,308 @@ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +// The Google C++ Testing and Mocking Framework (Google Test) + +#include "diff.h" + +#include +#include +#include +#include +#include +#include + +namespace { +enum EditType { kMatch, kAdd, kRemove, kReplace }; +std::vector CalculateOptimalEdits(const std::vector& left, + const std::vector& right) { + std::vector > costs(left.size() + 1, std::vector(right.size() + 1)); + std::vector > best_move(left.size() + 1, + std::vector(right.size() + 1)); + + // Populate for empty right. + for (size_t l_i = 0; l_i < costs.size(); ++l_i) { + costs[l_i][0] = static_cast(l_i); + best_move[l_i][0] = kRemove; + } + // Populate for empty left. 
+ for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) { + costs[0][r_i] = static_cast(r_i); + best_move[0][r_i] = kAdd; + } + + for (size_t l_i = 0; l_i < left.size(); ++l_i) { + for (size_t r_i = 0; r_i < right.size(); ++r_i) { + if (left[l_i] == right[r_i]) { + // Found a match. Consume it. + costs[l_i + 1][r_i + 1] = costs[l_i][r_i]; + best_move[l_i + 1][r_i + 1] = kMatch; + continue; + } + + const double add = costs[l_i + 1][r_i]; + const double remove = costs[l_i][r_i + 1]; + const double replace = costs[l_i][r_i]; + if (add < remove && add < replace) { + costs[l_i + 1][r_i + 1] = add + 1; + best_move[l_i + 1][r_i + 1] = kAdd; + } else if (remove < add && remove < replace) { + costs[l_i + 1][r_i + 1] = remove + 1; + best_move[l_i + 1][r_i + 1] = kRemove; + } else { + // We make replace a little more expensive than add/remove to lower + // their priority. + costs[l_i + 1][r_i + 1] = replace + 1.00001; + best_move[l_i + 1][r_i + 1] = kReplace; + } + } + } + + // Reconstruct the best path. We do it in reverse order. + std::vector best_path; + for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) { + EditType move = best_move[l_i][r_i]; + best_path.push_back(move); + l_i -= move != kAdd; + r_i -= move != kRemove; + } + std::reverse(best_path.begin(), best_path.end()); + return best_path; +} + +// Helper class to convert string into ids with deduplication. 
+class InternalStrings { + public: + size_t GetId(const std::string& str) { + IdMap::iterator it = ids_.find(str); + if (it != ids_.end()) + return it->second; + size_t id = ids_.size(); + return ids_[str] = id; + } + + private: + typedef std::map IdMap; + IdMap ids_; +}; + +std::vector CalculateOptimalEdits(const std::vector& left, + const std::vector& right) { + std::vector left_ids, right_ids; + { + InternalStrings intern_table; + for (size_t i = 0; i < left.size(); ++i) { + left_ids.push_back(intern_table.GetId(left[i])); + } + for (size_t i = 0; i < right.size(); ++i) { + right_ids.push_back(intern_table.GetId(right[i])); + } + } + return CalculateOptimalEdits(left_ids, right_ids); +} + +constexpr char RESET_COLOR[] = "\x1B[0m"; +constexpr char RED_COLOR[] = "\x1B[31m"; +constexpr char GREEN_COLOR[] = "\x1B[32m"; + +// Helper class that holds the state for one hunk and prints it out to the +// stream. +// It reorders adds/removes when possible to group all removes before all +// adds. It also adds the hunk header before printing into the stream. +class Hunk { + public: + Hunk(size_t left_start, size_t right_start) + : left_start_(left_start), right_start_(right_start), adds_(), removes_(), common_() {} + + void PushLine(char edit, const char* line) { + switch (edit) { + case ' ': + ++common_; + FlushEdits(); + hunk_.push_back(std::make_pair(' ', line)); + break; + case '-': + ++removes_; + hunk_removes_.push_back(std::make_pair('-', line)); + break; + case '+': + ++adds_; + hunk_adds_.push_back(std::make_pair('+', line)); + break; + } + } + + void PrintTo(std::ostream* os) { + PrintHeader(os); + FlushEdits(); + for (std::list >::const_iterator it = hunk_.begin(); + it != hunk_.end(); ++it) { + // NOTE: this part is modified from gtest to give us pretty colored diffs. 
+ switch (it->first) { + case '+': + *os << GREEN_COLOR << it->first << it->second << RESET_COLOR << "\n"; + break; + case '-': + *os << RED_COLOR << it->first << it->second << RESET_COLOR << "\n"; + break; + default: + *os << it->first << it->second << "\n"; + } + } + } + + bool has_edits() const { return adds_ || removes_; } + + private: + void FlushEdits() { + hunk_.splice(hunk_.end(), hunk_removes_); + hunk_.splice(hunk_.end(), hunk_adds_); + } + + // Print a unified diff header for one hunk. + // The format is + // "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@" + // where the left/right parts are omitted if unnecessary. + void PrintHeader(std::ostream* ss) const { + *ss << "@@ "; + if (removes_) { + *ss << "-" << left_start_ << "," << (removes_ + common_); + } + if (removes_ && adds_) { + *ss << " "; + } + if (adds_) { + *ss << "+" << right_start_ << "," << (adds_ + common_); + } + *ss << " @@\n"; + } + + size_t left_start_, right_start_; + size_t adds_, removes_, common_; + std::list > hunk_, hunk_adds_, hunk_removes_; +}; + +// Create a list of diff hunks in Unified diff format. +// Each hunk has a header generated by PrintHeader above plus a body with +// lines prefixed with ' ' for no change, '-' for deletion and '+' for +// addition. +// 'context' represents the desired unchanged prefix/suffix around the diff. +// If two hunks are close enough that their contexts overlap, then they are +// joined into one hunk. +std::string CreateUnifiedDiff(const std::vector& left, + const std::vector& right, + size_t context) { + const std::vector edits = CalculateOptimalEdits(left, right); + + size_t l_i = 0, r_i = 0, edit_i = 0; + std::stringstream ss; + while (edit_i < edits.size()) { + // Find first edit. + while (edit_i < edits.size() && edits[edit_i] == kMatch) { + ++l_i; + ++r_i; + ++edit_i; + } + + // Find the first line to include in the hunk. 
+ const size_t prefix_context = std::min(l_i, context); + Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1); + for (size_t i = prefix_context; i > 0; --i) { + hunk.PushLine(' ', left[l_i - i].c_str()); + } + + // Iterate the edits until we found enough suffix for the hunk or the input + // is over. + size_t n_suffix = 0; + for (; edit_i < edits.size(); ++edit_i) { + if (n_suffix >= context) { + // Continue only if the next hunk is very close. + auto it = edits.begin() + static_cast(edit_i); + while (it != edits.end() && *it == kMatch) + ++it; + if (it == edits.end() || static_cast(it - edits.begin()) - edit_i >= context) { + // There is no next edit or it is too far away. + break; + } + } + + EditType edit = edits[edit_i]; + // Reset count when a non match is found. + n_suffix = edit == kMatch ? n_suffix + 1 : 0; + + if (edit == kMatch || edit == kRemove || edit == kReplace) { + hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str()); + } + if (edit == kAdd || edit == kReplace) { + hunk.PushLine('+', right[r_i].c_str()); + } + + // Advance indices, depending on edit type. + l_i += edit != kAdd; + r_i += edit != kRemove; + } + + if (!hunk.has_edits()) { + // We are done. We don't want this hunk. 
+ break; + } + + hunk.PrintTo(&ss); + } + return ss.str(); +} + +std::vector SplitString(const ::std::string& str, char delimiter = '\n') { + ::std::vector< ::std::string> parsed; + ::std::string::size_type pos = 0; + while (true) { + const ::std::string::size_type colon = str.find(delimiter, pos); + if (colon == ::std::string::npos) { + parsed.push_back(str.substr(pos)); + break; + } else { + parsed.push_back(str.substr(pos, colon - pos)); + pos = colon + 1; + } + } + return parsed; +} + +} // namespace +std::string diff_strings(const std::string& lhs, const std::string& rhs) { + if (!lhs.empty() && !rhs.empty()) { + const std::vector lhs_lines = SplitString(lhs); + const std::vector rhs_lines = SplitString(rhs); + if (lhs_lines.size() > 1 || rhs_lines.size() > 1) { + return CreateUnifiedDiff(lhs_lines, rhs_lines, 2); + } + } + return ""; +} \ No newline at end of file diff --git a/common/util/diff.h b/common/util/diff.h new file mode 100644 index 0000000000..b9da9a85f9 --- /dev/null +++ b/common/util/diff.h @@ -0,0 +1,8 @@ +#pragma once + +#include + +/*! + * Diff two strings. This uses the code from gtest's diff implementation. 
+ */ +std::string diff_strings(const std::string& lhs, const std::string& rhs); \ No newline at end of file diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index d4f804c449..011b47888f 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -70,6 +70,12 @@ add_library( config.cpp) +if (UNIX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") +elseif (WIN32) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2") +endif () + target_link_libraries(decomp lzokay common diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index 4651551b74..38ea31b879 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -14,6 +14,7 @@ #include "LinkedObjectFile.h" #include "decompiler/util/DecompilerTypeSystem.h" #include "common/common_types.h" +#include "decompiler/analysis/symbol_def_map.h" namespace decompiler { /*! @@ -39,6 +40,9 @@ struct ObjectFileData { std::string name_from_map; std::string to_unique_name() const; uint32_t reference_count = 0; // number of times its used. 
+ + std::string full_output; + std::string output_with_skips; }; class ObjectFileDB { @@ -65,26 +69,26 @@ class ObjectFileDB { void analyze_functions_ir1(const Config& config); void analyze_functions_ir2(const std::string& output_dir, const Config& config, - bool skip_debug_output = false); + const std::unordered_set& skip_functions); void ir2_top_level_pass(const Config& config); - void ir2_stack_spill_slot_pass(int seg); - void ir2_basic_block_pass(int seg, const Config& config); - void ir2_atomic_op_pass(int seg, const Config& config); - void ir2_type_analysis_pass(int seg, const Config& config); - void ir2_register_usage_pass(int seg); - void ir2_variable_pass(int seg); - void ir2_cfg_build_pass(int seg); + void ir2_stack_spill_slot_pass(int seg, ObjectFileData& data); + void ir2_basic_block_pass(int seg, const Config& config, ObjectFileData& data); + void ir2_atomic_op_pass(int seg, const Config& config, ObjectFileData& data); + void ir2_type_analysis_pass(int seg, const Config& config, ObjectFileData& data); + void ir2_register_usage_pass(int seg, ObjectFileData& data); + void ir2_variable_pass(int seg, ObjectFileData& data); + void ir2_cfg_build_pass(int seg, ObjectFileData& data); void ir2_store_current_forms(int seg); - void ir2_build_expressions(int seg, const Config& config); - void ir2_insert_lets(int seg); - void ir2_rewrite_inline_asm_instructions(int seg); - void ir2_insert_anonymous_functions(int seg); - void ir2_symbol_definition_map(const std::string& output_dir); - void ir2_write_results(const std::string& output_dir, const Config& config); - void ir2_do_segment_analysis_phase1(int seg, const Config& config); - void ir2_do_segment_analysis_phase2(int seg, const Config& config); - void ir2_setup_labels(const Config& config); - void ir2_run_mips2c(const Config& config); + void ir2_build_expressions(int seg, const Config& config, ObjectFileData& data); + void ir2_insert_lets(int seg, ObjectFileData& data); + void 
ir2_rewrite_inline_asm_instructions(int seg, ObjectFileData& data); + void ir2_insert_anonymous_functions(int seg, ObjectFileData& data); + void ir2_symbol_definition_map(ObjectFileData& data); + void ir2_write_results(const std::string& output_dir, const Config& config, ObjectFileData& data); + void ir2_do_segment_analysis_phase1(int seg, const Config& config, ObjectFileData& data); + void ir2_do_segment_analysis_phase2(int seg, const Config& config, ObjectFileData& data); + void ir2_setup_labels(const Config& config, ObjectFileData& data); + void ir2_run_mips2c(const Config& config, ObjectFileData& data); std::string ir2_to_file(ObjectFileData& data, const Config& config); std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg); std::string ir2_final_out(ObjectFileData& data, @@ -157,6 +161,17 @@ class ObjectFileDB { }); } + template + void for_each_function_def_order_in_obj(ObjectFileData& data, Func f) { + for (int i = 0; i < int(data.linked_data.segments); i++) { + int fn = 0; + for (size_t j = data.linked_data.functions_by_seg.at(i).size(); j-- > 0;) { + f(data.linked_data.functions_by_seg.at(i).at(j), i); + fn++; + } + } + } + template void for_each_function_in_seg(int seg, Func f) { for_each_obj([&](ObjectFileData& data) { @@ -170,6 +185,17 @@ class ObjectFileDB { }); } + template + void for_each_function_in_seg_in_obj(int seg, ObjectFileData& data, Func f) { + int fn = 0; + if (data.linked_data.segments == 3) { + for (size_t j = data.linked_data.functions_by_seg.at(seg).size(); j-- > 0;) { + f(data.linked_data.functions_by_seg.at(seg).at(j)); + fn++; + } + } + } + // Danger: after adding all object files, we assume that the vector never reallocates. 
std::unordered_map> obj_files_by_name; std::unordered_map> obj_files_by_dgo; @@ -177,6 +203,8 @@ class ObjectFileDB { std::vector obj_file_order; std::unordered_map> dgo_obj_name_map; + SymbolMapBuilder map_builder; + struct { uint32_t total_dgo_bytes = 0; uint32_t total_obj_files = 0; diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index 063862dcde..7bfbcca839 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -35,105 +35,108 @@ namespace decompiler { */ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir, const Config& config, - bool skip_debug_output) { - (void)skip_debug_output; + const std::unordered_set& skip_functions) { // First, do basic analysis on the top level: lg::info("Using IR2 analysis..."); + lg::info("Processing top-level functions..."); ir2_top_level_pass(config); - ir2_do_segment_analysis_phase1(TOP_LEVEL_SEGMENT, config); - ir2_do_segment_analysis_phase1(DEBUG_SEGMENT, config); - ir2_do_segment_analysis_phase1(MAIN_SEGMENT, config); - - ir2_setup_labels(config); - - ir2_do_segment_analysis_phase2(TOP_LEVEL_SEGMENT, config); - + int total_file_count = 0; + for (auto& f : obj_files_by_name) { + total_file_count += f.second.size(); + } + int file_idx = 1; for_each_obj([&](ObjectFileData& data) { + Timer file_timer; + fmt::print("[{:3d}/{}]------ {}\n", file_idx++, total_file_count, data.to_unique_name()); + ir2_do_segment_analysis_phase1(TOP_LEVEL_SEGMENT, config, data); + ir2_do_segment_analysis_phase1(DEBUG_SEGMENT, config, data); + ir2_do_segment_analysis_phase1(MAIN_SEGMENT, config, data); + ir2_setup_labels(config, data); + ir2_do_segment_analysis_phase2(TOP_LEVEL_SEGMENT, config, data); try { - run_defstate(data.linked_data.functions_by_seg.at(2).front()); + if (data.linked_data.functions_by_seg.size() == 3) { + run_defstate(data.linked_data.functions_by_seg.at(2).front()); + } } catch (const std::exception& e) { 
lg::error("Failed to find defstates: {}", e.what()); } - }); + ir2_do_segment_analysis_phase2(DEBUG_SEGMENT, config, data); + ir2_do_segment_analysis_phase2(MAIN_SEGMENT, config, data); - ir2_do_segment_analysis_phase2(DEBUG_SEGMENT, config); - ir2_do_segment_analysis_phase2(MAIN_SEGMENT, config); + ir2_insert_anonymous_functions(DEBUG_SEGMENT, data); + ir2_insert_anonymous_functions(MAIN_SEGMENT, data); + ir2_insert_anonymous_functions(TOP_LEVEL_SEGMENT, data); + + ir2_run_mips2c(config, data); + + ir2_symbol_definition_map(data); + + if (!output_dir.empty()) { + ir2_write_results(output_dir, config, data); + } else { + if (!skip_functions.empty()) { + data.output_with_skips = ir2_final_out(data, skip_functions); + } + data.full_output = ir2_final_out(data); + } + + for_each_function_def_order_in_obj(data, [&](Function& f, int) { f.ir2 = {}; }); + + fmt::print("Done in {:.2f}ms\n", file_timer.getMs()); + }); if (config.generate_symbol_definition_map) { lg::info("Generating symbol definition map..."); - ir2_symbol_definition_map(output_dir); - } - - lg::info("Inserting anonymous function definitions..."); - - ir2_insert_anonymous_functions(DEBUG_SEGMENT); - ir2_insert_anonymous_functions(MAIN_SEGMENT); - ir2_insert_anonymous_functions(TOP_LEVEL_SEGMENT); - - // doesn't really matter where we do this. 
- ir2_run_mips2c(config); - - if (!output_dir.empty()) { - lg::info("Writing results..."); - ir2_write_results(output_dir, config); + map_builder.build_map(); + std::string result = map_builder.convert_to_json(); + auto file_name = file_util::combine_path(output_dir, "symbol_map.json"); + file_util::write_text_file(file_name, result); } } -void ObjectFileDB::ir2_do_segment_analysis_phase1(int seg, const Config& config) { - lg::info("ASM analysis for {} segment", SEGMENT_NAMES[seg]); - - lg::info("Processing basic blocks and control flow graph..."); - ir2_basic_block_pass(seg, config); - lg::info("Finding stack spills..."); - ir2_stack_spill_slot_pass(seg); - lg::info("Converting to atomic ops..."); - ir2_atomic_op_pass(seg, config); +void ObjectFileDB::ir2_do_segment_analysis_phase1(int seg, + const Config& config, + ObjectFileData& data) { + ir2_basic_block_pass(seg, config, data); + ir2_stack_spill_slot_pass(seg, data); + ir2_atomic_op_pass(seg, config, data); } -void ObjectFileDB::ir2_do_segment_analysis_phase2(int seg, const Config& config) { - lg::info("GOAL analysis for {} segment", SEGMENT_NAMES[seg]); +void ObjectFileDB::ir2_do_segment_analysis_phase2(int seg, + const Config& config, + ObjectFileData& data) { + ir2_type_analysis_pass(seg, config, data); + ir2_register_usage_pass(seg, data); + ir2_variable_pass(seg, data); + ir2_cfg_build_pass(seg, data); - lg::info("Running type analysis..."); - ir2_type_analysis_pass(seg, config); - lg::info("Register usage analysis..."); - ir2_register_usage_pass(seg); - lg::info("Variable analysis..."); - ir2_variable_pass(seg); - lg::info("Initial structuring..."); - ir2_cfg_build_pass(seg); + ir2_build_expressions(seg, config, data); + ir2_rewrite_inline_asm_instructions(seg, data); - lg::info("Expression building..."); - ir2_build_expressions(seg, config); - lg::info("Re-writing inline asm instructions..."); - ir2_rewrite_inline_asm_instructions(seg); - - lg::info("Inserting lets..."); - ir2_insert_lets(seg); + 
ir2_insert_lets(seg, data); } -void ObjectFileDB::ir2_setup_labels(const Config& config) { - for_each_obj([&](ObjectFileData& data) { - if (data.linked_data.segments == 3) { - std::unordered_map config_labels; - auto config_it = config.label_types.find(data.to_unique_name()); - if (config_it != config.label_types.end()) { - config_labels = config_it->second; - } - try { - data.linked_data.label_db = - std::make_unique(config_labels, data.linked_data.labels, dts); - analyze_labels(data.linked_data.label_db.get(), &data.linked_data); - } catch (const std::exception& e) { - lg::die("Error parsing labels for {}: {}\n", data.to_unique_name(), e.what()); - } +void ObjectFileDB::ir2_setup_labels(const Config& config, ObjectFileData& data) { + if (data.linked_data.segments == 3) { + std::unordered_map config_labels; + auto config_it = config.label_types.find(data.to_unique_name()); + if (config_it != config.label_types.end()) { + config_labels = config_it->second; } - }); + try { + data.linked_data.label_db = + std::make_unique(config_labels, data.linked_data.labels, dts); + analyze_labels(data.linked_data.label_db.get(), &data.linked_data); + } catch (const std::exception& e) { + lg::die("Error parsing labels for {}: {}\n", data.to_unique_name(), e.what()); + } + } } -void ObjectFileDB::ir2_run_mips2c(const Config& config) { - for_each_function_def_order([&](Function& func, int, ObjectFileData&) { +void ObjectFileDB::ir2_run_mips2c(const Config& config, ObjectFileData& data) { + for_each_function_def_order_in_obj(data, [&](Function& func, int) { if (config.hacks.mips2c_functions_by_name.count(func.name())) { lg::info("MIPS2C on {}", func.name()); run_mips2c(&func); @@ -255,28 +258,14 @@ void ObjectFileDB::ir2_top_level_pass(const Config& config) { * - Analyze prologue and epilogue * - Build control flow graph */ -void ObjectFileDB::ir2_basic_block_pass(int seg, const Config& config) { - Timer timer; - // Main Pass over each function... 
- int total_basic_blocks = 0; - int total_functions = 0; - int functions_with_one_block = 0; - int inspect_methods = 0; - int suspected_asm = 0; - int failed_to_build_cfg = 0; - - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { - total_functions++; +void ObjectFileDB::ir2_basic_block_pass(int seg, const Config& config, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { func.ir2.env.file = &data.linked_data; func.ir2.env.dts = &dts; func.ir2.env.func = &func; // first, find basic blocks. auto blocks = find_blocks_in_function(data.linked_data, seg, func); - total_basic_blocks += blocks.size(); - if (blocks.size() == 1) { - functions_with_one_block++; - } func.basic_blocks = blocks; if (!func.suspected_asm) { @@ -309,7 +298,6 @@ void ObjectFileDB::ir2_basic_block_pass(int seg, const Config& config) { if (!func.cfg->is_fully_resolved()) { lg::warn("Function {} from {} failed to build control flow graph!", func.name(), data.to_unique_name()); - failed_to_build_cfg++; } else { func.cfg_ok = true; } @@ -317,63 +305,35 @@ void ObjectFileDB::ir2_basic_block_pass(int seg, const Config& config) { if (func.suspected_asm) { func.warnings.info("Assembly Function"); - suspected_asm++; } }); - - lg::info("Found {} basic blocks in {} functions in {:.2f} ms:", total_basic_blocks, - total_functions, timer.getMs()); - lg::info(" {} functions ({:.2f}%) failed to build control flow graph", failed_to_build_cfg, - 100.f * failed_to_build_cfg / total_functions); - lg::info(" {} functions ({:.2f}%) had exactly one basic block", functions_with_one_block, - 100.f * functions_with_one_block / total_functions); - lg::info(" {} functions ({:.2f}%) were ignored as assembly", suspected_asm, - 100.f * suspected_asm / total_functions); - lg::info(" {} functions ({:.2f}%) were inspect methods\n", inspect_methods, - 100.f * inspect_methods / total_functions); } -void ObjectFileDB::ir2_stack_spill_slot_pass(int seg) { - Timer timer; - 
int functions_with_spills = 0; - int total_slots = 0; - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData&) { +void ObjectFileDB::ir2_stack_spill_slot_pass(int seg, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (!func.cfg_ok) { return; } try { auto spill_map = build_spill_map(func.instructions, {func.prologue_end, func.epilogue_start}); - auto map_size = spill_map.size(); - if (map_size) { - functions_with_spills++; - total_slots += map_size; - } func.ir2.env.set_stack_spills(spill_map); } catch (std::exception& e) { func.warnings.general_warning("stack spill failed: {}", e.what()); } }); - lg::info("Analyzed stack spills: found {} functions with spills (total {} vars), took {:.2f} ms", - functions_with_spills, total_slots, timer.getMs()); } /*! * Conversion of MIPS instructions into AtomicOps. The AtomicOps represent what we * think are IR of the original GOAL compiler. */ -void ObjectFileDB::ir2_atomic_op_pass(int seg, const Config& config) { - Timer timer; - int total_functions = 0; - int attempted = 0; - int successful = 0; - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { +void ObjectFileDB::ir2_atomic_op_pass(int seg, const Config& config, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (!func.cfg_ok) { return; } - total_functions++; if (!func.suspected_asm) { func.ir2.atomic_ops_attempted = true; - attempted++; try { bool inline_asm = config.hacks.hint_inline_assembly_functions.find(func.name()) != config.hacks.hint_inline_assembly_functions.end(); @@ -391,7 +351,6 @@ void ObjectFileDB::ir2_atomic_op_pass(int seg, const Config& config) { func.ir2.atomic_ops = std::make_shared(std::move(ops)); func.ir2.atomic_ops_succeeded = true; func.ir2.env.set_end_var(func.ir2.atomic_ops->end_op().return_var()); - successful++; } catch (std::exception& e) { lg::warn("Function {} from {} could not be converted to atomic ops: {}", 
func.name(), data.to_unique_name(), e.what()); @@ -399,23 +358,10 @@ void ObjectFileDB::ir2_atomic_op_pass(int seg, const Config& config) { } } }); - - lg::info("{}/{}/{} (successful/attempted/total) functions converted to Atomic Ops in {:.2f} ms", - successful, attempted, total_functions, timer.getMs()); - lg::info("{:.2f}% were attempted, {:.2f}% of attempted succeeded\n", - 100.f * attempted / total_functions, 100.f * successful / attempted); } -void ObjectFileDB::ir2_symbol_definition_map(const std::string& output_dir) { - Timer timer; - SymbolMapBuilder map_builder; - for_each_obj([&](ObjectFileData& data) { map_builder.add_object(data); }); - map_builder.build_map(); - std::string result = map_builder.convert_to_json(); - auto file_name = file_util::combine_path(output_dir, "symbol_map.json"); - file_util::write_text_file(file_name, result); - - lg::info("Built symbol map in {:.2f} ms", timer.getMs()); +void ObjectFileDB::ir2_symbol_definition_map(ObjectFileData& data) { + map_builder.add_object(data); } template @@ -434,22 +380,13 @@ Value try_lookup(const std::unordered_map& map, const Key& key) { * - Propagate types. * - NOTE: this will update register info usage more accurately for functions. */ -void ObjectFileDB::ir2_type_analysis_pass(int seg, const Config& config) { - Timer timer; - int total_functions = 0; - int non_asm_functions = 0; - int attempted_functions = 0; - int successful_functions = 0; - - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { - total_functions++; +void ObjectFileDB::ir2_type_analysis_pass(int seg, const Config& config, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (!func.suspected_asm) { - non_asm_functions++; TypeSpec ts; if (lookup_function_type(func.guessed_name, data.to_unique_name(), config, &ts) && func.ir2.atomic_ops_succeeded) { func.type = ts; - attempted_functions++; // try type analysis here. 
auto func_name = func.name(); auto register_casts = @@ -470,7 +407,6 @@ void ObjectFileDB::ir2_type_analysis_pass(int seg, const Config& config) { func.ir2.env.set_stack_structure_hints( try_lookup(config.stack_structure_hints_by_function, func_name)); if (run_type_analysis_ir2(ts, dts, func)) { - successful_functions++; func.ir2.env.types_succeeded = true; } else { func.warnings.type_prop_warning("Type analysis failed"); @@ -481,20 +417,11 @@ void ObjectFileDB::ir2_type_analysis_pass(int seg, const Config& config) { } } }); - - lg::info("{}/{}/{}/{} (success/attempted/non-asm/total) in {:.2f} ms\n", successful_functions, - attempted_functions, non_asm_functions, total_functions, timer.getMs()); } -void ObjectFileDB::ir2_register_usage_pass(int seg) { - Timer timer; - - int total_funcs = 0, analyzed_funcs = 0; - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { - (void)data; - total_funcs++; +void ObjectFileDB::ir2_register_usage_pass(int seg, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (!func.suspected_asm && func.ir2.atomic_ops_succeeded) { - analyzed_funcs++; func.ir2.env.set_reg_use(analyze_ir2_register_usage(func)); auto block_0_start = func.ir2.env.reg_use().block.at(0).input; @@ -536,24 +463,16 @@ void ObjectFileDB::ir2_register_usage_pass(int seg) { } } }); - - lg::info("{}/{} functions had register usage analyzed in {:.2f} ms\n", analyzed_funcs, - total_funcs, timer.getMs()); } -void ObjectFileDB::ir2_variable_pass(int seg) { - Timer timer; - int attempted = 0; - int successful = 0; - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { +void ObjectFileDB::ir2_variable_pass(int seg, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; if (!func.suspected_asm && func.ir2.atomic_ops_succeeded && func.ir2.env.has_type_analysis()) { try { - attempted++; auto result = run_variable_renaming(func, func.ir2.env.reg_use(), 
*func.ir2.atomic_ops, dts); if (result.has_value()) { - successful++; func.ir2.env.set_local_vars(*result); } } catch (const std::exception& e) { @@ -561,16 +480,14 @@ void ObjectFileDB::ir2_variable_pass(int seg) { } } }); - lg::info("{}/{} functions out of attempted passed variable pass in {:.2f} ms\n", successful, - attempted, timer.getMs()); } -void ObjectFileDB::ir2_cfg_build_pass(int seg) { +void ObjectFileDB::ir2_cfg_build_pass(int seg, ObjectFileData& data) { Timer timer; int total = 0; int attempted = 0; int successful = 0; - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; total++; if (!func.suspected_asm && func.ir2.atomic_ops_succeeded && func.cfg->is_fully_resolved()) { @@ -587,38 +504,30 @@ void ObjectFileDB::ir2_cfg_build_pass(int seg) { successful++; } }); - - lg::info("{}/{}/{} cfg build in {:.2f} ms\n", successful, attempted, total, timer.getMs()); } -void ObjectFileDB::ir2_store_current_forms(int seg) { - Timer timer; - int total = 0; - - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { +// void ObjectFileDB::ir2_store_current_forms(int seg) { +// Timer timer; +// int total = 0; +// +// for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { +// (void)data; +// +// if (func.ir2.top_form) { +// total++; +// func.ir2.debug_form_string = +// pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env)); +// } +// }); +// +// lg::info("Stored debug forms for {} functions in {:.2f} ms\n", total, timer.getMs()); +//} +// +void ObjectFileDB::ir2_build_expressions(int seg, const Config& config, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; - - if (func.ir2.top_form) { - total++; - func.ir2.debug_form_string = - pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env)); - } - }); - - lg::info("Stored debug forms for {} functions in {:.2f} 
ms\n", total, timer.getMs()); -} - -void ObjectFileDB::ir2_build_expressions(int seg, const Config& config) { - Timer timer; - int total = 0; - int attempted = 0; - int successful = 0; - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { - (void)data; - total++; if (func.ir2.top_form && func.ir2.env.has_type_analysis() && func.ir2.env.has_local_vars() && func.ir2.env.types_succeeded) { - attempted++; auto name = func.name(); auto arg_config = config.function_arg_names.find(name); auto var_config = config.function_var_overrides.find(name); @@ -630,26 +539,18 @@ void ObjectFileDB::ir2_build_expressions(int seg, const Config& config) { ? var_config->second : std::unordered_map{}, dts)) { - successful++; func.ir2.print_debug_forms = true; func.ir2.expressions_succeeded = true; } } }); - - lg::info("{}/{}/{} expression build in {:.2f} ms\n", successful, attempted, total, timer.getMs()); } -void ObjectFileDB::ir2_insert_lets(int seg) { - Timer timer; - LetStats combined_stats; - int attempted = 0; - - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData&) { +void ObjectFileDB::ir2_insert_lets(int seg, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { if (func.ir2.expressions_succeeded) { - attempted++; try { - combined_stats += insert_lets(func, func.ir2.env, *func.ir2.form_pool, func.ir2.top_form); + insert_lets(func, func.ir2.env, *func.ir2.form_pool, func.ir2.top_form); } catch (const std::exception& e) { func.warnings.general_warning( fmt::format("Error while inserting lets: {}. 
Make sure that the return type is not " @@ -658,71 +559,47 @@ void ObjectFileDB::ir2_insert_lets(int seg) { } } }); - - lg::info("Let pass on {} functions ({}/{} vars in lets) in {:.2f} ms\n", attempted, - combined_stats.vars_in_lets, combined_stats.total_vars, timer.getMs()); } -void ObjectFileDB::ir2_rewrite_inline_asm_instructions(int seg) { - Timer timer; - int total = 0; - int attempted = 0; - int successful = 0; - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { +void ObjectFileDB::ir2_rewrite_inline_asm_instructions(int seg, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; - total++; if (func.ir2.top_form && func.ir2.env.has_type_analysis()) { - attempted++; if (rewrite_inline_asm_instructions(func.ir2.top_form, *func.ir2.form_pool, func, dts)) { - successful++; func.ir2.print_debug_forms = true; } } }); - - lg::info("{}/{}/{} rewrote inline-asm instructions in {:.2f} ms\n", successful, attempted, total, - timer.getMs()); } -void ObjectFileDB::ir2_insert_anonymous_functions(int seg) { - Timer timer; - int total = 0; - for_each_function_in_seg(seg, [&](Function& func, ObjectFileData& data) { +void ObjectFileDB::ir2_insert_anonymous_functions(int seg, ObjectFileData& data) { + for_each_function_in_seg_in_obj(seg, data, [&](Function& func) { (void)data; if (func.ir2.top_form && func.ir2.env.has_type_analysis()) { try { - total += insert_static_refs(func.ir2.top_form, *func.ir2.form_pool, func, dts); + insert_static_refs(func.ir2.top_form, *func.ir2.form_pool, func, dts); } catch (std::exception& e) { func.warnings.general_warning("Failed static ref finding: {}\n", e.what()); lg::error("Function {} failed static ref: {}\n", func.name(), e.what()); } } }); - - lg::info("Inserted {} anonymous functions in {:.2f} ms\n", total, timer.getMs()); } -void ObjectFileDB::ir2_write_results(const std::string& output_dir, const Config& config) { - Timer timer; - lg::info("Writing IR2 results to 
file..."); - int total_files = 0; - int total_bytes = 0; - for_each_obj([&](ObjectFileData& obj) { - if (obj.linked_data.has_any_functions()) { - // todo - total_files++; - auto file_text = ir2_to_file(obj, config); - total_bytes += file_text.length(); - auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_ir2.asm"); - file_util::write_text_file(file_name, file_text); +void ObjectFileDB::ir2_write_results(const std::string& output_dir, + const Config& config, + ObjectFileData& obj) { + if (obj.linked_data.has_any_functions()) { + // todo - auto final = ir2_final_out(obj); - auto final_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_disasm.gc"); - file_util::write_text_file(final_name, final); - } - }); - lg::info("Wrote {} files ({:.2f} MB) in {:.2f} ms\n", total_files, total_bytes / float(1 << 20), - timer.getMs()); + auto file_text = ir2_to_file(obj, config); + auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_ir2.asm"); + file_util::write_text_file(file_name, file_text); + + auto final = ir2_final_out(obj); + auto final_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_disasm.gc"); + file_util::write_text_file(final_name, final); + } } std::string ObjectFileDB::ir2_to_file(ObjectFileData& data, const Config& config) { diff --git a/decompiler/analysis/static_refs.cpp b/decompiler/analysis/static_refs.cpp index 55099a1562..b78098f595 100644 --- a/decompiler/analysis/static_refs.cpp +++ b/decompiler/analysis/static_refs.cpp @@ -35,7 +35,7 @@ bool try_convert_lambda(const Function& parent_function, "ignored and is no longer required.", lab.name); } - if (!other_func->ir2.env.has_local_vars()) { + if (!other_func->ir2.env.has_local_vars() || !other_func->ir2.top_form) { // don't bother if we don't even have vars. 
return false; } diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index b21df3a194..76261c9600 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -4,18 +4,7 @@ // if you want to filter to only some object names. // it will make the decompiler much faster. "allowed_objects": [], - "banned_objects": [ - "crates", - "puffer", - "helix-water", - "green-eco-lurker", - "seagull", - "sunken-pipegame", - "snow-ram", - "snow-ram-boss", - "rolling-race-ring", - "part-tester" - ], + "banned_objects": [], //////////////////////////// // CODE ANALYSIS OPTIONS diff --git a/decompiler/main.cpp b/decompiler/main.cpp index 206251cffd..4946fd7b9f 100644 --- a/decompiler/main.cpp +++ b/decompiler/main.cpp @@ -83,7 +83,7 @@ int main(int argc, char** argv) { // main decompile. if (config.decompile_code) { - db.analyze_functions_ir2(out_folder, config); + db.analyze_functions_ir2(out_folder, config, {}); } // write out all symbols diff --git a/test/offline/CMakeLists.txt b/test/offline/CMakeLists.txt index fef605f755..ae29d16db3 100644 --- a/test/offline/CMakeLists.txt +++ b/test/offline/CMakeLists.txt @@ -2,4 +2,5 @@ add_executable(offline-test ${CMAKE_CURRENT_LIST_DIR}/offline_test_main.cpp) -target_link_libraries(offline-test common gtest decomp compiler) \ No newline at end of file +target_link_libraries(offline-test common gtest decomp compiler) + diff --git a/test/offline/config.json b/test/offline/config.json new file mode 100644 index 0000000000..097d914cde --- /dev/null +++ b/test/offline/config.json @@ -0,0 +1,114 @@ +{ + "dgos": ["CGO/KERNEL.CGO", "CGO/ENGINE.CGO", "CGO/GAME.CGO", "DGO/BEA.DGO", + "DGO/INT.DGO", "DGO/VI1.DGO", "DGO/VI2.DGO", "DGO/VI3.DGO", + "DGO/CIT.DGO", "DGO/MIS.DGO", "DGO/JUB.DGO", "DGO/SUN.DGO", + "DGO/DEM.DGO", "DGO/FIN.DGO", "DGO/JUN.DGO", "DGO/FIC.DGO", + "DGO/SNO.DGO", "DGO/SWA.DGO", "DGO/MAI.DGO", "DGO/ROB.DGO", + "DGO/LAV.DGO", 
"DGO/OGR.DGO", "DGO/TRA.DGO", "DGO/ROL.DGO"], + + "skip_compile_files": [ + "timer", // accessing timer regs + "display", // interrupt handlers + "target-snowball" // screwed up labels, likely cut content + ], + + "skip_compile_functions": [ + /// GCOMMON + // these functions are not implemented by the compiler in OpenGOAL, but are in GOAL. + "abs", "ash", "min", "max", "lognor", + // weird PS2 specific debug registers: + "breakpoint-range-set!", + // inline assembly + "valid?", + + /// GKERNEL + // asm + "(method 10 process)", "(method 14 dead-pool)", + + /// GSTATE + "enter-state", // stack pointer asm + + /// MATH + "rand-vu-init", "rand-vu", + "rand-vu-nostep", // random hardware + + // trig + "sin-rad", // fpu acc + "cos-rad", // fpu acc + "atan-series-rad", // fpu acc + + /// VECTOR-H + "(method 3 vector)", // this function appears twice, which confuses the compiler. + "vector4-dot", // fpu acc + + "(method 3 profile-frame)", // double definition. + + // dma-disasm + "disasm-dma-list", // missing a single cast :( + + // math camera + "transform-point-vector!", "transform-point-qword!", "transform-point-vector-scale!", + + // display-h + "put-draw-env", + + // geometry + "calculate-basis-functions-vector!", // asm requiring manual rewrite + "curve-evaluate!", // asm requiring manual rewrite + "point-in-triangle-cross", // logior on floats manual fixup + + // texture + "(method 9 texture-page-dir)", // multiplication on pointers + "adgif-shader<-texture-with-update!", // misrecognized bitfield stuff. + + // asm + "invalidate-cache-line", + + // stats-h + "(method 11 perf-stat)", "(method 12 perf-stat)", + + // sprite-distorter + "sprite-draw-distorters", // uses clipping flag. 
+ + // sync-info + "(method 15 sync-info)", // needs display stuff first + "(method 15 sync-info-eased)", // needs display stuff first + "(method 15 sync-info-paused)", // needs display stuff first + + // sparticle + "lookup-part-group-pointer-by-name", // address of element in array issue + + // ripple - calls an asm function + "ripple-execute", + + "get-task-status", + + // aligner - return-from-thread, currently not supported + "(method 9 align-control)", + + // stat collection + "start-perf-stat-collection", "end-perf-stat-collection", + + // float to int + "(method 10 bsp-header)", + + // multiply defined. + "(method 3 sprite-aux-list)", + + // camera + "slave-set-rotation!", "v-slrp2!", "v-slrp3!", // vector-dot involving the stack + + // function returning float with a weird cast. + "debug-menu-item-var-make-float", + + // decompiler BUG + "level-hint-task-process", + + // anim-tester + "(method 3 anim-tester)", + "anim-tester-save-object-seqs" // anim-tester -- new basic on the stack + ], + + "skip_compile_states": { + } +} \ No newline at end of file diff --git a/test/offline/offline_test_main.cpp b/test/offline/offline_test_main.cpp index e4031b73ba..e1e991bcf5 100644 --- a/test/offline/offline_test_main.cpp +++ b/test/offline/offline_test_main.cpp @@ -1,214 +1,95 @@ -#include -#include "common/util/FileUtil.h" -#include "gtest/gtest.h" +#include +#include +#include +#include + +#include "third-party/fmt/format.h" #include "common/log/log.h" -#include "decompiler/Disasm/OpcodeInfo.h" -#include "decompiler/config.h" +#include "common/common_types.h" +#include "common/util/FileUtil.h" +#include "common/util/json_util.h" #include "decompiler/ObjectFile/ObjectFileDB.h" +#include "common/util/diff.h" #include "goalc/compiler/Compiler.h" #include "common/util/Timer.h" -#include namespace fs = std::filesystem; -namespace { - -// list of object files to ignore during reference checks -const std::unordered_set g_files_to_skip_compiling = { - "timer", // accessing 
timer regs - "display", // interrupt handlers - "target-snowball", // screwed up labels, likely cut content +// command line arguments +struct OfflineTestArgs { + bool dump_current_output = false; + std::string iso_data_path; + s32 max_files = INT32_MAX; }; -// the functions we expect the decompiler to skip -const std::unordered_set g_functions_expected_to_reject = { - // gcommon - "quad-copy!", // asm mempcy - // gkernel - "set-to-run-bootstrap", // kernel context switch - "throw", // manually sets fp/t9. - "throw-dispatch", // restore context - "(method 0 catch-frame)", // save context - "(method 11 cpu-thread)", // kernel -> user context switch - "(method 10 cpu-thread)", // user -> kernel context switch - "reset-and-call", // kernel -> user - "return-from-thread-dead", // kernel -> user - "return-from-thread", // kernel -> user - "return-from-exception", // ps2 exception -> ps2 user - "run-function-in-process", // temp while stack vars aren't supported. - // pskernel - "kernel-check-hardwired-addresses", // ps2 ee kernel debug hook - "kernel-read-function", // ps2 ee kernel debug hook - "kernel-write-function", // ps2 ee kernel debug hook - "kernel-copy-function", // ps2 ee kernel debug hook - // math - "rand-uint31-gen", // weird and terrible random generator - // bounding-box - "(method 9 bounding-box)", // handwritten asm loop - "(method 14 bounding-box)", // handwritten asm loop - // trig - "exp", "atan0", "sincos!", "sincos-rad!", - // matrix - "(method 9 matrix)", // handwritten asm loop - "matrix-axis-sin-cos!", "matrix-axis-sin-cos-vu!", - // geometry - "circle-circle-xz-intersect", // unused not bothering - // dma-h - "dma-count-until-done", // dma asm loop - "dma-sync-with-count", "dma-send-no-scratch", "dma-sync-fast", - // dma - "symlink2", "symlink3", - "dma-sync-hang", // handwritten asm - "vector=", // asm branching - // display - "vblank-handler", // asm - "vif1-handler", "vif1-handler-debug", - // texture - "adgif-shader<-texture-with-update!", 
// mips2c - // sparticle - "sp-launch-particles-var", "particle-adgif", "sp-init-fields!", "memcpy", "sp-process-block-2d", - "sp-process-block-3d", - // ripple - asm - "ripple-execute-init", "ripple-create-wave-table", "ripple-apply-wave-table", - "ripple-matrix-scale", +/*! + * Parse command line arguments. + */ +OfflineTestArgs parse_args(int argc, char* argv[]) { + OfflineTestArgs result; - // collide-mesh-h - "(method 11 collide-mesh-cache)", // asm + for (int i = 1; i < argc; i++) { + auto arg = std::string(argv[i]); + if (arg == "--dump-mode") { + result.dump_current_output = true; + continue; + } - // mood - "update-mood-lava", // asm - "update-mood-lightning", // asm + if (arg == "--max-files") { + i++; + if (i >= argc) { + fmt::print("--max-files must be followed by an integer\n"); + exit(1); + } + result.max_files = atoi(argv[i]); + fmt::print("Limiting to {} files\n", result.max_files); + continue; + } - // ambient - "ambient-inspect", // asm, weird + result.iso_data_path = arg; + fmt::print("Using {} for ISO data\n", result.iso_data_path); + } - // background - "background-upload-vu0", "draw-node-cull", + return result; +} - // anim-tester - "(method 3 anim-tester)", - "anim-tester-save-object-seqs" // anim-tester -- new basic on the stack +// json config file data (previously was in source of offline_test_main.cpp) +struct OfflineTestConfig { + std::vector dgos; + std::unordered_set skip_compile_files; + std::unordered_set skip_compile_functions; + std::unordered_map> skip_compile_states; }; -const std::unordered_set g_functions_to_skip_compiling = { - /// GCOMMON - // these functions are not implemented by the compiler in OpenGOAL, but are in GOAL. - "abs", "ash", "min", "max", "lognor", - // weird PS2 specific debug registers: - "breakpoint-range-set!", - // inline assembly - "valid?", +/*! 
+ * Read and parse the json config file, config.json, located in test/offline + */ +OfflineTestConfig parse_config() { + auto json_file_path = file_util::get_file_path({"test", "offline", "config.json"}); + auto json = parse_commented_json(file_util::read_text_file(json_file_path), json_file_path); + OfflineTestConfig result; + result.dgos = json["dgos"].get>(); + result.skip_compile_files = json["skip_compile_files"].get>(); + result.skip_compile_functions = + json["skip_compile_functions"].get>(); + result.skip_compile_states = + json["skip_compile_states"] + .get>>(); - /// GKERNEL - // asm - "(method 10 process)", "(method 14 dead-pool)", + if (!result.skip_compile_states.empty()) { + fmt::print("skip_compile_states wasn't emtpy. It's not implemented in the decompiler yet\n"); + exit(1); + } + return result; +} - /// GSTATE - "enter-state", // stack pointer asm - - /// MATH - "rand-vu-init", "rand-vu", - "rand-vu-nostep", // random hardware - - // trig - "sin-rad", // fpu acc - "cos-rad", // fpu acc - "atan-series-rad", // fpu acc - - /// VECTOR-H - "(method 3 vector)", // this function appears twice, which confuses the compiler. - "vector4-dot", // fpu acc - - "(method 3 profile-frame)", // double definition. - - // dma-disasm - "disasm-dma-list", // missing a single cast :( - - // math camera - "transform-point-vector!", "transform-point-qword!", "transform-point-vector-scale!", - - // display-h - "put-draw-env", - - // geometry - "calculate-basis-functions-vector!", // asm requiring manual rewrite - "curve-evaluate!", // asm requiring manual rewrite - "point-in-triangle-cross", // logior on floats manual fixup - - // texture - "(method 9 texture-page-dir)", // multiplication on pointers - "adgif-shader<-texture-with-update!", // misrecognized bitfield stuff. - - // asm - "invalidate-cache-line", - - // stats-h - "(method 11 perf-stat)", "(method 12 perf-stat)", - - // sprite-distorter - "sprite-draw-distorters", // uses clipping flag. 
- - // sync-info - "(method 15 sync-info)", // needs display stuff first - "(method 15 sync-info-eased)", // needs display stuff first - "(method 15 sync-info-paused)", // needs display stuff first - - // sparticle - "lookup-part-group-pointer-by-name", // address of element in array issue - - // ripple - calls an asm function - "ripple-execute", - - "get-task-status", - - // aligner - return-from-thread, currently not supported - "(method 9 align-control)", - - // stat collection - "start-perf-stat-collection", "end-perf-stat-collection", - - // float to int - "(method 10 bsp-header)", - - // multiply defined. - "(method 3 sprite-aux-list)", - - // camera - "slave-set-rotation!", "v-slrp2!", "v-slrp3!", // vector-dot involving the stack - - // function returning float with a weird cast. - "debug-menu-item-var-make-float", - - // decompiler BUG - "level-hint-task-process", - - // anim-tester - "(method 3 anim-tester)", - "anim-tester-save-object-seqs" // anim-tester -- new basic on the stack +struct DecompilerFile { + std::filesystem::path path; + std::string name_in_dgo; + std::string unique_name; + std::string reference; }; -// default location for the data. It can be changed with a command line argument. 
-std::string g_iso_data_path = ""; - -bool g_dump_mode = false; - -struct decomp_meta { - std::string fileName; - std::string fileNameOverride; - fs::path filePath; -}; - -std::vector g_object_files_to_decompile_or_ref_check; - -std::vector dgos = {"CGO/KERNEL.CGO", "CGO/ENGINE.CGO", "CGO/GAME.CGO", "DGO/BEA.DGO", - "DGO/INT.DGO", "DGO/VI1.DGO", "DGO/VI2.DGO", "DGO/VI3.DGO", - "DGO/CIT.DGO", "DGO/MIS.DGO", "DGO/JUB.DGO", "DGO/SUN.DGO", - "DGO/DEM.DGO", "DGO/FIN.DGO", "DGO/JUN.DGO", "DGO/FIC.DGO", - "DGO/SNO.DGO", "DGO/SWA.DGO", "DGO/MAI.DGO", "DGO/ROB.DGO", - "DGO/LAV.DGO", "DGO/OGR.DGO", "DGO/TRA.DGO", "DGO/ROL.DGO"}; - -} // namespace - std::string replaceFirstOccurrence(std::string& s, const std::string& toReplace, const std::string& replaceWith) { @@ -218,13 +99,10 @@ std::string replaceFirstOccurrence(std::string& s, return s.replace(pos, toReplace.length(), replaceWith); } -int main(int argc, char** argv) { - lg::initialize(); +std::vector find_files(const std::vector& dgos) { + std::vector result; - // Determine the files to decompile and reference check by scanning the reference directory - // All relevant files are assumed to end with `_REF.g[c|d]` - // First rough order them - std::vector reference_files_rough_order; + std::unordered_map files_with_ref; for (auto& p : fs::recursive_directory_iterator( file_util::get_file_path({"test", "decompiler", "reference"}))) { if (p.is_regular_file()) { @@ -233,339 +111,156 @@ int main(int argc, char** argv) { continue; } std::string object_name = replaceFirstOccurrence(file_name, "_REF", ""); - reference_files_rough_order.push_back({object_name, "", p.path()}); + files_with_ref.insert({object_name, p.path()}); } } + + fmt::print(" Found {} reference files\n", files_with_ref.size()); + // use the all_objs.json file to place them in the correct build order auto j = parse_commented_json( file_util::read_text_file(file_util::get_file_path({"goal_src", "build", "all_objs.json"})), "all_objs.json"); + + 
std::unordered_set matched_files; for (auto& x : j) { - auto mapped_name = x[0].get(); + auto unique_name = x[0].get(); + std::vector dgoList = x[3].get>(); - for (auto& p : reference_files_rough_order) { - if (p.fileName == mapped_name) { - // Check to see if we've included atleast one of the DGO/CGOs in our hardcoded list - // If not BLOW UP - bool dgoValidated = false; - for (int i = 0; i < (int)dgoList.size(); i++) { - std::string& dgo = dgoList.at(i); - // can either be in the DGO or CGO folder, and can either end with .CGO or .DGO - if (std::find(dgos.begin(), dgos.end(), fmt::format("DGO/{}.DGO", dgo)) != dgos.end() || - std::find(dgos.begin(), dgos.end(), fmt::format("DGO/{}.CGO", dgo)) != dgos.end() || - std::find(dgos.begin(), dgos.end(), fmt::format("CGO/{}.DGO", dgo)) != dgos.end() || - std::find(dgos.begin(), dgos.end(), fmt::format("CGO/{}.CGO", dgo)) != dgos.end()) { - dgoValidated = true; - } + // for (auto& p : reference_files_rough_order) { + auto it = files_with_ref.find(unique_name); + if (it != files_with_ref.end()) { + // Check to see if we've included atleast one of the DGO/CGOs in our hardcoded list + // If not BLOW UP + bool dgoValidated = false; + for (int i = 0; i < (int)dgoList.size(); i++) { + std::string& dgo = dgoList.at(i); + // can either be in the DGO or CGO folder, and can either end with .CGO or .DGO + if (std::find(dgos.begin(), dgos.end(), fmt::format("DGO/{}.DGO", dgo)) != dgos.end() || + std::find(dgos.begin(), dgos.end(), fmt::format("DGO/{}.CGO", dgo)) != dgos.end() || + std::find(dgos.begin(), dgos.end(), fmt::format("CGO/{}.DGO", dgo)) != dgos.end() || + std::find(dgos.begin(), dgos.end(), fmt::format("CGO/{}.CGO", dgo)) != dgos.end()) { + dgoValidated = true; } - if (!dgoValidated) { - fmt::print( - "File [{}] is in the following DGOs [{}], and not one of these is in our list! 
Add " - "it!\n", - mapped_name, fmt::join(dgoList, ", ")); - return 1; - } - // Hack for working around multi-DGO files - if (mapped_name != x[1]) { - p.fileNameOverride = x[1]; - } - g_object_files_to_decompile_or_ref_check.push_back(p); - break; + } + if (!dgoValidated) { + fmt::print( + "File [{}] is in the following DGOs [{}], and not one of these is in our list! Add " + "it!\n", + unique_name, fmt::join(dgoList, ", ")); + exit(1); + } + + DecompilerFile file; + file.path = it->second; + file.unique_name = it->first; + file.name_in_dgo = x[1]; + result.push_back(file); + matched_files.insert(unique_name); + } + } + + if (matched_files.size() != files_with_ref.size()) { + fmt::print("Error: some REF files were not matched to files in all_objs.json:\n"); + for (auto& f : files_with_ref) { + if (matched_files.count(f.first) == 0) { + fmt::print(" {}\n", f.first); } } + exit(1); } - // look for an argument that's not a gtest option - bool got_arg = false; - int max_files = -1; - for (int i = 1; i < argc; i++) { - auto arg = std::string(argv[i]); - if (arg == "--dump-mode") { - g_dump_mode = true; - continue; - } - if (arg == "--max-files") { - i++; - assert(i < argc); - max_files = atoi(argv[i]); - printf("Limiting to %d files\n", max_files); - } - if (arg.length() > 2 && arg[0] == '-' && arg[1] == '-') { - continue; - } - if (got_arg) { - printf("You can only specify a single path for ISO data\n"); - return 1; - } - g_iso_data_path = arg; - lg::warn("Using path {} for iso_data", g_iso_data_path); - got_arg = true; - } - - if (max_files >= 0) { - if ((int)g_object_files_to_decompile_or_ref_check.size() > max_files) { - g_object_files_to_decompile_or_ref_check.erase( - g_object_files_to_decompile_or_ref_check.begin() + max_files, - g_object_files_to_decompile_or_ref_check.end()); - } - } - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + return result; } -class OfflineDecompilation : public ::testing::Test { - protected: - static std::unique_ptr 
db; - static std::unique_ptr config; - - static std::unique_ptr> final_output_cache; - - static void SetUpTestCase() { - // global setup - file_util::init_crc(); - decompiler::init_opcode_info(); - config = std::make_unique(decompiler::read_config_file( - file_util::get_file_path({"decompiler", "config", "jak1_ntsc_black_label.jsonc"}))); - - std::unordered_set object_files; - for (auto& p : g_object_files_to_decompile_or_ref_check) { - std::string fileName = p.fileNameOverride == "" ? p.fileName : p.fileNameOverride; - object_files.insert(fileName); - } - config->allowed_objects = object_files; - // don't try to do this because we can't write the file - config->generate_symbol_definition_map = false; - - std::vector dgo_paths; - if (g_iso_data_path.empty()) { - for (auto& x : dgos) { - dgo_paths.push_back(file_util::get_file_path({"iso_data", x})); - } - } else { - for (auto& x : dgos) { - dgo_paths.push_back(file_util::combine_path(g_iso_data_path, x)); - } - } - - db = std::make_unique(dgo_paths, config->obj_file_name_map_file, - std::vector{}, - std::vector{}, *config); - - // basic processing to find functions/data/disassembly - db->process_link_data(*config); - db->find_code(*config); - db->process_labels(); - - // fancy decompilation. 
- db->analyze_functions_ir2({}, *config, true); - - final_output_cache = std::make_unique>(); - } - - static void TearDownTestCase() { - db.reset(); - config.reset(); - final_output_cache.reset(); - } +struct Decompiler { + std::unique_ptr db; + std::unique_ptr config; }; -std::unique_ptr OfflineDecompilation::db; -std::unique_ptr OfflineDecompilation::config; -std::unique_ptr> - OfflineDecompilation::final_output_cache; +Decompiler setup_decompiler(const std::vector& files, + const OfflineTestArgs& args, + const OfflineTestConfig& offline_config) { + Decompiler dc; + file_util::init_crc(); + decompiler::init_opcode_info(); + dc.config = std::make_unique(decompiler::read_config_file( + file_util::get_file_path({"decompiler", "config", "jak1_ntsc_black_label.jsonc"}))); -/*! - * Check that the most basic disassembly into files/functions/instructions has succeeded. - */ -TEST_F(OfflineDecompilation, CheckBasicDecode) { - int obj_count = 0; - db->for_each_obj([&](decompiler::ObjectFileData& obj) { - obj_count++; - auto& stats = obj.linked_data.stats; - // make sure we decoded all instructions - EXPECT_EQ(stats.code_bytes / 4, stats.decoded_ops); - // make sure all FP uses are properly recognized - EXPECT_EQ(stats.n_fp_reg_use, stats.n_fp_reg_use_resolved); - }); - - EXPECT_EQ(obj_count, config->allowed_objects.size()); -} - -TEST_F(OfflineDecompilation, AsmFunction) { - int failed_count = 0; - db->for_each_function([&](decompiler::Function& func, int, decompiler::ObjectFileData&) { - if (func.suspected_asm) { - if (g_functions_expected_to_reject.find(func.name()) == - g_functions_expected_to_reject.end()) { - lg::error("Function {} was marked as asm, but wasn't expected.", func.name()); - failed_count++; - } - } - }); - EXPECT_EQ(failed_count, 0); -} - -/*! - * Test that all functions pass CFG build stage. 
- */ -TEST_F(OfflineDecompilation, CfgBuild) { - int failed_count = 0; - db->for_each_function([&](decompiler::Function& func, int, decompiler::ObjectFileData&) { - if (!func.suspected_asm) { - if (!func.cfg || !func.cfg->is_fully_resolved()) { - lg::error("Function {} failed cfg", func.name()); - failed_count++; - } - } - }); - - EXPECT_EQ(failed_count, 0); -} - -/*! - * Test that all functions pass the atomic op construction stage - */ -TEST_F(OfflineDecompilation, AtomicOp) { - int failed_count = 0; - db->for_each_function([&](decompiler::Function& func, int, decompiler::ObjectFileData&) { - if (!func.suspected_asm) { - if (!func.ir2.atomic_ops || !func.ir2.atomic_ops_succeeded) { - lg::error("Function {} failed atomic ops", func.name()); - failed_count++; - } - } - }); - - EXPECT_EQ(failed_count, 0); -} - -/*! - * Test that all functions pass the type analysis stage - */ -TEST_F(OfflineDecompilation, TypeAnalysis) { - int failed_count = 0; - db->for_each_function([&](decompiler::Function& func, int, decompiler::ObjectFileData&) { - if (!func.suspected_asm) { - if (!func.ir2.env.has_type_analysis() || !func.ir2.env.types_succeeded) { - lg::error("Function {} failed types", func.name()); - failed_count++; - } - } - }); - - EXPECT_EQ(failed_count, 0); -} - -TEST_F(OfflineDecompilation, RegisterUse) { - int failed_count = 0; - db->for_each_function([&](decompiler::Function& func, int, decompiler::ObjectFileData&) { - if (!func.suspected_asm) { - if (!func.ir2.env.has_reg_use()) { - lg::error("Function {} failed reg use", func.name()); - failed_count++; - } - } - }); - - EXPECT_EQ(failed_count, 0); -} - -TEST_F(OfflineDecompilation, VariableSSA) { - int failed_count = 0; - db->for_each_function([&](decompiler::Function& func, int, decompiler::ObjectFileData&) { - if (!func.suspected_asm) { - if (!func.ir2.env.has_local_vars()) { - lg::error("Function {} failed ssa", func.name()); - failed_count++; - } - } - }); - - EXPECT_EQ(failed_count, 0); -} - 
-TEST_F(OfflineDecompilation, Structuring) { - int failed_count = 0; - db->for_each_function([&](decompiler::Function& func, int, decompiler::ObjectFileData&) { - if (!func.suspected_asm) { - if (!func.ir2.top_form) { - lg::error("Function {} failed structuring", func.name()); - failed_count++; - } - } - }); - - EXPECT_EQ(failed_count, 0); -} - -TEST_F(OfflineDecompilation, Expressions) { - int failed_count = 0; - db->for_each_function([&](decompiler::Function& func, int, decompiler::ObjectFileData&) { - if (!func.suspected_asm) { - if (!func.ir2.expressions_succeeded) { - lg::error("Function {} failed expressions", func.name()); - failed_count++; - } - } - }); - - EXPECT_EQ(failed_count, 0); -} - -namespace { -void strip_trailing_newlines(std::string& in) { - while (!in.empty() && in.back() == '\n') { - in.pop_back(); + // modify the config + std::unordered_set object_files; + for (auto& file : files) { + object_files.insert(file.name_in_dgo); // todo, make this work with unique_name } -} -} // namespace -TEST_F(OfflineDecompilation, Reference) { - for (decomp_meta& file : g_object_files_to_decompile_or_ref_check) { - std::string fileName = file.fileNameOverride == "" ? file.fileName : file.fileNameOverride; - auto& obj_l = db->obj_files_by_name.at(fileName); - ASSERT_EQ(obj_l.size(), 1); + dc.config->allowed_objects = object_files; + // don't try to do this because we can't write the file + dc.config->generate_symbol_definition_map = false; - std::string src = db->ir2_final_out(obj_l.at(0)); - - lg::info("Comparing {}...", fileName); - - // NOTE - currently only handles .gc files! 
- auto reference = file_util::read_text_file(file.filePath.string()); - - bool can_cache = true; - for (auto& func_list : obj_l.at(0).linked_data.functions_by_seg) { - for (auto& func : func_list) { - if (g_functions_to_skip_compiling.find(func.name()) != - g_functions_to_skip_compiling.end()) { - can_cache = false; - break; - } - } + std::vector dgo_paths; + if (args.iso_data_path.empty()) { + for (auto& x : offline_config.dgos) { + dgo_paths.push_back(file_util::get_file_path({"iso_data", x})); } - - if (can_cache) { - EXPECT_EQ(final_output_cache->count(fileName), 0); - final_output_cache->insert({file.fileName, src}); - } - - strip_trailing_newlines(reference); - strip_trailing_newlines(src); - - if (g_dump_mode) { - if (reference != src) { - file_util::create_dir_if_needed("./failures"); - file_util::write_text_file("./failures/" + file.fileName + "_REF.gc", src); - EXPECT_TRUE(false); - } - } else { - EXPECT_EQ(reference, src); + } else { + for (auto& x : offline_config.dgos) { + dgo_paths.push_back(file_util::combine_path(args.iso_data_path, x)); } } + + dc.db = std::make_unique(dgo_paths, dc.config->obj_file_name_map_file, + std::vector{}, + std::vector{}, *dc.config); + + std::unordered_set db_files; + for (auto& files_by_name : dc.db->obj_files_by_name) { + for (auto& f : files_by_name.second) { + db_files.insert(f.to_unique_name()); + } + } + + if (db_files.size() != files.size()) { + fmt::print("DB file error.\n"); + for (auto& f : files) { + if (!db_files.count(f.unique_name)) { + fmt::print("didn't find {}\n", f.unique_name); + } + } + exit(1); + } + + return dc; +} + +void disassemble(Decompiler& dc) { + dc.db->process_link_data(*dc.config); + dc.db->find_code(*dc.config); + dc.db->process_labels(); +} + +void decompile(Decompiler& dc, const OfflineTestConfig& config) { + dc.db->analyze_functions_ir2({}, *dc.config, config.skip_compile_functions); +} + +std::string strip_trailing_newlines(const std::string& in) { + std::string out = in; + while 
(!out.empty() && out.back() == '\n') { + out.pop_back(); + } + return out; +} + +decompiler::ObjectFileData& get_data(Decompiler& dc, + const std::string& unique_name, + const std::string& name_in_dgo) { + auto& files = dc.db->obj_files_by_name.at(name_in_dgo); + auto it = std::find_if(files.begin(), files.end(), [&](const decompiler::ObjectFileData& data) { + return data.to_unique_name() == unique_name; + }); + assert(it != files.end()); + return *it; } -namespace { int line_count(const std::string& str) { int result = 0; for (auto& c : str) { @@ -575,9 +270,47 @@ int line_count(const std::string& str) { } return result; } -} // namespace -TEST_F(OfflineDecompilation, Compile) { +struct CompareResult { + std::vector failing_files; + int total_files = 0; + int ok_files = 0; + int total_lines = 0; + + bool total_pass = true; +}; + +CompareResult compare(Decompiler& dc, const std::vector& refs, bool dump_mode) { + CompareResult compare_result; + + for (const auto& file : refs) { + auto& data = get_data(dc, file.unique_name, file.name_in_dgo); + std::string result = strip_trailing_newlines(data.full_output); + std::string ref = strip_trailing_newlines(file_util::read_text_file(file.path.string())); + compare_result.total_files++; + compare_result.total_lines += line_count(result); + if (result != ref) { + compare_result.failing_files.push_back(file.unique_name); + compare_result.total_pass = false; + fmt::print("Reference test failure on {}:\n", file.unique_name); + fmt::print("{}\n", diff_strings(result, ref)); + + if (dump_mode) { + file_util::create_dir_if_needed("./failures"); + file_util::write_text_file("./failures/" + file.unique_name + "_REF.gc", result); + } + } else { + compare_result.ok_files++; + } + } + + return compare_result; +} + +bool compile(Decompiler& dc, + const std::vector& refs, + const OfflineTestConfig& config) { + fmt::print("Setting up compiler...\n"); Compiler compiler; compiler.run_front_end_on_file({"decompiler", "config", 
"all-types.gc"}); @@ -585,29 +318,79 @@ TEST_F(OfflineDecompilation, Compile) { Timer timer; int total_lines = 0; - for (decomp_meta& file : g_object_files_to_decompile_or_ref_check) { - std::string fileName = file.fileNameOverride == "" ? file.fileName : file.fileNameOverride; - if (g_files_to_skip_compiling.find(fileName) != g_files_to_skip_compiling.end()) { + for (const auto& file : refs) { + if (config.skip_compile_files.count(file.name_in_dgo)) { + fmt::print("Skipping {}\n", file.name_in_dgo); continue; } - lg::info("Compiling {}...", fileName); + fmt::print("Compiling {}...\n", file.unique_name); - auto& obj_l = db->obj_files_by_name.at(fileName); - ASSERT_EQ(obj_l.size(), 1); + auto& data = get_data(dc, file.unique_name, file.name_in_dgo); - const auto& cache = final_output_cache->find(fileName); - if (cache != final_output_cache->end()) { - const auto& src = cache->second; - total_lines += line_count(src); - compiler.run_full_compiler_on_string_no_save(src); - } else { - auto src = db->ir2_final_out(obj_l.at(0), g_functions_to_skip_compiling); + try { + const auto& src = data.output_with_skips; total_lines += line_count(src); compiler.run_full_compiler_on_string_no_save(src); + } catch (const std::exception& e) { + fmt::print("Compiler exception: {}\n", e.what()); + return false; } } auto time = timer.getSeconds(); - lg::info("Total Lines Compiled: {}. Lines/second: {:.1f}\n", total_lines, - (float)total_lines / time); + fmt::print("Total Lines Compiled: {}. 
Lines/second: {:.1f}\n", total_lines, + (float)total_lines / time); + + return true; } + +int main(int argc, char* argv[]) { + fmt::print("Offline Decompiler Test 2\n"); + lg::initialize(); + + fmt::print("Reading config...\n"); + auto args = parse_args(argc, argv); + auto config = parse_config(); + + fmt::print("Finding files...\n"); + auto files = find_files(config.dgos); + if (args.max_files < (int)files.size()) { + files.erase(files.begin() + args.max_files, files.end()); + } + + fmt::print("Setting up decompiler and loading files...\n"); + auto decompiler = setup_decompiler(files, args, config); + + fmt::print("Disassembling files...\n"); + disassemble(decompiler); + + fmt::print("Decompiling...\n"); + decompile(decompiler, config); + + fmt::print("Comparing...\n"); + auto compare_result = compare(decompiler, files, args.dump_current_output); + fmt::print("Compared {} lines. {}/{} files passed.\n", compare_result.total_lines, + compare_result.ok_files, compare_result.total_files); + + if (!compare_result.failing_files.empty()) { + fmt::print("Failing files:\n"); + for (auto& f : compare_result.failing_files) { + fmt::print(" {}\n", f); + } + } + + bool compile_result = compile(decompiler, files, config); + + if (compare_result.total_pass && compile_result) { + fmt::print("Pass!\n"); + return 0; + } else { + if (!compile_result) { + fmt::print("Compilation failed.\n"); + } + if (!compare_result.total_pass) { + fmt::print("Comparison failed.\n"); + } + } + return 1; +} \ No newline at end of file