From d402ad89186039705595d911e70d4b604ee1a147 Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Sun, 16 Oct 2022 17:20:44 -0400 Subject: [PATCH] tests: parallelize offline-test execution (#1974) The offline-tests are going to end up taking too long for jak 2, I did some rough math and by the end of it we'll be spending almost 2 minutes for a full offline test on my machine. These changes allow us to throw hardware at the problem Still some work to do to make the output nicer, but seems to be fairly reliable. By default it still uses 1 thread, use `num_threads` to change this. --- .vs/launch.vs.json | 12 ++ Taskfile.yml | 4 +- common/goos/Printer.cpp | 4 + decompiler/ObjectFile/ObjectFileDB.h | 6 + decompiler/ObjectFile/ObjectFileDB_IR2.cpp | 161 +++++++++--------- decompiler/config/jak2/hacks.jsonc | 32 ++-- decompiler/config/jak2/stack_structures.jsonc | 4 +- decompiler/config/jak2/type_casts.jsonc | 20 +-- test/offline/offline_test_main.cpp | 153 +++++++++++++---- 9 files changed, 250 insertions(+), 146 deletions(-) diff --git a/.vs/launch.vs.json b/.vs/launch.vs.json index 8544beae0b..461961bfd7 100644 --- a/.vs/launch.vs.json +++ b/.vs/launch.vs.json @@ -56,6 +56,18 @@ "jak1" ] }, + { + "type": "default", + "project": "CMakeLists.txt", + "projectTarget": "offline-test.exe (bin\\offline-test.exe)", + "name": "Tests - Offline Tests - Jak 2", + "args": [ + "--iso_data_path", + "${workspaceRoot}/iso_data/jak2", + "--game", + "jak2" + ] + }, { "type": "default", "project": "CMakeLists.txt", diff --git a/Taskfile.yml b/Taskfile.yml index 0cf7d5c03d..e137265a1d 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -115,7 +115,7 @@ tasks: cmds: - "{{.TYPESEARCH_BIN_RELEASE_DIR}}/type_searcher --output-path ./search-results.json --game {{.GAME}} --fields '[{\"type\":\"int16\",\"offset\":2},{\"type\":\"int16\",\"offset\":4}]'" # TESTS - offline-tests: + offline-tests: # ran by jenkins cmds: - '{{.OFFLINETEST_BIN_RELEASE_DIR}}/offline-test --iso_data_path "./iso_data/{{.GAME}}" --game {{.GAME}}' offline-test-file: @@ -124,7 +124,7 @@ tasks: update-ref-tests: cmds: - cmd: python ./scripts/tasks/default-file-or-folder.py --path failures - - cmd: '{{.OFFLINETEST_BIN_RELEASE_DIR}}/offline-test --iso_data_path "./iso_data/{{.GAME}}" --game {{.GAME}} --dump_current_output' + - cmd: '{{.OFFLINETEST_BIN_RELEASE_DIR}}/offline-test --iso_data_path "./iso_data/{{.GAME}}" --game {{.GAME}} --dump_current_output --num_threads 10' ignore_error: true - python ./scripts/update_decomp_reference.py ./failures ./test/decompiler/reference/ --game {{.GAME}} - task: offline-tests diff --git a/common/goos/Printer.cpp b/common/goos/Printer.cpp index 9057f652ea..1c5b9e25cb 100644 --- a/common/goos/Printer.cpp +++ b/common/goos/Printer.cpp @@ -1,5 +1,7 @@ #include "Printer.h" +#include + #include "third-party/fmt/core.h" namespace pretty_print { @@ -34,6 +36,7 @@ goos::Object float_representation(float value) { } std::unique_ptr pretty_printer_reader; +std::mutex pretty_printer_reader_mutex; goos::Reader& get_pretty_printer_reader() { if (!pretty_printer_reader) { @@ -43,6 +46,7 @@ goos::Reader& get_pretty_printer_reader() { } goos::Object to_symbol(const std::string& str) { + std::lock_guard guard(pretty_printer_reader_mutex); return goos::SymbolObject::make_new(get_pretty_printer_reader().symbolTable, str); } diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index 27f3e70bc9..e09f663d12 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -172,6 +172,12 @@ class ObjectFileDB { bool disassemble_code, bool print_hex); + void process_object_file_data( + ObjectFileData& data, + const fs::path& output_dir, + const Config& config, + const std::unordered_set& skip_functions, + const std::unordered_map>& skip_states); void analyze_functions_ir2( const fs::path& output_dir, const Config& config, diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index cddd5df9ce..d2ec205650 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -33,6 +33,90 @@ namespace decompiler { +void ObjectFileDB::process_object_file_data( + ObjectFileData& data, + const fs::path& output_dir, + const Config& config, + const std::unordered_set& skip_functions, + const std::unordered_map>& skip_states) { + Timer file_timer; + ir2_do_segment_analysis_phase1(TOP_LEVEL_SEGMENT, config, data); + ir2_do_segment_analysis_phase1(DEBUG_SEGMENT, config, data); + ir2_do_segment_analysis_phase1(MAIN_SEGMENT, config, data); + ir2_setup_labels(config, data); + ir2_do_segment_analysis_phase2(TOP_LEVEL_SEGMENT, config, data); + if (data.linked_data.functions_by_seg.size() == 3) { + enum { DEFPART, DEFSTATE, DEFSKELGROUP } step = DEFPART; + try { + run_defpartgroup(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front()); + step = DEFSTATE; + run_defstate(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front(), skip_states); + step = DEFSKELGROUP; + run_defskelgroups(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front()); + + } catch (const std::exception& e) { + switch (step) { + case DEFPART: + lg::error("Failed to find defpartgroups: {}", e.what()); + break; + case DEFSTATE: + lg::error("Failed to find defstates: {}", e.what()); + break; + case DEFSKELGROUP: + lg::error("Failed to find defskelgroups: {}", e.what()); + break; + } + } + } + ir2_do_segment_analysis_phase2(DEBUG_SEGMENT, config, data); + ir2_do_segment_analysis_phase2(MAIN_SEGMENT, config, data); + + ir2_insert_anonymous_functions(DEBUG_SEGMENT, data); + ir2_insert_anonymous_functions(MAIN_SEGMENT, data); + ir2_insert_anonymous_functions(TOP_LEVEL_SEGMENT, data); + + ir2_run_mips2c(config, data); + + ir2_symbol_definition_map(data); + + // TODO - insert the game_name into the import line automatically + // instead of `goal_src/jak1/import/something.gc` + // just `import/something.gc` + // + // Can be relative to the root of the source directory + const auto& imports_it = config.import_deps_by_file.find(data.to_unique_name()); + std::vector imports; + if (imports_it != config.import_deps_by_file.end()) { + imports = imports_it->second; + } + + if (!output_dir.string().empty()) { + ir2_write_results(output_dir, config, imports, data); + } else { + data.output_with_skips = ir2_final_out(data, imports, skip_functions); + data.full_output = ir2_final_out(data, imports, {}); + } + + if (!config.generate_all_types) { + // this frees ir2 memory, but means future passes can't look back on this function. + for_each_function_def_order_in_obj(data, [&](Function& f, int) { f.ir2 = {}; }); + } else { + for_each_function_def_order_in_obj(data, [&](Function& f, int seg) { + if (seg == TOP_LEVEL_SEGMENT) { + return; // keep top-levels + } + if (f.guessed_name.kind == FunctionName::FunctionKind::METHOD && + f.guessed_name.method_id == GOAL_INSPECT_METHOD) { + return; // keep inspects + } + // otherwise free memory + f.ir2 = {}; + }); + } + + lg::info("Done in {:.2f}ms", file_timer.getMs()); +} + /*! * Main IR2 analysis pass. * At this point, we assume that the files are loaded and we've run find_code to locate all @@ -49,83 +133,8 @@ void ObjectFileDB::analyze_functions_ir2( } int file_idx = 1; for_each_obj([&](ObjectFileData& data) { - Timer file_timer; lg::info("[{:3d}/{}]------ {}", file_idx++, total_file_count, data.to_unique_name()); - ir2_do_segment_analysis_phase1(TOP_LEVEL_SEGMENT, config, data); - ir2_do_segment_analysis_phase1(DEBUG_SEGMENT, config, data); - ir2_do_segment_analysis_phase1(MAIN_SEGMENT, config, data); - ir2_setup_labels(config, data); - ir2_do_segment_analysis_phase2(TOP_LEVEL_SEGMENT, config, data); - if (data.linked_data.functions_by_seg.size() == 3) { - enum { DEFPART, DEFSTATE, DEFSKELGROUP } step = DEFPART; - try { - run_defpartgroup(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front()); - step = DEFSTATE; - run_defstate(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front(), skip_states); - step = DEFSKELGROUP; - run_defskelgroups(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).front()); - - } catch (const std::exception& e) { - switch (step) { - case DEFPART: - lg::error("Failed to find defpartgroups: {}", e.what()); - break; - case DEFSTATE: - lg::error("Failed to find defstates: {}", e.what()); - break; - case DEFSKELGROUP: - lg::error("Failed to find defskelgroups: {}", e.what()); - break; - } - } - } - ir2_do_segment_analysis_phase2(DEBUG_SEGMENT, config, data); - ir2_do_segment_analysis_phase2(MAIN_SEGMENT, config, data); - - ir2_insert_anonymous_functions(DEBUG_SEGMENT, data); - ir2_insert_anonymous_functions(MAIN_SEGMENT, data); - ir2_insert_anonymous_functions(TOP_LEVEL_SEGMENT, data); - - ir2_run_mips2c(config, data); - - ir2_symbol_definition_map(data); - - // TODO - insert the game_name into the import line automatically - // instead of `goal_src/jak1/import/something.gc` - // just `import/something.gc` - // - // Can be relative to the root of the source directory - const auto& imports_it = config.import_deps_by_file.find(data.to_unique_name()); - std::vector imports; - if (imports_it != config.import_deps_by_file.end()) { - imports = imports_it->second; - } - - if (!output_dir.string().empty()) { - ir2_write_results(output_dir, config, imports, data); - } else { - data.output_with_skips = ir2_final_out(data, imports, skip_functions); - data.full_output = ir2_final_out(data, imports, {}); - } - - if (!config.generate_all_types) { - // this frees ir2 memory, but means future passes can't look back on this function. - for_each_function_def_order_in_obj(data, [&](Function& f, int) { f.ir2 = {}; }); - } else { - for_each_function_def_order_in_obj(data, [&](Function& f, int seg) { - if (seg == TOP_LEVEL_SEGMENT) { - return; // keep top-levels - } - if (f.guessed_name.kind == FunctionName::FunctionKind::METHOD && - f.guessed_name.method_id == GOAL_INSPECT_METHOD) { - return; // keep inspects - } - // otherwise free memory - f.ir2 = {}; - }); - } - - lg::info("Done in {:.2f}ms", file_timer.getMs()); + process_object_file_data(data, output_dir, config, skip_functions, skip_states); }); lg::info("{}", stats.let.print()); diff --git a/decompiler/config/jak2/hacks.jsonc b/decompiler/config/jak2/hacks.jsonc index 2ac74510f8..c6d37eae6e 100644 --- a/decompiler/config/jak2/hacks.jsonc +++ b/decompiler/config/jak2/hacks.jsonc @@ -347,22 +347,22 @@ ], "(method 22 gui-control)": [ -10, // goto L63 (B39) -16, // goto L58 (B27) -26, // goto L62 (B) -27, // goto L62 -28, // goto L61 -35, // goto L62 -36, // goto L62 -38, // goto L99 -42, // goto L89 -50, // goto L84 -108, // goto L86 -110, // goto L86 -116, // goto L99 -117, // goto L91 -120 -] + 10, // goto L63 (B39) + 16, // goto L58 (B27) + 26, // goto L62 (B) + 27, // goto L62 + 28, // goto L61 + 35, // goto L62 + 36, // goto L62 + 38, // goto L99 + 42, // goto L89 + 50, // goto L84 + 108, // goto L86 + 110, // goto L86 + 116, // goto L99 + 117, // goto L91 + 120 + ] // "(method 67 collide-shape-moving)": [1, 7, 9, 10, 11, 12] - TODO }, diff --git a/decompiler/config/jak2/stack_structures.jsonc b/decompiler/config/jak2/stack_structures.jsonc index c29178bd66..89e6269d95 100644 --- a/decompiler/config/jak2/stack_structures.jsonc +++ b/decompiler/config/jak2/stack_structures.jsonc @@ -983,9 +983,7 @@ [640, ["inline-array", "sphere", 2]], [656, "vector"] ], - "cloud-track": [ - [16, "vector"] - ], + "cloud-track": [[16, "vector"]], "progress-post": [[112, "hud-box"]], "(method 10 menu-missions-option)": [[224, "hud-box"]], "(method 10 menu-secret-option)": [[64, "hud-box"]], diff --git a/decompiler/config/jak2/type_casts.jsonc b/decompiler/config/jak2/type_casts.jsonc index 6bc98883df..303aa85fb9 100644 --- a/decompiler/config/jak2/type_casts.jsonc +++ b/decompiler/config/jak2/type_casts.jsonc @@ -3440,9 +3440,7 @@ ], "(post carry crate)": [[[13, 16], "a0", "collide-shape-moving"]], "(event carry crate)": [[15, "a0", "vector"]], - "(code idle crate)": [ - [[2, 5], "a0", "collide-shape-moving"] - ], + "(code idle crate)": [[[2, 5], "a0", "collide-shape-moving"]], "(code hide crate)": [ [27, "v1", "collide-shape-moving"], [95, "v1", "collide-shape-moving"], @@ -3458,9 +3456,7 @@ [6, "a0", "collide-shape-moving"], [27, "a0", "collide-shape-moving"] ], - "camera-rotate-to-vector": [ - [63, "v1", "float"] - ], + "camera-rotate-to-vector": [[63, "v1", "float"]], "target-gun-find-track": [ [182, "v0", "process-focusable"], [[192, 224], "gp", "process-focusable"], @@ -3471,19 +3467,13 @@ [520, "v1", "int"] ], "target-gun-check": [[599, "v0", "sound-rpc-set-param"]], - "target-gun-build-track-list": [ - [46, "v1", "vector"] - ], - "target-gun-joint-pre0": [ - [[131, 165], "gp", "process-focusable"] - ], + "target-gun-build-track-list": [[46, "v1", "vector"]], + "target-gun-joint-pre0": [[[131, 165], "gp", "process-focusable"]], "(method 28 water-anim)": [ ["_stack_", 16, "res-tag"], [27, "v0", "vector"] ], - "(method 7 drop-plat)": [ - [18, "v1", "external-art-buffer"] - ], + "(method 7 drop-plat)": [[18, "v1", "external-art-buffer"]], "(method 22 gui-control)": [ [[268, 315], "s4", "process-drawable"], [[275,338], "s5", "sound-rpc-set-param"], diff --git a/test/offline/offline_test_main.cpp b/test/offline/offline_test_main.cpp index a62369b883..32ad9c2ec7 100644 --- a/test/offline/offline_test_main.cpp +++ b/test/offline/offline_test_main.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -128,8 +129,23 @@ void decompile(Decompiler& dc, const OfflineTestConfig& config) { config.skip_compile_states); } -std::string strip_trailing_newlines(const std::string& in) { - std::string out = in; +/// @brief Removes trailing new-lines and comment lines +std::string clean_decompilation_code(const std::string& in) { + std::vector lines = split_string(in); + // Remove all lines that are comments + // comments are added only by us, meaning this _should_ be consistent + std::vector::iterator line_itr = lines.begin(); + while (line_itr != lines.end()) { + if (line_itr->rfind(";", 0) == 0) { + // remove comment line + line_itr = lines.erase(line_itr); + } else { + // iterate + line_itr++; + } + } + + std::string out = fmt::format("{}", fmt::join(lines, "\n")); while (!out.empty() && out.back() == '\n') { out.pop_back(); } @@ -171,8 +187,8 @@ CompareResult compare(Decompiler& dc, const std::vector& refs, b for (const auto& file : refs) { auto& data = get_data(dc, file.unique_name, file.name_in_dgo); - std::string result = strip_trailing_newlines(data.full_output); - std::string ref = strip_trailing_newlines(file_util::read_text_file(file.path.string())); + std::string result = clean_decompilation_code(data.full_output); + std::string ref = clean_decompilation_code(file_util::read_text_file(file.path.string())); compare_result.total_files++; compare_result.total_lines += line_count(result); if (result != ref) { @@ -393,6 +409,14 @@ std::optional parse_config(const std::string_view& game_name) return std::make_optional(result); } +/// @brief A simple struct to contain the reason for failure from a thread +struct OfflineTestResult { + int exit_code; + std::string reason; + + OfflineTestResult(int _exit_code, std::string _reason) : exit_code(_exit_code), reason(_reason) {} +}; + int main(int argc, char* argv[]) { ArgumentGuard u8_guard(argc, argv); @@ -404,6 +428,7 @@ int main(int argc, char* argv[]) { // Useful for testing in debug mode (dont have to wait for everything to finish) int max_files = -1; std::string single_file = ""; + uint32_t num_threads = 1; CLI::App app{"OpenGOAL - Offline Reference Test Runner"}; app.add_option("--iso_data_path", iso_data_path, "The path to the folder with the ISO data files") @@ -415,6 +440,8 @@ int main(int argc, char* argv[]) { "files update script"); app.add_option("-m,--max_files", max_files, "Limit the amount of files ran in a single test, picks the first N"); + app.add_option("-t,--num_threads", num_threads, + "The number of threads to partition the offline test work between"); app.add_option("-f,--file", single_file, "Limit the offline test routine to a single file to decompile/compile -- useful " "when you are just iterating on a single file"); @@ -442,41 +469,99 @@ int main(int argc, char* argv[]) { art_files = find_art_files(game_name, config->dgos); } - lg::info("Setting up decompiler and loading files..."); - auto decompiler = - setup_decompiler(files, art_files, fs::path(iso_data_path), config.value(), game_name); + // Create a bunch of threads to disassemble/decompile/compile the files + if (num_threads < 1) { + num_threads = 1; + } else if (num_threads > 1) { + num_threads = std::min(num_threads, std::thread::hardware_concurrency()); + } + // First, prepare our batches of files to be processed + std::vector> work_groups = {}; + for (int i = 0; i < num_threads; i++) { + work_groups.push_back({}); + } + int total_added = 0; + for (auto& file : files) { + work_groups.at(total_added % num_threads).push_back(file); + total_added++; + } - lg::info("Disassembling files..."); - disassemble(decompiler); + // TODO - nicer printing, very messy with dozens of threads processing the job - lg::info("Decompiling..."); - decompile(decompiler, config.value()); + // Now we create a thread to process each group of work, and then await them + std::vector> threads; + decompiler::init_opcode_info(); + for (const auto& work_group : work_groups) { + threads.push_back(std::async(std::launch::async, [&]() { + std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); + lg::info("Setting up decompiler and loading files..."); + auto decompiler = setup_decompiler(work_group, art_files, fs::path(iso_data_path), + config.value(), game_name); + lg::info("Took {}ms", std::chrono::duration_cast( + std::chrono::steady_clock::now() - begin) + .count()); - lg::info("Comparing..."); - auto compare_result = compare(decompiler, files, dump_current_output); - lg::info("Compared {} lines. {}/{} files passed.", compare_result.total_lines, - compare_result.ok_files, compare_result.total_files); - lg::info("Dump? {}\n", dump_current_output); + begin = std::chrono::steady_clock::now(); + lg::info("Disassembling files..."); + disassemble(decompiler); + lg::info("Took {}ms", std::chrono::duration_cast( + std::chrono::steady_clock::now() - begin) + .count()); - if (!compare_result.failing_files.empty()) { - lg::error("Failing files:"); - for (auto& f : compare_result.failing_files) { - lg::error("- {}", f); + begin = std::chrono::steady_clock::now(); + lg::info("Decompiling..."); + decompile(decompiler, config.value()); + // It's about 100ms per file to decompile on average + // meaning that when we have all 900 files, a full offline test will take 1.5 minutes + lg::info("Took {}ms", std::chrono::duration_cast( + std::chrono::steady_clock::now() - begin) + .count()); + + begin = std::chrono::steady_clock::now(); + lg::info("Comparing..."); + auto compare_result = compare(decompiler, work_group, dump_current_output); + lg::info("Took {}ms", std::chrono::duration_cast( + std::chrono::steady_clock::now() - begin) + .count()); + + lg::info("Compared {} lines. {}/{} files passed.", compare_result.total_lines, + compare_result.ok_files, compare_result.total_files); + lg::info("Dump? {}\n", dump_current_output); + + if (!compare_result.failing_files.empty()) { + lg::error("Failing files:"); + for (auto& f : compare_result.failing_files) { + lg::error("- {}", f); + } + lg::error("Comparison failed."); + // No point continuing to compile if the comparison has failed + return OfflineTestResult(1, "Comparison Failed"); + } + + begin = std::chrono::steady_clock::now(); + lg::info("Compiling..."); + bool compile_result = compile(decompiler, work_group, config.value(), game_name); + // Compiling on the otherhand, is around 20ms per file + lg::info("Took {}ms", std::chrono::duration_cast( + std::chrono::steady_clock::now() - begin) + .count()); + + if (!compile_result) { + return OfflineTestResult(1, "Compilation Failed"); + } + + return OfflineTestResult(0, ""); + })); + } + + // Fail fast over any thread tripping over + for (auto& thread : threads) { + auto ret = thread.get(); + if (ret.exit_code != 0) { + lg::error(ret.reason); + return ret.exit_code; } } - bool compile_result = compile(decompiler, files, config.value(), game_name); - - if (compare_result.total_pass && compile_result) { - lg::info("Pass!"); - return 0; - } else { - if (!compile_result) { - lg::error("Compilation failed."); - } - if (!compare_result.total_pass) { - lg::error("Comparison failed."); - } - } - return 1; + return 0; }