diff --git a/CMakeLists.txt b/CMakeLists.txt index 6fce0edcce..e34cf487b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Debug") endif() -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) # Set default compile flags for GCC # optimization level can be set here. Note that game/ overwrites this for building game C++ code. diff --git a/decompiler/Function/CfgVtx.cpp b/decompiler/Function/CfgVtx.cpp index 31bdae7d9b..ad90ff9503 100644 --- a/decompiler/Function/CfgVtx.cpp +++ b/decompiler/Function/CfgVtx.cpp @@ -1722,8 +1722,9 @@ std::shared_ptr build_cfg(const LinkedObjectFile& file, int se // printf("%s\n", cfg->to_dot().c_str()); // printf("%s\n", cfg->to_form()->toStringPretty().c_str()); - changed = changed || cfg->find_cond_w_else(); changed = changed || cfg->find_cond_n_else(); + changed = changed || cfg->find_cond_w_else(); + changed = changed || cfg->find_while_loop_top_level(); changed = changed || cfg->find_seq_top_level(); changed = changed || cfg->find_short_circuits(); diff --git a/decompiler/IR/CfgBuilder.cpp b/decompiler/IR/CfgBuilder.cpp index 006fc6ce83..04745415d9 100644 --- a/decompiler/IR/CfgBuilder.cpp +++ b/decompiler/IR/CfgBuilder.cpp @@ -3,32 +3,6 @@ #include "decompiler/Function/CfgVtx.h" #include "decompiler/Function/Function.h" -std::vector> IR::get_all_ir(LinkedObjectFile& file) const { - std::vector> result; - get_children(&result); - size_t last_checked = 0; - size_t last_last_checked = -1; - - while (last_checked != last_last_checked) { - last_last_checked = last_checked; - auto end_of_check = result.size(); - for (size_t i = last_checked; i < end_of_check; i++) { - auto it = result.at(i).get(); - assert(it); - it->get_children(&result); - } - last_checked = end_of_check; - } - - // Todo, remove this check which is just for debugging. 
- std::unordered_set> unique_ir; - for (auto& x : result) { - unique_ir.insert(x); - } - assert(unique_ir.size() == result.size()); - return result; -} - namespace { std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx); @@ -60,12 +34,14 @@ void insert_cfg_into_list(Function& f, output->push_back(got); } } else { - // output->push_back(cfg_to_ir(f, file, vtx)); + // doesn't look like we're going to get something that can be inlined, so try as usual auto ir = cfg_to_ir(f, file, vtx); auto ir_as_begin = dynamic_cast(ir.get()); if (ir_as_begin) { // we unexpectedly got a begin, even though we didn't think we would. This is okay, but we - // should inline this begin to avoid nested begins. + // should inline this begin to avoid nested begins. This happens in the case where an entire + // control flow pattern is turned into a single op (like type-of) and includes some ops at + // the beginning. We don't have a good way of knowing this will happen until we try it. for (auto& x : ir_as_begin->forms) { output->push_back(x); } @@ -75,6 +51,12 @@ void insert_cfg_into_list(Function& f, } } +/*! + * If it's a begin with a branch as the last operation, returns a pointer to the branch IR + * and also a pointer to the vector which holds the branch operation in its last slot. + * Otherwise returns nullptr. Useful to modify or remove branches found at the end of blocks, + * and inline things into the begin they were found in. + */ std::pair>*> get_condition_branch_as_vector(IR* in) { auto as_seq = dynamic_cast(in); if (as_seq) { @@ -86,6 +68,10 @@ std::pair>*> get_condition_branch_as return std::make_pair(nullptr, nullptr); } +/*! + * Given an IR, find a branch IR at the end, and also the location of it so it can be patched. + * Returns nullptr as the first item in the pair if it didn't work. 
+ */ std::pair*> get_condition_branch(std::shared_ptr* in) { IR_Branch* condition_branch = dynamic_cast(in->get()); std::shared_ptr* condition_branch_location = in; @@ -100,33 +86,137 @@ std::pair*> get_condition_branch(std::shared_ptr return std::make_pair(condition_branch, condition_branch_location); } -void clean_up_cond_with_else(IR_CondWithElse* cwe, LinkedObjectFile& file) { +/*! + * Given a CondWithElse IR, remove the internal branches and set the condition to be an actual + * compare IR instead of a branch. + * Doesn't "rebalance" the leading condition because this runs way before expression compaction. + */ +void clean_up_cond_with_else(std::shared_ptr* ir, LinkedObjectFile& file) { + (void)file; + auto cwe = dynamic_cast(ir->get()); + assert(cwe); for (auto& e : cwe->entries) { auto jump_to_next = get_condition_branch(&e.condition); assert(jump_to_next.first); assert(jump_to_next.first->branch_delay.kind == BranchDelay::NOP); - // printf("got cond condition %s\n", jump_to_next.first->print(file).c_str()); + // patch the jump to next with a condition. auto replacement = std::make_shared(jump_to_next.first->condition); *(jump_to_next.second) = replacement; + // patch the jump at the end of a block. auto jump_to_end = get_condition_branch(&e.body); assert(jump_to_end.first); assert(jump_to_end.first->branch_delay.kind == BranchDelay::NOP); assert(jump_to_end.first->condition.kind == Condition::ALWAYS); + + // if possible, we just want to remove this from the sequence its in. + // but sometimes there's a case with nothing in it so there is no sequence. + // in this case, we can just replace the branch with a NOP IR to indicate that nothing + // happens in this case, but there was still GOAL code to test for it. + // this happens rarely, as you would expect. 
+ auto as_end_of_sequence = get_condition_branch_as_vector(e.body.get()); if (as_end_of_sequence.first) { assert(as_end_of_sequence.second->size() > 1); as_end_of_sequence.second->pop_back(); } else { - // this means the case is empty, which is a little bit weird but does actually appear to - // happen in a few places. so we just replace the jump with a nop. In the future we could - // consider having a more explicit "this case is empty" operator so this doesn't get confused - // with an actual MIPS nop. + // In the future we could consider having a more explicit "this case is empty" operator so + // this doesn't get confused with an actual MIPS nop. *(jump_to_end.second) = std::make_shared(); } } } +/*! + * A GOAL comparison which produces a boolean is recognized as a cond-no-else by the CFG analysis. + * But it should not be decompiled as a branching statement. + * This either succeeds or asserts and must be called with something that can be converted + * successfully + */ +void convert_cond_no_else_to_compare(std::shared_ptr* ir) { + auto cne = dynamic_cast(ir->get()); + assert(cne); + auto condition = get_condition_branch(&cne->entries.front().condition); + assert(condition.first); + auto body = dynamic_cast(cne->entries.front().body.get()); + assert(body); + auto dst = body->dst; + auto src = dynamic_cast(body->src.get()); + assert(src->name == "#f"); + assert(cne->entries.size() == 1); + + auto condition_as_single = dynamic_cast(cne->entries.front().condition.get()); + if (condition_as_single) { + // as far as I can tell this is totally valid but just happens to not appear? + // if this case is ever hit in the future it's fine and we just need to implement this. + // but leaving empty for now so there's fewer things to test. 
+ assert(false); + } else { + auto condition_as_seq = dynamic_cast(cne->entries.front().condition.get()); + assert(condition_as_seq); + if (condition_as_seq) { + auto replacement = std::make_shared(); + replacement->forms = condition_as_seq->forms; + assert(condition.second == &condition_as_seq->forms.back()); + replacement->forms.pop_back(); + replacement->forms.push_back(std::make_shared( + IR_Set::REG_64, dst, std::make_shared(condition.first->condition))); + *ir = replacement; + } + } +} + +/*! + * not yet finished + * Replace internal branches inside a CondNoElse IR. + * If possible, will simplify the entire expression into a comparison operation. + * @param ir + * @param file + */ +void clean_up_cond_no_else(std::shared_ptr* ir, LinkedObjectFile& file) { + auto cne = dynamic_cast(ir->get()); + assert(cne); + // for (auto& e : cne->entries) { + for (size_t idx = 0; idx < cne->entries.size(); idx++) { + auto& e = cne->entries.at(idx); + auto jump_to_next = get_condition_branch(&e.condition); + assert(jump_to_next.first); + // + printf("got cond condition %s\n", jump_to_next.first->print(file).c_str()); + if (jump_to_next.first->branch_delay.kind == BranchDelay::SET_REG_TRUE && + cne->entries.size() == 1) { + convert_cond_no_else_to_compare(ir); + } else { + assert(jump_to_next.first->branch_delay.kind == BranchDelay::SET_REG_FALSE || + jump_to_next.first->branch_delay.kind == BranchDelay::NOP); + } + + // auto replacement = std::make_shared(jump_to_next.first->condition); + // *(jump_to_next.second) = replacement; + // + // auto jump_to_end = get_condition_branch(&e.body); + // assert(jump_to_end.first); + // assert(jump_to_end.first->branch_delay.kind == BranchDelay::NOP); + // assert(jump_to_end.first->condition.kind == Condition::ALWAYS); + // auto as_end_of_sequence = get_condition_branch_as_vector(e.body.get()); + // if (as_end_of_sequence.first) { + // assert(as_end_of_sequence.second->size() > 1); + // as_end_of_sequence.second->pop_back(); + 
// } else { + // // this means the case is empty, which is a little bit weird but does actually appear to + // // happen in a few places. so we just replace the jump with a nop. In the future we + // could + // // consider having a more explicit "this case is empty" operator so this doesn't get + // confused + // // with an actual MIPS nop. + // *(jump_to_end.second) = std::make_shared(); + // } + } +} + +/*! + * Try to convert a short circuiting expression into a "type-of" expression. + * We do this before attempting the normal and/or expressions. + */ std::shared_ptr try_sc_as_type_of(Function& f, LinkedObjectFile& file, ShortCircuit* vtx) { // the assembly looks like this: /* @@ -235,9 +325,6 @@ std::shared_ptr try_sc_as_type_of(Function& f, LinkedObjectFile& file, Short assert(src_reg3->reg == src_reg->reg); assert(offset->value == -4); - printf("Candidates for SC type-of:\n%s\n%s\n%s\n", b0_ir->print(file).c_str(), - b1_ir->print(file).c_str(), b2_ir->print(file).c_str()); - std::shared_ptr clobber = nullptr; if (temp_reg->reg != src_reg->reg && temp_reg->reg != dst_reg->reg) { clobber = first_branch->condition.src0; @@ -246,7 +333,6 @@ std::shared_ptr try_sc_as_type_of(Function& f, LinkedObjectFile& file, Short return std::make_shared(IR_Set::REG_64, else_case->dst, std::make_shared(shift->arg0, clobber)); } else { - // i'm not brave enough to enable this until I have found a better test case // remove the branch b0_ir->forms.pop_back(); // remove the shift @@ -256,9 +342,11 @@ std::shared_ptr try_sc_as_type_of(Function& f, LinkedObjectFile& file, Short IR_Set::REG_64, else_case->dst, std::make_shared(shift->arg0, clobber))); return b0_ptr; } - return nullptr; // todo } +/*! + * Main CFG vertex to IR conversion. Will pull basic IR ops from the provided function as needed. 
+ */ std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx) { if (dynamic_cast(vtx)) { auto* bv = dynamic_cast(vtx); @@ -294,30 +382,73 @@ std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx) return result; } else if (dynamic_cast(vtx)) { auto* cvtx = dynamic_cast(vtx); - std::vector entries; - for (auto& x : cvtx->entries) { - IR_CondWithElse::Entry e; - e.condition = cfg_to_ir(f, file, x.condition); - e.body = cfg_to_ir(f, file, x.body); - entries.push_back(std::move(e)); - } + + // the cfg analysis pass may recognize some things out of order, which can cause + // fake nesting. This is actually a problem at this point because it can turn a normal + // cond into a cond with else, which emits different instructions. This attempts to recognize + // an else which is actually more cases and compacts it into a single statement. At this point + // I don't know if this is sufficient to catch all cases. it may even recognize the wrong + // thing in some cases... maybe we should check the delay slot instead? 
auto else_ir = cfg_to_ir(f, file, cvtx->else_vtx); - auto result = std::make_shared(entries, else_ir); - clean_up_cond_with_else(result.get(), file); - return result; + + if (dynamic_cast(else_ir.get())) { + auto extra_cond = dynamic_cast(else_ir.get()); + std::vector entries; + for (auto& x : cvtx->entries) { + IR_Cond::Entry e; + e.condition = cfg_to_ir(f, file, x.condition); + e.body = cfg_to_ir(f, file, x.body); + entries.push_back(std::move(e)); + } + for (auto& x : extra_cond->entries) { + entries.push_back(x); + } + std::shared_ptr result = std::make_shared(entries); + clean_up_cond_no_else(&result, file); + return result; + } else { + std::vector entries; + for (auto& x : cvtx->entries) { + IR_CondWithElse::Entry e; + e.condition = cfg_to_ir(f, file, x.condition); + e.body = cfg_to_ir(f, file, x.body); + entries.push_back(std::move(e)); + } + std::shared_ptr result = std::make_shared(entries, else_ir); + clean_up_cond_with_else(&result, file); + return result; + } } else if (dynamic_cast(vtx)) { auto* svtx = dynamic_cast(vtx); auto as_type_of = try_sc_as_type_of(f, file, svtx); if (as_type_of) { return as_type_of; } + } else if (dynamic_cast(vtx)) { + auto* cvtx = dynamic_cast(vtx); + std::vector entries; + for (auto& x : cvtx->entries) { + IR_Cond::Entry e; + e.condition = cfg_to_ir(f, file, x.condition); + e.body = cfg_to_ir(f, file, x.body); + entries.push_back(std::move(e)); + } + std::shared_ptr result = std::make_shared(entries); + clean_up_cond_no_else(&result, file); + return result; } throw std::runtime_error("not yet implemented IR conversion."); return nullptr; } +/*! + * Post processing pass to clean up while loops - annoyingly the block before a while loop + * has a jump to the condition branch that we need to remove. This currently happens after all + * conversion but this may need to be revisited depending on the final order of simplifications. 
+ */ void clean_up_while_loops(IR_Begin* sequence, LinkedObjectFile& file) { + (void)file; std::vector to_remove; // the list of branches to remove by index in this sequence for (size_t i = 0; i < sequence->forms.size(); i++) { auto* form_as_while = dynamic_cast(sequence->forms.at(i).get()); @@ -353,6 +484,11 @@ void clean_up_while_loops(IR_Begin* sequence, LinkedObjectFile& file) { } } // namespace +/*! + * Use a control flow graph to build a single IR representing a function. + * This should be done after basic ops are added and before typing, variable splitting, and + * expression compaction. + */ std::shared_ptr build_cfg_ir(Function& function, ControlFlowGraph& cfg, LinkedObjectFile& file) { diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index a60ea68ba9..00d52d728e 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -1,6 +1,33 @@ #include "IR.h" #include "decompiler/ObjectFile/LinkedObjectFile.h" +std::vector> IR::get_all_ir(LinkedObjectFile& file) const { + (void)file; + std::vector> result; + get_children(&result); + size_t last_checked = 0; + size_t last_last_checked = -1; + + while (last_checked != last_last_checked) { + last_last_checked = last_checked; + auto end_of_check = result.size(); + for (size_t i = last_checked; i < end_of_check; i++) { + auto it = result.at(i).get(); + assert(it); + it->get_children(&result); + } + last_checked = end_of_check; + } + + // Todo, remove this check which is just for debugging. + std::unordered_set> unique_ir; + for (auto& x : result) { + unique_ir.insert(x); + } + assert(unique_ir.size() == result.size()); + return result; +} + std::string IR::print(const LinkedObjectFile& file) const { return to_form(file)->toStringPretty(); } @@ -582,4 +609,50 @@ std::shared_ptr
IR_GetRuntimeType::to_form(const LinkedObjectFile& file) c void IR_GetRuntimeType::get_children(std::vector>* output) const { output->push_back(object); +} + +std::shared_ptr IR_Cond::to_form(const LinkedObjectFile& file) const { + if (entries.size() == 1 && is_single_expression(entries.front().body.get())) { + // print as an if statement if we can put the body in a single form. + std::vector> list; + list.push_back(toForm("if")); + list.push_back(entries.front().condition->to_form(file)); + list.push_back(entries.front().body->to_form(file)); + return buildList(list); + } else if (entries.size() == 1) { + // turn into a when if the body requires multiple forms + std::vector> list; + list.push_back(toForm("when")); + list.push_back(entries.front().condition->to_form(file)); + print_inlining_begin(&list, entries.front().body.get(), file); + return buildList(list); + } else { + std::vector> list; + list.push_back(toForm("cond")); + for (auto& e : entries) { + std::vector> entry; + entry.push_back(e.condition->to_form(file)); + print_inlining_begin(&entry, e.body.get(), file); + list.push_back(buildList(entry)); + } + return buildList(list); + } +} + +void IR_Cond::get_children(std::vector>* output) const { + for (auto& e : entries) { + output->push_back(e.condition); + output->push_back(e.body); + } +} + +std::shared_ptr IR_PartialNot::to_form(const LinkedObjectFile& file) const { + return buildList("INCOMPLETE-NOT", dst->to_form(file), src->to_form(file)); +} + +void IR_PartialNot::get_children(std::vector>* output) const { + // probably we could get away with not returning anything here because these should + // always be registers? 
+ output->push_back(dst); + output->push_back(src); } \ No newline at end of file diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index a083c270b2..818812b545 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -295,14 +295,37 @@ class IR_CondWithElse : public IR { void get_children(std::vector>* output) const override; }; +// this one doesn't have an else statement. Will return false if none of the cases are taken. +class IR_Cond : public IR { + public: + struct Entry { + std::shared_ptr condition = nullptr; + std::shared_ptr body = nullptr; + std::shared_ptr false_destination = nullptr; + }; + std::vector entries; + IR_Cond(std::vector _entries) : entries(std::move(_entries)) {} + std::shared_ptr to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; +}; + // this will work on pairs, bintegers, or basics class IR_GetRuntimeType : public IR { public: std::shared_ptr object, clobber; - explicit IR_GetRuntimeType(std::shared_ptr _object, std::shared_ptr _clobber) + IR_GetRuntimeType(std::shared_ptr _object, std::shared_ptr _clobber) : object(std::move(_object)), clobber(std::move(_clobber)) {} std::shared_ptr to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; }; +class IR_PartialNot : public IR { + public: + std::shared_ptr dst, src; + IR_PartialNot(std::shared_ptr _dst, std::shared_ptr _src) + : dst(std::move(_dst)), src(std::move(_src)) {} + std::shared_ptr to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; +}; + #endif // JAK_IR_H diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index 3a96107e6e..8a5a6872e2 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -598,12 +598,15 @@ void ObjectFileDB::analyze_functions() { int total_basic_ops = 0; int total_failed_basic_ops = 0; + int 
attempted_cfg_irs = 0; + int successful_cfg_irs = 0; + std::map> unresolved_by_length; if (get_config().find_basic_blocks) { timer.start(); int total_basic_blocks = 0; for_each_function([&](Function& func, int segment_id, ObjectFileData& data) { - // printf("in %s\n", func.guessed_name.to_string().c_str()); + printf("in %s\n", func.guessed_name.to_string().c_str()); auto blocks = find_blocks_in_function(data.linked_data, segment_id, func); total_basic_blocks += blocks.size(); func.basic_blocks = blocks; @@ -621,6 +624,10 @@ void ObjectFileDB::analyze_functions() { total_failed_basic_ops += func.get_failed_basic_op_count(); func.ir = build_cfg_ir(func, *func.cfg, data.linked_data); + attempted_cfg_irs++; + if (func.ir) { + successful_cfg_irs++; + } if (func.cfg->is_fully_resolved()) { resolved_cfg_functions++; @@ -644,6 +651,10 @@ void ObjectFileDB::analyze_functions() { if (!func.guessed_name.empty()) { total_named_functions++; } + + // if (func.guessed_name.to_string() == "inspect") { + // assert(false); + // } }); printf("Found %d functions (%d with nontrivial cfgs)\n", total_functions, @@ -656,10 +667,11 @@ void ObjectFileDB::analyze_functions() { printf(" %d/%d nontrivial cfg's resolved (%.2f%%)\n", total_resolved_nontrivial_functions, total_nontrivial_functions, 100.f * float(total_resolved_nontrivial_functions) / float(total_nontrivial_functions)); - int successful_basic_ops = total_basic_ops - total_failed_basic_ops; printf(" %d/%d basic ops converted successfully (%.2f%%)\n", successful_basic_ops, total_basic_ops, 100.f * float(successful_basic_ops) / float(total_basic_ops)); + printf(" %d/%d cfgs converted to ir (%.2f%%)\n", successful_cfg_irs, attempted_cfg_irs, + 100.f * float(successful_cfg_irs) / float(attempted_cfg_irs)); // for (auto& kv : unresolved_by_length) { // printf("LEN %d\n", kv.first); diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index 0305ed5311..d9328cb6b7 100644 --- 
a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -36,13 +36,23 @@ "resend-exception", "kernel-set-interrupt-vector", "kernel-set-exception-vector", "return-from-exception", "kernel-read", "kernel-read-function", "kernel-write", "kernel-write-function", "kernel-copy-to-kernel-ram", + // this one needs more investigation. nothing looks weird about it but it fails... + "camera-change-to", + + // this one fails due to false compaction where an else case has only a not expression in it. + "master-is-hopeful-better?", + + // real asm + "cspace<-parented-transformq-joint!", "blerc-a-fragment", "render-boundary-tri", "render-boundary-quad", + "(method 19 collide-shape-prim-sphere)", + "(method 11 cpu-thread)", "atan0", "sincos!", "sincos-rad!", "disasm-dma-list", "vblank-handler", "vif1-handler", "vif1-handler-debug", "entity-actor-count", "decompress-frame-data-pair-to-accumulator", "decompress-frame-data-to-accumulator", "normalize-frame-quaternions", "clear-frame-accumulator", "generic-copy-vtx-dclr-dtex", "generic-no-light-dproc-only", "generic-no-light-proc", "mercneric-bittable-asm", "generic-tie-decompress", "matrix-axis-sin-cos!", "matrix-axis-sin-cos-vu!", "generic-prepare-dma-single", "(method 13 collide-shape-prim-sphere)", "(method 14 collide-shape-prim-sphere)", "(method 12 collide-shape-prim-sphere)", - "adgif-shader<-texture-with-update!", + "adgif-shader<-texture-with-update!", "generic-interp-dproc", "collide-do-primitives", "draw-bones-check-longest-edge-asm", "sp-launch-particles-var", "(method 15 collide-shape-prim-mesh)", "(method 15 collide-shape-prim-sphere)", diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index ae5aece6f8..6e623e9e3e 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -1,5 +1,5 @@ # We define our own compilation flags here. -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) # Set default compile flags for GCC # optimization level can be set here. 
Note that game/ overwrites this for building game C++ code.