From a64dd6c90be1299c4bd3700b1ec6abc53ea20a38 Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Tue, 6 Oct 2020 18:14:27 -0400 Subject: [PATCH] Add CFG to IR decompiler pass (#60) * add some more cfg ir stuff * add cond with else * add type of recognition * add cond to compare conversion * finally all conds are passing * started sc recognize, but ash min and max should be recognized first * fix ash showing up as sc * add abs * fix merge issues * try building goos with optimizations on * sc mostly working, still need to fix right aligned nesting * ands and ors are converting correctly now * clean up --- CMakeLists.txt | 2 +- common/goos/CMakeLists.txt | 7 + decompiler/CMakeLists.txt | 1 + decompiler/Function/CfgVtx.cpp | 3 +- decompiler/Function/Function.cpp | 4 +- decompiler/Function/Function.h | 4 +- decompiler/IR/BasicOpBuilder.cpp | 104 +- decompiler/IR/CfgBuilder.cpp | 1012 +++++++++++++++++ decompiler/IR/CfgBuilder.h | 10 + decompiler/IR/IR.cpp | 415 ++++++- decompiler/IR/IR.h | 148 ++- decompiler/ObjectFile/LinkedObjectFile.cpp | 5 + decompiler/ObjectFile/ObjectFileDB.cpp | 40 +- decompiler/config/jak1_ntsc_black_label.jsonc | 35 +- game/CMakeLists.txt | 2 +- 15 files changed, 1755 insertions(+), 37 deletions(-) create mode 100644 decompiler/IR/CfgBuilder.cpp create mode 100644 decompiler/IR/CfgBuilder.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6fce0edcce..e34cf487b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Debug") endif() -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) # Set default compile flags for GCC # optimization level can be set here. Note that game/ overwrites this for building game C++ code. diff --git a/common/goos/CMakeLists.txt b/common/goos/CMakeLists.txt index 2c36a8302f..cc8c4fe041 100644 --- a/common/goos/CMakeLists.txt +++ b/common/goos/CMakeLists.txt @@ -1,2 +1,9 @@ + +IF (WIN32) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2") +ELSE() +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") +ENDIF() + add_library(goos SHARED Object.cpp TextDB.cpp Reader.cpp Interpreter.cpp InterpreterEval.cpp PrettyPrinter.cpp) target_link_libraries(goos common_util fmt) \ No newline at end of file diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index d48df23840..211efaaa62 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -15,6 +15,7 @@ add_executable(decompiler Disasm/InstructionMatching.cpp Function/CfgVtx.cpp Function/CfgVtx.h IR/BasicOpBuilder.cpp + IR/CfgBuilder.cpp IR/IR.cpp) target_link_libraries(decompiler diff --git a/decompiler/Function/CfgVtx.cpp b/decompiler/Function/CfgVtx.cpp index cc09a1c285..b95b134276 100644 --- a/decompiler/Function/CfgVtx.cpp +++ b/decompiler/Function/CfgVtx.cpp @@ -1721,8 +1721,9 @@ std::shared_ptr build_cfg(const LinkedObjectFile& file, int se // printf("%s\n", cfg->to_dot().c_str()); // printf("%s\n", cfg->to_form()->toStringPretty().c_str()); - changed = changed || cfg->find_cond_w_else(); changed = changed || cfg->find_cond_n_else(); + changed = changed || cfg->find_cond_w_else(); + changed = changed || cfg->find_while_loop_top_level(); changed = changed || cfg->find_seq_top_level(); changed = changed || cfg->find_short_circuits(); diff --git a/decompiler/Function/Function.cpp b/decompiler/Function/Function.cpp index dd433763d6..96233104fb 100644 --- a/decompiler/Function/Function.cpp +++ b/decompiler/Function/Function.cpp @@ -572,8 +572,8 @@ bool Function::instr_starts_basic_op(int idx) { return false; } -IR* Function::get_basic_op_at_instr(int idx) { - return basic_ops.at(instruction_to_basic_op.at(idx)).get(); +std::shared_ptr Function::get_basic_op_at_instr(int idx) { + return basic_ops.at(instruction_to_basic_op.at(idx)); } int Function::get_basic_op_count() { diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index c0420ec055..156d1b8e14 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -68,10 +68,12 @@ class Function { void add_basic_op(std::shared_ptr op, int start_instr, int end_instr); bool has_basic_ops() { return !basic_ops.empty(); } bool instr_starts_basic_op(int idx); - IR* get_basic_op_at_instr(int idx); + std::shared_ptr get_basic_op_at_instr(int idx); int get_basic_op_count(); int get_failed_basic_op_count(); + std::shared_ptr ir = nullptr; + int segment = -1; int start_word = -1; int end_word = -1; // not inclusive, but does include padding. diff --git a/decompiler/IR/BasicOpBuilder.cpp b/decompiler/IR/BasicOpBuilder.cpp index 0c27140b8b..7549650503 100644 --- a/decompiler/IR/BasicOpBuilder.cpp +++ b/decompiler/IR/BasicOpBuilder.cpp @@ -511,6 +511,28 @@ std::shared_ptr try_sll(Instruction& instr, int idx) { return nullptr; } +std::shared_ptr try_dsrav(Instruction& instr, int idx) { + if (is_gpr_3(instr, InstructionKind::DSRAV, {}, {}, {}) && + !instr.get_src(0).is_reg(make_gpr(Reg::S7)) && !instr.get_src(1).is_reg(make_gpr(Reg::S7))) { + return make_set(IR_Set::REG_64, make_reg(instr.get_dst(0).get_reg(), idx), + std::make_shared(IR_IntMath2::RIGHT_SHIFT_ARITH, + make_reg(instr.get_src(0).get_reg(), idx), + make_reg(instr.get_src(1).get_reg(), idx))); + } + return nullptr; +} + +std::shared_ptr try_dsrlv(Instruction& instr, int idx) { + if (is_gpr_3(instr, InstructionKind::DSRLV, {}, {}, {}) && + !instr.get_src(0).is_reg(make_gpr(Reg::S7)) && !instr.get_src(1).is_reg(make_gpr(Reg::S7))) { + return make_set(IR_Set::REG_64, make_reg(instr.get_dst(0).get_reg(), idx), + std::make_shared(IR_IntMath2::RIGHT_SHIFT_LOGIC, + make_reg(instr.get_src(0).get_reg(), idx), + make_reg(instr.get_src(1).get_reg(), idx))); + } + return nullptr; +} + std::shared_ptr try_sw(Instruction& instr, int idx) { if (instr.kind == InstructionKind::SW && instr.get_src(1).is_sym() && instr.get_src(2).is_reg(make_gpr(Reg::S7))) { @@ -691,6 +713,28 @@ BranchDelay get_branch_delay(Instruction& i, int idx) { BranchDelay b(BranchDelay::SET_REG_TRUE); b.destination = make_reg(i.get_dst(0).get_reg(), idx); return b; + } else if (i.kind == InstructionKind::LW && i.get_src(1).is_reg(make_gpr(Reg::S7)) && + i.get_src(0).is_sym()) { + if (i.get_src(0).get_sym() == "binteger") { + BranchDelay b(BranchDelay::SET_BINTEGER); + b.destination = make_reg(i.get_dst(0).get_reg(), idx); + return b; + } else if (i.get_src(0).get_sym() == "pair") { + BranchDelay b(BranchDelay::SET_PAIR); + b.destination = make_reg(i.get_dst(0).get_reg(), idx); + return b; + } + } else if (i.kind == InstructionKind::DSLLV) { + BranchDelay b(BranchDelay::DSLLV); + b.destination = make_reg(i.get_dst(0).get_reg(), idx); + b.source = make_reg(i.get_src(0).get_reg(), idx); + b.source2 = make_reg(i.get_src(1).get_reg(), idx); + return b; + } else if (is_gpr_3(i, InstructionKind::DSUBU, {}, make_gpr(Reg::R0), {})) { + BranchDelay b(BranchDelay::NEGATE); + b.destination = make_reg(i.get_dst(0).get_reg(), idx); + b.source = make_reg(i.get_src(1).get_reg(), idx); + return b; } BranchDelay b(BranchDelay::UNKNOWN); return b; @@ -719,6 +763,10 @@ std::shared_ptr try_bnel(Instruction& instr, Instruction& next_instr, int id return std::make_shared( Condition(Condition::TRUTHY, make_reg(instr.get_src(1).get_reg(), idx), nullptr, nullptr), instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), true); + } else if (instr.kind == InstructionKind::BNEL && instr.get_src(1).is_reg(make_gpr(Reg::R0))) { + return std::make_shared( + Condition(Condition::NONZERO, make_reg(instr.get_src(0).get_reg(), idx), nullptr, nullptr), + instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), true); } else if (instr.kind == InstructionKind::BNEL) { // return std::make_shared(IR_Branch2::NOT_EQUAL, instr.get_src(2).get_label(), // make_reg(instr.get_src(0).get_reg(), idx), @@ -733,7 +781,13 @@ std::shared_ptr try_beql(Instruction& instr, Instruction& next_instr, int id return std::make_shared( Condition(Condition::FALSE, make_reg(instr.get_src(1).get_reg(), idx), nullptr, nullptr), instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), true); - } else if (instr.kind == InstructionKind::BEQL) { + } else if (instr.kind == InstructionKind::BEQL && instr.get_src(1).is_reg(make_gpr(Reg::R0))) { + return std::make_shared( + Condition(Condition::ZERO, make_reg(instr.get_src(0).get_reg(), idx), nullptr, nullptr), + instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), true); + } + + else if (instr.kind == InstructionKind::BEQL) { return std::make_shared( Condition(Condition::EQUAL, make_reg(instr.get_src(0).get_reg(), idx), make_reg(instr.get_src(1).get_reg(), idx), nullptr), @@ -761,6 +815,36 @@ std::shared_ptr try_beq(Instruction& instr, Instruction& next_instr, int idx return nullptr; } +std::shared_ptr try_bgtzl(Instruction& instr, Instruction& next_instr, int idx) { + if (instr.kind == InstructionKind::BGTZL) { + return std::make_shared( + Condition(Condition::GREATER_THAN_ZERO_SIGNED, make_reg(instr.get_src(0).get_reg(), idx), + nullptr, nullptr), + instr.get_src(1).get_label(), get_branch_delay(next_instr, idx), true); + } + return nullptr; +} + +std::shared_ptr try_bgezl(Instruction& instr, Instruction& next_instr, int idx) { + if (instr.kind == InstructionKind::BGEZL) { + return std::make_shared( + Condition(Condition::GEQ_ZERO_SIGNED, make_reg(instr.get_src(0).get_reg(), idx), nullptr, + nullptr), + instr.get_src(1).get_label(), get_branch_delay(next_instr, idx), true); + } + return nullptr; +} + +std::shared_ptr try_bltzl(Instruction& instr, Instruction& next_instr, int idx) { + if (instr.kind == InstructionKind::BLTZL) { + return std::make_shared( + Condition(Condition::LESS_THAN_ZERO, make_reg(instr.get_src(0).get_reg(), idx), nullptr, + nullptr), + instr.get_src(1).get_label(), get_branch_delay(next_instr, idx), true); + } + return nullptr; +} + std::shared_ptr try_daddiu(Instruction& i0, Instruction& i1, int idx) { if (i0.kind == InstructionKind::DADDIU && i1.kind == InstructionKind::MOVN && i0.get_src(0).get_reg() == make_gpr(Reg::S7)) { @@ -1135,6 +1219,7 @@ std::shared_ptr try_lwu(Instruction& i0, } // namespace void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjectFile* file) { + (void)file; for (int instr = block.start_word; instr < block.end_word; instr++) { auto& i = func->instructions.at(instr); @@ -1214,6 +1299,15 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec case InstructionKind::BEQ: result = try_beq(i, next, instr); break; + case InstructionKind::BGTZL: + result = try_bgtzl(i, next, instr); + break; + case InstructionKind::BGEZL: + result = try_bgezl(i, next, instr); + break; + case InstructionKind::BLTZL: + result = try_bltzl(i, next, instr); + break; case InstructionKind::BEQL: result = try_beql(i, next, instr); break; @@ -1384,6 +1478,12 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec case InstructionKind::CVTSW: result = try_cvtsw(i, instr); break; + case InstructionKind::DSRAV: + result = try_dsrav(i, instr); + break; + case InstructionKind::DSRLV: + result = try_dsrlv(i, instr); + break; default: result = nullptr; } @@ -1396,7 +1496,7 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec // everything failed if (!result) { // temp hack for debug: - printf("Instruction -> BasicOp failed on %s\n", i.to_string(*file).c_str()); + // printf("Instruction -> BasicOp failed on %s\n", i.to_string(*file).c_str()); func->add_basic_op(std::make_shared(), instr, instr + 1); } else { func->add_basic_op(result, instr, instr + length); diff --git a/decompiler/IR/CfgBuilder.cpp b/decompiler/IR/CfgBuilder.cpp new file mode 100644 index 0000000000..eca4a3d848 --- /dev/null +++ b/decompiler/IR/CfgBuilder.cpp @@ -0,0 +1,1012 @@ +#include "third-party/fmt/format.h" +#include +#include "common/util/MatchParam.h" +#include "CfgBuilder.h" +#include "decompiler/Function/CfgVtx.h" +#include "decompiler/Function/Function.h" +#include "decompiler/Disasm/InstructionMatching.h" + +namespace { + +std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx); + +/*! + * This adds a single CfgVtx* to a list of IR's by converting it with cfg to IR. + * The trick here is that it will recursively inline anything which would generate an IR begin. + * This avoids the case where Begin's are nested excessively. + */ +void insert_cfg_into_list(Function& f, + LinkedObjectFile& file, + std::vector>* output, + CfgVtx* vtx) { + auto as_sequence = dynamic_cast(vtx); + auto as_block = dynamic_cast(vtx); + if (as_sequence) { + for (auto& x : as_sequence->seq) { + insert_cfg_into_list(f, file, output, x); + } + } else if (as_block) { + auto& block = f.basic_blocks.at(as_block->block_id); + IR* last = nullptr; + for (int instr = block.start_word; instr < block.end_word; instr++) { + auto got = f.get_basic_op_at_instr(instr); + if (got.get() == last) { + continue; + } + last = got.get(); + output->push_back(got); + } + } else { + // doesn't look like we're going to get something that can be inlined, so try as usual + auto ir = cfg_to_ir(f, file, vtx); + auto ir_as_begin = dynamic_cast(ir.get()); + if (ir_as_begin) { + // we unexpectedly got a begin, even though we didn't think we would. This is okay, but we + // should inline this begin to avoid nested begins. This happens in the case where an entire + // control flow pattern is turned into a single op (like type-of) and includes some ops at + // the beginning. We don't have a good way of knowing this will happen until we try it. + for (auto& x : ir_as_begin->forms) { + output->push_back(x); + } + } else { + output->push_back(ir); + } + } +} + +/*! + * If it's a begin with a branch as the last operation, returns a pointer to the branch IR + * and also a pointer to the vector which holds the branch operation in its last slot. + * Otherwise returns nullptr. Useful to modify or remove branches found at the end of blocks, + * and inline things into the begin they were found in. + */ +std::pair>*> get_condition_branch_as_vector(IR* in) { + auto as_seq = dynamic_cast(in); + if (as_seq) { + auto irb = dynamic_cast(as_seq->forms.back().get()); + auto loc = &as_seq->forms; + assert(irb); + return std::make_pair(irb, loc); + } + return std::make_pair(nullptr, nullptr); +} + +/*! + * Given an IR, find a branch IR at the end, and also the location of it so it can be patched. + * Returns nullptr as the first item in the pair if it didn't work. + */ +std::pair*> get_condition_branch(std::shared_ptr* in) { + IR_Branch* condition_branch = dynamic_cast(in->get()); + std::shared_ptr* condition_branch_location = in; + if (!condition_branch) { + // not 100% sure this will always work + auto as_seq = dynamic_cast(in->get()); + if (as_seq) { + condition_branch = dynamic_cast(as_seq->forms.back().get()); + condition_branch_location = &as_seq->forms.back(); + } + } + return std::make_pair(condition_branch, condition_branch_location); +} + +/*! + * Given a CondWithElse IR, remove the internal branches and set the condition to be an actual + * compare IR instead of a branch. + * Doesn't "rebalance" the leading condition because this runs way before expression compaction. + */ +void clean_up_cond_with_else(std::shared_ptr* ir, LinkedObjectFile& file) { + (void)file; + auto cwe = dynamic_cast(ir->get()); + assert(cwe); + for (auto& e : cwe->entries) { + if (e.cleaned) { + continue; + } + auto jump_to_next = get_condition_branch(&e.condition); + assert(jump_to_next.first); + assert(jump_to_next.first->branch_delay.kind == BranchDelay::NOP); + // patch the jump to next with a condition. + auto replacement = std::make_shared(jump_to_next.first->condition); + replacement->condition.invert(); + *(jump_to_next.second) = replacement; + + // patch the jump at the end of a block. + auto jump_to_end = get_condition_branch(&e.body); + assert(jump_to_end.first); + assert(jump_to_end.first->branch_delay.kind == BranchDelay::NOP); + assert(jump_to_end.first->condition.kind == Condition::ALWAYS); + + // if possible, we just want to remove this from the sequence its in. + // but sometimes there's a case with nothing in it so there is no sequence. + // in this case, we can just replace the branch with a NOP IR to indicate that nothing + // happens in this case, but there was still GOAL code to test for it. + // this happens rarely, as you would expect. + auto as_end_of_sequence = get_condition_branch_as_vector(e.body.get()); + if (as_end_of_sequence.first) { + assert(as_end_of_sequence.second->size() > 1); + as_end_of_sequence.second->pop_back(); + } else { + // In the future we could consider having a more explicit "this case is empty" operator so + // this doesn't get confused with an actual MIPS nop. + *(jump_to_end.second) = std::make_shared(); + } + e.cleaned = true; + } +} + +/*! + * Does the instruction in the delay slot set a register to false? + * Note. a beql s7, x followed by a or y, x, r0 will count as this. I don't know why but + * GOAL does this on comparisons to false. + */ +bool delay_slot_sets_false(IR_Branch* branch) { + if (branch->branch_delay.kind == BranchDelay::SET_REG_FALSE) { + return true; + } + + if (branch->condition.kind == Condition::FALSE && + branch->branch_delay.kind == BranchDelay::SET_REG_REG) { + auto reg_check = dynamic_cast(branch->condition.src0.get()); + assert(reg_check); + auto reg_read = dynamic_cast(branch->branch_delay.source.get()); + assert(reg_read); + return reg_check->reg == reg_read->reg; + } + + return false; +} + +/*! + * Does the instruction in the delay slot set a register to a truthy value, like in a GOAL + * or form branch? Either it explicitly sets #t, or it tests the value for being not false, + * then uses that + */ +bool delay_slot_sets_truthy(IR_Branch* branch) { + if (branch->branch_delay.kind == BranchDelay::SET_REG_TRUE) { + return true; + } + + if (branch->condition.kind == Condition::TRUTHY && + branch->branch_delay.kind == BranchDelay::SET_REG_REG) { + auto reg_check = dynamic_cast(branch->condition.src0.get()); + assert(reg_check); + auto reg_read = dynamic_cast(branch->branch_delay.source.get()); + assert(reg_read); + return reg_check->reg == reg_read->reg; + } + + return false; +} + +/*! + * Try to convert a short circuit to an and. + */ +bool try_clean_up_sc_as_and(std::shared_ptr& ir, LinkedObjectFile& file) { + (void)file; + Register destination; + std::shared_ptr ir_dest = nullptr; + for (int i = 0; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(&ir->entries.at(i).condition); + assert(branch.first); + if (!delay_slot_sets_false(branch.first)) { + return false; + } + + if (i == 0) { + ir_dest = branch.first->branch_delay.destination; + destination = dynamic_cast(branch.first->branch_delay.destination.get())->reg; + } else { + if (destination != + dynamic_cast(branch.first->branch_delay.destination.get())->reg) { + return false; + } + } + } + + ir->kind = IR_ShortCircuit::AND; + ir->final_result = ir_dest; + + // now get rid of the branches + for (int i = 0; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(&ir->entries.at(i).condition); + assert(branch.first); + auto replacement = std::make_shared(branch.first->condition); + replacement->condition.invert(); + *(branch.second) = replacement; + } + + return true; +} + +/*! + * Try to convert a short circuit to an or. + * Note - this will convert an and to a very strange or, so always use the try as and first. + */ +bool try_clean_up_sc_as_or(std::shared_ptr& ir, LinkedObjectFile& file) { + (void)file; + Register destination; + std::shared_ptr ir_dest = nullptr; + for (int i = 0; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(&ir->entries.at(i).condition); + assert(branch.first); + if (!delay_slot_sets_truthy(branch.first)) { + return false; + } + assert(dynamic_cast(branch.first->branch_delay.destination.get())); + + if (i == 0) { + ir_dest = branch.first->branch_delay.destination; + destination = dynamic_cast(branch.first->branch_delay.destination.get())->reg; + } else { + if (destination != + dynamic_cast(branch.first->branch_delay.destination.get())->reg) { + return false; + } + } + } + + ir->kind = IR_ShortCircuit::OR; + ir->final_result = ir_dest; + + for (int i = 0; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(&ir->entries.at(i).condition); + assert(branch.first); + auto replacement = std::make_shared(branch.first->condition); + *(branch.second) = replacement; + } + + return true; +} + +void clean_up_sc(std::shared_ptr& ir, LinkedObjectFile& file); + +/*! + * A form like (and x (or y z)) will be recognized as a single SC Vertex by the CFG pass. + * In the case where we fail to clean it up as an AND or an OR, we should attempt splitting. + * Part of the complexity here is that we want to clean up the split recursively so things like + * (and x (or y (and a b))) + * or + * (and x (or y (and a b)) c d (or z)) + * will work correctly. This may require doing more splitting on both sections! + */ +bool try_splitting_nested_sc(std::shared_ptr& ir, LinkedObjectFile& file) { + auto first_branch = get_condition_branch(&ir->entries.front().condition); + assert(first_branch.first); + bool first_is_and = delay_slot_sets_false(first_branch.first); + bool first_is_or = delay_slot_sets_truthy(first_branch.first); + assert(first_is_and != first_is_or); // one or the other but not both! + + int first_different = -1; // the index of the first one that's different. + + for (int i = 1; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(&ir->entries.at(i).condition); + assert(branch.first); + bool is_and = delay_slot_sets_false(branch.first); + bool is_or = delay_slot_sets_truthy(branch.first); + assert(is_and != is_or); + + if (first_different == -1) { + // haven't seen a change yet. + if (first_is_and != is_and) { + // change! + first_different = i; + break; + } + } + } + + assert(first_different != -1); + + std::vector nested_ir; + for (int i = first_different; i < int(ir->entries.size()); i++) { + nested_ir.push_back(ir->entries.at(i)); + } + + auto s = int(ir->entries.size()); + for (int i = first_different; i < s; i++) { + ir->entries.pop_back(); + } + + auto nested_sc = std::make_shared(nested_ir); + clean_up_sc(nested_sc, file); + + // the real trick + IR_ShortCircuit::Entry nested_entry; + nested_entry.condition = nested_sc; + ir->entries.push_back(nested_entry); + + clean_up_sc(ir, file); + + return true; +} + +/*! + * Try to clean up a single short circuit IR. It may get split up into nested IR_ShortCircuits + * if there is a case like (and a (or b c)) + */ +void clean_up_sc(std::shared_ptr& ir, LinkedObjectFile& file) { + (void)file; + assert(ir->entries.size() > 1); + if (!try_clean_up_sc_as_and(ir, file)) { + if (!try_clean_up_sc_as_or(ir, file)) { + if (!try_splitting_nested_sc(ir, file)) { + assert(false); + } + } + } +} + +/*! + * A GOAL comparison which produces a boolean is recognized as a cond-no-else by the CFG analysis. + * But it should not be decompiled as a branching statement. + * This either succeeds or asserts and must be called with with something that can be converted + * successfully + */ +void convert_cond_no_else_to_compare(std::shared_ptr* ir) { + auto cne = dynamic_cast(ir->get()); + assert(cne); + auto condition = get_condition_branch(&cne->entries.front().condition); + assert(condition.first); + auto body = dynamic_cast(cne->entries.front().body.get()); + assert(body); + auto dst = body->dst; + auto src = dynamic_cast(body->src.get()); + assert(src->name == "#f"); + assert(cne->entries.size() == 1); + + auto condition_as_single = dynamic_cast(cne->entries.front().condition.get()); + if (condition_as_single) { + auto replacement = std::make_shared( + IR_Set::REG_64, dst, std::make_shared(condition.first->condition)); + *ir = replacement; + } else { + auto condition_as_seq = dynamic_cast(cne->entries.front().condition.get()); + assert(condition_as_seq); + if (condition_as_seq) { + auto replacement = std::make_shared(); + replacement->forms = condition_as_seq->forms; + assert(condition.second == &condition_as_seq->forms.back()); + replacement->forms.pop_back(); + replacement->forms.push_back(std::make_shared( + IR_Set::REG_64, dst, std::make_shared(condition.first->condition))); + *ir = replacement; + } + } +} + +/*! + * Replace internal branches inside a CondNoElse IR. + * If possible will simplify the entire expression into a comparison operation if possible. + * Will record which registers are set to false in branch delay slots. + * The exact behavior here isn't really clear to me. It's possible that these delay set false + * were disabled in cases where the result of the cond was none, or was a number or something. + * But it generally seems inconsistent. The expression propagation step will have to deal with + * this. + */ +void clean_up_cond_no_else(std::shared_ptr* ir, LinkedObjectFile& file) { + (void)file; + auto cne = dynamic_cast(ir->get()); + assert(cne); + for (size_t idx = 0; idx < cne->entries.size(); idx++) { + auto& e = cne->entries.at(idx); + if (e.cleaned) { + continue; + } + + auto jump_to_next = get_condition_branch(&e.condition); + assert(jump_to_next.first); + + if (jump_to_next.first->branch_delay.kind == BranchDelay::SET_REG_TRUE && + cne->entries.size() == 1) { + convert_cond_no_else_to_compare(ir); + } else { + assert(jump_to_next.first->branch_delay.kind == BranchDelay::SET_REG_FALSE || + jump_to_next.first->branch_delay.kind == BranchDelay::NOP); + assert(jump_to_next.first->condition.kind != Condition::ALWAYS); + + if (jump_to_next.first->branch_delay.kind == BranchDelay::SET_REG_FALSE) { + assert(!e.false_destination); + e.false_destination = jump_to_next.first->branch_delay.destination; + assert(e.false_destination); + } + + auto replacement = std::make_shared(jump_to_next.first->condition); + replacement->condition.invert(); + *(jump_to_next.second) = replacement; + e.cleaned = true; + + if (idx != cne->entries.size() - 1) { + auto jump_to_end = get_condition_branch(&e.body); + assert(jump_to_end.first); + assert(jump_to_end.first->branch_delay.kind == BranchDelay::NOP); + assert(jump_to_end.first->condition.kind == Condition::ALWAYS); + auto as_end_of_sequence = get_condition_branch_as_vector(e.body.get()); + if (as_end_of_sequence.first) { + assert(as_end_of_sequence.second->size() > 1); + as_end_of_sequence.second->pop_back(); + } else { + *(jump_to_end.second) = std::make_shared(); + } + } + } + } +} + +/*! + * Match for a (set! reg (math reg reg)) form + */ +bool is_int_math_3(IR* ir, + MatchParam kind, + MatchParam dst, + MatchParam src0, + MatchParam src1, + Register* dst_out = nullptr, + Register* src0_out = nullptr, + Register* src1_out = nullptr) { + // should be a set reg to int math 2 ir + auto set = dynamic_cast(ir); + if (!set) { + return false; + } + + // destination should be a register + auto dest = dynamic_cast(set->dst.get()); + if (!dest || dst != dest->reg) { + return false; + } + + auto math = dynamic_cast(set->src.get()); + if (!math || kind != math->kind) { + return false; + } + + auto arg0 = dynamic_cast(math->arg0.get()); + auto arg1 = dynamic_cast(math->arg1.get()); + + if (!arg0 || src0 != arg0->reg || !arg1 || src1 != arg1->reg) { + return false; + } + + // it's a match! + if (dst_out) { + *dst_out = dest->reg; + } + + if (src0_out) { + *src0_out = arg0->reg; + } + + if (src1_out) { + *src1_out = arg1->reg; + } + return true; +} + +/*! + * Are these IR's both the same register? False if either is not a register. + */ +bool is_same_reg(IR* a, IR* b) { + auto ar = dynamic_cast(a); + auto br = dynamic_cast(b); + return ar && br && ar->reg == br->reg; +} + +/*! + * Try to convert this SC Vertex into an abs (integer). + * Will return a converted abs IR if successful, or nullptr if its not possible + */ +std::shared_ptr try_sc_as_abs(Function& f, LinkedObjectFile& file, ShortCircuit* vtx) { + if (vtx->entries.size() != 1) { + return nullptr; + } + + auto b0 = dynamic_cast(vtx->entries.at(0)); + if (!b0) { + return nullptr; + } + + // todo, seems possible to be a single op instead of a begin here. + auto b0_ptr = cfg_to_ir(f, file, b0); + auto b0_ir = dynamic_cast(b0_ptr.get()); + + auto branch = dynamic_cast(b0_ir->forms.back().get()); + if (!branch) { + return nullptr; + } + + // check the branch instruction + if (!branch->likely || branch->condition.kind != Condition::LESS_THAN_ZERO || + branch->branch_delay.kind != BranchDelay::NEGATE) { + return nullptr; + } + + auto input = branch->condition.src0; + auto output = branch->branch_delay.destination; + + assert(is_same_reg(input.get(), branch->branch_delay.source.get())); + + if (b0_ir->forms.size() == 1) { + // this is probably fine but happens to not occur in anything we try yet. + assert(false); + } else { + // remove the branch + b0_ir->forms.pop_back(); + // add the ash + b0_ir->forms.push_back(std::make_shared( + IR_Set::REG_64, output, std::make_shared(IR_IntMath1::ABS, input))); + return b0_ptr; + } + + return nullptr; +} + +/*! + * Attempt to convert a short circuit expression into an arithmetic shift. + * GOAL's shift function accepts positive/negative numbers to determine the direction + * of the shift. + */ +std::shared_ptr try_sc_as_ash(Function& f, LinkedObjectFile& file, ShortCircuit* vtx) { + if (vtx->entries.size() != 2) { + return nullptr; + } + + // todo, I think b0 could possibly be something more complicated, depending on how we order. + auto b0 = dynamic_cast(vtx->entries.at(0)); + auto b1 = dynamic_cast(vtx->entries.at(1)); + if (!b0 || !b1) { + return nullptr; + } + + // todo, seems possible to be a single op instead of a begin... + auto b0_ptr = cfg_to_ir(f, file, b0); + auto b0_ir = dynamic_cast(b0_ptr.get()); + + auto b1_ptr = cfg_to_ir(f, file, b1); + auto b1_ir = dynamic_cast(b1_ptr.get()); + + if (!b0_ir || !b1_ir) { + return nullptr; + } + + auto branch = dynamic_cast(b0_ir->forms.back().get()); + if (!branch || b1_ir->forms.size() != 2) { + return nullptr; + } + + // check the branch instruction + if (!branch->likely || branch->condition.kind != Condition::GEQ_ZERO_SIGNED || + branch->branch_delay.kind != BranchDelay::DSLLV) { + return nullptr; + } + + /* + * bgezl s5, L109 ; s5 is the shift amount + dsllv a0, a0, s5 ; a0 is both input and output here + + dsubu a1, r0, s5 ; a1 is a temp here + dsrav a0, a0, a1 ; a0 is both input and output here + */ + + auto sa_in = dynamic_cast(branch->condition.src0.get()); + assert(sa_in); + auto result = dynamic_cast(branch->branch_delay.destination.get()); + auto value_in = dynamic_cast(branch->branch_delay.source.get()); + auto sa_in2 = dynamic_cast(branch->branch_delay.source2.get()); + assert(result && value_in && sa_in2); + assert(sa_in->reg == sa_in2->reg); + + auto dsubu_candidate = b1_ir->forms.at(0); + auto dsrav_candidate = b1_ir->forms.at(1); + + Register clobber; + if (!is_int_math_3(dsubu_candidate.get(), IR_IntMath2::SUB, {}, make_gpr(Reg::R0), sa_in->reg, + &clobber)) { + return nullptr; + } + + assert(result); + assert(value_in); + + bool is_arith = is_int_math_3(dsrav_candidate.get(), IR_IntMath2::RIGHT_SHIFT_ARITH, result->reg, + value_in->reg, clobber); + bool is_logical = is_int_math_3(dsrav_candidate.get(), IR_IntMath2::RIGHT_SHIFT_LOGIC, + result->reg, value_in->reg, clobber); + + if (!is_arith && !is_logical) { + return nullptr; + } + + std::shared_ptr clobber_ir = nullptr; + auto dsubu_set = dynamic_cast(dsubu_candidate.get()); + auto dsrav_set = dynamic_cast(dsrav_candidate.get()); + if (clobber != result->reg) { + clobber_ir = dsubu_set->dst; + } + + std::shared_ptr dest_ir = branch->branch_delay.destination; + std::shared_ptr shift_ir = branch->condition.src0; + std::shared_ptr value_ir = dynamic_cast(dsrav_set->src.get())->arg0; + if (b0_ir->forms.size() == 1) { + // this is probably fine but happens to not occur in anything we try yet. + assert(false); + } else { + // remove the branch + b0_ir->forms.pop_back(); + // add the ash + b0_ir->forms.push_back(std::make_shared( + IR_Set::REG_64, dest_ir, + std::make_shared(shift_ir, value_ir, clobber_ir, is_arith))); + return b0_ptr; + } + + return nullptr; +} + +/*! + * Try to convert a short circuiting expression into a "type-of" expression. + * We do this before attempting the normal and/or expressions. + */ +std::shared_ptr try_sc_as_type_of(Function& f, LinkedObjectFile& file, ShortCircuit* vtx) { + // the assembly looks like this: + /* + dsll32 v1, a0, 29 ;; (set! v1 (shl a0 61)) + beql v1, r0, L60 ;; (bl! (= v1 r0) L60 (unknown-branch-delay)) + lw v1, binteger(s7) + + bgtzl v1, L60 ;; (bl! (>0.s v1) L60 (unknown-branch-delay)) + lw v1, pair(s7) + + lwu v1, -4(a0) ;; (set! v1 (l.wu (+.i a0 -4))) + L60: + */ + + // some of these checks may be a little bit overkill but it's a nice way to sanity check that + // we have actually decoded everything correctly. + if (vtx->entries.size() != 3) { + return nullptr; + } + + auto b0 = dynamic_cast(vtx->entries.at(0)); + auto b1 = dynamic_cast(vtx->entries.at(1)); + auto b2 = dynamic_cast(vtx->entries.at(2)); + + if (!b0 || !b1 || !b2) { + return nullptr; + } + + auto b0_ptr = cfg_to_ir(f, file, b0); + auto b0_ir = dynamic_cast(b0_ptr.get()); + + auto b1_ptr = cfg_to_ir(f, file, b1); + auto b1_ir = dynamic_cast(b1_ptr.get()); + + auto b2_ptr = cfg_to_ir(f, file, b2); + auto b2_ir = dynamic_cast(b2_ptr.get()); + if (!b0_ir || !b1_ir || !b2_ir) { + return nullptr; + } + + auto set_shift = dynamic_cast(b0_ir->forms.at(b0_ir->forms.size() - 2).get()); + if (!set_shift) { + return nullptr; + } + + auto temp_reg0 = dynamic_cast(set_shift->dst.get()); + if (!temp_reg0) { + return nullptr; + } + + auto shift = dynamic_cast(set_shift->src.get()); + if (!shift || shift->kind != IR_IntMath2::LEFT_SHIFT) { + return nullptr; + } + auto src_reg = dynamic_cast(shift->arg0.get()); + auto sa = dynamic_cast(shift->arg1.get()); + if (!src_reg || !sa || sa->value != 61) { + return nullptr; + } + + auto first_branch = dynamic_cast(b0_ir->forms.back().get()); + auto second_branch = b1_ir; + auto else_case = b2_ir; + + if (!first_branch || first_branch->branch_delay.kind != BranchDelay::SET_BINTEGER || + first_branch->condition.kind != Condition::ZERO || !first_branch->likely) { + return nullptr; + } + auto temp_reg = dynamic_cast(first_branch->condition.src0.get()); + assert(temp_reg); + assert(temp_reg->reg == temp_reg0->reg); + auto dst_reg = dynamic_cast(first_branch->branch_delay.destination.get()); + assert(dst_reg); + + if (!second_branch || second_branch->branch_delay.kind != BranchDelay::SET_PAIR || + second_branch->condition.kind != Condition::GREATER_THAN_ZERO_SIGNED || + !second_branch->likely) { + return nullptr; + } + + // check we agree on destination register. + auto dst_reg2 = dynamic_cast(second_branch->branch_delay.destination.get()); + assert(dst_reg2->reg == dst_reg->reg); + + // else case is a lwu to grab the type from a basic + assert(else_case); + auto dst_reg3 = dynamic_cast(else_case->dst.get()); + assert(dst_reg3); + assert(dst_reg3->reg == dst_reg->reg); + auto load_op = dynamic_cast(else_case->src.get()); + if (!load_op || load_op->kind != IR_Load::UNSIGNED || load_op->size != 4) { + return nullptr; + } + auto load_loc = dynamic_cast(load_op->location.get()); + if (!load_loc || load_loc->kind != IR_IntMath2::ADD) { + return nullptr; + } + auto src_reg3 = dynamic_cast(load_loc->arg0.get()); + auto offset = dynamic_cast(load_loc->arg1.get()); + if (!src_reg3 || !offset) { + return nullptr; + } + + assert(src_reg3->reg == src_reg->reg); + assert(offset->value == -4); + + std::shared_ptr clobber = nullptr; + if (temp_reg->reg != dst_reg->reg) { + clobber = first_branch->condition.src0; + } + if (b0_ir->forms.size() == 2) { + return std::make_shared(IR_Set::REG_64, else_case->dst, + std::make_shared(shift->arg0, clobber)); + } else { + // remove the branch + b0_ir->forms.pop_back(); + // remove the shift + b0_ir->forms.pop_back(); + // add the type-of + b0_ir->forms.push_back(std::make_shared( + IR_Set::REG_64, else_case->dst, std::make_shared(shift->arg0, clobber))); + return b0_ptr; + } +} + +std::shared_ptr merge_cond_else_with_sc_cond(CondWithElse* cwe, + const std::shared_ptr& else_ir, + Function& f, + LinkedObjectFile& file) { + auto as_seq = dynamic_cast(else_ir.get()); + if (!as_seq || as_seq->forms.size() != 2) { + return nullptr; + } + + auto first = dynamic_cast(as_seq->forms.at(0).get()); + auto second = dynamic_cast(as_seq->forms.at(1).get()); + if (!first || !second) { + return nullptr; + } + + std::vector entries; + for (auto& x : cwe->entries) { + IR_Cond::Entry e; + e.condition = cfg_to_ir(f, file, x.condition); + e.body = cfg_to_ir(f, file, x.body); + entries.push_back(std::move(e)); + } + + auto first_condition = std::make_shared(); + first_condition->forms.push_back(as_seq->forms.at(0)); + first_condition->forms.push_back(second->entries.front().condition); + + second->entries.front().condition = first_condition; + + for (auto& x : second->entries) { + entries.push_back(x); + } + std::shared_ptr result = std::make_shared(entries); + clean_up_cond_no_else(&result, file); + return result; +} + +/*! + * Main CFG vertex to IR conversion. Will pull basic IR ops from the provided function as needed. + */ +std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx) { + if (dynamic_cast(vtx)) { + auto* bv = dynamic_cast(vtx); + auto& block = f.basic_blocks.at(bv->block_id); + std::vector> irs; + IR* last = nullptr; + for (int instr = block.start_word; instr < block.end_word; instr++) { + auto got = f.get_basic_op_at_instr(instr); + if (got.get() == last) { + continue; + } + last = got.get(); + irs.push_back(got); + } + + if (irs.size() == 1) { + return irs.front(); + } else { + return std::make_shared(irs); + } + + } else if (dynamic_cast(vtx)) { + auto* sv = dynamic_cast(vtx); + + std::vector> irs; + insert_cfg_into_list(f, file, &irs, sv); + + return std::make_shared(irs); + } else if (dynamic_cast(vtx)) { + auto wvtx = dynamic_cast(vtx); + auto result = std::make_shared(cfg_to_ir(f, file, wvtx->condition), + cfg_to_ir(f, file, wvtx->body)); + return result; + } else if (dynamic_cast(vtx)) { + auto* cvtx = dynamic_cast(vtx); + + // the cfg analysis pass may recognize some things out of order, which can cause + // fake nesting. This is actually a problem at this point because it can turn a normal + // cond into a cond with else, which emits different instructions. This attempts to recognize + // an else which is actually more cases and compacts it into a single statement. At this point + // I don't know if this is sufficient to catch all cases. it may even recognize the wrong + // thing in some cases... maybe we should check the delay slot instead? + auto else_ir = cfg_to_ir(f, file, cvtx->else_vtx); + auto fancy_compact_result = merge_cond_else_with_sc_cond(cvtx, else_ir, f, file); + if (fancy_compact_result) { + return fancy_compact_result; + } + + if (dynamic_cast(else_ir.get())) { + auto extra_cond = dynamic_cast(else_ir.get()); + std::vector entries; + for (auto& x : cvtx->entries) { + IR_Cond::Entry e; + e.condition = cfg_to_ir(f, file, x.condition); + e.body = cfg_to_ir(f, file, x.body); + entries.push_back(std::move(e)); + } + for (auto& x : extra_cond->entries) { + entries.push_back(x); + } + std::shared_ptr result = std::make_shared(entries); + clean_up_cond_no_else(&result, file); + return result; + } else { + std::vector entries; + for (auto& x : cvtx->entries) { + IR_CondWithElse::Entry e; + e.condition = cfg_to_ir(f, file, x.condition); + e.body = cfg_to_ir(f, file, x.body); + entries.push_back(std::move(e)); + } + std::shared_ptr result = std::make_shared(entries, else_ir); + clean_up_cond_with_else(&result, file); + return result; + } + } else if (dynamic_cast(vtx)) { + auto* svtx = dynamic_cast(vtx); + // try as a type of expression first + auto as_type_of = try_sc_as_type_of(f, file, svtx); + if (as_type_of) { + return as_type_of; + } + + auto as_ash = try_sc_as_ash(f, file, svtx); + if (as_ash) { + return as_ash; + } + + auto as_abs = try_sc_as_abs(f, file, svtx); + if (as_abs) { + return as_abs; + } + // now try as a normal and/or + std::vector entries; + for (auto& x : svtx->entries) { + IR_ShortCircuit::Entry e; + e.condition = cfg_to_ir(f, file, x); + entries.push_back(e); + } + auto result = std::make_shared(entries); + clean_up_sc(result, file); + // todo clean these into real and/or. + return result; + } else if (dynamic_cast(vtx)) { + auto* cvtx = dynamic_cast(vtx); + std::vector entries; + for (auto& x : cvtx->entries) { + IR_Cond::Entry e; + e.condition = cfg_to_ir(f, file, x.condition); + e.body = cfg_to_ir(f, file, x.body); + entries.push_back(std::move(e)); + } + std::shared_ptr result = std::make_shared(entries); + clean_up_cond_no_else(&result, file); + return result; + } + + throw std::runtime_error("not yet implemented IR conversion."); + return nullptr; +} + +/*! + * Post processing pass to clean up while loops - annoyingly the block before a while loop + * has a jump to the condition branch that we need to remove. This currently happens after all + * conversion but this may need to be revisited depending on the final order of simplifications. + */ +void clean_up_while_loops(IR_Begin* sequence, LinkedObjectFile& file) { + (void)file; + std::vector to_remove; // the list of branches to remove by index in this sequence + for (size_t i = 0; i < sequence->forms.size(); i++) { + auto* form_as_while = dynamic_cast(sequence->forms.at(i).get()); + if (form_as_while) { + assert(i != 0); + auto prev_as_branch = dynamic_cast(sequence->forms.at(i - 1).get()); + assert(prev_as_branch); + // printf("got while intro branch %s\n", prev_as_branch->print(file).c_str()); + // this should be an always jump. We'll assume that the CFG builder successfully checked + // the brach destination, but we will check the condition. + assert(prev_as_branch->condition.kind == Condition::ALWAYS); + assert(prev_as_branch->branch_delay.kind == BranchDelay::NOP); + to_remove.push_back(i - 1); + + // now we should try to find the condition branch: + + auto condition_branch = get_condition_branch(&form_as_while->condition); + + assert(condition_branch.first); + assert(condition_branch.first->branch_delay.kind == BranchDelay::NOP); + // printf("got while condition branch %s\n", condition_branch.first->print(file).c_str()); + auto replacement = std::make_shared(condition_branch.first->condition); + *(condition_branch.second) = replacement; + } + } + + // remove the implied forward always branches. + for (int i = int(to_remove.size()); i-- > 0;) { + auto idx = to_remove.at(i); + assert(dynamic_cast(sequence->forms.at(idx).get())); + sequence->forms.erase(sequence->forms.begin() + idx); + } +} +} // namespace + +/*! + * Use a control flow graph to build a single IR representing a function. + * This should be done after basic ops are added and before typing, variable splitting, and + * expression compaction. + */ +std::shared_ptr build_cfg_ir(Function& function, + ControlFlowGraph& cfg, + LinkedObjectFile& file) { + // printf("build cfg ir\n"); + if (!cfg.is_fully_resolved()) { + return nullptr; + } + + try { + auto top_level = cfg.get_single_top_level(); + // todo, we should apply transformations for fixing up branch instructions for each IR. + // and possibly annotate the IR control flow structure so that we can determine if its and/or + // or whatever. This may require rejecting a huge number of inline assembly functions, and + // possibly resolving the min/max/ash issue. + // auto ir = cfg_to_ir(function, file, top_level); + auto ir = std::make_shared(); + insert_cfg_into_list(function, file, &ir->forms, top_level); + auto all_children = ir->get_all_ir(file); + all_children.push_back(ir); + for (auto& child : all_children) { + // printf("child is %s\n", child->print(file).c_str()); + auto as_begin = dynamic_cast(child.get()); + if (as_begin) { + clean_up_while_loops(as_begin, file); + } + } + return ir; + } catch (std::runtime_error& e) { + return nullptr; + } +} \ No newline at end of file diff --git a/decompiler/IR/CfgBuilder.h b/decompiler/IR/CfgBuilder.h new file mode 100644 index 0000000000..b15b08b626 --- /dev/null +++ b/decompiler/IR/CfgBuilder.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +class IR; +class Function; +class LinkedObjectFile; +class ControlFlowGraph; + +std::shared_ptr build_cfg_ir(Function& function, ControlFlowGraph& cfg, LinkedObjectFile& file); \ No newline at end of file diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index b06b4ea117..277e51b6e7 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -1,21 +1,61 @@ #include "IR.h" #include "decompiler/ObjectFile/LinkedObjectFile.h" +std::vector> IR::get_all_ir(LinkedObjectFile& file) const { + (void)file; + std::vector> result; + get_children(&result); + size_t last_checked = 0; + size_t last_last_checked = -1; + + while (last_checked != last_last_checked) { + last_last_checked = last_checked; + auto end_of_check = result.size(); + for (size_t i = last_checked; i < end_of_check; i++) { + auto it = result.at(i).get(); + assert(it); + it->get_children(&result); + } + last_checked = end_of_check; + } + + return result; +} + std::string IR::print(const LinkedObjectFile& file) const { - // return to_form(file)->toStringPretty(); return pretty_print::to_string(to_form(file)); } +goos::Object IR_Failed::to_form(const LinkedObjectFile& file) const { + (void)file; + return pretty_print::build_list("INVALID-OPERATION"); +} + +void IR_Failed::get_children(std::vector>* output) const { + (void)output; +} + goos::Object IR_Register::to_form(const LinkedObjectFile& file) const { (void)file; return pretty_print::to_symbol(reg.to_charp()); } +void IR_Register::get_children(std::vector>* output) const { + (void)output; +} + goos::Object IR_Set::to_form(const LinkedObjectFile& file) const { return pretty_print::build_list(pretty_print::to_symbol("set!"), dst->to_form(file), src->to_form(file)); } +void IR_Set::get_children(std::vector>* output) const { + // note that we are not returning clobber here because it shouldn't contain anything that + // the IR simplification code should touch. + output->push_back(dst); + output->push_back(src); +} + goos::Object IR_Store::to_form(const LinkedObjectFile& file) const { std::string store_operator; switch (kind) { @@ -51,26 +91,33 @@ goos::Object IR_Store::to_form(const LinkedObjectFile& file) const { src->to_form(file)); } -goos::Object IR_Failed::to_form(const LinkedObjectFile& file) const { - (void)file; - return pretty_print::build_list("INVALID-OPERATION"); -} - goos::Object IR_Symbol::to_form(const LinkedObjectFile& file) const { (void)file; return pretty_print::to_symbol("'" + name); } +void IR_Symbol::get_children(std::vector>* output) const { + (void)output; +} + goos::Object IR_SymbolValue::to_form(const LinkedObjectFile& file) const { (void)file; return pretty_print::to_symbol(name); } +void IR_SymbolValue::get_children(std::vector>* output) const { + (void)output; +} + goos::Object IR_StaticAddress::to_form(const LinkedObjectFile& file) const { // return pretty_print::build_list(pretty_print::to_symbol("&"), file.get_label_name(label_id)); return pretty_print::to_symbol(file.get_label_name(label_id)); } +void IR_StaticAddress::get_children(std::vector>* output) const { + (void)output; +} + goos::Object IR_Load::to_form(const LinkedObjectFile& file) const { std::string load_operator; switch (kind) { @@ -119,6 +166,10 @@ goos::Object IR_Load::to_form(const LinkedObjectFile& file) const { return pretty_print::build_list(pretty_print::to_symbol(load_operator), location->to_form(file)); } +void IR_Load::get_children(std::vector>* output) const { + output->push_back(location); +} + goos::Object IR_FloatMath2::to_form(const LinkedObjectFile& file) const { std::string math_operator; switch (kind) { @@ -148,6 +199,15 @@ goos::Object IR_FloatMath2::to_form(const LinkedObjectFile& file) const { arg1->to_form(file)); } +void IR_FloatMath2::get_children(std::vector>* output) const { + output->push_back(arg0); + output->push_back(arg1); +} + +void IR_FloatMath1::get_children(std::vector>* output) const { + output->push_back(arg); +} + goos::Object IR_IntMath2::to_form(const LinkedObjectFile& file) const { std::string math_operator; switch (kind) { @@ -203,18 +263,30 @@ goos::Object IR_IntMath2::to_form(const LinkedObjectFile& file) const { arg1->to_form(file)); } +void IR_IntMath2::get_children(std::vector>* output) const { + output->push_back(arg0); + output->push_back(arg1); +} + goos::Object IR_IntMath1::to_form(const LinkedObjectFile& file) const { std::string math_operator; switch (kind) { case NOT: math_operator = "lognot"; break; + case ABS: + math_operator = "abs.si"; + break; default: assert(false); } return pretty_print::build_list(pretty_print::to_symbol(math_operator), arg->to_form(file)); } +void IR_IntMath1::get_children(std::vector>* output) const { + output->push_back(arg); +} + goos::Object IR_FloatMath1::to_form(const LinkedObjectFile& file) const { std::string math_operator; switch (kind) { @@ -244,11 +316,19 @@ goos::Object IR_Call::to_form(const LinkedObjectFile& file) const { return pretty_print::build_list("call!"); } +void IR_Call::get_children(std::vector>* output) const { + (void)output; +} + goos::Object IR_IntegerConstant::to_form(const LinkedObjectFile& file) const { (void)file; return pretty_print::to_symbol(std::to_string(value)); } +void IR_IntegerConstant::get_children(std::vector>* output) const { + (void)output; +} + goos::Object BranchDelay::to_form(const LinkedObjectFile& file) const { (void)file; switch (kind) { @@ -263,6 +343,20 @@ goos::Object BranchDelay::to_form(const LinkedObjectFile& file) const { case SET_REG_REG: return pretty_print::build_list(pretty_print::to_symbol("set!"), destination->to_form(file), source->to_form(file)); + case SET_BINTEGER: + return pretty_print::build_list(pretty_print::to_symbol("set!"), destination->to_form(file), + "binteger"); + case SET_PAIR: + return pretty_print::build_list(pretty_print::to_symbol("set!"), destination->to_form(file), + "pair"); + case DSLLV: + return pretty_print::build_list( + pretty_print::to_symbol("set!"), destination->to_form(file), + pretty_print::build_list(pretty_print::to_symbol("shl"), source->to_form(file), + source2->to_form(file))); + case NEGATE: + return pretty_print::build_list(pretty_print::to_symbol("set!"), destination->to_form(file), + pretty_print::build_list("-", source->to_form(file))); case UNKNOWN: return pretty_print::build_list("unknown-branch-delay"); default: @@ -270,6 +364,20 @@ goos::Object BranchDelay::to_form(const LinkedObjectFile& file) const { } } +void BranchDelay::get_children(std::vector>* output) const { + if (destination) { + output->push_back(destination); + } + + if (source) { + output->push_back(source); + } + + if (source2) { + output->push_back(source2); + } +} + goos::Object IR_Nop::to_form(const LinkedObjectFile& file) const { (void)file; return pretty_print::build_list("nop!"); @@ -296,14 +404,108 @@ int Condition::num_args() const { case NONZERO: case FALSE: case TRUTHY: + case GREATER_THAN_ZERO_SIGNED: + case GEQ_ZERO_SIGNED: + case LESS_THAN_ZERO: + case LEQ_ZERO_SIGNED: return 1; case ALWAYS: + case NEVER: return 0; default: assert(false); } } +void Condition::get_children(std::vector>* output) const { + if (src0) { + output->push_back(src0); + } + + if (src1) { + output->push_back(src1); + } +} + +void Condition::invert() { + switch (kind) { + case NOT_EQUAL: + kind = EQUAL; + break; + case EQUAL: + kind = NOT_EQUAL; + break; + case LESS_THAN_SIGNED: + kind = GEQ_SIGNED; + break; + case GREATER_THAN_SIGNED: + kind = LEQ_SIGNED; + break; + case LEQ_SIGNED: + kind = GREATER_THAN_SIGNED; + break; + case GEQ_SIGNED: + kind = LESS_THAN_SIGNED; + break; + case GREATER_THAN_ZERO_SIGNED: + kind = LEQ_ZERO_SIGNED; + break; + case LEQ_ZERO_SIGNED: + kind = GREATER_THAN_ZERO_SIGNED; + break; + case LESS_THAN_ZERO: + kind = GEQ_ZERO_SIGNED; + break; + case GEQ_ZERO_SIGNED: + kind = LESS_THAN_ZERO; + break; + case LESS_THAN_UNSIGNED: + kind = GEQ_UNSIGNED; + break; + case GREATER_THAN_UNSIGNED: + kind = LEQ_UNSIGNED; + break; + case LEQ_UNSIGNED: + kind = GREATER_THAN_UNSIGNED; + break; + case GEQ_UNSIGNED: + kind = LESS_THAN_UNSIGNED; + break; + case ZERO: + kind = NONZERO; + break; + case NONZERO: + kind = ZERO; + break; + case FALSE: + kind = TRUTHY; + break; + case TRUTHY: + kind = FALSE; + break; + case ALWAYS: + kind = NEVER; + break; + case NEVER: + kind = ALWAYS; + break; + case FLOAT_EQUAL: + kind = FLOAT_NOT_EQUAL; + break; + case FLOAT_NOT_EQUAL: + kind = FLOAT_EQUAL; + break; + case FLOAT_LESS_THAN: + kind = FLOAT_GEQ; + break; + case FLOAT_GEQ: + kind = FLOAT_LESS_THAN; + break; + default: + assert(false); + } +} + goos::Object Condition::to_form(const LinkedObjectFile& file) const { int nargs = num_args(); std::string condtion_operator; @@ -353,6 +555,9 @@ goos::Object Condition::to_form(const LinkedObjectFile& file) const { case ALWAYS: condtion_operator = "'#t"; break; + case NEVER: + condtion_operator = "'#f"; + break; case FLOAT_EQUAL: condtion_operator = "=.f"; break; @@ -365,6 +570,18 @@ goos::Object Condition::to_form(const LinkedObjectFile& file) const { case FLOAT_GEQ: condtion_operator = ">=.f"; break; + case GREATER_THAN_ZERO_SIGNED: + condtion_operator = ">0.si"; + break; + case GEQ_ZERO_SIGNED: + condtion_operator = ">=0.si"; + break; + case LESS_THAN_ZERO: + condtion_operator = "<0.si"; + break; + case LEQ_ZERO_SIGNED: + condtion_operator = "<=0.si"; + break; default: assert(false); } @@ -392,11 +609,195 @@ goos::Object IR_Branch::to_form(const LinkedObjectFile& file) const { pretty_print::to_symbol(file.get_label_name(dest_label_idx)), branch_delay.to_form(file)); } +void IR_Branch::get_children(std::vector>* output) const { + condition.get_children(output); + branch_delay.get_children(output); +} + goos::Object IR_Compare::to_form(const LinkedObjectFile& file) const { return condition.to_form(file); } +void IR_Compare::get_children(std::vector>* output) const { + condition.get_children(output); +} + goos::Object IR_Suspend::to_form(const LinkedObjectFile& file) const { (void)file; return pretty_print::build_list("suspend!"); -} \ No newline at end of file +} + +void IR_Nop::get_children(std::vector>* output) const { + (void)output; +} + +void IR_Suspend::get_children(std::vector>* output) const { + (void)output; +} + +goos::Object IR_Begin::to_form(const LinkedObjectFile& file) const { + std::vector list; + list.push_back(pretty_print::to_symbol("begin")); + for (auto& x : forms) { + list.push_back(x->to_form(file)); + } + return pretty_print::build_list(list); +} + +void IR_Begin::get_children(std::vector>* output) const { + for (auto& x : forms) { + output->push_back(x); + } +} + +namespace { +void print_inlining_begin(std::vector* output, IR* ir, const LinkedObjectFile& file) { + auto as_begin = dynamic_cast(ir); + if (as_begin) { + for (auto& x : as_begin->forms) { + output->push_back(x->to_form(file)); + } + } else { + output->push_back(ir->to_form(file)); + } +} + +bool is_single_expression(IR* in) { + return !dynamic_cast(in); +} +} // namespace + +goos::Object IR_WhileLoop::to_form(const LinkedObjectFile& file) const { + std::vector list; + list.push_back(pretty_print::to_symbol("while")); + list.push_back(condition->to_form(file)); + print_inlining_begin(&list, body.get(), file); + return pretty_print::build_list(list); +} + +void IR_WhileLoop::get_children(std::vector>* output) const { + output->push_back(condition); + output->push_back(body); +} + +goos::Object IR_CondWithElse::to_form(const LinkedObjectFile& file) const { + // for now we only turn it into an if statement if both cases won't require a begin at the top + // level. I think it is more common to write these as a two-case cond instead of an if with begin. + if (entries.size() == 1 && is_single_expression(entries.front().body.get()) && + is_single_expression(else_ir.get())) { + std::vector list; + list.push_back(pretty_print::to_symbol("if")); + list.push_back(entries.front().condition->to_form(file)); + list.push_back(entries.front().body->to_form(file)); + list.push_back(else_ir->to_form(file)); + return pretty_print::build_list(list); + } else { + std::vector list; + list.push_back(pretty_print::to_symbol("cond")); + for (auto& e : entries) { + std::vector entry; + entry.push_back(e.condition->to_form(file)); + print_inlining_begin(&entry, e.body.get(), file); + list.push_back(pretty_print::build_list(entry)); + } + std::vector else_form; + else_form.push_back(pretty_print::to_symbol("else")); + print_inlining_begin(&else_form, else_ir.get(), file); + list.push_back(pretty_print::build_list(else_form)); + return pretty_print::build_list(list); + } +} + +void IR_CondWithElse::get_children(std::vector>* output) const { + for (auto& e : entries) { + output->push_back(e.condition); + output->push_back(e.body); + } + output->push_back(else_ir); +} + +goos::Object IR_GetRuntimeType::to_form(const LinkedObjectFile& file) const { + std::vector list = {pretty_print::to_symbol("type-of"), object->to_form(file)}; + return pretty_print::build_list(list); +} + +void IR_GetRuntimeType::get_children(std::vector>* output) const { + output->push_back(object); +} + +goos::Object IR_Cond::to_form(const LinkedObjectFile& file) const { + if (entries.size() == 1 && is_single_expression(entries.front().body.get())) { + // print as an if statement if we can put the body in a single form. + std::vector list; + list.push_back(pretty_print::to_symbol("if")); + list.push_back(entries.front().condition->to_form(file)); + list.push_back(entries.front().body->to_form(file)); + return pretty_print::build_list(list); + } else if (entries.size() == 1) { + // turn into a when if the body requires multiple forms + // todo check to see if the condition starts with a NOT and this can be simplified to an + // unless. + std::vector list; + list.push_back(pretty_print::to_symbol("when")); + list.push_back(entries.front().condition->to_form(file)); + print_inlining_begin(&list, entries.front().body.get(), file); + return pretty_print::build_list(list); + } else { + std::vector list; + list.push_back(pretty_print::to_symbol("cond")); + for (auto& e : entries) { + std::vector entry; + entry.push_back(e.condition->to_form(file)); + print_inlining_begin(&entry, e.body.get(), file); + list.push_back(pretty_print::build_list(entry)); + } + return pretty_print::build_list(list); + } +} + +void IR_Cond::get_children(std::vector>* output) const { + for (auto& e : entries) { + output->push_back(e.condition); + output->push_back(e.body); + } +} + +goos::Object IR_ShortCircuit::to_form(const LinkedObjectFile& file) const { + std::vector forms; + switch (kind) { + case UNKNOWN: + forms.push_back(pretty_print::to_symbol("unknown-sc")); + break; + case AND: + forms.push_back(pretty_print::to_symbol("and")); + break; + case OR: + forms.push_back(pretty_print::to_symbol("or")); + break; + default: + assert(false); + } + for (auto& x : entries) { + forms.push_back(x.condition->to_form(file)); + } + return pretty_print::build_list(forms); +} + +void IR_ShortCircuit::get_children(std::vector>* output) const { + for (auto& x : entries) { + output->push_back(x.condition); + if (x.output) { + output->push_back(x.output); + } + } +} + +goos::Object IR_Ash::to_form(const LinkedObjectFile& file) const { + return pretty_print::build_list(pretty_print::to_symbol(is_signed ? "ash.si" : "ash.ui"), + value->to_form(file), shift_amount->to_form(file)); +} + +void IR_Ash::get_children(std::vector>* output) const { + output->push_back(value); + output->push_back(shift_amount); +} diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index cf79affee2..4ea0b2ac72 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -2,6 +2,7 @@ #define JAK_IR_H #include +#include #include "decompiler/Disasm/Register.h" #include "common/goos/PrettyPrinter.h" @@ -10,7 +11,9 @@ class LinkedObjectFile; class IR { public: virtual goos::Object to_form(const LinkedObjectFile& file) const = 0; + std::vector> get_all_ir(LinkedObjectFile& file) const; std::string print(const LinkedObjectFile& file) const; + virtual void get_children(std::vector>* output) const = 0; bool is_basic_op = false; }; @@ -19,12 +22,14 @@ class IR_Failed : public IR { public: IR_Failed() = default; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_Register : public IR { public: IR_Register(Register _reg, int _instr_idx) : reg(_reg), instr_idx(_instr_idx) {} goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; Register reg; int instr_idx = -1; }; @@ -45,6 +50,7 @@ class IR_Set : public IR { IR_Set(Kind _kind, std::shared_ptr _dst, std::shared_ptr _src) : kind(_kind), dst(std::move(_dst)), src(std::move(_src)) {} goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; std::shared_ptr dst, src; std::shared_ptr clobber = nullptr; }; @@ -60,34 +66,38 @@ class IR_Store : public IR_Set { class IR_Symbol : public IR { public: - IR_Symbol(std::string _name) : name(std::move(_name)) {} + explicit IR_Symbol(std::string _name) : name(std::move(_name)) {} std::string name; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_SymbolValue : public IR { public: - IR_SymbolValue(std::string _name) : name(std::move(_name)) {} + explicit IR_SymbolValue(std::string _name) : name(std::move(_name)) {} std::string name; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_StaticAddress : public IR { public: - IR_StaticAddress(int _label_id) : label_id(_label_id) {} + explicit IR_StaticAddress(int _label_id) : label_id(_label_id) {} int label_id = -1; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_Load : public IR { public: enum Kind { UNSIGNED, SIGNED, FLOAT } kind; - IR_Load(Kind _kind, int _size, const std::shared_ptr& _location) - : kind(_kind), size(_size), location(_location) {} + IR_Load(Kind _kind, int _size, std::shared_ptr _location) + : kind(_kind), size(_size), location(std::move(_location)) {} int size; std::shared_ptr location; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_FloatMath2 : public IR { @@ -97,6 +107,7 @@ class IR_FloatMath2 : public IR { : kind(_kind), arg0(std::move(_arg0)), arg1(std::move(_arg1)) {} std::shared_ptr arg0, arg1; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_FloatMath1 : public IR { @@ -105,6 +116,7 @@ class IR_FloatMath1 : public IR { IR_FloatMath1(Kind _kind, std::shared_ptr _arg) : kind(_kind), arg(std::move(_arg)) {} std::shared_ptr arg; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_IntMath2 : public IR { @@ -130,20 +142,23 @@ class IR_IntMath2 : public IR { : kind(_kind), arg0(std::move(_arg0)), arg1(std::move(_arg1)) {} std::shared_ptr arg0, arg1; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_IntMath1 : public IR { public: - enum Kind { NOT } kind; + enum Kind { NOT, ABS } kind; IR_IntMath1(Kind _kind, std::shared_ptr _arg) : kind(_kind), arg(std::move(_arg)) {} std::shared_ptr arg; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_Call : public IR { public: IR_Call() = default; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_IntegerConstant : public IR { @@ -151,13 +166,25 @@ class IR_IntegerConstant : public IR { int64_t value; explicit IR_IntegerConstant(int64_t _value) : value(_value) {} goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; struct BranchDelay { - enum Kind { NOP, SET_REG_FALSE, SET_REG_TRUE, SET_REG_REG, UNKNOWN } kind; - std::shared_ptr destination = nullptr, source = nullptr; - BranchDelay(Kind _kind) : kind(_kind) {} + enum Kind { + NOP, + SET_REG_FALSE, + SET_REG_TRUE, + SET_REG_REG, + SET_BINTEGER, + SET_PAIR, + DSLLV, + NEGATE, + UNKNOWN + } kind; + std::shared_ptr destination = nullptr, source = nullptr, source2 = nullptr; + explicit BranchDelay(Kind _kind) : kind(_kind) {} goos::Object to_form(const LinkedObjectFile& file) const; + void get_children(std::vector>* output) const; }; struct Condition { @@ -168,6 +195,10 @@ struct Condition { GREATER_THAN_SIGNED, LEQ_SIGNED, GEQ_SIGNED, + GREATER_THAN_ZERO_SIGNED, + LEQ_ZERO_SIGNED, + LESS_THAN_ZERO, + GEQ_ZERO_SIGNED, LESS_THAN_UNSIGNED, GREATER_THAN_UNSIGNED, LEQ_UNSIGNED, @@ -177,6 +208,7 @@ struct Condition { FALSE, TRUTHY, ALWAYS, + NEVER, FLOAT_EQUAL, FLOAT_NOT_EQUAL, FLOAT_LESS_THAN, @@ -201,6 +233,8 @@ struct Condition { int num_args() const; goos::Object to_form(const LinkedObjectFile& file) const; std::shared_ptr src0, src1, clobber; + void get_children(std::vector>* output) const; + void invert(); }; class IR_Branch : public IR { @@ -217,6 +251,7 @@ class IR_Branch : public IR { bool likely; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_Compare : public IR { @@ -226,18 +261,113 @@ class IR_Compare : public IR { Condition condition; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_Nop : public IR { public: IR_Nop() = default; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; class IR_Suspend : public IR { public: IR_Suspend() = default; goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; +}; + +class IR_Begin : public IR { + public: + IR_Begin() = default; + explicit IR_Begin(const std::vector>& _forms) : forms(std::move(_forms)) {} + goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; + std::vector> forms; +}; + +class IR_WhileLoop : public IR { + public: + IR_WhileLoop(std::shared_ptr _condition, std::shared_ptr _body) + : condition(std::move(_condition)), body(std::move(_body)) {} + goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; + std::shared_ptr condition, body; +}; + +class IR_CondWithElse : public IR { + public: + struct Entry { + std::shared_ptr condition = nullptr; + std::shared_ptr body = nullptr; + bool cleaned = false; + }; + std::vector entries; + std::shared_ptr else_ir; + IR_CondWithElse(std::vector _entries, std::shared_ptr _else_ir) + : entries(std::move(_entries)), else_ir(std::move(_else_ir)) {} + goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; +}; + +// this one doesn't have an else statement. Will return false if none of the cases are taken. +class IR_Cond : public IR { + public: + struct Entry { + std::shared_ptr condition = nullptr; + std::shared_ptr body = nullptr; + std::shared_ptr false_destination = nullptr; + bool cleaned = false; + }; + std::vector entries; + explicit IR_Cond(std::vector _entries) : entries(std::move(_entries)) {} + goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; +}; + +// this will work on pairs, bintegers, or basics +class IR_GetRuntimeType : public IR { + public: + std::shared_ptr object, clobber; + IR_GetRuntimeType(std::shared_ptr _object, std::shared_ptr _clobber) + : object(std::move(_object)), clobber(std::move(_clobber)) {} + goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; +}; + +class IR_ShortCircuit : public IR { + public: + struct Entry { + std::shared_ptr condition = nullptr; + std::shared_ptr output = nullptr; // where the delay slot writes to. + bool cleaned = false; + }; + + enum Kind { UNKNOWN, AND, OR } kind = UNKNOWN; + + std::shared_ptr final_result = nullptr; // the register that the final result goes in. + + std::vector entries; + explicit IR_ShortCircuit(std::vector _entries) : entries(std::move(_entries)) {} + goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; +}; + +class IR_Ash : public IR { + public: + std::shared_ptr shift_amount, value, clobber; + bool is_signed = true; + IR_Ash(std::shared_ptr _shift_amount, + std::shared_ptr _value, + std::shared_ptr _clobber, + bool _is_signed) + : shift_amount(std::move(_shift_amount)), + value(std::move(_value)), + clobber(std::move(_clobber)), + is_signed(_is_signed) {} + goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; }; #endif // JAK_IR_H diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index 5929ccb5db..58e86acc15 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -622,6 +622,11 @@ std::string LinkedObjectFile::print_disassembly() { */ } + if (func.ir) { + result += ";; ir\n"; + result += func.ir->print(*this); + } + result += "\n\n\n"; } diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index 8bdcb184e4..184dbd34f5 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -19,6 +19,7 @@ #include "common/util/FileUtil.h" #include "decompiler/Function/BasicBlocks.h" #include "decompiler/IR/BasicOpBuilder.h" +#include "decompiler/IR/CfgBuilder.h" /*! * Get a unique name for this object file. @@ -591,17 +592,21 @@ void ObjectFileDB::analyze_functions() { // } } - int total_nontrivial_functions = 0; - int total_resolved_nontrivial_functions = 0; + int total_trivial_cfg_functions = 0; int total_named_functions = 0; int total_basic_ops = 0; int total_failed_basic_ops = 0; + int asm_funcs = 0; + int non_asm_funcs = 0; + int successful_cfg_irs = 0; + std::map> unresolved_by_length; if (get_config().find_basic_blocks) { timer.start(); int total_basic_blocks = 0; for_each_function([&](Function& func, int segment_id, ObjectFileData& data) { + // printf("in %s\n", func.guessed_name.to_string().c_str()); auto blocks = find_blocks_in_function(data.linked_data, segment_id, func); total_basic_blocks += blocks.size(); func.basic_blocks = blocks; @@ -618,17 +623,21 @@ void ObjectFileDB::analyze_functions() { total_basic_ops += func.get_basic_op_count(); total_failed_basic_ops += func.get_failed_basic_op_count(); + func.ir = build_cfg_ir(func, *func.cfg, data.linked_data); + non_asm_funcs++; + if (func.ir) { + successful_cfg_irs++; + } + if (func.cfg->is_fully_resolved()) { resolved_cfg_functions++; } } else { - resolved_cfg_functions++; + asm_funcs++; } if (func.basic_blocks.size() > 1 && !func.suspected_asm) { - total_nontrivial_functions++; if (func.cfg->is_fully_resolved()) { - total_resolved_nontrivial_functions++; } else { if (!func.guessed_name.empty()) { unresolved_by_length[func.end_word - func.start_word].push_back( @@ -637,25 +646,32 @@ void ObjectFileDB::analyze_functions() { } } + if (!func.suspected_asm && func.basic_blocks.size() <= 1) { + total_trivial_cfg_functions++; + } + if (!func.guessed_name.empty()) { total_named_functions++; } + + // if (func.guessed_name.to_string() == "inspect") { + // assert(false); + // } }); - printf("Found %d functions (%d with nontrivial cfgs)\n", total_functions, - total_nontrivial_functions); + printf("Found %d functions (%d with no control flow)\n", total_functions, + total_trivial_cfg_functions); printf("Named %d/%d functions (%.2f%%)\n", total_named_functions, total_functions, 100.f * float(total_named_functions) / float(total_functions)); + printf("Excluding %d asm functions\n", asm_funcs); printf("Found %d basic blocks in %.3f ms\n", total_basic_blocks, timer.getMs()); printf(" %d/%d functions passed cfg analysis stage (%.2f%%)\n", resolved_cfg_functions, - total_functions, 100.f * float(resolved_cfg_functions) / float(total_functions)); - printf(" %d/%d nontrivial cfg's resolved (%.2f%%)\n", total_resolved_nontrivial_functions, - total_nontrivial_functions, - 100.f * float(total_resolved_nontrivial_functions) / float(total_nontrivial_functions)); - + non_asm_funcs, 100.f * float(resolved_cfg_functions) / float(non_asm_funcs)); int successful_basic_ops = total_basic_ops - total_failed_basic_ops; printf(" %d/%d basic ops converted successfully (%.2f%%)\n", successful_basic_ops, total_basic_ops, 100.f * float(successful_basic_ops) / float(total_basic_ops)); + printf(" %d/%d cfgs converted to ir (%.2f%%)\n", successful_cfg_irs, non_asm_funcs, + 100.f * float(successful_cfg_irs) / float(non_asm_funcs)); // for (auto& kv : unresolved_by_length) { // printf("LEN %d\n", kv.first); diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index ee4fc887ee..5404b03c46 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -30,12 +30,45 @@ "asm_functions_by_name":[ // gcommon - "ash", "abs", "min", "max", "(method 2 vec4s)", "quad-copy!", "(method 3 vec4s)", "breakpoint-range-set!", + "min", "max", "(method 2 vec4s)", "quad-copy!", "(method 3 vec4s)", "breakpoint-range-set!", // pskernel "resend-exception", "kernel-set-interrupt-vector", "kernel-set-exception-vector", "return-from-exception", "kernel-read", "kernel-read-function", "kernel-write", "kernel-write-function", "kernel-copy-to-kernel-ram", + // this one needs more investigation. nothing looks weird about it but it fails... + "camera-change-to", + + // two back to back arithmetic shifts... + "texture-relocate", + + // this one fails due to false compaction where an else case has only a not expression in it. + "master-is-hopeful-better?", + + // fails for unknown reason + "target-falling-anim-trans", "change-brother", + + // merged right typecase... can probably handle this + "cspace-inspect-tree", + + // these are all valid, but use short circuiting branches in strange ways. There's probably a few compiler uses that we're not + "(method 21 actor-link-info)","(method 20 actor-link-info)","(method 28 collide-shape-prim-mesh)", "(method 35 collide-shape)", + "debug-menu-item-var-render", "(method 14 level)","add-blue-motion","anim-tester-add-newobj","(method 27 orb-cache-top)", + + // real asm + "cspace<-parented-transformq-joint!", "blerc-a-fragment", "render-boundary-tri", "render-boundary-quad", + "(method 19 collide-shape-prim-sphere)","vector-segment-distance-point!", "exp", "(method 11 collide-mesh-cache)", + "(method 13 collide-edge-work)", "ambient-inspect", + + "(method 11 cpu-thread)", "atan0", "sincos!", "sincos-rad!", "disasm-dma-list", "vblank-handler", "vif1-handler", + "vif1-handler-debug", "entity-actor-count", "decompress-frame-data-pair-to-accumulator", + "decompress-frame-data-to-accumulator", "normalize-frame-quaternions", "clear-frame-accumulator", + "generic-copy-vtx-dclr-dtex", "generic-no-light-dproc-only", "generic-no-light-proc", "mercneric-bittable-asm", + "generic-tie-decompress", "matrix-axis-sin-cos!", "matrix-axis-sin-cos-vu!", "generic-prepare-dma-single", + "(method 13 collide-shape-prim-sphere)", "(method 14 collide-shape-prim-sphere)", "(method 12 collide-shape-prim-sphere)", + "adgif-shader<-texture-with-update!", "generic-interp-dproc", "sprite-draw-distorters", "draw-bones", "(method 9 collide-mesh-cache)", + "(method 18 collide-shape-prim-sphere)","birth-pickup-at-point", + "collide-do-primitives", "draw-bones-check-longest-edge-asm", "sp-launch-particles-var", "(method 15 collide-shape-prim-mesh)", "(method 15 collide-shape-prim-sphere)", "(method 45 collide-shape)", "cam-layout-save-cam-trans", "kernel-copy-function", "dma-sync-hang", "generic-no-light-dproc", diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index ae5aece6f8..6e623e9e3e 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -1,5 +1,5 @@ # We define our own compilation flags here. -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) # Set default compile flags for GCC # optimization level can be set here. Note that game/ overwrites this for building game C++ code.