From b7cee7b666ec04af60bdfd993982c02b969a8ba9 Mon Sep 17 00:00:00 2001 From: water Date: Sat, 3 Oct 2020 15:27:17 -0400 Subject: [PATCH] add type of recognition --- decompiler/IR/BasicOpBuilder.cpp | 32 +++++- decompiler/IR/CfgBuilder.cpp | 167 +++++++++++++++++++++++++++++-- decompiler/IR/IR.cpp | 61 ++++++++--- decompiler/IR/IR.h | 16 ++- 4 files changed, 249 insertions(+), 27 deletions(-) diff --git a/decompiler/IR/BasicOpBuilder.cpp b/decompiler/IR/BasicOpBuilder.cpp index b7012b0291..bf3eaacaad 100644 --- a/decompiler/IR/BasicOpBuilder.cpp +++ b/decompiler/IR/BasicOpBuilder.cpp @@ -695,6 +695,17 @@ BranchDelay get_branch_delay(Instruction& i, int idx) { BranchDelay b(BranchDelay::SET_REG_TRUE); b.destination = make_reg(i.get_dst(0).get_reg(), idx); return b; + } else if (i.kind == InstructionKind::LW && i.get_src(1).is_reg(make_gpr(Reg::S7)) && + i.get_src(0).is_sym()) { + if (i.get_src(0).get_sym() == "binteger") { + BranchDelay b(BranchDelay::SET_BINTEGER); + b.destination = make_reg(i.get_dst(0).get_reg(), idx); + return b; + } else if (i.get_src(0).get_sym() == "pair") { + BranchDelay b(BranchDelay::SET_PAIR); + b.destination = make_reg(i.get_dst(0).get_reg(), idx); + return b; + } } BranchDelay b(BranchDelay::UNKNOWN); return b; @@ -737,7 +748,13 @@ std::shared_ptr try_beql(Instruction& instr, Instruction& next_instr, int id return std::make_shared( Condition(Condition::FALSE, make_reg(instr.get_src(1).get_reg(), idx), nullptr, nullptr), instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), true); - } else if (instr.kind == InstructionKind::BEQL) { + } else if (instr.kind == InstructionKind::BEQL && instr.get_src(1).is_reg(make_gpr(Reg::R0))) { + return std::make_shared( + Condition(Condition::ZERO, make_reg(instr.get_src(0).get_reg(), idx), nullptr, nullptr), + instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), true); + } + + else if (instr.kind == InstructionKind::BEQL) { return std::make_shared( Condition(Condition::EQUAL, make_reg(instr.get_src(0).get_reg(), idx), make_reg(instr.get_src(1).get_reg(), idx), nullptr), @@ -765,6 +782,16 @@ std::shared_ptr try_beq(Instruction& instr, Instruction& next_instr, int idx return nullptr; } +std::shared_ptr try_bgtzl(Instruction& instr, Instruction& next_instr, int idx) { + if (instr.kind == InstructionKind::BGTZL) { + return std::make_shared( + Condition(Condition::GREATER_THAN_ZERO_SIGNED, make_reg(instr.get_src(0).get_reg(), idx), + nullptr, nullptr), + instr.get_src(1).get_label(), get_branch_delay(next_instr, idx), true); + } + return nullptr; +} + std::shared_ptr try_daddiu(Instruction& i0, Instruction& i1, int idx) { if (i0.kind == InstructionKind::DADDIU && i1.kind == InstructionKind::MOVN && i0.get_src(0).get_reg() == make_gpr(Reg::S7)) { @@ -1219,6 +1246,9 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec case InstructionKind::BEQ: result = try_beq(i, next, instr); break; + case InstructionKind::BGTZL: + result = try_bgtzl(i, next, instr); + break; case InstructionKind::BEQL: result = try_beql(i, next, instr); break; diff --git a/decompiler/IR/CfgBuilder.cpp b/decompiler/IR/CfgBuilder.cpp index da389513cd..006fc6ce83 100644 --- a/decompiler/IR/CfgBuilder.cpp +++ b/decompiler/IR/CfgBuilder.cpp @@ -60,7 +60,18 @@ void insert_cfg_into_list(Function& f, output->push_back(got); } } else { - output->push_back(cfg_to_ir(f, file, vtx)); + // output->push_back(cfg_to_ir(f, file, vtx)); + auto ir = cfg_to_ir(f, file, vtx); + auto ir_as_begin = dynamic_cast(ir.get()); + if (ir_as_begin) { + // we unexpectedly got a begin, even though we didn't think we would. This is okay, but we + // should inline this begin to avoid nested begins. + for (auto& x : ir_as_begin->forms) { + output->push_back(x); + } + } else { + output->push_back(ir); + } } } @@ -94,7 +105,7 @@ void clean_up_cond_with_else(IR_CondWithElse* cwe, LinkedObjectFile& file) { auto jump_to_next = get_condition_branch(&e.condition); assert(jump_to_next.first); assert(jump_to_next.first->branch_delay.kind == BranchDelay::NOP); - printf("got cond condition %s\n", jump_to_next.first->print(file).c_str()); + // printf("got cond condition %s\n", jump_to_next.first->print(file).c_str()); auto replacement = std::make_shared(jump_to_next.first->condition); *(jump_to_next.second) = replacement; @@ -116,6 +127,138 @@ void clean_up_cond_with_else(IR_CondWithElse* cwe, LinkedObjectFile& file) { } } +std::shared_ptr try_sc_as_type_of(Function& f, LinkedObjectFile& file, ShortCircuit* vtx) { + // the assembly looks like this: + /* + dsll32 v1, a0, 29 ;; (set! v1 (shl a0 61)) + beql v1, r0, L60 ;; (bl! (= v1 r0) L60 (unknown-branch-delay)) + lw v1, binteger(s7) + + bgtzl v1, L60 ;; (bl! (>0.s v1) L60 (unknown-branch-delay)) + lw v1, pair(s7) + + lwu v1, -4(a0) ;; (set! v1 (l.wu (+.i a0 -4))) + L60: + */ + + // some of these checks may be a little bit overkill but it's a nice way to sanity check that + // we have actually decoded everything correctly. + if (vtx->entries.size() != 3) { + return nullptr; + } + + auto b0 = dynamic_cast(vtx->entries.at(0)); + auto b1 = dynamic_cast(vtx->entries.at(1)); + auto b2 = dynamic_cast(vtx->entries.at(2)); + + if (!b0 || !b1 || !b2) { + return nullptr; + } + + auto b0_ptr = cfg_to_ir(f, file, b0); + auto b0_ir = dynamic_cast(b0_ptr.get()); + + auto b1_ptr = cfg_to_ir(f, file, b1); + auto b1_ir = dynamic_cast(b1_ptr.get()); + + auto b2_ptr = cfg_to_ir(f, file, b2); + auto b2_ir = dynamic_cast(b2_ptr.get()); + if (!b0_ir || !b1_ir || !b2_ir) { + return nullptr; + } + + // todo determine temp and source reg from dsll32 instruction. + + auto set_shift = dynamic_cast(b0_ir->forms.at(b0_ir->forms.size() - 2).get()); + if (!set_shift) { + return nullptr; + } + + auto temp_reg0 = dynamic_cast(set_shift->dst.get()); + if (!temp_reg0) { + return nullptr; + } + + auto shift = dynamic_cast(set_shift->src.get()); + if (!shift || shift->kind != IR_IntMath2::LEFT_SHIFT) { + return nullptr; + } + auto src_reg = dynamic_cast(shift->arg0.get()); + auto sa = dynamic_cast(shift->arg1.get()); + if (!src_reg || !sa || sa->value != 61) { + return nullptr; + } + + auto first_branch = dynamic_cast(b0_ir->forms.back().get()); + auto second_branch = b1_ir; + auto else_case = b2_ir; + + if (!first_branch || first_branch->branch_delay.kind != BranchDelay::SET_BINTEGER || + first_branch->condition.kind != Condition::ZERO || !first_branch->likely) { + return nullptr; + } + auto temp_reg = dynamic_cast(first_branch->condition.src0.get()); + assert(temp_reg); + assert(temp_reg->reg == temp_reg0->reg); + auto dst_reg = dynamic_cast(first_branch->branch_delay.destination.get()); + assert(dst_reg); + + if (!second_branch || second_branch->branch_delay.kind != BranchDelay::SET_PAIR || + second_branch->condition.kind != Condition::GREATER_THAN_ZERO_SIGNED || + !second_branch->likely) { + return nullptr; + } + + // check we agree on destination register. + auto dst_reg2 = dynamic_cast(second_branch->branch_delay.destination.get()); + assert(dst_reg2->reg == dst_reg->reg); + + // else case is a lwu to grab the type from a basic + assert(else_case); + auto dst_reg3 = dynamic_cast(else_case->dst.get()); + assert(dst_reg3); + assert(dst_reg3->reg == dst_reg->reg); + auto load_op = dynamic_cast(else_case->src.get()); + if (!load_op || load_op->kind != IR_Load::UNSIGNED || load_op->size != 4) { + return nullptr; + } + auto load_loc = dynamic_cast(load_op->location.get()); + if (!load_loc || load_loc->kind != IR_IntMath2::ADD) { + return nullptr; + } + auto src_reg3 = dynamic_cast(load_loc->arg0.get()); + auto offset = dynamic_cast(load_loc->arg1.get()); + if (!src_reg3 || !offset) { + return nullptr; + } + + assert(src_reg3->reg == src_reg->reg); + assert(offset->value == -4); + + printf("Candidates for SC type-of:\n%s\n%s\n%s\n", b0_ir->print(file).c_str(), + b1_ir->print(file).c_str(), b2_ir->print(file).c_str()); + + std::shared_ptr clobber = nullptr; + if (temp_reg->reg != src_reg->reg && temp_reg->reg != dst_reg->reg) { + clobber = first_branch->condition.src0; + } + if (b0_ir->forms.size() == 2) { + return std::make_shared(IR_Set::REG_64, else_case->dst, + std::make_shared(shift->arg0, clobber)); + } else { + // i'm not brave enough to enable this until I have found a better test case + // remove the branch + b0_ir->forms.pop_back(); + // remove the shift + b0_ir->forms.pop_back(); + // add the type-of + b0_ir->forms.push_back(std::make_shared( + IR_Set::REG_64, else_case->dst, std::make_shared(shift->arg0, clobber))); + return b0_ptr; + } + return nullptr; // todo +} + std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx) { if (dynamic_cast(vtx)) { auto* bv = dynamic_cast(vtx); @@ -162,12 +305,16 @@ std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx) auto result = std::make_shared(entries, else_ir); clean_up_cond_with_else(result.get(), file); return result; + } else if (dynamic_cast(vtx)) { + auto* svtx = dynamic_cast(vtx); + auto as_type_of = try_sc_as_type_of(f, file, svtx); + if (as_type_of) { + return as_type_of; + } } - else { - throw std::runtime_error("not yet implemented IR conversion."); - return nullptr; - } + throw std::runtime_error("not yet implemented IR conversion."); + return nullptr; } void clean_up_while_loops(IR_Begin* sequence, LinkedObjectFile& file) { @@ -178,7 +325,7 @@ void clean_up_while_loops(IR_Begin* sequence, LinkedObjectFile& file) { assert(i != 0); auto prev_as_branch = dynamic_cast(sequence->forms.at(i - 1).get()); assert(prev_as_branch); - printf("got while intro branch %s\n", prev_as_branch->print(file).c_str()); + // printf("got while intro branch %s\n", prev_as_branch->print(file).c_str()); // this should be an always jump. We'll assume that the CFG builder successfully checked // the brach destination, but we will check the condition. assert(prev_as_branch->condition.kind == Condition::ALWAYS); @@ -191,7 +338,7 @@ void clean_up_while_loops(IR_Begin* sequence, LinkedObjectFile& file) { assert(condition_branch.first); assert(condition_branch.first->branch_delay.kind == BranchDelay::NOP); - printf("got while condition branch %s\n", condition_branch.first->print(file).c_str()); + // printf("got while condition branch %s\n", condition_branch.first->print(file).c_str()); auto replacement = std::make_shared(condition_branch.first->condition); *(condition_branch.second) = replacement; } @@ -220,7 +367,9 @@ std::shared_ptr build_cfg_ir(Function& function, // and possibly annotate the IR control flow structure so that we can determine if its and/or // or whatever. This may require rejecting a huge number of inline assembly functions, and // possibly resolving the min/max/ash issue. - auto ir = cfg_to_ir(function, file, top_level); + // auto ir = cfg_to_ir(function, file, top_level); + auto ir = std::make_shared(); + insert_cfg_into_list(function, file, &ir->forms, top_level); auto all_children = ir->get_all_ir(file); all_children.push_back(ir); for (auto& child : all_children) { diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index b3a78dfb8b..a60ea68ba9 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -312,6 +312,10 @@ std::shared_ptr
BranchDelay::to_form(const LinkedObjectFile& file) const { return buildList(toForm("set!"), destination->to_form(file), "'#t"); case SET_REG_REG: return buildList(toForm("set!"), destination->to_form(file), source->to_form(file)); + case SET_BINTEGER: + return buildList(toForm("set!"), destination->to_form(file), "binteger"); + case SET_PAIR: + return buildList(toForm("set!"), destination->to_form(file), "pair"); case UNKNOWN: return buildList("unknown-branch-delay"); default: @@ -355,6 +359,7 @@ int Condition::num_args() const { case NONZERO: case FALSE: case TRUTHY: + case GREATER_THAN_ZERO_SIGNED: return 1; case ALWAYS: return 0; @@ -434,6 +439,9 @@ std::shared_ptr Condition::to_form(const LinkedObjectFile& file) const { case FLOAT_GEQ: condtion_operator = ">=.f"; break; + case GREATER_THAN_ZERO_SIGNED: + condtion_operator = ">0.s"; + break; default: assert(false); } @@ -493,7 +501,6 @@ std::shared_ptr IR_Begin::to_form(const LinkedObjectFile& file) const { return buildList(list); } - void IR_Begin::get_children(std::vector>* output) const { for (auto& x : forms) { output->push_back(x); @@ -513,6 +520,10 @@ void print_inlining_begin(std::vector>* output, output->push_back(ir->to_form(file)); } } + +bool is_single_expression(IR* in) { + return !dynamic_cast(in); +} } // namespace std::shared_ptr IR_WhileLoop::to_form(const LinkedObjectFile& file) const { @@ -529,26 +540,46 @@ void IR_WhileLoop::get_children(std::vector>* output) const } std::shared_ptr IR_CondWithElse::to_form(const LinkedObjectFile& file) const { - // todo - special case to print as if with else - std::vector> list; - list.push_back(toForm("cond")); - for(auto& e : entries) { - std::vector> entry; - entry.push_back(e.condition->to_form(file)); - print_inlining_begin(&entry, e.body.get(), file); - list.push_back(buildList(entry)); + // for now we only turn it into an if statement if both cases won't require a begin at the top + // level. I think it is more common to write these as a two-case cond instead of an if with begin. + if (entries.size() == 1 && is_single_expression(entries.front().body.get()) && + is_single_expression(else_ir.get())) { + std::vector> list; + list.push_back(toForm("if")); + list.push_back(entries.front().condition->to_form(file)); + list.push_back(entries.front().body->to_form(file)); + list.push_back(else_ir->to_form(file)); + return buildList(list); + } else { + std::vector> list; + list.push_back(toForm("cond")); + for (auto& e : entries) { + std::vector> entry; + entry.push_back(e.condition->to_form(file)); + print_inlining_begin(&entry, e.body.get(), file); + list.push_back(buildList(entry)); + } + std::vector> else_form; + else_form.push_back(toForm("else")); + print_inlining_begin(&else_form, else_ir.get(), file); + list.push_back(buildList(else_form)); + return buildList(list); } - std::vector> else_form; - else_form.push_back(toForm("else")); - print_inlining_begin(&else_form, else_ir.get(), file); - list.push_back(buildList(else_form)); - return buildList(list); } void IR_CondWithElse::get_children(std::vector>* output) const { - for(auto& e : entries) { + for (auto& e : entries) { output->push_back(e.condition); output->push_back(e.body); } output->push_back(else_ir); +} + +std::shared_ptr IR_GetRuntimeType::to_form(const LinkedObjectFile& file) const { + std::vector> list = {toForm("type-of"), object->to_form(file)}; + return buildList(list); +} + +void IR_GetRuntimeType::get_children(std::vector>* output) const { + output->push_back(object); } \ No newline at end of file diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index 0f7a4ea797..a083c270b2 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -170,7 +170,7 @@ class IR_IntegerConstant : public IR { }; struct BranchDelay { - enum Kind { NOP, SET_REG_FALSE, SET_REG_TRUE, SET_REG_REG, UNKNOWN } kind; + enum Kind { NOP, SET_REG_FALSE, SET_REG_TRUE, SET_REG_REG, SET_BINTEGER, SET_PAIR, UNKNOWN } kind; std::shared_ptr destination = nullptr, source = nullptr; explicit BranchDelay(Kind _kind) : kind(_kind) {} std::shared_ptr to_form(const LinkedObjectFile& file) const; @@ -185,6 +185,7 @@ struct Condition { GREATER_THAN_SIGNED, LEQ_SIGNED, GEQ_SIGNED, + GREATER_THAN_ZERO_SIGNED, LESS_THAN_UNSIGNED, GREATER_THAN_UNSIGNED, LEQ_UNSIGNED, @@ -264,7 +265,8 @@ class IR_Suspend : public IR { class IR_Begin : public IR { public: - IR_Begin(const std::vector>& _forms) : forms(std::move(_forms)) {} + IR_Begin() = default; + explicit IR_Begin(const std::vector>& _forms) : forms(std::move(_forms)) {} std::shared_ptr to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; std::vector> forms; @@ -293,4 +295,14 @@ class IR_CondWithElse : public IR { void get_children(std::vector>* output) const override; }; +// this will work on pairs, bintegers, or basics +class IR_GetRuntimeType : public IR { + public: + std::shared_ptr object, clobber; + explicit IR_GetRuntimeType(std::shared_ptr _object, std::shared_ptr _clobber) + : object(std::move(_object)), clobber(std::move(_clobber)) {} + std::shared_ptr to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; +}; + #endif // JAK_IR_H