From 2901f4a99e1ff3152c98ee94ee833ec36fb34182 Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Sat, 9 Jan 2021 20:01:48 -0500 Subject: [PATCH] [Decompiler] Write IR2 to file and implement some Atomic Op conversions (#187) --- CMakeSettings.json | 4 +- decompiler/CMakeLists.txt | 1 + decompiler/Disasm/Instruction.h | 2 + decompiler/Disasm/InstructionParser.cpp | 15 +- decompiler/Disasm/OpcodeInfo.cpp | 2 +- decompiler/Function/Function.cpp | 13 + decompiler/Function/Function.h | 10 + decompiler/IR2/AtomicOp.cpp | 198 ++- decompiler/IR2/AtomicOp.h | 58 +- decompiler/IR2/AtomicOpBuilder.cpp | 1321 ++++++++++++++++- decompiler/IR2/AtomicOpBuilder.h | 2 +- decompiler/ObjectFile/LinkedObjectFile.cpp | 2 +- decompiler/ObjectFile/LinkedObjectFile.h | 4 +- decompiler/ObjectFile/ObjectFileDB.cpp | 50 +- decompiler/ObjectFile/ObjectFileDB.h | 10 +- decompiler/ObjectFile/ObjectFileDB_IR2.cpp | 347 +++++ decompiler/config.cpp | 1 + decompiler/config.h | 1 + decompiler/config/jak1_ntsc_black_label.jsonc | 12 + decompiler/main.cpp | 46 +- scripts/gen-test-cases.py | 40 + test/decompiler/test_AtomicOpBuilder.cpp | 443 +++++- 22 files changed, 2428 insertions(+), 154 deletions(-) create mode 100644 decompiler/ObjectFile/ObjectFileDB_IR2.cpp create mode 100644 scripts/gen-test-cases.py diff --git a/CMakeSettings.json b/CMakeSettings.json index f00f49bebe..2a6a7e3eed 100644 --- a/CMakeSettings.json +++ b/CMakeSettings.json @@ -1,4 +1,4 @@ -{ +{ "configurations": [ { "name": "Debug", @@ -9,7 +9,7 @@ "installRoot": "${projectDir}\\out\\install\\${name}", "cmakeCommandArgs": "", "buildCommandArgs": "", - "addressSanitizerEnabled": true, + "addressSanitizerEnabled": false, "ctestCommandArgs": "", "variables": [ { diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index 7306b7dd07..5137c768b6 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -35,6 +35,7 @@ add_library( ObjectFile/LinkedObjectFile.cpp 
ObjectFile/LinkedObjectFileCreation.cpp ObjectFile/ObjectFileDB.cpp + ObjectFile/ObjectFileDB_IR2.cpp util/DecompilerTypeSystem.cpp util/TP_Type.cpp diff --git a/decompiler/Disasm/Instruction.h b/decompiler/Disasm/Instruction.h index a9b652263c..ff5e7ec7cf 100644 --- a/decompiler/Disasm/Instruction.h +++ b/decompiler/Disasm/Instruction.h @@ -52,6 +52,8 @@ struct InstructionAtom { bool is_sym() const { return kind == IMM_SYM; } bool is_reg(Register r) const { return kind == REGISTER && reg == r; } + bool is_imm(int32_t i) const { return kind == IMM && imm == i; } + bool is_sym(const std::string& name) const { return kind == IMM_SYM && name == sym; } bool operator==(const InstructionAtom& other) const; bool operator!=(const InstructionAtom& other) const { return !((*this) == other); } diff --git a/decompiler/Disasm/InstructionParser.cpp b/decompiler/Disasm/InstructionParser.cpp index b626c0469b..6ae9b0ba4a 100644 --- a/decompiler/Disasm/InstructionParser.cpp +++ b/decompiler/Disasm/InstructionParser.cpp @@ -41,7 +41,8 @@ InstructionParser::InstructionParser() { InstructionKind::BNEL, InstructionKind::BC1FL, InstructionKind::BC1TL, InstructionKind::BLTZ, InstructionKind::BGEZ, InstructionKind::BLEZ, InstructionKind::BGTZ, InstructionKind::BLTZL, InstructionKind::BGTZL, - InstructionKind::BGEZL}) { + InstructionKind::BGEZL, InstructionKind::MTC1, InstructionKind::MFC1, + InstructionKind::MFLO, InstructionKind::MFHI}) { auto& info = gOpcodeInfo[int(i)]; if (info.defined) { m_opcode_name_lookup[info.name] = int(i); @@ -206,6 +207,18 @@ Instruction InstructionParser::parse_single_instruction( if (is_integer(atom_str)) { auto amt = parse_integer(atom_str); atom.set_imm(amt); + } else if (!atom_str.empty() && atom_str.front() == 'L') { + bool found_label = false; + for (size_t id = 0; id < labels.size(); id++) { + if (labels[id].name == atom_str) { + found_label = true; + atom.set_label(id); + break; + } + } + if (!found_label) { + atom.set_sym(atom_str); + } } else { 
atom.set_sym(atom_str); } diff --git a/decompiler/Disasm/OpcodeInfo.cpp b/decompiler/Disasm/OpcodeInfo.cpp index f016692c3b..6e0fc6d4b6 100644 --- a/decompiler/Disasm/OpcodeInfo.cpp +++ b/decompiler/Disasm/OpcodeInfo.cpp @@ -243,7 +243,7 @@ void init_opcode_info() { // weird moves def(IK::MFC1, "mfc1").dst_gpr(FT::RT).src_fpr(FT::FS); // Move Word from Floating Point - def(IK::MTC1, "mtc1").src_gpr(FT::RT).dst_fpr(FT::FS); // Move Word to Floating Point + def(IK::MTC1, "mtc1").dst_fpr(FT::FS).src_gpr(FT::RT); // Move Word to Floating Point def(IK::MTC0, "mtc0") .src_gpr(FT::RT) .dst(FT::RD, DT::COP0); // Move to System Control Coprocessor diff --git a/decompiler/Function/Function.cpp b/decompiler/Function/Function.cpp index 71d7d2a352..c5c9ff6a47 100644 --- a/decompiler/Function/Function.cpp +++ b/decompiler/Function/Function.cpp @@ -684,6 +684,19 @@ std::shared_ptr Function::get_basic_op_at_instr(int idx) { return basic_ops.at(instruction_to_basic_op.at(idx)); } +bool Function::instr_starts_atomic_op(int idx) { + auto op = ir2.atomic_ops->instruction_to_atomic_op.find(idx); + if (op != ir2.atomic_ops->instruction_to_atomic_op.end()) { + auto start_instr = ir2.atomic_ops->atomic_op_to_instruction.at(op->second); + return start_instr == idx; + } + return false; +} + +const AtomicOp& Function::get_atomic_op_at_instr(int idx) { + return *ir2.atomic_ops->ops.at(ir2.atomic_ops->instruction_to_atomic_op.at(idx)); +} + int Function::get_basic_op_count() { return basic_ops.size(); } diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index 36c6dd1e9d..80195c62a3 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -8,6 +8,7 @@ #include #include #include +#include "decompiler/IR2/AtomicOpBuilder.h" #include "decompiler/Disasm/Instruction.h" #include "decompiler/Disasm/Register.h" #include "BasicBlocks.h" @@ -83,6 +84,8 @@ class Function { bool has_basic_ops() { return !basic_ops.empty(); } bool 
instr_starts_basic_op(int idx); std::shared_ptr get_basic_op_at_instr(int idx); + bool instr_starts_atomic_op(int idx); + const AtomicOp& get_atomic_op_at_instr(int idx); int get_basic_op_count(); int get_failed_basic_op_count(); int get_reginfo_basic_op_count(); @@ -154,6 +157,13 @@ class Function { bool uses_fp_register = false; std::vector> basic_ops; + struct { + bool atomic_ops_attempted = false; + bool atomic_ops_succeeded = false; + std::shared_ptr atomic_ops = nullptr; + Env env; + } ir2; + private: void check_epilogue(const LinkedObjectFile& file); std::unordered_map instruction_to_basic_op; diff --git a/decompiler/IR2/AtomicOp.cpp b/decompiler/IR2/AtomicOp.cpp index aec6744016..c02f8e358c 100644 --- a/decompiler/IR2/AtomicOp.cpp +++ b/decompiler/IR2/AtomicOp.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "third-party/fmt/core.h" #include "common/goos/PrettyPrinter.h" #include "decompiler/ObjectFile/LinkedObjectFile.h" @@ -14,7 +15,10 @@ Variable::Variable(Mode mode, Register reg, int atomic_idx, bool allow_all) : m_mode(mode), m_reg(reg), m_atomic_idx(atomic_idx) { // make sure we're using a valid GPR. 
if (reg.get_kind() == Reg::GPR && !allow_all) { - assert(Reg::allowed_local_gprs[reg.get_gpr()] || reg.get_gpr() == Reg::S6); + if (!(Reg::allowed_local_gprs[reg.get_gpr()] || reg.get_gpr() == Reg::S6)) { + throw std::runtime_error("Variable could not be constructed from register " + + reg.to_string()); + } } } @@ -51,7 +55,7 @@ bool Variable::operator!=(const Variable& other) const { ///////////////////////////// AtomicOp::AtomicOp(int my_idx) : m_my_idx(my_idx) {} -std::string AtomicOp::to_string(const std::vector& labels, const Env* env) { +std::string AtomicOp::to_string(const std::vector& labels, const Env* env) const { return pretty_print::to_string(to_form(labels, env)); } bool AtomicOp::operator!=(const AtomicOp& other) const { @@ -91,10 +95,18 @@ SimpleAtom SimpleAtom::make_empty_list() { SimpleAtom SimpleAtom::make_int_constant(s64 value) { SimpleAtom result; + result.m_kind = Kind::INTEGER_CONSTANT; result.m_int = value; return result; } +SimpleAtom SimpleAtom::make_static_address(int static_label_id) { + SimpleAtom result; + result.m_kind = Kind::STATIC_ADDRESS; + result.m_int = static_label_id; + return result; +} + goos::Object SimpleAtom::to_form(const std::vector& labels, const Env* env) const { switch (m_kind) { case Kind::VARIABLE: @@ -105,6 +117,8 @@ goos::Object SimpleAtom::to_form(const std::vector& labels, con return pretty_print::to_symbol(fmt::format("'{}", m_string)); case Kind::SYMBOL_VAL: return pretty_print::to_symbol(m_string); + case Kind::EMPTY_LIST: + return pretty_print::to_symbol("'()"); case Kind::STATIC_ADDRESS: return pretty_print::to_symbol(labels.at(m_int).name); default: @@ -142,6 +156,10 @@ void SimpleAtom::get_regs(std::vector* out) const { } } +SimpleExpression SimpleAtom::as_expr() const { + return SimpleExpression(SimpleExpression::Kind::IDENTITY, *this); +} + ///////////////////////////// // SimpleExpression ///////////////////////////// @@ -201,10 +219,22 @@ std::string 
get_simple_expression_op_name(SimpleExpression::Kind kind) { return "srl"; case SimpleExpression::Kind::MUL_UNSIGNED: return "*.ui"; - case SimpleExpression::Kind::NOT: + case SimpleExpression::Kind::LOGNOT: return "lognot"; case SimpleExpression::Kind::NEG: return "-"; + case SimpleExpression::Kind::GPR_TO_FPR: + return "gpr->fpr"; + case SimpleExpression::Kind::FPR_TO_GPR: + return "fpr->gpr"; + case SimpleExpression::Kind::MIN_SIGNED: + return "min.si"; + case SimpleExpression::Kind::MIN_UNSIGNED: + return "min.ui"; + case SimpleExpression::Kind::MAX_SIGNED: + return "max.si"; + case SimpleExpression::Kind::MAX_UNSIGNED: + return "max.ui"; default: assert(false); } @@ -243,9 +273,16 @@ int get_simple_expression_arg_count(SimpleExpression::Kind kind) { case SimpleExpression::Kind::RIGHT_SHIFT_LOGIC: case SimpleExpression::Kind::MUL_UNSIGNED: return 2; - case SimpleExpression::Kind::NOT: + case SimpleExpression::Kind::LOGNOT: case SimpleExpression::Kind::NEG: + case SimpleExpression::Kind::GPR_TO_FPR: + case SimpleExpression::Kind::FPR_TO_GPR: return 1; + case SimpleExpression::Kind::MIN_SIGNED: + case SimpleExpression::Kind::MIN_UNSIGNED: + case SimpleExpression::Kind::MAX_SIGNED: + case SimpleExpression::Kind::MAX_UNSIGNED: + return 2; default: assert(false); } @@ -397,7 +434,7 @@ goos::Object AsmOp::to_form(const std::vector& labels, const En if (m_src[i].has_value()) { forms.push_back(pretty_print::to_symbol(m_src[i].value().to_string(env))); } else { - forms.push_back(pretty_print::to_symbol(m_instr.get_src(1).to_string(labels))); + forms.push_back(pretty_print::to_symbol(m_instr.get_src(i).to_string(labels))); } } @@ -501,12 +538,24 @@ std::string get_condition_kind_name(IR2_Condition::Kind kind) { return "<=.s"; case IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED: return ">0.si"; + case IR2_Condition::Kind::GREATER_THAN_ZERO_UNSIGNED: + return ">0.ui"; case IR2_Condition::Kind::GEQ_ZERO_SIGNED: return ">=0.si"; - case IR2_Condition::Kind::LESS_THAN_ZERO: + 
case IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED: return "<0.si"; case IR2_Condition::Kind::LEQ_ZERO_SIGNED: return "<=0.si"; + case IR2_Condition::Kind::LEQ_ZERO_UNSIGNED: + return "<=0.ui"; + case IR2_Condition::Kind::IS_PAIR: + return "pair?"; + case IR2_Condition::Kind::IS_NOT_PAIR: + return "not-pair?"; + case IR2_Condition::Kind::LESS_THAN_ZERO_UNSIGNED: + return "<0.ui"; + case IR2_Condition::Kind::GEQ_ZERO_UNSIGNED: + return ">=0.ui"; default: assert(false); } @@ -537,8 +586,14 @@ int get_condition_num_args(IR2_Condition::Kind kind) { case IR2_Condition::Kind::TRUTHY: case IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED: case IR2_Condition::Kind::GEQ_ZERO_SIGNED: - case IR2_Condition::Kind::LESS_THAN_ZERO: + case IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED: case IR2_Condition::Kind::LEQ_ZERO_SIGNED: + case IR2_Condition::Kind::IS_PAIR: + case IR2_Condition::Kind::IS_NOT_PAIR: + case IR2_Condition::Kind::LEQ_ZERO_UNSIGNED: + case IR2_Condition::Kind::GREATER_THAN_ZERO_UNSIGNED: + case IR2_Condition::Kind::LESS_THAN_ZERO_UNSIGNED: + case IR2_Condition::Kind::GEQ_ZERO_UNSIGNED: return 1; case IR2_Condition::Kind::ALWAYS: case IR2_Condition::Kind::NEVER: @@ -566,10 +621,10 @@ IR2_Condition::Kind get_condition_opposite(IR2_Condition::Kind kind) { return IR2_Condition::Kind::LEQ_ZERO_SIGNED; case IR2_Condition::Kind::LEQ_ZERO_SIGNED: return IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED; - case IR2_Condition::Kind::LESS_THAN_ZERO: + case IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED: return IR2_Condition::Kind::GEQ_ZERO_SIGNED; case IR2_Condition::Kind::GEQ_ZERO_SIGNED: - return IR2_Condition::Kind::LESS_THAN_ZERO; + return IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED; case IR2_Condition::Kind::LESS_THAN_UNSIGNED: return IR2_Condition::Kind::GEQ_UNSIGNED; case IR2_Condition::Kind::GREATER_THAN_UNSIGNED: @@ -602,6 +657,18 @@ IR2_Condition::Kind get_condition_opposite(IR2_Condition::Kind kind) { return IR2_Condition::Kind::FLOAT_LEQ; case IR2_Condition::Kind::FLOAT_LEQ: 
return IR2_Condition::Kind::FLOAT_GREATER_THAN; + case IR2_Condition::Kind::IS_NOT_PAIR: + return IR2_Condition::Kind::IS_PAIR; + case IR2_Condition::Kind::IS_PAIR: + return IR2_Condition::Kind::IS_NOT_PAIR; + case IR2_Condition::Kind::LEQ_ZERO_UNSIGNED: + return IR2_Condition::Kind::GREATER_THAN_ZERO_UNSIGNED; + case IR2_Condition::Kind::GREATER_THAN_ZERO_UNSIGNED: + return IR2_Condition::Kind::LEQ_ZERO_UNSIGNED; + case IR2_Condition::Kind::LESS_THAN_ZERO_UNSIGNED: + return IR2_Condition::Kind::GEQ_ZERO_UNSIGNED; + case IR2_Condition::Kind::GEQ_ZERO_UNSIGNED: + return IR2_Condition::Kind::LESS_THAN_ZERO_UNSIGNED; default: assert(false); } @@ -612,12 +679,13 @@ IR2_Condition::IR2_Condition(Kind kind) : m_kind(kind) { assert(get_condition_num_args(m_kind) == 0); } -IR2_Condition::IR2_Condition(Kind kind, const Variable& src0) : m_kind(kind) { +IR2_Condition::IR2_Condition(Kind kind, const SimpleAtom& src0) : m_kind(kind) { m_src[0] = src0; assert(get_condition_num_args(m_kind) == 1); } -IR2_Condition::IR2_Condition(Kind kind, const Variable& src0, const Variable& src1) : m_kind(kind) { +IR2_Condition::IR2_Condition(Kind kind, const SimpleAtom& src0, const SimpleAtom& src1) + : m_kind(kind) { m_src[0] = src0; m_src[1] = src1; assert(get_condition_num_args(m_kind) == 2); @@ -646,14 +714,18 @@ goos::Object IR2_Condition::to_form(const std::vector& labels, std::vector forms; forms.push_back(pretty_print::to_symbol(get_condition_kind_name(m_kind))); for (int i = 0; i < get_condition_num_args(m_kind); i++) { - forms.push_back(pretty_print::to_symbol(m_src[i].to_string(env))); + forms.push_back(m_src[i].to_form(labels, env)); + } + if (forms.size() > 1) { + return pretty_print::build_list(forms); + } else { + return forms.front(); } - return pretty_print::build_list(forms); } void IR2_Condition::get_regs(std::vector* out) const { for (int i = 0; i < get_condition_num_args(m_kind); i++) { - out->push_back(m_src[i].reg()); + m_src[i].get_regs(out); } } @@ -710,11 +782,38 @@ 
void SetVarConditionOp::update_register_info() { // StoreOp ///////////////////////////// -StoreOp::StoreOp(SimpleExpression addr, SimpleAtom value, int my_idx) - : AtomicOp(my_idx), m_addr(std::move(addr)), m_value(std::move(value)) {} +StoreOp::StoreOp(int size, bool is_float, SimpleExpression addr, SimpleAtom value, int my_idx) + : AtomicOp(my_idx), + m_size(size), + m_is_float(is_float), + m_addr(std::move(addr)), + m_value(std::move(value)) {} goos::Object StoreOp::to_form(const std::vector& labels, const Env* env) const { - return pretty_print::build_list(pretty_print::to_symbol("store!"), m_addr.to_form(labels, env), + std::string store_name; + if (m_is_float) { + assert(m_size == 4); + store_name = "s.f!"; + } else { + switch (m_size) { + case 1: + store_name = "s.b!"; + break; + case 2: + store_name = "s.h!"; + break; + case 4: + store_name = "s.w!"; + break; + case 8: + store_name = "s.d!"; + break; + default: + assert(false); + } + } + + return pretty_print::build_list(pretty_print::to_symbol(store_name), m_addr.to_form(labels, env), m_value.to_form(labels, env)); } @@ -758,13 +857,55 @@ void StoreOp::update_register_info() { // LoadVarOp ///////////////////////////// -LoadVarOp::LoadVarOp(Variable dst, SimpleExpression src, int my_idx) - : AtomicOp(my_idx), m_dst(dst), m_src(std::move(src)) {} +LoadVarOp::LoadVarOp(Kind kind, int size, Variable dst, SimpleExpression src, int my_idx) + : AtomicOp(my_idx), m_kind(kind), m_size(size), m_dst(dst), m_src(std::move(src)) {} goos::Object LoadVarOp::to_form(const std::vector& labels, const Env* env) const { - return pretty_print::build_list(pretty_print::to_symbol("set!"), - pretty_print::to_symbol(m_dst.to_string(env)), - m_src.to_form(labels, env)); + std::vector forms = {pretty_print::to_symbol("set!"), + pretty_print::to_symbol(m_dst.to_string(env))}; + + switch (m_kind) { + case Kind::FLOAT: + assert(m_size == 4); + forms.push_back(pretty_print::build_list("l.f", m_src.to_form(labels, env))); + break; + 
case Kind::UNSIGNED: + switch (m_size) { + case 1: + forms.push_back(pretty_print::build_list("l.bu", m_src.to_form(labels, env))); + break; + case 2: + forms.push_back(pretty_print::build_list("l.hu", m_src.to_form(labels, env))); + break; + case 4: + forms.push_back(pretty_print::build_list("l.wu", m_src.to_form(labels, env))); + break; + case 8: + forms.push_back(pretty_print::build_list("l.d", m_src.to_form(labels, env))); + break; + default: + assert(false); + } + break; + case Kind::SIGNED: + switch (m_size) { + case 1: + forms.push_back(pretty_print::build_list("l.b", m_src.to_form(labels, env))); + break; + case 2: + forms.push_back(pretty_print::build_list("l.h", m_src.to_form(labels, env))); + break; + case 4: + forms.push_back(pretty_print::build_list("l.w", m_src.to_form(labels, env))); + break; + default: + assert(false); + } + break; + default: + assert(false); + } + return pretty_print::build_list(forms); } bool LoadVarOp::operator==(const AtomicOp& other) const { @@ -864,7 +1005,7 @@ goos::Object IR2_BranchDelay::to_form(const std::vector& labels assert(m_var[2].has_value()); return pretty_print::build_list( "set!", m_var[0]->to_string(env), - pretty_print::build_list("dsllv", m_var[1]->to_string(env), m_var[2]->to_string(env))); + pretty_print::build_list("sll", m_var[1]->to_string(env), m_var[2]->to_string(env))); case Kind::NEGATE: assert(m_var[0].has_value()); assert(m_var[1].has_value()); @@ -987,11 +1128,13 @@ goos::Object SpecialOp::to_form(const std::vector& labels, cons (void)env; switch (m_kind) { case Kind::NOP: - return pretty_print::to_symbol("nop!"); + return pretty_print::build_list("nop!"); case Kind::BREAK: - return pretty_print::to_symbol("break!"); + return pretty_print::build_list("break!"); + case Kind::CRASH: + return pretty_print::build_list("crash!"); case Kind::SUSPEND: - return pretty_print::to_symbol("suspend"); + return pretty_print::build_list("suspend"); default: assert(false); } @@ -1073,7 +1216,8 @@ std::unique_ptr 
CallOp::get_as_expr() const { } void CallOp::update_register_info() { - throw std::runtime_error("CallOp::update_register_info cannot be done until types are known"); + // throw std::runtime_error("CallOp::update_register_info cannot be done until types are known"); + m_read_regs.push_back(Register(Reg::GPR, Reg::T9)); } ///////////////////////////// diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index 8ab07a8df5..f31c867a2b 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "common/goos/Object.h" #include "decompiler/Disasm/Register.h" #include "decompiler/Disasm/Instruction.h" @@ -88,7 +89,7 @@ class Variable { class AtomicOp { public: explicit AtomicOp(int my_idx); - std::string to_string(const std::vector& labels, const Env* env); + std::string to_string(const std::vector& labels, const Env* env) const; virtual goos::Object to_form(const std::vector& labels, const Env* env) const = 0; virtual bool operator==(const AtomicOp& other) const = 0; @@ -123,6 +124,9 @@ class AtomicOp { const std::vector& read_regs() { return m_read_regs; } const std::vector& write_regs() { return m_write_regs; } const std::vector& clobber_regs() { return m_clobber_regs; } + void add_clobber_reg(Register r) { m_clobber_regs.push_back(r); } + + virtual ~AtomicOp() = default; protected: int m_my_idx = -1; @@ -135,6 +139,8 @@ class AtomicOp { std::vector m_clobber_regs; }; +class SimpleExpression; + /*! * The has a value. In some cases it can be set. 
*/ @@ -156,6 +162,7 @@ class SimpleAtom { static SimpleAtom make_sym_val(const std::string& name); static SimpleAtom make_empty_list(); static SimpleAtom make_int_constant(s64 value); + static SimpleAtom make_static_address(int static_label_id); goos::Object to_form(const std::vector& labels, const Env* env) const; bool is_var() const { return m_kind == Kind::VARIABLE; } @@ -171,6 +178,7 @@ class SimpleAtom { bool operator==(const SimpleAtom& other) const; bool operator!=(const SimpleAtom& other) const { return !((*this) == other); } void get_regs(std::vector* out) const; + SimpleExpression as_expr() const; private: Kind m_kind = Kind::INVALID; @@ -219,8 +227,14 @@ class SimpleExpression { RIGHT_SHIFT_ARITH, RIGHT_SHIFT_LOGIC, MUL_UNSIGNED, - NOT, - NEG + LOGNOT, + NEG, + GPR_TO_FPR, + FPR_TO_GPR, + MIN_SIGNED, + MAX_SIGNED, + MIN_UNSIGNED, + MAX_UNSIGNED }; // how many arguments? @@ -230,7 +244,7 @@ class SimpleExpression { return m_args[idx]; } Kind kind() const { return m_kind; } - + SimpleExpression() = default; SimpleExpression(Kind kind, const SimpleAtom& arg0); SimpleExpression(Kind kind, const SimpleAtom& arg0, const SimpleAtom& arg1); goos::Object to_form(const std::vector& labels, const Env* env) const; @@ -249,8 +263,8 @@ class SimpleExpression { */ class SetVarOp : public AtomicOp { public: - SetVarOp(const Variable& dst, const SimpleExpression& src, int my_idx) - : AtomicOp(my_idx), m_dst(dst), m_src(src) { + SetVarOp(const Variable& dst, SimpleExpression src, int my_idx) + : AtomicOp(my_idx), m_dst(dst), m_src(std::move(src)) { assert(my_idx == dst.idx()); } virtual goos::Object to_form(const std::vector& labels, @@ -310,9 +324,13 @@ class IR2_Condition { LEQ_SIGNED, GEQ_SIGNED, GREATER_THAN_ZERO_SIGNED, + GREATER_THAN_ZERO_UNSIGNED, LEQ_ZERO_SIGNED, - LESS_THAN_ZERO, + LEQ_ZERO_UNSIGNED, + LESS_THAN_ZERO_SIGNED, GEQ_ZERO_SIGNED, + LESS_THAN_ZERO_UNSIGNED, + GEQ_ZERO_UNSIGNED, LESS_THAN_UNSIGNED, GREATER_THAN_UNSIGNED, LEQ_UNSIGNED, @@ -329,12 +347,15 
@@ class IR2_Condition { FLOAT_GEQ, FLOAT_LEQ, FLOAT_GREATER_THAN, + IS_PAIR, + IS_NOT_PAIR, INVALID }; + IR2_Condition() = default; explicit IR2_Condition(Kind kind); - IR2_Condition(Kind kind, const Variable& src0); - IR2_Condition(Kind kind, const Variable& src0, const Variable& src1); + IR2_Condition(Kind kind, const SimpleAtom& src0); + IR2_Condition(Kind kind, const SimpleAtom& src0, const SimpleAtom& src1); void invert(); bool operator==(const IR2_Condition& other) const; @@ -344,7 +365,7 @@ class IR2_Condition { private: Kind m_kind = Kind::INVALID; - Variable m_src[2]; + SimpleAtom m_src[2]; }; /*! @@ -361,6 +382,7 @@ class SetVarConditionOp : public AtomicOp { std::unique_ptr get_set_source_as_expr() const override; std::unique_ptr get_as_expr() const override; void update_register_info() override; + void invert() { m_condition.invert(); } private: Variable m_dst; @@ -374,7 +396,7 @@ class SetVarConditionOp : public AtomicOp { */ class StoreOp : public AtomicOp { public: - StoreOp(SimpleExpression addr, SimpleAtom value, int my_idx); + StoreOp(int size, bool is_float, SimpleExpression addr, SimpleAtom value, int my_idx); goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; bool is_variable_set() const override; @@ -385,6 +407,8 @@ class StoreOp : public AtomicOp { void update_register_info() override; private: + int m_size; + bool m_is_float; SimpleExpression m_addr; SimpleAtom m_value; }; @@ -395,7 +419,8 @@ class StoreOp : public AtomicOp { */ class LoadVarOp : public AtomicOp { public: - LoadVarOp(Variable dst, SimpleExpression src, int my_idx); + enum class Kind { UNSIGNED, SIGNED, FLOAT }; + LoadVarOp(Kind kind, int size, Variable dst, SimpleExpression src, int my_idx); goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; bool is_variable_set() const override; @@ -406,6 +431,8 @@ class 
LoadVarOp : public AtomicOp { void update_register_info() override; private: + Kind m_kind; + int m_size = -1; Variable m_dst; SimpleExpression m_src; }; @@ -427,7 +454,8 @@ class IR2_BranchDelay { SET_BINTEGER, SET_PAIR, DSLLV, - NEGATE + NEGATE, + UNKNOWN }; explicit IR2_BranchDelay(Kind kind); @@ -437,10 +465,11 @@ class IR2_BranchDelay { goos::Object to_form(const std::vector& labels, const Env* env) const; bool operator==(const IR2_BranchDelay& other) const; void get_regs(std::vector* write, std::vector* read) const; + bool is_known() const { return m_kind != Kind::UNKNOWN; } private: std::optional m_var[3]; - Kind m_kind; + Kind m_kind = Kind::UNKNOWN; }; /*! @@ -479,6 +508,7 @@ class SpecialOp : public AtomicOp { enum class Kind { NOP, BREAK, + CRASH, SUSPEND, }; diff --git a/decompiler/IR2/AtomicOpBuilder.cpp b/decompiler/IR2/AtomicOpBuilder.cpp index 5e2e81a207..79f870867a 100644 --- a/decompiler/IR2/AtomicOpBuilder.cpp +++ b/decompiler/IR2/AtomicOpBuilder.cpp @@ -1,12 +1,48 @@ #include "AtomicOpBuilder.h" + +#include #include "common/log/log.h" +#include "common/symbols.h" #include "decompiler/Function/BasicBlocks.h" #include "decompiler/Function/Function.h" +#include "decompiler/Disasm/InstructionMatching.h" namespace decompiler { namespace { +////////////////////// +// Register Helpers +////////////////////// + +Register rs7() { + return make_gpr(Reg::S7); +} + +Register rr0() { + return make_gpr(Reg::R0); +} + +Register rfp() { + return make_gpr(Reg::FP); +} + +Register rra() { + return make_gpr(Reg::RA); +} + +Register rt9() { + return make_gpr(Reg::T9); +} + +Register rv0() { + return make_gpr(Reg::V0); +} + +///////////////////////// +// Variable Helpers +///////////////////////// + Variable make_dst_var(Register reg, int idx) { return Variable(Variable::Mode::WRITE, reg, idx); } @@ -15,38 +51,1248 @@ Variable make_src_var(Register reg, int idx) { return Variable(Variable::Mode::READ, reg, idx); } +Variable make_dst_var(const Instruction& i, int 
idx) { + assert(i.n_dst == 1); + return make_dst_var(i.get_dst(0).get_reg(), idx); +} + +//////////////////////// +// Atom Helpers +//////////////////////// + SimpleAtom make_src_atom(Register reg, int idx) { return SimpleAtom::make_var(make_src_var(reg, idx)); } +SimpleAtom false_sym() { + return SimpleAtom::make_sym_ptr("#f"); +} + +//////////////////////// +// Expression Helpers +//////////////////////// + +SimpleExpression make_2reg_expr(const Instruction& instr, SimpleExpression::Kind kind, int idx) { + auto src0 = make_src_atom(instr.get_src(0).get_reg(), idx); + auto src1 = make_src_atom(instr.get_src(1).get_reg(), idx); + return SimpleExpression(kind, src0, src1); +} + +SimpleExpression make_1reg_1imm_expr(const Instruction& instr, + SimpleExpression::Kind kind, + int idx, + int imm_offset = 0) { + auto src0 = make_src_atom(instr.get_src(0).get_reg(), idx); + auto src1 = SimpleAtom::make_int_constant(instr.get_src(1).get_imm() + imm_offset); + return SimpleExpression(kind, src0, src1); +} + +SimpleExpression make_1reg_expr(const Instruction& instr, SimpleExpression::Kind kind, int idx) { + auto src = make_src_atom(instr.get_src(0).get_reg(), idx); + return SimpleExpression(kind, src); +} + +SimpleExpression make_reg_plus_int(Register reg, int integer, int idx) { + return SimpleExpression(SimpleExpression::Kind::ADD, make_src_atom(reg, idx), + SimpleAtom::make_int_constant(integer)); +} + +//////////////////////// +// AtmoicOp Helpers +//////////////////////// + /*! * Convert a single instruction in the form instr dest_reg, src_reg, src_reg * to an atomic op of (set! 
dst_reg (op src_reg src_reg)) * Like daddu a0, a1, a2 */ -void make_3reg_op(const Instruction& instr, - SimpleExpression::Kind kind, - int idx, - std::unique_ptr& result) { +std::unique_ptr make_3reg_op(const Instruction& instr, + SimpleExpression::Kind kind, + int idx) { auto dst = make_dst_var(instr.get_dst(0).get_reg(), idx); - auto src0 = make_src_atom(instr.get_src(0).get_reg(), idx); - auto src1 = make_src_atom(instr.get_src(1).get_reg(), idx); - result = std::make_unique(dst, SimpleExpression(kind, src0, src1), idx); + return std::make_unique(dst, make_2reg_expr(instr, kind, idx), idx); } -bool convert_and_1(const Instruction& i0, int idx, std::unique_ptr& result) { - // or reg, reg, reg: - make_3reg_op(i0, SimpleExpression::Kind::AND, idx, result); - return true; +std::unique_ptr make_2reg_1imm_op(const Instruction& instr, + SimpleExpression::Kind kind, + int idx, + int imm_offset = 0) { + auto dst = make_dst_var(instr.get_dst(0).get_reg(), idx); + return std::make_unique(dst, make_1reg_1imm_expr(instr, kind, idx, imm_offset), idx); } -bool convert_1(const Instruction& i0, int idx, std::unique_ptr& result) { - switch (i0.kind) { - case InstructionKind::AND: - return convert_and_1(i0, idx, result); - default: - return false; +/*! + * Convert a single instruction in the form instr dest_reg, src_reg + * to an atomic op of (set! dest_reg (op src_reg)) + */ +std::unique_ptr make_2reg_op(const Instruction& instr, + SimpleExpression::Kind kind, + int idx) { + auto dst = make_dst_var(instr.get_dst(0).get_reg(), idx); + return std::make_unique(dst, make_1reg_expr(instr, kind, idx), idx); +} + +/*! + * Common load helper. 
Supports fp relative, 0 offset, or integer constant offset + */ +std::unique_ptr make_standard_load(const Instruction& i0, + int idx, + int load_size, + LoadVarOp::Kind kind) { + if (i0.get_dst(0).is_reg(rra())) { + return std::make_unique(i0, idx); } + auto dst = make_dst_var(i0, idx); + SimpleExpression src; + if (i0.get_src(0).is_label() && i0.get_src(1).is_reg(rfp())) { + // it's an FP relative load. + src = SimpleAtom::make_static_address(i0.get_src(0).get_label()).as_expr(); + } else if (i0.get_src(0).is_imm() && i0.get_src(0).get_imm() == 0) { + // the offset is 0 + src = make_src_atom(i0.get_src(1).get_reg(), idx).as_expr(); + } else if (i0.get_src(0).is_imm()) { + // the offset is not 0 + src = make_reg_plus_int(i0.get_src(1).get_reg(), i0.get_src(0).get_imm(), idx); + } else { + return nullptr; + } + return std::make_unique(kind, load_size, dst, src, idx); +} + +std::unique_ptr make_standard_store(const Instruction& i0, + int idx, + int store_size, + bool is_float) { + SimpleAtom val; + SimpleExpression dst; + if (i0.get_src(0).is_reg(rs7())) { + assert(!is_float); + val = SimpleAtom::make_sym_ptr("#f"); + } else if (i0.get_src(0).is_reg(rr0())) { + assert(!is_float); + val = SimpleAtom::make_int_constant(0); + } else { + val = make_src_atom(i0.get_src(0).get_reg(), idx); + } + + auto base_reg = make_src_atom(i0.get_src(2).get_reg(), idx); + auto offset = i0.get_src(1).get_imm(); + if (offset == 0) { + dst = base_reg.as_expr(); + } else { + dst = SimpleExpression(SimpleExpression::Kind::ADD, base_reg, + SimpleAtom::make_int_constant(offset)); + } + + return std::make_unique(store_size, is_float, dst, val, idx); +} + +std::unique_ptr make_asm_op(const Instruction& i0, int idx) { + switch (i0.kind) { + case InstructionKind::POR: + case InstructionKind::SLLV: // goal will use dsllv + case InstructionKind::SLL: // goal will use dsll + case InstructionKind::PCPYUD: + case InstructionKind::LQ: + case InstructionKind::SQ: + case InstructionKind::MTC0: + case 
InstructionKind::MTDAB: + case InstructionKind::MTDABM: + case InstructionKind::SUBU: // goal uses dsubu + case InstructionKind::JR: // normal returns included in epilogue + case InstructionKind::SYSCALL: + case InstructionKind::ADDU: // goal uses daddu + case InstructionKind::SRL: // goal uses dsrl.. except maybe to access bitfields? + case InstructionKind::SRA: + case InstructionKind::ADDIU: + // some weird inline assembly macros in nav-mesh stuff use these a lot in a weird way. + case InstructionKind::SLT: + case InstructionKind::MOVN: + case InstructionKind::SLTI: // a few cases used in inline asm + + // VU/COP2 + case InstructionKind::VMOVE: + case InstructionKind::VFTOI0: + case InstructionKind::VFTOI4: + case InstructionKind::VFTOI12: + case InstructionKind::VITOF0: + case InstructionKind::VITOF12: + case InstructionKind::VITOF15: + case InstructionKind::VABS: + case InstructionKind::VADD: + case InstructionKind::VSUB: + case InstructionKind::VMUL: + case InstructionKind::VMINI: + case InstructionKind::VMAX: + case InstructionKind::VOPMSUB: + case InstructionKind::VMADD: + case InstructionKind::VMSUB: + case InstructionKind::VADD_BC: + case InstructionKind::VSUB_BC: + case InstructionKind::VMUL_BC: + case InstructionKind::VMULA_BC: + case InstructionKind::VMADD_BC: + case InstructionKind::VADDA_BC: + case InstructionKind::VMADDA_BC: + case InstructionKind::VMSUBA_BC: + case InstructionKind::VMSUB_BC: + case InstructionKind::VMINI_BC: + case InstructionKind::VMAX_BC: + case InstructionKind::VADDQ: + case InstructionKind::VSUBQ: + case InstructionKind::VMULQ: + case InstructionKind::VMSUBQ: + case InstructionKind::VMULA: + case InstructionKind::VADDA: + case InstructionKind::VMADDA: + case InstructionKind::VOPMULA: + case InstructionKind::VDIV: + case InstructionKind::VCLIP: + case InstructionKind::VMULAQ: + case InstructionKind::VMTIR: + case InstructionKind::VIAND: + case InstructionKind::VLQI: + case InstructionKind::VIADDI: + case InstructionKind::VSQI: + 
case InstructionKind::VRGET: + case InstructionKind::VSQRT: + case InstructionKind::VRSQRT: + case InstructionKind::VRXOR: + case InstructionKind::VRNEXT: + case InstructionKind::VNOP: + case InstructionKind::VWAITQ: + case InstructionKind::VCALLMS: + + // FPU/COP1 + case InstructionKind::MULAS: + case InstructionKind::MADDAS: + case InstructionKind::MADDS: + case InstructionKind::ADDAS: + + // Moves / Loads / Stores + case InstructionKind::CTC2: + case InstructionKind::CFC2: + case InstructionKind::SQC2: + case InstructionKind::LQC2: + case InstructionKind::LDR: + case InstructionKind::LDL: + case InstructionKind::QMTC2: + case InstructionKind::QMFC2: + case InstructionKind::MFC0: + case InstructionKind::SYNCL: + case InstructionKind::SYNCP: + case InstructionKind::CACHE_DXWBIN: + case InstructionKind::MTPC: + case InstructionKind::MFPC: + + // MMI + case InstructionKind::PSLLW: + case InstructionKind::PSRAW: + case InstructionKind::PSRAH: + case InstructionKind::PLZCW: + case InstructionKind::PMFHL_UW: + case InstructionKind::PMFHL_LW: + case InstructionKind::PMFHL_LH: + case InstructionKind::PSLLH: + case InstructionKind::PSRLH: + case InstructionKind::PEXTLW: + case InstructionKind::PPACH: + case InstructionKind::PSUBW: + case InstructionKind::PCGTW: + case InstructionKind::PEXTLH: + case InstructionKind::PEXTLB: + case InstructionKind::PMAXH: + case InstructionKind::PPACB: + case InstructionKind::PADDW: + case InstructionKind::PADDH: + case InstructionKind::PMAXW: + case InstructionKind::PPACW: + case InstructionKind::PCEQW: + case InstructionKind::PEXTUW: + case InstructionKind::PMINH: + case InstructionKind::PEXTUH: + case InstructionKind::PEXTUB: + case InstructionKind::PCEQB: + case InstructionKind::PMINW: + case InstructionKind::PABSW: + case InstructionKind::PCPYLD: + case InstructionKind::PROT3W: + case InstructionKind::PAND: + case InstructionKind::PMADDH: + case InstructionKind::PMULTH: + case InstructionKind::PEXEW: + case InstructionKind::PNOR: + 
case InstructionKind::PCPYH: + case InstructionKind::PINTEH: + + return std::make_unique(i0, idx); + default: + return nullptr; + } +} + +//////////////////////// +// Branch Helpers +//////////////////////// + +IR2_BranchDelay get_branch_delay(const Instruction& i0, int idx) { + if (is_nop(i0)) { + return IR2_BranchDelay(IR2_BranchDelay::Kind::NOP); + } else if (is_gpr_3(i0, InstructionKind::OR, {}, rs7(), rr0())) { + return IR2_BranchDelay(IR2_BranchDelay::Kind::SET_REG_FALSE, make_dst_var(i0, idx)); + } else if (is_gpr_3(i0, InstructionKind::OR, {}, {}, rr0())) { + return IR2_BranchDelay(IR2_BranchDelay::Kind::SET_REG_REG, make_dst_var(i0, idx), + make_src_var(i0.get_src(0).get_reg(), idx)); + } else if (i0.kind == InstructionKind::DADDIU && i0.get_src(0).is_reg(rs7()) && + i0.get_src(1).is_imm(FIX_SYM_TRUE)) { + return IR2_BranchDelay(IR2_BranchDelay::Kind::SET_REG_TRUE, make_dst_var(i0, idx)); + } else if (i0.kind == InstructionKind::LW && i0.get_src(1).is_reg(rs7()) && + i0.get_src(0).is_sym()) { + if (i0.get_src(0).is_sym("binteger")) { + return IR2_BranchDelay(IR2_BranchDelay::Kind::SET_BINTEGER, make_dst_var(i0, idx)); + } else if (i0.get_src(0).is_sym("pair")) { + return IR2_BranchDelay(IR2_BranchDelay::Kind::SET_PAIR, make_dst_var(i0, idx)); + } else { + return IR2_BranchDelay(IR2_BranchDelay::Kind::UNKNOWN); + } + } else if (i0.kind == InstructionKind::DSLLV) { + return IR2_BranchDelay(IR2_BranchDelay::Kind::DSLLV, make_dst_var(i0, idx), + make_src_var(i0.get_src(0).get_reg(), idx), + make_src_var(i0.get_src(1).get_reg(), idx)); + } else if (is_gpr_3(i0, InstructionKind::DSUBU, {}, rr0(), {})) { + return IR2_BranchDelay(IR2_BranchDelay::Kind::NEGATE, make_dst_var(i0, idx), + make_src_var(i0.get_src(1).get_reg(), idx)); + } else { + return IR2_BranchDelay(IR2_BranchDelay::Kind::UNKNOWN); + } +} + +std::unique_ptr make_branch(const IR2_Condition& condition, + const Instruction& delay, + bool likely, + int dest_label, + int my_idx) { + auto branch_delay = 
get_branch_delay(delay, my_idx); + if (branch_delay.is_known()) { + return std::make_unique(likely, condition, dest_label, branch_delay, my_idx); + } else { + return nullptr; + } +} + +/////////////////////// +// OP 1 Conversions +////////////////////// + +std::unique_ptr convert_or_1(const Instruction& i0, int idx) { + if (i0.get_src(1).is_reg(rra())) { + return std::make_unique(i0, idx); + } + auto dest = make_dst_var(i0, idx); + SimpleExpression src; + + if (is_gpr_3(i0, InstructionKind::OR, {}, rs7(), rr0())) { + // set reg_dest to #f : or reg_dest, s7, r0 + src = false_sym().as_expr(); + } else if (is_gpr_3(i0, InstructionKind::OR, {}, rr0(), rr0())) { + // set reg_dest to 0 : or reg_dest, r0, r0 + src = SimpleAtom::make_int_constant(0).as_expr(); + } else if (is_gpr_3(i0, InstructionKind::OR, {}, {}, rr0())) { + // set dst to src : or dst, src, r0 + src = make_src_atom(i0.get_src(0).get_reg(), idx).as_expr(); + } else { + // actually do a logical OR of two registers: or a0, a1, a2 + src = make_2reg_expr(i0, SimpleExpression::Kind::OR, idx); + } + return std::make_unique(dest, src, idx); +} + +std::unique_ptr convert_ori_1(const Instruction& i0, int idx) { + auto dest = make_dst_var(i0, idx); + SimpleExpression src; + if (i0.get_src(0).is_reg(rr0()) && i0.get_src(1).is_imm()) { + // load a 16-bit integer constant + // ori reg, r0, 1234 + src = SimpleAtom::make_int_constant(i0.get_src(1).get_imm()).as_expr(); + } else if (i0.get_src(1).is_imm()) { + // logical or with constant integer + // ori dst, a0, 1234 + return make_2reg_1imm_op(i0, SimpleExpression::Kind::OR, idx); + } else { + return nullptr; + } + return std::make_unique(dest, src, idx); +} + +std::unique_ptr convert_mtc1_1(const Instruction& i0, int idx) { + if (i0.get_src(0).is_reg(rr0())) { + return std::make_unique(make_dst_var(i0, idx), + SimpleAtom::make_int_constant(0).as_expr(), idx); + } else { + return make_2reg_op(i0, SimpleExpression::Kind::GPR_TO_FPR, idx); + } +} + +std::unique_ptr 
convert_mfc1_1(const Instruction& i0, int idx) { + if (i0.get_dst(0).is_reg(rr0()) || i0.get_dst(0).is_reg(make_gpr(Reg::AT)) || + i0.get_dst(0).is_reg(rra())) { + // sometimes mfc1 r0, f31 is used like a 'nop'. No idea why. + // at is used in some inline assembly gpr -> vf conversions. might as well drop to assembly + // as soon as we can. + // cursed mfc1 ra, f0 is also assembly. + return std::make_unique(i0, idx); + } else { + // any other destination: plain FPR to GPR move. + return make_2reg_op(i0, SimpleExpression::Kind::FPR_TO_GPR, idx); + } +} + +// Convert a single lw. Loads into ra or at are wrapped as-is from the raw instruction, +// lw r0, 2(r0) becomes a BREAK special op, a symbol offset from s7 is a symbol value load, +// and anything else is a standard 4-byte signed load. +std::unique_ptr convert_lw_1(const Instruction& i0, int idx) { + if (i0.get_dst(0).is_reg(rra()) || i0.get_dst(0).is_reg(make_gpr(Reg::AT))) { + return std::make_unique(i0, idx); + } + if (i0.get_dst(0).is_reg(rr0()) && i0.get_src(0).is_imm(2) && i0.get_src(1).is_reg(rr0())) { + // lw r0, 2(r0), used to trigger an exception on purpose. + return std::make_unique(SpecialOp::Kind::BREAK, idx); + } else if (i0.get_src(1).is_reg(rs7()) && i0.get_src(0).is_sym()) { + // symbol load. + return std::make_unique( + make_dst_var(i0, idx), SimpleAtom::make_sym_val(i0.get_src(0).get_sym()).as_expr(), idx); + } else { + // fall back to standard loads + return make_standard_load(i0, idx, 4, LoadVarOp::Kind::SIGNED); + } +} + +// Convert a single daddiu. Relative to s7 it recognizes a symbol pointer, the empty pair, the +// start of the symbol table (offset -32768) and #t; relative to fp with a label it is the +// address of static data; anything else falls back to a register + immediate add. +std::unique_ptr convert_daddiu_1(const Instruction& i0, int idx) { + if (i0.get_src(0).is_reg(rs7()) && i0.get_src(1).is_sym()) { + // get symbol pointer + return std::make_unique( + make_dst_var(i0, idx), SimpleAtom::make_sym_ptr(i0.get_src(1).get_sym()).as_expr(), idx); + } else if (i0.get_src(0).is_reg(rs7()) && i0.get_src(1).is_imm(FIX_SYM_EMPTY_PAIR)) { + // get empty pair + return std::make_unique(make_dst_var(i0, idx), + SimpleAtom::make_empty_list().as_expr(), idx); + } else if (i0.get_src(0).is_reg(rs7()) && i0.get_src(1).is_imm(-32768)) { + // get pointer to beginning of symbol table (this is a bit of a hack) + return std::make_unique( + make_dst_var(i0, idx), SimpleAtom::make_sym_val("__START-OF-TABLE__").as_expr(), idx); + } else if 
(i0.get_src(0).is_reg(rs7()) && i0.get_src(1).is_imm(FIX_SYM_TRUE)) { + // get pointer to the #t symbol (s7 + FIX_SYM_TRUE) + return std::make_unique(make_dst_var(i0, idx), + SimpleAtom::make_sym_ptr("#t").as_expr(), idx); + } else if (i0.get_src(0).is_reg(rfp()) && i0.get_src(1).is_label()) { + // get address of static + return std::make_unique( + make_dst_var(i0, idx), SimpleAtom::make_static_address(i0.get_src(1).get_label()).as_expr(), + idx); + } else { + // fall back to normal add. + return make_2reg_1imm_op(i0, SimpleExpression::Kind::ADD, idx); + } +} + +// Convert a single daddu. An s7 second source is wrapped as-is from the raw instruction, r0 as +// the first source becomes an explicit (second source + 0), anything else is a 3-register ADD. +std::unique_ptr convert_daddu_1(const Instruction& i0, int idx) { + if (i0.get_src(1).is_reg(rs7())) { + return std::make_unique(i0, idx); + } else if (i0.get_src(0).is_reg(rr0())) { + // I think the array access code sometimes generates this. To be safe, let's pass it through + // as an explicit reg + 0 access. + return std::make_unique( + make_dst_var(i0, idx), + SimpleExpression(SimpleExpression::Kind::ADD, make_src_atom(i0.get_src(1).get_reg(), idx), + SimpleAtom::make_int_constant(0)), + idx); + } else { + return make_3reg_op(i0, SimpleExpression::Kind::ADD, idx); + } +} + +// Convert a single dsubu. With r0 as the first source it is a negation (NEG) of the second +// source; otherwise it is a 3-register SUB. +std::unique_ptr convert_dsubu_1(const Instruction& i0, int idx) { + if (i0.get_src(0).is_reg(rr0())) { + return std::make_unique( + make_dst_var(i0, idx), + SimpleExpression(SimpleExpression::Kind::NEG, make_src_atom(i0.get_src(1).get_reg(), idx)), + idx); + } else { + // fall back + return make_3reg_op(i0, SimpleExpression::Kind::SUB, idx); + } +} + +// Convert a single nor. With r0 as the second source it is a LOGNOT of the first source; +// otherwise it is a 3-register NOR. +std::unique_ptr convert_nor_1(const Instruction& i0, int idx) { + if (i0.get_src(1).is_reg(rr0())) { + return std::make_unique(make_dst_var(i0, idx), + SimpleExpression(SimpleExpression::Kind::LOGNOT, + make_src_atom(i0.get_src(0).get_reg(), idx)), + idx); + } else { + // fall back + return make_3reg_op(i0, SimpleExpression::Kind::NOR, idx); + } +} + +std::unique_ptr convert_addiu_1(const Instruction& i0, int idx) { + // addiu is used to load a constant. sometimes. 
+ if (i0.get_src(0).is_reg(rr0())) { + return std::make_unique( + make_dst_var(i0, idx), SimpleAtom::make_int_constant(i0.get_src(1).get_imm()).as_expr(), + idx); + } else { + // may be assembly + return nullptr; + } +} + +std::unique_ptr convert_lui_1(const Instruction& i0, int idx) { + if (i0.get_dst(0).is_reg(make_gpr(Reg::AT))) { + return std::make_unique(i0, idx); + } + if (i0.get_src(0).is_imm()) { + return std::make_unique( + make_dst_var(i0, idx), + SimpleAtom::make_int_constant(i0.get_src(0).get_imm() << 16).as_expr(), idx); + } + return nullptr; +} + +std::unique_ptr convert_sll_1(const Instruction& i0, int idx) { + if (is_nop(i0)) { + return std::make_unique(SpecialOp::Kind::NOP, idx); + } + return nullptr; +} + +std::unique_ptr convert_sw_1(const Instruction& i0, int idx) { + if (i0.get_src(1).is_sym() && i0.get_src(2).is_reg(rs7())) { + auto name = i0.get_src(1).get_sym(); + // store into symbol table! + SimpleAtom val; + if (i0.get_src(0).is_reg(rs7())) { + // store a false + val = SimpleAtom::make_sym_ptr("#f"); + } else if (i0.get_src(0).is_reg(rr0())) { + // store a 0 + val = SimpleAtom::make_int_constant(0); + } else { + // store a register. 
+ val = make_src_atom(i0.get_src(0).get_reg(), idx); + } + return std::make_unique(4, false, SimpleAtom::make_sym_val(name).as_expr(), val, idx); + } else { + return make_standard_store(i0, idx, 4, false); + } +} + +std::unique_ptr convert_sd_1(const Instruction& i0, int idx) { + if (i0.get_src(0).is_reg(rr0()) && i0.get_src(1).is_imm(2) && i0.get_src(2).is_reg(rr0())) { + return std::make_unique(SpecialOp::Kind::CRASH, idx); + } else { + return make_standard_store(i0, idx, 8, false); + } +} + +// movn or movz +std::unique_ptr convert_cmov_1(const Instruction& i0, int idx) { + if (i0.get_src(0).is_reg(rs7())) { + return std::make_unique(make_dst_var(i0, idx), + make_src_var(i0.get_src(1).get_reg(), idx), + i0.kind == InstructionKind::MOVZ, idx); + } else { + return nullptr; + } +} + +std::unique_ptr convert_dsll32_1(const Instruction& i0, int idx) { + if (i0.get_dst(0).is_reg(rra())) { + return std::make_unique(i0, idx); + } + return make_2reg_1imm_op(i0, SimpleExpression::Kind::LEFT_SHIFT, idx, 32); +} + +std::unique_ptr convert_dsrl32_1(const Instruction& i0, int idx) { + if (i0.get_dst(0).is_reg(rra())) { + return std::make_unique(i0, idx); + } + return make_2reg_1imm_op(i0, SimpleExpression::Kind::RIGHT_SHIFT_LOGIC, idx, 32); +} + +std::unique_ptr convert_1(const Instruction& i0, int idx) { + switch (i0.kind) { + case InstructionKind::OR: + return convert_or_1(i0, idx); + case InstructionKind::ORI: + return convert_ori_1(i0, idx); + case InstructionKind::AND: + return make_3reg_op(i0, SimpleExpression::Kind::AND, idx); + case InstructionKind::MTC1: + return convert_mtc1_1(i0, idx); + case InstructionKind::MFC1: + return convert_mfc1_1(i0, idx); + case InstructionKind::LWC1: + return make_standard_load(i0, idx, 4, LoadVarOp::Kind::FLOAT); + case InstructionKind::LB: + return make_standard_load(i0, idx, 1, LoadVarOp::Kind::SIGNED); + case InstructionKind::LBU: + return make_standard_load(i0, idx, 1, LoadVarOp::Kind::UNSIGNED); + case InstructionKind::LHU: + 
return make_standard_load(i0, idx, 2, LoadVarOp::Kind::UNSIGNED); + case InstructionKind::LH: + return make_standard_load(i0, idx, 2, LoadVarOp::Kind::SIGNED); + case InstructionKind::LWU: + return make_standard_load(i0, idx, 4, LoadVarOp::Kind::UNSIGNED); + case InstructionKind::LW: + return convert_lw_1(i0, idx); + case InstructionKind::LD: + return make_standard_load(i0, idx, 8, LoadVarOp::Kind::UNSIGNED); + case InstructionKind::DSLL: + return make_2reg_1imm_op(i0, SimpleExpression::Kind::LEFT_SHIFT, idx); + case InstructionKind::DSLL32: + return convert_dsll32_1(i0, idx); + case InstructionKind::DSRA: + return make_2reg_1imm_op(i0, SimpleExpression::Kind::RIGHT_SHIFT_ARITH, idx); + case InstructionKind::DSRA32: + return make_2reg_1imm_op(i0, SimpleExpression::Kind::RIGHT_SHIFT_ARITH, idx, 32); + case InstructionKind::DSRL: + return make_2reg_1imm_op(i0, SimpleExpression::Kind::RIGHT_SHIFT_LOGIC, idx); + case InstructionKind::DSRL32: + return convert_dsrl32_1(i0, idx); + case InstructionKind::DIVS: + return make_3reg_op(i0, SimpleExpression::Kind::DIV_S, idx); + case InstructionKind::SUBS: + return make_3reg_op(i0, SimpleExpression::Kind::SUB_S, idx); + case InstructionKind::ADDS: + return make_3reg_op(i0, SimpleExpression::Kind::ADD_S, idx); + case InstructionKind::MULS: + return make_3reg_op(i0, SimpleExpression::Kind::MUL_S, idx); + case InstructionKind::MINS: + return make_3reg_op(i0, SimpleExpression::Kind::MIN_S, idx); + case InstructionKind::MAXS: + return make_3reg_op(i0, SimpleExpression::Kind::MAX_S, idx); + case InstructionKind::DADDIU: + return convert_daddiu_1(i0, idx); + case InstructionKind::DADDU: + return convert_daddu_1(i0, idx); + case InstructionKind::DSUBU: + return convert_dsubu_1(i0, idx); + case InstructionKind::MULT3: + return make_3reg_op(i0, SimpleExpression::Kind::MUL_SIGNED, idx); + case InstructionKind::MULTU3: + return make_3reg_op(i0, SimpleExpression::Kind::MUL_UNSIGNED, idx); + case InstructionKind::ANDI: + return 
make_2reg_1imm_op(i0, SimpleExpression::Kind::AND, idx); + case InstructionKind::XORI: + return make_2reg_1imm_op(i0, SimpleExpression::Kind::XOR, idx); + case InstructionKind::NOR: + return convert_nor_1(i0, idx); + case InstructionKind::XOR: + return make_3reg_op(i0, SimpleExpression::Kind::XOR, idx); + case InstructionKind::ADDIU: + return convert_addiu_1(i0, idx); + case InstructionKind::LUI: + return convert_lui_1(i0, idx); + case InstructionKind::SLL: + return convert_sll_1(i0, idx); + case InstructionKind::DSRAV: + return make_3reg_op(i0, SimpleExpression::Kind::RIGHT_SHIFT_ARITH, idx); + case InstructionKind::DSRLV: + return make_3reg_op(i0, SimpleExpression::Kind::RIGHT_SHIFT_LOGIC, idx); + case InstructionKind::DSLLV: + return make_3reg_op(i0, SimpleExpression::Kind::LEFT_SHIFT, idx); + case InstructionKind::SB: + return make_standard_store(i0, idx, 1, false); + case InstructionKind::SH: + return make_standard_store(i0, idx, 2, false); + case InstructionKind::SW: + return convert_sw_1(i0, idx); + case InstructionKind::SD: + return convert_sd_1(i0, idx); + case InstructionKind::SWC1: + return make_standard_store(i0, idx, 4, true); + case InstructionKind::CVTWS: // float to int + return make_2reg_op(i0, SimpleExpression::Kind::FLOAT_TO_INT, idx); + case InstructionKind::CVTSW: // int to float + return make_2reg_op(i0, SimpleExpression::Kind::INT_TO_FLOAT, idx); + case InstructionKind::ABSS: + return make_2reg_op(i0, SimpleExpression::Kind::ABS_S, idx); + case InstructionKind::NEGS: + return make_2reg_op(i0, SimpleExpression::Kind::NEG_S, idx); + case InstructionKind::SQRTS: + return make_2reg_op(i0, SimpleExpression::Kind::SQRT_S, idx); + case InstructionKind::MOVS: + return make_2reg_op(i0, SimpleExpression::Kind::IDENTITY, idx); + case InstructionKind::MOVN: + case InstructionKind::MOVZ: + return convert_cmov_1(i0, idx); + default: + return nullptr; + } +} + +/////////////////////// +// OP 2 Conversions +////////////////////// + +std::unique_ptr 
convert_division_2(const Instruction& i0, + const Instruction& i1, + int idx, + bool is_signed) { + if (i1.kind == InstructionKind::MFLO) { + // divide + auto src = make_2reg_expr( + i0, is_signed ? SimpleExpression::Kind::DIV_SIGNED : SimpleExpression::Kind::DIV_UNSIGNED, + idx); + return std::make_unique(make_dst_var(i1, idx), src, idx); + } else if (i1.kind == InstructionKind::MFHI) { + // mod + auto src = make_2reg_expr( + i0, is_signed ? SimpleExpression::Kind::MOD_SIGNED : SimpleExpression::Kind::MOD_UNSIGNED, + idx); + return std::make_unique(make_dst_var(i1, idx), src, idx); + } else { + return nullptr; + } +} + +std::unique_ptr convert_jalr_2(const Instruction& i0, const Instruction& i1, int idx) { + if (i0.kind == InstructionKind::JALR && i0.get_dst(0).is_reg(rra()) && + i0.get_src(0).is_reg(rt9()) && is_gpr_2_imm_int(i1, InstructionKind::SLL, rv0(), rra(), 0)) { + return std::make_unique(idx); + } + return nullptr; +} + +std::unique_ptr convert_bne_2(const Instruction& i0, + const Instruction& i1, + int idx, + bool likely) { + auto s0 = i0.get_src(0).get_reg(); + auto s1 = i0.get_src(1).get_reg(); + auto dest = i0.get_src(2).get_label(); + IR2_Condition condition; + if (s1 == rr0()) { + condition = IR2_Condition(IR2_Condition::Kind::NONZERO, make_src_atom(s0, idx)); + } else if (i0.get_src(0).is_reg(rs7())) { + condition = IR2_Condition(IR2_Condition::Kind::TRUTHY, make_src_atom(s1, idx)); + } else if (s1 == rs7()) { + // likely a case where somebody wrote (= x #f) or (!= x #f). 
much rarer than the flipped one + condition = IR2_Condition(IR2_Condition::Kind::NOT_EQUAL, make_src_atom(s0, idx), + SimpleAtom::make_sym_ptr("#f")); + } else { + condition = IR2_Condition(IR2_Condition::Kind::NOT_EQUAL, make_src_atom(s0, idx), + make_src_atom(s1, idx)); + } + return make_branch(condition, i1, likely, dest, idx); +} + +std::unique_ptr convert_beq_2(const Instruction& i0, + const Instruction& i1, + int idx, + bool likely) { + auto s0 = i0.get_src(0).get_reg(); + auto s1 = i0.get_src(1).get_reg(); + auto dest = i0.get_src(2).get_label(); + IR2_Condition condition; + if (s0 == rr0() && s1 == rr0()) { + condition = IR2_Condition(IR2_Condition::Kind::ALWAYS); + } else if (s1 == rr0()) { + condition = IR2_Condition(IR2_Condition::Kind::ZERO, make_src_atom(s0, idx)); + } else if (i0.get_src(0).is_reg(rs7())) { + if (s1 == rs7()) { + // (if #f ...) type code? + condition = IR2_Condition(IR2_Condition::Kind::FALSE, SimpleAtom::make_sym_ptr("#f")); + } else { + condition = IR2_Condition(IR2_Condition::Kind::FALSE, make_src_atom(s1, idx)); + } + } else if (s1 == rs7()) { + // likely a case where somebody wrote (= x #f) or (!= x #f). 
much rarer than the flipped one + condition = IR2_Condition(IR2_Condition::Kind::EQUAL, make_src_atom(s0, idx), + SimpleAtom::make_sym_ptr("#f")); + } else { + condition = + IR2_Condition(IR2_Condition::Kind::EQUAL, make_src_atom(s0, idx), make_src_atom(s1, idx)); + } + return make_branch(condition, i1, likely, dest, idx); +} + +std::unique_ptr convert_branch_r1_2(const Instruction& i0, + const Instruction& i1, + IR2_Condition::Kind kind, + bool likely, + int idx) { + return make_branch(IR2_Condition(kind, make_src_atom(i0.get_src(0).get_reg(), idx)), i1, likely, + i0.get_src(1).get_label(), idx); +} + +std::unique_ptr convert_daddiu_2(const Instruction& i0, const Instruction& i1, int idx) { + // daddiu dest, s7, 8 + // mov{n,z} dest, s7, src + if (i1.kind == InstructionKind::MOVN || i1.kind == InstructionKind::MOVZ) { + auto dest = i0.get_dst(0).get_reg(); + auto src = i1.get_src(1).get_reg(); + if (!i0.get_src(0).is_reg(rs7())) { + return nullptr; + } + assert(i0.get_src(0).is_reg(rs7())); + assert(i0.get_src(1).is_imm(8)); + assert(i1.get_dst(0).is_reg(dest)); + assert(i1.get_src(0).is_reg(rs7())); + auto kind = + i1.kind == InstructionKind::MOVN ? IR2_Condition::Kind::ZERO : IR2_Condition::Kind::NONZERO; + return std::make_unique(make_dst_var(dest, idx), + IR2_Condition(kind, make_src_atom(src, idx)), idx); + } + return nullptr; +} + +std::unique_ptr convert_lui_2(const Instruction& i0, const Instruction& i1, int idx) { + if (i1.kind == InstructionKind::ORI) { + // lui temp, <> + // ori dst, temp, <> + // possibly temp = dst. 
+ auto temp = i0.get_dst(0).get_reg(); + if (i1.get_src(0).get_reg() != temp) { + return nullptr; + } + auto dst = i1.get_dst(0).get_reg(); + + SimpleAtom src; + if (i0.get_src(0).is_imm() && i1.get_src(1).is_imm()) { + // integer constant assembled from both halves: (upper << 16) + lower + src = SimpleAtom::make_int_constant(s64(i1.get_src(1).get_imm()) + + (s64(i0.get_src(0).get_imm()) << 16)); + } else if (i0.get_src(0).is_label() && i1.get_src(1).is_label()) { + // both halves name a label: address of static data + auto label = i0.get_src(0).get_label(); + assert(label == i1.get_src(1).get_label()); + src = SimpleAtom::make_static_address(label); + } else { + // mixed or unsupported operand kinds: src was never assigned, so report "no match" + // instead of building an op from a default-constructed atom. + return nullptr; + } + + auto result = std::make_unique(make_dst_var(dst, idx), src.as_expr(), idx); + if (temp != dst) { + result->add_clobber_reg(temp); + } + return result; + } + + return nullptr; +} + +std::unique_ptr convert_slt_2(const Instruction& i0, + const Instruction& i1, + int idx, + bool is_signed) { + // this is to do a min or max. + // possibly due to a GOAL compiler bug, the output always goes in left and there is always + // a clobbered register. Likely there was a swapped register allocation setup here. + // it doesn't generate wrong code, just not optimal. + // slt temp, left, right + // mov{n,z} left, right, temp + auto temp = i0.get_dst(0).get_reg(); + auto left = i0.get_src(0).get_reg(); + auto right = i0.get_src(1).get_reg(); + if (temp == left) { + return nullptr; + } + assert(temp != left); + assert(temp != right); + assert(left != right); + std::unique_ptr result; + SimpleExpression::Kind kind; + if (is_gpr_3(i1, InstructionKind::MOVZ, left, right, temp)) { + kind = is_signed ? SimpleExpression::Kind::MIN_SIGNED : SimpleExpression::Kind::MIN_UNSIGNED; + } else if (is_gpr_3(i1, InstructionKind::MOVN, left, right, temp)) { + kind = is_signed ? 
SimpleExpression::Kind::MAX_SIGNED : SimpleExpression::Kind::MAX_UNSIGNED; + } else { + return nullptr; + } + result = std::make_unique( + make_dst_var(left, idx), + SimpleExpression(kind, make_src_atom(left, idx), make_src_atom(right, idx)), idx); + result->add_clobber_reg(temp); + return result; +} + +std::unique_ptr convert_2(const Instruction& i0, const Instruction& i1, int idx) { + switch (i0.kind) { + case InstructionKind::DIV: + return convert_division_2(i0, i1, idx, true); + case InstructionKind::DIVU: + return convert_division_2(i0, i1, idx, false); + case InstructionKind::JALR: + return convert_jalr_2(i0, i1, idx); + case InstructionKind::BNE: + return convert_bne_2(i0, i1, idx, false); + case InstructionKind::BNEL: + return convert_bne_2(i0, i1, idx, true); + case InstructionKind::BEQ: + return convert_beq_2(i0, i1, idx, false); + case InstructionKind::BEQL: + return convert_beq_2(i0, i1, idx, true); + case InstructionKind::BGTZL: + return convert_branch_r1_2(i0, i1, IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED, true, idx); + case InstructionKind::BGEZL: + return convert_branch_r1_2(i0, i1, IR2_Condition::Kind::GEQ_ZERO_SIGNED, true, idx); + case InstructionKind::BLTZL: + return convert_branch_r1_2(i0, i1, IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED, true, idx); + case InstructionKind::DADDIU: + return convert_daddiu_2(i0, i1, idx); + case InstructionKind::LUI: + return convert_lui_2(i0, i1, idx); + case InstructionKind::SLT: + return convert_slt_2(i0, i1, idx, true); + case InstructionKind::SLTU: + return convert_slt_2(i0, i1, idx, false); + default: + return nullptr; + } +} + +/////////////////////// +// OP 3 Conversions +////////////////////// + +std::unique_ptr convert_lui_3(const Instruction& i0, + const Instruction& i1, + const Instruction& i2, + int idx) { + if (i1.kind == InstructionKind::ORI && i0.get_src(0).is_label() && i1.get_src(1).is_label() && + is_gpr_3(i2, InstructionKind::ADDU, {}, rfp(), {})) { + // lui temp, <> + // ori dst, temp, <> 
+ // addu dst, fp, dst + assert(i0.get_dst(0).get_reg() == i1.get_src(0).get_reg()); // temp + assert(i0.get_src(0).get_label() == i1.get_src(1).get_label()); // labels + assert(i2.get_dst(0).get_reg() == i2.get_src(1).get_reg()); // dst + assert(i2.get_dst(0).get_reg() == i1.get_dst(0).get_reg()); // dst + auto temp = i0.get_dst(0).get_reg(); + auto dst = i2.get_dst(0).get_reg(); + auto label = i0.get_src(0).get_label(); + auto result = std::make_unique(make_dst_var(dst, idx), + SimpleAtom::make_static_address(label).as_expr(), idx); + if (dst != temp) { + result->add_clobber_reg(temp); + } + return result; + } else if (i1.kind == InstructionKind::ORI && i1.get_src(1).is_label() && + is_gpr_3(i2, InstructionKind::DADDU, {}, {}, rfp())) { + // lui temp, <> + // ori temp, temp, <> + // daddu dst, temp, fp + assert(i0.get_dst(0).get_reg() == i1.get_src(0).get_reg()); // temp + assert(i0.get_src(0).get_label() == i1.get_src(1).get_label()); // labels + assert(i0.get_dst(0).get_reg() == i1.get_dst(0).get_reg()); // temp + assert(i2.get_src(0).get_reg() == i0.get_dst(0).get_reg()); // temp + auto temp = i0.get_dst(0).get_reg(); + auto dst = i2.get_dst(0).get_reg(); + auto label = i0.get_src(0).get_label(); + auto result = std::make_unique(make_dst_var(dst, idx), + SimpleAtom::make_static_address(label).as_expr(), idx); + if (dst != temp) { + result->add_clobber_reg(temp); + } + return result; + } + return nullptr; +} + +std::unique_ptr convert_dsubu_3(const Instruction& i0, + const Instruction& i1, + const Instruction& i2, + int idx) { + if (i1.kind == InstructionKind::DADDIU && + (i2.kind == InstructionKind::MOVN || i2.kind == InstructionKind::MOVZ)) { + // dsubu temp, a, b + // daddiu dst, s7, 8 + // mov{n,z} dst, s7, temp + auto temp = i0.get_dst(0).get_reg(); + auto a = i0.get_src(0).get_reg(); + auto b = i0.get_src(1).get_reg(); + auto dest = i1.get_dst(0).get_reg(); + assert(i1.get_src(0).is_reg(rs7())); + assert(i1.get_src(1).is_imm(FIX_SYM_TRUE)); + 
assert(i2.get_dst(0).get_reg() == dest); + assert(i2.get_src(0).is_reg(rs7())); + assert(i2.get_src(1).get_reg() == temp); + assert(temp != dest); + auto kind = i2.kind == InstructionKind::MOVN ? IR2_Condition::Kind::EQUAL + : IR2_Condition::Kind::NOT_EQUAL; + std::unique_ptr result; + if (b == rs7()) { + // some sort of not gone wrong? + result = std::make_unique( + make_dst_var(dest, idx), + IR2_Condition(kind, make_src_atom(a, idx), SimpleAtom::make_sym_ptr("#f")), idx); + } else if (b == rr0()) { + // not the greatest codegen... + result = std::make_unique( + make_dst_var(dest, idx), + IR2_Condition(kind, make_src_atom(a, idx), SimpleAtom::make_int_constant(0)), idx); + } else { + result = std::make_unique( + make_dst_var(dest, idx), + IR2_Condition(kind, make_src_atom(a, idx), make_src_atom(b, idx)), idx); + } + + result->add_clobber_reg(temp); + return result; + } + return nullptr; +} + +void add_clobber_if_unritten(AtomicOp& op, Register clobber) { + op.update_register_info(); + if (std::find(op.write_regs().begin(), op.write_regs().end(), clobber) == op.write_regs().end()) { + op.add_clobber_reg(clobber); + } +} + +std::unique_ptr convert_slt_3(const Instruction& i0, + const Instruction& i1, + const Instruction& i2, + bool is_signed, + int idx) { + auto s0 = i0.get_src(0).get_reg(); + auto s1 = i0.get_src(1).get_reg(); + std::unique_ptr result; + if (i1.kind == InstructionKind::BNE || i1.kind == InstructionKind::BEQ) { + // assume bne, invert at the end if it's beq + // slt temp, a0, a1 + // bne temp, r0, dest + // delay slot + auto temp = i0.get_dst(0).get_reg(); + auto dest = i1.get_src(2).get_label(); + assert(i1.get_src(0).get_reg() == temp); + assert(i1.get_src(1).is_reg(rr0())); + + IR2_Condition condition; + if (s1 == rr0()) { + // ???? + auto kind = is_signed ? 
IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED + : IR2_Condition::Kind::LESS_THAN_ZERO_UNSIGNED; + condition = IR2_Condition(kind, make_src_atom(s0, idx)); + } else if (s0 == rr0()) { + auto kind = is_signed ? IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED + : IR2_Condition::Kind::GREATER_THAN_ZERO_UNSIGNED; + condition = IR2_Condition(kind, make_src_atom(s1, idx)); + } else { + auto kind = is_signed ? IR2_Condition::Kind::LESS_THAN_SIGNED + : IR2_Condition::Kind::LESS_THAN_UNSIGNED; + condition = IR2_Condition(kind, make_src_atom(s0, idx), make_src_atom(s1, idx)); + } + + if (i1.kind == InstructionKind::BEQ) { + condition.invert(); + } + result = make_branch(condition, i2, false, dest, idx); + if (!result) { + // make_branch returns nullptr when the delay slot is not a known pattern; + // report "no match" instead of dereferencing a null op below. + return nullptr; + } + add_clobber_if_unritten(*result, temp); + return result; + } else if (i1.kind == InstructionKind::DADDIU && + (i2.kind == InstructionKind::MOVZ || i2.kind == InstructionKind::MOVN)) { + // all this assumes movz. Then at the end we invert it if it's actually a movn + // slt temp, a0, a1 + // daddiu dest, s7, 8 + // movz dest, s7, temp + auto temp = i0.get_dst(0).get_reg(); + auto dest = i1.get_dst(0).get_reg(); + assert(i1.get_src(0).is_reg(rs7())); + assert(i1.get_src(1).is_imm(FIX_SYM_TRUE)); + assert(i2.get_dst(0).get_reg() == dest); + assert(i2.get_src(0).is_reg(rs7())); + assert(i2.get_src(1).get_reg() == temp); + assert(temp != dest); + IR2_Condition condition; + if (s1 == rr0()) { + auto kind = is_signed ? IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED + : IR2_Condition::Kind::LESS_THAN_ZERO_UNSIGNED; + // < 0 + condition = IR2_Condition(kind, make_src_atom(s0, idx)); + } else if (s0 == rr0()) { + auto kind = is_signed ? IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED + : IR2_Condition::Kind::GREATER_THAN_ZERO_UNSIGNED; + condition = IR2_Condition(kind, make_src_atom(s1, idx)); + } else { + auto kind = is_signed ? 
IR2_Condition::Kind::LESS_THAN_SIGNED + : IR2_Condition::Kind::LESS_THAN_UNSIGNED; + condition = IR2_Condition(kind, make_src_atom(s0, idx), make_src_atom(s1, idx)); + } + if (i2.kind == InstructionKind::MOVN) { + condition.invert(); + } + result = std::make_unique(make_dst_var(dest, idx), condition, idx); + add_clobber_if_unritten(*result, temp); + return result; + } + return nullptr; +} + +// Convert slti/sltiu (i0) followed either by bne/beq plus its delay slot, or by daddiu plus +// movz/movn, into a single op comparing i0's source register against i0's immediate. +// Returns nullptr when neither pattern matches or the branch delay slot is not recognized. +std::unique_ptr convert_slti_3(const Instruction& i0, + const Instruction& i1, + const Instruction& i2, + bool is_signed, + int idx) { + auto s0 = i0.get_src(0).get_reg(); + auto s1 = SimpleAtom::make_int_constant(i0.get_src(1).get_imm()); + std::unique_ptr result; + if (i1.kind == InstructionKind::BNE || i1.kind == InstructionKind::BEQ) { + // assume bne, invert at the end if it's beq + // slti temp, a0, <> + // bne temp, r0, dest + // delay slot + auto temp = i0.get_dst(0).get_reg(); + auto dest = i1.get_src(2).get_label(); + assert(i1.get_src(0).get_reg() == temp); + assert(i1.get_src(1).is_reg(rr0())); + auto kind = + is_signed ? IR2_Condition::Kind::LESS_THAN_SIGNED : IR2_Condition::Kind::LESS_THAN_UNSIGNED; + auto condition = IR2_Condition(kind, make_src_atom(s0, idx), s1); + if (i1.kind == InstructionKind::BEQ) { + condition.invert(); + } + result = make_branch(condition, i2, false, dest, idx); + if (!result) { + // make_branch returns nullptr when the delay slot is not a known pattern; + // report "no match" instead of dereferencing a null op below. + return nullptr; + } + add_clobber_if_unritten(*result, temp); + return result; + } else if (i1.kind == InstructionKind::DADDIU && + (i2.kind == InstructionKind::MOVZ || i2.kind == InstructionKind::MOVN)) { + // all this assumes movz. 
Then at the end we invert it if it's actually a movn + // slt temp, a0, <> + // daddiu dest, s7, 8 + // movz dest, s7, temp + auto temp = i0.get_dst(0).get_reg(); + auto dest = i1.get_dst(0).get_reg(); + assert(i1.get_src(0).is_reg(rs7())); + assert(i1.get_src(1).is_imm(FIX_SYM_TRUE)); + assert(i2.get_dst(0).get_reg() == dest); + assert(i2.get_src(0).is_reg(rs7())); + assert(i2.get_src(1).get_reg() == temp); + assert(temp != dest); + IR2_Condition condition; + + auto kind = + is_signed ? IR2_Condition::Kind::LESS_THAN_SIGNED : IR2_Condition::Kind::LESS_THAN_UNSIGNED; + condition = IR2_Condition(kind, make_src_atom(s0, idx), s1); + if (i2.kind == InstructionKind::MOVN) { + condition.invert(); + } + result = std::make_unique(make_dst_var(dest, idx), condition, idx); + add_clobber_if_unritten(*result, temp); + return result; + } + return nullptr; +} + +std::unique_ptr convert_fp_branch(const Instruction& i0, + const Instruction& i1, + const Instruction& i2, + IR2_Condition::Kind kind, + int idx) { + if (i1.kind == InstructionKind::BC1T || i1.kind == InstructionKind::BC1F) { + IR2_Condition condition(kind, make_src_atom(i0.get_src(0).get_reg(), idx), + make_src_atom(i0.get_src(1).get_reg(), idx)); + if (i1.kind == InstructionKind::BC1F) { + condition.invert(); + } + return make_branch(condition, i2, false, i1.get_src(0).get_label(), idx); + } + return nullptr; +} + +std::unique_ptr convert_3(const Instruction& i0, + const Instruction& i1, + const Instruction& i2, + int idx) { + switch (i0.kind) { + case InstructionKind::LUI: + return convert_lui_3(i0, i1, i2, idx); + case InstructionKind::DSUBU: + return convert_dsubu_3(i0, i1, i2, idx); + case InstructionKind::SLT: + return convert_slt_3(i0, i1, i2, true, idx); + case InstructionKind::SLTU: + return convert_slt_3(i0, i1, i2, false, idx); + case InstructionKind::SLTI: + return convert_slti_3(i0, i1, i2, true, idx); + case InstructionKind::SLTIU: + return convert_slti_3(i0, i1, i2, false, idx); + case 
InstructionKind::CEQS: + return convert_fp_branch(i0, i1, i2, IR2_Condition::Kind::FLOAT_EQUAL, idx); + case InstructionKind::CLTS: + return convert_fp_branch(i0, i1, i2, IR2_Condition::Kind::FLOAT_LESS_THAN, idx); + case InstructionKind::CLES: + return convert_fp_branch(i0, i1, i2, IR2_Condition::Kind::FLOAT_LEQ, idx); + default: + return nullptr; + } +} + +/////////////////////// +// OP 4 Conversions +////////////////////// + +std::unique_ptr convert_dsll32_4(const Instruction& i0, + const Instruction& i1, + const Instruction& i2, + const Instruction& i3, + int idx) { + if (i1.kind == InstructionKind::SLT && i2.kind == InstructionKind::BEQ) { + // dsll32 temp, a0, 30 + // slt temp, temp, r0 + // beq temp, r0, <> + // delay + + auto temp = i0.get_dst(0).get_reg(); + auto arg = i0.get_src(0).get_reg(); + auto sa = i0.get_src(1).get_imm(); + // 30 = 64 - (32 + log2(0b10) + 1) + if (sa != 30) { + return nullptr; + } + assert(i1.get_dst(0).get_reg() == temp); + assert(i1.get_src(0).get_reg() == temp); + assert(i1.get_src(1).is_reg(rr0())); + assert(i2.get_src(0).get_reg() == temp); + assert(i2.get_src(1).is_reg(rr0())); + + IR2_Condition condition(IR2_Condition::Kind::IS_NOT_PAIR, make_src_atom(arg, idx)); + auto result = make_branch(condition, i3, false, i2.get_src(2).get_label(), idx); + result->add_clobber_reg(temp); + return result; + } + return nullptr; +} + +std::unique_ptr convert_4(const Instruction& i0, + const Instruction& i1, + const Instruction& i2, + const Instruction& i3, + int idx) { + switch (i0.kind) { + case InstructionKind::DSLL32: + return convert_dsll32_4(i0, i1, i2, i3, idx); + default: + return nullptr; + } +} + +/////////////////////// +// OP 5 Conversions +////////////////////// + +std::unique_ptr convert_5(const Instruction& i0, + const Instruction& i1, + const Instruction& i2, + const Instruction& i3, + const Instruction& i4, + int idx) { + auto s6 = make_gpr(Reg::S6); + + if (i0.kind == InstructionKind::LWU && i0.get_dst(0).is_reg(s6) && + 
i0.get_src(0).get_imm() == 44 && i0.get_src(1).is_reg(s6) && + i1.kind == InstructionKind::MTLO1 && i1.get_src(0).is_reg(s6) && + i2.kind == InstructionKind::LWU && i2.get_dst(0).is_reg(s6) && + i2.get_src(0).get_imm() == 12 && i2.get_src(1).is_reg(s6) && + i3.kind == InstructionKind::JALR && i3.get_dst(0).is_reg(make_gpr(Reg::RA)) && + i3.get_src(0).is_reg(s6) && i4.kind == InstructionKind::MFLO1 && i4.get_dst(0).is_reg(s6)) { + return std::make_unique(SpecialOp::Kind::SUSPEND, idx); + } + return nullptr; } } // namespace @@ -75,21 +1321,46 @@ void convert_block_to_atomic_ops(int begin_idx, bool converted = false; std::unique_ptr op; - if (n_instr >= 4) { + if (n_instr >= 5) { + // try 5 instructions + op = convert_5(instr[0], instr[1], instr[2], instr[3], instr[4], op_idx); + if (op) { + converted = true; + length = 5; + } + } + + if (!converted && n_instr >= 4) { // try 4 instructions + op = convert_4(instr[0], instr[1], instr[2], instr[3], op_idx); + if (op) { + converted = true; + length = 4; + } } if (!converted && n_instr >= 3) { // try 3 instructions + op = convert_3(instr[0], instr[1], instr[2], op_idx); + if (op) { + converted = true; + length = 3; + } } if (!converted && n_instr >= 2) { // try 2 instructions + op = convert_2(instr[0], instr[1], op_idx); + if (op) { + converted = true; + length = 2; + } } if (!converted) { // try 1 instruction - if (convert_1(*instr, op_idx, op)) { + op = convert_1(*instr, op_idx); + if (op) { converted = true; length = 1; } @@ -97,23 +1368,31 @@ void convert_block_to_atomic_ops(int begin_idx, if (!converted) { // try assembly fallback. + op = make_asm_op(*instr, op_idx); + if (op) { + converted = true; + length = 1; + } } if (!converted) { // failed! 
- lg::die("Failed to convert instruction {} to an atomic op", instr->to_string(labels)); + throw std::runtime_error("Failed to convert " + instr->to_string(labels)); + // lg::die("Failed to convert instruction {} to an atomic op", + // instr->to_string(labels)); } assert(converted && length && op); // add mappings: container->atomic_op_to_instruction[container->ops.size()] = begin_idx; for (int i = 0; i < length; i++) { - container->instruction_to_basic_op[begin_idx + i] = container->ops.size(); + container->instruction_to_atomic_op[begin_idx + i] = container->ops.size(); } // add op->update_register_info(); container->ops.emplace_back(std::move(op)); instr += length; + begin_idx += length; } container->block_id_to_end_atomic_op.push_back(container->ops.size()); } diff --git a/decompiler/IR2/AtomicOpBuilder.h b/decompiler/IR2/AtomicOpBuilder.h index 046cca8f13..7edbbb300e 100644 --- a/decompiler/IR2/AtomicOpBuilder.h +++ b/decompiler/IR2/AtomicOpBuilder.h @@ -15,7 +15,7 @@ struct FunctionAtomicOps { std::vector> ops; // mappings from instructions to atomic ops and back - std::unordered_map instruction_to_basic_op; + std::unordered_map instruction_to_atomic_op; std::unordered_map atomic_op_to_instruction; // map from basic block to the index of the first op diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index aad6fe6964..888d99e1ce 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -989,7 +989,7 @@ goos::Object LinkedObjectFile::to_form_script(int seg, int word_idx, std::vector /*! * Is the thing pointed to a string? */ -bool LinkedObjectFile::is_string(int seg, int byte_idx) { +bool LinkedObjectFile::is_string(int seg, int byte_idx) const { if (byte_idx % 4) { return false; // must be aligned pointer. 
} diff --git a/decompiler/ObjectFile/LinkedObjectFile.h b/decompiler/ObjectFile/LinkedObjectFile.h index 9ad369cae6..88c16cd143 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.h +++ b/decompiler/ObjectFile/LinkedObjectFile.h @@ -63,6 +63,8 @@ class LinkedObjectFile { u32 read_data_word(const DecompilerLabel& label); std::string get_goal_string_by_label(const DecompilerLabel& label) const; + std::string get_goal_string(int seg, int word_idx, bool with_quotes = true) const; + bool is_string(int seg, int byte_idx) const; struct Stats { uint32_t total_code_bytes = 0; @@ -129,8 +131,6 @@ class LinkedObjectFile { goos::Object to_form_script(int seg, int word_idx, std::vector& seen); goos::Object to_form_script_object(int seg, int byte_idx, std::vector& seen); bool is_empty_list(int seg, int byte_idx); - bool is_string(int seg, int byte_idx); - std::string get_goal_string(int seg, int word_idx, bool with_quotes = true) const; std::vector> label_per_seg_by_offset; }; diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index 42c2deafb9..3fcb94a735 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -126,19 +126,19 @@ ObjectFileDB::ObjectFileDB(const std::vector& _dgos, "consistent naming when doing a partial decompilation."); } - lg::info("-Loading DGOs..."); + lg::info("-Loading {} DGOs...", _dgos.size()); for (auto& dgo : _dgos) { get_objs_from_dgo(dgo); } - lg::info("-Loading plain object files..."); + lg::info("-Loading {} plain object files...", object_files.size()); for (auto& obj : object_files) { auto data = file_util::read_binary_file(obj); auto name = obj_filename_to_name(obj); add_obj_from_dgo(name, name, data.data(), data.size(), "NO-XGO"); } - lg::info("-Loading streaming object files..."); + lg::info("-Loading {} streaming object files...", str_files.size()); for (auto& obj : str_files) { StrFileReader reader(obj); // name from the file name @@ -153,15 +153,7 @@ 
ObjectFileDB::ObjectFileDB(const std::vector& _dgos, } } - lg::info("ObjectFileDB Initialized:"); - lg::info("Total DGOs: {}", int(_dgos.size())); - lg::info("Total data: {} bytes", stats.total_dgo_bytes); - lg::info("Total objs: {}", stats.total_obj_files); - lg::info("Unique objs: {}", stats.unique_obj_files); - lg::info("Unique data: {} bytes", stats.unique_obj_bytes); - lg::info("Total {:.2f} ms ({:.3f} MB/sec, {:.2f} obj/sec)", timer.getMs(), - stats.total_dgo_bytes / ((1u << 20u) * timer.getSeconds()), - stats.total_obj_files / timer.getSeconds()); + lg::info("ObjectFileDB Initialized\n"); } void ObjectFileDB::load_map_file(const std::string& map_data) { @@ -481,7 +473,7 @@ std::string ObjectFileDB::generate_obj_listing() { * Process all of the linking data of all objects. */ void ObjectFileDB::process_link_data() { - lg::info("- Processing Link Data..."); + lg::info("Processing Link Data..."); Timer process_link_timer; LinkedObjectFile::Stats combined_stats; @@ -491,25 +483,7 @@ void ObjectFileDB::process_link_data() { combined_stats.add(obj.linked_data.stats); }); - lg::info("Processed Link Data:"); - lg::info(" Code {} bytes", combined_stats.total_code_bytes); - lg::info(" v2 Code {} bytes", combined_stats.total_v2_code_bytes); - lg::info(" v2 Link Data {} bytes", combined_stats.total_v2_link_bytes); - lg::info(" v2 Pointers {}", combined_stats.total_v2_pointers); - lg::info(" v2 Pointer Seeks {}", combined_stats.total_v2_pointer_seeks); - lg::info(" v2 Symbols {}", combined_stats.total_v2_symbol_count); - lg::info(" v2 Symbol Links {}", combined_stats.total_v2_symbol_links); - - lg::info(" v3 Code {} bytes", combined_stats.v3_code_bytes); - lg::info(" v3 Link Data {} bytes", combined_stats.v3_link_bytes); - lg::info(" v3 Pointers {}", combined_stats.v3_pointers); - lg::info(" Split {}", combined_stats.v3_split_pointers); - lg::info(" Word {}", combined_stats.v3_word_pointers); - lg::info(" v3 Pointer Seeks {}", combined_stats.v3_pointer_seeks); - 
lg::info(" v3 Symbols {}", combined_stats.v3_symbol_count); - lg::info(" v3 Offset Symbol Links {}", combined_stats.v3_symbol_link_offset); - lg::info(" v3 Word Symbol Links {}", combined_stats.v3_symbol_link_word); - + lg::info("Processed Link Data"); lg::info(" Total {} ms\n", process_link_timer.getMs()); // printf("\n"); } @@ -518,15 +492,14 @@ void ObjectFileDB::process_link_data() { * Process all of the labels generated from linking and give them reasonable names. */ void ObjectFileDB::process_labels() { - lg::info("- Processing Labels..."); + lg::info("Processing Labels..."); Timer process_label_timer; uint32_t total = 0; for_each_obj([&](ObjectFileData& obj) { total += obj.linked_data.set_ordered_label_names(); }); lg::info("Processed Labels:"); lg::info(" Total {} labels", total); - lg::info(" Total {} ms", process_label_timer.getMs()); - // printf("\n"); + lg::info(" Total {} ms\n", process_label_timer.getMs()); } /*! @@ -636,7 +609,7 @@ void ObjectFileDB::write_disassembly(const std::string& output_dir, * Find code/data zones, identify functions, and disassemble */ void ObjectFileDB::find_code() { - lg::info("- Finding code in object files..."); + lg::info("Finding code in object files..."); LinkedObjectFile::Stats combined_stats; Timer timer; @@ -670,8 +643,7 @@ void ObjectFileDB::find_code() { auto total_ops = combined_stats.code_bytes / 4; lg::info(" Decoded {} / {} ({:.3f} %)", combined_stats.decoded_ops, total_ops, 100.f * (float)combined_stats.decoded_ops / total_ops); - lg::info(" Total {:.3f} ms", timer.getMs()); - // printf("\n"); + lg::info(" Total {:.3f} ms\n", timer.getMs()); } /*! @@ -767,7 +739,7 @@ std::string ObjectFileDB::process_game_count_file() { /*! * This is the main decompiler routine which runs after we've identified functions. 
*/ -void ObjectFileDB::analyze_functions() { +void ObjectFileDB::analyze_functions_ir1() { lg::info("- Analyzing Functions..."); Timer timer; diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index 5ea3c10ce2..fb16d43a43 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -65,7 +65,15 @@ class ObjectFileDB { const std::string& file_suffix = ""); void write_debug_type_analysis(const std::string& output_dir, const std::string& suffix = ""); - void analyze_functions(); + void analyze_functions_ir1(); + void analyze_functions_ir2(const std::string& output_dir); + void ir2_top_level_pass(); + void ir2_basic_block_pass(); + void ir2_atomic_op_pass(); + void ir2_write_results(const std::string& output_dir); + std::string ir2_to_file(ObjectFileData& data); + std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg); + void process_tpages(); void analyze_expressions(); std::string process_game_count_file(); diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp new file mode 100644 index 0000000000..cbe56037c3 --- /dev/null +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -0,0 +1,347 @@ +/*! + * @file ObjectFileDB_IR2.cpp + * This runs the IR2 analysis passes. + */ + +#include "ObjectFileDB.h" +#include "common/log/log.h" +#include "common/util/Timer.h" +#include "common/util/FileUtil.h" +#include "decompiler/Function/TypeInspector.h" + +namespace decompiler { + +/*! + * Main IR2 analysis pass. + * At this point, we assume that the files are loaded and we've run find_code to locate all + * functions, but nothing else. 
+ */ +void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) { + lg::info("Using IR2 analysis..."); + lg::info("Processing top-level functions..."); + ir2_top_level_pass(); + lg::info("Processing basic blocks and control flow graph..."); + ir2_basic_block_pass(); + lg::info("Converting to atomic ops..."); + ir2_atomic_op_pass(); + lg::info("Writing results..."); + ir2_write_results(output_dir); +} + +void ObjectFileDB::ir2_top_level_pass() { + Timer timer; + int total_functions = 0; + int total_named_global_functions = 0; + int total_methods = 0; + int total_top_levels = 0; + int total_unknowns = 0; + + for_each_obj([&](ObjectFileData& data) { + if (data.linked_data.segments == 3) { + // the top level segment should have a single function + assert(data.linked_data.functions_by_seg.at(2).size() == 1); + + auto& func = data.linked_data.functions_by_seg.at(2).front(); + assert(func.guessed_name.empty()); + func.guessed_name.set_as_top_level(); + func.find_global_function_defs(data.linked_data, dts); + func.find_type_defs(data.linked_data, dts); + func.find_method_defs(data.linked_data, dts); + } + }); + + // check for function uniqueness. 
+ std::unordered_set unique_names; + std::unordered_map> duplicated_functions; + + int uid = 1; + for_each_obj([&](ObjectFileData& data) { + int func_in_obj = 0; + for (int segment_id = 0; segment_id < int(data.linked_data.segments); segment_id++) { + for (auto& func : data.linked_data.functions_by_seg.at(segment_id)) { + func.guessed_name.unique_id = uid++; + func.guessed_name.id_in_object = func_in_obj++; + func.guessed_name.object_name = data.to_unique_name(); + auto name = func.guessed_name.to_string(); + + switch (func.guessed_name.kind) { + case FunctionName::FunctionKind::METHOD: + total_methods++; + break; + case FunctionName::FunctionKind::GLOBAL: + total_named_global_functions++; + break; + case FunctionName::FunctionKind::TOP_LEVEL_INIT: + total_top_levels++; + break; + case FunctionName::FunctionKind::UNIDENTIFIED: + total_unknowns++; + break; + default: + assert(false); + } + total_functions++; + + if (unique_names.find(name) != unique_names.end()) { + duplicated_functions[name].insert(data.to_unique_name()); + } + + unique_names.insert(name); + + if (get_config().asm_functions_by_name.find(name) != + get_config().asm_functions_by_name.end()) { + func.warnings += ";; flagged as asm by config\n"; + func.suspected_asm = true; + } + } + } + }); + + for_each_function([&](Function& func, int segment_id, ObjectFileData& data) { + (void)segment_id; + auto name = func.guessed_name.to_string(); + + if (duplicated_functions.find(name) != duplicated_functions.end()) { + duplicated_functions[name].insert(data.to_unique_name()); + func.warnings += ";; this function exists in multiple non-identical object files\n"; + } + }); + + lg::info("Found a total of {} functions in {:.2f} ms", total_functions, timer.getMs()); + lg::info("{:4d} unknown {:.2f}%", total_unknowns, 100.f * total_unknowns / total_functions); + lg::info("{:4d} global {:.2f}%", total_named_global_functions, + 100.f * total_named_global_functions / total_functions); + lg::info("{:4d} methods {:.2f}%", 
total_methods, 100.f * total_methods / total_functions); + lg::info("{:4d} logins {:.2f}%\n", total_top_levels, 100.f * total_top_levels / total_functions); +} + +void ObjectFileDB::ir2_basic_block_pass() { + Timer timer; + // Main Pass over each function... + int total_basic_blocks = 0; + int total_functions = 0; + int functions_with_one_block = 0; + int inspect_methods = 0; + int suspected_asm = 0; + int failed_to_build_cfg = 0; + + for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { + total_functions++; + + // first, find basic blocks. + auto blocks = find_blocks_in_function(data.linked_data, segment_id, func); + total_basic_blocks += blocks.size(); + if (blocks.size() == 1) { + functions_with_one_block++; + } + func.basic_blocks = blocks; + + if (!func.suspected_asm) { + // find the prologue/epilogue so they can be excluded from basic blocks. + func.analyze_prologue(data.linked_data); + } + + if (!func.suspected_asm) { + // run analysis + + // build a control flow graph, just looking at branch instructions. + func.cfg = build_cfg(data.linked_data, segment_id, func); + if (!func.cfg->is_fully_resolved()) { + lg::warn("Function {} from {} failed to build control flow graph!", + func.guessed_name.to_string(), data.to_unique_name()); + failed_to_build_cfg++; + } + + // if we got an inspect method, inspect it. 
+ if (func.is_inspect_method) { + auto result = inspect_inspect_method(func, func.method_of_type, dts, data.linked_data); + all_type_defs += ";; " + data.to_unique_name() + "\n"; + all_type_defs += result.print_as_deftype() + "\n"; + inspect_methods++; + } + } + + if (func.suspected_asm) { + suspected_asm++; + } + }); + + lg::info("Found {} basic blocks in {} functions in {:.2f} ms:", total_basic_blocks, + total_functions, timer.getMs()); + lg::info(" {} functions ({:.2f}%) failed to build control flow graph", failed_to_build_cfg, + 100.f * failed_to_build_cfg / total_functions); + lg::info(" {} functions ({:.2f}%) had exactly one basic block", functions_with_one_block, + 100.f * functions_with_one_block / total_functions); + lg::info(" {} functions ({:.2f}%) were ignored as assembly", suspected_asm, + 100.f * suspected_asm / total_functions); + lg::info(" {} functions ({:.2f}%) were inspect methods\n", inspect_methods, + 100.f * inspect_methods / total_functions); +} + +void ObjectFileDB::ir2_atomic_op_pass() { + Timer timer; + int total_functions = 0; + int attempted = 0; + int successful = 0; + for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { + (void)segment_id; + total_functions++; + if (!func.suspected_asm) { + func.ir2.atomic_ops_attempted = true; + attempted++; + try { + auto ops = convert_function_to_atomic_ops(func, data.linked_data.labels); + func.ir2.atomic_ops = std::make_shared(std::move(ops)); + func.ir2.atomic_ops_succeeded = true; + successful++; + } catch (std::exception& e) { + lg::warn("Function {} from {} could not be converted to atomic ops: {}", + func.guessed_name.to_string(), data.to_unique_name(), e.what()); + } + } + }); + + lg::info("{}/{}/{} (successful/attempted/total) functions converted to Atomic Ops in {:.2f} ms", + successful, attempted, total_functions, timer.getMs()); + lg::info("{:.2f}% were attempted, {:.2f}% of attempted succeeded\n", + 100.f * attempted / total_functions, 100.f * 
successful / attempted); +} + +void ObjectFileDB::ir2_write_results(const std::string& output_dir) { + Timer timer; + lg::info("Writing IR2 results to file..."); + int total_files = 0; + int total_bytes = 0; + for_each_obj([&](ObjectFileData& obj) { + if (obj.linked_data.has_any_functions()) { + // todo + total_files++; + auto file_text = ir2_to_file(obj); + total_bytes += file_text.length(); + auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_ir2.asm"); + + file_util::write_text_file(file_name, file_text); + } + }); + lg::info("Wrote {} files ({:.2f} MB) in {:.2f} ms\n", total_files, total_bytes / float(1 << 20), + timer.getMs()); +} + +std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) { + std::string result; + + const char* segment_names[] = {"main segment", "debug segment", "top-level segment"}; + assert(data.linked_data.segments <= 3); + for (int seg = data.linked_data.segments; seg-- > 0;) { + // segment header + result += ";------------------------------------------\n; "; + result += segment_names[seg]; + result += "\n;------------------------------------------\n\n"; + + // functions + for (auto& func : data.linked_data.functions_by_seg.at(seg)) { + result += ir2_function_to_string(data, func, seg); + } + + // print data + for (size_t i = data.linked_data.offset_of_data_zone_by_seg.at(seg); + i < data.linked_data.words_by_seg.at(seg).size(); i++) { + for (int j = 0; j < 4; j++) { + auto label_id = data.linked_data.get_label_at(seg, i * 4 + j); + if (label_id != -1) { + result += data.linked_data.labels.at(label_id).name + ":"; + if (j != 0) { + result += " (offset " + std::to_string(j) + ")"; + } + result += "\n"; + } + } + + auto& word = data.linked_data.words_by_seg[seg][i]; + data.linked_data.append_word_to_string(result, word); + + if (word.kind == LinkedWord::TYPE_PTR && word.symbol_name == "string") { + result += "; " + data.linked_data.get_goal_string(seg, i) + "\n"; + } + } + } + + return result; +} + +std::string 
ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function& func, int seg) { + std::string result; + result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; + result += "; .function " + func.guessed_name.to_string() + "\n"; + result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; + result += func.prologue.to_string(2) + "\n"; + if (!func.warnings.empty()) { + result += ";;Warnings:\n" + func.warnings + "\n"; + } + + bool print_atomics = func.ir2.atomic_ops_succeeded; + // print each instruction in the function. + bool in_delay_slot = false; + + for (int i = 1; i < func.end_word - func.start_word; i++) { + // check for a label to print + auto label_id = data.linked_data.get_label_at(seg, (func.start_word + i) * 4); + if (label_id != -1) { + result += data.linked_data.labels.at(label_id).name + ":\n"; + } + + // check for no misaligned labels in code segments. + for (int j = 1; j < 4; j++) { + assert(data.linked_data.get_label_at(seg, (func.start_word + i) * 4 + j) == -1); + } + + // print the assembly instruction + auto& instr = func.instructions.at(i); + std::string line = " " + instr.to_string(data.linked_data.labels); + + // printf("%d inst %s\n", print_atomics, instr.to_string(data.linked_data.labels).c_str()); + + bool printed_comment = false; + + // print atomic op + if (print_atomics && func.instr_starts_atomic_op(i)) { + if (line.length() < 30) { + line.append(30 - line.length(), ' '); + } + line += + " ;; " + func.get_atomic_op_at_instr(i).to_string(data.linked_data.labels, &func.ir2.env); + printed_comment = true; + } + + // print linked strings + for (int iidx = 0; iidx < instr.n_src; iidx++) { + if (instr.get_src(iidx).is_label()) { + auto lab = data.linked_data.labels.at(instr.get_src(iidx).get_label()); + if (data.linked_data.is_string(lab.target_segment, lab.offset)) { + if (!printed_comment) { + line += " ;; "; + printed_comment = true; + } + line += " " + 
data.linked_data.get_goal_string(lab.target_segment, lab.offset / 4 - 1); + } + } + } + result += line + "\n"; + + // print delay slot gap + if (in_delay_slot) { + result += "\n"; + in_delay_slot = false; + } + + // for next time... + if (gOpcodeInfo[(int)instr.kind].has_delay_slot) { + in_delay_slot = true; + } + } + result += "\n"; + + return result; +} + +} // namespace decompiler diff --git a/decompiler/config.cpp b/decompiler/config.cpp index b06bf6376c..185c5de3d8 100644 --- a/decompiler/config.cpp +++ b/decompiler/config.cpp @@ -51,6 +51,7 @@ void set_config(const std::string& path_to_config_file) { gConfig.write_func_json = cfg.at("write_func_json").get(); gConfig.function_type_prop = cfg.at("function_type_prop").get(); gConfig.analyze_expressions = cfg.at("analyze_expressions").get(); + gConfig.run_ir2 = cfg.at("run_ir2").get(); std::vector asm_functions_by_name = cfg.at("asm_functions_by_name").get>(); diff --git a/decompiler/config.h b/decompiler/config.h index 1b2aaa17fc..7fc2c9da46 100644 --- a/decompiler/config.h +++ b/decompiler/config.h @@ -43,6 +43,7 @@ struct Config { type_hints_by_function_by_idx; std::unordered_map> anon_function_types_by_obj_by_id; + bool run_ir2 = false; }; Config& get_config(); diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index f398881499..6ff676577a 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -61,6 +61,8 @@ "write_disassembly":true, "write_hex_near_instructions":false, + "run_ir2":false, + // if false, skips printing disassembly of object with functions, as these are usually large (~1 GB) and not interesting yet. 
"disassemble_objects_without_functions":false, @@ -107,12 +109,21 @@ // gkernel "(method 11 cpu-thread)", + "throw", + "return-from-thread", + "return-from-thread-dead", + "reset-and-call", + "(method 10 cpu-thread)", + "(method 0 catch-frame)", + "throw-dispatch", + "set-to-run-bootstrap", // pskernel "return-from-exception", // F: eret "kernel-read-function", // F: delay slot tricks "kernel-write-function", // F: delay slot tricks "kernel-copy-function", + "kernel-check-hardwired-addresses", // math "rand-uint31-gen", @@ -304,6 +315,7 @@ "memcpy", "sp-process-block-3d", "sp-process-block-2d", + "sp-get-particle", // loader BUG "(method 10 external-art-buffer)", diff --git a/decompiler/main.cpp b/decompiler/main.cpp index 0b9cbc9813..09299188fd 100644 --- a/decompiler/main.cpp +++ b/decompiler/main.cpp @@ -22,6 +22,7 @@ int main(int argc, char** argv) { return 1; } + // collect all files to process set_config(argv[1]); std::string in_folder = argv[2]; std::string out_folder = argv[3]; @@ -39,6 +40,8 @@ int main(int argc, char** argv) { strs.push_back(file_util::combine_path(in_folder, str_name)); } + // build file database + lg::info("Setting up object file DB..."); ObjectFileDB db(dgos, get_config().obj_file_name_map_file, objs, strs); file_util::write_text_file(file_util::combine_path(out_folder, "dgo.txt"), db.generate_dgo_listing()); @@ -51,10 +54,36 @@ int main(int argc, char** argv) { db.dump_raw_objects(path); } + // process files (basic) db.process_link_data(); db.find_code(); db.process_labels(); + // IR1 or IR2 function analysis + if (get_config().run_ir2) { + db.analyze_functions_ir2(out_folder); + } else { + if (get_config().analyze_functions) { + db.analyze_functions_ir1(); + } + + if (get_config().write_disassembly) { + db.write_disassembly(out_folder, get_config().disassemble_objects_without_functions, + get_config().write_func_json); + db.write_debug_type_analysis(out_folder); + } + + if (get_config().analyze_expressions) { + 
db.analyze_expressions(); + db.write_disassembly(out_folder, false, false, "_expr"); + } + } + + // common IR1 and IR2 function stuff: + file_util::write_text_file(file_util::combine_path(out_folder, "all-syms.gc"), + db.dts.dump_symbol_types()); + + // data stuff if (get_config().write_scripts) { db.find_and_write_scripts(out_folder); } @@ -63,10 +92,6 @@ int main(int argc, char** argv) { db.write_object_file_words(out_folder, get_config().write_hexdump_on_v3_only); } - if (get_config().analyze_functions) { - db.analyze_functions(); - } - if (get_config().process_game_text) { auto result = db.process_game_text_files(); file_util::write_text_file(file_util::get_file_path({"assets", "game_text.txt"}), result); @@ -81,22 +106,9 @@ int main(int argc, char** argv) { file_util::write_text_file(file_util::get_file_path({"assets", "game_count.txt"}), result); } - if (get_config().write_disassembly) { - db.write_disassembly(out_folder, get_config().disassemble_objects_without_functions, - get_config().write_func_json); - db.write_debug_type_analysis(out_folder); - } - - if (get_config().analyze_expressions) { - db.analyze_expressions(); - db.write_disassembly(out_folder, false, false, "_expr"); - } - // todo print type summary // printf("%s\n", get_type_info().get_summary().c_str()); - file_util::write_text_file(file_util::combine_path(out_folder, "all-syms.gc"), - db.dts.dump_symbol_types()); lg::info("Disassembly has completed successfully."); return 0; } diff --git a/scripts/gen-test-cases.py b/scripts/gen-test-cases.py new file mode 100644 index 0000000000..9ff28970f7 --- /dev/null +++ b/scripts/gen-test-cases.py @@ -0,0 +1,40 @@ +import re + +# Quick and dirty script to generate decompiler test cases from text file format + +with open("test-cases.txt") as f: + content = f.readlines() +content = [x.strip() for x in content] + +test_cases = {} + +for case in content: + args = re.split(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)", case) + assembly_lines = args[0].replace("\"", 
"").strip().split("\\n") + main_instruction = assembly_lines[0].split(" ")[0] + if re.match("^L\d*:\s*$", main_instruction): + main_instruction = assembly_lines[1].strip().split(" ")[0] + main_instruction = main_instruction.upper().replace(".", "_") + assembly_lines = "{{{}}}".format(", ".join(["\"{}\"".format(x.replace("\\n", "").strip()) for x in assembly_lines])) + output_lines = args[1].replace("\\n", "").strip() + write_regs = "{{{}}}".format(args[2].replace("\\n", "").strip().replace(" ", "\",\"")) + read_regs = "{{{}}}".format(args[3].replace("\\n", "").strip().replace(" ", "\",\"")) + clob_regs = "{{{}}}".format(args[4].replace("\\n", "").strip().replace(" ", "\",\"")) + + test_case = "test_case(assembly_from_list({}), {}, {}, {}, {});".format(assembly_lines, output_lines, write_regs, read_regs, clob_regs); + + if main_instruction in test_cases: + test_cases[main_instruction].append(test_case) + else: + test_cases[main_instruction] = [] + test_cases[main_instruction].append(test_case) + +with open("test-cases.cpp", "a") as f: + instructions = test_cases.keys() + instructions = sorted(instructions) + for instr in instructions: + f.write("TEST(DecompilerAtomicOpBuilder, {}) {{".format(instr)) + for case in test_cases[instr]: + f.write(case) + f.write("}\n\n") + diff --git a/test/decompiler/test_AtomicOpBuilder.cpp b/test/decompiler/test_AtomicOpBuilder.cpp index 905f0e3848..ee78b69112 100644 --- a/test/decompiler/test_AtomicOpBuilder.cpp +++ b/test/decompiler/test_AtomicOpBuilder.cpp @@ -2,22 +2,40 @@ #include "decompiler/IR2/AtomicOp.h" #include "decompiler/IR2/AtomicOpBuilder.h" #include "decompiler/Disasm/InstructionParser.h" +#include "third-party/fmt/core.h" +#include "third-party/fmt/format.h" +#include using namespace decompiler; -TEST(DecompilerAtomicOpBuilder, Example) { + +std::regex labelRegex("^L\\d+:\\s*$"); + +// Auto indents / adds new-lines to a list of assembly lines +std::string assembly_from_list(std::vector assemblyLines) { + std::string 
str = ""; + for (std::string line : assemblyLines) { + if (std::regex_match(line, labelRegex)) { + str += fmt::format("{}\n", line); + } else { + str += fmt::format(" {}\n", line); + } + } + return str; +} + +void test_case(std::string assembly_lines, + std::vector<std::string> output_lines, + std::vector<std::vector<std::string>> write_regs, + std::vector<std::vector<std::string>> read_regs, + std::vector<std::vector<std::string>> clobbered_regs) { InstructionParser parser; - - // some MIPS instructions. Can be a sequence of instructions, possibly with labels. - std::string input_program = - "and v0, v1, a3\n" - "and a1, a2, a2"; - // convert to Instructions: - ParsedProgram prg = parser.parse_program(input_program); + // some MIPS instructions. Can be a sequence of instructions, possibly with labels. + ParsedProgram prg = parser.parse_program(assembly_lines); // this verifies we can convert from a string to an instruction, and back to a string again. // the instruction printer will add two leading spaces and a newline. - EXPECT_EQ(prg.print(), " and v0, v1, a3\n and a1, a2, a2\n"); + EXPECT_EQ(prg.print(), assembly_lines); // next, set up a test environment for the conversion. The FunctionAtomicOps will hold // the result of the conversion @@ -27,8 +45,8 @@ TEST(DecompilerAtomicOpBuilder, Example) { convert_block_to_atomic_ops(0, prg.instructions.begin(), prg.instructions.end(), prg.labels, &container); - // we should get back a single and operation: - EXPECT_EQ(2, container.ops.size()); + // count operations + EXPECT_EQ(container.ops.size(), output_lines.size()); // for now, we create an empty environment. The environment will be used in the future to // rename register to variables, but for now, we just leave it empty and the printing will @@ -36,22 +54,393 @@ TEST(DecompilerAtomicOpBuilder, Example) { Env env; // check the we get the right result: - EXPECT_EQ(container.ops.at(0)->to_string(prg.labels, &env), "(set! v0 (logand v1 a3))"); - EXPECT_EQ(container.ops.at(1)->to_string(prg.labels, &env), "(set! 
a1 (logand a2 a2))"); + for (size_t i = 0; i < container.ops.size(); i++) { + const auto& op = container.ops.at(i); + EXPECT_EQ(op->to_string(prg.labels, &env), output_lines.at(i)); - // check that the registers read/written are identified for the first op (and v0, v1, a3) - auto& first_op = container.ops.at(0); + // check that the registers read/written are identified for the operation - // two registers read (v1 and a3) - EXPECT_EQ(first_op->read_regs().size(), 2); - // one register written (v0) - EXPECT_EQ(first_op->write_regs().size(), 1); - // no clobber registers (register which ends up with a garbage value in it) - EXPECT_EQ(first_op->clobber_regs().size(), 0); + // check write registers + EXPECT_EQ(op->write_regs().size(), write_regs.at(i).size()); + for (size_t j = 0; j < op->write_regs().size(); j++) { + const std::string expected_reg = op->write_regs().at(j).to_string(); + // the ordering of the registers doesn't matter. It could happen to be in the same order + // as the opcode here, but it may not always be the case. + bool found = false; + for (const std::string reg : write_regs.at(i)) { + // TODO - is there a potential bug here in the event that either list has duplicate + // registers? + if (reg == expected_reg) { + found = true; + break; + } + } + EXPECT_TRUE(found) << fmt::format("Unable to find expected WRITE register - {}", + expected_reg); + } - // the ordering of the two read registers doesn't matter. It happens to be in the same order - // as the opcode here, but it may not always be the case. 
- EXPECT_EQ(first_op->read_regs().at(0).to_string(), "v1"); - EXPECT_EQ(first_op->read_regs().at(1).to_string(), "a3"); - EXPECT_EQ(first_op->write_regs().at(0).to_string(), "v0"); -} \ No newline at end of file + // check read registers + EXPECT_EQ(op->read_regs().size(), read_regs.at(i).size()); + for (size_t j = 0; j < op->read_regs().size(); j++) { + const std::string expected_reg = op->read_regs().at(j).to_string(); + // the ordering of the registers doesn't matter. It could happen to be in the same order + // as the opcode here, but it may not always be the case. + bool found = false; + for (const std::string reg : read_regs.at(i)) { + // TODO - is there a potential bug here in the event that either list has duplicate + // registers? + if (reg == expected_reg) { + found = true; + break; + } + } + EXPECT_TRUE(found) << fmt::format("Unable to find expected READ register - {}", expected_reg); + } + + // check clobbered registers + EXPECT_EQ(op->clobber_regs().size(), clobbered_regs.at(i).size()); + for (size_t j = 0; j < op->clobber_regs().size(); j++) { + const std::string expected_reg = op->clobber_regs().at(j).to_string(); + // the ordering of the registers doesn't matter. It could happen to be in the same order + // as the opcode here, but it may not always be the case. + bool found = false; + for (const std::string reg : clobbered_regs.at(i)) { + // TODO - is there a potential bug here in the event that either list has duplicate + // registers? + if (reg == expected_reg) { + found = true; + break; + } + } + EXPECT_TRUE(found) << fmt::format("Unable to find expected CLOBBERED register - {}", + expected_reg); + } + } +} + +TEST(DecompilerAtomicOpBuilder, Example) { + test_case(assembly_from_list({"and v0, v1, a3", "and a1, a2, a2"}), + {"(set! v0 (logand v1 a3))", "(set! a1 (logand a2 a2))"}, {{"v0"}, {"a1"}}, + {{"v1", "a3"}, {"a2", "a2"}}, {{}, {}}); +} + +TEST(DecompilerAtomicOpBuilder, ABS_S) { + test_case(assembly_from_list({"abs.s f1, f2"}), {"(set! 
f1 (abs.s f2))"}, {{"f1"}}, {{"f2"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, ADDIU) { + test_case(assembly_from_list({"addiu a1, r0, 12"}), {"(set! a1 12)"}, {{"a1"}}, {{}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, ADD_S) { + test_case(assembly_from_list({"add.s f1, f2, f3"}), {"(set! f1 (+.s f2 f3))"}, {{"f1"}}, + {{"f2", "f3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, AND) { + test_case(assembly_from_list({"and a1, a2, a3"}), {"(set! a1 (logand a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, ANDI) { + test_case(assembly_from_list({"andi a1, a2, 1234"}), {"(set! a1 (logand a2 1234))"}, {{"a1"}}, + {{"a2"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, CVT_S_W) { + test_case(assembly_from_list({"cvt.s.w f1, f2"}), {"(set! f1 (i2f f2))"}, {{"f1"}}, {{"f2"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, CVT_W_S) { + test_case(assembly_from_list({"cvt.w.s f1, f2"}), {"(set! f1 (f2i f2))"}, {{"f1"}}, {{"f2"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DADDIU) { + test_case(assembly_from_list({"daddiu a1, s7, test"}), {"(set! a1 'test)"}, {{"a1"}}, {{}}, {{}}); + test_case(assembly_from_list({"daddiu a1, s7, -10"}), {"(set! a1 '())"}, {{"a1"}}, {{}}, {{}}); + test_case(assembly_from_list({"daddiu a1, s7, -32768"}), {"(set! a1 __START-OF-TABLE__)"}, + {{"a1"}}, {{}}, {{}}); + test_case(assembly_from_list({"daddiu a1, s7, 8"}), {"(set! a1 '#t)"}, {{"a1"}}, {{}}, {{}}); + test_case(assembly_from_list({"L123:", "daddiu a1, fp, L123"}), {"(set! a1 L123)"}, {{"a1"}}, + {{}}, {{}}); + test_case(assembly_from_list({"daddiu a1, a2, 1234"}), {"(set! a1 (+ a2 1234))"}, {{"a1"}}, + {{"a2"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DADDU) { + test_case(assembly_from_list({"daddu a1, a2, a3"}), {"(set! a1 (+ a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DIV_S) { + test_case(assembly_from_list({"div.s f1, f2, f3"}), {"(set! 
f1 (/.s f2 f3))"}, {{"f1"}}, + {{"f2", "f3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSLL) { + test_case(assembly_from_list({"dsll a2, a3, 3"}), {"(set! a2 (shl a3 3))"}, {{"a2"}}, {{"a3"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSLL32) { + test_case(assembly_from_list({"dsll32 a2, a3, 3"}), {"(set! a2 (shl a3 35))"}, {{"a2"}}, {{"a3"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSLLV) { + test_case(assembly_from_list({"dsllv a1, a2, a3"}), {"(set! a1 (shl a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSRA) { + test_case(assembly_from_list({"dsra a2, a3, 3"}), {"(set! a2 (sra a3 3))"}, {{"a2"}}, {{"a3"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSRA32) { + test_case(assembly_from_list({"dsra32 a2, a3, 3"}), {"(set! a2 (sra a3 35))"}, {{"a2"}}, {{"a3"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSRAV) { + test_case(assembly_from_list({"dsrav a1, a2, a3"}), {"(set! a1 (sra a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSRL) { + test_case(assembly_from_list({"dsrl a2, a3, 3"}), {"(set! a2 (srl a3 3))"}, {{"a2"}}, {{"a3"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSRL32) { + test_case(assembly_from_list({"dsrl32 a2, a3, 3"}), {"(set! a2 (srl a3 35))"}, {{"a2"}}, {{"a3"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSRLV) { + test_case(assembly_from_list({"dsrlv a1, a2, a3"}), {"(set! a1 (srl a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, DSUBU) { + test_case(assembly_from_list({"dsubu a1, a2, a3"}), {"(set! a1 (- a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); + test_case(assembly_from_list({"dsubu a1, r0, a3"}), {"(set! a1 (- a3))"}, {{"a1"}}, {{"a3"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, LB) { + test_case(assembly_from_list({"L123:", "lb a3, L123(fp)"}), {"(set! a3 (l.b L123))"}, {{"a3"}}, + {{}}, {{}}); + test_case(assembly_from_list({"lb a2, 0(a3)"}), {"(set! 
a2 (l.b a3))"}, {{"a2"}}, {{"a3"}}, {{}}); + test_case(assembly_from_list({"lb a2, 12(a3)"}), {"(set! a2 (l.b (+ a3 12)))"}, {{"a2"}}, + {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, LBU) { + test_case(assembly_from_list({"L123:", "lbu a3, L123(fp)"}), {"(set! a3 (l.bu L123))"}, {{"a3"}}, + {{}}, {{}}); + test_case(assembly_from_list({"lbu a2, 0(a3)"}), {"(set! a2 (l.bu a3))"}, {{"a2"}}, {{"a3"}}, + {{}}); + test_case(assembly_from_list({"lbu a2, 12(a3)"}), {"(set! a2 (l.bu (+ a3 12)))"}, {{"a2"}}, + {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, LD) { + test_case(assembly_from_list({"L123:", "ld a3, L123(fp)"}), {"(set! a3 (l.d L123))"}, {{"a3"}}, + {{}}, {{}}); + test_case(assembly_from_list({"ld a2, 0(a3)"}), {"(set! a2 (l.d a3))"}, {{"a2"}}, {{"a3"}}, {{}}); + test_case(assembly_from_list({"ld a2, 12(a3)"}), {"(set! a2 (l.d (+ a3 12)))"}, {{"a2"}}, + {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, LH) { + test_case(assembly_from_list({"L123:", "lh a3, L123(fp)"}), {"(set! a3 (l.h L123))"}, {{"a3"}}, + {{}}, {{}}); + test_case(assembly_from_list({"lh a2, 0(a3)"}), {"(set! a2 (l.h a3))"}, {{"a2"}}, {{"a3"}}, {{}}); + test_case(assembly_from_list({"lh a2, 12(a3)"}), {"(set! a2 (l.h (+ a3 12)))"}, {{"a2"}}, + {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, LHU) { + test_case(assembly_from_list({"L123:", "lhu a3, L123(fp)"}), {"(set! a3 (l.hu L123))"}, {{"a3"}}, + {{}}, {{}}); + test_case(assembly_from_list({"lhu a2, 0(a3)"}), {"(set! a2 (l.hu a3))"}, {{"a2"}}, {{"a3"}}, + {{}}); + test_case(assembly_from_list({"lhu a2, 12(a3)"}), {"(set! a2 (l.hu (+ a3 12)))"}, {{"a2"}}, + {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, LUI) { + test_case(assembly_from_list({"lui a3, 2"}), {"(set! a3 131072)"}, {{"a3"}}, {{}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, LW) { + test_case(assembly_from_list({"lw r0, 2(r0)"}), {"(break!)"}, {{}}, {{}}, {{}}); + test_case(assembly_from_list({"lw a2, test(s7)"}), {"(set! 
a2 test)"}, {{"a2"}}, {{}}, {{}}); + test_case(assembly_from_list({"L123:", "lw a3, L123(fp)"}), {"(set! a3 (l.w L123))"}, {{"a3"}}, + {{}}, {{}}); + test_case(assembly_from_list({"lw a2, 0(a3)"}), {"(set! a2 (l.w a3))"}, {{"a2"}}, {{"a3"}}, {{}}); + test_case(assembly_from_list({"lw a2, 12(a3)"}), {"(set! a2 (l.w (+ a3 12)))"}, {{"a2"}}, + {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, LWC1) { + test_case(assembly_from_list({"L123:", "lwc1 f3, L123(fp)"}), {"(set! f3 (l.f L123))"}, {{"f3"}}, + {{}}, {{}}); + test_case(assembly_from_list({"lwc1 f2, 0(a3)"}), {"(set! f2 (l.f a3))"}, {{"f2"}}, {{"a3"}}, + {{}}); + test_case(assembly_from_list({"lwc1 f2, 12(a3)"}), {"(set! f2 (l.f (+ a3 12)))"}, {{"f2"}}, + {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, LWU) { + test_case(assembly_from_list({"L123:", "lwu a3, L123(fp)"}), {"(set! a3 (l.wu L123))"}, {{"a3"}}, + {{}}, {{}}); + test_case(assembly_from_list({"lwu a2, 0(a3)"}), {"(set! a2 (l.wu a3))"}, {{"a2"}}, {{"a3"}}, + {{}}); + test_case(assembly_from_list({"lwu a2, 12(a3)"}), {"(set! a2 (l.wu (+ a3 12)))"}, {{"a2"}}, + {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MAX_S) { + test_case(assembly_from_list({"max.s f1, f2, f3"}), {"(set! f1 (max.s f2 f3))"}, {{"f1"}}, + {{"f2", "f3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MFC1) { + test_case(assembly_from_list({"mfc1 a1, f3"}), {"(set! a1 (fpr->gpr f3))"}, {{"a1"}}, {{"f3"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MIN_S) { + test_case(assembly_from_list({"min.s f1, f2, f3"}), {"(set! 
f1 (min.s f2 f3))"}, {{"f1"}}, + {{"f2", "f3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MOVN) { + test_case(assembly_from_list({"movn a1, s7, a2"}), {"(cmove-#f-nonzero a1 a2)"}, {{"a1"}}, + {{"a2"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MOVZ) { + test_case(assembly_from_list({"movz a1, s7, a2"}), {"(cmove-#f-zero a1 a2)"}, {{"a1"}}, {{"a2"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MOV_S) { + test_case(assembly_from_list({"mov.s f1, f2"}), {"(set! f1 f2)"}, {{"f1"}}, {{"f2"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MTC1) { + test_case(assembly_from_list({"mtc1 f3, a1"}), {"(set! f3 (gpr->fpr a1))"}, {{"f3"}}, {{"a1"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MULT3) { + test_case(assembly_from_list({"mult3 a1, a2, a3"}), {"(set! a1 (*.si a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MULTU3) { + test_case(assembly_from_list({"multu3 a1, a2, a3"}), {"(set! a1 (*.ui a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, MUL_S) { + test_case(assembly_from_list({"mul.s f1, f2, f3"}), {"(set! f1 (*.s f2 f3))"}, {{"f1"}}, + {{"f2", "f3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, NEG_S) { + test_case(assembly_from_list({"neg.s f1, f2"}), {"(set! f1 (neg.s f2))"}, {{"f1"}}, {{"f2"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, NOR) { + test_case(assembly_from_list({"nor a1, a2, r0"}), {"(set! a1 (lognot a2))"}, {{"a1"}}, {{"a2"}}, + {{}}); + test_case(assembly_from_list({"nor a1, a2, a3"}), {"(set! a1 (lognor a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, OR) { + test_case(assembly_from_list({"or a1, a2, a3"}), {"(set! a1 (logior a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); + test_case(assembly_from_list({"or a2, r0, r0"}), {"(set! a2 0)"}, {{"a2"}}, {{}}, {{}}); + test_case(assembly_from_list({"or a1, s7, r0"}), {"(set! 
a1 '#f)"}, {{"a1"}}, {{}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, ORI) { + test_case(assembly_from_list({"ori a2, r0, 1234"}), {"(set! a2 1234)"}, {{"a2"}}, {{}}, {{}}); + test_case(assembly_from_list({"ori a2, r0, -1234"}), {"(set! a2 -1234)"}, {{"a2"}}, {{}}, {{}}); + test_case(assembly_from_list({"ori a2, a3, -1234"}), {"(set! a2 (logior a3 -1234))"}, {{"a2"}}, + {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, SB) { + test_case(assembly_from_list({"sb a1, 2(a3)"}), {"(s.b! (+ a3 2) a1)"}, {{}}, {{"a1", "a3"}}, + {{}}); + test_case(assembly_from_list({"sb a1, 0(a3)"}), {"(s.b! a3 a1)"}, {{}}, {{"a1", "a3"}}, {{}}); + test_case(assembly_from_list({"sb s7, 2(a3)"}), {"(s.b! (+ a3 2) '#f)"}, {{}}, {{"a3"}}, {{}}); + test_case(assembly_from_list({"sb s7, 0(a3)"}), {"(s.b! a3 '#f)"}, {{}}, {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, SD) { + test_case(assembly_from_list({"sd a1, 2(a3)"}), {"(s.d! (+ a3 2) a1)"}, {{}}, {{"a1", "a3"}}, + {{}}); + test_case(assembly_from_list({"sd a1, 0(a3)"}), {"(s.d! a3 a1)"}, {{}}, {{"a1", "a3"}}, {{}}); + test_case(assembly_from_list({"sd s7, 2(a3)"}), {"(s.d! (+ a3 2) '#f)"}, {{}}, {{"a3"}}, {{}}); + test_case(assembly_from_list({"sd s7, 0(a3)"}), {"(s.d! a3 '#f)"}, {{}}, {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, SH) { + test_case(assembly_from_list({"sh a1, 2(a3)"}), {"(s.h! (+ a3 2) a1)"}, {{}}, {{"a1", "a3"}}, + {{}}); + test_case(assembly_from_list({"sh a1, 0(a3)"}), {"(s.h! a3 a1)"}, {{}}, {{"a1", "a3"}}, {{}}); + test_case(assembly_from_list({"sh s7, 2(a3)"}), {"(s.h! (+ a3 2) '#f)"}, {{}}, {{"a3"}}, {{}}); + test_case(assembly_from_list({"sh s7, 0(a3)"}), {"(s.h! a3 '#f)"}, {{}}, {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, SLL) { + test_case(assembly_from_list({"sll r0, r0, 0"}), {"(nop!)"}, {{}}, {{}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, SQRT_S) { + test_case(assembly_from_list({"sqrt.s f1, f2"}), {"(set! 
f1 (sqrt.s f2))"}, {{"f1"}}, {{"f2"}}, + {{}}); +} + +TEST(DecompilerAtomicOpBuilder, SUB_S) { + test_case(assembly_from_list({"sub.s f1, f2, f3"}), {"(set! f1 (-.s f2 f3))"}, {{"f1"}}, + {{"f2", "f3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, SW) { + test_case(assembly_from_list({"sw a1, test(s7)"}), {"(s.w! test a1)"}, {{}}, {{"a1"}}, {{}}); + test_case(assembly_from_list({"sw s7, test(s7)"}), {"(s.w! test '#f)"}, {{}}, {{}}, {{}}); + test_case(assembly_from_list({"sw a1, 2(a3)"}), {"(s.w! (+ a3 2) a1)"}, {{}}, {{"a1", "a3"}}, + {{}}); + test_case(assembly_from_list({"sw a1, 0(a3)"}), {"(s.w! a3 a1)"}, {{}}, {{"a1", "a3"}}, {{}}); + test_case(assembly_from_list({"sw s7, 2(a3)"}), {"(s.w! (+ a3 2) '#f)"}, {{}}, {{"a3"}}, {{}}); + test_case(assembly_from_list({"sw s7, 0(a3)"}), {"(s.w! a3 '#f)"}, {{}}, {{"a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, SWC1) { + test_case(assembly_from_list({"swc1 f2, 2(a3)"}), {"(s.f! (+ a3 2) f2)"}, {{}}, {{"f2", "a3"}}, + {{}}); + test_case(assembly_from_list({"swc1 f2, 0(a3)"}), {"(s.f! a3 f2)"}, {{}}, {{"f2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, XOR) { + test_case(assembly_from_list({"xor a1, a2, a3"}), {"(set! a1 (logxor a2 a3))"}, {{"a1"}}, + {{"a2", "a3"}}, {{}}); +} + +TEST(DecompilerAtomicOpBuilder, XORI) { + test_case(assembly_from_list({"xori a1, a2, 1234"}), {"(set! a1 (logxor a2 1234))"}, {{"a1"}}, + {{"a2"}}, {{}}); +}