From d6bbca56208fb488b7fc5213493f44c46d338807 Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Sun, 17 Jan 2021 18:08:18 -0500 Subject: [PATCH] [Decompiler] IR2 form implementation (#197) * begin ir2 form implementation * temp * small fixes * fix test --- decompiler/CMakeLists.txt | 5 +- decompiler/Function/CfgVtx.cpp | 52 +- decompiler/Function/CfgVtx.h | 56 +- decompiler/Function/Function.h | 5 +- decompiler/IR2/AtomicOp.cpp | 116 +- decompiler/IR2/AtomicOp.h | 73 +- decompiler/IR2/AtomicOpForm.cpp | 68 + decompiler/IR2/AtomicOpTypeAnalysis.cpp | 5 + decompiler/IR2/Form.cpp | 635 +++++++++ decompiler/IR2/Form.h | 432 ++++++ ...micOpBuilder.cpp => atomic_op_builder.cpp} | 2 +- ...{AtomicOpBuilder.h => atomic_op_builder.h} | 0 decompiler/IR2/cfg_builder.cpp | 1227 +++++++++++++++++ decompiler/IR2/cfg_builder.h | 7 + decompiler/ObjectFile/ObjectFileDB.h | 1 + decompiler/ObjectFile/ObjectFileDB_IR2.cpp | 33 +- test/decompiler/test_AtomicOpBuilder.cpp | 10 +- 17 files changed, 2516 insertions(+), 211 deletions(-) create mode 100644 decompiler/IR2/AtomicOpForm.cpp create mode 100644 decompiler/IR2/Form.cpp create mode 100644 decompiler/IR2/Form.h rename decompiler/IR2/{AtomicOpBuilder.cpp => atomic_op_builder.cpp} (99%) rename decompiler/IR2/{AtomicOpBuilder.h => atomic_op_builder.h} (100%) create mode 100644 decompiler/IR2/cfg_builder.cpp create mode 100644 decompiler/IR2/cfg_builder.h diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index 1e6b42ffe3..91c2051e22 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -28,10 +28,13 @@ add_library( IR/IR_ExpressionStack.cpp IR/IR_TypeAnalysis.cpp + IR2/atomic_op_builder.cpp IR2/AtomicOp.cpp - IR2/AtomicOpBuilder.cpp + IR2/AtomicOpForm.cpp IR2/AtomicOpTypeAnalysis.cpp + IR2/cfg_builder.cpp IR2/Env.cpp + IR2/Form.cpp IR2/reg_usage.cpp IR2/variable_naming.cpp diff --git a/decompiler/Function/CfgVtx.cpp b/decompiler/Function/CfgVtx.cpp index 
22f6114288..75704ca9d6 100644 --- a/decompiler/Function/CfgVtx.cpp +++ b/decompiler/Function/CfgVtx.cpp @@ -134,7 +134,7 @@ std::string CfgVtx::links_to_string() { /// VERTICES ///////////////////////////////////////// -std::string BlockVtx::to_string() { +std::string BlockVtx::to_string() const { if (is_early_exit_block) { return "Block (EA) " + std::to_string(block_id); } else { @@ -142,11 +142,11 @@ std::string BlockVtx::to_string() { } } -goos::Object BlockVtx::to_form() { +goos::Object BlockVtx::to_form() const { return pretty_print::to_symbol("b" + std::to_string(block_id)); } -std::string SequenceVtx::to_string() { +std::string SequenceVtx::to_string() const { assert(!seq.empty()); // todo - this is not a great way to print it. Maybe sequences should have an ID or name? std::string result = @@ -154,7 +154,7 @@ std::string SequenceVtx::to_string() { return result; } -goos::Object SequenceVtx::to_form() { +goos::Object SequenceVtx::to_form() const { std::vector forms; forms.push_back(pretty_print::to_symbol("seq")); for (auto* x : seq) { @@ -163,27 +163,27 @@ goos::Object SequenceVtx::to_form() { return pretty_print::build_list(forms); } -std::string EntryVtx::to_string() { +std::string EntryVtx::to_string() const { return "ENTRY"; } -goos::Object EntryVtx::to_form() { +goos::Object EntryVtx::to_form() const { return pretty_print::to_symbol("entry"); } -std::string ExitVtx::to_string() { +std::string ExitVtx::to_string() const { return "EXIT"; } -goos::Object ExitVtx::to_form() { +goos::Object ExitVtx::to_form() const { return pretty_print::to_symbol("exit"); } -std::string CondWithElse::to_string() { +std::string CondWithElse::to_string() const { return "CONDWE" + std::to_string(uid); } -goos::Object CondWithElse::to_form() { +goos::Object CondWithElse::to_form() const { std::vector forms; forms.push_back(pretty_print::to_symbol("cond")); for (const auto& x : entries) { @@ -195,11 +195,11 @@ goos::Object CondWithElse::to_form() { return 
pretty_print::build_list(forms); } -std::string CondNoElse::to_string() { +std::string CondNoElse::to_string() const { return "CONDNE" + std::to_string(uid); } -goos::Object CondNoElse::to_form() { +goos::Object CondNoElse::to_form() const { std::vector forms; forms.push_back(pretty_print::to_symbol("cond")); for (const auto& x : entries) { @@ -209,49 +209,49 @@ goos::Object CondNoElse::to_form() { return pretty_print::build_list(forms); } -std::string WhileLoop::to_string() { +std::string WhileLoop::to_string() const { return "WHL" + std::to_string(uid); } -goos::Object WhileLoop::to_form() { +goos::Object WhileLoop::to_form() const { std::vector forms = {pretty_print::to_symbol("while"), condition->to_form(), body->to_form()}; return pretty_print::build_list(forms); } -std::string UntilLoop::to_string() { +std::string UntilLoop::to_string() const { return "UNTL" + std::to_string(uid); } -goos::Object UntilLoop::to_form() { +goos::Object UntilLoop::to_form() const { std::vector forms = {pretty_print::to_symbol("until"), condition->to_form(), body->to_form()}; return pretty_print::build_list(forms); } -std::string UntilLoop_single::to_string() { +std::string UntilLoop_single::to_string() const { return "UNTLS" + std::to_string(uid); } -goos::Object UntilLoop_single::to_form() { +goos::Object UntilLoop_single::to_form() const { std::vector forms = {pretty_print::to_symbol("until1"), block->to_form()}; return pretty_print::build_list(forms); } -std::string InfiniteLoopBlock::to_string() { +std::string InfiniteLoopBlock::to_string() const { return "INFL" + std::to_string(uid); } -goos::Object InfiniteLoopBlock::to_form() { +goos::Object InfiniteLoopBlock::to_form() const { std::vector forms = {pretty_print::to_symbol("inf-loop"), block->to_form()}; return pretty_print::build_list(forms); } -std::string ShortCircuit::to_string() { +std::string ShortCircuit::to_string() const { return "SC" + std::to_string(uid); } -goos::Object ShortCircuit::to_form() { +goos::Object 
ShortCircuit::to_form() const { std::vector forms; forms.push_back(pretty_print::to_symbol("sc")); for (const auto& x : entries) { @@ -260,21 +260,21 @@ goos::Object ShortCircuit::to_form() { return pretty_print::build_list(forms); } -std::string GotoEnd::to_string() { +std::string GotoEnd::to_string() const { return "goto_end" + std::to_string(uid); } -goos::Object GotoEnd::to_form() { +goos::Object GotoEnd::to_form() const { std::vector forms = {pretty_print::to_symbol("return-from-function"), body->to_form(), unreachable_block->to_form()}; return pretty_print::build_list(forms); } -std::string Break::to_string() { +std::string Break::to_string() const { return "goto" + std::to_string(uid); } -goos::Object Break::to_form() { +goos::Object Break::to_form() const { std::vector forms = {pretty_print::to_symbol("break"), pretty_print::to_symbol(std::to_string(dest_block)), body->to_form(), unreachable_block->to_form()}; diff --git a/decompiler/Function/CfgVtx.h b/decompiler/Function/CfgVtx.h index b630ec7aef..f8f1764980 100644 --- a/decompiler/Function/CfgVtx.h +++ b/decompiler/Function/CfgVtx.h @@ -65,8 +65,8 @@ void replace_exactly_one_in(std::vector& v, T old, T replace) { */ class CfgVtx { public: - virtual std::string to_string() = 0; // convert to a single line string for debugging - virtual goos::Object to_form() = 0; // recursive print as LISP form. + virtual std::string to_string() const = 0; // convert to a single line string for debugging + virtual goos::Object to_form() const = 0; // recursive print as LISP form. virtual ~CfgVtx() = default; CfgVtx* parent = nullptr; // parent structure, or nullptr if top level @@ -132,8 +132,8 @@ class CfgVtx { class EntryVtx : public CfgVtx { public: EntryVtx() = default; - goos::Object to_form() override; - std::string to_string() override; + goos::Object to_form() const override; + std::string to_string() const override; }; /*! 
@@ -141,8 +141,8 @@ class EntryVtx : public CfgVtx { */ class ExitVtx : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; }; /*! @@ -151,8 +151,8 @@ class ExitVtx : public CfgVtx { class BlockVtx : public CfgVtx { public: explicit BlockVtx(int id) : block_id(id) {} - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; int block_id = -1; // which block are we? bool is_early_exit_block = false; // are we an empty block at the end for early exits to jump to? }; @@ -163,8 +163,8 @@ class BlockVtx : public CfgVtx { */ class SequenceVtx : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; std::vector seq; }; @@ -175,8 +175,8 @@ class SequenceVtx : public CfgVtx { */ class CondWithElse : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; struct Entry { Entry() = default; @@ -196,8 +196,8 @@ class CondWithElse : public CfgVtx { */ class CondNoElse : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; struct Entry { Entry() = default; @@ -211,8 +211,8 @@ class CondNoElse : public CfgVtx { class WhileLoop : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; CfgVtx* condition = nullptr; CfgVtx* body = nullptr; @@ -220,8 +220,8 @@ class WhileLoop : public CfgVtx { class UntilLoop : public CfgVtx { public: - std::string to_string() override; - goos::Object 
to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; CfgVtx* condition = nullptr; CfgVtx* body = nullptr; @@ -229,38 +229,38 @@ class UntilLoop : public CfgVtx { class UntilLoop_single : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; CfgVtx* block = nullptr; }; class ShortCircuit : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; std::vector entries; }; class InfiniteLoopBlock : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; CfgVtx* block; }; class GotoEnd : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; CfgVtx* body = nullptr; CfgVtx* unreachable_block = nullptr; }; class Break : public CfgVtx { public: - std::string to_string() override; - goos::Object to_form() override; + std::string to_string() const override; + goos::Object to_form() const override; int dest_block = -1; CfgVtx* body = nullptr; CfgVtx* unreachable_block = nullptr; diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index e6c260ceea..3cefba94c6 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -8,13 +8,14 @@ #include #include #include -#include "decompiler/IR2/AtomicOpBuilder.h" +#include "decompiler/IR2/atomic_op_builder.h" #include "decompiler/Disasm/Instruction.h" #include "decompiler/Disasm/Register.h" #include "BasicBlocks.h" #include "CfgVtx.h" #include "common/type_system/TypeSpec.h" #include "decompiler/config.h" +#include "decompiler/IR2/Form.h" namespace decompiler 
{ class DecompilerTypeSystem; @@ -169,6 +170,8 @@ class Function { RegUsageInfo reg_use; bool has_type_info = false; Env env; + FormPool form_pool; + Form* top_form = nullptr; } ir2; private: diff --git a/decompiler/IR2/AtomicOp.cpp b/decompiler/IR2/AtomicOp.cpp index fff6318437..683fe4cfdd 100644 --- a/decompiler/IR2/AtomicOp.cpp +++ b/decompiler/IR2/AtomicOp.cpp @@ -254,6 +254,7 @@ std::string get_simple_expression_op_name(SimpleExpression::Kind kind) { assert(false); } } +} // namespace int get_simple_expression_arg_count(SimpleExpression::Kind kind) { switch (kind) { @@ -302,7 +303,6 @@ int get_simple_expression_arg_count(SimpleExpression::Kind kind) { assert(false); } } -} // namespace SimpleExpression::SimpleExpression(Kind kind, const SimpleAtom& arg0) : n_args(1) { m_args[0] = arg0; @@ -372,10 +372,6 @@ bool SetVarOp::operator==(const AtomicOp& other) const { return m_dst == po->m_dst && m_src == po->m_src; } -bool SetVarOp::is_variable_set() const { - return true; -} - bool SetVarOp::is_sequence_point() const { if (m_src.is_identity()) { auto& atom = m_src.get_arg(0); @@ -394,14 +390,6 @@ Variable SetVarOp::get_set_destination() const { return m_dst; } -std::unique_ptr SetVarOp::get_set_source_as_expr() const { - throw std::runtime_error("get_set_source_as_expr NYI for SetVarOp"); -} - -std::unique_ptr SetVarOp::get_as_expr() const { - throw std::runtime_error("get_as_expr NYI for SetVarOp"); -} - void SetVarOp::update_register_info() { m_write_regs.push_back(m_dst.reg()); m_src.get_regs(&m_read_regs); @@ -474,10 +462,6 @@ bool AsmOp::operator==(const AtomicOp& other) const { (m_src[1] == po->m_src[1]) && (m_src[2] == po->m_src[2]); } -bool AsmOp::is_variable_set() const { - return false; -} - bool AsmOp::is_sequence_point() const { return true; } @@ -486,14 +470,6 @@ Variable AsmOp::get_set_destination() const { throw std::runtime_error("AsmOp cannot be treated as a set! 
operation"); } -std::unique_ptr AsmOp::get_set_source_as_expr() const { - throw std::runtime_error("AsmOp cannot be treated as a set! operation"); -} - -std::unique_ptr AsmOp::get_as_expr() const { - throw std::runtime_error("AsmOp::get_as_expr is not implemented."); -} - void AsmOp::update_register_info() { if (m_dst.has_value()) { m_write_regs.push_back(m_dst->reg()); @@ -510,7 +486,6 @@ void AsmOp::update_register_info() { // Condition ///////////////////////////// -namespace { std::string get_condition_kind_name(IR2_Condition::Kind kind) { switch (kind) { case IR2_Condition::Kind::NOT_EQUAL: @@ -694,7 +669,6 @@ IR2_Condition::Kind get_condition_opposite(IR2_Condition::Kind kind) { assert(false); } } -} // namespace IR2_Condition::IR2_Condition(Kind kind) : m_kind(kind) { assert(get_condition_num_args(m_kind) == 0); @@ -774,10 +748,6 @@ bool SetVarConditionOp::operator==(const AtomicOp& other) const { return m_dst == po->m_dst && m_condition == po->m_condition; } -bool SetVarConditionOp::is_variable_set() const { - return true; -} - bool SetVarConditionOp::is_sequence_point() const { return true; } @@ -786,14 +756,6 @@ Variable SetVarConditionOp::get_set_destination() const { return m_dst; } -std::unique_ptr SetVarConditionOp::get_set_source_as_expr() const { - throw std::runtime_error("SetVarConditionOp::get_source_as_expr is not yet implemented."); -} - -std::unique_ptr SetVarConditionOp::get_as_expr() const { - throw std::runtime_error("SetVarConditionOp::get_as_expr is not yet implemented."); -} - void SetVarConditionOp::update_register_info() { m_write_regs.push_back(m_dst.reg()); m_condition.get_regs(&m_read_regs); @@ -849,10 +811,6 @@ bool StoreOp::operator==(const AtomicOp& other) const { return m_addr == po->m_addr && m_value == po->m_value; } -bool StoreOp::is_variable_set() const { - return false; -} - bool StoreOp::is_sequence_point() const { return true; } @@ -861,14 +819,6 @@ Variable StoreOp::get_set_destination() const { throw 
std::runtime_error("StoreOp cannot be treated as a set! operation"); } -std::unique_ptr StoreOp::get_set_source_as_expr() const { - throw std::runtime_error("StoreOp cannot be treated as a set! operation"); -} - -std::unique_ptr StoreOp::get_as_expr() const { - throw std::runtime_error("StoreOp::get_as_expr is not yet implemented"); -} - void StoreOp::update_register_info() { m_addr.get_regs(&m_read_regs); m_value.get_regs(&m_read_regs); @@ -939,10 +889,6 @@ bool LoadVarOp::operator==(const AtomicOp& other) const { return m_dst == po->m_dst && m_src == po->m_src; } -bool LoadVarOp::is_variable_set() const { - return true; -} - bool LoadVarOp::is_sequence_point() const { return true; } @@ -951,14 +897,6 @@ Variable LoadVarOp::get_set_destination() const { return m_dst; } -std::unique_ptr LoadVarOp::get_set_source_as_expr() const { - throw std::runtime_error("LoadVarOp::get_set_source_as_expr is not yet implemented"); -} - -std::unique_ptr LoadVarOp::get_as_expr() const { - throw std::runtime_error("LoadVarOp::get_as_expr is not yet implemented"); -} - void LoadVarOp::update_register_info() { m_src.get_regs(&m_read_regs); m_write_regs.push_back(m_dst.reg()); @@ -1113,10 +1051,6 @@ bool BranchOp::operator==(const AtomicOp& other) const { m_branch_delay == po->m_branch_delay; } -bool BranchOp::is_variable_set() const { - return false; -} - bool BranchOp::is_sequence_point() const { return true; } @@ -1125,14 +1059,6 @@ Variable BranchOp::get_set_destination() const { throw std::runtime_error("BranchOp cannot be treated as a set! operation"); } -std::unique_ptr BranchOp::get_set_source_as_expr() const { - throw std::runtime_error("BranchOp cannot be treated as a set! 
operation"); -} - -std::unique_ptr BranchOp::get_as_expr() const { - throw std::runtime_error("BranchOp::get_as_expr is not yet implemented"); -} - void BranchOp::update_register_info() { m_condition.get_regs(&m_read_regs); m_branch_delay.get_regs(&m_write_regs, &m_read_regs); @@ -1172,10 +1098,6 @@ bool SpecialOp::operator==(const AtomicOp& other) const { return m_kind == po->m_kind; } -bool SpecialOp::is_variable_set() const { - return false; -} - bool SpecialOp::is_sequence_point() const { return true; } @@ -1184,14 +1106,6 @@ Variable SpecialOp::get_set_destination() const { throw std::runtime_error("SpecialOp cannot be treated as a set! operation"); } -std::unique_ptr SpecialOp::get_set_source_as_expr() const { - throw std::runtime_error("SpecialOp cannot be treated as a set! operation"); -} - -std::unique_ptr SpecialOp::get_as_expr() const { - throw std::runtime_error("SpecialOp::get_as_expr not yet implemented"); -} - void SpecialOp::update_register_info() { switch (m_kind) { case Kind::NOP: @@ -1232,10 +1146,6 @@ bool CallOp::operator==(const AtomicOp& other) const { return true; } -bool CallOp::is_variable_set() const { - return false; -} - bool CallOp::is_sequence_point() const { return true; } @@ -1244,17 +1154,13 @@ Variable CallOp::get_set_destination() const { throw std::runtime_error("CallOp cannot be treated as a set! operation"); } -std::unique_ptr CallOp::get_set_source_as_expr() const { - throw std::runtime_error("CallOp cannot be treated as a set! operation"); -} - -std::unique_ptr CallOp::get_as_expr() const { - throw std::runtime_error("CallOp::get_as_expr not yet implemented"); -} - void CallOp::update_register_info() { // throw std::runtime_error("CallOp::update_register_info cannot be done until types are known"); m_read_regs.push_back(Register(Reg::GPR, Reg::T9)); + // if the type analysis succeeds, it will remove this if the function doesn't return a value. 
+ // but, in the case we want to keep running without type information, we may need a + // renamed variable here, so we add this. + m_write_regs.push_back(Register(Reg::GPR, Reg::V0)); clobber_temps(); } @@ -1282,10 +1188,6 @@ bool ConditionalMoveFalseOp::operator==(const AtomicOp& other) const { return m_dst == po->m_dst && m_src == po->m_src && m_on_zero == po->m_on_zero; } -bool ConditionalMoveFalseOp::is_variable_set() const { - return false; -} - bool ConditionalMoveFalseOp::is_sequence_point() const { return true; } @@ -1294,14 +1196,6 @@ Variable ConditionalMoveFalseOp::get_set_destination() const { throw std::runtime_error("ConditionalMoveFalseOp cannot be treated as a set! operation"); } -std::unique_ptr ConditionalMoveFalseOp::get_set_source_as_expr() const { - throw std::runtime_error("ConditionalMoveFalseOp cannot be treated as a set! operation"); -} - -std::unique_ptr ConditionalMoveFalseOp::get_as_expr() const { - throw std::runtime_error("ConditionalMoveFalseOp::get_as_expr is not yet implemented"); -} - void ConditionalMoveFalseOp::update_register_info() { m_write_regs.push_back(m_dst.reg()); m_read_regs.push_back(m_src.reg()); diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index ea9bb6a1e3..572ad6b2be 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -11,7 +11,9 @@ #include "Env.h" namespace decompiler { -class Expr; +class FormElement; +class ConditionElement; +class FormPool; class DecompilerTypeSystem; /*! @@ -94,10 +96,6 @@ class AtomicOp { virtual bool operator==(const AtomicOp& other) const = 0; bool operator!=(const AtomicOp& other) const; - // determine if this is a (set! thing) form. These will be handled differently in expression - // building. - virtual bool is_variable_set() const = 0; - // determine if this is a GOAL "sequence point". // non-sequence point instructions may be out of order from the point of view of the expression // stack. 
@@ -106,13 +104,9 @@ class AtomicOp { // get the variable being set by this operation. Only call this if is_variable_set returns true. virtual Variable get_set_destination() const = 0; - // get the value of the variable being set, as an expression. Only call this if is_variable_set - // returns true. - virtual std::unique_ptr get_set_source_as_expr() const = 0; - // convert me to an expression. If I'm a set!, this will produce a (set! x y), which may be // undesirable when expression stacking. - virtual std::unique_ptr get_as_expr() const = 0; + virtual FormElement* get_as_form(FormPool& pool) const = 0; // figure out what registers are read and written in this AtomicOp and update read_regs, // write_regs, and clobber_regs. It's expected that these have duplicates if a register appears @@ -122,6 +116,7 @@ class AtomicOp { TypeState propagate_types(const TypeState& input, const Env& env, DecompilerTypeSystem& dts); + int op_id() const { return m_my_idx; } const std::vector& read_regs() { return m_read_regs; } const std::vector& write_regs() { return m_write_regs; } const std::vector& clobber_regs() { return m_clobber_regs; } @@ -196,6 +191,10 @@ class SimpleAtom { void get_regs(std::vector* out) const; SimpleExpression as_expr() const; TP_Type get_type(const TypeState& input, const Env& env, const DecompilerTypeSystem& dts) const; + const std::string& get_str() const { + assert(is_sym_ptr() || is_sym_val()); + return m_string; + } private: Kind m_kind = Kind::INVALID; @@ -282,6 +281,8 @@ class SimpleExpression { s8 n_args = -1; }; +int get_simple_expression_arg_count(SimpleExpression::Kind kind); + /*! 
* Set a variable equal to a Simple Expression */ @@ -294,11 +295,9 @@ class SetVarOp : public AtomicOp { virtual goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; - bool is_variable_set() const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - std::unique_ptr get_set_source_as_expr() const override; - std::unique_ptr get_as_expr() const override; + FormElement* get_as_form(FormPool& pool) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -321,11 +320,9 @@ class AsmOp : public AtomicOp { AsmOp(Instruction instr, int my_idx); goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; - bool is_variable_set() const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - std::unique_ptr get_set_source_as_expr() const override; - std::unique_ptr get_as_expr() const override; + FormElement* get_as_form(FormPool& pool) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -392,12 +389,19 @@ class IR2_Condition { bool operator!=(const IR2_Condition& other) const { return !((*this) == other); } goos::Object to_form(const std::vector& labels, const Env* env) const; void get_regs(std::vector* out) const; + Kind kind() const { return m_kind; } + const SimpleAtom& src(int i) const { return m_src[i]; } + ConditionElement* get_as_form(FormPool& pool) const; private: Kind m_kind = Kind::INVALID; SimpleAtom m_src[2]; }; +std::string get_condition_kind_name(IR2_Condition::Kind kind); +int get_condition_num_args(IR2_Condition::Kind kind); +IR2_Condition::Kind get_condition_opposite(IR2_Condition::Kind kind); + /*! * Set a variable to a GOAL boolean, based off of a condition. 
*/ @@ -406,11 +410,9 @@ class SetVarConditionOp : public AtomicOp { SetVarConditionOp(Variable dst, IR2_Condition condition, int my_idx); goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; - bool is_variable_set() const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - std::unique_ptr get_set_source_as_expr() const override; - std::unique_ptr get_as_expr() const override; + FormElement* get_as_form(FormPool& pool) const override; void update_register_info() override; void invert() { m_condition.invert(); } TypeState propagate_types_internal(const TypeState& input, @@ -432,11 +434,9 @@ class StoreOp : public AtomicOp { StoreOp(int size, bool is_float, SimpleExpression addr, SimpleAtom value, int my_idx); goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; - bool is_variable_set() const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - std::unique_ptr get_set_source_as_expr() const override; - std::unique_ptr get_as_expr() const override; + FormElement* get_as_form(FormPool& pool) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -459,11 +459,9 @@ class LoadVarOp : public AtomicOp { LoadVarOp(Kind kind, int size, Variable dst, SimpleExpression src, int my_idx); goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; - bool is_variable_set() const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - std::unique_ptr get_set_source_as_expr() const override; - std::unique_ptr get_as_expr() const override; + FormElement* get_as_form(FormPool& pool) const override; void update_register_info() override; 
TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -509,6 +507,12 @@ class IR2_BranchDelay { TypeState propagate_types(const TypeState& input, const Env& env, DecompilerTypeSystem& dts) const; + Kind kind() const { return m_kind; } + const Variable& var(int idx) const { + assert(idx < 3); + assert(m_var[idx].has_value()); + return m_var[idx].value(); + } private: std::optional m_var[3]; @@ -528,15 +532,16 @@ class BranchOp : public AtomicOp { int my_idx); goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; - bool is_variable_set() const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - std::unique_ptr get_set_source_as_expr() const override; - std::unique_ptr get_as_expr() const override; + FormElement* get_as_form(FormPool& pool) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, DecompilerTypeSystem& dts) override; + const IR2_BranchDelay& branch_delay() const { return m_branch_delay; } + const IR2_Condition& condition() const { return m_condition; } + bool likely() const { return m_likely; } private: bool m_likely = false; @@ -561,11 +566,9 @@ class SpecialOp : public AtomicOp { SpecialOp(Kind kind, int my_idx); goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; - bool is_variable_set() const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - std::unique_ptr get_set_source_as_expr() const override; - std::unique_ptr get_as_expr() const override; + FormElement* get_as_form(FormPool& pool) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -584,11 +587,9 @@ class CallOp : public AtomicOp { CallOp(int my_idx); 
goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; - bool is_variable_set() const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - std::unique_ptr get_set_source_as_expr() const override; - std::unique_ptr get_as_expr() const override; + FormElement* get_as_form(FormPool& pool) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, @@ -616,11 +617,9 @@ class ConditionalMoveFalseOp : public AtomicOp { ConditionalMoveFalseOp(Variable dst, Variable src, bool on_zero, int my_idx); goos::Object to_form(const std::vector& labels, const Env* env) const override; bool operator==(const AtomicOp& other) const override; - bool is_variable_set() const override; bool is_sequence_point() const override; Variable get_set_destination() const override; - std::unique_ptr get_set_source_as_expr() const override; - std::unique_ptr get_as_expr() const override; + FormElement* get_as_form(FormPool& pool) const override; void update_register_info() override; TypeState propagate_types_internal(const TypeState& input, const Env& env, diff --git a/decompiler/IR2/AtomicOpForm.cpp b/decompiler/IR2/AtomicOpForm.cpp new file mode 100644 index 0000000000..83e80be877 --- /dev/null +++ b/decompiler/IR2/AtomicOpForm.cpp @@ -0,0 +1,68 @@ +#include "AtomicOp.h" +#include "Form.h" + +namespace decompiler { + +ConditionElement* IR2_Condition::get_as_form(FormPool& pool) const { + Form* sources[2] = {nullptr, nullptr}; + int n_sources = get_condition_num_args(m_kind); + for (int i = 0; i < n_sources; i++) { + sources[i] = pool.alloc_single_element_form(nullptr, m_src[i]); + } + + return pool.alloc_element(m_kind, sources[0], sources[1]); +} + +FormElement* SetVarOp::get_as_form(FormPool& pool) const { + auto source = pool.alloc_single_element_form(nullptr, m_src); + return pool.alloc_element(m_dst, 
source, is_sequence_point()); +} + +FormElement* AsmOp::get_as_form(FormPool& pool) const { + return pool.alloc_element(this); +} + +FormElement* SetVarConditionOp::get_as_form(FormPool& pool) const { + return pool.alloc_element( + m_dst, pool.alloc_single_form(nullptr, m_condition.get_as_form(pool)), is_sequence_point()); +} + +FormElement* StoreOp::get_as_form(FormPool& pool) const { + return pool.alloc_element(this); +} + +FormElement* LoadVarOp::get_as_form(FormPool& pool) const { + auto source = pool.alloc_single_element_form(nullptr, m_src); + auto load = pool.alloc_single_element_form(nullptr, source, m_size, m_kind); + return pool.alloc_element(m_dst, load, true); +} + +FormElement* BranchOp::get_as_form(FormPool& pool) const { + return pool.alloc_element(this); +} + +FormElement* SpecialOp::get_as_form(FormPool& pool) const { + return pool.alloc_element(this); +} + +FormElement* CallOp::get_as_form(FormPool& pool) const { + auto call = pool.alloc_element(this); + if (m_write_regs.empty() && m_call_type_set == true) { + return call; + } else if (m_write_regs.size() == 1 || !m_call_type_set) { + // this is a little scary in the case that type analysis doesn't run and relies on the fact + // that CallOp falls back to writing v0 in the case where the function type isn't known. 
+ Variable out_var(VariableMode::WRITE, Register(Reg::GPR, Reg::V0), m_my_idx); + return pool.alloc_element(out_var, pool.alloc_single_form(nullptr, call), true); + } else { + throw std::runtime_error("CallOp::get_as_expr not yet implemented"); + } +} + +FormElement* ConditionalMoveFalseOp::get_as_form(FormPool& pool) const { + auto source = + pool.alloc_single_element_form(nullptr, SimpleAtom::make_var(m_src)); + return pool.alloc_element(m_dst, source, m_on_zero); +} + +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/AtomicOpTypeAnalysis.cpp b/decompiler/IR2/AtomicOpTypeAnalysis.cpp index 4de64716c3..b21a8515bc 100644 --- a/decompiler/IR2/AtomicOpTypeAnalysis.cpp +++ b/decompiler/IR2/AtomicOpTypeAnalysis.cpp @@ -748,6 +748,11 @@ TypeState CallOp::propagate_types_internal(const TypeState& input, m_read_regs.emplace_back(Reg::GPR, arg_regs[i]); } + m_write_regs.clear(); + if (in_type.last_arg() != TypeSpec("none")) { + m_write_regs.emplace_back(Reg::GPR, Reg::V0); + } + return end_types; } diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp new file mode 100644 index 0000000000..cbf53ba643 --- /dev/null +++ b/decompiler/IR2/Form.cpp @@ -0,0 +1,635 @@ +#include "Form.h" +#include "decompiler/ObjectFile/LinkedObjectFile.h" +#include "common/goos/PrettyPrinter.h" + +namespace decompiler { + +/////////////////// +// FormPool +/////////////////// + +FormPool::~FormPool() { + for (auto& x : m_forms) { + delete x; + } + + for (auto& x : m_elements) { + delete x; + } +} + +/////////////////// +// Form +////////////////// + +goos::Object Form::to_form(const Env& env) const { + assert(!m_elements.empty()); + if (m_elements.size() == 1) { + return m_elements.front()->to_form(env); + } else { + std::vector forms; + forms.push_back(pretty_print::to_symbol("begin")); + for (auto& x : m_elements) { + forms.push_back(x->to_form(env)); + } + return pretty_print::build_list(forms); + } +} + +void Form::inline_forms(std::vector& forms, 
const Env& env) const { + for (auto& x : m_elements) { + forms.push_back(x->to_form(env)); + } +} + +void Form::apply(const std::function& f) { + for (auto& x : m_elements) { + x->apply(f); + } +} + +void Form::apply_form(const std::function& f) { + f(this); + for (auto& x : m_elements) { + x->apply_form(f); + } +} + +///////////////////////////// +// SimpleExpressionElement +///////////////////////////// + +SimpleExpressionElement::SimpleExpressionElement(const SimpleExpression& expr) : m_expr(expr) {} + +goos::Object SimpleExpressionElement::to_form(const Env& env) const { + return m_expr.to_form(env.file->labels, &env); +} + +void SimpleExpressionElement::apply(const std::function& f) { + f(this); +} + +void SimpleExpressionElement::apply_form(const std::function&) {} + +bool SimpleExpressionElement::is_sequence_point() const { + throw std::runtime_error("Should not check if a SimpleExpressionElement is a sequence point"); +} + +///////////////////////////// +// SetVarElement +///////////////////////////// + +SetVarElement::SetVarElement(const Variable& var, Form* value, bool is_sequence_point) + : m_dst(var), m_src(value), m_is_sequence_point(is_sequence_point) { + value->parent_element = this; +} + +goos::Object SetVarElement::to_form(const Env& env) const { + return pretty_print::build_list("set!", m_dst.to_string(&env), m_src->to_form(env)); +} + +void SetVarElement::apply(const std::function& f) { + f(this); + m_src->apply(f); +} + +void SetVarElement::apply_form(const std::function& f) { + m_src->apply_form(f); +} + +bool SetVarElement::is_sequence_point() const { + return m_is_sequence_point; +} + +///////////////////////////// +// AtomicOpElement +///////////////////////////// + +AtomicOpElement::AtomicOpElement(const AtomicOp* op) : m_op(op) {} + +goos::Object AtomicOpElement::to_form(const Env& env) const { + return m_op->to_form(env.file->labels, &env); +} + +void AtomicOpElement::apply(const std::function& f) { + f(this); +} + +void 
AtomicOpElement::apply_form(const std::function&) {} + +///////////////////////////// +// ConditionElement +///////////////////////////// + +ConditionElement::ConditionElement(IR2_Condition::Kind kind, Form* src0, Form* src1) + : m_kind(kind) { + m_src[0] = src0; + m_src[1] = src1; + for (int i = 0; i < 2; i++) { + if (m_src[i]) { + m_src[i]->parent_element = this; + } + } +} + +goos::Object ConditionElement::to_form(const Env& env) const { + std::vector forms; + forms.push_back(pretty_print::to_symbol(get_condition_kind_name(m_kind))); + for (int i = 0; i < get_condition_num_args(m_kind); i++) { + forms.push_back(m_src[i]->to_form(env)); + } + if (forms.size() > 1) { + return pretty_print::build_list(forms); + } else { + return forms.front(); + } +} + +void ConditionElement::apply(const std::function& f) { + f(this); + for (int i = 0; i < 2; i++) { + if (m_src[i]) { + m_src[i]->apply(f); + } + } +} + +void ConditionElement::apply_form(const std::function& f) { + for (int i = 0; i < 2; i++) { + if (m_src[i]) { + m_src[i]->apply_form(f); + } + } +} + +void ConditionElement::invert() { + m_kind = get_condition_opposite(m_kind); +} + +///////////////////////////// +// StoreElement +///////////////////////////// + +StoreElement::StoreElement(const StoreOp* op) : m_op(op) {} + +goos::Object StoreElement::to_form(const Env& env) const { + return m_op->to_form(env.file->labels, &env); +} + +void StoreElement::apply(const std::function& f) { + f(this); +} + +void StoreElement::apply_form(const std::function&) {} + +///////////////////////////// +// LoadSourceElement +///////////////////////////// + +LoadSourceElement::LoadSourceElement(Form* addr, int size, LoadVarOp::Kind kind) + : m_addr(addr), m_size(size), m_kind(kind) { + m_addr->parent_element = this; +} + +goos::Object LoadSourceElement::to_form(const Env& env) const { + switch (m_kind) { + case LoadVarOp::Kind::FLOAT: + assert(m_size == 4); + return pretty_print::build_list("l.f", m_addr->to_form(env)); + case 
LoadVarOp::Kind::UNSIGNED: + switch (m_size) { + case 1: + return pretty_print::build_list("l.bu", m_addr->to_form(env)); + case 2: + return pretty_print::build_list("l.hu", m_addr->to_form(env)); + case 4: + return pretty_print::build_list("l.wu", m_addr->to_form(env)); + case 8: + return pretty_print::build_list("l.d", m_addr->to_form(env)); + default: + assert(false); + } + break; + case LoadVarOp::Kind::SIGNED: + switch (m_size) { + case 1: + return pretty_print::build_list("l.b", m_addr->to_form(env)); + case 2: + return pretty_print::build_list("l.h", m_addr->to_form(env)); + case 4: + return pretty_print::build_list("l.w", m_addr->to_form(env)); + default: + assert(false); + } + break; + default: + assert(false); + } +} + +void LoadSourceElement::apply(const std::function& f) { + f(this); + m_addr->apply(f); +} + +void LoadSourceElement::apply_form(const std::function& f) { + m_addr->apply_form(f); +} + +///////////////////////////// +// SimpleAtomElement +///////////////////////////// + +SimpleAtomElement::SimpleAtomElement(const SimpleAtom& atom) : m_atom(atom) {} + +goos::Object SimpleAtomElement::to_form(const Env& env) const { + return m_atom.to_form(env.file->labels, &env); +} + +void SimpleAtomElement::apply(const std::function& f) { + f(this); +} + +void SimpleAtomElement::apply_form(const std::function&) {} + +///////////////////////////// +// FunctionCallElement +///////////////////////////// + +FunctionCallElement::FunctionCallElement(const CallOp* op) : m_op(op) {} + +goos::Object FunctionCallElement::to_form(const Env& env) const { + return m_op->to_form(env.file->labels, &env); +} + +void FunctionCallElement::apply(const std::function& f) { + f(this); +} + +void FunctionCallElement::apply_form(const std::function&) {} + +///////////////////////////// +// BranchElement +///////////////////////////// + +BranchElement::BranchElement(const BranchOp* op) : m_op(op) {} + +goos::Object BranchElement::to_form(const Env& env) const { + return 
m_op->to_form(env.file->labels, &env); +} + +void BranchElement::apply(const std::function& f) { + f(this); +} + +void BranchElement::apply_form(const std::function&) {} + +///////////////////////////// +// ReturnElement +///////////////////////////// + +goos::Object ReturnElement::to_form(const Env& env) const { + std::vector forms; + forms.push_back(pretty_print::to_symbol("return")); + forms.push_back(pretty_print::build_list(return_code->to_form(env))); + forms.push_back(pretty_print::build_list(dead_code->to_form(env))); + return pretty_print::build_list(forms); +} + +void ReturnElement::apply(const std::function& f) { + f(this); + return_code->apply(f); + dead_code->apply(f); +} + +void ReturnElement::apply_form(const std::function& f) { + return_code->apply_form(f); + dead_code->apply_form(f); +} + +///////////////////////////// +// BreakElement +///////////////////////////// + +goos::Object BreakElement::to_form(const Env& env) const { + std::vector forms; + forms.push_back(pretty_print::to_symbol("break")); + forms.push_back(pretty_print::build_list(return_code->to_form(env))); + forms.push_back(pretty_print::build_list(dead_code->to_form(env))); + return pretty_print::build_list(forms); +} + +void BreakElement::apply(const std::function& f) { + f(this); + return_code->apply(f); + dead_code->apply(f); +} + +void BreakElement::apply_form(const std::function& f) { + return_code->apply_form(f); + dead_code->apply_form(f); +} + +///////////////////////////// +// CondWithElseElement +///////////////////////////// + +goos::Object CondWithElseElement::to_form(const Env& env) const { + // for now we only turn it into an if statement if both cases won't require a begin at the top + // level. I think it is more common to write these as a two-case cond instead of an if with begin. 
+ if (entries.size() == 1 && entries.front().body->is_single_element() && + else_ir->is_single_element()) { + std::vector list; + list.push_back(pretty_print::to_symbol("if")); + list.push_back(entries.front().condition->to_form(env)); + list.push_back(entries.front().body->to_form(env)); + list.push_back(else_ir->to_form(env)); + return pretty_print::build_list(list); + } else { + std::vector list; + list.push_back(pretty_print::to_symbol("cond")); + for (auto& e : entries) { + std::vector entry; + entry.push_back(e.condition->to_form(env)); + e.body->inline_forms(entry, env); + list.push_back(pretty_print::build_list(entry)); + } + std::vector else_form; + else_form.push_back(pretty_print::to_symbol("else")); + else_ir->inline_forms(else_form, env); + list.push_back(pretty_print::build_list(else_form)); + return pretty_print::build_list(list); + } +} + +void CondWithElseElement::apply(const std::function& f) { + f(this); + for (auto& entry : entries) { + entry.condition->apply(f); + entry.body->apply(f); + } + else_ir->apply(f); +} + +void CondWithElseElement::apply_form(const std::function& f) { + for (auto& entry : entries) { + entry.condition->apply_form(f); + entry.body->apply_form(f); + } + else_ir->apply_form(f); +} + +///////////////////////////// +// EmptyElement +///////////////////////////// + +goos::Object EmptyElement::to_form(const Env& env) const { + return pretty_print::build_list("empty"); +} + +void EmptyElement::apply(const std::function& f) { + f(this); +} + +void EmptyElement::apply_form(const std::function&) {} + +///////////////////////////// +// WhileElement +///////////////////////////// + +void WhileElement::apply(const std::function& f) { + // note - this is done in program order, rather than print order. Not sure if this makes sense. 
+ f(this); + body->apply(f); + condition->apply(f); +} + +goos::Object WhileElement::to_form(const Env& env) const { + std::vector list; + list.push_back(pretty_print::to_symbol("while")); + list.push_back(condition->to_form(env)); + body->inline_forms(list, env); + return pretty_print::build_list(list); +} + +void WhileElement::apply_form(const std::function& f) { + body->apply_form(f); + condition->apply_form(f); +} + +///////////////////////////// +// UntilElement +///////////////////////////// + +void UntilElement::apply(const std::function& f) { + // note - this is done in program order, rather than print order. Not sure if this makes sense. + f(this); + body->apply(f); + condition->apply(f); +} + +goos::Object UntilElement::to_form(const Env& env) const { + std::vector list; + list.push_back(pretty_print::to_symbol("until")); + list.push_back(condition->to_form(env)); + body->inline_forms(list, env); + return pretty_print::build_list(list); +} + +void UntilElement::apply_form(const std::function& f) { + body->apply_form(f); + condition->apply_form(f); +} + +///////////////////////////// +// ShortCircuitElement +///////////////////////////// + +void ShortCircuitElement::apply(const std::function& f) { + f(this); + for (auto& x : entries) { + x.condition->apply(f); + // if (x.output) { + // // not sure about this... + // x.output->apply(f); + // } + } +} + +void ShortCircuitElement::apply_form(const std::function& f) { + for (auto& x : entries) { + x.condition->apply_form(f); + // if (x.output) { + // // not sure about this... 
+ // x.output->apply(f); + // } + } +} + +goos::Object ShortCircuitElement::to_form(const Env& env) const { + std::vector forms; + switch (kind) { + case UNKNOWN: + forms.push_back(pretty_print::to_symbol("unknown-sc")); + break; + case AND: + forms.push_back(pretty_print::to_symbol("and")); + break; + case OR: + forms.push_back(pretty_print::to_symbol("or")); + break; + default: + assert(false); + } + for (auto& x : entries) { + forms.push_back(x.condition->to_form(env)); + } + return pretty_print::build_list(forms); +} + +///////////////////////////// +// CondNoElseElement +///////////////////////////// + +goos::Object CondNoElseElement::to_form(const Env& env) const { + if (entries.size() == 1 && entries.front().body->is_single_element()) { + // print as an if statement if we can put the body in a single form. + std::vector list; + list.push_back(pretty_print::to_symbol("if")); + list.push_back(entries.front().condition->to_form(env)); + list.push_back(entries.front().body->to_form(env)); + return pretty_print::build_list(list); + } else if (entries.size() == 1) { + // turn into a when if the body requires multiple forms + // todo check to see if the condition starts with a NOT and this can be simplified to an + // unless. 
+ std::vector list; + list.push_back(pretty_print::to_symbol("when")); + list.push_back(entries.front().condition->to_form(env)); + entries.front().body->inline_forms(list, env); + return pretty_print::build_list(list); + } else { + std::vector list; + list.push_back(pretty_print::to_symbol("cond")); + for (auto& e : entries) { + std::vector entry; + entry.push_back(e.condition->to_form(env)); + e.body->inline_forms(entry, env); + list.push_back(pretty_print::build_list(entry)); + } + return pretty_print::build_list(list); + } +} + +void CondNoElseElement::apply(const std::function& f) { + f(this); + for (auto& e : entries) { + e.condition->apply(f); + e.body->apply(f); + } +} + +void CondNoElseElement::apply_form(const std::function& f) { + for (auto& e : entries) { + e.condition->apply_form(f); + e.body->apply_form(f); + } +} + +///////////////////////////// +// AbsElement +///////////////////////////// + +AbsElement::AbsElement(Form* _source) : source(_source) { + source->parent_element = this; +} + +goos::Object AbsElement::to_form(const Env& env) const { + return pretty_print::build_list("abs", source->to_form(env)); +} + +void AbsElement::apply(const std::function& f) { + f(this); + source->apply(f); +} + +void AbsElement::apply_form(const std::function& f) { + source->apply_form(f); +} + +///////////////////////////// +// AshElement +///////////////////////////// + +AshElement::AshElement(Form* _shift_amount, + Form* _value, + std::optional _clobber, + bool _is_signed) + : shift_amount(_shift_amount), value(_value), clobber(_clobber), is_signed(_is_signed) { + _shift_amount->parent_element = this; + _value->parent_element = this; +} + +goos::Object AshElement::to_form(const Env& env) const { + return pretty_print::build_list(pretty_print::to_symbol(is_signed ? 
"ash.si" : "ash.ui"), + value->to_form(env), shift_amount->to_form(env)); +} + +void AshElement::apply(const std::function& f) { + f(this); + shift_amount->apply(f); + value->apply(f); +} + +void AshElement::apply_form(const std::function& f) { + shift_amount->apply_form(f); + value->apply_form(f); +} + +///////////////////////////// +// TypeOfElement +///////////////////////////// + +TypeOfElement::TypeOfElement(Form* _value, std::optional _clobber) + : value(_value), clobber(_clobber) { + value->parent_element = this; +} + +goos::Object TypeOfElement::to_form(const Env& env) const { + return pretty_print::build_list("type-of", value->to_form(env)); +} + +void TypeOfElement::apply(const std::function& f) { + f(this); + value->apply(f); +} + +void TypeOfElement::apply_form(const std::function& f) { + value->apply_form(f); +} + +///////////////////////////// +// ConditionalMoveFalseElement +///////////////////////////// + +ConditionalMoveFalseElement::ConditionalMoveFalseElement(Variable _dest, + Form* _source, + bool _on_zero) + : dest(_dest), source(_source), on_zero(_on_zero) { + source->parent_element = this; +} + +goos::Object ConditionalMoveFalseElement::to_form(const Env& env) const { + return pretty_print::build_list(on_zero ? "cmove-#f-zero" : "cmove-#f-nonzero", + dest.to_string(&env), source->to_form(env)); +} + +void ConditionalMoveFalseElement::apply(const std::function& f) { + f(this); + source->apply(f); +} + +void ConditionalMoveFalseElement::apply_form(const std::function& f) { + source->apply_form(f); +} +} // namespace decompiler diff --git a/decompiler/IR2/Form.h b/decompiler/IR2/Form.h new file mode 100644 index 0000000000..91ebdbe84a --- /dev/null +++ b/decompiler/IR2/Form.h @@ -0,0 +1,432 @@ +#pragma once + +#include +#include +#include +#include +#include "decompiler/Disasm/Register.h" +#include "decompiler/IR2/AtomicOp.h" +#include "common/goos/Object.h" + +namespace decompiler { +class Form; +class Env; + +/*! 
+ * A "FormElement" represents a single LISP form that's not a begin. + * This is an abstract base class that all types of forms should be based on. + */ +class FormElement { + public: + Form* parent_form = nullptr; + + virtual goos::Object to_form(const Env& env) const = 0; + virtual ~FormElement() = default; + virtual void apply(const std::function& f) = 0; + virtual void apply_form(const std::function& f) = 0; + virtual bool is_sequence_point() const { return true; } + + protected: + friend class Form; +}; + +/*! + * A SimpleExpressionElement is a form which has the value of a SimpleExpression. + * Like a SimpleExpression, it has no side effects. + */ +class SimpleExpressionElement : public FormElement { + public: + explicit SimpleExpressionElement(const SimpleExpression& expr); + + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + bool is_sequence_point() const override; + const SimpleExpression& expr() const { return m_expr; } + + private: + SimpleExpression m_expr; +}; + +/*! + * Represents storing a value into memory. + * Because a value can be propagated "into" the source value, this will have to be special cased + * in expression propagation. + */ +class StoreElement : public FormElement { + public: + explicit StoreElement(const StoreOp* op); + + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + + private: + // todo - we may eventually want to use a different representation for more + // complicated store paths. + const StoreOp* m_op; +}; + +/*! + * Representing a value loaded from memory. + * Unclear if this should have some common base with store? 
+ */ +class LoadSourceElement : public FormElement { + public: + LoadSourceElement(Form* addr, int size, LoadVarOp::Kind kind); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + int size() const { return m_size; } + LoadVarOp::Kind kind() const { return m_kind; } + const Form* location() const { return m_addr; } + + private: + Form* m_addr = nullptr; + int m_size = -1; + LoadVarOp::Kind m_kind; +}; + +class SimpleAtomElement : public FormElement { + public: + explicit SimpleAtomElement(const SimpleAtom& var); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + + private: + SimpleAtom m_atom; +}; + +/*! + * Set a variable to a Form. This is the set! form to be used for expression building. + */ +class SetVarElement : public FormElement { + public: + SetVarElement(const Variable& var, Form* value, bool is_sequence_point); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + bool is_sequence_point() const override; + const Variable& dst() const { return m_dst; } + const Form* src() const { return m_src; } + + private: + Variable m_dst; + Form* m_src = nullptr; + bool m_is_sequence_point = true; +}; + +class AtomicOpElement : public FormElement { + public: + explicit AtomicOpElement(const AtomicOp* op); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + + private: + const AtomicOp* m_op; +}; + +class ConditionElement : public FormElement { + public: + ConditionElement(IR2_Condition::Kind kind, Form* src0, Form* src1); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) 
override; + void invert(); + + private: + IR2_Condition::Kind m_kind; + Form* m_src[2] = {nullptr, nullptr}; +}; + +class FunctionCallElement : public FormElement { + public: + explicit FunctionCallElement(const CallOp* op); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + + private: + const CallOp* m_op; +}; + +class BranchElement : public FormElement { + public: + explicit BranchElement(const BranchOp* op); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + const BranchOp* op() const { return m_op; } + + private: + const BranchOp* m_op; +}; + +class ReturnElement : public FormElement { + public: + Form* return_code = nullptr; + Form* dead_code = nullptr; + ReturnElement(Form* _return_code, Form* _dead_code) + : return_code(_return_code), dead_code(_dead_code) {} + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; +}; + +class BreakElement : public FormElement { + public: + Form* return_code = nullptr; + Form* dead_code = nullptr; + BreakElement(Form* _return_code, Form* _dead_code) + : return_code(_return_code), dead_code(_dead_code) {} + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; +}; + +class CondWithElseElement : public FormElement { + public: + struct Entry { + Form* condition = nullptr; + Form* body = nullptr; + bool cleaned = false; + }; + std::vector entries; + Form* else_ir = nullptr; + CondWithElseElement(std::vector _entries, Form* _else_ir) + : entries(std::move(_entries)), else_ir(_else_ir) {} + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) 
override; +}; + +class EmptyElement : public FormElement { + public: + EmptyElement() = default; + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; +}; + +class WhileElement : public FormElement { + public: + WhileElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {} + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + Form* condition = nullptr; + Form* body = nullptr; + bool cleaned = false; +}; + +class UntilElement : public FormElement { + public: + UntilElement(Form* _condition, Form* _body) : condition(_condition), body(_body) {} + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + Form* condition = nullptr; + Form* body = nullptr; +}; + +class ShortCircuitElement : public FormElement { + public: + struct Entry { + Form* condition = nullptr; + // in the case where there's no else, each delay slot will write #f to the "output" register. 
+ // this can be with an or , s7, r0 + Form* output = nullptr; + bool is_output_trick = false; + bool cleaned = false; + }; + + enum Kind { UNKNOWN, AND, OR } kind = UNKNOWN; + + Variable final_result; + std::vector entries; + std::optional used_as_value = std::nullopt; + + explicit ShortCircuitElement(std::vector _entries) : entries(std::move(_entries)) {} + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; +}; + +class CondNoElseElement : public FormElement { + public: + struct Entry { + Form* condition = nullptr; + Form* body = nullptr; + std::optional false_destination; + FormElement* original_condition_branch = nullptr; + bool cleaned = false; + }; + Register final_destination; + bool used_as_value = false; + std::vector entries; + explicit CondNoElseElement(std::vector _entries) : entries(std::move(_entries)) {} + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; +}; + +class AbsElement : public FormElement { + public: + explicit AbsElement(Form* _source); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + Form* source = nullptr; +}; + +class AshElement : public FormElement { + public: + Form* shift_amount = nullptr; + Form* value = nullptr; + std::optional clobber; + bool is_signed = true; + AshElement(Form* _shift_amount, Form* _value, std::optional _clobber, bool _is_signed); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; +}; + +class TypeOfElement : public FormElement { + public: + Form* value; + std::optional clobber; + TypeOfElement(Form* _value, std::optional _clobber); + goos::Object to_form(const Env& env) const override; + void apply(const 
std::function& f) override; + void apply_form(const std::function& f) override; +}; + +class ConditionalMoveFalseElement : public FormElement { + public: + Variable dest; + Form* source = nullptr; + bool on_zero = false; + ConditionalMoveFalseElement(Variable _dest, Form* _source, bool _on_zero); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; +}; + +/*! + * A Form is a wrapper around one or more FormElements. + * This is done for two reasons: + * - Easier to "inline" begins, prevents stupid nesting of begins. + * - Easier to manage ownership. + */ +class Form { + public: + Form() = default; + Form(FormElement* parent, FormElement* single_child) + : parent_element(parent), m_elements({single_child}) { + single_child->parent_form = this; + } + + Form(FormElement* parent, const std::vector sequence) + : parent_element(parent), m_elements(sequence) { + for (auto& x : sequence) { + x->parent_form = this; + } + } + + FormElement* try_as_single_element() const { + if (is_single_element()) { + return m_elements.front(); + } + return nullptr; + } + bool is_single_element() const { return m_elements.size() == 1; } + FormElement* operator[](int idx) { return m_elements.at(idx); } + FormElement* at(int idx) { return m_elements.at(idx); } + const FormElement* operator[](int idx) const { return m_elements.at(idx); } + int size() const { return int(m_elements.size()); } + FormElement* back() const { + assert(!m_elements.empty()); + return m_elements.back(); + } + + FormElement** back_ref() { + assert(!m_elements.empty()); + return &m_elements.back(); + } + + void pop_back() { + assert(!m_elements.empty()); + m_elements.pop_back(); + } + + const std::vector& elts() const { return m_elements; } + std::vector& elts() { return m_elements; } + + void push_back(FormElement* elt) { m_elements.push_back(elt); } + + goos::Object to_form(const Env& env) const; + void 
+ inline_forms(std::vector& forms, const Env& env) const; + void apply(const std::function& f); + void apply_form(const std::function& f); + FormElement* parent_element = nullptr; + + private: + std::vector m_elements; +}; + +/*! + * A FormPool is used to allocate forms and form elements. + * It will clean up everything when it is destroyed. + * As a result, you don't need to worry about deleting / reference counting when manipulating + * a Form graph. + */ +class FormPool { + public: + template + T* alloc_element(Args&&... args) { + auto elt = new T(std::forward(args)...); + m_elements.emplace_back(elt); + return elt; + } + + template + Form* alloc_single_element_form(FormElement* parent, Args&&... args) { + auto elt = new T(std::forward(args)...); + m_elements.emplace_back(elt); + auto form = alloc_single_form(parent, elt); + return form; + } + + Form* alloc_single_form(FormElement* parent, FormElement* elt) { + auto form = new Form(parent, elt); + m_forms.push_back(form); + return form; + } + + Form* alloc_sequence_form(FormElement* parent, const std::vector sequence) { + auto form = new Form(parent, sequence); + m_forms.push_back(form); + return form; + } + + Form* acquire(std::unique_ptr
form_ptr) { + Form* form = form_ptr.release(); + m_forms.push_back(form); + return form; + } + + Form* alloc_empty_form() { + Form* form = new Form; + m_forms.push_back(form); + return form; + } + + ~FormPool(); + + private: + std::vector m_forms; + std::vector m_elements; +}; +} // namespace decompiler diff --git a/decompiler/IR2/AtomicOpBuilder.cpp b/decompiler/IR2/atomic_op_builder.cpp similarity index 99% rename from decompiler/IR2/AtomicOpBuilder.cpp rename to decompiler/IR2/atomic_op_builder.cpp index e0f1983f97..97674d26aa 100644 --- a/decompiler/IR2/AtomicOpBuilder.cpp +++ b/decompiler/IR2/atomic_op_builder.cpp @@ -1,4 +1,4 @@ -#include "AtomicOpBuilder.h" +#include "atomic_op_builder.h" #include #include "common/log/log.h" diff --git a/decompiler/IR2/AtomicOpBuilder.h b/decompiler/IR2/atomic_op_builder.h similarity index 100% rename from decompiler/IR2/AtomicOpBuilder.h rename to decompiler/IR2/atomic_op_builder.h diff --git a/decompiler/IR2/cfg_builder.cpp b/decompiler/IR2/cfg_builder.cpp new file mode 100644 index 0000000000..f785ae2146 --- /dev/null +++ b/decompiler/IR2/cfg_builder.cpp @@ -0,0 +1,1227 @@ +/*! + * @file cfg_builder.cpp + * Initial conversion from Control Flow Graph to IR2 Form. + */ + +#include "cfg_builder.h" +#include "decompiler/util/MatchParam.h" + +namespace decompiler { +namespace { + +Form* cfg_to_ir(FormPool& pool, const Function& f, const CfgVtx* vtx); + +/*! + * If it's a form containing multiple elements, return a pointer to the branch element at the end + * and also a pointer to the Form containing the branch element. + * Otherwise returns nullptr. Useful to modify or remove branches found at the end of blocks, + * and inline things into the begin they were found in. + */ +std::pair get_condition_branch_as_vector(Form* in) { + // With the current Form setup, we'll never have to dig deeper to find the branch. + // so we can just return the input as the Form*. 
+ // If this changes, this can be fixed here, rather than refactoring the whole thing. + if (in->size() > 1) { + auto irb = dynamic_cast(in->back()); + assert(irb); + return std::make_pair(irb, in); + } + return std::make_pair(nullptr, nullptr); +} + +/*! + * Given an IR, find a branch IR at the end, and also the location of it so it can be patched. + * Returns nullptr as the first item in the pair if it didn't work. + * Use this to inspect a sequence ending in branch and have the ability to replace the branch with + * something else if needed. + */ +std::pair get_condition_branch(Form* in) { + BranchElement* condition_branch = dynamic_cast(in->back()); + FormElement** condition_branch_location = in->back_ref(); + + if (!condition_branch) { + auto as_return = dynamic_cast(in->back()); + if (as_return) { + return get_condition_branch(as_return->dead_code); + } + } + + if (!condition_branch) { + auto as_break = dynamic_cast(in->back()); + if (as_break) { + return get_condition_branch(as_break->dead_code); + } + } + return std::make_pair(condition_branch, condition_branch_location); +} + +/*! + * Given a CondWithElse IR, remove the internal branches and set the condition to be an actual + * compare IR instead of a branch. + * Doesn't "rebalance" the leading condition because this runs way before expression compaction. + */ +void clean_up_cond_with_else(FormPool& pool, FormElement* ir) { + auto cwe = dynamic_cast(ir); + assert(cwe); + for (auto& e : cwe->entries) { + // don't reclean already cleaned things. + if (e.cleaned) { + continue; + } + auto jump_to_next = get_condition_branch(e.condition); + assert(jump_to_next.first); + assert(jump_to_next.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + // patch the branch to next with a condition. + auto replacement = jump_to_next.first->op()->condition().get_as_form(pool); + replacement->invert(); + *(jump_to_next.second) = replacement; + + // check the jump at the end of a block. 
+ auto jump_to_end = get_condition_branch(e.body); + assert(jump_to_end.first); + assert(jump_to_end.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + assert(jump_to_end.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS); + + // if possible, we just want to remove this from the sequence its in. + // but sometimes there's a case with nothing in it so there is no sequence. + // in this case, we can just replace the branch with a NOP IR to indicate that nothing + // happens in this case, but there was still GOAL code to test for it. + // this happens rarely, as you would expect. + auto as_end_of_sequence = get_condition_branch_as_vector(e.body); + if (as_end_of_sequence.first) { + assert(as_end_of_sequence.second->size() > 1); + as_end_of_sequence.second->pop_back(); + } else { + // we need to have _something_ as the body, so we just put an (empty). + *(jump_to_end.second) = pool.alloc_element(); + } + e.cleaned = true; + } +} + +/*! + * Replace the branch at the end of an until loop's condition with a condition. + */ +void clean_up_until_loop(FormPool& pool, UntilElement* ir) { + auto condition_branch = get_condition_branch(ir->condition); + assert(condition_branch.first); + assert(condition_branch.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + auto replacement = condition_branch.first->op()->condition().get_as_form(pool); + replacement->invert(); + *(condition_branch.second) = replacement; +} + +/*! + * Remove the true branch at the end of an infinite while loop. 
+ */ +void clean_up_infinite_while_loop(FormPool& pool, WhileElement* ir) { + auto jump = get_condition_branch(ir->body); + assert(jump.first); + assert(jump.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + assert(jump.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS); + auto as_end_of_sequence = get_condition_branch_as_vector(ir->body); + if (as_end_of_sequence.first) { + // there's more in the sequence, just remove the last thing. + assert(as_end_of_sequence.second->size() > 1); + as_end_of_sequence.second->pop_back(); + } else { + // Nothing else in the sequence, just replace the jump with an (empty) + *(jump.second) = pool.alloc_element(); + } + ir->cleaned = true; // so we don't try this later... +} + +/*! + * Remove the branch in a return statement + */ +void clean_up_return(FormPool& pool, ReturnElement* ir) { + auto jump_to_end = get_condition_branch(ir->return_code); + assert(jump_to_end.first); + assert(jump_to_end.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + assert(jump_to_end.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS); + auto as_end_of_sequence = get_condition_branch_as_vector(ir->return_code); + if (as_end_of_sequence.first) { + assert(as_end_of_sequence.second->size() > 1); + as_end_of_sequence.second->pop_back(); + } else { + *(jump_to_end.second) = pool.alloc_element(); + } +} + +/*! 
+ * Remove the branch in a break (really return-from nonfunction scope) + */ +void clean_up_break(FormPool& pool, BreakElement* ir) { + auto jump_to_end = get_condition_branch(ir->return_code); + assert(jump_to_end.first); + assert(jump_to_end.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + assert(jump_to_end.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS); + auto as_end_of_sequence = get_condition_branch_as_vector(ir->return_code); + if (as_end_of_sequence.first) { + assert(as_end_of_sequence.second->size() > 1); + as_end_of_sequence.second->pop_back(); + } else { + *(jump_to_end.second) = pool.alloc_element(); + } +} + +/*! + * Does the instruction in the delay slot set a register to false? + * Note. a beql s7, x followed by a or y, x, r0 will count as this. I don't know why but + * GOAL does this on comparisons to false. + */ +bool delay_slot_sets_false(BranchElement* branch) { + if (branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_FALSE) { + return true; + } + + if (branch->op()->condition().kind() == IR2_Condition::Kind::FALSE && + branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_REG) { + auto& cond = branch->op()->condition(); + auto& delay = branch->op()->branch_delay(); + auto cond_reg = cond.src(0).var().reg(); + auto src_reg = delay.var(1).reg(); + return cond_reg == src_reg; + } + + return false; +} + +/*! + * Does the instruction in the delay slot set a register to a truthy value, like in a GOAL + * or form branch? 
Either it explicitly sets #t, or it tests the value for being not false, + * then uses that + */ +bool delay_slot_sets_truthy(BranchElement* branch) { + if (branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_TRUE) { + return true; + } + + if (branch->op()->condition().kind() == IR2_Condition::Kind::TRUTHY && + branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_REG) { + auto& cond = branch->op()->condition(); + auto& delay = branch->op()->branch_delay(); + auto cond_reg = cond.src(0).var().reg(); + auto src_reg = delay.var(1).reg(); + return cond_reg == src_reg; + } + + return false; +} + +/*! + * Try to convert a short circuit to an and. + */ +bool try_clean_up_sc_as_and(FormPool& pool, const Function& func, ShortCircuitElement* ir) { + Register destination; + Variable ir_dest; + for (int i = 0; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(ir->entries.at(i).condition); + assert(branch.first); + if (!delay_slot_sets_false(branch.first)) { + return false; + } + + if (i == 0) { + // first case, remember the destination + ir_dest = branch.first->op()->branch_delay().var(0); + destination = ir_dest.reg(); + } else { + // check destination against the first case. 
+ if (destination != branch.first->op()->branch_delay().var(0).reg()) { + return false; + } + } + } + + ir->kind = ShortCircuitElement::AND; + ir->final_result = ir_dest; + + bool live_out_result = false; + + // now get rid of the branches + for (int i = 0; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(ir->entries.at(i).condition); + assert(branch.first); + + if (func.ir2.has_reg_use) { + auto& branch_info = func.ir2.reg_use.op.at(branch.first->op()->op_id()); + + if (i == 0) { + live_out_result = (branch_info.written_and_unused.find(ir_dest.reg()) == + branch_info.written_and_unused.end()); + } else { + bool this_live_out = (branch_info.written_and_unused.find(ir_dest.reg()) == + branch_info.written_and_unused.end()); + if (live_out_result != this_live_out) { + lg::error("Bad live out result on {}. At 0 was {} now at {} is {}", + func.guessed_name.to_string(), live_out_result, i, this_live_out); + } + assert(live_out_result == this_live_out); + } + } + + auto replacement = branch.first->op()->condition().get_as_form(pool); + replacement->invert(); + *(branch.second) = replacement; + } + + ir->used_as_value = live_out_result; + return true; +} + +/*! + * Try to convert a short circuit to an or. + * Note - this will convert an and to a very strange or, so always use the try as and first. + */ +bool try_clean_up_sc_as_or(FormPool& pool, const Function& func, ShortCircuitElement* ir) { + Register destination; + Variable ir_dest; + for (int i = 0; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(ir->entries.at(i).condition); + assert(branch.first); + if (!delay_slot_sets_truthy(branch.first)) { + return false; + } + if (i == 0) { + // first case, remember the destination + ir_dest = branch.first->op()->branch_delay().var(0); + destination = ir_dest.reg(); + } else { + // check destination against the first case. 
+ if (destination != branch.first->op()->branch_delay().var(0).reg()) { + return false; + } + } + } + + ir->kind = ShortCircuitElement::OR; + ir->final_result = ir_dest; + + bool live_out_result = false; + + for (int i = 0; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(ir->entries.at(i).condition); + assert(branch.first); + + if (func.ir2.has_reg_use) { + auto& branch_info = func.ir2.reg_use.op.at(branch.first->op()->op_id()); + + if (i == 0) { + live_out_result = (branch_info.written_and_unused.find(ir_dest.reg()) == + branch_info.written_and_unused.end()); + } else { + bool this_live_out = (branch_info.written_and_unused.find(ir_dest.reg()) == + branch_info.written_and_unused.end()); + assert(live_out_result == this_live_out); + } + } + + auto replacement = branch.first->op()->condition().get_as_form(pool); + *(branch.second) = replacement; + } + + ir->used_as_value = live_out_result; + return true; +} + +void clean_up_sc(FormPool& pool, const Function& func, ShortCircuitElement* ir); + +/*! + * A form like (and x (or y z)) will be recognized as a single SC Vertex by the CFG pass. + * In the case where we fail to clean it up as an AND or an OR, we should attempt splitting. + * Part of the complexity here is that we want to clean up the split recursively so things like + * (and x (or y (and a b))) + * or + * (and x (or y (and a b)) c d (or z)) + * will work correctly. This may require doing more splitting on both sections! + */ +bool try_splitting_nested_sc(FormPool& pool, const Function& func, ShortCircuitElement* ir) { + auto first_branch = get_condition_branch(ir->entries.front().condition); + assert(first_branch.first); + bool first_is_and = delay_slot_sets_false(first_branch.first); + bool first_is_or = delay_slot_sets_truthy(first_branch.first); + assert(first_is_and != first_is_or); // one or the other but not both! + + int first_different = -1; // the index of the first one that's different. 
+ + for (int i = 1; i < int(ir->entries.size()) - 1; i++) { + auto branch = get_condition_branch(ir->entries.at(i).condition); + assert(branch.first); + bool is_and = delay_slot_sets_false(branch.first); + bool is_or = delay_slot_sets_truthy(branch.first); + assert(is_and != is_or); + + if (first_different == -1) { + // haven't seen a change yet. + if (first_is_and != is_and) { + // change! + first_different = i; + break; + } + } + } + + assert(first_different != -1); + + std::vector nested_ir; + for (int i = first_different; i < int(ir->entries.size()); i++) { + nested_ir.push_back(ir->entries.at(i)); + } + + auto s = int(ir->entries.size()); + for (int i = first_different; i < s; i++) { + ir->entries.pop_back(); + } + + // nested_sc has no parent yet. + auto nested_sc = pool.alloc_element(nested_ir); + clean_up_sc(pool, func, nested_sc); + + // the real trick + ShortCircuitElement::Entry nested_entry; + // sets both parents + nested_entry.condition = pool.alloc_single_form(ir, nested_sc); + ir->entries.push_back(nested_entry); + + clean_up_sc(pool, func, ir); + + return true; +} + +/*! + * Try to clean up a single short circuit IR. It may get split up into nested IR_ShortCircuits + * if there is a case like (and a (or b c)) + */ +void clean_up_sc(FormPool& pool, const Function& func, ShortCircuitElement* ir) { + assert(ir->entries.size() > 1); + if (!try_clean_up_sc_as_and(pool, func, ir)) { + if (!try_clean_up_sc_as_or(pool, func, ir)) { + if (!try_splitting_nested_sc(pool, func, ir)) { + assert(false); + } + } + } +} + +/*! + * If the form is a single element whose expression is an identity, return a pointer to + * argument 0 of that expression. Otherwise returns nullptr. + */ +const SimpleAtom* get_atom_src(const Form* form) { + auto* elt = form->try_as_single_element(); + if (elt) { + auto* as_expr = dynamic_cast(elt); + if (as_expr) { + if (as_expr->expr().is_identity()) { + return &as_expr->expr().get_arg(0); + } + } + } + return nullptr; +} + +/*! + * A GOAL comparison which produces a boolean is recognized as a cond-no-else by the CFG analysis. + * But it should not be decompiled as a branching statement. 
+ * This either succeeds or asserts and must be called with with something that can be converted + * successfully + */ +void convert_cond_no_else_to_compare(FormPool& pool, + const Function& f, + FormElement** ir_loc, + Form* parent_form) { + CondNoElseElement* cne = dynamic_cast(*ir_loc); + assert(cne); + auto condition = get_condition_branch(cne->entries.front().condition); + assert(condition.first); + auto body = dynamic_cast(cne->entries.front().body->try_as_single_element()); + assert(body); + auto dst = body->dst(); + auto src_atom = get_atom_src(body->src()); + assert(src_atom); + assert(src_atom->is_sym_ptr()); + assert(src_atom->get_str() == "#f"); + assert(cne->entries.size() == 1); + + auto condition_as_single = + dynamic_cast(cne->entries.front().condition->try_as_single_element()); + auto condition_replacement = condition.first->op()->condition().get_as_form(pool); + auto crf = pool.alloc_single_form(nullptr, condition_replacement); + auto replacement = pool.alloc_element(dst, crf, true); + replacement->parent_form = cne->parent_form; + + if (condition_as_single) { + *ir_loc = replacement; + } else { + // lg::error("Weird case in {}", f.guessed_name.to_string()); + (void)f; + auto seq = cne->entries.front().condition; + seq->pop_back(); + seq->push_back(replacement); + + parent_form->pop_back(); + for (auto& x : seq->elts()) { + parent_form->push_back(x); + } + // auto condition_as_seq = dynamic_cast(cne->entries.front().condition.get()); + // assert(condition_as_seq); + // if (condition_as_seq) { + // auto replacement = std::make_shared(); + // replacement->forms = condition_as_seq->forms; + // assert(condition.second == &condition_as_seq->forms.back()); + // replacement->forms.pop_back(); + // replacement->forms.push_back(std::make_shared( + // IR_Set::REG_64, dst, + // std::make_shared(condition.first->condition, condition.first))); + // *ir = replacement; + // } + } +} + +void clean_up_cond_no_else_final(const Function& func, CondNoElseElement* 
cne) { + for (size_t idx = 0; idx < cne->entries.size(); idx++) { + auto& entry = cne->entries.at(idx); + if (entry.false_destination.has_value()) { + auto fr = entry.false_destination; + assert(fr.has_value()); + cne->final_destination = fr->reg(); + } else { + assert(false); + } + } + + auto last_branch = dynamic_cast(cne->entries.back().original_condition_branch); + assert(last_branch); + + if (func.ir2.has_reg_use) { + auto& last_branch_info = func.ir2.reg_use.op.at(last_branch->op()->op_id()); + cne->used_as_value = last_branch_info.written_and_unused.find(cne->final_destination) == + last_branch_info.written_and_unused.end(); + } + + // check that all other delay slot writes are unused. + for (size_t i = 0; i < cne->entries.size() - 1; i++) { + if (func.ir2.has_reg_use) { + auto branch = dynamic_cast(cne->entries.at(i).original_condition_branch); + auto& branch_info_i = func.ir2.reg_use.op.at(branch->op()->op_id()); + auto reg = cne->entries.at(i).false_destination; + assert(reg.has_value()); + assert(branch); + assert(branch_info_i.written_and_unused.find(reg->reg()) != + branch_info_i.written_and_unused.end()); + } + } +} + +/*! + * Replace internal branches inside a CondNoElse IR. + * If possible will simplify the entire expression into a comparison operation if possible. + * Will record which registers are set to false in branch delay slots. + * The exact behavior here isn't really clear to me. It's possible that these delay set false + * were disabled in cases where the result of the cond was none, or was a number or something. + * But it generally seems inconsistent. The expression propagation step will have to deal with + * this. 
+ */ +void clean_up_cond_no_else(FormPool& pool, + const Function& f, + FormElement** ir_loc, + Form* parent_form) { + auto cne = dynamic_cast(*ir_loc); + assert(cne); + for (size_t idx = 0; idx < cne->entries.size(); idx++) { + auto& e = cne->entries.at(idx); + if (e.cleaned) { + continue; + } + + auto jump_to_next = get_condition_branch(e.condition); + assert(jump_to_next.first); + + if (jump_to_next.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_TRUE && + cne->entries.size() == 1) { + convert_cond_no_else_to_compare(pool, f, ir_loc, parent_form); + return; + } else { + assert(jump_to_next.first->op()->branch_delay().kind() == + IR2_BranchDelay::Kind::SET_REG_FALSE || + jump_to_next.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + assert(jump_to_next.first->op()->condition().kind() != IR2_Condition::Kind::ALWAYS); + + if (jump_to_next.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_FALSE) { + assert(!e.false_destination); + e.false_destination = jump_to_next.first->op()->branch_delay().var(0); + assert(e.false_destination); + } + + e.original_condition_branch = *jump_to_next.second; + + auto replacement = jump_to_next.first->op()->condition().get_as_form(pool); + replacement->invert(); + *(jump_to_next.second) = replacement; + e.cleaned = true; + + if (idx != cne->entries.size() - 1) { + auto jump_to_end = get_condition_branch(e.body); + assert(jump_to_end.first); + assert(jump_to_end.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + assert(jump_to_end.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS); + auto as_end_of_sequence = get_condition_branch_as_vector(e.body); + if (as_end_of_sequence.first) { + assert(as_end_of_sequence.second->size() > 1); + as_end_of_sequence.second->pop_back(); + } else { + *(jump_to_end.second) = pool.alloc_element(); + } + } + } + } +} + +/*! + * Match for a (set! 
reg (math reg reg)) form + * Returns true on a match. On a match, the matched registers are written to whichever of the + * *_out pointers are non-null. + */ +bool is_op_3(FormElement* ir, + MatchParam kind, + MatchParam dst, + MatchParam src0, + MatchParam src1, + Register* dst_out = nullptr, + Register* src0_out = nullptr, + Register* src1_out = nullptr) { + // should be a set reg to int math 2 ir + auto set = dynamic_cast(ir); + if (!set) { + return false; + } + + // destination should be a register + auto dest = set->dst(); + if (dst != dest.reg()) { + return false; + } + + auto math = dynamic_cast(set->src()->try_as_single_element()); + if (!math || kind != math->expr().kind()) { + return false; + } + + if (get_simple_expression_arg_count(math->expr().kind()) != 2) { + return false; + } + + auto arg0 = math->expr().get_arg(0); + auto arg1 = math->expr().get_arg(1); + + if (!arg0.is_var() || src0 != arg0.var().reg() || !arg1.is_var() || src1 != arg1.var().reg()) { + return false; + } + + // it's a match! + if (dst_out) { + *dst_out = dest.reg(); + } + + if (src0_out) { + *src0_out = arg0.var().reg(); + } + + if (src1_out) { + *src1_out = arg1.var().reg(); + } + return true; +} + +/*! + * Match for a (set! reg (math reg)) form. + * Only argument 0 of the expression is inspected; unlike is_op_3, the argument count of the + * expression kind is not checked. Returns true on a match and writes the matched registers to + * whichever of the *_out pointers are non-null. + */ +bool is_op_2(FormElement* ir, + MatchParam kind, + MatchParam dst, + MatchParam src0, + Register* dst_out = nullptr, + Register* src0_out = nullptr) { + // should be a set reg to a math ir; only its first argument is looked at + auto set = dynamic_cast(ir); + if (!set) { + return false; + } + + // destination should be a register + auto dest = set->dst(); + if (dst != dest.reg()) { + return false; + } + + auto math = dynamic_cast(set->src()->try_as_single_element()); + if (!math || kind != math->expr().kind()) { + return false; + } + + auto arg = math->expr().get_arg(0); + + if (!arg.is_var() || src0 != arg.var().reg()) { + return false; + } + + // it's a match! + if (dst_out) { + *dst_out = dest.reg(); + } + + if (src0_out) { + *src0_out = arg.var().reg(); + } + + return true; +} + +/*! + * Try to convert this SC Vertex into an abs (integer). 
+ * Will return a converted abs IR if successful, or nullptr if it's not possible + */ +Form* try_sc_as_abs(FormPool& pool, const Function& f, const ShortCircuit* vtx) { + if (vtx->entries.size() != 1) { + return nullptr; + } + + auto b0 = dynamic_cast(vtx->entries.at(0)); + if (!b0) { + return nullptr; + } + + auto b0_ptr = cfg_to_ir(pool, f, b0); + // auto b0_ir = dynamic_cast(b0_ptr.get()); + + BranchElement* branch = dynamic_cast(b0_ptr->back()); + + if (!branch) { + return nullptr; + } + + // check the branch instruction + if (!branch->op()->likely() || + branch->op()->condition().kind() != IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED || + branch->op()->branch_delay().kind() != IR2_BranchDelay::Kind::NEGATE) { + // todo - if there was an abs(unsigned), it would be missed here. + return nullptr; + } + + auto input = branch->op()->condition().src(0); + auto output = branch->op()->branch_delay().var(0); + + assert(input.is_var()); + assert(input.var().reg() == branch->op()->branch_delay().var(1).reg()); + + // remove the branch + b0_ptr->pop_back(); + // add the abs + auto src_var = pool.alloc_single_element_form(nullptr, input); + auto src_abs = pool.alloc_single_element_form(nullptr, src_var); + auto replacement = pool.alloc_element(output, src_abs, true); + b0_ptr->push_back(replacement); + + return b0_ptr; +} + +/*! + * Attempt to convert a short circuit expression into an arithmetic shift. + * GOAL's shift function accepts positive/negative numbers to determine the direction + * of the shift. + */ +Form* try_sc_as_ash(FormPool& pool, const Function& f, const ShortCircuit* vtx) { + if (vtx->entries.size() != 2) { + return nullptr; + } + + // todo, I think b0 could possibly be something more complicated, depending on how we order. 
+ auto b0 = dynamic_cast(vtx->entries.at(0)); + auto b1 = dynamic_cast(vtx->entries.at(1)); + if (!b0 || !b1) { + return nullptr; + } + + auto b0_ptr = cfg_to_ir(pool, f, b0); + auto b1_ptr = cfg_to_ir(pool, f, b1); + + auto branch = dynamic_cast(b0_ptr->back()); + if (!branch || b1_ptr->size() != 2) { + return nullptr; + } + + // check the branch instruction + if (!branch->op()->likely() || + branch->op()->condition().kind() != IR2_Condition::Kind::GEQ_ZERO_SIGNED || + branch->op()->branch_delay().kind() != IR2_BranchDelay::Kind::DSLLV) { + return nullptr; + } + + /* + * bgezl s5, L109 ; s5 is the shift amount + dsllv a0, a0, s5 ; a0 is both input and output here + + dsubu a1, r0, s5 ; a1 is a temp here + dsrav a0, a0, a1 ; a0 is both input and output here + */ + + auto sa_in = branch->op()->condition().src(0); + assert(sa_in.is_var()); + auto result = branch->op()->branch_delay().var(0); + auto value_in = branch->op()->branch_delay().var(1); + auto sa_in2 = branch->op()->branch_delay().var(2); + assert(sa_in.var().reg() == sa_in2.reg()); + + auto dsubu_candidate = b1_ptr->at(0); + auto dsrav_candidate = b1_ptr->at(1); + + Register clobber; + if (!is_op_2(dsubu_candidate, SimpleExpression::Kind::NEG, {}, sa_in.var().reg(), &clobber)) { + return nullptr; + } + + bool is_arith = is_op_3(dsrav_candidate, SimpleExpression::Kind::RIGHT_SHIFT_ARITH, result.reg(), + value_in.reg(), clobber); + bool is_logical = is_op_3(dsrav_candidate, SimpleExpression::Kind::RIGHT_SHIFT_LOGIC, + result.reg(), value_in.reg(), clobber); + + if (!is_arith && !is_logical) { + return nullptr; + } + + std::optional clobber_ir; + auto dsubu_set = dynamic_cast(dsubu_candidate); + auto dsrav_set = dynamic_cast(dsrav_candidate); + assert(dsubu_set && dsrav_set); + if (clobber != result.reg()) { + clobber_ir = dsubu_set->dst(); + } + + Variable dest_ir = branch->op()->branch_delay().var(0); + SimpleAtom shift_ir = branch->op()->condition().src(0); + auto value_ir = + 
dynamic_cast(dsrav_set->src()->try_as_single_element()) + ->expr() + .get_arg(0); + + // remove the branch + b0_ptr->pop_back(); + + // setup + auto value_form = pool.alloc_single_element_form(nullptr, value_ir); + auto shift_form = pool.alloc_single_element_form(nullptr, shift_ir); + auto ash_form = pool.alloc_single_element_form(nullptr, shift_form, value_form, + clobber_ir, is_arith); + auto set_form = pool.alloc_element(dest_ir, ash_form, true); + b0_ptr->push_back(set_form); + + return b0_ptr; +} + +/*! + * Try to convert a short circuiting expression into a "type-of" expression. + * We do this before attempting the normal and/or expressions. + */ +Form* try_sc_as_type_of(FormPool& pool, const Function& f, const ShortCircuit* vtx) { + // the assembly looks like this: + /* + dsll32 v1, a0, 29 ;; (set! v1 (shl a0 61)) + beql v1, r0, L60 ;; (bl! (= v1 r0) L60 (unknown-branch-delay)) + lw v1, binteger(s7) + + bgtzl v1, L60 ;; (bl! (>0.s v1) L60 (unknown-branch-delay)) + lw v1, pair(s7) + + lwu v1, -4(a0) ;; (set! v1 (l.wu (+.i a0 -4))) + L60: + */ + + // some of these checks may be a little bit overkill but it's a nice way to sanity check that + // we have actually decoded everything correctly. + if (vtx->entries.size() != 3) { + return nullptr; + } + + auto b0 = dynamic_cast(vtx->entries.at(0)); + auto b1 = dynamic_cast(vtx->entries.at(1)); + auto b2 = dynamic_cast(vtx->entries.at(2)); + + if (!b0 || !b1 || !b2) { + return nullptr; + } + + auto b0_ptr = cfg_to_ir(pool, f, b0); // should be begin. 
+ if (b0_ptr->size() <= 1) { + return nullptr; + } + + auto b1_ptr = cfg_to_ir(pool, f, b1); + auto b1_ir = dynamic_cast(b1_ptr->try_as_single_element()); + + auto b2_ptr = cfg_to_ir(pool, f, b2); + auto b2_ir = dynamic_cast(b2_ptr->try_as_single_element()); + if (!b1_ir || !b2_ir) { + return nullptr; + } + + auto set_shift = dynamic_cast(b0_ptr->at(b0_ptr->size() - 2)); + if (!set_shift) { + return nullptr; + } + + auto temp_reg0 = set_shift->dst(); + + auto shift = dynamic_cast(set_shift->src()->try_as_single_element()); + if (!shift || shift->expr().kind() != SimpleExpression::Kind::LEFT_SHIFT) { + return nullptr; + } + auto src_reg = shift->expr().get_arg(0).var(); + auto sa = shift->expr().get_arg(1); + if (!sa.is_int() || sa.get_int() != 61) { + return nullptr; + } + + auto first_branch = dynamic_cast(b0_ptr->back()); + auto second_branch = b1_ir; + auto else_case = b2_ir; + + if (!first_branch || + first_branch->op()->branch_delay().kind() != IR2_BranchDelay::Kind::SET_BINTEGER || + first_branch->op()->condition().kind() != IR2_Condition::Kind::ZERO || + !first_branch->op()->likely()) { + return nullptr; + } + auto temp_reg = first_branch->op()->condition().src(0).var(); + assert(temp_reg.reg() == temp_reg0.reg()); + auto dst_reg = first_branch->op()->branch_delay().var(0); + + if (!second_branch || + second_branch->op()->branch_delay().kind() != IR2_BranchDelay::Kind::SET_PAIR || + second_branch->op()->condition().kind() != IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED || + !second_branch->op()->likely()) { + return nullptr; + } + + // check we agree on destination register. 
+ auto dst_reg2 = second_branch->op()->branch_delay().var(0); + assert(dst_reg2.reg() == dst_reg.reg()); + + // else case is a lwu to grab the type from a basic + assert(else_case); + auto dst_reg3 = else_case->dst(); + assert(dst_reg3.reg() == dst_reg.reg()); + auto load_op = dynamic_cast(else_case->src()->try_as_single_element()); + if (!load_op || load_op->kind() != LoadVarOp::Kind::UNSIGNED || load_op->size() != 4) { + return nullptr; + } + auto load_loc = + dynamic_cast(load_op->location()->try_as_single_element()); + if (!load_loc || load_loc->expr().kind() != SimpleExpression::Kind::ADD) { + return nullptr; + } + auto src_reg3 = load_loc->expr().get_arg(0); + auto offset = load_loc->expr().get_arg(1); + if (!src_reg3.is_var() || !offset.is_int()) { + return nullptr; + } + + assert(src_reg3.var().reg() == src_reg.reg()); + assert(offset.get_int() == -4); + + std::optional clobber; + if (temp_reg.reg() != dst_reg.reg()) { + clobber = first_branch->op()->condition().src(0).var(); + } + + // remove the branch + b0_ptr->pop_back(); + // remove the shift + b0_ptr->pop_back(); + + auto obj = pool.alloc_single_element_form(nullptr, shift->expr().get_arg(0)); + auto type_op = pool.alloc_single_element_form(nullptr, obj, clobber); + auto op = pool.alloc_element(else_case->dst(), type_op, true); + b0_ptr->push_back(op); + // add the type-of + + return b0_ptr; +} + +Form* merge_cond_else_with_sc_cond(FormPool& pool, + const Function& f, + const CondWithElse* cwe, + Form* else_ir) { + if (else_ir->size() != 2) { + return nullptr; + } + + auto first = dynamic_cast(else_ir->at(0)); + auto second = dynamic_cast(else_ir->at(1)); + if (!first || !second) { + return nullptr; + } + + std::vector entries; + for (auto& x : cwe->entries) { + CondNoElseElement::Entry e; + e.condition = cfg_to_ir(pool, f, x.condition); + e.body = cfg_to_ir(pool, f, x.body); + entries.push_back(std::move(e)); + } + + auto first_condition = pool.alloc_empty_form(); + 
first_condition->push_back(else_ir->at(0)); + for (auto& x : second->entries.front().condition->elts()) { + first_condition->push_back(x); + } + + second->entries.front().condition = first_condition; + + for (auto& x : second->entries) { + entries.push_back(x); + } + auto result = pool.alloc_single_element_form(nullptr, entries); + clean_up_cond_no_else(pool, f, result->back_ref(), result); + return result; +} + +/*! + * Append the elements for a CFG vertex to output. + * Sequence vertices are inlined recursively, block vertices contribute one element per atomic op + * in the block, and any other vertex is converted with cfg_to_ir and its elements are appended. + */ +void insert_cfg_into_list(FormPool& pool, + const Function& f, + const CfgVtx* vtx, + std::vector* output) { + auto as_sequence = dynamic_cast(vtx); + auto as_block = dynamic_cast(vtx); + if (as_sequence) { + // inline the sequence. + for (auto& x : as_sequence->seq) { + insert_cfg_into_list(pool, f, x, output); + } + } else if (as_block) { + // inline the ops. + auto start_op = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(as_block->block_id); + auto end_op = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(as_block->block_id); + for (auto i = start_op; i < end_op; i++) { + output->push_back(f.ir2.atomic_ops->ops.at(i)->get_as_form(pool)); + } + } else { + auto ir = cfg_to_ir(pool, f, vtx); + for (auto x : ir->elts()) { + output->push_back(x); + } + } +} + +/*! + * Convert a CFG vertex to a Form, dispatching on the dynamic type of the vertex. + * Throws std::runtime_error for a vertex that none of the cases can convert. + */ +Form* cfg_to_ir(FormPool& pool, const Function& f, const CfgVtx* vtx) { + if (dynamic_cast(vtx)) { + auto* bv = dynamic_cast(vtx); + + Form* output = pool.alloc_empty_form(); + auto start_op = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(bv->block_id); + auto end_op = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(bv->block_id); + for (auto i = start_op; i < end_op; i++) { + output->push_back(f.ir2.atomic_ops->ops.at(i)->get_as_form(pool)); + } + + return output; + + } else if (dynamic_cast(vtx)) { + auto* sv = dynamic_cast(vtx); + Form* output = pool.alloc_empty_form(); + insert_cfg_into_list(pool, f, sv, &output->elts()); + + return output; + } else if (dynamic_cast(vtx)) { + auto wvtx = dynamic_cast(vtx); + + return pool.alloc_single_element_form( + nullptr, cfg_to_ir(pool, f, wvtx->condition), 
cfg_to_ir(pool, f, wvtx->body)); + } else if (dynamic_cast(vtx)) { + auto wvtx = dynamic_cast(vtx); + auto result = pool.alloc_single_element_form( + nullptr, cfg_to_ir(pool, f, wvtx->condition), cfg_to_ir(pool, f, wvtx->body)); + clean_up_until_loop(pool, dynamic_cast(result->try_as_single_element())); + return result; + } else if (dynamic_cast(vtx)) { + auto wvtx = dynamic_cast(vtx); + auto empty = pool.alloc_single_element_form(nullptr); + auto result = pool.alloc_single_element_form( + nullptr, cfg_to_ir(pool, f, wvtx->block), empty); + clean_up_until_loop(pool, dynamic_cast(result->try_as_single_element())); + return result; + } else if (dynamic_cast(vtx)) { + auto wvtx = dynamic_cast(vtx); + auto condition = pool.alloc_single_element_form( + nullptr, IR2_Condition::Kind::ALWAYS, nullptr, nullptr); + auto result = pool.alloc_single_element_form(nullptr, condition, + cfg_to_ir(pool, f, wvtx->block)); + clean_up_infinite_while_loop(pool, + dynamic_cast(result->try_as_single_element())); + return result; + } else if (dynamic_cast(vtx)) { + auto* cvtx = dynamic_cast(vtx); + + // the cfg analysis pass may recognize some things out of order, which can cause + // fake nesting. This is actually a problem at this point because it can turn a normal + // cond into a cond with else, which emits different instructions. This attempts to recognize + // an else which is actually more cases and compacts it into a single statement. At this point + // I don't know if this is sufficient to catch all cases. it may even recognize the wrong + // thing in some cases... maybe we should check the delay slot instead? + auto else_ir = cfg_to_ir(pool, f, cvtx->else_vtx); + auto fancy_compact_result = merge_cond_else_with_sc_cond(pool, f, cvtx, else_ir); + if (fancy_compact_result) { + return fancy_compact_result; + } + + // this case is disabled because I _think_ it is now properly handled elsewhere. 
+ if (false /*&& dynamic_cast(else_ir.get())*/) { + // auto extra_cond = dynamic_cast(else_ir.get()); + // std::vector entries; + // for (auto& x : cvtx->entries) { + // IR_Cond::Entry e; + // e.condition = cfg_to_ir(f, file, x.condition); + // e.body = cfg_to_ir(f, file, x.body); + // entries.push_back(std::move(e)); + // } + // for (auto& x : extra_cond->entries) { + // entries.push_back(x); + // } + // std::shared_ptr result = std::make_shared(entries); + // clean_up_cond_no_else(&result, file); + // return result; + } else { + std::vector entries; + for (auto& x : cvtx->entries) { + CondWithElseElement::Entry e; + e.condition = cfg_to_ir(pool, f, x.condition); + e.body = cfg_to_ir(pool, f, x.body); + entries.push_back(std::move(e)); + } + auto result = pool.alloc_single_element_form(nullptr, entries, else_ir); + clean_up_cond_with_else(pool, + dynamic_cast(result->try_as_single_element())); + return result; + } + } else if (dynamic_cast(vtx)) { + auto* svtx = dynamic_cast(vtx); + // try as a type of expression first + auto as_type_of = try_sc_as_type_of(pool, f, svtx); + if (as_type_of) { + return as_type_of; + } + + auto as_ash = try_sc_as_ash(pool, f, svtx); + if (as_ash) { + return as_ash; + } + + auto as_abs = try_sc_as_abs(pool, f, svtx); + if (as_abs) { + return as_abs; + } + + if (svtx->entries.size() == 1) { + throw std::runtime_error("Weird short circuit form."); + } + // now try as a normal and/or + std::vector entries; + for (auto& x : svtx->entries) { + ShortCircuitElement::Entry e; + e.condition = cfg_to_ir(pool, f, x); + entries.push_back(e); + } + auto result = pool.alloc_single_element_form(nullptr, entries); + clean_up_sc(pool, f, dynamic_cast(result->try_as_single_element())); + return result; + } else if (dynamic_cast(vtx)) { + auto* cvtx = dynamic_cast(vtx); + std::vector entries; + for (auto& x : cvtx->entries) { + CondNoElseElement::Entry e; + e.condition = cfg_to_ir(pool, f, x.condition); + e.body = cfg_to_ir(pool, f, x.body); + 
entries.push_back(std::move(e)); + } + auto result = pool.alloc_single_element_form(nullptr, entries); + clean_up_cond_no_else(pool, f, result->back_ref(), result); + return result; + } else if (dynamic_cast(vtx)) { + auto* cvtx = dynamic_cast(vtx); + auto result = pool.alloc_single_element_form( + nullptr, cfg_to_ir(pool, f, cvtx->body), cfg_to_ir(pool, f, cvtx->unreachable_block)); + clean_up_return(pool, dynamic_cast(result->try_as_single_element())); + return result; + } else if (dynamic_cast(vtx)) { + auto* cvtx = dynamic_cast(vtx); + auto result = pool.alloc_single_element_form( + nullptr, cfg_to_ir(pool, f, cvtx->body), cfg_to_ir(pool, f, cvtx->unreachable_block)); + clean_up_break(pool, dynamic_cast(result->try_as_single_element())); + return result; + } + + throw std::runtime_error("not yet implemented IR conversion."); + return nullptr; +} + +/*! + * Post processing pass to clean up while loops - annoyingly the block before a while loop + * has a jump to the condition branch that we need to remove. This currently happens after all + * conversion but this may need to be revisited depending on the final order of simplifications. + * + * For every element of the sequence that is a while loop and is not marked cleaned: + * - the element right before it must be a branch whose condition kind is ALWAYS and whose + * branch delay kind is NOP (both asserted). Its index is recorded and it is erased from the + * sequence once the scan is done. + * - the branch returned by get_condition_branch for the loop condition must have a NOP branch + * delay (asserted). The form built from its condition with get_as_form is written through + * the second member of the returned pair. + * NOTE(review): the cleaned flag is only read here and is not set by this function, so it has + * to be set elsewhere - confirm where. + * @param pool passed to get_as_form to build the replacement form + * @param sequence the form whose elements are scanned; the recorded branches are erased from it + */ +void clean_up_while_loops(FormPool& pool, Form* sequence) { + std::vector to_remove; // the list of branches to remove by index in this sequence + for (int i = 0; i < sequence->size(); i++) { + auto* form_as_while = dynamic_cast(sequence->at(i)); + if (form_as_while && !form_as_while->cleaned) { + assert(i != 0); + auto prev_as_branch = dynamic_cast(sequence->at(i - 1)); + assert(prev_as_branch); + // printf("got while intro branch %s\n", prev_as_branch->print(file).c_str()); + // this should be an always jump. We'll assume that the CFG builder successfully checked + // the branch destination, but we will check the condition.
+ assert(prev_as_branch->op()->condition().kind() == IR2_Condition::Kind::ALWAYS); + assert(prev_as_branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + to_remove.push_back(i - 1); + + // now we should try to find the condition branch: + + auto condition_branch = get_condition_branch(form_as_while->condition); + + assert(condition_branch.first); + assert(condition_branch.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP); + // printf("got while condition branch %s\n", condition_branch.first->print(file).c_str()); + auto replacement = condition_branch.first->op()->condition().get_as_form(pool); + + *(condition_branch.second) = replacement; + } + } + + // remove the implied forward always branches. + // the indices were recorded in increasing order, so erase from the last one to the first: + // that way an erase never shifts an element whose index is still waiting in the list. + for (int i = int(to_remove.size()); i-- > 0;) { + auto idx = to_remove.at(i); + assert(dynamic_cast(sequence->at(idx))); + sequence->elts().erase(sequence->elts().begin() + idx); + } +} +} // namespace + +/*! + * Build the initial top-level Form of a function from its CFG. + * Returns without doing anything if the CFG is not fully resolved. Otherwise the single top-level + * CFG vertex is passed to insert_cfg_into_list to fill a list of elements, the list is wrapped in + * a sequence form allocated from function.ir2.form_pool, clean_up_while_loops is run through + * apply_form, clean_up_cond_no_else_final is run through apply on the elements accepted by the + * dynamic_cast, and the result is stored in function.ir2.top_form. + * A std::runtime_error thrown along the way is caught and logged as a warning with the guessed + * function name; in that case top_form is not assigned here. + * @param function the function to process; its ir2.form_pool and ir2.top_form are modified + */ +void build_initial_forms(Function& function) { + auto& cfg = function.cfg; + if (!cfg->is_fully_resolved()) { + return; + } + + try { + auto& pool = function.ir2.form_pool; + auto top_level = function.cfg->get_single_top_level(); + std::vector top_level_elts; + insert_cfg_into_list(pool, function, top_level, &top_level_elts); + auto result = pool.alloc_sequence_form(nullptr, top_level_elts); + + result->apply_form([&](Form* form) { clean_up_while_loops(pool, form); }); + + result->apply([&](FormElement* form) { + auto as_cne = dynamic_cast(form); + if (as_cne) { + clean_up_cond_no_else_final(function, as_cne); + } + }); + + function.ir2.top_form = result; + } catch (std::runtime_error& e) { + lg::warn("Failed to build initial forms in {}: {}", function.guessed_name.to_string(), + e.what()); + } +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/cfg_builder.h b/decompiler/IR2/cfg_builder.h new file mode 100644 index 0000000000..e8745e3d5b --- /dev/null +++ b/decompiler/IR2/cfg_builder.h @@ -0,0 +1,7 @@
+#pragma once + +#include "decompiler/Function/Function.h" + +namespace decompiler { +/*! + * Build the initial top-level Form of the given function from its CFG and store it in + * function.ir2.top_form. + */ +void build_initial_forms(Function& function); +} \ No newline at end of file diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index ea6742af60..c6e50eeae6 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -73,6 +73,7 @@ class ObjectFileDB { void ir2_type_analysis_pass(); void ir2_register_usage_pass(); void ir2_variable_pass(); + void ir2_cfg_build_pass(); void ir2_write_results(const std::string& output_dir); std::string ir2_to_file(ObjectFileData& data); std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg); diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index 9e342f579d..a9a185bc54 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -10,6 +10,8 @@ #include "decompiler/Function/TypeInspector.h" #include "decompiler/IR2/reg_usage.h" #include "decompiler/IR2/variable_naming.h" +#include "decompiler/IR2/cfg_builder.h" +#include "common/goos/PrettyPrinter.h" namespace decompiler { @@ -32,6 +34,8 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) { ir2_register_usage_pass(); lg::info("Variable analysis..."); ir2_variable_pass(); + lg::info("Initial conversion to Form..."); + ir2_cfg_build_pass(); lg::info("Writing results..."); ir2_write_results(output_dir); } @@ -307,7 +311,7 @@ void ObjectFileDB::ir2_variable_pass() { for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { (void)segment_id; (void)data; - if (!func.suspected_asm && func.ir2.atomic_ops_succeeded) { + if (!func.suspected_asm && func.ir2.atomic_ops_succeeded && func.ir2.env.has_type_analysis()) { try { attempted++; auto result = run_variable_renaming(func, func.ir2.reg_use, *func.ir2.atomic_ops, dts); @@ -324,6 +328,28 @@ void
ObjectFileDB::ir2_variable_pass() { attempted, timer.getMs()); } +/*! + * Run build_initial_forms on every function visited by for_each_function_def_order that is not + * flagged suspected_asm, has ir2.atomic_ops_succeeded set and has a fully resolved CFG. + * Logs three counts and the elapsed time: functions whose ir2.top_form is set, functions + * attempted, and functions visited. The first count is checked for every visited function, + * independently of whether the build was attempted for it in this pass. + */ +void ObjectFileDB::ir2_cfg_build_pass() { + Timer timer; + int total = 0; + int attempted = 0; + int successful = 0; + for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { + (void)segment_id; + (void)data; + total++; + if (!func.suspected_asm && func.ir2.atomic_ops_succeeded && func.cfg->is_fully_resolved()) { + attempted++; + build_initial_forms(func); + } + + if (func.ir2.top_form) { + successful++; + } + }); + + lg::info("{}/{}/{} cfg build in {:.2f} ms\n", successful, attempted, total, timer.getMs()); +} + void ObjectFileDB::ir2_write_results(const std::string& output_dir) { Timer timer; lg::info("Writing IR2 results to file..."); @@ -358,6 +384,11 @@ std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) { // functions for (auto& func : data.linked_data.functions_by_seg.at(seg)) { result += ir2_function_to_string(data, func, seg); + if (func.ir2.top_form) { + result += '\n'; + result += pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env)); + result += '\n'; + } } // print data diff --git a/test/decompiler/test_AtomicOpBuilder.cpp b/test/decompiler/test_AtomicOpBuilder.cpp index 77bc613985..eeb8fb6c84 100644 --- a/test/decompiler/test_AtomicOpBuilder.cpp +++ b/test/decompiler/test_AtomicOpBuilder.cpp @@ -1,6 +1,6 @@ #include "gtest/gtest.h" #include "decompiler/IR2/AtomicOp.h" -#include "decompiler/IR2/AtomicOpBuilder.h" +#include "decompiler/IR2/atomic_op_builder.h" #include "decompiler/Disasm/InstructionParser.h" #include "third-party/fmt/core.h" #include "third-party/fmt/format.h" @@ -67,7 +67,7 @@ void test_case(std::string assembly_lines, // the ordering of the registers doesn't matter. It could happen to be in the same order // as the opcode here, but it may not always be the case.
bool found = false; - for (const std::string reg : write_regs.at(i)) { + for (const std::string& reg : write_regs.at(i)) { // TODO - is there a potential bug here in the event that either list has duplicate // registers? if (reg == expected_reg) { @@ -86,7 +86,7 @@ void test_case(std::string assembly_lines, // the ordering of the registers doesn't matter. It could happen to be in the same order // as the opcode here, but it may not always be the case. bool found = false; - for (const std::string reg : read_regs.at(i)) { + for (const std::string& reg : read_regs.at(i)) { // TODO - is there a potential bug here in the event that either list has duplicate // registers? if (reg == expected_reg) { @@ -104,7 +104,7 @@ void test_case(std::string assembly_lines, // the ordering of the registers doesn't matter. It could happen to be in the same order // as the opcode here, but it may not always be the case. bool found = false; - for (const std::string reg : clobbered_regs.at(i)) { + for (const std::string& reg : clobbered_regs.at(i)) { // TODO - is there a potential bug here in the event that either list has duplicate // registers? if (reg == expected_reg) { @@ -417,7 +417,7 @@ TEST(DecompilerAtomicOpBuilder, DSUBU_DADDIU_MOVZ) { } TEST(DecompilerAtomicOpBuilder, JALR_SLL) { - test_case(assembly_from_list({"jalr ra, t9", "sll v0, ra, 0"}), {"(call!)"}, {{}}, {{"t9"}}, + test_case(assembly_from_list({"jalr ra, t9", "sll v0, ra, 0"}), {"(call!)"}, {{"v0"}}, {{"t9"}}, {{"a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "at", "v1"}}); }