diff --git a/common/goos/PrettyPrinter.h b/common/goos/PrettyPrinter.h index 676b6c468a..893e034780 100644 --- a/common/goos/PrettyPrinter.h +++ b/common/goos/PrettyPrinter.h @@ -36,6 +36,9 @@ goos::Object build_list(const std::vector& symbols); // fancy wrapper functions. Due to template magic these can call each other // and accept mixed arguments! +template +goos::Object build_list(const goos::Object& car, Args... rest); + template goos::Object build_list(const std::string& str, Args... rest) { return goos::PairObject::make_new(to_symbol(str), build_list(rest...)); diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index bcda80c605..b64661264c 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -404,7 +404,8 @@ MethodInfo TypeSystem::add_new_method(Type* type, const TypeSpec& ts) { * Lookup information on a method. Error if it can't be found. Will check parent types if the * given type doesn't specialize the method. */ -MethodInfo TypeSystem::lookup_method(const std::string& type_name, const std::string& method_name) { +MethodInfo TypeSystem::lookup_method(const std::string& type_name, + const std::string& method_name) const { if (method_name == "new") { return lookup_new_method(type_name); } @@ -436,7 +437,9 @@ MethodInfo TypeSystem::lookup_method(const std::string& type_name, const std::st /*! * Like lookup_method, but won't throw or print an error when things go wrong. */ -bool TypeSystem::try_lookup_method(const std::string& type_name, int method_id, MethodInfo* info) { +bool TypeSystem::try_lookup_method(const std::string& type_name, + int method_id, + MethodInfo* info) const { auto kv = m_types.find(type_name); if (kv == m_types.end()) { return false; @@ -469,7 +472,7 @@ bool TypeSystem::try_lookup_method(const std::string& type_name, int method_id, * Lookup information on a method by ID number. Error if it can't be found. Will check parent types * if the given type doesn't specialize the method. */ -MethodInfo TypeSystem::lookup_method(const std::string& type_name, int method_id) { +MethodInfo TypeSystem::lookup_method(const std::string& type_name, int method_id) const { if (method_id == GOAL_NEW_METHOD) { return lookup_new_method(type_name); } @@ -502,7 +505,7 @@ MethodInfo TypeSystem::lookup_method(const std::string& type_name, int method_id /*! * Lookup information on a new method and get the most specialized version. */ -MethodInfo TypeSystem::lookup_new_method(const std::string& type_name) { +MethodInfo TypeSystem::lookup_new_method(const std::string& type_name) const { MethodInfo info; // first lookup the type diff --git a/common/type_system/TypeSystem.h b/common/type_system/TypeSystem.h index 9fd547c4ac..7a0635ab2e 100644 --- a/common/type_system/TypeSystem.h +++ b/common/type_system/TypeSystem.h @@ -147,10 +147,10 @@ class TypeSystem { const TypeSpec& ts, bool allow_new_method = true); MethodInfo add_new_method(Type* type, const TypeSpec& ts); - MethodInfo lookup_method(const std::string& type_name, const std::string& method_name); - MethodInfo lookup_method(const std::string& type_name, int method_id); - bool try_lookup_method(const std::string& type_name, int method_id, MethodInfo* info); - MethodInfo lookup_new_method(const std::string& type_name); + MethodInfo lookup_method(const std::string& type_name, const std::string& method_name) const; + MethodInfo lookup_method(const std::string& type_name, int method_id) const; + bool try_lookup_method(const std::string& type_name, int method_id, MethodInfo* info) const; + MethodInfo lookup_new_method(const std::string& type_name) const; void assert_method_id(const std::string& type_name, const std::string& method_name, int id); FieldLookupInfo lookup_field_info(const std::string& type_name, diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index 34f69419f7..9334e19399 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -5,6 +5,7 @@ add_library( analysis/atomic_op_builder.cpp analysis/cfg_builder.cpp analysis/expression_build.cpp + analysis/final_output.cpp analysis/reg_usage.cpp analysis/variable_naming.cpp diff --git a/decompiler/Disasm/InstructionParser.cpp b/decompiler/Disasm/InstructionParser.cpp index 6ae9b0ba4a..6ba19bfe90 100644 --- a/decompiler/Disasm/InstructionParser.cpp +++ b/decompiler/Disasm/InstructionParser.cpp @@ -253,8 +253,17 @@ Instruction InstructionParser::parse_single_instruction( return instr; } -ParsedProgram InstructionParser::parse_program(const std::string& str) { +ParsedProgram InstructionParser::parse_program(const std::string& str, + const std::vector& predefined_labels) { ParsedProgram program; + for (auto& x : predefined_labels) { + DecompilerLabel label; + label.target_segment = 0; + label.offset = 0; + label.name = x; + program.labels.push_back(label); + } + auto lines = string_to_lines(str); int byte_offset = 0; // first pass diff --git a/decompiler/Disasm/InstructionParser.h b/decompiler/Disasm/InstructionParser.h index bd328f139e..5bff880f10 100644 --- a/decompiler/Disasm/InstructionParser.h +++ b/decompiler/Disasm/InstructionParser.h @@ -21,7 +21,8 @@ class InstructionParser { public: InstructionParser(); Instruction parse_single_instruction(std::string str, const std::vector& labels); - ParsedProgram parse_program(const std::string& str); + ParsedProgram parse_program(const std::string& str, + const std::vector& predefined_labels = {}); private: std::unordered_map m_opcode_name_lookup; diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp index ac61590fe0..a135311d9f 100644 --- a/decompiler/Function/TypeAnalysis.cpp +++ b/decompiler/Function/TypeAnalysis.cpp @@ -49,6 +49,7 @@ bool Function::run_type_analysis_ir2(const TypeSpec& my_type, LinkedObjectFile& file, const std::unordered_map>& hints) { (void)file; + ir2.env.set_type_hints(hints); // STEP 0 - set decompiler type system settings for this function. In config we can manually // specify some settings for type propagation to reduce the strictness of type propagation. // TODO - this is kinda hacky so that it works in both unit tests and actual decompilation. @@ -146,7 +147,7 @@ bool Function::run_type_analysis_ir2(const TypeSpec& my_type, auto last_type = op_types.back().get(Register(Reg::GPR, Reg::V0)).typespec(); if (last_type != my_type.last_arg()) { - warnings += fmt::format(";; return type mismatch {} vs {}. ", last_type.print(), + warnings += fmt::format(";; return type mismatch {} vs {}.\n", last_type.print(), my_type.last_arg().print()); } diff --git a/decompiler/IR2/AtomicOp.cpp b/decompiler/IR2/AtomicOp.cpp index 2a825a38a1..6fbe942177 100644 --- a/decompiler/IR2/AtomicOp.cpp +++ b/decompiler/IR2/AtomicOp.cpp @@ -23,20 +23,20 @@ Variable::Variable(VariableMode mode, Register reg, int atomic_idx, bool allow_a } } -std::string Variable::to_string(const Env* env, Print mode) const { +goos::Object Variable::to_form(const Env& env, Print mode) const { switch (mode) { case Print::AS_REG: - return m_reg.to_string(); + return pretty_print::to_symbol(m_reg.to_string()); case Print::FULL: - return fmt::format("{}-{:03d}-{}", m_reg.to_charp(), m_atomic_idx, - m_mode == VariableMode::READ ? 'r' : 'w'); + return pretty_print::to_symbol(fmt::format("{}-{:03d}-{}", m_reg.to_charp(), m_atomic_idx, + m_mode == VariableMode::READ ? 'r' : 'w')); case Print::AS_VARIABLE: - return env->get_variable_name(m_reg, m_atomic_idx, m_mode); + return env.get_variable_name(m_reg, m_atomic_idx, m_mode); case Print::AUTOMATIC: - if (env->has_local_vars()) { - return env->get_variable_name(m_reg, m_atomic_idx, m_mode); + if (env.has_local_vars()) { + return env.get_variable_name(m_reg, m_atomic_idx, m_mode); } else { - return m_reg.to_string(); + return pretty_print::to_symbol(m_reg.to_string()); } default: assert(false); @@ -56,12 +56,12 @@ bool Variable::operator!=(const Variable& other) const { ///////////////////////////// AtomicOp::AtomicOp(int my_idx) : m_my_idx(my_idx) {} -std::string AtomicOp::to_string(const std::vector& labels, const Env* env) const { +std::string AtomicOp::to_string(const std::vector& labels, const Env& env) const { return pretty_print::to_string(to_form(labels, env)); } std::string AtomicOp::to_string(const Env& env) const { - return to_string(env.file->labels, &env); + return to_string(env.file->labels, env); } bool AtomicOp::operator!=(const AtomicOp& other) const { @@ -123,10 +123,10 @@ SimpleAtom SimpleAtom::make_static_address(int static_label_id) { return result; } -goos::Object SimpleAtom::to_form(const std::vector& labels, const Env* env) const { +goos::Object SimpleAtom::to_form(const std::vector& labels, const Env& env) const { switch (m_kind) { case Kind::VARIABLE: - return pretty_print::to_symbol(m_variable.to_string(env)); + return m_variable.to_form(env); case Kind::INTEGER_CONSTANT: return pretty_print::to_symbol(std::to_string(m_int)); case Kind::SYMBOL_PTR: @@ -326,7 +326,7 @@ SimpleExpression::SimpleExpression(Kind kind, const SimpleAtom& arg0, const Simp } goos::Object SimpleExpression::to_form(const std::vector& labels, - const Env* env) const { + const Env& env) const { std::vector forms; if (m_kind == Kind::IDENTITY) { // we are "identity" so just pass through the atom @@ -370,9 +370,8 @@ void SimpleExpression::collect_vars(VariableSet& vars) const { // SetVarOp ///////////////////////////// -goos::Object SetVarOp::to_form(const std::vector& labels, const Env* env) const { - return pretty_print::build_list(pretty_print::to_symbol("set!"), - pretty_print::to_symbol(m_dst.to_string(env)), +goos::Object SetVarOp::to_form(const std::vector& labels, const Env& env) const { + return pretty_print::build_list(pretty_print::to_symbol("set!"), m_dst.to_form(env), m_src.to_form(labels, env)); } @@ -441,7 +440,7 @@ AsmOp::AsmOp(Instruction instr, int my_idx) : AtomicOp(my_idx), m_instr(std::mov } } -goos::Object AsmOp::to_form(const std::vector& labels, const Env* env) const { +goos::Object AsmOp::to_form(const std::vector& labels, const Env& env) const { std::vector forms; forms.push_back(pretty_print::to_symbol("." + m_instr.op_name_to_string())); assert(m_instr.n_dst <= 1); @@ -449,7 +448,7 @@ goos::Object AsmOp::to_form(const std::vector& labels, const En if (m_instr.n_dst == 1) { if (m_dst.has_value()) { // then print it as a variable - forms.push_back(pretty_print::to_symbol(m_dst.value().to_string(env))); + forms.push_back(m_dst.value().to_form(env)); } else { // print the atom forms.push_back(pretty_print::to_symbol(m_instr.get_dst(0).to_string(labels))); @@ -459,7 +458,7 @@ goos::Object AsmOp::to_form(const std::vector& labels, const En assert(m_instr.n_src <= 3); for (int i = 0; i < m_instr.n_src; i++) { if (m_src[i].has_value()) { - forms.push_back(pretty_print::to_symbol(m_src[i].value().to_string(env))); + forms.push_back(m_src[i].value().to_form(env)); } else { forms.push_back(pretty_print::to_symbol(m_instr.get_src(i).to_string(labels))); } @@ -733,7 +732,7 @@ bool IR2_Condition::operator==(const IR2_Condition& other) const { } goos::Object IR2_Condition::to_form(const std::vector& labels, - const Env* env) const { + const Env& env) const { (void)labels; std::vector forms; forms.push_back(pretty_print::to_symbol(get_condition_kind_name(m_kind))); @@ -764,12 +763,11 @@ void IR2_Condition::collect_vars(VariableSet& vars) const { ///////////////////////////// SetVarConditionOp::SetVarConditionOp(Variable dst, IR2_Condition condition, int my_idx) - : AtomicOp(my_idx), m_dst(dst), m_condition(condition) {} + : AtomicOp(my_idx), m_dst(dst), m_condition(std::move(condition)) {} goos::Object SetVarConditionOp::to_form(const std::vector& labels, - const Env* env) const { - return pretty_print::build_list(pretty_print::to_symbol("set!"), - pretty_print::to_symbol(m_dst.to_string(env)), + const Env& env) const { + return pretty_print::build_list(pretty_print::to_symbol("set!"), m_dst.to_form(env), m_condition.to_form(labels, env)); } @@ -812,7 +810,7 @@ StoreOp::StoreOp(int size, bool is_float, SimpleExpression addr, SimpleAtom valu m_addr(std::move(addr)), m_value(std::move(value)) {} -goos::Object StoreOp::to_form(const std::vector& labels, const Env* env) const { +goos::Object StoreOp::to_form(const std::vector& labels, const Env& env) const { std::string store_name; if (m_is_float) { assert(m_size == 4); @@ -876,9 +874,8 @@ void StoreOp::collect_vars(VariableSet& vars) const { LoadVarOp::LoadVarOp(Kind kind, int size, Variable dst, SimpleExpression src, int my_idx) : AtomicOp(my_idx), m_kind(kind), m_size(size), m_dst(dst), m_src(std::move(src)) {} -goos::Object LoadVarOp::to_form(const std::vector& labels, const Env* env) const { - std::vector forms = {pretty_print::to_symbol("set!"), - pretty_print::to_symbol(m_dst.to_string(env))}; +goos::Object LoadVarOp::to_form(const std::vector& labels, const Env& env) const { + std::vector forms = {pretty_print::to_symbol("set!"), m_dst.to_form(env)}; switch (m_kind) { case Kind::FLOAT: @@ -987,39 +984,39 @@ IR2_BranchDelay::IR2_BranchDelay(Kind kind, Variable var0, Variable var1, Variab } goos::Object IR2_BranchDelay::to_form(const std::vector& labels, - const Env* env) const { + const Env& env) const { (void)labels; switch (m_kind) { case Kind::NOP: return pretty_print::build_list("nop!"); case Kind::SET_REG_FALSE: assert(m_var[0].has_value()); - return pretty_print::build_list("set!", m_var[0]->to_string(env), "#f"); + return pretty_print::build_list("set!", m_var[0]->to_form(env), "#f"); case Kind::SET_REG_TRUE: assert(m_var[0].has_value()); - return pretty_print::build_list("set!", m_var[0]->to_string(env), "#t"); + return pretty_print::build_list("set!", m_var[0]->to_form(env), "#t"); case Kind::SET_REG_REG: assert(m_var[0].has_value()); assert(m_var[1].has_value()); - return pretty_print::build_list("set!", m_var[0]->to_string(env), m_var[1]->to_string(env)); + return pretty_print::build_list("set!", m_var[0]->to_form(env), m_var[1]->to_form(env)); case Kind::SET_BINTEGER: assert(m_var[0].has_value()); - return pretty_print::build_list("set!", m_var[0]->to_string(env), "binteger"); + return pretty_print::build_list("set!", m_var[0]->to_form(env), "binteger"); case Kind::SET_PAIR: assert(m_var[0].has_value()); - return pretty_print::build_list("set!", m_var[0]->to_string(env), "pair"); + return pretty_print::build_list("set!", m_var[0]->to_form(env), "pair"); case Kind::DSLLV: assert(m_var[0].has_value()); assert(m_var[1].has_value()); assert(m_var[2].has_value()); return pretty_print::build_list( - "set!", m_var[0]->to_string(env), - pretty_print::build_list("sll", m_var[1]->to_string(env), m_var[2]->to_string(env))); + "set!", m_var[0]->to_form(env), + pretty_print::build_list("sll", m_var[1]->to_form(env), m_var[2]->to_form(env))); case Kind::NEGATE: assert(m_var[0].has_value()); assert(m_var[1].has_value()); - return pretty_print::build_list("set!", m_var[0]->to_string(env), - pretty_print::build_list("-", m_var[1]->to_string(env))); + return pretty_print::build_list("set!", m_var[0]->to_form(env), + pretty_print::build_list("-", m_var[1]->to_form(env))); default: assert(false); } @@ -1082,7 +1079,7 @@ BranchOp::BranchOp(bool likely, m_label(label), m_branch_delay(branch_delay) {} -goos::Object BranchOp::to_form(const std::vector& labels, const Env* env) const { +goos::Object BranchOp::to_form(const std::vector& labels, const Env& env) const { std::vector forms; if (m_likely) { @@ -1133,7 +1130,7 @@ void BranchOp::collect_vars(VariableSet& vars) const { SpecialOp::SpecialOp(Kind kind, int my_idx) : AtomicOp(my_idx), m_kind(kind) {} -goos::Object SpecialOp::to_form(const std::vector& labels, const Env* env) const { +goos::Object SpecialOp::to_form(const std::vector& labels, const Env& env) const { (void)labels; (void)env; switch (m_kind) { @@ -1198,13 +1195,13 @@ CallOp::CallOp(int my_idx) m_function_var(VariableMode::READ, Register(Reg::GPR, Reg::T9), my_idx), m_return_var(VariableMode::WRITE, Register(Reg::GPR, Reg::V0), my_idx) {} -goos::Object CallOp::to_form(const std::vector& labels, const Env* env) const { +goos::Object CallOp::to_form(const std::vector& labels, const Env& env) const { (void)labels; (void)env; std::vector forms; forms.push_back(pretty_print::to_symbol("call!")); for (auto& x : m_arg_vars) { - forms.push_back(pretty_print::to_symbol(x.to_string(env))); + forms.push_back(x.to_form(env)); } return pretty_print::build_list(forms); } @@ -1256,10 +1253,10 @@ ConditionalMoveFalseOp::ConditionalMoveFalseOp(Variable dst, Variable src, bool : AtomicOp(my_idx), m_dst(dst), m_src(src), m_on_zero(on_zero) {} goos::Object ConditionalMoveFalseOp::to_form(const std::vector& labels, - const Env* env) const { + const Env& env) const { (void)labels; return pretty_print::build_list(m_on_zero ? "cmove-#f-zero" : "cmove-#f-nonzero", - m_dst.to_string(env), m_src.to_string(env)); + m_dst.to_form(env), m_src.to_form(env)); } bool ConditionalMoveFalseOp::operator==(const AtomicOp& other) const { @@ -1315,9 +1312,9 @@ bool get_as_reg_offset(const SimpleExpression& expr, IR2_RegOffset* out) { FunctionEndOp::FunctionEndOp(int my_idx) : AtomicOp(my_idx), m_return_reg(VariableMode::READ, Register(Reg::GPR, Reg::V0), my_idx) {} -goos::Object FunctionEndOp::to_form(const std::vector&, const Env* env) const { +goos::Object FunctionEndOp::to_form(const std::vector&, const Env& env) const { if (m_function_has_return_value) { - return pretty_print::build_list("ret-value", m_return_reg.to_string(env)); + return pretty_print::build_list("ret-value", m_return_reg.to_form(env)); } else { return pretty_print::build_list("ret-none"); } diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index c31a0fb611..b5f07d53ce 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -43,12 +43,12 @@ class DecompilerTypeSystem; class AtomicOp { public: explicit AtomicOp(int my_idx); - std::string to_string(const std::vector& labels, const Env* env) const; + std::string to_string(const std::vector& labels, const Env& env) const; std::string to_string(const Env& env) const; std::string reg_type_info_as_string(const TypeState& init_types, const TypeState& end_types) const; virtual goos::Object to_form(const std::vector& labels, - const Env* env) const = 0; + const Env& env) const = 0; virtual bool operator==(const AtomicOp& other) const = 0; bool operator!=(const AtomicOp& other) const; @@ -75,9 +75,9 @@ class AtomicOp { TypeState propagate_types(const TypeState& input, const Env& env, DecompilerTypeSystem& dts); int op_id() const { return m_my_idx; } - const std::vector& read_regs() { return m_read_regs; } - const std::vector& write_regs() { return m_write_regs; } - const std::vector& clobber_regs() { return m_clobber_regs; } + const std::vector& read_regs() const { return m_read_regs; } + const std::vector& write_regs() const { return m_write_regs; } + const std::vector& clobber_regs() const { return m_clobber_regs; } void add_clobber_reg(Register r) { m_clobber_regs.push_back(r); } void clear_register_info() { m_read_regs.clear(); @@ -128,7 +128,7 @@ class SimpleAtom { static SimpleAtom make_empty_list(); static SimpleAtom make_int_constant(s64 value); static SimpleAtom make_static_address(int static_label_id); - goos::Object to_form(const std::vector& labels, const Env* env) const; + goos::Object to_form(const std::vector& labels, const Env& env) const; void collect_vars(VariableSet& vars) const; bool is_var() const { return m_kind == Kind::VARIABLE; } @@ -222,7 +222,7 @@ class SimpleExpression { SimpleExpression() = default; SimpleExpression(Kind kind, const SimpleAtom& arg0); SimpleExpression(Kind kind, const SimpleAtom& arg0, const SimpleAtom& arg1); - goos::Object to_form(const std::vector& labels, const Env* env) const; + goos::Object to_form(const std::vector& labels, const Env& env) const; bool operator==(const SimpleExpression& other) const; bool is_identity() const { return m_kind == Kind::IDENTITY; } void get_regs(std::vector* out) const; @@ -253,7 +253,7 @@ class SetVarOp : public AtomicOp { assert(my_idx == dst.idx()); } virtual goos::Object to_form(const std::vector& labels, - const Env* env) const override; + const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; @@ -279,7 +279,7 @@ class SetVarOp : public AtomicOp { class AsmOp : public AtomicOp { public: AsmOp(Instruction instr, int my_idx); - goos::Object to_form(const std::vector& labels, const Env* env) const override; + goos::Object to_form(const std::vector& labels, const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; @@ -349,7 +349,7 @@ class IR2_Condition { void invert(); bool operator==(const IR2_Condition& other) const; bool operator!=(const IR2_Condition& other) const { return !((*this) == other); } - goos::Object to_form(const std::vector& labels, const Env* env) const; + goos::Object to_form(const std::vector& labels, const Env& env) const; void get_regs(std::vector* out) const; Kind kind() const { return m_kind; } const SimpleAtom& src(int i) const { return m_src[i]; } @@ -371,7 +371,7 @@ IR2_Condition::Kind get_condition_opposite(IR2_Condition::Kind kind); class SetVarConditionOp : public AtomicOp { public: SetVarConditionOp(Variable dst, IR2_Condition condition, int my_idx); - goos::Object to_form(const std::vector& labels, const Env* env) const override; + goos::Object to_form(const std::vector& labels, const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; @@ -396,7 +396,7 @@ class SetVarConditionOp : public AtomicOp { class StoreOp : public AtomicOp { public: StoreOp(int size, bool is_float, SimpleExpression addr, SimpleAtom value, int my_idx); - goos::Object to_form(const std::vector& labels, const Env* env) const override; + goos::Object to_form(const std::vector& labels, const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; @@ -406,6 +406,8 @@ class StoreOp : public AtomicOp { const Env& env, DecompilerTypeSystem& dts) override; void collect_vars(VariableSet& vars) const override; + const SimpleExpression& addr() const { return m_addr; } + const SimpleAtom& value() const { return m_value; } private: int m_size; @@ -422,7 +424,7 @@ class LoadVarOp : public AtomicOp { public: enum class Kind { UNSIGNED, SIGNED, FLOAT }; LoadVarOp(Kind kind, int size, Variable dst, SimpleExpression src, int my_idx); - goos::Object to_form(const std::vector& labels, const Env* env) const override; + goos::Object to_form(const std::vector& labels, const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; @@ -466,7 +468,7 @@ class IR2_BranchDelay { IR2_BranchDelay(Kind kind, Variable var0); IR2_BranchDelay(Kind kind, Variable var0, Variable var1); IR2_BranchDelay(Kind kind, Variable var0, Variable var1, Variable var2); - goos::Object to_form(const std::vector& labels, const Env* env) const; + goos::Object to_form(const std::vector& labels, const Env& env) const; bool operator==(const IR2_BranchDelay& other) const; void get_regs(std::vector* write, std::vector* read) const; bool is_known() const { return m_kind != Kind::UNKNOWN; } @@ -497,7 +499,7 @@ class BranchOp : public AtomicOp { int label, IR2_BranchDelay branch_delay, int my_idx); - goos::Object to_form(const std::vector& labels, const Env* env) const override; + goos::Object to_form(const std::vector& labels, const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; @@ -533,7 +535,7 @@ class SpecialOp : public AtomicOp { }; SpecialOp(Kind kind, int my_idx); - goos::Object to_form(const std::vector& labels, const Env* env) const override; + goos::Object to_form(const std::vector& labels, const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; @@ -556,7 +558,7 @@ class SpecialOp : public AtomicOp { class CallOp : public AtomicOp { public: explicit CallOp(int my_idx); - goos::Object to_form(const std::vector& labels, const Env* env) const override; + goos::Object to_form(const std::vector& labels, const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; @@ -593,7 +595,7 @@ class CallOp : public AtomicOp { class ConditionalMoveFalseOp : public AtomicOp { public: ConditionalMoveFalseOp(Variable dst, Variable src, bool on_zero, int my_idx); - goos::Object to_form(const std::vector& labels, const Env* env) const override; + goos::Object to_form(const std::vector& labels, const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; @@ -625,7 +627,7 @@ class FunctionEndOp : public AtomicOp { public: explicit FunctionEndOp(int my_idx); virtual goos::Object to_form(const std::vector& labels, - const Env* env) const override; + const Env& env) const override; bool operator==(const AtomicOp& other) const override; bool is_sequence_point() const override; Variable get_set_destination() const override; diff --git a/decompiler/IR2/AtomicOpForm.cpp b/decompiler/IR2/AtomicOpForm.cpp index 28d9975bf0..4406893ce1 100644 --- a/decompiler/IR2/AtomicOpForm.cpp +++ b/decompiler/IR2/AtomicOpForm.cpp @@ -25,7 +25,8 @@ DerefToken to_token(FieldReverseLookupOutput::Token in) { case FieldReverseLookupOutput::Token::Kind::CONSTANT_IDX: return DerefToken::make_int_constant(in.idx); default: - assert(false); + // temp + throw std::runtime_error("Cannot convert rd lookup token to deref token"); } } } // namespace @@ -47,7 +48,32 @@ ConditionElement* IR2_Condition::get_as_form(FormPool& pool, const Env& env, int return pool.alloc_element(m_kind, vars[0], vars[1], consumed); } -FormElement* SetVarOp::get_as_form(FormPool& pool, const Env&) const { +FormElement* SetVarOp::get_as_form(FormPool& pool, const Env& env) const { + if (env.has_type_analysis() && m_src.args() == 2 && m_src.get_arg(1).is_int() && + m_src.get_arg(0).is_var() && m_src.kind() == SimpleExpression::Kind::ADD) { + auto arg0_type = env.get_types_before_op(m_my_idx).get(m_src.get_arg(0).var().reg()); + if (arg0_type.kind == TP_Type::Kind::TYPESPEC) { + // access a field. + FieldReverseLookupInput rd_in; + rd_in.deref = std::nullopt; + rd_in.stride = 0; + rd_in.offset = m_src.get_arg(1).get_int(); + rd_in.base_type = arg0_type.typespec(); + auto rd = env.dts->ts.reverse_field_lookup(rd_in); + + if (rd.success) { + auto source = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(m_src.get_arg(0).var()).as_expr(), m_my_idx); + std::vector tokens; + for (auto& x : rd.tokens) { + tokens.push_back(to_token(x)); + } + auto load = + pool.alloc_single_element_form(nullptr, source, rd.addr_of, tokens); + return pool.alloc_element(m_dst, load, true); + } + } + } auto source = pool.alloc_single_element_form(nullptr, m_src, m_my_idx); return pool.alloc_element(m_dst, source, is_sequence_point()); } @@ -103,10 +129,48 @@ FormElement* StoreOp::get_as_form(FormPool& pool, const Env& env) const { auto rd = env.dts->ts.reverse_field_lookup(rd_in); if (rd.success) { - // throw std::runtime_error("RD Success in StoreOp::get_as_form"); - return pool.alloc_element(this); - } else { - return pool.alloc_element(this); + auto val = pool.alloc_single_element_form( + nullptr, m_value.as_expr(), m_my_idx); + auto source = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(ro.var).as_expr(), m_my_idx); + std::vector tokens; + for (auto& x : rd.tokens) { + tokens.push_back(to_token(x)); + } + assert(!rd.addr_of); + auto addr = + pool.alloc_single_element_form(nullptr, source, rd.addr_of, tokens); + return pool.alloc_element(addr, val); + } + + if (input_type.typespec() == TypeSpec("pointer")) { + std::string cast_type; + switch (m_size) { + case 1: + cast_type = "int8"; + break; + case 2: + cast_type = "int16"; + break; + case 4: + cast_type = "int32"; + break; + case 8: + cast_type = "int64"; + break; + default: + assert(false); + } + + auto source = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(ro.var).as_expr(), m_my_idx); + auto cast_source = pool.alloc_single_element_form( + nullptr, TypeSpec("pointer", {TypeSpec(cast_type)}), source); + auto deref = pool.alloc_single_element_form(nullptr, cast_source, false, + std::vector()); + auto val = pool.alloc_single_element_form( + nullptr, m_value.as_expr(), m_my_idx); + return pool.alloc_element(deref, val); } } } @@ -136,7 +200,7 @@ FormElement* LoadVarOp::get_as_form(FormPool& pool, const Env& env) const { } // todo structure method - // todo pointer + // todo product trick // todo type of basic fallback @@ -147,6 +211,39 @@ FormElement* LoadVarOp::get_as_form(FormPool& pool, const Env& env) const { return pool.alloc_element(m_dst, load, true); } + if (input_type.kind == TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT) { + FieldReverseLookupInput rd_in; + DerefKind dk; + dk.is_store = false; + dk.reg_kind = get_reg_kind(ro.reg); + dk.sign_extend = m_kind == Kind::SIGNED; + dk.size = m_size; + rd_in.deref = dk; + rd_in.base_type = input_type.get_obj_plus_const_mult_typespec(); + rd_in.stride = input_type.get_multiplier(); + rd_in.offset = ro.offset; + auto rd = env.dts->ts.reverse_field_lookup(rd_in); + + if (rd.success) { + // load_path_set = true; + // load_path_addr_of = rd.addr_of; + // load_path_base = ro.reg_ir; + // for (auto& x : rd.tokens) { + // load_path.push_back(x.print()); + // } + std::vector tokens; + assert(!rd.tokens.empty()); + for (size_t i = 0; i < rd.tokens.size() - 1; i++) { + tokens.push_back(to_token(rd.tokens.at(i))); + } + assert(rd.tokens.back().kind == FieldReverseLookupOutput::Token::Kind::VAR_IDX); + + auto load = pool.alloc_single_element_form(nullptr, ro.var, tokens, + input_type.get_multiplier()); + return pool.alloc_element(m_dst, load, true); + } + } + if (env.allow_sloppy_pair_typing() && m_kind == Kind::SIGNED && m_size == 4 && (input_type.typespec() == TypeSpec("object") || input_type.typespec() == TypeSpec("pair"))) { @@ -196,6 +293,39 @@ FormElement* LoadVarOp::get_as_form(FormPool& pool, const Env& env) const { pool.alloc_single_element_form(nullptr, source, rd.addr_of, tokens); return pool.alloc_element(m_dst, load, true); } + + if (input_type.typespec() == TypeSpec("pointer")) { + std::string cast_type; + switch (m_size) { + case 1: + cast_type = "int8"; + break; + case 2: + cast_type = "int16"; + break; + case 4: + cast_type = "int32"; + break; + case 8: + cast_type = "int64"; + break; + default: + assert(false); + } + if (m_kind == Kind::UNSIGNED) { + cast_type = "u" + cast_type; + } else if (m_kind == Kind::FLOAT) { + assert(false); // nyi + } + + auto dest = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(ro.var).as_expr(), m_my_idx); + auto cast_dest = pool.alloc_single_element_form( + nullptr, TypeSpec("pointer", {TypeSpec(cast_type)}), dest); + auto deref = pool.alloc_single_element_form(nullptr, cast_dest, false, + std::vector()); + return pool.alloc_element(m_dst, deref, true); + } } } diff --git a/decompiler/IR2/AtomicOpTypeAnalysis.cpp b/decompiler/IR2/AtomicOpTypeAnalysis.cpp index e59d2e76c2..8bbcd684bb 100644 --- a/decompiler/IR2/AtomicOpTypeAnalysis.cpp +++ b/decompiler/IR2/AtomicOpTypeAnalysis.cpp @@ -158,7 +158,7 @@ TP_Type SimpleExpression::get_type(const TypeState& input, return get_type_int1(input, env, dts); default: throw std::runtime_error("Simple expression can't get_type: " + - to_form(env.file->labels, &env).print()); + to_form(env.file->labels, env).print()); } return {}; } @@ -187,7 +187,7 @@ TP_Type SimpleExpression::get_type_int1(const TypeState& input, } throw std::runtime_error("IR_IntMath1::get_expression_type case not handled: " + - to_form(env.file->labels, &env).print() + " " + arg_type.print()); + to_form(env.file->labels, env).print() + " " + arg_type.print()); } /*! @@ -220,6 +220,11 @@ TP_Type SimpleExpression::get_type_int2(const TypeState& input, } } break; + case Kind::MUL_UNSIGNED: { + // unsigned multiply will always return a unsigned number. + return TP_Type::make_from_ts("uint"); + } break; + case Kind::DIV_SIGNED: case Kind::MOD_SIGNED: { if (is_int_or_uint(dts, arg0_type) && is_int_or_uint(dts, arg1_type)) { @@ -330,7 +335,7 @@ TP_Type SimpleExpression::get_type_int2(const TypeState& input, } throw std::runtime_error(fmt::format("Can't get_type_int2: {}, args {} and {}", - to_form(env.file->labels, &env).print(), arg0_type.print(), + to_form(env.file->labels, env).print(), arg0_type.print(), arg1_type.print())); } @@ -377,7 +382,7 @@ TypeState IR2_BranchDelay::propagate_types(const TypeState& input, break; default: throw std::runtime_error("Unhandled branch delay in type_prop: " + - to_form(env.file->labels, &env).print()); + to_form(env.file->labels, env).print()); } return output; } @@ -585,7 +590,7 @@ TP_Type LoadVarOp::get_src_type(const TypeState& input, printf("input type is %s, offset is %d, sign %d size %d\n", rd_in.base_type.print().c_str(), rd_in.offset, rd_in.deref.value().sign_extend, rd_in.deref.value().size); throw std::runtime_error(fmt::format("Could not get type of load: {}. Reverse Deref Failed.", - to_form(env.file->labels, &env).print())); + to_form(env.file->labels, env).print())); } if (rd.success) { @@ -619,10 +624,10 @@ TP_Type LoadVarOp::get_src_type(const TypeState& input, } throw std::runtime_error( - fmt::format("Could not get type of load: {}. ", to_form(env.file->labels, &env).print())); + fmt::format("Could not get type of load: {}. ", to_form(env.file->labels, env).print())); throw std::runtime_error("LoadVarOp can't get_src_type: " + - to_form(env.file->labels, &env).print()); + to_form(env.file->labels, env).print()); } TypeState LoadVarOp::propagate_types_internal(const TypeState& input, @@ -677,6 +682,14 @@ TypeState CallOp::propagate_types_internal(const TypeState& input, m_call_type.get_arg(m_call_type.arg_count() - 1) = TypeSpec(dts.type_prop_settings.current_method_type); m_call_type_set = true; + + m_read_regs.clear(); + m_arg_vars.clear(); + m_read_regs.emplace_back(Reg::GPR, Reg::T9); + for (int i = 0; i < int(m_call_type.arg_count()) - 1; i++) { + m_read_regs.emplace_back(Reg::GPR, arg_regs[i]); + m_arg_vars.push_back(Variable(VariableMode::READ, m_read_regs.back(), m_my_idx)); + } return end_types; } diff --git a/decompiler/IR2/Env.cpp b/decompiler/IR2/Env.cpp index cd965c5baa..03b29cd2b1 100644 --- a/decompiler/IR2/Env.cpp +++ b/decompiler/IR2/Env.cpp @@ -4,10 +4,20 @@ #include "Env.h" #include "Form.h" #include "decompiler/analysis/atomic_op_builder.h" +#include "common/goos/PrettyPrinter.h" namespace decompiler { -std::string Env::get_variable_name(Register reg, int atomic_idx, VariableMode mode) const { - return m_var_names.lookup(reg, atomic_idx, mode).name(); +goos::Object Env::get_variable_name(Register reg, int atomic_idx, VariableMode mode) const { + auto type_kv = m_typehints.find(atomic_idx); + if (type_kv != m_typehints.end()) { + for (auto& x : type_kv->second) { + if (x.reg == reg) { + return pretty_print::build_list("the-as", x.type_name, + m_var_names.lookup(reg, atomic_idx, mode).name()); + } + } + } + return pretty_print::to_symbol(m_var_names.lookup(reg, atomic_idx, mode).name()); } /*! @@ -41,7 +51,11 @@ void Env::set_types(const std::vector& block_init_types, std::string Env::print_local_var_types(const Form* top_level_form) const { assert(has_local_vars()); + auto var_info = extract_visible_variables(top_level_form); std::vector entries; + for (auto x : var_info) { + entries.push_back(fmt::format("{}: {}", x.name(), x.type.typespec().print())); + } if (top_level_form) { VariableSet var_set; @@ -112,7 +126,8 @@ std::string Env::print_local_var_types(const Form* top_level_form) const { } constexpr int row_len = 100; - int per_row = std::max(1, row_len / max_len); + // avoid divide by zero on empty env case. + int per_row = max_len ? std::max(1, row_len / max_len) : 1; int entry_len = 100 / per_row; std::string result; @@ -135,6 +150,96 @@ std::string Env::print_local_var_types(const Form* top_level_form) const { return result; } +std::vector Env::extract_visible_variables( + const Form* top_level_form) const { + assert(has_local_vars()); + std::vector entries; + if (top_level_form) { + VariableSet var_set; + top_level_form->collect_vars(var_set); + + // we want to sort them for easier reading: + std::vector> vars; + + for (auto& x : var_set) { + vars.push_back(std::make_pair(get_ssa_var(x), x)); + } + + std::sort(vars.begin(), vars.end(), + [](const std::pair& a, const std::pair& b) { + return a.first < b.first; + }); + + RegId* prev = nullptr; + for (auto& x : vars) { + // sorted by ssa var and there are likely duplicates of Variables and SSA vars, only print + // unique ssa variables. + if (prev && x.first == *prev) { + continue; + } + prev = &x.first; + auto& map = x.second.mode() == VariableMode::WRITE ? m_var_names.write_vars.at(x.second.reg()) + : m_var_names.read_vars.at(x.second.reg()); + auto& info = map.at(x.first.id); + + if (info.initialized) { + entries.push_back(info); + } else { + assert(false); + } + } + } else { + std::unordered_map, Register::hash> printed; + + for (auto& reg_info : m_var_names.read_vars) { + auto& reg_printed = printed[reg_info.first]; + for (int var_id = 0; var_id < int(reg_info.second.size()); var_id++) { + auto& info = reg_info.second.at(var_id); + if (info.initialized) { + reg_printed.insert(var_id); + entries.push_back(info); + } + } + } + + for (auto& reg_info : m_var_names.write_vars) { + auto& reg_printed = printed[reg_info.first]; + for (int var_id = 0; var_id < int(reg_info.second.size()); var_id++) { + auto& info = reg_info.second.at(var_id); + if (info.initialized) { + if (reg_printed.find(var_id) == reg_printed.end()) { + entries.push_back(info); + } + } + } + } + } + return entries; +} + +goos::Object Env::local_var_type_list(const Form* top_level_form, + int nargs_to_ignore, + int* count_out) const { + assert(nargs_to_ignore <= 8); + auto vars = extract_visible_variables(top_level_form); + + std::vector elts; + elts.push_back(pretty_print::to_symbol("local-vars")); + int count = 0; + for (auto& x : vars) { + if (x.reg_id.reg.get_kind() == Reg::GPR && x.reg_id.reg.get_gpr() < Reg::A0 + nargs_to_ignore && + x.reg_id.reg.get_gpr() >= Reg::A0) { + continue; + } + count++; + elts.push_back(pretty_print::build_list(x.name(), x.type.typespec().print())); + } + if (count_out) { + *count_out = count; + } + return pretty_print::build_list(elts); +} + std::unordered_set Env::get_ssa_var(const VariableSet& vars) const { std::unordered_set result; for (auto& x : vars) { diff --git a/decompiler/IR2/Env.h b/decompiler/IR2/Env.h index 5c3a5f9f18..e6ea61ee4d 100644 --- a/decompiler/IR2/Env.h +++ b/decompiler/IR2/Env.h @@ -3,10 +3,12 @@ #include #include #include +#include #include "decompiler/util/TP_Type.h" #include "decompiler/Disasm/Register.h" #include "decompiler/IR2/IR2_common.h" #include "decompiler/analysis/reg_usage.h" +#include "decompiler/config.h" namespace decompiler { class LinkedObjectFile; @@ -36,7 +38,7 @@ class Env { return m_reg_use; } - std::string get_variable_name(Register reg, int atomic_idx, VariableMode mode) const; + goos::Object get_variable_name(Register reg, int atomic_idx, VariableMode mode) const; /*! * Get the types in registers _after_ the given operation has completed. @@ -72,13 +74,20 @@ class Env { void set_end_var(Variable var) { m_end_var = var; } const Variable& end_var() const { return m_end_var; } + std::vector extract_visible_variables(const Form* top_level_form) const; std::string print_local_var_types(const Form* top_level_form) const; + goos::Object local_var_type_list(const Form* top_level_form, + int nargs_to_ignore, + int* count_out) const; std::unordered_set get_ssa_var(const VariableSet& vars) const; RegId get_ssa_var(const Variable& var) const; bool allow_sloppy_pair_typing() const { return m_allow_sloppy_pair_typing; } void set_sloppy_pair_typing() { m_allow_sloppy_pair_typing = true; } + void set_type_hints(const std::unordered_map>& hints) { + m_typehints = hints; + } LinkedObjectFile* file = nullptr; DecompilerTypeSystem* dts = nullptr; @@ -98,5 +107,7 @@ class Env { std::vector m_op_init_types; bool m_allow_sloppy_pair_typing = false; + + std::unordered_map> m_typehints; }; } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index 5e4a07e7f6..3a229fe08b 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -103,6 +103,12 @@ void Form::collect_vars(VariableSet& vars) const { } } +void Form::get_modified_regs(RegSet& regs) const { + for (auto e : m_elements) { + e->get_modified_regs(regs); + } +} + ///////////////////////////// // SimpleExpressionElement ///////////////////////////// @@ -111,7 +117,7 @@ SimpleExpressionElement::SimpleExpressionElement(SimpleExpression expr, int my_i : m_expr(std::move(expr)), m_my_idx(my_idx) {} goos::Object SimpleExpressionElement::to_form(const Env& env) const { - return m_expr.to_form(env.file->labels, &env); + return m_expr.to_form(env.file->labels, env); } void SimpleExpressionElement::apply(const std::function& f) { @@ -128,6 +134,10 @@ void SimpleExpressionElement::collect_vars(VariableSet& vars) const { m_expr.collect_vars(vars); } +void SimpleExpressionElement::get_modified_regs(RegSet& regs) const { + (void)regs; +} + ///////////////////////////// // StoreElement ///////////////////////////// @@ -135,7 +145,7 @@ void SimpleExpressionElement::collect_vars(VariableSet& vars) const { StoreElement::StoreElement(const StoreOp* op) : m_op(op) {} goos::Object StoreElement::to_form(const Env& env) const { - return m_op->to_form(env.file->labels, &env); + return m_op->to_form(env.file->labels, env); } void StoreElement::apply(const std::function& f) { @@ -148,6 +158,10 @@ void StoreElement::collect_vars(VariableSet& vars) const { return m_op->collect_vars(vars); } +void StoreElement::get_modified_regs(RegSet& regs) const { + (void)regs; +} + ///////////////////////////// // LoadSourceElement ///////////////////////////// @@ -206,6 +220,10 @@ void LoadSourceElement::collect_vars(VariableSet& vars) const { m_addr->collect_vars(vars); } +void LoadSourceElement::get_modified_regs(RegSet& regs) const { + m_addr->get_modified_regs(regs); +} + ///////////////////////////// // SimpleAtomElement ///////////////////////////// @@ -213,7 +231,7 @@ void LoadSourceElement::collect_vars(VariableSet& vars) const { SimpleAtomElement::SimpleAtomElement(const SimpleAtom& atom) : m_atom(atom) {} goos::Object SimpleAtomElement::to_form(const Env& env) const { - return m_atom.to_form(env.file->labels, &env); + return m_atom.to_form(env.file->labels, env); } void SimpleAtomElement::apply(const std::function& f) { @@ -226,6 +244,10 @@ void SimpleAtomElement::collect_vars(VariableSet& vars) const { return m_atom.collect_vars(vars); } +void SimpleAtomElement::get_modified_regs(RegSet& regs) const { + (void)regs; +} + ///////////////////////////// // SetVarElement ///////////////////////////// @@ -236,7 +258,7 @@ SetVarElement::SetVarElement(const Variable& var, Form* value, bool is_sequence_ } goos::Object SetVarElement::to_form(const Env& env) const { - return pretty_print::build_list("set!", m_dst.to_string(&env), m_src->to_form(env)); + return pretty_print::build_list("set!", m_dst.to_form(env), m_src->to_form(env)); } void SetVarElement::apply(const std::function& f) { @@ -257,6 +279,11 @@ void SetVarElement::collect_vars(VariableSet& vars) const { m_src->collect_vars(vars); } +void SetVarElement::get_modified_regs(RegSet& regs) const { + regs.insert(m_dst.reg()); + m_src->get_modified_regs(regs); +} + ///////////////////////////// // SetFormFormElement ///////////////////////////// @@ -288,6 +315,10 @@ void SetFormFormElement::collect_vars(VariableSet& vars) const { m_dst->collect_vars(vars); } +void SetFormFormElement::get_modified_regs(RegSet& regs) const { + (void)regs; +} + ///////////////////////////// // AtomicOpElement ///////////////////////////// @@ -295,7 +326,7 @@ void SetFormFormElement::collect_vars(VariableSet& vars) const { AtomicOpElement::AtomicOpElement(const AtomicOp* op) : m_op(op) {} goos::Object AtomicOpElement::to_form(const Env& env) const { - return m_op->to_form(env.file->labels, &env); + return m_op->to_form(env.file->labels, env); } void AtomicOpElement::apply(const std::function& f) { @@ -308,6 +339,16 @@ void AtomicOpElement::collect_vars(VariableSet& vars) const { m_op->collect_vars(vars); } +void AtomicOpElement::get_modified_regs(RegSet& regs) const { + for (auto r : m_op->write_regs()) { + regs.insert(r); + } + + for (auto r : m_op->clobber_regs()) { + regs.insert(r); + } +} + ///////////////////////////// // ConditionElement ///////////////////////////// @@ -325,7 +366,7 @@ goos::Object ConditionElement::to_form(const Env& env) const { std::vector forms; forms.push_back(pretty_print::to_symbol(get_condition_kind_name(m_kind))); for (int i = 0; i < get_condition_num_args(m_kind); i++) { - forms.push_back(m_src[i]->to_form(env.file->labels, &env)); + forms.push_back(m_src[i]->to_form(env.file->labels, env)); } if (forms.size() > 1) { return pretty_print::build_list(forms); @@ -336,7 +377,7 @@ goos::Object ConditionElement::to_form(const Env& env) const { goos::Object ConditionElement::to_form_as_condition(const Env& env) const { if (m_kind == IR2_Condition::Kind::TRUTHY) { - return m_src[0]->to_form(env.file->labels, &env); + return m_src[0]->to_form(env.file->labels, env); } else { return to_form(env); } @@ -360,6 +401,10 @@ void ConditionElement::collect_vars(VariableSet& vars) const { } } +void ConditionElement::get_modified_regs(RegSet& regs) const { + (void)regs; +} + ///////////////////////////// // FunctionCallElement ///////////////////////////// @@ -367,7 +412,7 @@ void ConditionElement::collect_vars(VariableSet& vars) const { FunctionCallElement::FunctionCallElement(const CallOp* op) : m_op(op) {} goos::Object FunctionCallElement::to_form(const Env& env) const { - return m_op->to_form(env.file->labels, &env); + return m_op->to_form(env.file->labels, env); } void FunctionCallElement::apply(const std::function& f) { @@ -380,6 +425,16 @@ void FunctionCallElement::collect_vars(VariableSet& vars) const { return m_op->collect_vars(vars); } +void FunctionCallElement::get_modified_regs(RegSet& regs) const { + for (auto r : m_op->write_regs()) { + regs.insert(r); + } + + for (auto r : m_op->clobber_regs()) { + regs.insert(r); + } +} + ///////////////////////////// // BranchElement ///////////////////////////// @@ -387,7 +442,7 @@ void FunctionCallElement::collect_vars(VariableSet& vars) const { BranchElement::BranchElement(const BranchOp* op) : m_op(op) {} goos::Object BranchElement::to_form(const Env& env) const { - return m_op->to_form(env.file->labels, &env); + return m_op->to_form(env.file->labels, env); } void BranchElement::apply(const std::function& f) { @@ -400,6 +455,16 @@ void BranchElement::collect_vars(VariableSet& vars) const { return m_op->collect_vars(vars); } +void BranchElement::get_modified_regs(RegSet& regs) const { + for (auto r : m_op->write_regs()) { + regs.insert(r); + } + + for (auto r : m_op->clobber_regs()) { + regs.insert(r); + } +} + ///////////////////////////// // ReturnElement ///////////////////////////// @@ -428,6 +493,12 @@ void ReturnElement::collect_vars(VariableSet& vars) const { dead_code->collect_vars(vars); } +void ReturnElement::get_modified_regs(RegSet& regs) const { + for (auto x : {return_code, dead_code}) { + x->get_modified_regs(regs); + } +} + ///////////////////////////// // BreakElement ///////////////////////////// @@ -456,6 +527,12 @@ void BreakElement::collect_vars(VariableSet& vars) const { dead_code->collect_vars(vars); } +void BreakElement::get_modified_regs(RegSet& regs) const { + for (auto x : {return_code, dead_code}) { + x->get_modified_regs(regs); + } +} + ///////////////////////////// // CondWithElseElement ///////////////////////////// @@ -513,6 +590,14 @@ void CondWithElseElement::collect_vars(VariableSet& vars) const { else_ir->collect_vars(vars); } +void CondWithElseElement::get_modified_regs(RegSet& regs) const { + for (auto& e : entries) { + e.condition->get_modified_regs(regs); + e.body->get_modified_regs(regs); + } + else_ir->get_modified_regs(regs); +} + ///////////////////////////// // EmptyElement ///////////////////////////// @@ -526,8 +611,8 @@ void EmptyElement::apply(const std::function& f) { } void EmptyElement::apply_form(const std::function&) {} - void EmptyElement::collect_vars(VariableSet&) const {} +void EmptyElement::get_modified_regs(RegSet&) const {} ///////////////////////////// // WhileElement @@ -558,6 +643,11 @@ void WhileElement::collect_vars(VariableSet& vars) const { condition->collect_vars(vars); } +void WhileElement::get_modified_regs(RegSet& regs) const { + condition->get_modified_regs(regs); + body->get_modified_regs(regs); +} + ///////////////////////////// // UntilElement ///////////////////////////// @@ -587,6 +677,11 @@ void UntilElement::collect_vars(VariableSet& vars) const { condition->collect_vars(vars); } +void UntilElement::get_modified_regs(RegSet& regs) const { + condition->get_modified_regs(regs); + body->get_modified_regs(regs); +} + ///////////////////////////// // ShortCircuitElement ///////////////////////////// @@ -640,6 +735,12 @@ void ShortCircuitElement::collect_vars(VariableSet& vars) const { } } +void ShortCircuitElement::get_modified_regs(RegSet& regs) const { + for (auto& e : entries) { + e.condition->get_modified_regs(regs); + } +} + ///////////////////////////// // CondNoElseElement ///////////////////////////// @@ -698,6 +799,14 @@ void CondNoElseElement::collect_vars(VariableSet& vars) const { } } } + +void CondNoElseElement::get_modified_regs(RegSet& regs) const { + for (auto& e : entries) { + e.condition->get_modified_regs(regs); + e.body->get_modified_regs(regs); + } +} + ///////////////////////////// // AbsElement ///////////////////////////// @@ -706,7 +815,7 @@ AbsElement::AbsElement(Variable _source, RegSet _consumed) : source(_source), consumed(std::move(_consumed)) {} goos::Object AbsElement::to_form(const Env& env) const { - return pretty_print::build_list("abs", source.to_string(&env)); + return pretty_print::build_list("abs", source.to_form(env)); } void AbsElement::apply(const std::function& f) { @@ -719,6 +828,8 @@ void AbsElement::collect_vars(VariableSet& vars) const { vars.insert(source); } +void AbsElement::get_modified_regs(RegSet&) const {} + ///////////////////////////// // AshElement ///////////////////////////// @@ -736,7 +847,7 @@ AshElement::AshElement(Variable _shift_amount, goos::Object AshElement::to_form(const Env& env) const { return pretty_print::build_list(pretty_print::to_symbol(is_signed ? "ash.si" : "ash.ui"), - value.to_string(&env), shift_amount.to_string(&env)); + value.to_form(env), shift_amount.to_form(env)); } void AshElement::apply(const std::function& f) { @@ -750,6 +861,8 @@ void AshElement::collect_vars(VariableSet& vars) const { vars.insert(shift_amount); } +void AshElement::get_modified_regs(RegSet&) const {} + ///////////////////////////// // TypeOfElement ///////////////////////////// @@ -776,6 +889,8 @@ void TypeOfElement::collect_vars(VariableSet& vars) const { value->collect_vars(vars); } +void TypeOfElement::get_modified_regs(RegSet&) const {} + ///////////////////////////// // ConditionalMoveFalseElement ///////////////////////////// @@ -788,8 +903,8 @@ ConditionalMoveFalseElement::ConditionalMoveFalseElement(Variable _dest, } goos::Object ConditionalMoveFalseElement::to_form(const Env& env) const { - return pretty_print::build_list(on_zero ? "cmove-#f-zero" : "cmove-#f-nonzero", - dest.to_string(&env), source->to_form(env)); + return pretty_print::build_list(on_zero ? "cmove-#f-zero" : "cmove-#f-nonzero", dest.to_form(env), + source->to_form(env)); } void ConditionalMoveFalseElement::apply(const std::function& f) { @@ -806,6 +921,10 @@ void ConditionalMoveFalseElement::collect_vars(VariableSet& vars) const { source->collect_vars(vars); } +void ConditionalMoveFalseElement::get_modified_regs(RegSet& regs) const { + regs.insert(dest.reg()); +} + ///////////////////////////// // GenericElement ///////////////////////////// @@ -903,6 +1022,19 @@ bool GenericOperator::operator!=(const GenericOperator& other) const { return !((*this) == other); } +void GenericOperator::get_modified_regs(RegSet& regs) const { + switch (m_kind) { + case Kind::FIXED_OPERATOR: + case Kind::CONDITION_OPERATOR: + break; + case Kind::FUNCTION_EXPR: + m_function->get_modified_regs(regs); + break; + default: + assert(false); + } +} + std::string fixed_operator_to_string(FixedOperatorKind kind) { switch (kind) { case FixedOperatorKind::GPR_TO_FPR: @@ -937,12 +1069,16 @@ std::string fixed_operator_to_string(FixedOperatorKind kind) { return "lognot"; case FixedOperatorKind::SLL: return "sll"; + case FixedOperatorKind::SRL: + return "srl"; case FixedOperatorKind::CAR: return "car"; case FixedOperatorKind::CDR: return "cdr"; case FixedOperatorKind::NEW: return "new"; + case FixedOperatorKind::OBJECT_NEW: + return "object-new"; default: assert(false); } @@ -992,6 +1128,13 @@ void GenericElement::collect_vars(VariableSet& vars) const { } } +void GenericElement::get_modified_regs(RegSet& regs) const { + m_head.get_modified_regs(regs); + for (auto x : m_elts) { + x->get_modified_regs(regs); + } +} + ///////////////////////////// // CastElement ///////////////////////////// @@ -1015,6 +1158,10 @@ void CastElement::collect_vars(VariableSet& vars) const { m_source->collect_vars(vars); } +void CastElement::get_modified_regs(RegSet& regs) const { + m_source->get_modified_regs(regs); +} + ///////////////////////////// // DerefElement ///////////////////////////// @@ -1092,6 +1239,19 @@ void DerefToken::apply_form(const std::function& f) { } } +void DerefToken::get_modified_regs(RegSet& regs) const { + switch (m_kind) { + case Kind::INTEGER_CONSTANT: + case Kind::FIELD_NAME: + break; + case Kind::INTEGER_EXPRESSION: + m_expr->get_modified_regs(regs); + break; + default: + assert(false); + } +} + DerefElement::DerefElement(Form* base, bool is_addr_of, DerefToken token) : m_base(base), m_is_addr_of(is_addr_of), m_tokens({std::move(token)}) {} @@ -1129,6 +1289,13 @@ void DerefElement::collect_vars(VariableSet& vars) const { } } +void DerefElement::get_modified_regs(RegSet& regs) const { + m_base->get_modified_regs(regs); + for (auto& tok : m_tokens) { + tok.get_modified_regs(regs); + } +} + ///////////////////////////// // DynamicMethodAccess ///////////////////////////// @@ -1136,7 +1303,7 @@ void DerefElement::collect_vars(VariableSet& vars) const { DynamicMethodAccess::DynamicMethodAccess(Variable source) : m_source(source) {} goos::Object DynamicMethodAccess::to_form(const Env& env) const { - return pretty_print::build_list("dyn-method-access", m_source.to_string(&env)); + return pretty_print::build_list("dyn-method-access", m_source.to_form(env)); } void DynamicMethodAccess::apply(const std::function& f) { @@ -1149,4 +1316,79 @@ void DynamicMethodAccess::collect_vars(VariableSet& vars) const { vars.insert(m_source); } +void DynamicMethodAccess::get_modified_regs(RegSet&) const {} + +///////////////////////////// +// ArrayFieldAccess +///////////////////////////// +ArrayFieldAccess::ArrayFieldAccess(Variable source, + const std::vector& deref_tokens, + int expected_stride) + : m_source(source), m_deref_tokens(deref_tokens), m_expected_stride(expected_stride) {} + +goos::Object ArrayFieldAccess::to_form(const Env& env) const { + std::vector elts; + elts.push_back(pretty_print::to_symbol("dynamic-array-field-access")); + elts.push_back(m_source.to_form(env)); + for (auto& tok : m_deref_tokens) { + elts.push_back(tok.to_form(env)); + } + return pretty_print::build_list(elts); +} + +void ArrayFieldAccess::apply(const std::function& f) { + f(this); + for (auto& tok : m_deref_tokens) { + tok.apply(f); + } +} + +void ArrayFieldAccess::apply_form(const std::function& f) { + for (auto& tok : m_deref_tokens) { + tok.apply_form(f); + } +} + +void ArrayFieldAccess::collect_vars(VariableSet& vars) const { + vars.insert(m_source); + for (auto& tok : m_deref_tokens) { + tok.collect_vars(vars); + } +} + +void ArrayFieldAccess::get_modified_regs(RegSet& regs) const { + for (auto& tok : m_deref_tokens) { + tok.get_modified_regs(regs); + } +} + +///////////////////////////// +// GetMethodElement +///////////////////////////// + +GetMethodElement::GetMethodElement(Form* in, std::string name, bool is_object) + : m_in(in), m_name(std::move(name)), m_is_object(is_object) {} + +goos::Object GetMethodElement::to_form(const Env& env) const { + return pretty_print::build_list(m_is_object ? "method-of-object" : "method-of-type", + m_in->to_form(env), m_name); +} + +void GetMethodElement::apply(const std::function& f) { + f(this); + m_in->apply(f); +} + +void GetMethodElement::apply_form(const std::function& f) { + m_in->apply_form(f); +} + +void GetMethodElement::collect_vars(VariableSet& vars) const { + m_in->collect_vars(vars); +} + +void GetMethodElement::get_modified_regs(RegSet& regs) const { + m_in->get_modified_regs(regs); +} + } // namespace decompiler diff --git a/decompiler/IR2/Form.h b/decompiler/IR2/Form.h index 4788a00bfb..62ca38b0af 100644 --- a/decompiler/IR2/Form.h +++ b/decompiler/IR2/Form.h @@ -28,6 +28,7 @@ class FormElement { virtual void apply_form(const std::function& f) = 0; virtual bool is_sequence_point() const { return true; } virtual void collect_vars(VariableSet& vars) const = 0; + virtual void get_modified_regs(RegSet& regs) const = 0; std::string to_string(const Env& env) const; // push the result of this operation to the operation stack @@ -59,6 +60,7 @@ class SimpleExpressionElement : public FormElement { FormPool& pool, FormStack& stack, std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; void update_from_stack_identity(const Env& env, FormPool& pool, @@ -114,8 +116,7 @@ class SimpleExpressionElement : public FormElement { /*! * Represents storing a value into memory. - * Because a value can be propagated "into" the source value, this will have to be special cased - * in expression propagation. + * This is only used as a placeholder if AtomicOpForm fails to convert it to something nicer. */ class StoreElement : public FormElement { public: @@ -125,6 +126,7 @@ class StoreElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; private: // todo - we may eventually want to use a different representation for more @@ -133,7 +135,7 @@ class StoreElement : public FormElement { }; /*! - * Representing a value loaded from memory. + * Representing a value loaded from memory. Not the destination. * Unclear if this should have some common base with store? */ class LoadSourceElement : public FormElement { @@ -150,6 +152,7 @@ class LoadSourceElement : public FormElement { FormPool& pool, FormStack& stack, std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; private: Form* m_addr = nullptr; @@ -168,6 +171,7 @@ class SimpleAtomElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; const SimpleAtom& atom() const { return m_atom; } // void push_to_stack(const Env& env, FormStack& stack) override; @@ -187,6 +191,11 @@ class SetVarElement : public FormElement { bool is_sequence_point() const override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; const Variable& dst() const { return m_dst; } const Form* src() const { return m_src; } @@ -211,6 +220,7 @@ class SetFormFormElement : public FormElement { bool is_sequence_point() const override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void get_modified_regs(RegSet& regs) const override; private: Form* m_dst = nullptr; @@ -229,6 +239,7 @@ class AtomicOpElement : public FormElement { void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void get_modified_regs(RegSet& regs) const override; const AtomicOp* op() const { return m_op; } private: @@ -259,6 +270,7 @@ class ConditionElement : public FormElement { FormPool& pool, FormStack& stack, std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; void invert(); const RegSet& consume() const { return m_consumed; } @@ -283,6 +295,7 @@ class FunctionCallElement : public FormElement { FormStack& stack, std::vector* result) override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void get_modified_regs(RegSet& regs) const override; private: const CallOp* m_op; @@ -299,6 +312,7 @@ class BranchElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; const BranchOp* op() const { return m_op; } private: @@ -320,6 +334,7 @@ class ReturnElement : public FormElement { void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void get_modified_regs(RegSet& regs) const override; }; /*! @@ -353,6 +368,7 @@ class BreakElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; }; /*! @@ -386,6 +402,7 @@ class CondWithElseElement : public FormElement { void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void get_modified_regs(RegSet& regs) const override; }; /*! @@ -403,6 +420,7 @@ class EmptyElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; }; /*! @@ -418,6 +436,7 @@ class WhileElement : public FormElement { void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void get_modified_regs(RegSet& regs) const override; Form* condition = nullptr; Form* body = nullptr; bool cleaned = false; @@ -436,6 +455,7 @@ class UntilElement : public FormElement { void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void get_modified_regs(RegSet& regs) const override; Form* condition = nullptr; Form* body = nullptr; }; @@ -468,6 +488,11 @@ class ShortCircuitElement : public FormElement { void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; }; /*! @@ -493,6 +518,7 @@ class CondNoElseElement : public FormElement { void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + void get_modified_regs(RegSet& regs) const override; }; /*! @@ -509,6 +535,7 @@ class AbsElement : public FormElement { FormPool& pool, FormStack& stack, std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; Variable source; RegSet consumed; }; @@ -537,6 +564,7 @@ class AshElement : public FormElement { FormPool& pool, FormStack& stack, std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; }; /*! @@ -552,6 +580,11 @@ class TypeOfElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; }; /*! @@ -582,6 +615,7 @@ class ConditionalMoveFalseElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; }; std::string fixed_operator_to_string(FixedOperatorKind kind); @@ -603,6 +637,7 @@ class GenericOperator { void apply_form(const std::function& f); bool operator==(const GenericOperator& other) const; bool operator!=(const GenericOperator& other) const; + void get_modified_regs(RegSet& regs) const; Kind kind() const { return m_kind; } FixedOperatorKind fixed_kind() const { assert(m_kind == Kind::FIXED_OPERATOR); @@ -619,6 +654,11 @@ class GenericOperator { return m_function; } + Form* func() { + assert(m_kind == Kind::FUNCTION_EXPR); + return m_function; + } + private: friend class GenericElement; Kind m_kind = Kind::INVALID; @@ -642,7 +682,10 @@ class GenericElement : public FormElement { FormPool& pool, FormStack& stack, std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; + void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; const GenericOperator& op() const { return m_head; } + GenericOperator& op() { return m_head; } const std::vector& elts() const { return m_elts; } private: @@ -657,8 +700,14 @@ class CastElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; const TypeSpec& type() const { return m_type; } const Form* source() const { return m_source; } + Form* source() { return m_source; } private: TypeSpec m_type; @@ -681,6 +730,7 @@ class DerefToken { goos::Object to_form(const Env& env) const; void apply(const std::function& f); void apply_form(const std::function& f); + void get_modified_regs(RegSet& regs) const; Kind kind() const { return m_kind; } const std::string& field_name() const { @@ -707,9 +757,11 @@ class DerefElement : public FormElement { FormPool& pool, FormStack& stack, std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; bool is_addr_of() const { return m_is_addr_of; } const Form* base() const { return m_base; } + Form* base() { return m_base; } const std::vector& tokens() const { return m_tokens; } private: @@ -729,11 +781,48 @@ class DynamicMethodAccess : public FormElement { FormPool& pool, FormStack& stack, std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; private: Variable m_source; }; +class ArrayFieldAccess : public FormElement { + public: + ArrayFieldAccess(Variable source, + const std::vector& deref_tokens, + int expected_stride); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + void collect_vars(VariableSet& vars) const override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; + void get_modified_regs(RegSet& regs) const override; + + private: + Variable m_source; + std::vector m_deref_tokens; + int m_expected_stride = -1; +}; + +class GetMethodElement : public FormElement { + public: + GetMethodElement(Form* in, std::string name, bool is_object); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; + + private: + Form* m_in = nullptr; + std::string m_name; + bool m_is_object = false; +}; + /*! * A Form is a wrapper around one or more FormElements. * This is done for two reasons: @@ -800,6 +889,7 @@ class Form { void collect_vars(VariableSet& vars) const; void update_children_from_stack(const Env& env, FormPool& pool, FormStack& stack); + void get_modified_regs(RegSet& regs) const; FormElement* parent_element = nullptr; diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index 94827715ac..5a415f09f6 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -1,6 +1,7 @@ #include "Form.h" #include "FormStack.h" #include "GenericElementMatcher.h" +#include "common/goos/PrettyPrinter.h" /* * TODO @@ -16,69 +17,91 @@ Form* var_to_form(const Variable& var, FormPool& pool) { return pool.alloc_single_element_form(nullptr, SimpleAtom::make_var(var)); } -void update_var_from_stack_helper(int my_idx, - const Env&, - Variable input, - FormPool& pool, - FormStack& stack, - const RegSet& consumes, - std::vector* result) { - if (consumes.find(input.reg()) != consumes.end()) { - // is consumed. - auto stack_val = stack.pop_reg(input); - if (stack_val) { - for (auto x : stack_val->elts()) { - result->push_back(x); - } - return; +void pop_helper(const std::vector& vars, + const Env& env, + FormPool& pool, + FormStack& stack, + const std::vector*>& output, + const std::optional& consumes = std::nullopt) { + std::vector submit_regs; + std::vector submit_reg_to_var; + + // build submission for stack + for (size_t var_idx = 0; var_idx < vars.size(); var_idx++) { + const auto& var = vars.at(var_idx); + auto& ri = env.reg_use().op.at(var.idx()); + RegSet consumes_to_use = consumes.value_or(ri.consumes); + if (consumes_to_use.find(var.reg()) != consumes_to_use.end()) { + // could pop + submit_reg_to_var.push_back(var_idx); + submit_regs.push_back(var.reg()); + } else { + // no way to pop } } - auto elt = - pool.alloc_element(SimpleAtom::make_var(input).as_expr(), my_idx); - result->push_back(elt); -} -void update_var_from_stack_helper(int my_idx, - Variable input, - const Env& env, - FormPool& pool, - FormStack& stack, - std::vector* result) { - auto& ri = env.reg_use().op.at(my_idx); - if (ri.consumes.find(input.reg()) != ri.consumes.end()) { - // is consumed. - auto stack_val = stack.pop_reg(input); - if (stack_val) { - for (auto x : stack_val->elts()) { - result->push_back(x); - } - return; + // submit! + // auto result = stack.pop(submit_regs, env); + std::vector pop_result; + // loop in reverse. + for (size_t i = submit_regs.size(); i-- > 0;) { + // figure out what var we are: + auto var_idx = submit_reg_to_var.at(i); + + // anything _less or equal_ than this should be unmodified by the pop + // note - on the actual popped, pop_reg won't consider the destination. + RegSet pop_barrier_regs; + for (size_t j = 0; j < var_idx; j++) { + pop_barrier_regs.insert(vars.at(j).reg()); + } + + pop_result.push_back(stack.pop_reg(submit_regs.at(i), pop_barrier_regs, env)); + } + std::reverse(pop_result.begin(), pop_result.end()); + + std::vector forms; + forms.resize(vars.size(), nullptr); + if (!pop_result.empty()) { + // success! + for (size_t i = 0; i < submit_regs.size(); i++) { + forms.at(submit_reg_to_var.at(i)) = pop_result.at(i); + } + } + + // fill in the missing pieces: + for (size_t i = 0; i < forms.size(); i++) { + if (forms.at(i)) { + for (auto x : forms.at(i)->elts()) { + output.at(i)->push_back(x); + } + } else { + output.at(i)->push_back(pool.alloc_element( + SimpleAtom::make_var(vars.at(i)).as_expr(), vars.at(i).idx())); } } - auto elt = - pool.alloc_element(SimpleAtom::make_var(input).as_expr(), my_idx); - result->push_back(elt); } -Form* update_var_from_stack_to_form(int my_idx, - Variable input, - const Env& env, - FormPool& pool, - FormStack& stack) { - std::vector elts; - update_var_from_stack_helper(my_idx, input, env, pool, stack, &elts); - return pool.alloc_sequence_form(nullptr, elts); -} +std::vector pop_to_forms(const std::vector& vars, + const Env& env, + FormPool& pool, + FormStack& stack, + const std::optional& consumes = std::nullopt) { + std::vector forms; + std::vector> forms_out; + std::vector*> form_ptrs; + forms_out.resize(vars.size()); + form_ptrs.reserve(vars.size()); + forms.reserve(vars.size()); + for (auto& x : forms_out) { + form_ptrs.push_back(&x); + } -Form* update_var_from_stack_to_form(int my_idx, - const Env& env, - Variable input, - const RegSet& consumes, - FormPool& pool, - FormStack& stack) { - std::vector elts; - update_var_from_stack_helper(my_idx, env, input, pool, stack, consumes, &elts); - return pool.alloc_sequence_form(nullptr, elts); + pop_helper(vars, env, pool, stack, form_ptrs, consumes); + + for (auto& x : forms_out) { + forms.push_back(pool.alloc_sequence_form(nullptr, x)); + } + return forms; } bool is_float_type(const Env& env, int my_idx, Variable var) { @@ -126,7 +149,7 @@ void SimpleExpressionElement::update_from_stack_identity(const Env& env, std::vector* result) { auto& arg = m_expr.get_arg(0); if (arg.is_var()) { - update_var_from_stack_helper(m_my_idx, arg.var(), env, pool, stack, result); + pop_helper({arg.var()}, env, pool, stack, {result}); } else if (arg.is_static_addr()) { // for now, do nothing. result->push_back(this); @@ -177,10 +200,10 @@ void SimpleExpressionElement::update_from_stack_div_s(const Env& env, if (is_float_type(env, m_my_idx, m_expr.get_arg(0).var()) && is_float_type(env, m_my_idx, m_expr.get_arg(1).var())) { // todo - check the order here - auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); - auto arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + + auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); auto new_form = pool.alloc_element( - GenericOperator::make_fixed(FixedOperatorKind::DIVISION), arg0, arg1); + GenericOperator::make_fixed(FixedOperatorKind::DIVISION), args.at(0), args.at(1)); result->push_back(new_form); } else { throw std::runtime_error(fmt::format("Floating point division attempted on invalid types.")); @@ -202,24 +225,24 @@ void SimpleExpressionElement::update_from_stack_add_i(const Env& env, arg1_u = is_uint_type(env, m_my_idx, m_expr.get_arg(1).var()); } - auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); - Form* arg1; + std::vector args; if (arg1_reg) { - arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); } else { - arg1 = pool.alloc_single_element_form(nullptr, m_expr.get_arg(1)); + args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); + args.push_back(pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); } if ((arg0_i && arg1_i) || (arg0_u && arg1_u)) { auto new_form = pool.alloc_element( - GenericOperator::make_fixed(FixedOperatorKind::ADDITION), arg0, arg1); + GenericOperator::make_fixed(FixedOperatorKind::ADDITION), args.at(0), args.at(1)); result->push_back(new_form); } else { auto cast = pool.alloc_single_element_form( - nullptr, TypeSpec(arg0_i ? "int" : "uint"), arg1); + nullptr, TypeSpec(arg0_i ? "int" : "uint"), args.at(1)); auto new_form = pool.alloc_element( - GenericOperator::make_fixed(FixedOperatorKind::ADDITION), arg0, cast); + GenericOperator::make_fixed(FixedOperatorKind::ADDITION), args.at(0), cast); result->push_back(new_form); } } @@ -231,19 +254,18 @@ void SimpleExpressionElement::update_from_stack_mult_si(const Env& env, auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); - auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); - auto arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); if (!arg0_i) { - arg0 = pool.alloc_single_element_form(nullptr, TypeSpec("int"), arg0); + args.at(0) = pool.alloc_single_element_form(nullptr, TypeSpec("int"), args.at(0)); } if (!arg1_i) { - arg1 = pool.alloc_single_element_form(nullptr, TypeSpec("int"), arg1); + args.at(1) = pool.alloc_single_element_form(nullptr, TypeSpec("int"), args.at(1)); } auto new_form = pool.alloc_element( - GenericOperator::make_fixed(FixedOperatorKind::MULTIPLICATION), arg0, arg1); + GenericOperator::make_fixed(FixedOperatorKind::MULTIPLICATION), args.at(0), args.at(1)); result->push_back(new_form); } @@ -255,18 +277,18 @@ void SimpleExpressionElement::update_from_stack_force_si_2(const Env& env, auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); - auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); - auto arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); if (!arg0_i) { - arg0 = pool.alloc_single_element_form(nullptr, TypeSpec("int"), arg0); + args.at(0) = pool.alloc_single_element_form(nullptr, TypeSpec("int"), args.at(0)); } if (!arg1_i) { - arg1 = pool.alloc_single_element_form(nullptr, TypeSpec("int"), arg1); + args.at(1) = pool.alloc_single_element_form(nullptr, TypeSpec("int"), args.at(1)); } - auto new_form = pool.alloc_element(GenericOperator::make_fixed(kind), arg0, arg1); + auto new_form = + pool.alloc_element(GenericOperator::make_fixed(kind), args.at(0), args.at(1)); result->push_back(new_form); } @@ -284,23 +306,24 @@ void SimpleExpressionElement::update_from_stack_force_ui_2(const Env& env, assert(m_expr.get_arg(1).is_int()); } - Form* arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); - Form* arg1; + std::vector args; if (arg1_reg) { - arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); + args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); } else { - arg1 = pool.alloc_single_element_form(nullptr, m_expr.get_arg(1)); + args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); + args.push_back(pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); } if (!arg0_u) { - arg0 = pool.alloc_single_element_form(nullptr, TypeSpec("uint"), arg0); + args.at(0) = pool.alloc_single_element_form(nullptr, TypeSpec("uint"), args.at(0)); } if (!arg1_u) { - arg1 = pool.alloc_single_element_form(nullptr, TypeSpec("uint"), arg1); + args.at(1) = pool.alloc_single_element_form(nullptr, TypeSpec("uint"), args.at(1)); } - auto new_form = pool.alloc_element(GenericOperator::make_fixed(kind), arg0, arg1); + auto new_form = + pool.alloc_element(GenericOperator::make_fixed(kind), args.at(0), args.at(1)); result->push_back(new_form); } @@ -312,21 +335,29 @@ void SimpleExpressionElement::update_from_stack_copy_first_int_2( std::vector* result) { auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); auto arg0_u = is_uint_type(env, m_my_idx, m_expr.get_arg(0).var()); + if (!m_expr.get_arg(1).is_var()) { + auto args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); + + auto new_form = pool.alloc_element( + GenericOperator::make_fixed(kind), args.at(0), + pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); + result->push_back(new_form); + return; + } auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); auto arg1_u = is_uint_type(env, m_my_idx, m_expr.get_arg(1).var()); - auto arg1 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(1).var(), env, pool, stack); - auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); + auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); if ((arg0_i && arg1_i) || (arg0_u && arg1_u)) { - auto new_form = - pool.alloc_element(GenericOperator::make_fixed(kind), arg0, arg1); + auto new_form = pool.alloc_element(GenericOperator::make_fixed(kind), + args.at(0), args.at(1)); result->push_back(new_form); } else { auto cast = pool.alloc_single_element_form( - nullptr, TypeSpec(arg0_i ? "int" : "uint"), arg1); + nullptr, TypeSpec(arg0_i ? "int" : "uint"), args.at(1)); auto new_form = - pool.alloc_element(GenericOperator::make_fixed(kind), arg0, cast); + pool.alloc_element(GenericOperator::make_fixed(kind), args.at(0), cast); result->push_back(new_form); } } @@ -335,9 +366,9 @@ void SimpleExpressionElement::update_from_stack_lognot(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) { - auto arg0 = update_var_from_stack_to_form(m_my_idx, m_expr.get_arg(0).var(), env, pool, stack); + auto args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); auto new_form = pool.alloc_element( - GenericOperator::make_fixed(FixedOperatorKind::LOGNOT), arg0); + GenericOperator::make_fixed(FixedOperatorKind::LOGNOT), args.at(0)); result->push_back(new_form); } @@ -397,6 +428,12 @@ void SimpleExpressionElement::update_from_stack(const Env& env, case SimpleExpression::Kind::LEFT_SHIFT: update_from_stack_force_ui_2(env, FixedOperatorKind::SLL, pool, stack, result); break; + case SimpleExpression::Kind::RIGHT_SHIFT_LOGIC: + update_from_stack_force_ui_2(env, FixedOperatorKind::SRL, pool, stack, result); + break; + case SimpleExpression::Kind::MUL_UNSIGNED: + update_from_stack_force_ui_2(env, FixedOperatorKind::MULTIPLICATION, pool, stack, result); + break; default: throw std::runtime_error( fmt::format("SimpleExpressionElement::update_from_stack NYI for {}", to_string(env))); @@ -412,11 +449,14 @@ void SetVarElement::push_to_stack(const Env& env, FormPool& pool, FormStack& sta if (m_src->is_single_element()) { auto src_as_se = dynamic_cast(m_src->back()); if (src_as_se) { - const auto& consumes = env.reg_use().op.at(m_dst.idx()).consumes; if (src_as_se->expr().kind() == SimpleExpression::Kind::IDENTITY && src_as_se->expr().get_arg(0).is_var()) { - stack.push_non_seq_reg_to_reg(m_dst, src_as_se->expr().get_arg(0).var(), m_src); - return; + auto var = src_as_se->expr().get_arg(0).var(); + auto& info = env.reg_use().op.at(var.idx()); + if (info.consumes.find(var.reg()) != info.consumes.end()) { + stack.push_non_seq_reg_to_reg(m_dst, src_as_se->expr().get_arg(0).var(), m_src); + return; + } } } } @@ -424,6 +464,14 @@ void SetVarElement::push_to_stack(const Env& env, FormPool& pool, FormStack& sta stack.push_value_to_reg(m_dst, m_src, true); } +void SetVarElement::update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + m_src->update_children_from_stack(env, pool, stack); + result->push_back(this); +} + void SetFormFormElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { // todo - is the order here right? m_src->update_children_from_stack(env, pool, stack); @@ -439,11 +487,9 @@ void AshElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) { - auto val_form = update_var_from_stack_to_form(value.idx(), env, value, consumed, pool, stack); - auto sa_form = - update_var_from_stack_to_form(shift_amount.idx(), env, shift_amount, consumed, pool, stack); + auto forms = pop_to_forms({value, shift_amount}, env, pool, stack, consumed); auto new_form = pool.alloc_element( - GenericOperator::make_fixed(FixedOperatorKind::ARITH_SHIFT), val_form, sa_form); + GenericOperator::make_fixed(FixedOperatorKind::ARITH_SHIFT), forms.at(0), forms.at(1)); result->push_back(new_form); } @@ -455,10 +501,9 @@ void AbsElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) { - auto source_form = - update_var_from_stack_to_form(source.idx(), env, source, consumed, pool, stack); + auto forms = pop_to_forms({source}, env, pool, stack, consumed); auto new_form = pool.alloc_element( - GenericOperator::make_fixed(FixedOperatorKind::ABS), source_form); + GenericOperator::make_fixed(FixedOperatorKind::ABS), forms.at(0)); result->push_back(new_form); } @@ -474,55 +519,95 @@ void FunctionCallElement::update_from_stack(const Env& env, auto nargs = m_op->arg_vars().size(); args.resize(nargs, nullptr); - for (size_t i = nargs; i-- > 0;) { - auto var = m_op->arg_vars().at(i); - args.at(i) = update_var_from_stack_to_form(m_op->op_id(), var, env, pool, stack); + std::vector all_pop_vars = {m_op->function_var()}; + for (size_t i = 0; i < nargs; i++) { + all_pop_vars.push_back(m_op->arg_vars().at(i)); } - Form* func = update_var_from_stack_to_form(m_op->op_id(), m_op->function_var(), env, pool, stack); - auto new_form = pool.alloc_element(GenericOperator::make_function(func), args); + auto unstacked = pop_to_forms(all_pop_vars, env, pool, stack); + std::vector arg_forms; + arg_forms.insert(arg_forms.begin(), unstacked.begin() + 1, unstacked.end()); + auto new_form = pool.alloc_element( + GenericOperator::make_function(unstacked.at(0)), arg_forms); - // detect method calls: - // ex: ((-> pair methods-by-name new) (quote global) pair gp-0 a3-0) - constexpr int type_for_method = 0; - constexpr int method_name = 1; + { + // detect method calls: + // ex: ((-> pair methods-by-name new) (quote global) pair gp-0 a3-0) + constexpr int type_for_method = 0; + constexpr int method_name = 1; - auto deref_matcher = Matcher::deref( - Matcher::any_symbol(type_for_method), false, - {DerefTokenMatcher::string("methods-by-name"), DerefTokenMatcher::any_string(method_name)}); + auto deref_matcher = Matcher::deref( + Matcher::any_symbol(type_for_method), false, + {DerefTokenMatcher::string("methods-by-name"), DerefTokenMatcher::any_string(method_name)}); - auto matcher = Matcher::op_with_rest(GenericOpMatcher::func(deref_matcher), {}); - auto temp_form = pool.alloc_single_form(nullptr, new_form); - auto match_result = match(matcher, temp_form); - if (match_result.matched) { - auto type_1 = match_result.maps.strings.at(type_for_method); - auto name = match_result.maps.strings.at(method_name); + auto matcher = Matcher::op_with_rest(GenericOpMatcher::func(deref_matcher), {}); + auto temp_form = pool.alloc_single_form(nullptr, new_form); + auto match_result = match(matcher, temp_form); + if (match_result.matched) { + auto type_1 = match_result.maps.strings.at(type_for_method); + auto name = match_result.maps.strings.at(method_name); - if (name == "new") { - constexpr int allocation = 2; - constexpr int type_for_arg = 3; - auto alloc_matcher = Matcher::any_quoted_symbol(allocation); - auto type_arg_matcher = Matcher::any_symbol(type_for_arg); - matcher = Matcher::op_with_rest(GenericOpMatcher::func(deref_matcher), - {alloc_matcher, type_arg_matcher}); - match_result = match(matcher, temp_form); - auto alloc = match_result.maps.strings.at(allocation); - if (alloc != "global") { - throw std::runtime_error("Unrecognized heap symbol for new: " + alloc); - } - auto type_2 = match_result.maps.strings.at(type_for_arg); - if (type_1 != type_2) { - throw std::runtime_error( - fmt::format("Inconsistent types in method call: {} and {}", type_1, type_2)); + if (name == "new" && type_1 == "object") { + std::vector new_args = dynamic_cast(new_form)->elts(); + auto new_op = pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::OBJECT_NEW), new_args); + result->push_back(new_op); + return; + } else if (name == "new") { + constexpr int allocation = 2; + constexpr int type_for_arg = 3; + auto alloc_matcher = Matcher::any_quoted_symbol(allocation); + auto type_arg_matcher = Matcher::any_symbol(type_for_arg); + matcher = Matcher::op_with_rest(GenericOpMatcher::func(deref_matcher), + {alloc_matcher, type_arg_matcher}); + match_result = match(matcher, temp_form); + if (match_result.matched) { + auto alloc = match_result.maps.strings.at(allocation); + if (alloc != "global") { + throw std::runtime_error("Unrecognized heap symbol for new: " + alloc); + } + auto type_2 = match_result.maps.strings.at(type_for_arg); + if (type_1 != type_2) { + throw std::runtime_error( + fmt::format("Inconsistent types in method call: {} and {}", type_1, type_2)); + } + + std::vector new_args = dynamic_cast(new_form)->elts(); + + auto new_op = pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::NEW), new_args); + result->push_back(new_op); + return; + } else { + throw std::runtime_error("Failed to match new method"); + } + } else { + throw std::runtime_error("Method call detected, not yet implemented"); } + } + } - std::vector new_args = dynamic_cast(new_form)->elts(); + { + // detect method calls: + // ex: ((-> XXX methods-by-name new) (quote global) pair gp-0 a3-0) + constexpr int method_name = 0; + constexpr int type_source = 1; - auto new_op = pool.alloc_element( - GenericOperator::make_fixed(FixedOperatorKind::NEW), new_args); - result->push_back(new_op); + auto deref_matcher = Matcher::deref( + Matcher::any(type_source), false, + {DerefTokenMatcher::string("methods-by-name"), DerefTokenMatcher::any_string(method_name)}); + + auto matcher = Matcher::op_with_rest(GenericOpMatcher::func(deref_matcher), {}); + auto temp_form = pool.alloc_single_form(nullptr, new_form); + auto match_result = match(matcher, temp_form); + if (match_result.matched) { + auto name = match_result.maps.strings.at(method_name); + auto type_source_form = match_result.maps.forms.at(type_source); + auto method_op = + pool.alloc_single_element_form(nullptr, type_source_form, name, false); + auto gop = GenericOperator::make_function(method_op); + + result->push_back(pool.alloc_element(gop, arg_forms)); return; - } else { - throw std::runtime_error("Method call detected, not yet implemented"); } } @@ -546,6 +631,13 @@ void DerefElement::update_from_stack(const Env& env, std::vector* result) { // todo - update var tokens from stack? m_base->update_children_from_stack(env, pool, stack); + auto as_deref = dynamic_cast(m_base->try_as_single_element()); + if (as_deref) { + if (!m_is_addr_of && !as_deref->is_addr_of()) { + m_tokens.insert(m_tokens.begin(), as_deref->tokens().begin(), as_deref->tokens().end()); + m_base = as_deref->m_base; + } + } result->push_back(this); } @@ -642,10 +734,20 @@ void CondWithElseElement::push_to_stack(const Env& env, FormPool& pool, FormStac } } - if (rewrite_as_set) { - assert(last_var.has_value()); + if (!last_var.has_value()) { + rewrite_as_set = false; } + // determine if set destination is used + bool set_unused = false; + if (rewrite_as_set) { + auto& info = env.reg_use().op.at(last_var->idx()); + if (info.written_and_unused.find(last_var->reg()) != info.written_and_unused.end()) { + set_unused = true; + } + } + + // process everything. for (auto& entry : entries) { for (auto form : {entry.condition, entry.body}) { FormStack temp_stack; @@ -684,8 +786,22 @@ void CondWithElseElement::push_to_stack(const Env& env, FormPool& pool, FormStac else_ir->push_back(e); } + auto top_condition = entries.front().condition; + if (!top_condition->is_single_element()) { + auto real_condition = top_condition->back(); + top_condition->pop_back(); + for (auto x : top_condition->elts()) { + x->push_to_stack(env, pool, stack); + } + top_condition->elts() = {real_condition}; + } + if (rewrite_as_set) { - stack.push_value_to_reg(*last_var, pool.alloc_single_form(nullptr, this), true); + if (set_unused) { + stack.push_form_element(this, true); + } else { + stack.push_value_to_reg(*last_var, pool.alloc_single_form(nullptr, this), true); + } } else { stack.push_form_element(this, true); } @@ -726,17 +842,47 @@ void ShortCircuitElement::push_to_stack(const Env& env, FormPool& pool, FormStac } } +void ShortCircuitElement::update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + (void)stack; + for (int i = 0; i < int(entries.size()); i++) { + auto& entry = entries.at(i); + FormStack temp_stack; + for (auto& elt : entry.condition->elts()) { + elt->push_to_stack(env, pool, temp_stack); + } + + std::vector new_entries; + if (i == int(entries.size()) - 1) { + new_entries = temp_stack.rewrite_to_get_var(pool, final_result, env); + } else { + new_entries = temp_stack.rewrite(pool); + } + + entry.condition->clear(); + for (auto e : new_entries) { + entry.condition->push_back(e); + } + } + result->push_back(this); +} + /////////////////// // ConditionElement /////////////////// void ConditionElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { std::vector source_forms; + std::vector vars; for (int i = 0; i < get_condition_num_args(m_kind); i++) { - source_forms.push_back(update_var_from_stack_to_form(m_src[i]->var().idx(), env, - m_src[i]->var(), m_consumed, pool, stack)); + vars.push_back(m_src[i]->var()); } + std::reverse(vars.begin(), vars.end()); + source_forms = pop_to_forms(vars, env, pool, stack, m_consumed); + std::reverse(source_forms.begin(), source_forms.end()); stack.push_form_element( pool.alloc_element(GenericOperator::make_compare(m_kind), source_forms), @@ -748,14 +894,12 @@ void ConditionElement::update_from_stack(const Env& env, FormStack& stack, std::vector* result) { std::vector source_forms; + std::vector vars; - // for (int i = 0; i < get_condition_num_args(m_kind); i++) { - for (int i = get_condition_num_args(m_kind); i-- > 0;) { - source_forms.push_back(update_var_from_stack_to_form(m_src[i]->var().idx(), env, - m_src[i]->var(), m_consumed, pool, stack)); + for (int i = 0; i < get_condition_num_args(m_kind); i++) { + vars.push_back(m_src[i]->var()); } - - std::reverse(source_forms.begin(), source_forms.end()); + source_forms = pop_to_forms(vars, env, pool, stack, m_consumed); result->push_back( pool.alloc_element(GenericOperator::make_compare(m_kind), source_forms)); @@ -791,6 +935,12 @@ void AtomicOpElement::push_to_stack(const Env& env, FormPool&, FormStack& stack) return; } } + + auto as_asm = dynamic_cast(m_op); + if (as_asm) { + stack.push_form_element(this, true); + return; + } throw std::runtime_error("Can't push atomic op to stack: " + m_op->to_string(env)); } @@ -808,6 +958,12 @@ void GenericElement::update_from_stack(const Env& env, result->push_back(this); } +void GenericElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { + (void)env; + (void)pool; + stack.push_form_element(this, true); +} + //////////////////////// // DynamicMethodAccess //////////////////////// @@ -816,7 +972,7 @@ void DynamicMethodAccess::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) { - auto new_val = stack.pop_reg(m_source); + auto new_val = stack.pop_reg(m_source, {}, env); auto reg0_matcher = Matcher::match_or({Matcher::any_reg(0), Matcher::cast("uint", Matcher::any_reg(0))}); auto reg1_matcher = @@ -842,4 +998,103 @@ void DynamicMethodAccess::update_from_stack(const Env& env, result->push_back(deref); } +//////////////////////// +// ArrayFieldAccess +//////////////////////// + +namespace { +bool is_power_of_two(int in, int* out) { + int x = 1; + for (int i = 0; i < 32; i++) { + if (x == in) { + *out = i; + return true; + } + x = x * 2; + } + return false; +} +} // namespace + +void ArrayFieldAccess::update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + auto new_val = stack.pop_reg(m_source, {}, env); + + int power_of_two = 0; + if (m_expected_stride == 1) { + // reg0 is idx + auto reg0_matcher = + Matcher::match_or({Matcher::any_reg(0), Matcher::cast("int", Matcher::any_reg(0))}); + // reg1 is base + auto reg1_matcher = + Matcher::match_or({Matcher::any_reg(1), Matcher::cast("int", Matcher::any_reg(1))}); + auto matcher = Matcher::fixed_op(FixedOperatorKind::ADDITION, {reg0_matcher, reg1_matcher}); + auto match_result = match(matcher, new_val); + if (!match_result.matched) { + throw std::runtime_error("Couldn't match ArrayFieldAccess (stride 1) values: " + + new_val->to_string(env)); + } + auto idx = match_result.maps.regs.at(0); + auto base = match_result.maps.regs.at(1); + assert(idx.has_value() && base.has_value()); + + std::vector tokens = m_deref_tokens; + tokens.push_back(DerefToken::make_int_expr(var_to_form(idx.value(), pool))); + + auto deref = pool.alloc_element(var_to_form(base.value(), pool), false, tokens); + result->push_back(deref); + } else if (is_power_of_two(m_expected_stride, &power_of_two)) { + // (+ (sll (the-as uint a1-0) 2) (the-as int a0-0)) + auto reg0_matcher = + Matcher::match_or({Matcher::any_reg(0), Matcher::cast("uint", Matcher::any_reg(0))}); + auto reg1_matcher = + Matcher::match_or({Matcher::any_reg(1), Matcher::cast("int", Matcher::any_reg(1))}); + auto sll_matcher = + Matcher::fixed_op(FixedOperatorKind::SLL, {reg0_matcher, Matcher::integer(power_of_two)}); + auto matcher = Matcher::fixed_op(FixedOperatorKind::ADDITION, {sll_matcher, reg1_matcher}); + auto match_result = match(matcher, new_val); + if (!match_result.matched) { + throw std::runtime_error("Couldn't match ArrayFieldAccess (stride power of 2) values: " + + new_val->to_string(env)); + } + auto idx = match_result.maps.regs.at(0); + auto base = match_result.maps.regs.at(1); + assert(idx.has_value() && base.has_value()); + + std::vector tokens = m_deref_tokens; + tokens.push_back(DerefToken::make_int_expr(var_to_form(idx.value(), pool))); + + auto deref = pool.alloc_element(var_to_form(base.value(), pool), false, tokens); + result->push_back(deref); + } else { + throw std::runtime_error("Not power of two case, not yet implemented"); + } +} + +//////////////////////// +// CastElement +//////////////////////// + +void CastElement::update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + m_source->update_children_from_stack(env, pool, stack); + result->push_back(this); +} + +//////////////////////// +// TypeOfElement +//////////////////////// + +void TypeOfElement::update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + value->update_children_from_stack(env, pool, stack); + result->push_back(this); +} + } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/FormStack.cpp b/decompiler/IR2/FormStack.cpp index edb6fdb7b1..5328d2a2f6 100644 --- a/decompiler/IR2/FormStack.cpp +++ b/decompiler/IR2/FormStack.cpp @@ -1,3 +1,4 @@ +#include #include "FormStack.h" #include "Form.h" @@ -63,16 +64,35 @@ void FormStack::push_form_element(FormElement* elt, bool sequence_point) { m_stack.push_back(entry); } -Form* FormStack::pop_reg(Register reg) { +Form* FormStack::pop_reg(const Variable& var, const RegSet& barrier, const Env& env) { + return pop_reg(var.reg(), barrier, env); +} + +namespace { +bool nonempty_intersection(const RegSet& a, const RegSet& b) { + // todo - if we ever switch to bit reg sets, this could be a lot faster. + std::vector isect; + std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::back_inserter(isect)); + return !isect.empty(); +} +} // namespace + +Form* FormStack::pop_reg(Register reg, const RegSet& barrier, const Env& env) { + (void)env; // keep this for easy debugging. + RegSet modified; for (size_t i = m_stack.size(); i-- > 0;) { auto& entry = m_stack.at(i); if (entry.active) { if (entry.destination->reg() == reg) { + entry.source->get_modified_regs(modified); + if (nonempty_intersection(modified, barrier)) { + return nullptr; + } entry.active = false; assert(entry.source); if (entry.non_seq_source.has_value()) { assert(entry.sequence_point == false); - auto result = pop_reg(entry.non_seq_source->reg()); + auto result = pop_reg(entry.non_seq_source->reg(), barrier, env); if (result) { return result; } @@ -84,6 +104,14 @@ Form* FormStack::pop_reg(Register reg) { // and it's a sequence point! can't look any more back than this. return nullptr; } + // no match, and not a sequence: + if (entry.source) { + assert(!entry.elt); + entry.source->get_modified_regs(modified); + } else { + assert(entry.elt); + entry.elt->get_modified_regs(modified); + } } } } @@ -91,10 +119,6 @@ Form* FormStack::pop_reg(Register reg) { return nullptr; } -Form* FormStack::pop_reg(const Variable& var) { - return pop_reg(var.reg()); -} - std::vector FormStack::rewrite(FormPool& pool) { std::vector result; diff --git a/decompiler/IR2/FormStack.h b/decompiler/IR2/FormStack.h index aab82a1692..7bb6e98e55 100644 --- a/decompiler/IR2/FormStack.h +++ b/decompiler/IR2/FormStack.h @@ -16,8 +16,8 @@ class FormStack { void push_value_to_reg(Variable var, Form* value, bool sequence_point); void push_non_seq_reg_to_reg(const Variable& dst, const Variable& src, Form* src_as_form); void push_form_element(FormElement* elt, bool sequence_point); - Form* pop_reg(const Variable& var); - Form* pop_reg(Register reg); + Form* pop_reg(const Variable& var, const RegSet& barrier, const Env& env); + Form* pop_reg(Register reg, const RegSet& barrier, const Env& env); bool is_single_expression(); std::vector rewrite(FormPool& pool); std::vector rewrite_to_get_var(FormPool& pool, const Variable& var, const Env& env); diff --git a/decompiler/IR2/GenericElementMatcher.cpp b/decompiler/IR2/GenericElementMatcher.cpp index fb35da4475..56cfd3badb 100644 --- a/decompiler/IR2/GenericElementMatcher.cpp +++ b/decompiler/IR2/GenericElementMatcher.cpp @@ -47,9 +47,10 @@ Matcher Matcher::cast(const std::string& type, Matcher value) { return m; } -Matcher Matcher::any() { +Matcher Matcher::any(int match_id) { Matcher m; m.m_kind = Kind::ANY; + m.m_form_match = match_id; return m; } @@ -85,8 +86,13 @@ Matcher Matcher::deref(const Matcher& root, return m; } -bool Matcher::do_match(const Form* input, MatchResult::Maps* maps_out) const { +bool Matcher::do_match(Form* input, MatchResult::Maps* maps_out) const { switch (m_kind) { + case Kind::ANY: + if (m_form_match != -1) { + maps_out->forms[m_form_match] = input; + } + return true; case Kind::ANY_REG: { bool got = false; Variable result; @@ -285,7 +291,7 @@ Matcher Matcher::any_reg_cast_to_int_or_uint(int match_id) { {any_reg(match_id), cast("uint", any_reg(match_id)), cast("int", any_reg(match_id))}); } -MatchResult match(const Matcher& spec, const Form* input) { +MatchResult match(const Matcher& spec, Form* input) { MatchResult result; result.matched = spec.do_match(input, &result.maps); return result; @@ -336,7 +342,7 @@ GenericOpMatcher GenericOpMatcher::func(const Matcher& func_matcher) { return m; } -bool GenericOpMatcher::do_match(const GenericOperator& input, MatchResult::Maps* maps_out) const { +bool GenericOpMatcher::do_match(GenericOperator& input, MatchResult::Maps* maps_out) const { switch (m_kind) { case Kind::FIXED: if (input.kind() == GenericOperator::Kind::FIXED_OPERATOR) { diff --git a/decompiler/IR2/GenericElementMatcher.h b/decompiler/IR2/GenericElementMatcher.h index 398e45fbbb..ecdf941f00 100644 --- a/decompiler/IR2/GenericElementMatcher.h +++ b/decompiler/IR2/GenericElementMatcher.h @@ -16,6 +16,7 @@ struct MatchResult { struct Maps { std::vector> regs; std::unordered_map strings; + std::unordered_map forms; } maps; }; @@ -27,7 +28,7 @@ class Matcher { static Matcher fixed_op(FixedOperatorKind op, const std::vector& args); static Matcher match_or(const std::vector& args); static Matcher cast(const std::string& type, Matcher value); - static Matcher any(); + static Matcher any(int match_id = -1); static Matcher integer(std::optional value); static Matcher any_reg_cast_to_int_or_uint(int match_id = -1); static Matcher any_quoted_symbol(int match_id = -1); @@ -50,7 +51,7 @@ class Matcher { INVALID }; - bool do_match(const Form* input, MatchResult::Maps* maps_out) const; + bool do_match(Form* input, MatchResult::Maps* maps_out) const; private: std::vector m_sub_matchers; @@ -60,11 +61,12 @@ class Matcher { Kind m_kind = Kind::INVALID; int m_reg_out_id = -1; int m_string_out_id = -1; + int m_form_match = -1; std::optional m_int_match; std::string m_str; }; -MatchResult match(const Matcher& spec, const Form* input); +MatchResult match(const Matcher& spec, Form* input); class DerefTokenMatcher { public: @@ -88,7 +90,7 @@ class GenericOpMatcher { enum class Kind { FIXED, FUNC, INVALID }; - bool do_match(const GenericOperator& input, MatchResult::Maps* maps_out) const; + bool do_match(GenericOperator& input, MatchResult::Maps* maps_out) const; private: Kind m_kind = Kind::INVALID; diff --git a/decompiler/IR2/IR2_common.h b/decompiler/IR2/IR2_common.h index 06b7bf915f..66b6333401 100644 --- a/decompiler/IR2/IR2_common.h +++ b/decompiler/IR2/IR2_common.h @@ -1,6 +1,7 @@ #pragma once #include #include "common/common_types.h" +#include "common/goos/Object.h" #include "decompiler/Disasm/Register.h" #include "decompiler/util/TP_Type.h" #include "third-party/fmt/core.h" @@ -65,7 +66,7 @@ class Variable { AUTOMATIC, // print as variable, but if that's not possible print as reg. }; - std::string to_string(const Env* env, Print mode = Print::AUTOMATIC) const; + goos::Object to_form(const Env& env, Print mode = Print::AUTOMATIC) const; bool operator==(const Variable& other) const; bool operator!=(const Variable& other) const; @@ -105,9 +106,11 @@ enum class FixedOperatorKind { LOGNOR, LOGNOT, SLL, + SRL, CAR, CDR, NEW, + OBJECT_NEW, INVALID }; diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index 9862fa24c7..ed90d59b3d 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -155,6 +155,11 @@ ObjectFileDB::ObjectFileDB(const std::vector& _dgos, } lg::info("ObjectFileDB Initialized\n"); + if (obj_files_by_name.empty()) { + lg::die( + "No object files have been added. Check that there are input files and the allowed_objects " + "list."); + } } void ObjectFileDB::load_map_file(const std::string& map_data) { @@ -280,6 +285,12 @@ void ObjectFileDB::add_obj_from_dgo(const std::string& obj_name, const uint8_t* obj_data, uint32_t obj_size, const std::string& dgo_name) { + const auto& config = get_config(); + if (!config.allowed_objects.empty()) { + if (config.allowed_objects.find(obj_name) == config.allowed_objects.end()) { + return; + } + } stats.total_obj_files++; assert(obj_size > 128); uint16_t version = *(const uint16_t*)(obj_data + 8); diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index cb40a864aa..70fa4adbed 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -11,6 +11,7 @@ #include "decompiler/analysis/reg_usage.h" #include "decompiler/analysis/variable_naming.h" #include "decompiler/analysis/cfg_builder.h" +#include "decompiler/analysis/final_output.h" #include "decompiler/analysis/expression_build.h" #include "common/goos/PrettyPrinter.h" #include "decompiler/IR2/Form.h" @@ -387,11 +388,13 @@ void ObjectFileDB::ir2_build_expressions() { (void)segment_id; (void)data; total++; - if (func.ir2.top_form) { + if (func.ir2.top_form && func.ir2.env.has_type_analysis()) { attempted++; if (convert_to_expressions(func.ir2.top_form, *func.ir2.form_pool, func, dts)) { successful++; func.ir2.print_debug_forms = true; + // auto end = final_defun_out(func, func.ir2.env, dts); + // fmt::print("{}\n\n", end); } } }); @@ -433,14 +436,24 @@ std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) { // functions for (auto& func : data.linked_data.functions_by_seg.at(seg)) { result += ir2_function_to_string(data, func, seg); - if (func.ir2.top_form) { + if (func.ir2.top_form && func.ir2.env.has_local_vars()) { result += '\n'; - result += pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env)); + if (func.ir2.env.has_local_vars()) { + if (!func.ir2.print_debug_forms) { + result += ";; expression building failed part way through, function may be weird\n"; + } + result += final_defun_out(func, func.ir2.env, dts); + } else { + result += ";; no variable information\n"; + result += pretty_print::to_string(func.ir2.top_form->to_form(func.ir2.env)); + } + result += '\n'; } if (func.ir2.print_debug_forms) { result += '\n'; + result += ";; DEBUG OUTPUT BELOW THIS LINE:\n"; result += func.ir2.debug_form_string; result += '\n'; } @@ -591,7 +604,7 @@ std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function& auto& op = func.get_atomic_op_at_instr(instr_id); op_id = func.ir2.atomic_ops->instruction_to_atomic_op.at(instr_id); append_commented(line, printed_comment, - op.to_string(data.linked_data.labels, &func.ir2.env)); + op.to_form(data.linked_data.labels, func.ir2.env).print()); if (func.ir2.env.has_type_analysis()) { append_commented( diff --git a/decompiler/analysis/cfg_builder.cpp b/decompiler/analysis/cfg_builder.cpp index e9c99491d5..d19c10b57f 100644 --- a/decompiler/analysis/cfg_builder.cpp +++ b/decompiler/analysis/cfg_builder.cpp @@ -949,7 +949,8 @@ Form* try_sc_as_type_of(FormPool& pool, const Function& f, const ShortCircuit* v // remove the shift b0_ptr->pop_back(); - auto obj = pool.alloc_single_element_form(nullptr, shift->expr().get_arg(0)); + auto obj = pool.alloc_single_element_form( + nullptr, shift->expr().get_arg(0).as_expr(), set_shift->dst().idx()); auto type_op = pool.alloc_single_element_form(nullptr, obj, clobber); auto op = pool.alloc_element(else_case->dst(), type_op, true); b0_ptr->push_back(op); diff --git a/decompiler/analysis/expression_build.cpp b/decompiler/analysis/expression_build.cpp index 4ed4a02acf..400d8d3865 100644 --- a/decompiler/analysis/expression_build.cpp +++ b/decompiler/analysis/expression_build.cpp @@ -3,24 +3,11 @@ #include "decompiler/IR2/Form.h" #include "decompiler/IR2/FormStack.h" #include "decompiler/util/DecompilerTypeSystem.h" +#include "common/goos/PrettyPrinter.h" namespace decompiler { -void insert_extras_into_parent(Form* top_condition, Form* parent_form, FormElement* this_elt) { - auto real_condition = top_condition->back(); - top_condition->pop_back(); - - auto& parent_vector = parent_form->elts(); - // find us in the parent vector - auto me = std::find_if(parent_vector.begin(), parent_vector.end(), - [&](FormElement* x) { return x == this_elt; }); - assert(me != parent_vector.end()); - - // now insert the fake condition - parent_vector.insert(me, top_condition->elts().begin(), top_condition->elts().end()); - top_condition->elts() = {real_condition}; -} - +// TODO - remove all these and put them in the analysis methods instead. void clean_up_ifs(Form* top_level_form) { bool changed = true; while (changed) { @@ -49,30 +36,6 @@ void clean_up_ifs(Form* top_level_form) { } }); - top_level_form->apply([&](FormElement* elt) { - auto as_cwe = dynamic_cast(elt); - if (!as_cwe) { - return; - } - - auto top_condition = as_cwe->entries.front().condition; - if (!top_condition->is_single_element() && elt->parent_form) { - auto real_condition = top_condition->back(); - top_condition->pop_back(); - - auto& parent_vector = elt->parent_form->elts(); - // find us in the parent vector - auto me = std::find_if(parent_vector.begin(), parent_vector.end(), - [&](FormElement* x) { return x == elt; }); - assert(me != parent_vector.end()); - - // now insert the fake condition - parent_vector.insert(me, top_condition->elts().begin(), top_condition->elts().end()); - top_condition->elts() = {real_condition}; - changed = true; - } - }); - top_level_form->apply([&](FormElement* elt) { auto as_sc = dynamic_cast(elt); if (!as_sc) { @@ -95,22 +58,6 @@ void clean_up_ifs(Form* top_level_form) { top_condition->elts() = {real_condition}; changed = true; } - // if (!changed) { - // auto as_condition = - // dynamic_cast(top_condition->try_as_single_element()); if - // (as_condition) { - // if (as_condition->op().kind() == GenericOperator::Kind::CONDITION_OPERATOR) { - // if (as_condition->op().condition_kind() == IR2_Condition::Kind::TRUTHY) { - // auto to_repack = as_condition->elts().front(); - // if (!to_repack->try_as_single_element() && as_condition->parent_form) { - // changed = true; - // insert_extras_into_parent(to_repack, as_condition->parent_form, - // as_condition); - // } - // } - // } - // } - // } }); top_level_form->apply([&](FormElement* elt) { @@ -146,10 +93,12 @@ void clean_up_ifs(Form* top_level_form) { bool convert_to_expressions(Form* top_level_form, FormPool& pool, - const Function& f, + Function& f, const DecompilerTypeSystem& dts) { assert(top_level_form); + // fmt::print("Before anything:\n{}\n", + // pretty_print::to_string(top_level_form->to_form(f.ir2.env))); try { // top_level_form->apply_form([&](Form* form) { // if (form == top_level_form || !form->is_single_element()) { @@ -176,6 +125,7 @@ bool convert_to_expressions(Form* top_level_form, for (auto& entry : top_level_form->elts()) { // fmt::print("push {} to stack\n", entry->to_form(f.ir2.env).print()); entry->push_to_stack(f.ir2.env, pool, stack); + // fmt::print("Stack is now:\n{}\n", stack.print(f.ir2.env)); } std::vector new_entries; if (f.type.last_arg() != TypeSpec("none")) { @@ -200,11 +150,15 @@ bool convert_to_expressions(Form* top_level_form, top_level_form->push_back(x); } + // fmt::print("Before clean:\n{}\n", + // pretty_print::to_string(top_level_form->to_form(f.ir2.env))); // fix up stuff clean_up_ifs(top_level_form); } catch (std::exception& e) { - lg::warn("Expression building failed: {}", e.what()); + std::string warning = fmt::format("Expression building failed: {}", e.what()); + lg::warn(warning); + f.warnings.append(";; " + warning); return false; } diff --git a/decompiler/analysis/expression_build.h b/decompiler/analysis/expression_build.h index bb1e803fa0..6cf6220656 100644 --- a/decompiler/analysis/expression_build.h +++ b/decompiler/analysis/expression_build.h @@ -7,6 +7,6 @@ class FormPool; class DecompilerTypeSystem; bool convert_to_expressions(Form* top_level_form, FormPool& pool, - const Function& f, + Function& f, const DecompilerTypeSystem& dts); } // namespace decompiler \ No newline at end of file diff --git a/decompiler/analysis/final_output.cpp b/decompiler/analysis/final_output.cpp new file mode 100644 index 0000000000..f47bfc0dd9 --- /dev/null +++ b/decompiler/analysis/final_output.cpp @@ -0,0 +1,84 @@ +#include "final_output.h" +#include "decompiler/IR2/Form.h" +#include "common/goos/PrettyPrinter.h" +#include "decompiler/util/DecompilerTypeSystem.h" + +namespace decompiler { + +namespace { +void append(goos::Object& _in, const goos::Object& add) { + auto* in = &_in; + while (in->is_pair() && !in->as_pair()->cdr.is_empty_list()) { + in = &in->as_pair()->cdr; + } + + if (!in->is_pair()) { + assert(false); // invalid list + } + in->as_pair()->cdr = add; +} +} // namespace + +std::string final_defun_out(const Function& func, const Env& env, const DecompilerTypeSystem& dts) { + auto code_body = func.ir2.top_form->to_form(env); + + int var_count = 0; + auto var_dec = env.local_var_type_list(func.ir2.top_form, func.type.arg_count() - 1, &var_count); + + std::vector argument_elts; + assert(func.type.arg_count() >= 1); + for (size_t i = 0; i < func.type.arg_count() - 1; i++) { + argument_elts.push_back( + pretty_print::build_list(fmt::format("a{}-0", i), func.type.get_arg(i).print())); + } + auto arguments = pretty_print::build_list(argument_elts); + + if (func.guessed_name.kind == FunctionName::FunctionKind::GLOBAL) { + std::vector top; + top.push_back(pretty_print::to_symbol("defun")); + top.push_back(pretty_print::to_symbol(func.guessed_name.to_string())); + top.push_back(arguments); + auto top_form = pretty_print::build_list(top); + + if (var_count > 0) { + append(top_form, pretty_print::build_list(var_dec)); + } + + append(top_form, pretty_print::build_list(code_body)); + return pretty_print::to_string(top_form); + } + + if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) { + std::vector top; + top.push_back(pretty_print::to_symbol("defmethod")); + auto method_info = + dts.ts.lookup_method(func.guessed_name.type_name, func.guessed_name.method_id); + top.push_back(pretty_print::to_symbol(method_info.name)); + top.push_back(pretty_print::to_symbol(func.guessed_name.type_name)); + top.push_back(arguments); + auto top_form = pretty_print::build_list(top); + + if (var_count > 0) { + append(top_form, pretty_print::build_list(var_dec)); + } + + append(top_form, pretty_print::build_list(code_body)); + return pretty_print::to_string(top_form); + } + + if (func.guessed_name.kind == FunctionName::FunctionKind::TOP_LEVEL_INIT) { + std::vector top; + top.push_back(pretty_print::to_symbol("top-level-function")); + top.push_back(arguments); + auto top_form = pretty_print::build_list(top); + + if (var_count > 0) { + append(top_form, pretty_print::build_list(var_dec)); + } + + append(top_form, pretty_print::build_list(code_body)); + return pretty_print::to_string(top_form); + } + return "nyi"; +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/analysis/final_output.h b/decompiler/analysis/final_output.h new file mode 100644 index 0000000000..8da3ffb634 --- /dev/null +++ b/decompiler/analysis/final_output.h @@ -0,0 +1,7 @@ +#pragma once +#include +#include "decompiler/Function/Function.h" + +namespace decompiler { +std::string final_defun_out(const Function& func, const Env& env, const DecompilerTypeSystem& dts); +} diff --git a/decompiler/config.cpp b/decompiler/config.cpp index 185c5de3d8..a0d6b500cc 100644 --- a/decompiler/config.cpp +++ b/decompiler/config.cpp @@ -76,6 +76,11 @@ void set_config(const std::string& path_to_config_file) { gConfig.bad_inspect_types.insert(x); } + auto allowed = cfg.at("allowed_objects").get>(); + for (const auto& x : allowed) { + gConfig.allowed_objects.insert(x); + } + auto type_hints_json = read_json_file_from_config(cfg, "type_hints_file"); for (auto& kv : type_hints_json.items()) { auto& function_name = kv.key(); diff --git a/decompiler/config.h b/decompiler/config.h index 7fc2c9da46..97e464cc1d 100644 --- a/decompiler/config.h +++ b/decompiler/config.h @@ -39,6 +39,7 @@ struct Config { std::unordered_set asm_functions_by_name; std::unordered_set pair_functions_by_name; std::unordered_set no_type_analysis_functions_by_name; + std::unordered_set allowed_objects; std::unordered_map>> type_hints_by_function_by_idx; std::unordered_map> diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index 87b40a698f..51215bfb4e 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -51,6 +51,7 @@ "STR/GRSOBBB.STR","STR/SA3INTRO.STR" ], "str_file_names_":[], + "allowed_objects":[], "type_hints_file":"decompiler/config/jak1_ntsc_black_label/type_hints.jsonc", "anonymous_function_types_file":"decompiler/config/jak1_ntsc_black_label/anonymous_function_types.jsonc", diff --git a/doc/changelog.md b/doc/changelog.md index 814c211a1a..70ee70e46a 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -106,4 +106,5 @@ - Improved getting the value of `#f`, `#t`, and `()`. - Accessing a constant field of an array now constant propagates the memory offset like field access and avoids a runtime multiply. - Fixed a bug where loading or storing a `vf` register from a memory location + constant offset would cause the compiler to throw an error. -- Accessing array elements uses more efficient indexing for power-of-two element sizes. \ No newline at end of file +- Accessing array elements uses more efficient indexing for power-of-two element sizes. +- Added a `local-vars` form for declaring a bunch of local variables for the decompiler. \ No newline at end of file diff --git a/doc/goal_doc.md b/doc/goal_doc.md index 19e0529ee9..dcb0048583 100644 --- a/doc/goal_doc.md +++ b/doc/goal_doc.md @@ -895,6 +895,16 @@ Example: This form will probably get more options in the future. +## `local-vars` +Declare variables local to a function, without an initial value. This will be used by the decompiler before `let` has been fully implemented. +```lisp +(local-vars (name type-spec)...) +``` + +The name can be any valid symbol. The scope of the variable is _always_ the function scope. Other scopes inside a function will always hide variables declared with `local-vars`. The type can be any GOAL typespec. If you use `float`, you get a floating point register, otherwise you get a normal GPR. + +It's recommended to avoid using this form. + # Compiler Forms - Macro Forms ## `#cond` diff --git a/goal_src/goal-lib.gc b/goal_src/goal-lib.gc index b3e5b1cc1a..a8fc41914b 100644 --- a/goal_src/goal-lib.gc +++ b/goal_src/goal-lib.gc @@ -496,3 +496,16 @@ (defmacro finish-test () `(format #t "Test ~A: ~D Passes~%" *test-name* *test-count*) ) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Decompiler Macros +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defmacro <.si (a b) + `(< (the-as int ,a) (the-as int ,b)) + ) + +(defmacro <0.si (a) + `(< (the-as int ,a) (the-as int 0)) + ) \ No newline at end of file diff --git a/goalc/compiler/Compiler.h b/goalc/compiler/Compiler.h index 9493367d32..19ecccb8d2 100644 --- a/goalc/compiler/Compiler.h +++ b/goalc/compiler/Compiler.h @@ -373,6 +373,7 @@ class Compiler { Val* compile_lambda(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_inline(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_declare(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_local_vars(const goos::Object& form, const goos::Object& rest, Env* env); // Type Val* compile_deftype(const goos::Object& form, const goos::Object& rest, Env* env); diff --git a/goalc/compiler/compilation/Atoms.cpp b/goalc/compiler/compilation/Atoms.cpp index 88520ea0c7..ca83bce137 100644 --- a/goalc/compiler/compilation/Atoms.cpp +++ b/goalc/compiler/compilation/Atoms.cpp @@ -94,6 +94,7 @@ static const std::unordered_map< {"lambda", &Compiler::compile_lambda}, {"declare", &Compiler::compile_declare}, {"inline", &Compiler::compile_inline}, + {"local-vars", &Compiler::compile_local_vars}, // {"with-inline", &Compiler::compile_with_inline}, // {"get-ra-ptr", &Compiler::compile_get_ra_ptr}, diff --git a/goalc/compiler/compilation/Function.cpp b/goalc/compiler/compilation/Function.cpp index 2f421b9b61..68537fc8ae 100644 --- a/goalc/compiler/compilation/Function.cpp +++ b/goalc/compiler/compilation/Function.cpp @@ -64,6 +64,44 @@ Val* Compiler::compile_inline(const goos::Object& form, const goos::Object& rest return fe->alloc_val(kv->second->type(), kv->second); } +Val* Compiler::compile_local_vars(const goos::Object& form, const goos::Object& rest, Env* env) { + auto fe = get_parent_env_of_type(env); + + for_each_in_list(rest, [&](const goos::Object& o) { + if (o.is_symbol()) { + // if it has no type, assume object. + auto name = symbol_string(o); + if (fe->params.find(name) != fe->params.end()) { + throw_compiler_error(form, "Cannot declare a local named {}, this already exists.", name); + } + auto ireg = fe->make_ireg(m_ts.make_typespec("object"), RegClass::GPR_64); + ireg->mark_as_settable(); + fe->params[name] = ireg; + } else { + auto param_args = get_va(o, o); + va_check(o, param_args, {goos::ObjectType::SYMBOL, {}}, {}); + auto name = symbol_string(param_args.unnamed.at(0)); + auto type = parse_typespec(param_args.unnamed.at(1)); + + if (fe->params.find(name) != fe->params.end()) { + throw_compiler_error(form, "Cannot declare a local named {}, this already exists.", name); + } + + if (type == TypeSpec("float")) { + auto ireg = fe->make_ireg(type, RegClass::FLOAT); + ireg->mark_as_settable(); + fe->params[name] = ireg; + } else { + auto ireg = fe->make_ireg(type, RegClass::GPR_64); + ireg->mark_as_settable(); + fe->params[name] = ireg; + } + } + }); + + return get_none(); +} + /*! * Compile a lambda. This is used for real lambdas, lets, and defuns. So there are a million * confusing special cases... diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5a40bbe02c..63a5458cab 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -22,6 +22,7 @@ add_executable(goalc-test decompiler/test_AtomicOpBuilder.cpp decompiler/test_FormBeforeExpressions.cpp decompiler/test_FormExpressionBuild.cpp + decompiler/test_FormExpressionBuildLong.cpp decompiler/test_InstructionParser.cpp ${GOALC_TEST_FRAMEWORK_SOURCES} ${GOALC_TEST_CASES}) diff --git a/test/decompiler/FormRegressionTest.cpp b/test/decompiler/FormRegressionTest.cpp index 1ffa33f89e..a87af3273c 100644 --- a/test/decompiler/FormRegressionTest.cpp +++ b/test/decompiler/FormRegressionTest.cpp @@ -6,6 +6,7 @@ #include "decompiler/analysis/expression_build.h" #include "common/goos/PrettyPrinter.h" #include "decompiler/IR2/Form.h" +#include "third-party/json.hpp" using namespace decompiler; @@ -70,12 +71,18 @@ std::unique_ptr FormRegressionTest::make_function( bool do_expressions, bool allow_pairs, const std::string& method_name, - const std::vector>& strings) { + const std::vector>& strings, + const std::unordered_map>& hints) { dts->type_prop_settings.locked = true; dts->type_prop_settings.reset(); dts->type_prop_settings.allow_pair = allow_pairs; dts->type_prop_settings.current_method_type = method_name; - auto program = parser->parse_program(code); + + std::vector string_label_names; + for (auto& x : strings) { + string_label_names.push_back(x.first); + } + auto program = parser->parse_program(code, string_label_names); // printf("prg:\n%s\n\n", program.print().c_str()); auto test = std::make_unique(program.instructions.size()); test->file.words_by_seg.resize(3); @@ -100,7 +107,7 @@ std::unique_ptr FormRegressionTest::make_function( test->func.ir2.atomic_ops_succeeded = true; test->func.ir2.env.set_end_var(test->func.ir2.atomic_ops->end_op().return_var()); - EXPECT_TRUE(test->func.run_type_analysis_ir2(function_type, *dts, test->file, {})); + EXPECT_TRUE(test->func.run_type_analysis_ir2(function_type, *dts, test->file, hints)); test->func.ir2.env.set_reg_use(analyze_ir2_register_usage(test->func)); @@ -152,9 +159,10 @@ void FormRegressionTest::test(const std::string& code, bool do_expressions, bool allow_pairs, const std::string& method_name, - const std::vector>& strings) { + const std::vector>& strings, + const std::unordered_map>& hints) { auto ts = dts->parse_type_spec(type); - auto test = make_function(code, ts, do_expressions, allow_pairs, method_name, strings); + auto test = make_function(code, ts, do_expressions, allow_pairs, method_name, strings, hints); ASSERT_TRUE(test); auto expected_form = pretty_print::get_pretty_printer_reader().read_from_string(expected, false).as_pair()->car; @@ -172,5 +180,22 @@ void FormRegressionTest::test(const std::string& code, EXPECT_TRUE(expected_form == actual_form); } +std::unordered_map> FormRegressionTest::parse_hint_json( + const std::string& in) { + std::unordered_map> out; + auto hints = nlohmann::json::parse(in); + for (auto& hint : hints) { + auto idx = hint.at(0).get(); + for (size_t i = 1; i < hint.size(); i++) { + auto& assignment = hint.at(i); + TypeHint type_hint; + type_hint.reg = Register(assignment.at(0).get()); + type_hint.type_name = assignment.at(1).get(); + out[idx].push_back(type_hint); + } + } + return out; +} + std::unique_ptr FormRegressionTest::parser; std::unique_ptr FormRegressionTest::dts; \ No newline at end of file diff --git a/test/decompiler/FormRegressionTest.h b/test/decompiler/FormRegressionTest.h index 4e229886f0..f88a76a9ce 100644 --- a/test/decompiler/FormRegressionTest.h +++ b/test/decompiler/FormRegressionTest.h @@ -7,6 +7,10 @@ #include "decompiler/Function/Function.h" #include "decompiler/ObjectFile/LinkedObjectFile.h" +namespace decompiler { +struct TypeHint; +} + class FormRegressionTest : public ::testing::Test { protected: static std::unique_ptr parser; @@ -29,7 +33,8 @@ class FormRegressionTest : public ::testing::Test { bool do_expressions, bool allow_pairs = false, const std::string& method_name = "", - const std::vector>& strings = {}); + const std::vector>& strings = {}, + const std::unordered_map>& hints = {}); void test(const std::string& code, const std::string& type, @@ -37,23 +42,29 @@ class FormRegressionTest : public ::testing::Test { bool do_expressions, bool allow_pairs = false, const std::string& method_name = "", - const std::vector>& strings = {}); + const std::vector>& strings = {}, + const std::unordered_map>& hints = {}); void test_no_expr(const std::string& code, const std::string& type, const std::string& expected, bool allow_pairs = false, const std::string& method_name = "", - const std::vector>& strings = {}) { - test(code, type, expected, false, allow_pairs, method_name, strings); + const std::vector>& strings = {}, + const std::unordered_map>& hints = {}) { + test(code, type, expected, false, allow_pairs, method_name, strings, hints); } - void test_with_expr(const std::string& code, - const std::string& type, - const std::string& expected, - bool allow_pairs = false, - const std::string& method_name = "", - const std::vector>& strings = {}) { - test(code, type, expected, true, allow_pairs, method_name, strings); + void test_with_expr( + const std::string& code, + const std::string& type, + const std::string& expected, + bool allow_pairs = false, + const std::string& method_name = "", + const std::vector>& strings = {}, + const std::unordered_map>& hints = {}) { + test(code, type, expected, true, allow_pairs, method_name, strings, hints); } + + std::unordered_map> parse_hint_json(const std::string& in); }; \ No newline at end of file diff --git a/test/decompiler/test_AtomicOpBuilder.cpp b/test/decompiler/test_AtomicOpBuilder.cpp index 2ca6b8c6e3..f35d25e472 100644 --- a/test/decompiler/test_AtomicOpBuilder.cpp +++ b/test/decompiler/test_AtomicOpBuilder.cpp @@ -56,7 +56,7 @@ void test_case(std::string assembly_lines, // check the we get the right result: for (size_t i = 0; i < container.ops.size(); i++) { const auto& op = container.ops.at(i); - EXPECT_EQ(op->to_string(prg.labels, &env), output_lines.at(i)); + EXPECT_EQ(op->to_form(prg.labels, env).print(), output_lines.at(i)); // check that the registers read/written are identified for the operation diff --git a/test/decompiler/test_FormBeforeExpressions.cpp b/test/decompiler/test_FormBeforeExpressions.cpp index 6e5c85c8ba..70d8a5ed20 100644 --- a/test/decompiler/test_FormBeforeExpressions.cpp +++ b/test/decompiler/test_FormBeforeExpressions.cpp @@ -804,11 +804,11 @@ TEST_F(FormRegressionTest, NewMethod) { " (set! a1-2 (*.ui gp-0 a1-1))\n" " (set! a2-2 (+ a2-1 a1-2))\n" " (set! a1-3 v1-1)\n" // size! - " (set! v0-0 (call!))\n" + " (set! v0-0 (call! a0-0 a1-3 a2-2))\n" " (nonzero? v0-0)\n" // only if we got memory... " )\n" - " (s.w! v0-0 gp-0)\n" // store size - " (s.w! (+ v0-0 4) gp-0)\n" + " (set! (-> v0-0 length) gp-0)\n" // store size + " (set! (-> v0-0 allocated-length) gp-0)\n" " )" " (ret-value v0-0))\n"; test_no_expr(func, type, expected, false, "inline-array-class"); diff --git a/test/decompiler/test_FormExpressionBuild.cpp b/test/decompiler/test_FormExpressionBuild.cpp index 6fff82ec51..7b6da961ea 100644 --- a/test/decompiler/test_FormExpressionBuild.cpp +++ b/test/decompiler/test_FormExpressionBuild.cpp @@ -378,7 +378,6 @@ TEST_F(FormRegressionTest, ExprTrue) { TEST_F(FormRegressionTest, ExprPrintBfloat) { std::string func = " sll r0, r0, 0\n" - "L343:\n" " daddiu sp, sp, -32\n" " sd ra, 0(sp)\n" " sd fp, 8(sp)\n" @@ -1551,7 +1550,9 @@ TEST_F(FormRegressionTest, ExprSort) { " (not\n" " (or (= (cdr s3-0) (quote ())) (>=0.si (sll (the-as uint (cdr s3-0)) 62)))\n" " )\n" - " (set! v1-1 (s5-0 (car s3-0) (car (cdr s3-0))))\n" + " (set! s2-0 (car s3-0))\n" + " (set! s1-0 (car (cdr s3-0)))\n" + " (set! v1-1 (s5-0 s2-0 s1-0))\n" " (when\n" " (and (or (not v1-1) (>0.si v1-1)) (!= v1-2 (quote #t)))\n" " (set! s4-0 (+ s4-0 1))\n" @@ -1568,4 +1569,614 @@ TEST_F(FormRegressionTest, ExprSort) { " gp-0\n" " )"; test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprInlineArrayMethod0) { + std::string func = + " sll r0, r0, 0\n" + " daddiu sp, sp, -32\n" + " sd ra, 0(sp)\n" + " sq gp, 16(sp)\n" + + " or gp, a2, r0\n" + " lw v1, object(s7)\n" + " lwu t9, 16(v1)\n" + " or v1, a1, r0\n" + " lhu a2, 8(a1)\n" + " lhu a1, 12(a1)\n" + " multu3 a1, gp, a1\n" + " daddu a2, a2, a1\n" + " or a1, v1, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " beq v0, r0, L199\n" + " or v1, s7, r0\n" + + " sw gp, 0(v0)\n" + " sw gp, 4(v0)\n" + + "L199:\n" + " ld ra, 0(sp)\n" + " lq gp, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 32"; + std::string type = "(function symbol type int inline-array-class)"; + + std::string expected = + "(begin\n" + " (set! gp-0 a2-0)\n" + " (set!\n" + " v0-0\n" + " (object-new\n" + " a0-0\n" + " a1-0\n" + " (+ (-> a1-0 size) (* (the-as uint gp-0) (-> a1-0 heap-base)))\n" + " )\n" + " )\n" + " (when\n" + " (nonzero? v0-0)\n" + " (set! (-> v0-0 length) gp-0)\n" + " (set! (-> v0-0 allocated-length) gp-0)\n" + " )\n" + " v0-0\n" + " )"; + test_with_expr(func, type, expected, true, "inline-array-class"); +} + +TEST_F(FormRegressionTest, ExprInlineArrayMethod4) { + std::string func = + " sll r0, r0, 0\n" + " lw v0, 0(a0)\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function inline-array-class int)"; + + std::string expected = "(-> a0-0 length)"; + test_with_expr(func, type, expected, true, "inline-array-class"); +} + +TEST_F(FormRegressionTest, ExprInlineArrayMethod5) { + std::string func = + " sll r0, r0, 0\n" + " lwu v1, -4(a0)\n" + " lhu v1, 8(v1)\n" + " lw a1, 4(a0)\n" + " lwu a0, -4(a0)\n" + " lhu a0, 12(a0)\n" + " mult3 a0, a1, a0\n" + " daddu v0, v1, a0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function inline-array-class int)"; + + std::string expected = + "(the-as int\n" + " (+ (-> a0-0 type size)\n" + " (the-as uint\n" + " (* (-> a0-0 allocated-length)" + " (the-as int (-> a0-0 type heap-base)))\n" + " )\n" + " )\n" + " )"; + test_with_expr(func, type, expected, true, "inline-array-class"); +} + +TEST_F(FormRegressionTest, ExprArrayMethod0) { + std::string func = + " sll r0, r0, 0\n" + " daddiu sp, sp, -112\n" + " sd ra, 0(sp)\n" + " sq s1, 16(sp)\n" + " sq s2, 32(sp)\n" + " sq s3, 48(sp)\n" + " sq s4, 64(sp)\n" + " sq s5, 80(sp)\n" + " sq gp, 96(sp)\n" + + " or gp, a2, r0\n" + " or s5, a3, r0\n" + " lw v1, object(s7)\n" + " lwu s4, 16(v1)\n" + " or s3, a0, r0\n" + " or s2, a1, r0\n" + " lhu s1, 8(a1)\n" + " lw t9, type-type?(s7)\n" + " or a0, gp, r0\n" + " lw a1, number(s7)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " beq s7, v0, L194\n" + " sll r0, r0, 0\n" + + " lhu v1, 8(gp)\n" + " beq r0, r0, L195\n" + " sll r0, r0, 0\n" + + "L194:\n" + " addiu v1, r0, 4\n" + + "L195:\n" + " mult3 v1, s5, v1\n" + " daddu a2, s1, v1\n" + " or t9, s4, r0\n" + " or a0, s3, r0\n" + " or a1, s2, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " sw s5, 4(v0)\n" + " sw s5, 0(v0)\n" + " sw gp, 8(v0)\n" + " ld ra, 0(sp)\n" + " lq gp, 96(sp)\n" + " lq s5, 80(sp)\n" + " lq s4, 64(sp)\n" + " lq s3, 48(sp)\n" + " lq s2, 32(sp)\n" + " lq s1, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 112"; + std::string type = "(function symbol type type int array)"; + + std::string expected = + "(begin\n" + " (set! gp-0 a2-0)\n" + " (set! s5-0 a3-0)\n" + " (set!\n" + " v0-1\n" + " (object-new\n" + " a0-0\n" + " a1-0\n" + " (+\n" + " (-> a1-0 size)\n" + " (the-as uint (* s5-0 (if (type-type? gp-0 number) (-> gp-0 size) 4)))\n" + " )\n" + " )\n" + " )\n" + " (set! (-> v0-1 allocated-length) s5-0)\n" + " (set! (-> v0-1 length) s5-0)\n" + " (set! (-> v0-1 content-type) gp-0)\n" + " v0-1\n" + " )"; + test_with_expr(func, type, expected, true, "array"); +} + +TEST_F(FormRegressionTest, ExprArrayMethod4) { + std::string func = + " sll r0, r0, 0\n" + "L90:\n" + " lw v0, 0(a0)\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function array int)"; + + std::string expected = "(-> a0-0 length)"; + test_with_expr(func, type, expected, true, "array"); +} + +TEST_F(FormRegressionTest, ExprArrayMethod5) { + std::string func = + " sll r0, r0, 0\n" + "L87:\n" + " daddiu sp, sp, -64\n" + " sd ra, 0(sp)\n" + " sq s4, 16(sp)\n" + " sq s5, 32(sp)\n" + " sq gp, 48(sp)\n" + " or s4, a0, r0\n" + " lw v1, array(s7)\n" + " lhu gp, 8(v1)\n" + " lw s5, 4(s4)\n" + " lw t9, type-type?(s7)\n" + " lwu a0, 8(s4)\n" + " lw a1, number(s7)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " beq s7, v0, L88\n" + " sll r0, r0, 0\n" + "\n" + " lwu v1, 8(s4)\n" + " lhu v1, 8(v1)\n" + " beq r0, r0, L89\n" + " sll r0, r0, 0\n" + "\n" + "L88:\n" + " addiu v1, r0, 4\n" + "L89:\n" + " mult3 v1, s5, v1\n" + " daddu v0, gp, v1\n" + " ld ra, 0(sp)\n" + " lq gp, 48(sp)\n" + " lq s5, 32(sp)\n" + " lq s4, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 64"; + std::string type = "(function array int)"; + + std::string expected = + "(begin\n" + " (set! s4-0 a0-0)\n" + " (the-as\n" + " int\n" + " (+\n" + " (-> array size)\n" + " (the-as\n" + " uint\n" + " (*\n" + " (-> s4-0 allocated-length)\n" + " (if\n" + " (type-type? (-> s4-0 content-type) number)\n" + " (-> s4-0 content-type size)\n" + " 4\n" + " )\n" + " )\n" + " )\n" + " )\n" + " )\n" + " )"; + test_with_expr(func, type, expected, true, "array"); +} + +TEST_F(FormRegressionTest, ExprMemCopy) { + std::string func = + " sll r0, r0, 0\n" + "L84:\n" + " or v0, a0, r0\n" + " addiu v1, r0, 0\n" + " beq r0, r0, L86\n" + " sll r0, r0, 0\n" + + "L85:\n" + " lbu a3, 0(a1)\n" + " sb a3, 0(a0)\n" + " daddiu a0, a0, 1\n" + " daddiu a1, a1, 1\n" + " daddiu v1, v1, 1\n" + "L86:\n" + " slt a3, v1, a2\n" + " bne a3, r0, L85\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function pointer pointer int pointer)"; + + std::string expected = + "(begin\n" + " (set! v0-0 a0-0)\n" + " (set! v1-0 0)\n" + " (while\n" + " (<.si v1-0 a2-0)\n" + " (set! (-> (the-as (pointer int8) a0-0)) (-> (the-as (pointer uint8) a1-0)))\n" + " (set! a0-0 (+ a0-0 (the-as uint 1)))\n" + " (set! a1-0 (+ a1-0 (the-as uint 1)))\n" + " (set! v1-0 (+ v1-0 1))\n" + " )\n" + " (set! v1-1 (quote #f))\n" + " (set! v1-2 (quote #f))\n" + " v0-0\n" + " )"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprMemSet32) { + std::string func = + " sll r0, r0, 0\n" + "L75:\n" + " or v0, a0, r0\n" + " addiu v1, r0, 0\n" + " beq r0, r0, L77\n" + " sll r0, r0, 0\n" + "\n" + "L76:\n" + " sw a2, 0(a0)\n" + " daddiu a0, a0, 4\n" + " sll r0, r0, 0\n" + " daddiu v1, v1, 1\n" + "L77:\n" + " slt a3, v1, a1\n" + " bne a3, r0, L76\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " jr ra\n" + " daddu sp, sp, r0\n"; + std::string type = "(function pointer int int pointer)"; + + std::string expected = + "(begin\n" + " (set! v0-0 a0-0)\n" + " (set! v1-0 0)\n" + " (while\n" + " (<.si v1-0 a1-0)\n" + " (set! (-> (the-as (pointer int32) a0-0)) a2-0)\n" + " (set! a0-0 (+ a0-0 (the-as uint 4)))\n" + " (nop!)\n" + " (set! v1-0 (+ v1-0 1))\n" + " )\n" + " (set! v1-1 (quote #f))\n" + " (set! v1-2 (quote #f))\n" + " v0-0\n" + " )"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprMemOr) { + std::string func = + " sll r0, r0, 0\n" + "L72:\n" + " or v0, a0, r0\n" + " addiu v1, r0, 0\n" + " beq r0, r0, L74\n" + " sll r0, r0, 0\n" + "\n" + "L73:\n" + " lbu a3, 0(a0)\n" + " lbu t0, 0(a1)\n" + " or a3, a3, t0\n" + " sb a3, 0(a0)\n" + " daddiu a0, a0, 1\n" + " daddiu a1, a1, 1\n" + " daddiu v1, v1, 1\n" + "L74:\n" + " slt a3, v1, a2\n" + " bne a3, r0, L73\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function pointer pointer int pointer)"; + + std::string expected = + "(begin\n" + " (set! v0-0 a0-0)\n" + " (set! v1-0 0)\n" + " (while\n" + " (<.si v1-0 a2-0)\n" + " (set!\n" + " (-> (the-as (pointer int8) a0-0))\n" + " (logior\n" + " (-> (the-as (pointer uint8) a0-0))\n" + " (-> (the-as (pointer uint8) a1-0))\n" + " )\n" + " )\n" + " (set! a0-0 (+ a0-0 (the-as uint 1)))\n" + " (set! a1-0 (+ a1-0 (the-as uint 1)))\n" + " (set! v1-0 (+ v1-0 1))\n" + " )\n" + " (set! v1-1 (quote #f))\n" + " (set! v1-2 (quote #f))\n" + " v0-0\n" + " )"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprFact) { + std::string func = + " sll r0, r0, 0\n" + "L65:\n" + " daddiu sp, sp, -32\n" + " sd ra, 0(sp)\n" + " sq gp, 16(sp)\n" + " or gp, a0, r0\n" + " addiu v1, r0, 1\n" + " bne gp, v1, L66\n" + " sll r0, r0, 0\n" + "\n" + " addiu v0, r0, 1\n" + " beq r0, r0, L67\n" + " sll r0, r0, 0\n" + "\n" + "L66:\n" + " lw t9, fact(s7)\n" + " daddiu a0, gp, -1\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " mult3 v0, gp, v0\n" + "L67:\n" + " ld ra, 0(sp)\n" + " lq gp, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 32"; + std::string type = "(function int int)"; + + std::string expected = "(begin (set! gp-0 a0-0) (if (= gp-0 1) 1 (* gp-0 (fact (+ gp-0 -1)))))"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprPrint) { + std::string func = + " sll r0, r0, 0\n" + "L63:\n" + " daddiu sp, sp, -16\n" + " sd ra, 0(sp)\n" + " dsll32 v1, a0, 29\n" + " beql v1, r0, L64\n" + " lw v1, binteger(s7)\n" + "\n" + " bgtzl v1, L64\n" + " lw v1, pair(s7)\n" + "\n" + " lwu v1, -4(a0)\n" + "L64:\n" + " lwu t9, 24(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " ld ra, 0(sp)\n" + " jr ra\n" + " daddiu sp, sp, 16"; + std::string type = "(function object object)"; + + std::string expected = "((method-of-type (type-of a0-0) print) a0-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprPrintl) { + std::string func = + " sll r0, r0, 0\n" + "L61:\n" + " daddiu sp, sp, -32\n" + " sd ra, 0(sp)\n" + " sd fp, 8(sp)\n" + " or fp, t9, r0\n" + " sq gp, 16(sp)\n" + " or gp, a0, r0\n" + " or a0, gp, r0\n" + " dsll32 v1, a0, 29\n" + " beql v1, r0, L62\n" + " lw v1, binteger(s7)\n" + "\n" + " bgtzl v1, L62\n" + " lw v1, pair(s7)\n" + "\n" + " lwu v1, -4(a0)\n" + "L62:\n" + " lwu t9, 24(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L324\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v0, gp, r0\n" + " ld ra, 0(sp)\n" + " ld fp, 8(sp)\n" + " lq gp, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 32"; + std::string type = "(function object object)"; + + // todo - I think this is a sign that we're unscrambling method calls in the wrong order. + // but I want to wait for a less confusing example before making a change. + std::string expected = + "(begin\n" + " (set! gp-0 a0-0)\n" + " (set! a0-1 gp-0)\n" + " (set! v1-2 ((method-of-type (type-of a0-1) print) a0-1))\n" + " (format (quote #t) L324)\n" + " gp-0\n" + " )"; + test_with_expr(func, type, expected, false, "", {{"L324", "~%"}}); +} + +TEST_F(FormRegressionTest, ExprInspect) { + std::string func = + " sll r0, r0, 0\n" + "L59:\n" + " daddiu sp, sp, -16\n" + " sd ra, 0(sp)\n" + " dsll32 v1, a0, 29\n" + " beql v1, r0, L60\n" + " lw v1, binteger(s7)\n" + "\n" + " bgtzl v1, L60\n" + " lw v1, pair(s7)\n" + "\n" + " lwu v1, -4(a0)\n" + "L60:\n" + " lwu t9, 28(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " ld ra, 0(sp)\n" + " jr ra\n" + " daddiu sp, sp, 16"; + std::string type = "(function object object)"; + + std::string expected = "((method-of-type (type-of a0-0) inspect) a0-0)"; + test_with_expr(func, type, expected); +} + +TEST_F(FormRegressionTest, ExprPrintTreeBitmask) { + std::string func = + " sll r0, r0, 0\n" + "L54:\n" + " daddiu sp, sp, -64\n" + " sd ra, 0(sp)\n" + " sd fp, 8(sp)\n" + " or fp, t9, r0\n" + " sq s4, 16(sp)\n" + " sq s5, 32(sp)\n" + " sq gp, 48(sp)\n" + " or gp, a0, r0\n" + " or s5, a1, r0\n" + " addiu s4, r0, 0\n" + " beq r0, r0, L58\n" + " sll r0, r0, 0\n" + "\n" + "L55:\n" + " andi v1, gp, 1\n" + " bne v1, r0, L56\n" + " sll r0, r0, 0\n" + "\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L323\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + " beq r0, r0, L57\n" + " sll r0, r0, 0\n" + "\n" + "L56:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L322\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L57:\n" + " dsrl gp, gp, 1\n" + " daddiu s4, s4, 1\n" + "L58:\n" + " slt v1, s4, s5\n" + " bne v1, r0, L55\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v0, s7, r0\n" + " ld ra, 0(sp)\n" + " ld fp, 8(sp)\n" + " lq gp, 48(sp)\n" + " lq s5, 32(sp)\n" + " lq s4, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 64"; + std::string type = "(function int int symbol)"; + + std::string expected = + "(begin\n" + " (set! gp-0 a0-0)\n" + " (set! s5-0 a1-0)\n" + " (set! s4-0 0)\n" + " (while\n" + " (<.si s4-0 s5-0)\n" + " (if\n" + " (zero? (logand gp-0 1))\n" + " (format (quote #t) L323)\n" + " (format (quote #t) L322)\n" + " )\n" + " (set! gp-0 (srl (the-as uint gp-0) 1))\n" + " (set! s4-0 (+ s4-0 1))\n" + " )\n" + " (set! v1-3 (quote #f))\n" + " (quote #f)\n" + " )"; + test_with_expr(func, type, expected, false, "", {{"L323", " "}, {"L322", "| "}}); } \ No newline at end of file diff --git a/test/decompiler/test_FormExpressionBuildLong.cpp b/test/decompiler/test_FormExpressionBuildLong.cpp new file mode 100644 index 0000000000..07ebf6cdcc --- /dev/null +++ b/test/decompiler/test_FormExpressionBuildLong.cpp @@ -0,0 +1,2053 @@ +#include "gtest/gtest.h" +#include "FormRegressionTest.h" + +using namespace decompiler; + +TEST_F(FormRegressionTest, ExprArrayMethod2) { + std::string func = + " sll r0, r0, 0\n" + "L130:\n" + " daddiu sp, sp, -48\n" + " sd ra, 0(sp)\n" + " sd fp, 8(sp)\n" + " or fp, t9, r0\n" + " sq s5, 16(sp)\n" + " sq gp, 32(sp)\n" + + " or gp, a0, r0\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L342\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " lw t9, type-type?(s7)\n" + " lwu a0, 8(gp)\n" + " lw a1, integer(s7)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " beq s7, v0, L182\n" + " sll r0, r0, 0\n" + + " lwu v1, 8(gp)\n" + " lwu v1, 0(v1)\n" + " daddiu a0, s7, int32\n" + " bne v1, a0, L135\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L134\n" + " sll r0, r0, 0\n" + + "L131:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L132\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L341\n" + " beq r0, r0, L133\n" + " sll r0, r0, 0\n" + + "L132:\n" + " daddiu a1, fp, L340\n" + "L133:\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lw a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " daddiu s5, s5, 1\n" + "L134:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L131\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L181\n" + " sll r0, r0, 0\n" + "L135:\n" + " daddiu a0, s7, uint32\n" + " bne v1, a0, L140\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L139\n" + " sll r0, r0, 0\n" + + "L136:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L137\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L341\n" + " beq r0, r0, L138\n" + " sll r0, r0, 0\n" + + "L137:\n" + " daddiu a1, fp, L340\n" + "L138:\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lwu a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " daddiu s5, s5, 1\n" + "L139:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L136\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L181\n" + " sll r0, r0, 0\n" + + "L140:\n" + " daddiu a0, s7, int64\n" + " bne v1, a0, L145\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L144\n" + " sll r0, r0, 0\n" + + "L141:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L142\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L341\n" + " beq r0, r0, L143\n" + " sll r0, r0, 0\n" + + "L142:\n" + " daddiu a1, fp, L340\n" + "L143:\n" + " dsll v1, s5, 3\n" + " daddu v1, v1, gp\n" + " ld a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + " daddiu s5, s5, 1\n" + "L144:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L141\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L181\n" + " sll r0, r0, 0\n" + "L145:\n" + " daddiu a0, s7, uint64\n" + " bne v1, a0, L150\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L149\n" + " sll r0, r0, 0\n" + + "L146:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L147\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L339\n" + " beq r0, r0, L148\n" + " sll r0, r0, 0\n" + + "L147:\n" + " daddiu a1, fp, L338\n" + "L148:\n" + " dsll v1, s5, 3\n" + " daddu v1, v1, gp\n" + " ld a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " daddiu s5, s5, 1\n" + "L149:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L146\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L181\n" + " sll r0, r0, 0\n" + + "L150:\n" + " daddiu a0, s7, int8\n" + " bne v1, a0, L155\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L154\n" + " sll r0, r0, 0\n" + + "L151:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L152\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L341\n" + " beq r0, r0, L153\n" + " sll r0, r0, 0\n" + + "L152:\n" + " daddiu a1, fp, L340\n" + "L153:\n" + " daddu v1, s5, gp\n" + " lb a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " daddiu s5, s5, 1\n" + "L154:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L151\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L181\n" + " sll r0, r0, 0\n" + + "L155:\n" + " daddiu a0, s7, uint8\n" + " bne v1, a0, L160\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L159\n" + " sll r0, r0, 0\n" + + "L156:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L157\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L341\n" + " beq r0, r0, L158\n" + " sll r0, r0, 0\n" + + "L157:\n" + " daddiu a1, fp, L340\n" + "L158:\n" + " daddu v1, s5, gp\n" + " lbu a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " daddiu s5, s5, 1\n" + "L159:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L156\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L181\n" + " sll r0, r0, 0\n" + "L160:\n" + " daddiu a0, s7, int16\n" + " bne v1, a0, L165\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L164\n" + " sll r0, r0, 0\n" + + "L161:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L162\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L341\n" + " beq r0, r0, L163\n" + " sll r0, r0, 0\n" + + "L162:\n" + " daddiu a1, fp, L340\n" + + "L163:\n" + " dsll v1, s5, 1\n" + " daddu v1, v1, gp\n" + " lh a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + " daddiu s5, s5, 1\n" + + "L164:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L161\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L181\n" + " sll r0, r0, 0\n" + + "L165:\n" + " daddiu a0, s7, uint16\n" + " bne v1, a0, L170\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L169\n" + " sll r0, r0, 0\n" + + "L166:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L167\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L341\n" + " beq r0, r0, L168\n" + " sll r0, r0, 0\n" + + "L167:\n" + " daddiu a1, fp, L340\n" + "L168:\n" + " dsll v1, s5, 1\n" + " daddu v1, v1, gp\n" + " lhu a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " daddiu s5, s5, 1\n" + "L169:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L166\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L181\n" + " sll r0, r0, 0\n" + "L170:\n" + " daddiu a0, s7, uint128\n" + " dsubu a0, v1, a0\n" + " daddiu a1, s7, 8\n" + " movn a1, s7, a0\n" + " bnel s7, a1, L171\n" + " or v1, a1, r0\n" + + " daddiu a0, s7, int128\n" + " dsubu a0, v1, a0\n" + " daddiu v1, s7, 8\n" + " movn v1, s7, a0\n" + + "L171:\n" + " beq s7, v1, L176\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L175\n" + " sll r0, r0, 0\n" + + "L172:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L173\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L339\n" + " beq r0, r0, L174\n" + " sll r0, r0, 0\n" + + "L173:\n" + " daddiu a1, fp, L338\n" + + "L174:\n" + " dsll v1, s5, 4\n" + " daddu v1, v1, gp\n" + " lq a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + " daddiu s5, s5, 1\n" + "L175:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L172\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L181\n" + " sll r0, r0, 0\n" + + "L176:\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L180\n" + " sll r0, r0, 0\n" + + "L177:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " bne s5, r0, L178\n" + " sll r0, r0, 0\n" + + " daddiu a1, fp, L341\n" + " beq r0, r0, L179\n" + " sll r0, r0, 0\n" + + "L178:\n" + " daddiu a1, fp, L340\n" + "L179:\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lw a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + " daddiu s5, s5, 1\n" + + "L180:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L177\n" + " sll r0, r0, 0\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + "L181:\n" + " beq r0, r0, L192\n" + " sll r0, r0, 0\n" + "L182:\n" + " lw v1, float(s7)\n" + " lwu a0, 8(gp)\n" + " bne a0, v1, L187\n" + " sll r0, r0, 0\n" + + " addiu s5, r0, 0\n" + " beq r0, r0, L186\n" + " sll r0, r0, 0\n" + "L183:\n" + " bne s5, r0, L184\n" + " sll r0, r0, 0\n" + + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L343\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lw a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v1, v0, r0\n" + " beq r0, r0, L185\n" + " sll r0, r0, 0\n" + "L184:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L337\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lw a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v1, v0, r0\n" + "L185:\n" + " daddiu s5, s5, 1\n" + "L186:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L183\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L192\n" + " sll r0, r0, 0\n" + "L187:\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L191\n" + " sll r0, r0, 0\n" + "L188:\n" + " bne s5, r0, L189\n" + " sll r0, r0, 0\n" + + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L336\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lw a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v1, v0, r0\n" + " beq r0, r0, L190\n" + " sll r0, r0, 0\n" + "L189:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L335\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lw a2, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + " or v1, v0, r0\n" + "L190:\n" + " daddiu s5, s5, 1\n" + "L191:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L188\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + "L192:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L334\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v0, gp, r0\n" + " ld ra, 0(sp)\n" + " ld fp, 8(sp)\n" + " lq gp, 32(sp)\n" + " lq s5, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 48"; + std::string type = "(function array array)"; + + std::string expected = + "(begin\n" + " (set! gp-0 a0-0)\n" + " (format (quote #t) L342)\n" + " (cond\n" + " ((type-type? (-> gp-0 content-type) integer)\n" + " (set! v1-1 (-> gp-0 content-type symbol))\n" + " (cond\n" + " ((= v1-1 (quote int32))\n" + " (set! s5-0 0)\n" + " (while\n" + " (<.si s5-0 (-> gp-0 length))\n" + " (format\n" + " (quote #t)\n" + " (if (zero? s5-0) L341 L340)\n" + " (-> (the-as (array int32) gp-0) s5-0)\n" + " )\n" + " (set! s5-0 (+ s5-0 1))\n" + " )\n" + " (set! v1-5 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote uint32))\n" + " (set! s5-1 0)\n" + " (while\n" + " (<.si s5-1 (-> gp-0 length))\n" + " (format\n" + " (quote #t)\n" + " (if (zero? s5-1) L341 L340)\n" + " (-> (the-as (array uint32) gp-0) s5-1)\n" + " )\n" + " (set! s5-1 (+ s5-1 1))\n" + " )\n" + " (set! v1-10 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote int64))\n" + " (set! s5-2 0)\n" + " (while\n" + " (<.si s5-2 (-> gp-0 length))\n" + " (format\n" + " (quote #t)\n" + " (if (zero? s5-2) L341 L340)\n" + " (-> (the-as (array int64) gp-0) s5-2)\n" + " )\n" + " (set! s5-2 (+ s5-2 1))\n" + " )\n" + " (set! v1-15 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote uint64))\n" + " (set! s5-3 0)\n" + " (while\n" + " (<.si s5-3 (-> gp-0 length))\n" + " (format\n" + " (quote #t)\n" + " (if (zero? s5-3) L339 L338)\n" + " (-> (the-as (array uint64) gp-0) s5-3)\n" + " )\n" + " (set! s5-3 (+ s5-3 1))\n" + " )\n" + " (set! v1-20 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote int8))\n" + " (set! s5-4 0)\n" + " (while\n" + " (<.si s5-4 (-> gp-0 length))\n" + " (format\n" + " (quote #t)\n" + " (if (zero? s5-4) L341 L340)\n" + " (-> (the-as (array int8) gp-0) s5-4)\n" + " )\n" + " (set! s5-4 (+ s5-4 1))\n" + " )\n" + " (set! v1-24 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote uint8))\n" + " (set! s5-5 0)\n" + " (while\n" + " (<.si s5-5 (-> gp-0 length))\n" + " (format\n" + " (quote #t)\n" + " (if (zero? s5-5) L341 L340)\n" + " (-> (the-as (array uint8) gp-0) s5-5)\n" + " )\n" + " (set! s5-5 (+ s5-5 1))\n" + " )\n" + " (set! v1-28 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote int16))\n" + " (set! s5-6 0)\n" + " (while\n" + " (<.si s5-6 (-> gp-0 length))\n" + " (format\n" + " (quote #t)\n" + " (if (zero? s5-6) L341 L340)\n" + " (-> (the-as (array int16) gp-0) s5-6)\n" + " )\n" + " (set! s5-6 (+ s5-6 1))\n" + " )\n" + " (set! v1-33 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote uint16))\n" + " (set! s5-7 0)\n" + " (while\n" + " (<.si s5-7 (-> gp-0 length))\n" + " (format\n" + " (quote #t)\n" + " (if (zero? s5-7) L341 L340)\n" + " (-> (the-as (array uint16) gp-0) s5-7)\n" + " )\n" + " (set! s5-7 (+ s5-7 1))\n" + " )\n" + " (set! v1-38 (quote #f))\n" + " (quote #f)\n" + " )\n" + " (else\n" + " (set! v1-40 (or (= v1-1 (quote uint128)) (= v1-40 (quote int128))))\n" + " (cond\n" + " (v1-40\n" + " (set! s5-8 0)\n" + " (while\n" + " (<.si s5-8 (-> gp-0 length))\n" + " (set! t9-10 format)\n" + " (set! a0-21 (quote #t))\n" + " (set! a1-11 (if (zero? s5-8) L339 L338))\n" + " (set!\n" + " v1-42\n" + " (+\n" + " (sll (the-as uint s5-8) 4)\n" + " (the-as int (the-as (array uint128) gp-0))\n" + " )\n" + " )\n" + " (.lq a2-8 12 v1-42)\n" + " (t9-10 a0-21 a1-11 a2-8)\n" + " (set! s5-8 (+ s5-8 1))\n" + " )\n" + " (set! v1-44 (quote #f))\n" + " (quote #f)\n" + " )\n" + " (else\n" + " (set! s5-9 0)\n" + " (while\n" + " (<.si s5-9 (-> gp-0 length))\n" + " (format\n" + " (quote #t)\n" + " (if (zero? s5-9) L341 L340)\n" + " (-> (the-as (array int32) gp-0) s5-9)\n" + " )\n" + " (set! s5-9 (+ s5-9 1))\n" + " )\n" + " (set! v1-49 (quote #f))\n" + " (quote #f)\n" + " )\n" + " )\n" + " v1-39\n" + " )\n" + " )\n" + " )\n" + " (else\n" + " (cond\n" + " ((= (-> gp-0 content-type) float)\n" + " (set! s5-10 0)\n" + " (while\n" + " (<.si s5-10 (-> gp-0 length))\n" + " (if\n" + " (zero? s5-10)\n" + " (format (quote #t) L343 (-> (the-as (array float) gp-0) s5-10))\n" + " (format (quote #t) L337 (-> (the-as (array float) gp-0) s5-10))\n" + " )\n" + " (set! s5-10 (+ s5-10 1))\n" + " )\n" + " (set! v1-59 (quote #f))\n" + " (quote #f)\n" + " )\n" + " (else\n" + " (set! s5-11 0)\n" + " (while\n" + " (<.si s5-11 (-> gp-0 length))\n" + " (if\n" + " (zero? s5-11)\n" + " (format (quote #t) L336 (-> (the-as (array basic) gp-0) s5-11))\n" + " (format (quote #t) L335 (-> (the-as (array basic) gp-0) s5-11))\n" + " )\n" + " (set! s5-11 (+ s5-11 1))\n" + " )\n" + " (set! v1-68 (quote #f))\n" + " (quote #f)\n" + " )\n" + " )\n" + " )\n" + " )\n" + " (format (quote #t) L334)\n" + " gp-0\n" + " )"; + test_with_expr(func, type, expected, true, "array", + {{"L343", "~f"}, + {"L342", "#("}, + {"L341", "~D"}, + {"L340", " ~D"}, + {"L339", "#x~X"}, + {"L338", " #x~X"}, + {"L337", " ~f"}, + {"L336", "~A"}, + {"L335", " ~A"}, + {"L334", ")"}}, + parse_hint_json("[" + "\t\t[23, [\"gp\", \"(array int32)\"]],\n" + "\t\t[43, [\"gp\", \"(array uint32)\"]],\n" + "\t\t[63, [\"gp\", \"(array int64)\"]],\n" + "\t\t[83, [\"gp\", \"(array uint64)\"]],\n" + "\t\t[102, [\"gp\", \"(array int8)\"]],\n" + "\t\t[121, [\"gp\", \"(array uint8)\"]],\n" + "\t\t[141, [\"gp\", \"(array int16)\"]],\n" + "\t\t[161, [\"gp\", \"(array uint16)\"]],\n" + "\t\t[185, [\"gp\", \"(array uint128)\"]],\n" + "\t\t[203, [\"gp\", \"(array int32)\"]],\n" + "\t\t[222, [\"gp\", \"(array float)\"]],\n" + "\t\t[231, [\"gp\", \"(array float)\"]],\n" + "\t\t[248, [\"gp\", \"(array basic)\"]],\n" + "\t\t[257, [\"gp\", \"(array basic)\"]]]")); +} + +TEST_F(FormRegressionTest, ExprArrayMethod3) { + std::string func = + " sll r0, r0, 0\n" + "L91:\n" + " daddiu sp, sp, -48\n" + " sd ra, 0(sp)\n" + " sd fp, 8(sp)\n" + " or fp, t9, r0\n" + " sq s5, 16(sp)\n" + " sq gp, 32(sp)\n" + " or gp, a0, r0\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L333\n" + " or a2, gp, r0\n" + " lwu a3, -4(gp)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L332\n" + " lw a2, 4(gp)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L331\n" + " lw a2, 0(gp)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L330\n" + " lwu a2, 8(gp)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L329\n" + " lw a2, 4(gp)\n" + " daddiu a3, gp, 12\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " lw t9, type-type?(s7)\n" + " lwu a0, 8(gp)\n" + " lw a1, integer(s7)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " beq s7, v0, L123\n" + " sll r0, r0, 0\n" + "\n" + " lwu v1, 8(gp)\n" + " lwu v1, 0(v1)\n" + " daddiu a0, s7, int32\n" + " bne v1, a0, L94\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L93\n" + " sll r0, r0, 0\n" + "\n" + "L92:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L328\n" + " or a2, s5, r0\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lw a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L93:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L92\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L122\n" + " sll r0, r0, 0\n" + "\n" + "L94:\n" + " daddiu a0, s7, uint32\n" + " bne v1, a0, L97\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L96\n" + " sll r0, r0, 0\n" + "\n" + "L95:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L328\n" + " or a2, s5, r0\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lwu a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L96:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L95\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L122\n" + " sll r0, r0, 0\n" + "\n" + "L97:\n" + " daddiu a0, s7, int64\n" + " bne v1, a0, L100\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L99\n" + " sll r0, r0, 0\n" + "\n" + "L98:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L328\n" + " or a2, s5, r0\n" + " dsll v1, s5, 3\n" + " daddu v1, v1, gp\n" + " ld a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L99:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L98\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L122\n" + " sll r0, r0, 0\n" + "\n" + "L100:\n" + " daddiu a0, s7, uint64\n" + " bne v1, a0, L103\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L102\n" + " sll r0, r0, 0\n" + "\n" + "L101:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L327\n" + " or a2, s5, r0\n" + " dsll v1, s5, 3\n" + " daddu v1, v1, gp\n" + " ld a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L102:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L101\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L122\n" + " sll r0, r0, 0\n" + "\n" + "L103:\n" + " daddiu a0, s7, int8\n" + " bne v1, a0, L106\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L105\n" + " sll r0, r0, 0\n" + "\n" + "L104:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L328\n" + " or a2, s5, r0\n" + " daddu v1, s5, gp\n" + " lb a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L105:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L104\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L122\n" + " sll r0, r0, 0\n" + "\n" + "L106:\n" + " daddiu a0, s7, uint8\n" + " bne v1, a0, L109\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L108\n" + " sll r0, r0, 0\n" + "\n" + "L107:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L328\n" + " or a2, s5, r0\n" + " daddu v1, s5, gp\n" + " lb a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L108:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L107\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L122\n" + " sll r0, r0, 0\n" + "\n" + "L109:\n" + " daddiu a0, s7, int16\n" + " bne v1, a0, L112\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L111\n" + " sll r0, r0, 0\n" + "\n" + "L110:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L328\n" + " or a2, s5, r0\n" + " dsll v1, s5, 1\n" + " daddu v1, v1, gp\n" + " lh a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L111:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L110\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L122\n" + " sll r0, r0, 0\n" + "\n" + "L112:\n" + " daddiu a0, s7, uint16\n" + " bne v1, a0, L115\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L114\n" + " sll r0, r0, 0\n" + "\n" + "L113:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L328\n" + " or a2, s5, r0\n" + " dsll v1, s5, 1\n" + " daddu v1, v1, gp\n" + " lhu a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L114:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L113\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L122\n" + " sll r0, r0, 0\n" + "\n" + "L115:\n" + " daddiu a0, s7, int128\n" + " dsubu a0, v1, a0\n" + " daddiu a1, s7, 8\n" + " movn a1, s7, a0\n" + " bnel s7, a1, L116\n" + " or v1, a1, r0\n" + "\n" + " daddiu a0, s7, uint128\n" + " dsubu a0, v1, a0\n" + " daddiu v1, s7, 8\n" + " movn v1, s7, a0\n" + "L116:\n" + " beq s7, v1, L119\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L118\n" + " sll r0, r0, 0\n" + "\n" + "L117:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L327\n" + " or a2, s5, r0\n" + " dsll v1, s5, 4\n" + " daddu v1, v1, gp\n" + " lq a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L118:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L117\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L122\n" + " sll r0, r0, 0\n" + "\n" + "L119:\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L121\n" + " sll r0, r0, 0\n" + "\n" + "L120:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L328\n" + " or a2, s5, r0\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lw a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L121:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L120\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + "L122:\n" + " beq r0, r0, L129\n" + " sll r0, r0, 0\n" + "\n" + "L123:\n" + " lw v1, float(s7)\n" + " lwu a0, 8(gp)\n" + " bne a0, v1, L126\n" + " sll r0, r0, 0\n" + "\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L125\n" + " sll r0, r0, 0\n" + "\n" + "L124:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L326\n" + " or a2, s5, r0\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lwc1 f0, 12(v1)\n" + " mfc1 a3, f0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L125:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L124\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " beq r0, r0, L129\n" + " sll r0, r0, 0\n" + "\n" + "L126:\n" + " addiu s5, r0, 0\n" + " beq r0, r0, L128\n" + " sll r0, r0, 0\n" + "\n" + "L127:\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L325\n" + " or a2, s5, r0\n" + " dsll v1, s5, 2\n" + " daddu v1, v1, gp\n" + " lw a3, 12(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " daddiu s5, s5, 1\n" + "L128:\n" + " lw v1, 0(gp)\n" + " slt v1, s5, v1\n" + " bne v1, r0, L127\n" + " sll r0, r0, 0\n" + "\n" + " or v1, s7, r0\n" + " or v1, s7, r0\n" + "L129:\n" + " or v0, gp, r0\n" + " ld ra, 0(sp)\n" + " ld fp, 8(sp)\n" + " lq gp, 32(sp)\n" + " lq s5, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 48"; + std::string type = "(function array array)"; + + std::string expected = + "(begin\n" + " (set! gp-0 a0-0)\n" + " (format (quote #t) L333 gp-0 (-> gp-0 type))\n" + " (format (quote #t) L332 (-> gp-0 allocated-length))\n" + " (format (quote #t) L331 (-> gp-0 length))\n" + " (format (quote #t) L330 (-> gp-0 content-type))\n" + " (format (quote #t) L329 (-> gp-0 allocated-length) (-> gp-0 data))\n" + " (cond\n" + " ((type-type? (-> gp-0 content-type) integer)\n" + " (set! v1-1 (-> gp-0 content-type symbol))\n" + " (cond\n" + " ((= v1-1 (quote int32))\n" + " (set! s5-0 0)\n" + " (while\n" + " (<.si s5-0 (-> gp-0 length))\n" + " (format (quote #t) L328 s5-0 (-> (the-as (array int32) gp-0) s5-0))\n" + " (set! s5-0 (+ s5-0 1))\n" + " )\n" + " (set! v1-5 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote uint32))\n" + " (set! s5-1 0)\n" + " (while\n" + " (<.si s5-1 (-> gp-0 length))\n" + " (format (quote #t) L328 s5-1 (-> (the-as (array uint32) gp-0) s5-1))\n" + " (set! s5-1 (+ s5-1 1))\n" + " )\n" + " (set! v1-10 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote int64))\n" + " (set! s5-2 0)\n" + " (while\n" + " (<.si s5-2 (-> gp-0 length))\n" + " (format (quote #t) L328 s5-2 (-> (the-as (array int64) gp-0) s5-2))\n" + " (set! s5-2 (+ s5-2 1))\n" + " )\n" + " (set! v1-15 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote uint64))\n" + " (set! s5-3 0)\n" + " (while\n" + " (<.si s5-3 (-> gp-0 length))\n" + " (format (quote #t) L327 s5-3 (-> (the-as (array uint64) gp-0) s5-3))\n" + " (set! s5-3 (+ s5-3 1))\n" + " )\n" + " (set! v1-20 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote int8))\n" + " (set! s5-4 0)\n" + " (while\n" + " (<.si s5-4 (-> gp-0 length))\n" + " (format (quote #t) L328 s5-4 (-> (the-as (array int8) gp-0) s5-4))\n" + " (set! s5-4 (+ s5-4 1))\n" + " )\n" + " (set! v1-24 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote uint8))\n" + " (set! s5-5 0)\n" + " (while\n" + " (<.si s5-5 (-> gp-0 length))\n" + " (format (quote #t) L328 s5-5 (-> (the-as (array int8) gp-0) s5-5))\n" + " (set! s5-5 (+ s5-5 1))\n" + " )\n" + " (set! v1-28 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote int16))\n" + " (set! s5-6 0)\n" + " (while\n" + " (<.si s5-6 (-> gp-0 length))\n" + " (format (quote #t) L328 s5-6 (-> (the-as (array int16) gp-0) s5-6))\n" + " (set! s5-6 (+ s5-6 1))\n" + " )\n" + " (set! v1-33 (quote #f))\n" + " (quote #f)\n" + " )\n" + " ((= v1-1 (quote uint16))\n" + " (set! s5-7 0)\n" + " (while\n" + " (<.si s5-7 (-> gp-0 length))\n" + " (format (quote #t) L328 s5-7 (-> (the-as (array uint16) gp-0) s5-7))\n" + " (set! s5-7 (+ s5-7 1))\n" + " )\n" + " (set! v1-38 (quote #f))\n" + " (quote #f)\n" + " )\n" + " (else\n" + " (set! v1-40 (or (= v1-1 (quote int128)) (= v1-40 (quote uint128))))\n" + " (cond\n" + " (v1-40\n" + " (set! s5-8 0)\n" + " (while\n" + " (<.si s5-8 (-> gp-0 length))\n" + " (set! t9-14 format)\n" + " (set! a0-25 (quote #t))\n" + " (set! a1-15 L327)\n" + " (set! a2-13 s5-8)\n" + " (set!\n" + " v1-42\n" + " (+\n" + " (sll (the-as uint s5-8) 4)\n" + " (the-as int (the-as (array uint128) gp-0))\n" + " )\n" + " )\n" + " (.lq a3-10 12 v1-42)\n" + " (t9-14 a0-25 a1-15 a2-13 a3-10)\n" + " (set! s5-8 (+ s5-8 1))\n" + " )\n" + " (set! v1-44 (quote #f))\n" + " (quote #f)\n" + " )\n" + " (else\n" + " (set! s5-9 0)\n" + " (while\n" + " (<.si s5-9 (-> gp-0 length))\n" + " (format (quote #t) L328 s5-9 (-> gp-0 s5-9))\n" + " (set! s5-9 (+ s5-9 1))\n" + " )\n" + " (set! v1-49 (quote #f))\n" + " (quote #f)\n" + " )\n" + " )\n" + " v1-39\n" + " )\n" + " )\n" + " )\n" + " (else\n" + " (cond\n" + " ((= (-> gp-0 content-type) float)\n" + " (set! s5-10 0)\n" + " (while\n" + " (<.si s5-10 (-> gp-0 length))\n" + " (format (quote #t) L326 s5-10 (-> (the-as (array float) gp-0) s5-10))\n" + " (set! s5-10 (+ s5-10 1))\n" + " )\n" + " (set! v1-55 (quote #f))\n" + " (quote #f)\n" + " )\n" + " (else\n" + " (set! s5-11 0)\n" + " (while\n" + " (<.si s5-11 (-> gp-0 length))\n" + " (format (quote #t) L325 s5-11 (-> (the-as (array basic) gp-0) s5-11))\n" + " (set! s5-11 (+ s5-11 1))\n" + " )\n" + " (set! v1-60 (quote #f))\n" + " (quote #f)\n" + " )\n" + " )\n" + " )\n" + " )\n" + " gp-0\n" + " )"; + test_with_expr(func, type, expected, true, "array", + {{"L333", "[~8x] ~A~%"}, + {"L332", "~Tallocated-length: ~D~%"}, + {"L331", "~Tlength: ~D~%"}, + {"L330", " ~Tcontent-type: ~A~%"}, + {"L329", "~Tdata[~D]: @ #x~X~%"}, + {"L328", "~T [~D] ~D~%"}, + {"L327", "~T [~D] #x~X~%"}, + {"L326", "~T [~D] ~f~%"}, + {"L325", "~T [~D] ~A~%"}}, + parse_hint_json("[\t\t[44, [\"gp\", \"(array int32)\"]],\n" + "\t\t[62, [\"gp\", \"(array uint32)\"]],\n" + "\t\t[80, [\"gp\", \"(array int64)\"]],\n" + "\t\t[98, [\"gp\", \"(array uint64)\"]],\n" + "\t\t[115, [\"gp\", \"(array int8)\"]],\n" + "\t\t[132, [\"gp\", \"(array int8)\"]],\n" + "\t\t[150, [\"gp\", \"(array int16)\"]],\n" + "\t\t[168, [\"gp\", \"(array uint16)\"]],\n" + "\t\t[190, [\"gp\", \"(array uint128)\"]],\n" + "\t\t[203, [\"gp\", \"(array int32)\"]],\n" + "\t\t[225, [\"gp\", \"(array float)\"]],\n" + "\t\t[242, [\"gp\", \"(array basic)\"]]]")); +} + +TEST_F(FormRegressionTest, ExprValid) { + std::string func = + " sll r0, r0, 0\n" + "L1:\n" + " daddiu sp, sp, -80\n" + " sd ra, 0(sp)\n" + " sd fp, 8(sp)\n" + " or fp, t9, r0\n" + " sq s3, 16(sp)\n" + " sq s4, 32(sp)\n" + " sq s5, 48(sp)\n" + " sq gp, 64(sp)\n" + " or gp, a0, r0\n" + " or s3, a1, r0\n" + " or s4, a2, r0\n" + " or s5, t0, r0\n" + " daddiu v1, s7, -32768\n" + " sltu v1, gp, v1\n" + " daddiu a0, s7, 8\n" + " movn a0, s7, v1\n" + " beql s7, a0, L2\n" + " or v1, a0, r0\n" + "\n" + " lui v1, 2048\n" + " sltu a0, gp, v1\n" + " daddiu v1, s7, 8\n" + " movz v1, s7, a0\n" + "L2:\n" + " bne s7, s3, L8\n" + " sll r0, r0, 0\n" + "\n" + " andi a0, gp, 3\n" + " beq a0, r0, L4\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L3\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " daddiu a1, fp, L321\n" + " or a0, s5, r0\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L3:\n" + " or v0, s7, r0\n" + " beq r0, r0, L7\n" + " sll r0, r0, 0\n" + "\n" + "L4:\n" + " bne s7, v1, L6\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L5\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L320\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L5:\n" + " or v0, s7, r0\n" + " beq r0, r0, L7\n" + " sll r0, r0, 0\n" + "\n" + "L6:\n" + " daddiu v0, s7, #t\n" + "L7:\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L8:\n" + " beql s7, a3, L9\n" + " or a0, a3, r0\n" + "\n" + " beq s7, gp, L9\n" + " daddiu a0, s7, 8\n" + "\n" + " or a0, s7, r0\n" + "L9:\n" + " beq s7, a0, L10\n" + " sll r0, r0, 0\n" + "\n" + " daddiu v0, s7, #t\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L10:\n" + " lw a0, structure(s7)\n" + " bne s3, a0, L17\n" + " sll r0, r0, 0\n" + "\n" + " andi a0, gp, 15\n" + " beq a0, r0, L12\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L11\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L319\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L11:\n" + " or v0, s7, r0\n" + " beq r0, r0, L16\n" + " sll r0, r0, 0\n" + "\n" + "L12:\n" + " beql s7, v1, L13\n" + " daddiu v1, s7, 8\n" + "\n" + " ori v1, r0, 32768\n" + " daddu v1, v1, s7\n" + " sltu a0, gp, v1\n" + " daddiu v1, s7, 8\n" + " movz v1, s7, a0\n" + "L13:\n" + " beq s7, v1, L15\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L14\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L318\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L14:\n" + " or v0, s7, r0\n" + " beq r0, r0, L16\n" + " sll r0, r0, 0\n" + "\n" + "L15:\n" + " daddiu v0, s7, #t\n" + "L16:\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L17:\n" + " lw a0, pair(s7)\n" + " bne s3, a0, L23\n" + " sll r0, r0, 0\n" + "\n" + " addiu a0, r0, 2\n" + " andi a1, gp, 7\n" + " beq a1, a0, L19\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L18\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L319\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L18:\n" + " or v0, s7, r0\n" + " beq r0, r0, L22\n" + " sll r0, r0, 0\n" + "\n" + "L19:\n" + " bne s7, v1, L21\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L20\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L318\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L20:\n" + " or v0, s7, r0\n" + " beq r0, r0, L22\n" + " sll r0, r0, 0\n" + "\n" + "L21:\n" + " daddiu v0, s7, #t\n" + "L22:\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L23:\n" + " lw a0, binteger(s7)\n" + " bne s3, a0, L27\n" + " sll r0, r0, 0\n" + "\n" + " andi v1, gp, 7\n" + " bne v1, r0, L24\n" + " sll r0, r0, 0\n" + "\n" + " daddiu v0, s7, #t\n" + " beq r0, r0, L26\n" + " sll r0, r0, 0\n" + "\n" + "L24:\n" + " beq s7, s4, L25\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L319\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L25:\n" + " or v0, s7, r0\n" + "L26:\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L27:\n" + " addiu a0, r0, 4\n" + " andi a1, gp, 7\n" + " beq a1, a0, L29\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L28\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L319\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L28:\n" + " or v0, s7, r0\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L29:\n" + " bne s7, v1, L31\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L30\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L318\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L30:\n" + " or v0, s7, r0\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L31:\n" + " lw v1, type(s7)\n" + " dsubu v1, s3, v1\n" + " daddiu a0, s7, 8\n" + " movn a0, s7, v1\n" + " beql s7, a0, L33\n" + " or v1, a0, r0\n" + "\n" + " dsll32 v1, gp, 29\n" + " beql v1, r0, L32\n" + " lw v1, binteger(s7)\n" + "\n" + " bgtzl v1, L32\n" + " lw v1, pair(s7)\n" + "\n" + " lwu v1, -4(gp)\n" + "L32:\n" + " lw a0, type(s7)\n" + " dsubu a0, v1, a0\n" + " daddiu v1, s7, 8\n" + " movz v1, s7, a0\n" + "L33:\n" + " beq s7, v1, L36\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L35\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L317\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " dsll32 v1, gp, 29\n" + " beql v1, r0, L34\n" + " lw t1, binteger(s7)\n" + "\n" + " bgtzl v1, L34\n" + " lw t1, pair(s7)\n" + "\n" + " lwu t1, -4(gp)\n" + "L34:\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L35:\n" + " or v0, s7, r0\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L36:\n" + " lw v1, type(s7)\n" + " dsubu v1, s3, v1\n" + " daddiu a0, s7, 8\n" + " movz a0, s7, v1\n" + " beql s7, a0, L38\n" + " or v1, a0, r0\n" + "\n" + " lw t9, valid?(s7)\n" + " dsll32 v1, gp, 29\n" + " beql v1, r0, L37\n" + " lw a0, binteger(s7)\n" + "\n" + " bgtzl v1, L37\n" + " lw a0, pair(s7)\n" + "\n" + " lwu a0, -4(gp)\n" + "L37:\n" + " lw a1, type(s7)\n" + " or a2, s7, r0\n" + " daddiu a3, s7, #t\n" + " addiu t0, r0, 0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " beq s7, v0, L38\n" + " daddiu v1, s7, 8\n" + "\n" + " or v1, s7, r0\n" + "L38:\n" + " beq s7, v1, L41\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L40\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L317\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " dsll32 v1, gp, 29\n" + " beql v1, r0, L39\n" + " lw t1, binteger(s7)\n" + "\n" + " bgtzl v1, L39\n" + " lw t1, pair(s7)\n" + "\n" + " lwu t1, -4(gp)\n" + "L39:\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L40:\n" + " or v0, s7, r0\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L41:\n" + " lw t9, type-type?(s7)\n" + " dsll32 v1, gp, 29\n" + " beql v1, r0, L42\n" + " lw a0, binteger(s7)\n" + "\n" + " bgtzl v1, L42\n" + " lw a0, pair(s7)\n" + "\n" + " lwu a0, -4(gp)\n" + "L42:\n" + " or a1, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " bne s7, v0, L45\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L44\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L316\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " dsll32 v1, gp, 29\n" + " beql v1, r0, L43\n" + " lw t1, binteger(s7)\n" + "\n" + " bgtzl v1, L43\n" + " lw t1, pair(s7)\n" + "\n" + " lwu t1, -4(gp)\n" + "L43:\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L44:\n" + " or v0, s7, r0\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L45:\n" + " lw v1, symbol(s7)\n" + " bne s3, v1, L49\n" + " sll r0, r0, 0\n" + "\n" + " ori v1, r0, 32768\n" + " daddu v1, v1, s7\n" + " sltu v1, gp, v1\n" + " bne v1, r0, L47\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L46\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L315\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L46:\n" + " or v0, s7, r0\n" + " beq r0, r0, L48\n" + " sll r0, r0, 0\n" + "\n" + "L47:\n" + " daddiu v0, s7, #t\n" + "L48:\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L49:\n" + " ori v1, r0, 32768\n" + " daddu v1, v1, s7\n" + " sltu v1, gp, v1\n" + " beq v1, r0, L51\n" + " sll r0, r0, 0\n" + "\n" + " beq s7, s4, L50\n" + " or v1, s7, r0\n" + "\n" + " lw t9, format(s7)\n" + " or a0, s5, r0\n" + " daddiu a1, fp, L314\n" + " or a2, gp, r0\n" + " or a3, s4, r0\n" + " or t0, s3, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " or v1, v0, r0\n" + "L50:\n" + " or v0, s7, r0\n" + " beq r0, r0, L52\n" + " sll r0, r0, 0\n" + "\n" + "L51:\n" + " daddiu v0, s7, #t\n" + "L52:\n" + " ld ra, 0(sp)\n" + " ld fp, 8(sp)\n" + " lq gp, 64(sp)\n" + " lq s5, 48(sp)\n" + " lq s4, 32(sp)\n" + " lq s3, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 80\n"; + std::string type = "(function object type basic basic object symbol)"; + + std::string expected = + "(begin\n" + " (set!\n" + " v1-1\n" + " (begin\n" + " (set! gp-0 a0-0)\n" + " (set! s3-0 a1-0)\n" + " (set! s4-0 a2-0)\n" + " (set! s5-0 t0-0)\n" + " (and (>=.ui gp-0 __START-OF-TABLE__) (begin (<.ui gp-0 134217728) v1-1))\n" + " )\n" + " )\n" + " (cond\n" + " ((not s3-0)\n" + " (cond\n" + " ((nonzero? (logand gp-0 3))\n" + " (if s4-0 (set! v1-4 (format s5-0 L321 gp-0 s4-0)))\n" + " (quote #f)\n" + " )\n" + " ((not v1-1) (if s4-0 (set! v1-6 (format s5-0 L320 gp-0 s4-0))) (quote #f))\n" + " (else (quote #t))\n" + " )\n" + " )\n" + " ((and a3-2 (not gp-0)) (quote #t))\n" + " (else\n" + " (cond\n" + " ((= s3-0 structure)\n" + " (cond\n" + " ((nonzero? (logand gp-0 15))\n" + " (if s4-0 (set! v1-8 (format s5-0 L319 gp-0 s4-0 s3-0)))\n" + " (quote #f)\n" + " )\n" + " ((or\n" + " (not v1-1)\n" + " (begin (set! v1-10 32768) (.daddu v1-11 v1-10 s7-0) (<.ui gp-0 v1-11))\n" + " )\n" + " (if s4-0 (set! v1-13 (format s5-0 L318 gp-0 s4-0 s3-0)))\n" + " (quote #f)\n" + " )\n" + " (else (quote #t))\n" + " )\n" + " )\n" + " ((= s3-0 pair)\n" + " (cond\n" + " ((!= (logand gp-0 7) 2)\n" + " (if s4-0 (set! v1-15 (format s5-0 L319 gp-0 s4-0 s3-0)))\n" + " (quote #f)\n" + " )\n" + " ((not v1-1)\n" + " (if s4-0 (set! v1-17 (format s5-0 L318 gp-0 s4-0 s3-0)))\n" + " (quote #f)\n" + " )\n" + " (else (quote #t))\n" + " )\n" + " )\n" + " ((= s3-0 binteger)\n" + " (cond\n" + " ((zero? (logand gp-0 7)) (quote #t))\n" + " (else\n" + " (if s4-0 (set! v1-20 (format s5-0 L319 gp-0 s4-0 s3-0)))\n" + " (quote #f)\n" + " )\n" + " )\n" + " )\n" + " ((!= (logand gp-0 7) 4)\n" + " (if s4-0 (set! v1-22 (format s5-0 L319 gp-0 s4-0 s3-0)))\n" + " (quote #f)\n" + " )\n" + " ((not v1-1)\n" + " (if s4-0 (set! v1-24 (format s5-0 L318 gp-0 s4-0 s3-0)))\n" + " (quote #f)\n" + " )\n" + " ((and (= s3-0 type) (!= (type-of gp-0) type))\n" + " (if s4-0 (set! v1-31 (format s5-0 L317 gp-0 s4-0 s3-0 (type-of gp-0))))\n" + " (quote #f)\n" + " )\n" + " (else\n" + " (set!\n" + " v1-33\n" + " (and\n" + " (!= s3-0 type)\n" + " (not (valid? (type-of gp-0) type (quote #f) (quote #t) 0))\n" + " )\n" + " )\n" + " (cond\n" + " (v1-33\n" + " (if s4-0 (set! v1-37 (format s5-0 L317 gp-0 s4-0 s3-0 (type-of gp-0))))\n" + " (quote #f)\n" + " )\n" + " ((not (type-type? (type-of gp-0) s3-0))\n" + " (if s4-0 (set! v1-41 (format s5-0 L316 gp-0 s4-0 s3-0 (type-of gp-0))))\n" + " (quote #f)\n" + " )\n" + " ((= s3-0 symbol)\n" + " (set! v1-43 32768)\n" + " (.daddu v1-44 v1-43 s7-0)\n" + " (cond\n" + " ((>=.ui gp-0 v1-44)\n" + " (if s4-0 (set! v1-46 (format s5-0 L315 gp-0 s4-0 s3-0)))\n" + " (quote #f)\n" + " )\n" + " (else (quote #t))\n" + " )\n" + " )\n" + " ((begin (set! v1-47 32768) (.daddu v1-48 v1-47 s7-0) (<.ui gp-0 v1-48))\n" + " (if s4-0 (set! v1-50 (format s5-0 L314 gp-0 s4-0 s3-0)))\n" + " (quote #f)\n" + " )\n" + " (else (quote #t))\n" + " )\n" + " )\n" + " )\n" + " )\n" + " )\n" + " )"; + test_with_expr( + func, type, expected, false, "", + {{"L321", "ERROR: object #x~X ~S is not a valid object (misaligned)~%"}, + {"L320", "ERROR: object #x~X ~S is not a valid object (bad address)~%"}, + {"L319", "ERROR: object #x~X ~S is not a valid object of type '~A' (misaligned)~%"}, + {"L318", "ERROR: object #x~X ~S is not a valid object of type '~A' (bad address)~%"}, + {"L317", "ERROR: object #x~X ~S is not a valid object of type '~A' (invalid type #x~X)~%"}, + {"L316", + "ERROR: object #x~X ~S is not a valid object of type '~A' (is type '~A' instead)~%"}, + {"L315", "ERROR: object #x~X ~S is not a valid object of type '~A' (not in symbol table)~%"}, + {"L314", + "ERROR: object #x~X ~S is not a valid object of type '~A' (inside symbol table)~%"}}); +} \ No newline at end of file diff --git a/test/goalc/source_templates/with_game/test-local-vars.gc b/test/goalc/source_templates/with_game/test-local-vars.gc new file mode 100644 index 0000000000..6997687a46 --- /dev/null +++ b/test/goalc/source_templates/with_game/test-local-vars.gc @@ -0,0 +1,9 @@ +(defun lv-test () + (local-vars (x int) (y string) (z float)) + (set! y "test") + (set! x 12) + (set! z 3.2) + (format #t "y is ~A, x is ~D, z is ~f~%" y x z) + ) + +(lv-test) \ No newline at end of file diff --git a/test/goalc/test_with_game.cpp b/test/goalc/test_with_game.cpp index 59e94e4721..621652cd36 100644 --- a/test/goalc/test_with_game.cpp +++ b/test/goalc/test_with_game.cpp @@ -365,9 +365,14 @@ TEST_F(WithGameTests, BoxedArrayIndex) { runner.run_static_test(env, testCategory, "test-boxed-array-index.gc", {"18\n0\n"}); } +TEST_F(WithGameTests, LocalVars) { + runner.run_static_test(env, testCategory, "test-local-vars.gc", + {"y is \"test\", x is 12, z is 3.2000\n0\n"}); +} + TEST(TypeConsistency, TypeConsistency) { Compiler compiler; compiler.enable_throw_on_redefines(); compiler.run_test_no_load("test/goalc/source_templates/with_game/test-build-game.gc"); compiler.run_test_no_load("decompiler/config/all-types.gc"); -} +} \ No newline at end of file