diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp index 6c0eaec64d..ac61590fe0 100644 --- a/decompiler/Function/TypeAnalysis.cpp +++ b/decompiler/Function/TypeAnalysis.cpp @@ -51,11 +51,18 @@ bool Function::run_type_analysis_ir2(const TypeSpec& my_type, (void)file; // STEP 0 - set decompiler type system settings for this function. In config we can manually // specify some settings for type propagation to reduce the strictness of type propagation. + // TODO - this is kinda hacky so that it works in both unit tests and actual decompilation. + // it would be better if this setting came 100% from the IR2 env. if (!dts.type_prop_settings.locked) { dts.type_prop_settings.reset(); if (get_config().pair_functions_by_name.find(guessed_name.to_string()) != get_config().pair_functions_by_name.end()) { dts.type_prop_settings.allow_pair = true; + ir2.env.set_sloppy_pair_typing(); + } + } else { + if (dts.type_prop_settings.allow_pair) { + ir2.env.set_sloppy_pair_typing(); } } diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index 9d41fce3f5..c31a0fb611 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -543,6 +543,7 @@ class SpecialOp : public AtomicOp { const Env& env, DecompilerTypeSystem& dts) override; void collect_vars(VariableSet& vars) const override; + Kind kind() const { return m_kind; } private: Kind m_kind; diff --git a/decompiler/IR2/AtomicOpForm.cpp b/decompiler/IR2/AtomicOpForm.cpp index a5fc11b435..28d9975bf0 100644 --- a/decompiler/IR2/AtomicOpForm.cpp +++ b/decompiler/IR2/AtomicOpForm.cpp @@ -62,7 +62,54 @@ FormElement* SetVarConditionOp::get_as_form(FormPool& pool, const Env& env) cons is_sequence_point()); } -FormElement* StoreOp::get_as_form(FormPool& pool, const Env&) const { +FormElement* StoreOp::get_as_form(FormPool& pool, const Env& env) const { + if (env.has_type_analysis()) { + IR2_RegOffset ro; + if (get_as_reg_offset(m_addr, &ro)) { + auto& input_type = env.get_types_before_op(m_my_idx).get(ro.reg); + + if (env.allow_sloppy_pair_typing() && m_size == 4 && + (input_type.typespec() == TypeSpec("object") || + input_type.typespec() == TypeSpec("pair"))) { + if (ro.offset == 2) { + auto base = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(ro.var).as_expr(), m_my_idx); + auto val = pool.alloc_single_element_form( + nullptr, m_value.as_expr(), m_my_idx); + auto addr = pool.alloc_single_element_form( + nullptr, GenericOperator::make_fixed(FixedOperatorKind::CDR), base); + auto fr = pool.alloc_element(addr, val); + return fr; + } else if (ro.offset == -2) { + auto base = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(ro.var).as_expr(), m_my_idx); + auto val = pool.alloc_single_element_form( + nullptr, m_value.as_expr(), m_my_idx); + auto addr = pool.alloc_single_element_form( + nullptr, GenericOperator::make_fixed(FixedOperatorKind::CAR), base); + return pool.alloc_element(addr, val); + } + } + + FieldReverseLookupInput rd_in; + DerefKind dk; + dk.is_store = true; + dk.reg_kind = get_reg_kind(ro.reg); + dk.size = m_size; + rd_in.deref = dk; + rd_in.base_type = input_type.typespec(); + rd_in.stride = 0; + rd_in.offset = ro.offset; + auto rd = env.dts->ts.reverse_field_lookup(rd_in); + + if (rd.success) { + // throw std::runtime_error("RD Success in StoreOp::get_as_form"); + return pool.alloc_element(this); + } else { + return pool.alloc_element(this); + } + } + } return pool.alloc_element(this); } @@ -72,7 +119,22 @@ FormElement* LoadVarOp::get_as_form(FormPool& pool, const Env& env) const { if (get_as_reg_offset(m_src, &ro)) { auto& input_type = env.get_types_before_op(m_my_idx).get(ro.reg); - // todo basic method + if (input_type.kind == TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD && ro.offset >= 16 && + (ro.offset & 3) == 0 && m_size == 4 && m_kind == Kind::UNSIGNED) { + // method get of fixed type + auto type_name = input_type.get_type_objects_typespec().base_type(); + auto method_id = (ro.offset - 16) / 4; + auto method_info = env.dts->ts.lookup_method(type_name, method_id); + + std::vector tokens; + tokens.push_back(DerefToken::make_field_name("methods-by-name")); + tokens.push_back(DerefToken::make_field_name(method_info.name)); + auto source = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(ro.var).as_expr(), m_my_idx); + auto load = pool.alloc_single_element_form(nullptr, source, false, tokens); + return pool.alloc_element(m_dst, load, true); + } + // todo structure method // todo pointer // todo product trick @@ -85,6 +147,29 @@ FormElement* LoadVarOp::get_as_form(FormPool& pool, const Env& env) const { return pool.alloc_element(m_dst, load, true); } + if (env.allow_sloppy_pair_typing() && m_kind == Kind::SIGNED && m_size == 4 && + (input_type.typespec() == TypeSpec("object") || + input_type.typespec() == TypeSpec("pair"))) { + // these rules are of course not always correct or the most specific, but it's the best + // we can do. + if (ro.offset == 2) { + auto source = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(ro.var).as_expr(), m_my_idx); + auto load = pool.alloc_single_element_form( + nullptr, GenericOperator::make_fixed(FixedOperatorKind::CDR), source); + // cdr = another pair. + return pool.alloc_element(m_dst, load, true); + } else if (ro.offset == -2) { + // car = some object. + auto source = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_var(ro.var).as_expr(), m_my_idx); + auto load = pool.alloc_single_element_form( + nullptr, GenericOperator::make_fixed(FixedOperatorKind::CAR), source); + // cdr = another pair. + return pool.alloc_element(m_dst, load, true); + } + } + // Assume we're accessing a field of an object. FieldReverseLookupInput rd_in; DerefKind dk; @@ -111,8 +196,6 @@ FormElement* LoadVarOp::get_as_form(FormPool& pool, const Env& env) const { pool.alloc_single_element_form(nullptr, source, rd.addr_of, tokens); return pool.alloc_element(m_dst, load, true); } - - // todo, try as pair } } diff --git a/decompiler/IR2/Env.h b/decompiler/IR2/Env.h index 9d3ebdb3b6..5c3a5f9f18 100644 --- a/decompiler/IR2/Env.h +++ b/decompiler/IR2/Env.h @@ -77,6 +77,9 @@ class Env { std::unordered_set get_ssa_var(const VariableSet& vars) const; RegId get_ssa_var(const Variable& var) const; + bool allow_sloppy_pair_typing() const { return m_allow_sloppy_pair_typing; } + void set_sloppy_pair_typing() { m_allow_sloppy_pair_typing = true; } + LinkedObjectFile* file = nullptr; DecompilerTypeSystem* dts = nullptr; @@ -93,5 +96,7 @@ class Env { std::vector m_block_init_types; std::vector m_op_end_types; std::vector m_op_init_types; + + bool m_allow_sloppy_pair_typing = false; }; } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index 6fb72d3426..1aeb1a2d30 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -61,8 +61,8 @@ void Form::inline_forms(std::vector& forms, const Env& env) const } void Form::apply(const std::function& f) { - for (auto& x : m_elements) { - x->apply(f); + for (size_t i = 0; i < m_elements.size(); i++) { + m_elements.at(i)->apply(f); } } @@ -233,6 +233,37 @@ void SetVarElement::collect_vars(VariableSet& vars) const { m_src->collect_vars(vars); } +///////////////////////////// +// SetFormFormElement +///////////////////////////// + +SetFormFormElement::SetFormFormElement(Form* dst, Form* src) : m_dst(dst), m_src(src) {} + +goos::Object SetFormFormElement::to_form(const Env& env) const { + std::vector forms = {pretty_print::to_symbol("set!"), m_dst->to_form(env), + m_src->to_form(env)}; + return pretty_print::build_list(forms); +} + +void SetFormFormElement::apply(const std::function& f) { + m_src->apply(f); + m_dst->apply(f); +} + +void SetFormFormElement::apply_form(const std::function& f) { + m_src->apply_form(f); + m_dst->apply_form(f); +} + +bool SetFormFormElement::is_sequence_point() const { + return true; +} + +void SetFormFormElement::collect_vars(VariableSet& vars) const { + m_src->collect_vars(vars); + m_dst->collect_vars(vars); +} + ///////////////////////////// // AtomicOpElement ///////////////////////////// @@ -874,6 +905,12 @@ std::string fixed_operator_to_string(FixedOperatorKind kind) { return "lognot"; case FixedOperatorKind::SLL: return "sll"; + case FixedOperatorKind::CAR: + return "car"; + case FixedOperatorKind::CDR: + return "cdr"; + case FixedOperatorKind::NEW: + return "new"; default: assert(false); } diff --git a/decompiler/IR2/Form.h b/decompiler/IR2/Form.h index 2d0368555d..97cf582636 100644 --- a/decompiler/IR2/Form.h +++ b/decompiler/IR2/Form.h @@ -196,6 +196,26 @@ class SetVarElement : public FormElement { bool m_is_sequence_point = true; }; +/*! + * Like SetVar, but sets a form to another form. + * This is intended to be used with stores. + * NOTE: do not use this when SetVarElement could be used instead. + */ +class SetFormFormElement : public FormElement { + public: + SetFormFormElement(Form* dst, Form* src); + goos::Object to_form(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + bool is_sequence_point() const override; + void collect_vars(VariableSet& vars) const override; + void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; + + private: + Form* m_dst = nullptr; + Form* m_src = nullptr; +}; + /*! * A wrapper around a single AtomicOp. * The "important" special AtomicOps have their own Form type, like FuncitonCallElement. @@ -363,6 +383,7 @@ class CondWithElseElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; }; /*! @@ -394,6 +415,7 @@ class WhileElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; Form* condition = nullptr; Form* body = nullptr; bool cleaned = false; @@ -460,7 +482,7 @@ class CondNoElseElement : public FormElement { FormElement* original_condition_branch = nullptr; bool cleaned = false; }; - Register final_destination; + Variable final_destination; bool used_as_value = false; std::vector entries; explicit CondNoElseElement(std::vector _entries) : entries(std::move(_entries)) {} @@ -579,8 +601,19 @@ class GenericOperator { void apply_form(const std::function& f); bool operator==(const GenericOperator& other) const; bool operator!=(const GenericOperator& other) const; + Kind kind() const { return m_kind; } + FixedOperatorKind fixed_kind() const { + assert(m_kind == Kind::FIXED_OPERATOR); + return m_fixed_kind; + } + + const Form* func() const { + assert(m_kind == Kind::FUNCTION_EXPR); + return m_function; + } private: + friend class GenericElement; Kind m_kind = Kind::INVALID; IR2_Condition::Kind m_condition_kind = IR2_Condition::Kind::INVALID; FixedOperatorKind m_fixed_kind = FixedOperatorKind::INVALID; @@ -598,6 +631,10 @@ class GenericElement : public FormElement { void apply(const std::function& f) override; void apply_form(const std::function& f) override; void collect_vars(VariableSet& vars) const override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) override; const GenericOperator& op() const { return m_head; } const std::vector& elts() const { return m_elts; } @@ -638,6 +675,12 @@ class DerefToken { void apply(const std::function& f); void apply_form(const std::function& f); + Kind kind() const { return m_kind; } + const std::string& field_name() const { + assert(m_kind == Kind::FIELD_NAME); + return m_name; + } + private: Kind m_kind = Kind::INVALID; s64 m_int_constant = -1; @@ -658,6 +701,10 @@ class DerefElement : public FormElement { FormStack& stack, std::vector* result) override; + bool is_addr_of() const { return m_is_addr_of; } + const Form* base() const { return m_base; } + const std::vector& tokens() const { return m_tokens; } + private: Form* m_base = nullptr; bool m_is_addr_of = false; diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index f00a1823fe..55e038145a 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -17,6 +17,7 @@ Form* var_to_form(const Variable& var, FormPool& pool) { } void update_var_from_stack_helper(int my_idx, + const Env&, Variable input, FormPool& pool, FormStack& stack, @@ -70,12 +71,13 @@ Form* update_var_from_stack_to_form(int my_idx, } Form* update_var_from_stack_to_form(int my_idx, + const Env& env, Variable input, const RegSet& consumes, FormPool& pool, FormStack& stack) { std::vector elts; - update_var_from_stack_helper(my_idx, input, pool, stack, consumes, &elts); + update_var_from_stack_helper(my_idx, env, input, pool, stack, consumes, &elts); return pool.alloc_sequence_form(nullptr, elts); } @@ -128,7 +130,7 @@ void SimpleExpressionElement::update_from_stack_identity(const Env& env, } else if (arg.is_static_addr()) { // for now, do nothing. result->push_back(this); - } else if (arg.is_sym_ptr() || arg.is_sym_val()) { + } else if (arg.is_sym_ptr() || arg.is_sym_val() || arg.is_int() || arg.is_empty_list()) { result->push_back(this); } else { throw std::runtime_error(fmt::format( @@ -421,17 +423,24 @@ void SetVarElement::push_to_stack(const Env& env, FormPool& pool, FormStack& sta stack.push_value_to_reg(m_dst, m_src, true); } +void SetFormFormElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { + // todo - is the order here right? + m_src->update_children_from_stack(env, pool, stack); + m_dst->update_children_from_stack(env, pool, stack); + stack.push_form_element(this, true); +} + /////////////////// // AshElement /////////////////// -void AshElement::update_from_stack(const Env&, +void AshElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) { - auto val_form = update_var_from_stack_to_form(value.idx(), value, consumed, pool, stack); + auto val_form = update_var_from_stack_to_form(value.idx(), env, value, consumed, pool, stack); auto sa_form = - update_var_from_stack_to_form(shift_amount.idx(), shift_amount, consumed, pool, stack); + update_var_from_stack_to_form(shift_amount.idx(), env, shift_amount, consumed, pool, stack); auto new_form = pool.alloc_element( GenericOperator::make_fixed(FixedOperatorKind::ARITH_SHIFT), val_form, sa_form); result->push_back(new_form); @@ -441,11 +450,12 @@ void AshElement::update_from_stack(const Env&, // AbsElement /////////////////// -void AbsElement::update_from_stack(const Env&, +void AbsElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) { - auto source_form = update_var_from_stack_to_form(source.idx(), source, consumed, pool, stack); + auto source_form = + update_var_from_stack_to_form(source.idx(), env, source, consumed, pool, stack); auto new_form = pool.alloc_element( GenericOperator::make_fixed(FixedOperatorKind::ABS), source_form); result->push_back(new_form); @@ -469,6 +479,52 @@ void FunctionCallElement::update_from_stack(const Env& env, } Form* func = update_var_from_stack_to_form(m_op->op_id(), m_op->function_var(), env, pool, stack); auto new_form = pool.alloc_element(GenericOperator::make_function(func), args); + + // detect method calls: + // ex: ((-> pair methods-by-name new) (quote global) pair gp-0 a3-0) + constexpr int type_for_method = 0; + constexpr int method_name = 1; + + auto deref_matcher = Matcher::deref( + Matcher::any_symbol(type_for_method), false, + {DerefTokenMatcher::string("methods-by-name"), DerefTokenMatcher::any_string(method_name)}); + + auto matcher = Matcher::op_with_rest(GenericOpMatcher::func(deref_matcher), {}); + auto temp_form = pool.alloc_single_form(nullptr, new_form); + auto match_result = match(matcher, temp_form); + if (match_result.matched) { + auto type_1 = match_result.maps.strings.at(type_for_method); + auto name = match_result.maps.strings.at(method_name); + + if (name == "new") { + constexpr int allocation = 2; + constexpr int type_for_arg = 3; + auto alloc_matcher = Matcher::any_quoted_symbol(allocation); + auto type_arg_matcher = Matcher::any_symbol(type_for_arg); + matcher = Matcher::op_with_rest(GenericOpMatcher::func(deref_matcher), + {alloc_matcher, type_arg_matcher}); + match_result = match(matcher, temp_form); + auto alloc = match_result.maps.strings.at(allocation); + if (alloc != "global") { + throw std::runtime_error("Unrecognized heap symbol for new: " + alloc); + } + auto type_2 = match_result.maps.strings.at(type_for_arg); + if (type_1 != type_2) { + throw std::runtime_error( + fmt::format("Inconsistent types in method call: {} and {}", type_1, type_2)); + } + + std::vector new_args = dynamic_cast(new_form)->elts(); + + auto new_op = pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::NEW), new_args); + result->push_back(new_op); + return; + } else { + throw std::runtime_error("Method call detected, not yet implemented"); + } + } + result->push_back(new_form); } @@ -512,10 +568,54 @@ void UntilElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stac stack.push_form_element(this, true); } +void WhileElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { + for (auto form : {condition, body}) { + FormStack temp_stack; + for (auto& entry : form->elts()) { + entry->push_to_stack(env, pool, temp_stack); + } + auto new_entries = temp_stack.rewrite(pool); + form->clear(); + for (auto e : new_entries) { + form->push_back(e); + } + } + stack.push_form_element(this, true); +} + /////////////////// // CondNoElseElement /////////////////// void CondNoElseElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { + for (auto& entry : entries) { + for (auto form : {entry.condition, entry.body}) { + FormStack temp_stack; + for (auto& elt : form->elts()) { + elt->push_to_stack(env, pool, temp_stack); + } + + std::vector new_entries; + if (form == entry.body && used_as_value) { + new_entries = temp_stack.rewrite_to_get_var(pool, final_destination, env); + } else { + new_entries = temp_stack.rewrite(pool); + } + + form->clear(); + for (auto e : new_entries) { + form->push_back(e); + } + } + } + + if (used_as_value) { + stack.push_value_to_reg(final_destination, pool.alloc_single_form(nullptr, this), true); + } else { + stack.push_form_element(this, true); + } +} + +void CondWithElseElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { for (auto& entry : entries) { for (auto form : {entry.condition, entry.body}) { FormStack temp_stack; @@ -537,6 +637,18 @@ void CondNoElseElement::push_to_stack(const Env& env, FormPool& pool, FormStack& } } + FormStack temp_stack; + for (auto& elt : else_ir->elts()) { + elt->push_to_stack(env, pool, temp_stack); + } + + auto new_entries = temp_stack.rewrite(pool); + + else_ir->clear(); + for (auto e : new_entries) { + else_ir->push_back(e); + } + stack.push_form_element(this, true); } @@ -579,12 +691,12 @@ void ShortCircuitElement::push_to_stack(const Env& env, FormPool& pool, FormStac // ConditionElement /////////////////// -void ConditionElement::push_to_stack(const Env&, FormPool& pool, FormStack& stack) { +void ConditionElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { std::vector source_forms; for (int i = 0; i < get_condition_num_args(m_kind); i++) { - source_forms.push_back(update_var_from_stack_to_form(m_src[i]->var().idx(), m_src[i]->var(), - m_consumed, pool, stack)); + source_forms.push_back(update_var_from_stack_to_form(m_src[i]->var().idx(), env, + m_src[i]->var(), m_consumed, pool, stack)); } stack.push_form_element( @@ -592,17 +704,20 @@ void ConditionElement::push_to_stack(const Env&, FormPool& pool, FormStack& stac true); } -void ConditionElement::update_from_stack(const Env&, +void ConditionElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, std::vector* result) { std::vector source_forms; - for (int i = 0; i < get_condition_num_args(m_kind); i++) { - source_forms.push_back(update_var_from_stack_to_form(m_src[i]->var().idx(), m_src[i]->var(), - m_consumed, pool, stack)); + // for (int i = 0; i < get_condition_num_args(m_kind); i++) { + for (int i = get_condition_num_args(m_kind); i-- > 0;) { + source_forms.push_back(update_var_from_stack_to_form(m_src[i]->var().idx(), env, + m_src[i]->var(), m_consumed, pool, stack)); } + std::reverse(source_forms.begin(), source_forms.end()); + result->push_back( pool.alloc_element(GenericOperator::make_compare(m_kind), source_forms)); } @@ -623,15 +738,37 @@ void ReturnElement::push_to_stack(const Env& env, FormPool& pool, FormStack& sta stack.push_form_element(this, true); } -void AtomicOpElement::push_to_stack(const Env& env, FormPool&, FormStack&) { +void AtomicOpElement::push_to_stack(const Env& env, FormPool&, FormStack& stack) { auto as_end = dynamic_cast(m_op); if (as_end) { // we don't want to push this to the stack (for now at least) return; } + + auto as_special = dynamic_cast(m_op); + if (as_special) { + if (as_special->kind() == SpecialOp::Kind::NOP) { + stack.push_form_element(this, true); + return; + } + } throw std::runtime_error("Can't push atomic op to stack: " + m_op->to_string(env)); } +void GenericElement::update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result) { + if (m_head.m_kind == GenericOperator::Kind::FUNCTION_EXPR) { + m_head.m_function->update_children_from_stack(env, pool, stack); + } + + for (auto& x : m_elts) { + x->update_children_from_stack(env, pool, stack); + } + result->push_back(this); +} + //////////////////////// // DynamicMethodAccess //////////////////////// diff --git a/decompiler/IR2/GenericElementMatcher.cpp b/decompiler/IR2/GenericElementMatcher.cpp index 30338d3487..fb35da4475 100644 --- a/decompiler/IR2/GenericElementMatcher.cpp +++ b/decompiler/IR2/GenericElementMatcher.cpp @@ -8,10 +8,18 @@ Matcher Matcher::any_reg(int match_id) { return m; } -Matcher Matcher::op(GenericOperator op, const std::vector& args) { +Matcher Matcher::op(const GenericOpMatcher& op, const std::vector& args) { Matcher m; m.m_kind = Kind::GENERIC_OP; - m.m_gen_op = op; + m.m_gen_op_matcher = std::make_shared(op); + m.m_sub_matchers = args; + return m; +} + +Matcher Matcher::op_with_rest(const GenericOpMatcher& op, const std::vector& args) { + Matcher m; + m.m_kind = Kind::GENERIC_OP_WITH_REST; + m.m_gen_op_matcher = std::make_shared(op); m.m_sub_matchers = args; return m; } @@ -19,7 +27,7 @@ Matcher Matcher::op(GenericOperator op, const std::vector& args) { Matcher Matcher::fixed_op(FixedOperatorKind op, const std::vector& args) { Matcher m; m.m_kind = Kind::GENERIC_OP; - m.m_gen_op = GenericOperator::make_fixed(op); + m.m_gen_op_matcher = std::make_shared(GenericOpMatcher::fixed(op)); m.m_sub_matchers = args; return m; } @@ -52,6 +60,31 @@ Matcher Matcher::integer(std::optional value) { return m; } +Matcher Matcher::any_quoted_symbol(int match_id) { + Matcher m; + m.m_kind = Kind::ANY_QUOTED_SYMBOL; + m.m_string_out_id = match_id; + return m; +} + +Matcher Matcher::any_symbol(int match_id) { + Matcher m; + m.m_kind = Kind::ANY_SYMBOL; + m.m_string_out_id = match_id; + return m; +} + +Matcher Matcher::deref(const Matcher& root, + bool is_addr_of, + const std::vector& tokens) { + Matcher m; + m.m_kind = Kind::DEREF_OP; + m.m_sub_matchers = {root}; + m.m_deref_is_addr_of = is_addr_of; + m.m_token_matchers = tokens; + return m; +} + bool Matcher::do_match(const Form* input, MatchResult::Maps* maps_out) const { switch (m_kind) { case Kind::ANY_REG: { @@ -89,7 +122,7 @@ bool Matcher::do_match(const Form* input, MatchResult::Maps* maps_out) const { case Kind::GENERIC_OP: { auto as_generic = dynamic_cast(input->try_as_single_element()); if (as_generic) { - if (as_generic->op() != m_gen_op) { + if (!m_gen_op_matcher->do_match(as_generic->op(), maps_out)) { return false; } @@ -107,6 +140,27 @@ bool Matcher::do_match(const Form* input, MatchResult::Maps* maps_out) const { return false; } break; + case Kind::GENERIC_OP_WITH_REST: { + auto as_generic = dynamic_cast(input->try_as_single_element()); + if (as_generic) { + if (!m_gen_op_matcher->do_match(as_generic->op(), maps_out)) { + return false; + } + + if (as_generic->elts().size() < m_sub_matchers.size()) { + return false; + } + + for (size_t i = 0; i < m_sub_matchers.size(); i++) { + if (!m_sub_matchers.at(i).do_match(as_generic->elts().at(i), maps_out)) { + return false; + } + } + return true; + } + return false; + } break; + case Kind::OR: { for (auto& matcher : m_sub_matchers) { if (matcher.do_match(input, maps_out)) { @@ -148,6 +202,76 @@ bool Matcher::do_match(const Form* input, MatchResult::Maps* maps_out) const { } } + return false; + } break; + + case Kind::ANY_QUOTED_SYMBOL: { + auto as_simple_atom = dynamic_cast(input->try_as_single_element()); + if (as_simple_atom) { + if (as_simple_atom->atom().is_sym_ptr()) { + if (m_string_out_id != -1) { + maps_out->strings[m_string_out_id] = as_simple_atom->atom().get_str(); + } + return true; + } + } + + auto as_expr = dynamic_cast(input->try_as_single_element()); + if (as_expr && as_expr->expr().is_identity()) { + auto atom = as_expr->expr().get_arg(0); + if (atom.is_sym_ptr()) { + if (m_string_out_id != -1) { + maps_out->strings[m_string_out_id] = atom.get_str(); + } + return true; + } + } + return false; + } + + case Kind::ANY_SYMBOL: { + auto as_simple_atom = dynamic_cast(input->try_as_single_element()); + if (as_simple_atom) { + if (as_simple_atom->atom().is_sym_val()) { + if (m_string_out_id != -1) { + maps_out->strings[m_string_out_id] = as_simple_atom->atom().get_str(); + } + return true; + } + } + + auto as_expr = dynamic_cast(input->try_as_single_element()); + if (as_expr && as_expr->expr().is_identity()) { + auto atom = as_expr->expr().get_arg(0); + if (atom.is_sym_val()) { + if (m_string_out_id != -1) { + maps_out->strings[m_string_out_id] = atom.get_str(); + } + return true; + } + } + return false; + } + + case Kind::DEREF_OP: { + auto as_deref = dynamic_cast(input->try_as_single_element()); + if (as_deref) { + if (as_deref->is_addr_of() != m_deref_is_addr_of) { + return false; + } + if (!m_sub_matchers.at(0).do_match(as_deref->base(), maps_out)) { + return false; + } + if (as_deref->tokens().size() != m_token_matchers.size()) { + return false; + } + for (size_t i = 0; i < as_deref->tokens().size(); i++) { + if (!m_token_matchers.at(i).do_match(as_deref->tokens().at(i), maps_out)) { + return false; + } + } + return true; + } return false; } @@ -166,4 +290,66 @@ MatchResult match(const Matcher& spec, const Form* input) { result.matched = spec.do_match(input, &result.maps); return result; } + +DerefTokenMatcher DerefTokenMatcher::string(const std::string& str) { + DerefTokenMatcher result; + result.m_kind = Kind::STRING; + result.m_str = str; + return result; +} + +DerefTokenMatcher DerefTokenMatcher::any_string(int match_id) { + DerefTokenMatcher result; + result.m_kind = Kind::ANY_STRING; + result.m_str_out_id = match_id; + return result; +} + +bool DerefTokenMatcher::do_match(const DerefToken& input, MatchResult::Maps* maps_out) const { + switch (m_kind) { + case Kind::STRING: + return input.kind() == DerefToken::Kind::FIELD_NAME && input.field_name() == m_str; + case Kind::ANY_STRING: + if (input.kind() == DerefToken::Kind::FIELD_NAME) { + if (m_str_out_id != -1) { + maps_out->strings[m_str_out_id] = input.field_name(); + } + return true; + } + return false; + default: + assert(false); + } +} + +GenericOpMatcher GenericOpMatcher::fixed(FixedOperatorKind kind) { + GenericOpMatcher m; + m.m_kind = Kind::FIXED; + m.m_fixed_kind = kind; + return m; +} + +GenericOpMatcher GenericOpMatcher::func(const Matcher& func_matcher) { + GenericOpMatcher m; + m.m_kind = Kind::FUNC; + m.m_func_matcher = func_matcher; + return m; +} + +bool GenericOpMatcher::do_match(const GenericOperator& input, MatchResult::Maps* maps_out) const { + switch (m_kind) { + case Kind::FIXED: + if (input.kind() == GenericOperator::Kind::FIXED_OPERATOR) { + return input.fixed_kind() == m_fixed_kind; + } + return false; + case Kind::FUNC: + if (input.kind() == GenericOperator::Kind::FUNCTION_EXPR) { + return m_func_matcher.do_match(input.func(), maps_out); + } + return false; + default: + assert(false); + } +} } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/GenericElementMatcher.h b/decompiler/IR2/GenericElementMatcher.h index d6dd4110a2..398e45fbbb 100644 --- a/decompiler/IR2/GenericElementMatcher.h +++ b/decompiler/IR2/GenericElementMatcher.h @@ -8,46 +8,92 @@ #include "Form.h" namespace decompiler { +class DerefTokenMatcher; +class GenericOpMatcher; struct MatchResult { bool matched = false; struct Maps { std::vector> regs; + std::unordered_map strings; } maps; }; class Matcher { public: static Matcher any_reg(int match_id = -1); - static Matcher op(GenericOperator op, const std::vector& args); + static Matcher op(const GenericOpMatcher& op, const std::vector& args); + static Matcher op_with_rest(const GenericOpMatcher& op, const std::vector& args); static Matcher fixed_op(FixedOperatorKind op, const std::vector& args); static Matcher match_or(const std::vector& args); static Matcher cast(const std::string& type, Matcher value); static Matcher any(); static Matcher integer(std::optional value); static Matcher any_reg_cast_to_int_or_uint(int match_id = -1); + static Matcher any_quoted_symbol(int match_id = -1); + static Matcher any_symbol(int match_id = -1); + static Matcher deref(const Matcher& root, + bool is_addr_of, + const std::vector& tokens); enum class Kind { ANY_REG, // matching any register GENERIC_OP, // matching + GENERIC_OP_WITH_REST, OR, CAST, ANY, INT, + ANY_QUOTED_SYMBOL, + ANY_SYMBOL, + DEREF_OP, INVALID }; bool do_match(const Form* input, MatchResult::Maps* maps_out) const; private: - GenericOperator m_gen_op; std::vector m_sub_matchers; + std::vector m_token_matchers; + std::shared_ptr m_gen_op_matcher; + bool m_deref_is_addr_of = false; Kind m_kind = Kind::INVALID; int m_reg_out_id = -1; + int m_string_out_id = -1; std::optional m_int_match; std::string m_str; }; MatchResult match(const Matcher& spec, const Form* input); +class DerefTokenMatcher { + public: + static DerefTokenMatcher string(const std::string& str); + static DerefTokenMatcher any_string(int match_id = -1); + + enum class Kind { STRING, ANY_STRING, INVALID }; + + bool do_match(const DerefToken& input, MatchResult::Maps* maps_out) const; + + private: + Kind m_kind = Kind::INVALID; + std::string m_str; + int m_str_out_id = -1; +}; + +class GenericOpMatcher { + public: + static GenericOpMatcher fixed(FixedOperatorKind kind); + static GenericOpMatcher func(const Matcher& func_matcher); + + enum class Kind { FIXED, FUNC, INVALID }; + + bool do_match(const GenericOperator& input, MatchResult::Maps* maps_out) const; + + private: + Kind m_kind = Kind::INVALID; + FixedOperatorKind m_fixed_kind = FixedOperatorKind::INVALID; + Matcher m_func_matcher; +}; + } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/IR2_common.h b/decompiler/IR2/IR2_common.h index a7d975ee0c..06b7bf915f 100644 --- a/decompiler/IR2/IR2_common.h +++ b/decompiler/IR2/IR2_common.h @@ -105,6 +105,9 @@ enum class FixedOperatorKind { LOGNOR, LOGNOT, SLL, + CAR, + CDR, + NEW, INVALID }; diff --git a/decompiler/analysis/cfg_builder.cpp b/decompiler/analysis/cfg_builder.cpp index 3fac615ce0..e9c99491d5 100644 --- a/decompiler/analysis/cfg_builder.cpp +++ b/decompiler/analysis/cfg_builder.cpp @@ -483,7 +483,7 @@ void clean_up_cond_no_else_final(const Function& func, CondNoElseElement* cne) { if (entry.false_destination.has_value()) { auto fr = entry.false_destination; assert(fr.has_value()); - cne->final_destination = fr->reg(); + cne->final_destination = *fr; } else { assert(false); } @@ -494,7 +494,7 @@ void clean_up_cond_no_else_final(const Function& func, CondNoElseElement* cne) { if (func.ir2.env.has_reg_use()) { auto& last_branch_info = func.ir2.env.reg_use().op.at(last_branch->op()->op_id()); - cne->used_as_value = last_branch_info.written_and_unused.find(cne->final_destination) == + cne->used_as_value = last_branch_info.written_and_unused.find(cne->final_destination.reg()) == last_branch_info.written_and_unused.end(); } diff --git a/decompiler/analysis/expression_build.cpp b/decompiler/analysis/expression_build.cpp index 5aa4b21dc8..0bca8b9714 100644 --- a/decompiler/analysis/expression_build.cpp +++ b/decompiler/analysis/expression_build.cpp @@ -28,6 +28,29 @@ void clean_up_ifs(Form* top_level_form) { top_condition->elts() = {real_condition}; } }); + + top_level_form->apply([&](FormElement* elt) { + auto as_cwe = dynamic_cast(elt); + if (!as_cwe) { + return; + } + + auto top_condition = as_cwe->entries.front().condition; + if (!top_condition->is_single_element() && elt->parent_form) { + auto real_condition = top_condition->back(); + top_condition->pop_back(); + + auto& parent_vector = elt->parent_form->elts(); + // find us in the parent vector + auto me = std::find_if(parent_vector.begin(), parent_vector.end(), + [&](FormElement* x) { return x == elt; }); + assert(me != parent_vector.end()); + + // now insert the fake condition + parent_vector.insert(me, top_condition->elts().begin(), top_condition->elts().end()); + top_condition->elts() = {real_condition}; + } + }); } bool convert_to_expressions(Form* top_level_form, diff --git a/test/decompiler/test_FormBeforeExpressions.cpp b/test/decompiler/test_FormBeforeExpressions.cpp index 10e5406c90..1b05fe3a34 100644 --- a/test/decompiler/test_FormBeforeExpressions.cpp +++ b/test/decompiler/test_FormBeforeExpressions.cpp @@ -420,12 +420,12 @@ TEST_F(FormRegressionTest, SimpleLoopMergeCheck) { " (<.si v1-0 a1-0)\n" " (nop!)\n" " (nop!)\n" - " (set! a0-0 (l.w (+ a0-0 2)))\n" // should have merged - " (set! v1-0 (+ v1-0 1))\n" // also should have merged + " (set! a0-0 (cdr a0-0))\n" // should have merged + " (set! v1-0 (+ v1-0 1))\n" // also should have merged " )\n" " (set! v1-1 '#f)\n" " (set! v1-2 '#f)\n" - " (set! v0-0 (l.w (+ a0-0 -2)))\n" + " (set! v0-0 (car a0-0))\n" " (ret-value v0-0)\n" " )"; test_no_expr(func, type, expected, true); @@ -482,8 +482,8 @@ TEST_F(FormRegressionTest, And) { "(cond\n" " ((begin (set! v1-0 '()) (= a0-0 v1-0)) (set! v0-0 0))\n" // should be a case, not a return " (else\n" - " (set! v1-1 (-> a0-0 cdr))\n" // v1-1 iteration. - " (set! v0-0 1)\n" // v0-1 count + " (set! v1-1 (cdr a0-0))\n" // v1-1 iteration. + " (set! v0-0 1)\n" // v0-1 count " (while\n" " (begin\n" " (and\n" @@ -492,8 +492,8 @@ TEST_F(FormRegressionTest, And) { " )\n" " (truthy a0-2)\n" // this variable doesn't appear, but is set by the and. " )\n" - " (set! v0-0 (+ v0-0 1))\n" // merged (and the result) - " (set! v1-1 (l.w (+ v1-1 2)))\n" // also merged. + " (set! v0-0 (+ v0-0 1))\n" // merged (and the result) + " (set! v1-1 (cdr v1-1))\n" // also merged. " )\n" " (set! v1-2 '#f)\n" // while's false, I think. " )\n" @@ -566,7 +566,7 @@ TEST_F(FormRegressionTest, FunctionCall) { " (begin (set! v1-0 '()) (set! a0-1 (= gp-0 v1-0)) (truthy a0-1))\n" // got empty list. " (begin\n" " (set! t9-0 name=)\n" - " (set! a0-2 (l.w (+ gp-0 -2)))\n" + " (set! a0-2 (car gp-0))\n" " (set! a1-1 s5-0)\n" " (set! v0-0 (call! a0-2 a1-1))\n" " (set! v1-1 v0-0)\n" // name match @@ -574,7 +574,7 @@ TEST_F(FormRegressionTest, FunctionCall) { " )\n" " (not v1-1)\n" // no name match AND no empty list. " )\n" - " (set! gp-0 (l.w (+ gp-0 2)))\n" // get next (merged) + " (set! gp-0 (cdr gp-0))\n" // get next (merged) " )\n" " (set! v1-2 '#f)\n" // while loop thing " (set! v1-3 '())\n" // @@ -703,13 +703,13 @@ TEST_F(FormRegressionTest, NestedAndOr) { " (begin\n" " (or\n" " (begin\n" - " (set! v1-6 (l.w (+ s3-0 2)))\n" // s3-0 = cdr + " (set! v1-6 (cdr s3-0))\n" // s3-0 = cdr " (set! a0-4 '())\n" " (set! a0-5 (= v1-6 a0-4))\n" " (truthy a0-5)\n" // cdr = empty list (sets v1-7 secretly) " )\n" " (begin\n" - " (set! v1-8 (l.w (+ s3-0 2)))\n" + " (set! v1-8 (cdr s3-0))\n" " (set! v1-9 (sll v1-8 62))\n" " (set! v1-7 (>=0.si v1-9))\n" // car is not a list. " )\n" @@ -722,9 +722,9 @@ TEST_F(FormRegressionTest, NestedAndOr) { " (begin\n" " (or\n" " (begin\n" - " (set! s2-0 (l.w (+ s3-0 -2)))\n" // s2 = car - " (set! v1-0 (l.w (+ s3-0 2)))\n" - " (set! s1-0 (-> v1-0 car))\n" // s1 = cadr + " (set! s2-0 (car s3-0))\n" // s2 = car + " (set! v1-0 (cdr s3-0))\n" + " (set! s1-0 (car v1-0))\n" // s1 = cadr " (set! t9-0 s5-0)\n" // func " (set! a0-1 s2-0)\n" // car " (set! a1-1 s1-0)\n" // cadr @@ -740,13 +740,13 @@ TEST_F(FormRegressionTest, NestedAndOr) { " )\n" " (truthy v1-2)\n" // (and (or false >0) (not #t)) " )\n" - " (set! s4-0 (+ s4-0 1))\n" // increment, merge - " (s.w! (+ s3-0 -2) s1-0)\n" // set iter's car to cadr - " (set! v1-4 (l.w (+ s3-0 2)))\n" // current cdr - " (s.w! (+ v1-4 -2) s2-0)\n" // set cadr - " (set! v1-5 s2-0)\n" // iteration thing? + " (set! s4-0 (+ s4-0 1))\n" // increment, merge + " (set! (car s3-0) s1-0)\n" // set iter's car to cadr + " (set! v1-4 (cdr s3-0))\n" // current cdr + " (set! (car v1-4) s2-0)\n" // set cadr + " (set! v1-5 s2-0)\n" // iteration thing? " )\n" - " (set! s3-0 (l.w (+ s3-0 2)))\n" // increment! + " (set! s3-0 (cdr s3-0))\n" // increment! " )\n" " (set! v1-10 '#f)\n" " (set! v1-11 '#f)\n" @@ -797,9 +797,9 @@ TEST_F(FormRegressionTest, NewMethod) { " (begin\n" " (set! gp-0 a2-0)\n" // gp-0 is size " (set! v1-0 object)\n" - " (set! t9-0 (-> v1-0 method-table 0))\n" // object new - " (set! v1-1 a1-0)\n" // ? - " (set! a2-1 (-> a1-0 size))\n" // math + " (set! t9-0 (-> v1-0 methods-by-name new))\n" // object new + " (set! v1-1 a1-0)\n" // ? + " (set! a2-1 (-> a1-0 size))\n" // math " (set! a1-1 (-> a1-0 heap-base))\n" " (set! a1-2 (*.ui gp-0 a1-1))\n" " (set! a2-2 (+ a2-1 a1-2))\n" @@ -888,7 +888,7 @@ TEST_F(FormRegressionTest, TypeOf) { std::string expected = "(begin\n" " (set! v1-1 (type-of a0-0))\n" - " (set! t9-0 (-> v1-1 method-table 2))\n" // print method. + " (set! t9-0 (-> v1-1 methods-by-name print))\n" // print method. " (set! v0-0 (call! a0-0))\n" " (ret-value v0-0)\n" " )"; diff --git a/test/decompiler/test_FormExpressionBuild.cpp b/test/decompiler/test_FormExpressionBuild.cpp index 2ea1f8c63c..59ef0595e9 100644 --- a/test/decompiler/test_FormExpressionBuild.cpp +++ b/test/decompiler/test_FormExpressionBuild.cpp @@ -587,4 +587,983 @@ TEST_F(FormRegressionTest, ExprFindParentMethod) { " v0-0\n" " )"; test_with_expr(func, type, expected, false, ""); -} \ No newline at end of file +} + +TEST_F(FormRegressionTest, ExprRef) { + std::string func = + " sll r0, r0, 0\n" + "L272:\n" + " addiu v1, r0, 0\n" + " beq r0, r0, L274\n" + " sll r0, r0, 0\n" + + "L273:\n" + " sll r0, r0, 0\n" + " sll r0, r0, 0\n" + " lw a0, 2(a0)\n" + " daddiu v1, v1, 1\n" + + "L274:\n" + " slt a2, v1, a1\n" + " bne a2, r0, L273\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " lw v0, -2(a0)\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function object int object)"; + + std::string expected = + "(begin\n" + " (set! v1-0 0)\n" + " (while\n" + " (<.si v1-0 a1-0)\n" + " (nop!)\n" + " (nop!)\n" + " (set! a0-0 (cdr a0-0))\n" + " (set! v1-0 (+ v1-0 1))\n" + " )\n" + " (set! v1-1 '#f)\n" + " (set! v1-2 '#f)\n" + " (car a0-0)\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprPairMethod4) { + std::string func = + " sll r0, r0, 0\n" + "L266:\n" + " daddiu v1, s7, -10\n" + " bne a0, v1, L267\n" + " sll r0, r0, 0\n" + + " addiu v0, r0, 0\n" + " beq r0, r0, L271\n" + " sll r0, r0, 0\n" + + "L267:\n" + " lw v1, 2(a0)\n" + " addiu v0, r0, 1\n" + " beq r0, r0, L269\n" + " sll r0, r0, 0\n" + + "L268:\n" + " daddiu v0, v0, 1\n" + " lw v1, 2(v1)\n" + + "L269:\n" + " daddiu a0, s7, -10\n" + " dsubu a0, v1, a0\n" + " daddiu a1, s7, 8\n" + " movz a1, s7, a0\n" + " beql s7, a1, L270\n" + " or a0, a1, r0\n" + + " dsll32 a0, v1, 30\n" + " slt a1, a0, r0\n" + " daddiu a0, s7, 8\n" + " movz a0, s7, a1\n" + + "L270:\n" + " bne s7, a0, L268\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + + "L271:\n" + " jr ra\n" + " daddu sp, sp, r0\n"; + std::string type = "(function pair int)"; + + std::string expected = + "(begin\n" + " (cond\n" + " ((= a0-0 '()) (set! v0-0 0))\n" + " (else\n" + " (set! v1-1 (cdr a0-0))\n" + " (set! v0-0 1)\n" + " (while\n" + " (truthy\n" + " (and (truthy (!= v1-1 '())) " + " (<0.si (sll (the-as uint v1-1) 62)))\n" + " )\n" + " (set! v0-0 (+ v0-0 1))\n" + " (set! v1-1 (cdr v1-1))\n" + " )\n" + " (set! v1-2 '#f)\n" + " )\n" + " )\n" + " v0-0\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprPairMethod5) { + std::string func = + " sll r0, r0, 0\n" + " lw v1, pair(s7)\n" + " lhu v0, 8(v1)\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function pair uint)"; + + std::string expected = "(-> pair size)"; + test_with_expr(func, type, expected, false, ""); +} + +TEST_F(FormRegressionTest, ExprLast) { + std::string func = + " sll r0, r0, 0\n" + + " or v0, a0, r0\n" + " beq r0, r0, L264\n" + " sll r0, r0, 0\n" + + "L263:\n" + " sll r0, r0, 0\n" + " sll r0, r0, 0\n" + " lw v0, 2(v0)\n" + + "L264:\n" + " daddiu v1, s7, -10\n" + " lw a0, 2(v0)\n" + " bne a0, v1, L263\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function object object)"; + + std::string expected = + "(begin\n" + " (set! v0-0 a0-0)\n" + " (while (!= (cdr v0-0) '())" + " (nop!)\n" + " (nop!)\n" + " (set! v0-0 (cdr v0-0)))\n" + " (set! v1-1 '#f)\n" + " v0-0\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprMember) { + std::string func = + " sll r0, r0, 0\n" + "L257:\n" + " or v1, a1, r0\n" + " beq r0, r0, L259\n" + " sll r0, r0, 0\n" + + "L258:\n" + " lw v1, 2(v1)\n" + + "L259:\n" + " daddiu a1, s7, -10\n" + " dsubu a1, v1, a1\n" + " daddiu a2, s7, 8\n" + " movn a2, s7, a1\n" + " bnel s7, a2, L260\n" + " or a1, a2, r0\n" + + " lw a1, -2(v1)\n" + " dsubu a2, a1, a0\n" + " daddiu a1, s7, 8\n" + " movn a1, s7, a2\n" + + "L260:\n" + " beq s7, a1, L258\n" + " sll r0, r0, 0\n" + + " or a0, s7, r0\n" + " daddiu a0, s7, -10\n" + " beq v1, a0, L261\n" + " or v0, s7, r0\n" + + " or v0, v1, r0\n" + + "L261:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function object object object)"; + + std::string expected = + "(begin\n" + " (set! v1-0 a1-0)\n" + " (while\n" + " (not (or (truthy (= v1-0 '())) (= (car v1-0) a0-0)))\n" + " (set! v1-0 (cdr v1-0))\n" + " )\n" + " (set! a0-1 '#f)\n" + " (if (!= v1-0 '()) v1-0)\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprNmember) { + std::string func = + " sll r0, r0, 0\n" + " daddiu sp, sp, -48\n" + " sd ra, 0(sp)\n" + " sq s5, 16(sp)\n" + " sq gp, 32(sp)\n" + + " or s5, a0, r0\n" + " or gp, a1, r0\n" + " beq r0, r0, L254\n" + " sll r0, r0, 0\n" + + "L253:\n" + " lw gp, 2(gp)\n" + + "L254:\n" + " daddiu v1, s7, -10\n" + " dsubu v1, gp, v1\n" + " daddiu a0, s7, 8\n" + " movn a0, s7, v1\n" + " bnel s7, a0, L255\n" + " or v1, a0, r0\n" + + " lw t9, name=(s7)\n" + " lw a0, -2(gp)\n" + " or a1, s5, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v1, v0, r0\n" + + "L255:\n" + " beq s7, v1, L253\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " daddiu v1, s7, -10\n" + " beq gp, v1, L256\n" + " or v0, s7, r0\n" + + " or v0, gp, r0\n" + + "L256:\n" + " ld ra, 0(sp)\n" + " lq gp, 32(sp)\n" + " lq s5, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 48"; + std::string type = "(function basic object object)"; + + std::string expected = + "(begin\n" + " (set! s5-0 a0-0)\n" + " (set! gp-0 a1-0)\n" + " (while\n" + " (not (or (truthy (= gp-0 '())) (name= (car gp-0) s5-0)))\n" + " (set! gp-0 (cdr gp-0))\n" + " )\n" + " (set! v1-2 '#f)\n" + " (if (!= gp-0 '()) gp-0)\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprAssoc) { + std::string func = + " sll r0, r0, 0\n" + " or v1, a1, r0\n" + " beq r0, r0, L249\n" + " sll r0, r0, 0\n" + "L248:\n" + " lw v1, 2(v1)\n" + + "L249:\n" + " daddiu a1, s7, -10\n" + " dsubu a1, v1, a1\n" + " daddiu a2, s7, 8\n" + " movn a2, s7, a1\n" + " bnel s7, a2, L250\n" + " or a1, a2, r0\n" + + " lw a1, -2(v1)\n" + " lw a1, -2(a1)\n" + " dsubu a2, a1, a0\n" + " daddiu a1, s7, 8\n" + " movn a1, s7, a2\n" + + "L250:\n" + " beq s7, a1, L248\n" + " sll r0, r0, 0\n" + + " or a0, s7, r0\n" + " daddiu a0, s7, -10\n" + " beq v1, a0, L251\n" + " or v0, s7, r0\n" + + " lw v0, -2(v1)\n" + + "L251:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function object object object)"; + + std::string expected = + "(begin\n" + " (set! v1-0 a1-0)\n" + " (while\n" + " (not (or (truthy (= v1-0 '())) (= (car (car v1-0)) a0-0)))\n" + " (set! v1-0 (cdr v1-0))\n" + " )\n" + " (set! a0-1 '#f)\n" + " (if (!= v1-0 '()) (car v1-0))\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprAssoce) { + std::string func = + " sll r0, r0, 0\n" + + " or v1, a1, r0\n" + " beq r0, r0, L244\n" + " sll r0, r0, 0\n" + + "L243:\n" + " lw v1, 2(v1)\n" + + "L244:\n" + " daddiu a1, s7, -10\n" + " dsubu a1, v1, a1\n" + " daddiu a2, s7, 8\n" + " movn a2, s7, a1\n" + " bnel s7, a2, L245\n" + " or a1, a2, r0\n" + + " lw a1, -2(v1)\n" + " lw a1, -2(a1)\n" + " dsubu a1, a1, a0\n" + " daddiu a2, s7, 8\n" + " movn a2, s7, a1\n" + " bnel s7, a2, L245\n" + " or a1, a2, r0\n" + + " lw a1, -2(v1)\n" + " lw a1, -2(a1)\n" + " daddiu a2, s7, else\n" + " dsubu a2, a1, a2\n" + " daddiu a1, s7, 8\n" + " movn a1, s7, a2\n" + + "L245:\n" + " beq s7, a1, L243\n" + " sll r0, r0, 0\n" + + " or a0, s7, r0\n" + " daddiu a0, s7, -10\n" + " beq v1, a0, L246\n" + " or v0, s7, r0\n" + + " lw v0, -2(v1)\n" + + "L246:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function object object object)"; + + std::string expected = + "(begin\n" + " (set! v1-0 a1-0)\n" + " (while\n" + " (not\n" + " (or\n" + " (truthy (= v1-0 '()))\n" + " (truthy (= (car (car v1-0)) a0-0))\n" + " (= (car (car v1-0)) 'else)\n" + " )\n" + " )\n" + " (set! v1-0 (cdr v1-0))\n" + " )\n" + " (set! a0-1 '#f)\n" + " (if (!= v1-0 '()) (car v1-0))\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprNassoc) { + std::string func = + " sll r0, r0, 0\n" + " daddiu sp, sp, -48\n" + " sd ra, 0(sp)\n" + " sq s5, 16(sp)\n" + " sq gp, 32(sp)\n" + + " or s5, a0, r0\n" + " or gp, a1, r0\n" + " beq r0, r0, L238\n" + " sll r0, r0, 0\n" + + "L237:\n" + " lw gp, 2(gp)\n" + + "L238:\n" + " daddiu v1, s7, -10\n" + " dsubu v1, gp, v1\n" + " daddiu a0, s7, 8\n" + " movn a0, s7, v1\n" + " bnel s7, a0, L240\n" + " or v1, a0, r0\n" + + " lw v1, -2(gp)\n" + " lw a1, -2(v1)\n" + " dsll32 v1, a1, 30\n" + " slt v1, v1, r0\n" + " beq v1, r0, L239\n" + " sll r0, r0, 0\n" + + " lw t9, nmember(s7)\n" + " or a0, s5, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v1, v0, r0\n" + " beq r0, r0, L240\n" + " sll r0, r0, 0\n" + + "L239:\n" + " lw t9, name=(s7)\n" + " or a0, a1, r0\n" + " or a1, s5, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v1, v0, r0\n" + + "L240:\n" + " beq s7, v1, L237\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " daddiu v1, s7, -10\n" + " beq gp, v1, L241\n" + " or v0, s7, r0\n" + + " lw v0, -2(gp)\n" + + "L241:\n" + " ld ra, 0(sp)\n" + " lq gp, 32(sp)\n" + " lq s5, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 48"; + std::string type = "(function object object object)"; + + // will need fixing if we clean up the set! if thing + std::string expected = + "(begin\n" + " (set! s5-0 a0-0)\n" + " (set! gp-0 a1-0)\n" + " (while\n" + " (not\n" + " (or\n" + " (truthy (= gp-0 '()))\n" + " (begin\n" + " (set! a1-1 (car (car gp-0)))\n" + " (if\n" + " (pair? a1-1)\n" + " (set! v1-1 (nmember s5-0 a1-1))\n" + " (set! v1-1 (name= a1-1 s5-0))\n" + " )\n" + " v1-1\n" + " )\n" + " )\n" + " )\n" + " (set! gp-0 (cdr gp-0))\n" + " )\n" + " (set! v1-3 '#f)\n" + " (if (!= gp-0 '()) (car gp-0))\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprNassoce) { + std::string func = + " sll r0, r0, 0\n" + "L230:\n" + " daddiu sp, sp, -64\n" + " sd ra, 0(sp)\n" + " sq s4, 16(sp)\n" + " sq s5, 32(sp)\n" + " sq gp, 48(sp)\n" + + " or s5, a0, r0\n" + " or gp, a1, r0\n" + " beq r0, r0, L232\n" + " sll r0, r0, 0\n" + + "L231:\n" + " lw gp, 2(gp)\n" + + "L232:\n" + " daddiu v1, s7, -10\n" + " dsubu v1, gp, v1\n" + " daddiu a0, s7, 8\n" + " movn a0, s7, v1\n" + " bnel s7, a0, L234\n" + " or v1, a0, r0\n" + + " lw v1, -2(gp)\n" + " lw s4, -2(v1)\n" + " dsll32 v1, s4, 30\n" + " slt v1, v1, r0\n" + " beq v1, r0, L233\n" + " sll r0, r0, 0\n" + + " lw t9, nmember(s7)\n" + " or a0, s5, r0\n" + " or a1, s4, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v1, v0, r0\n" + " beq r0, r0, L234\n" + " sll r0, r0, 0\n" + + "L233:\n" + " lw t9, name=(s7)\n" + " or a0, s4, r0\n" + " or a1, s5, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " bnel s7, v0, L234\n" + " or v1, v0, r0\n" + + " daddiu v1, s7, else\n" + " dsubu a0, s4, v1\n" + " daddiu v1, s7, 8\n" + " movn v1, s7, a0\n" + + "L234:\n" + " beq s7, v1, L231\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " daddiu v1, s7, -10\n" + " beq gp, v1, L235\n" + " or v0, s7, r0\n" + + " lw v0, -2(gp)\n" + + "L235:\n" + " ld ra, 0(sp)\n" + " lq gp, 48(sp)\n" + " lq s5, 32(sp)\n" + " lq s4, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 64"; + std::string type = "(function object object object)"; + + // will need fixing if we clean up the set! if thing + std::string expected = + "(begin\n" + " (set! s5-0 a0-0)\n" + " (set! gp-0 a1-0)\n" + " (while\n" + " (not\n" + " (or\n" + " (truthy (= gp-0 '()))\n" + " (begin\n" + " (set! s4-0 (car (car gp-0)))\n" + " (if\n" + " (pair? s4-0)\n" + " (set! v1-1 (nmember s5-0 s4-0))\n" + " (set! v1-1 (or (truthy (name= s4-0 s5-0)) (= s4-0 'else)))\n" + " )\n" + " v1-1\n" + " )\n" + " )\n" + " )\n" + " (set! gp-0 (cdr gp-0))\n" + " )\n" + " (set! v1-4 '#f)\n" + " (if (!= gp-0 '()) (car gp-0))\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprAppend) { + std::string func = + " sll r0, r0, 0\n" + "L224:\n" + " daddiu v1, s7, -10\n" + " bne a0, v1, L225\n" + " sll r0, r0, 0\n" + + " or v0, a1, r0\n" + " beq r0, r0, L229\n" + " sll r0, r0, 0\n" + + "L225:\n" + " or v1, a0, r0\n" + " beq r0, r0, L227\n" + " sll r0, r0, 0\n" + + "L226:\n" + " sll r0, r0, 0\n" + " sll r0, r0, 0\n" + " lw v1, 2(v1)\n" + + "L227:\n" + " daddiu a2, s7, -10\n" + " lw a3, 2(v1)\n" + " bne a3, a2, L226\n" + " sll r0, r0, 0\n" + + " or a2, s7, r0\n" + " daddiu a2, s7, -10\n" + " beq v1, a2, L228\n" + " or a2, s7, r0\n" + + " sw a1, 2(v1)\n" + " or v1, a1, r0\n" + + "L228:\n" + " or v0, a0, r0\n" + + "L229:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function object object object)"; + + // todo - will be changed by if fix. + std::string expected = + "(begin\n" + " (cond\n" + " ((= a0-0 '()) (set! v0-0 a1-0))\n" + " (else\n" + " (set! v1-1 a0-0)\n" + " (while (!= (cdr v1-1) '()) " + " (nop!) " + " (nop!) " + " (set! v1-1 (cdr v1-1)))\n" + " (set! a2-1 '#f)\n" + " (when (!= v1-1 '()) " + " (set! (cdr v1-1) a1-0) " + " (set! v1-2 a1-0))\n" + " (set! v0-0 a0-0)\n" + " )\n" + " )\n" + " v0-0\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprDelete) { + std::string func = + " sll r0, r0, 0\n" + "L217:\n" + " lw v1, -2(a1)\n" + " bne a0, v1, L218\n" + " sll r0, r0, 0\n" + + " lw v0, 2(a1)\n" + " beq r0, r0, L223\n" + " sll r0, r0, 0\n" + + "L218:\n" + " or v1, a1, r0\n" + " lw a2, 2(a1)\n" + " beq r0, r0, L220\n" + " sll r0, r0, 0\n" + + "L219:\n" + " or v1, a2, r0\n" + " lw a2, 2(a2)\n" + + "L220:\n" + " daddiu a3, s7, -10\n" + " dsubu a3, a2, a3\n" + " daddiu t0, s7, 8\n" + " movn t0, s7, a3\n" + " bnel s7, t0, L221\n" + " or a3, t0, r0\n" + + " lw a3, -2(a2)\n" + " dsubu t0, a3, a0\n" + " daddiu a3, s7, 8\n" + " movn a3, s7, t0\n" + + "L221:\n" + " beq s7, a3, L219\n" + " sll r0, r0, 0\n" + + " or a0, s7, r0\n" + " daddiu a0, s7, -10\n" + " beq a2, a0, L222\n" + " or a0, s7, r0\n" + + " lw a0, 2(a2)\n" + " sw a0, 2(v1)\n" + + "L222:\n" + " or v0, a1, r0\n" + + "L223:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function object object pair)"; + + // todo - will be changed by if fix. + std::string expected = + "(begin\n" + " (cond\n" + " ((= a0-0 (car a1-0)) (set! v0-0 (cdr a1-0)))\n" + " (else\n" + " (set! v1-1 a1-0)\n" + " (set! a2-0 (cdr a1-0))\n" + " (while\n" + " (not (or (truthy (= a2-0 (quote ()))) (= (car a2-0) a0-0)))\n" + " (set! v1-1 a2-0)\n" + " (set! a2-0 (cdr a2-0))\n" + " )\n" + " (set! a0-1 (quote #f))\n" + " (if (!= a2-0 (quote ())) (set! (cdr v1-1) (cdr a2-0)))\n" + " (set! v0-0 a1-0)\n" + " )\n" + " )\n" + " (the-as pair v0-0)\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprDeleteCar) { + std::string func = + " sll r0, r0, 0\n" + "L210:\n" + " lw v1, -2(a1)\n" + " lw v1, -2(v1)\n" + " bne a0, v1, L211\n" + " sll r0, r0, 0\n" + + " lw v0, 2(a1)\n" + " beq r0, r0, L216\n" + " sll r0, r0, 0\n" + + "L211:\n" + " or v1, a1, r0\n" + " lw a2, 2(a1)\n" + " beq r0, r0, L213\n" + " sll r0, r0, 0\n" + + "L212:\n" + " or v1, a2, r0\n" + " lw a2, 2(a2)\n" + + "L213:\n" + " daddiu a3, s7, -10\n" + " dsubu a3, a2, a3\n" + " daddiu t0, s7, 8\n" + " movn t0, s7, a3\n" + " bnel s7, t0, L214\n" + " or a3, t0, r0\n" + + " lw a3, -2(a2)\n" + " lw a3, -2(a3)\n" + " dsubu t0, a3, a0\n" + " daddiu a3, s7, 8\n" + " movn a3, s7, t0\n" + + "L214:\n" + " beq s7, a3, L212\n" + " sll r0, r0, 0\n" + + " or a0, s7, r0\n" + " daddiu a0, s7, -10\n" + " beq a2, a0, L215\n" + " or a0, s7, r0\n" + + " lw a0, 2(a2)\n" + " sw a0, 2(v1)\n" + + "L215:\n" + " or v0, a1, r0\n" + + "L216:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function object object pair)"; + + // todo - will be changed by if fix. + std::string expected = + "(begin\n" + " (cond\n" + " ((= a0-0 (car (car a1-0))) (set! v0-0 (cdr a1-0)))\n" + " (else\n" + " (set! v1-2 a1-0)\n" + " (set! a2-0 (cdr a1-0))\n" + " (while\n" + " (not (or (truthy (= a2-0 (quote ()))) (= (car (car a2-0)) a0-0)))\n" + " (set! v1-2 a2-0)\n" + " (set! a2-0 (cdr a2-0))\n" + " )\n" + " (set! a0-1 (quote #f))\n" + " (if (!= a2-0 (quote ())) (set! (cdr v1-2) (cdr a2-0)))\n" + " (set! v0-0 a1-0)\n" + " )\n" + " )\n" + " (the-as pair v0-0)\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +TEST_F(FormRegressionTest, ExprInsertCons) { + std::string func = + " sll r0, r0, 0\n" + " daddiu sp, sp, -32\n" + " sd ra, 0(sp)\n" + " sq gp, 16(sp)\n" + + " or gp, a0, r0\n" + " lw t9, delete-car!(s7)\n" + " lw a0, -2(gp)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or a3, v0, r0\n" + " lw v1, pair(s7)\n" + " lwu t9, 16(v1)\n" + " daddiu a0, s7, global\n" + " lw a1, pair(s7)\n" + " or a2, gp, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " ld ra, 0(sp)\n" + " lq gp, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 32"; + std::string type = "(function object object pair)"; + + // NOTE - this appears to _not_ be a nested call. + std::string expected = + "(begin\n" + " (set! gp-0 a0-0)\n" + " (set! a3-0 (delete-car! (car gp-0) a1-0))\n" + " (new 'global pair gp-0 a3-0)\n" + " )"; + test_with_expr(func, type, expected, true, ""); +} + +// TEST_F(FormRegressionTest, ExprSort) { +// std::string func = +// " sll r0, r0, 0\n" +// " daddiu sp, sp, -112\n" +// " sd ra, 0(sp)\n" +// " sq s1, 16(sp)\n" +// " sq s2, 32(sp)\n" +// " sq s3, 48(sp)\n" +// " sq s4, 64(sp)\n" +// " sq s5, 80(sp)\n" +// " sq gp, 96(sp)\n" +// +// " or gp, a0, r0\n" +// " or s5, a1, r0\n" +// " addiu s4, r0, -1\n" +// " beq r0, r0, L208\n" +// " sll r0, r0, 0\n" +// +// "L201:\n" +// " addiu s4, r0, 0\n" +// " or s3, gp, r0\n" +// " beq r0, r0, L206\n" +// " sll r0, r0, 0\n" +// +// "L202:\n" +// " lw s2, -2(s3)\n" +// " lw v1, 2(s3)\n" +// " lw s1, -2(v1)\n" +// " or t9, s5, r0\n" +// " or a0, s2, r0\n" +// " or a1, s1, r0\n" +// " jalr ra, t9\n" +// " sll v0, ra, 0\n" +// +// " or v1, v0, r0\n" +// " beql s7, v1, L203\n" +// " daddiu a0, s7, 8\n" +// +// " slt a1, r0, v1\n" +// " daddiu a0, s7, 8\n" +// " movz a0, s7, a1\n" +// +// "L203:\n" +// " beql s7, a0, L204\n" +// " or v1, a0, r0\n" +// +// " daddiu a0, s7, #t\n" +// " dsubu a0, v1, a0\n" +// " daddiu v1, s7, 8\n" +// " movz v1, s7, a0\n" +// +// "L204:\n" +// " beq s7, v1, L205\n" +// " or v1, s7, r0\n" +// +// " daddiu s4, s4, 1\n" +// " sw s1, -2(s3)\n" +// " lw v1, 2(s3)\n" +// " sw s2, -2(v1)\n" +// " or v1, s2, r0\n" +// +// "L205:\n" +// " lw s3, 2(s3)\n" +// +// "L206:\n" +// " lw v1, 2(s3)\n" +// " daddiu a0, s7, -10\n" +// " dsubu v1, v1, a0\n" +// " daddiu a0, s7, 8\n" +// " movn a0, s7, v1\n" +// " bnel s7, a0, L207\n" +// " or v1, a0, r0\n" +// +// " lw v1, 2(s3)\n" +// " dsll32 v1, v1, 30\n" +// " slt a0, v1, r0\n" +// " daddiu v1, s7, 8\n" +// " movn v1, s7, a0\n" +// +// "L207:\n" +// " beq s7, v1, L202\n" +// " sll r0, r0, 0\n" +// +// " or v1, s7, r0\n" +// " or v1, s7, r0\n" +// +// "L208:\n" +// " bne s4, r0, L201\n" +// " sll r0, r0, 0\n" +// +// " or v1, s7, r0\n" +// " or v0, gp, r0\n" +// " ld ra, 0(sp)\n" +// " lq gp, 96(sp)\n" +// " lq s5, 80(sp)\n" +// " lq s4, 64(sp)\n" +// " lq s3, 48(sp)\n" +// " lq s2, 32(sp)\n" +// " lq s1, 16(sp)\n" +// " jr ra\n" +// " daddiu sp, sp, 112"; +// std::string type = "(function object (function object object object) object)"; +// +// // NOTE - this appears to _not_ be a nested call. +// std::string expected = +// "(begin\n" +// " (set! gp-0 a0-0)\n" +// " (set! a3-0 (delete-car! (car gp-0) a1-0))\n" +// " (new 'global pair gp-0 a3-0)\n" +// " )"; +// test_with_expr(func, type, expected, true, ""); +//} \ No newline at end of file