diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index 5d48658235..a64d828699 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -15,18 +15,12 @@ add_library( Function/BasicBlocks.cpp Function/CfgVtx.cpp - Function/ExpressionBuilder.cpp - Function/ExpressionStack.cpp Function/Function.cpp - Function/RegUsage.cpp Function/TypeAnalysis.cpp Function/TypeInspector.cpp IR/BasicOpBuilder.cpp - IR/CfgBuilder.cpp IR/IR.cpp - IR/IR_ExpressionStack.cpp - IR/IR_TypeAnalysis.cpp IR2/atomic_op_builder.cpp IR2/AtomicOp.cpp diff --git a/decompiler/Function/ExpressionBuilder.cpp b/decompiler/Function/ExpressionBuilder.cpp deleted file mode 100644 index f7fdd6fb41..0000000000 --- a/decompiler/Function/ExpressionBuilder.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "Function.h" -#include "decompiler/IR/IR.h" -#include "ExpressionStack.h" - -namespace decompiler { -namespace { -bool expressionize_begin(IR_Begin* begin, LinkedObjectFile& file) { - ExpressionStack stack; - // todo - this might need to run multiple times? - for (auto& op : begin->forms) { - op->expression_stack(stack, file); - } - // printf("%s\n", stack.print(file).c_str()); - begin->forms = stack.get_result(); - return true; -} -} // namespace - -bool Function::build_expression(LinkedObjectFile& file) { - if (!ir) { - printf("build_expression on %s failed due to no IR.\n", guessed_name.to_string().c_str()); - return false; - } - - try { - // first we get a list of begins, which are where we can build up expressions. - // we want to start with innermost begins because we'll probably need to do some fixing up - // or more complicated analysis to do as good as possible on outer begins. - auto all_children = ir->get_all_ir(file); - std::vector all_begins; - - // the top level may also be a begin - auto as_begin = dynamic_cast(ir.get()); - if (as_begin) { - all_begins.push_back(as_begin); - } - - for (auto& i : all_children) { - auto child_as_begin = dynamic_cast(i.get()); - if (child_as_begin) { - all_begins.push_back(child_as_begin); - } - } - - // turn each begin into an expression - for (auto b : all_begins) { - // printf("BEFORE:\n%s\n", b->print(file).c_str()); - if (!expressionize_begin(b, file)) { - return false; - } - // printf("AFTER:\n%s\n", b->print(file).c_str()); - } - } catch (std::exception& e) { - printf("build_expression failed on %s due to %s\n", guessed_name.to_string().c_str(), e.what()); - return false; - } - - return true; -} -} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/ExpressionStack.cpp b/decompiler/Function/ExpressionStack.cpp deleted file mode 100644 index 6135362335..0000000000 --- a/decompiler/Function/ExpressionStack.cpp +++ /dev/null @@ -1,112 +0,0 @@ -#include "third-party/fmt/core.h" -#include "ExpressionStack.h" - -namespace decompiler { -std::string ExpressionStack::StackEntry::print(LinkedObjectFile& file) { - return fmt::format("d: {} s: {} | {} <- {}", display, sequence_point, - destination.has_value() ? destination.value().to_charp() : "N/A", - source->print(file)); -} - -std::string ExpressionStack::print(LinkedObjectFile& file) { - std::string result; - for (auto& x : m_stack) { - result += x.print(file); - result += '\n'; - } - return result; -} - -void ExpressionStack::set(Register reg, std::shared_ptr value, bool sequence_point) { - StackEntry entry; - entry.display = true; // by default, we should display everything! - entry.sequence_point = sequence_point; - entry.destination = reg; - entry.source = std::move(value); - m_stack.push_back(entry); -} - -bool ExpressionStack::is_single_expression() { - int count = 0; - for (auto& e : m_stack) { - if (e.display) { - count++; - } - } - return count == 1; -} - -void ExpressionStack::add_no_set(std::shared_ptr value, bool sequence_point) { - StackEntry entry; - entry.display = true; - entry.destination = std::nullopt; - entry.source = value; - entry.sequence_point = sequence_point; - m_stack.push_back(entry); -} - -/*! - * "Remove" an entry from the stack. Cannot cross a sequence point. - * Internally, the entry is still stored. It is just flagged with display=false. - */ -std::shared_ptr ExpressionStack::get(Register reg) { - for (size_t i = m_stack.size(); i-- > 0;) { - auto& entry = m_stack.at(i); - if (entry.display) { - if (entry.destination == reg) { - entry.display = false; - return entry.source; - } else { - // we didn't match - if (entry.sequence_point) { - // and it's a sequence point! can't look any more back than this. - return std::make_shared(reg, -1); - } - } - } - } - return std::make_shared(reg, -1); -} - -/*! - * Convert the stack into a sequence of compacted expressions. - * This is final result of the expression compaction algorithm. - */ -std::vector> ExpressionStack::get_result() { - std::vector> result; - - for (auto& e : m_stack) { - if (!e.display) { - continue; - } - if (e.destination.has_value()) { - auto dst_reg = std::make_shared(e.destination.value(), -1); - auto op = std::make_shared(IR_Set::EXPR, dst_reg, e.source); - result.push_back(op); - } else { - result.push_back(e.source); - } - } - - return result; -} - -bool ExpressionStack::display_stack_empty() { - for (auto& e : m_stack) { - if (e.display) { - return false; - } - } - return true; -} - -ExpressionStack::StackEntry& ExpressionStack::get_display_stack_top() { - for (size_t i = m_stack.size(); i-- > 0;) { - auto& entry = m_stack.at(i); - if (entry.display) { - return entry; - } - } - assert(false); -} -} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/ExpressionStack.h b/decompiler/Function/ExpressionStack.h deleted file mode 100644 index 281254d9a0..0000000000 --- a/decompiler/Function/ExpressionStack.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include -#include -#include "decompiler/IR/IR.h" -#include "decompiler/Disasm/Register.h" -#include "decompiler/util/TP_Type.h" - -namespace decompiler { -/*! - * An ExpressionStack is used to track partial expressions when rebuilding the tree structure of - * GOAL code. Linear sequences of operations are added onto the expression stack. - */ -class ExpressionStack { - public: - ExpressionStack() = default; - void set(Register reg, std::shared_ptr value, bool sequence_point); - void add_no_set(std::shared_ptr value, bool sequence_point); - std::shared_ptr get(Register reg); - bool is_single_expression(); - std::string print(LinkedObjectFile& file); - std::vector> get_result(); - - private: - struct StackEntry { - bool display = true; // should this appear in the output? - std::optional destination; // what register we are setting (or nullopt if no dest.) - std::shared_ptr source; // the value we are setting the register to. - bool sequence_point = false; - // TP_Type type; - std::string print(LinkedObjectFile& file); - }; - std::vector m_stack; - - bool display_stack_empty(); - StackEntry& get_display_stack_top(); -}; -} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/Function.cpp b/decompiler/Function/Function.cpp index c5c9ff6a47..7048cf7115 100644 --- a/decompiler/Function/Function.cpp +++ b/decompiler/Function/Function.cpp @@ -711,16 +711,6 @@ int Function::get_failed_basic_op_count() { return count; } -int Function::get_reginfo_basic_op_count() { - int count = 0; - for (auto& x : basic_ops) { - if (x->reg_info_set) { - count++; - } - } - return count; -} - /*! * Topological sort of basic blocks. * Returns a valid ordering + a list of blocks that you can't reach and therefore diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index 9e78dbabd5..1c7f4ffc40 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -89,17 +89,11 @@ class Function { const AtomicOp& get_atomic_op_at_instr(int idx); int get_basic_op_count(); int get_failed_basic_op_count(); - int get_reginfo_basic_op_count(); - bool run_type_analysis(const TypeSpec& my_type, - DecompilerTypeSystem& dts, - LinkedObjectFile& file, - const std::unordered_map>& hints); + bool run_type_analysis_ir2(const TypeSpec& my_type, DecompilerTypeSystem& dts, LinkedObjectFile& file, const std::unordered_map>& hints); - void run_reg_usage(); - bool build_expression(LinkedObjectFile& file); BlockTopologicalSort bb_topo_sort(); TypeSpec type; diff --git a/decompiler/Function/RegUsage.cpp b/decompiler/Function/RegUsage.cpp deleted file mode 100644 index 61bcd991d0..0000000000 --- a/decompiler/Function/RegUsage.cpp +++ /dev/null @@ -1,175 +0,0 @@ -#include "Function.h" -#include "decompiler/IR/IR.h" - -namespace decompiler { -namespace { -bool in_set(RegSet& set, const Register& obj) { - return set.find(obj) != set.end(); -} - -void phase1(Function& f, BasicBlock& block) { - for (int i = block.end_basic_op; i-- > block.start_basic_op;) { - auto& instr = f.basic_ops.at(i); - auto& lv = block.live.at(i - block.start_basic_op); - auto& dd = block.dead.at(i - block.start_basic_op); - - // make all read live out - auto read = instr->read_regs; - lv.clear(); - for (auto& x : read) { - lv.insert(x); - } - - // kill things which are overwritten - dd.clear(); - auto write = instr->write_regs; - for (auto& x : write) { - if (!in_set(lv, x)) { - dd.insert(x); - } - } - - // b.use = i.liveout - RegSet use_old = block.use; - block.use.clear(); - for (auto& x : lv) { - block.use.insert(x); - } - // | (bu.use & !i.dead) - for (auto& x : use_old) { - if (!in_set(dd, x)) { - block.use.insert(x); - } - } - - // b.defs = i.dead - RegSet defs_old = block.defs; - block.defs.clear(); - for (auto& x : dd) { - block.defs.insert(x); - } - // | b.defs & !i.lv - for (auto& x : defs_old) { - if (!in_set(lv, x)) { - block.defs.insert(x); - } - } - } -} - -bool phase2(std::vector& blocks, BasicBlock& block) { - bool changed = false; - auto out = block.defs; - - for (auto s : {block.succ_branch, block.succ_ft}) { - if (s == -1) { - continue; - } - for (auto in : blocks.at(s).input) { - out.insert(in); - } - } - - RegSet in = block.use; - for (auto x : out) { - if (!in_set(block.defs, x)) { - in.insert(x); - } - } - - if (in != block.input || out != block.output) { - changed = true; - block.input = in; - block.output = out; - } - - return changed; -} - -void phase3(std::vector& blocks, BasicBlock& block) { - RegSet live_local; - for (auto s : {block.succ_branch, block.succ_ft}) { - if (s == -1) { - continue; - } - for (auto i : blocks.at(s).input) { - live_local.insert(i); - } - } - - for (int i = block.end_basic_op; i-- > block.start_basic_op;) { - auto& lv = block.live.at(i - block.start_basic_op); - auto& dd = block.dead.at(i - block.start_basic_op); - - RegSet new_live = lv; - for (auto x : live_local) { - if (!in_set(dd, x)) { - new_live.insert(x); - } - } - lv = live_local; - live_local = new_live; - } -} - -} // namespace -/*! - * Analyze the function use of registers to determine which are live where. - */ -void Function::run_reg_usage() { - // phase 1 - for (auto& block : basic_blocks) { - block.live.resize(block.basic_op_size()); - block.dead.resize(block.basic_op_size()); - phase1(*this, block); - } - - // phase 2 - bool changed = false; - do { - changed = false; - for (auto& block : basic_blocks) { - if (phase2(basic_blocks, block)) { - changed = true; - } - } - } while (changed); - - // phase 3 - for (auto& block : basic_blocks) { - phase3(basic_blocks, block); - } - - // we want to know if an op "consumes" a register. - // this means that the value of the register coming in to the operation is: - // A. read by the operation. - // B. no longer read after the operation. - for (auto& block : basic_blocks) { - for (int i = block.start_basic_op; i < block.end_basic_op; i++) { - auto& op = basic_ops.at(i); - // look at each register that we read - for (auto reg : op->read_regs) { - if (!block.op_has_reg_live_out(i, reg)) { - // if the register is not live out, we definitely consume it. - op->consumed.insert(reg); - } else { - // it's live out... but it could be a new value. - for (auto wr : op->write_regs) { - if (wr == reg) { - op->consumed.insert(reg); - } - } - } - } - - for (auto reg : op->write_regs) { - if (!block.op_has_reg_live_out(i, reg)) { - // we wrote it, but it is immediately dead. this is nice to know for things like - // "is this if/and/or expression used as a value?" - op->written_and_unused.insert(reg); - } - } - } - } -} -} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp index 5a22d52663..d38fdbf0f8 100644 --- a/decompiler/Function/TypeAnalysis.cpp +++ b/decompiler/Function/TypeAnalysis.cpp @@ -44,97 +44,6 @@ void try_apply_hints(int idx, } } // namespace -bool Function::run_type_analysis(const TypeSpec& my_type, - DecompilerTypeSystem& dts, - LinkedObjectFile& file, - const std::unordered_map>& hints) { - // STEP 0 - setup settings - dts.type_prop_settings.reset(); - if (get_config().pair_functions_by_name.find(guessed_name.to_string()) != - get_config().pair_functions_by_name.end()) { - dts.type_prop_settings.allow_pair = true; - } - - if (guessed_name.kind == FunctionName::FunctionKind::METHOD) { - dts.type_prop_settings.current_method_type = guessed_name.type_name; - } - - // STEP 1 - get the topo sort. - auto order = bb_topo_sort(); - // fmt::print("blocks: {}\n ", basic_blocks.size()); - // for (auto x : order.vist_order) { - // fmt::print("{} ", x); - // } - // fmt::print("\n"); - - // STEP 2 - establish visit order - assert(!order.vist_order.empty()); - assert(order.vist_order.front() == 0); - - // STEP 3 - initialize type state. - basic_blocks.at(0).init_types = construct_initial_typestate(my_type); - // and add hints: - try_apply_hints(0, hints, &basic_blocks.at(0).init_types, dts); - - // STEP 2 - loop while types are changing - bool run_again = true; - while (run_again) { - run_again = false; - // each block in order now. - for (auto block_id : order.vist_order) { - auto& block = basic_blocks.at(block_id); - TypeState* init_types = &block.init_types; - for (int op_id = block.start_basic_op; op_id < block.end_basic_op; op_id++) { - auto& op = basic_ops.at(op_id); - - // apply type hints only if we are not the first op. - if (op_id != block.start_basic_op) { - try_apply_hints(op_id, hints, init_types, dts); - } - - // while the implementation of propagate_types_internal is in progress, it may throw - // for unimplemented cases. Eventually this try/catch should be removed. - try { - op->propagate_types(*init_types, file, dts); - } catch (std::runtime_error& e) { - fmt::print("Type prop fail on {}: {}\n", guessed_name.to_string(), e.what()); - warnings += ";; Type prop attempted and failed.\n"; - return false; - } - - // todo, set run again?? - - // for the next op... - init_types = &op->end_types; - } - - // propagate the types: for each possible succ - for (auto succ_block_id : {block.succ_ft, block.succ_branch}) { - if (succ_block_id != -1) { - auto& succ_block = basic_blocks.at(succ_block_id); - // apply hint - try_apply_hints(succ_block.start_basic_op, hints, init_types, dts); - - // set types to LCA (current, new) - if (dts.tp_lca(&succ_block.init_types, *init_types)) { - // if something changed, run again! - run_again = true; - } - } - } - } - } - - auto last_op = basic_ops.back(); - auto last_type = last_op->end_types.get(Register(Reg::GPR, Reg::V0)).typespec(); - if (last_type != my_type.last_arg()) { - warnings += fmt::format(";; return type mismatch {} vs {}. ", last_type.print(), - my_type.last_arg().print()); - } - - return true; -} - bool Function::run_type_analysis_ir2(const TypeSpec& my_type, DecompilerTypeSystem& dts, LinkedObjectFile& file, @@ -232,4 +141,4 @@ bool Function::run_type_analysis_ir2(const TypeSpec& my_type, return true; } -} // namespace decompiler \ No newline at end of file +} // namespace decompiler diff --git a/decompiler/IR/CfgBuilder.cpp b/decompiler/IR/CfgBuilder.cpp deleted file mode 100644 index df4066587d..0000000000 --- a/decompiler/IR/CfgBuilder.cpp +++ /dev/null @@ -1,1282 +0,0 @@ -#include "third-party/fmt/core.h" -#include -#include "decompiler/util/MatchParam.h" -#include "CfgBuilder.h" -#include "decompiler/Function/CfgVtx.h" -#include "decompiler/Function/Function.h" -#include "decompiler/Disasm/InstructionMatching.h" -#include "decompiler/IR/IR.h" - -namespace decompiler { -namespace { - -std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx); - -/*! - * This adds a single CfgVtx* to a list of IR's by converting it with cfg to IR. - * The trick here is that it will recursively inline anything which would generate an IR begin. - * This avoids the case where Begin's are nested excessively. - */ -void insert_cfg_into_list(Function& f, - LinkedObjectFile& file, - std::vector>* output, - CfgVtx* vtx) { - auto as_sequence = dynamic_cast(vtx); - auto as_block = dynamic_cast(vtx); - if (as_sequence) { - for (auto& x : as_sequence->seq) { - insert_cfg_into_list(f, file, output, x); - } - } else if (as_block) { - auto& block = f.basic_blocks.at(as_block->block_id); - IR* last = nullptr; - for (int instr = block.start_word; instr < block.end_word; instr++) { - auto got = f.get_basic_op_at_instr(instr); - if (got.get() == last) { - continue; - } - last = got.get(); - output->push_back(got); - } - } else { - // doesn't look like we're going to get something that can be inlined, so try as usual - auto ir = cfg_to_ir(f, file, vtx); - auto ir_as_begin = dynamic_cast(ir.get()); - if (ir_as_begin) { - // we unexpectedly got a begin, even though we didn't think we would. This is okay, but we - // should inline this begin to avoid nested begins. This happens in the case where an entire - // control flow pattern is turned into a single op (like type-of) and includes some ops at - // the beginning. We don't have a good way of knowing this will happen until we try it. - for (auto& x : ir_as_begin->forms) { - output->push_back(x); - } - } else { - output->push_back(ir); - } - } -} - -/*! - * If it's a begin with a branch as the last operation, returns a pointer to the branch IR - * and also a pointer to the vector which holds the branch operation in its last slot. - * Otherwise returns nullptr. Useful to modify or remove branches found at the end of blocks, - * and inline things into the begin they were found in. - */ -std::pair>*> get_condition_branch_as_vector(IR* in) { - auto as_seq = dynamic_cast(in); - if (as_seq) { - auto irb = dynamic_cast(as_seq->forms.back().get()); - auto loc = &as_seq->forms; - assert(irb); - return std::make_pair(irb, loc); - } - return std::make_pair(nullptr, nullptr); -} - -/*! - * Given an IR, find a branch IR at the end, and also the location of it so it can be patched. - * Returns nullptr as the first item in the pair if it didn't work. - */ -std::pair*> get_condition_branch(std::shared_ptr* in) { - IR_Branch_Atomic* condition_branch = dynamic_cast(in->get()); - std::shared_ptr* condition_branch_location = in; - if (!condition_branch) { - // not 100% sure this will always work - auto as_seq = dynamic_cast(in->get()); - if (as_seq) { - condition_branch = dynamic_cast(as_seq->forms.back().get()); - condition_branch_location = &as_seq->forms.back(); - } - } - - if (!condition_branch) { - auto as_return = dynamic_cast(in->get()); - if (as_return) { - return get_condition_branch(&as_return->dead_code); - } - } - - if (!condition_branch) { - auto as_break = dynamic_cast(in->get()); - if (as_break) { - return get_condition_branch(&as_break->dead_code); - } - } - return std::make_pair(condition_branch, condition_branch_location); -} - -/*! - * Given a CondWithElse IR, remove the internal branches and set the condition to be an actual - * compare IR instead of a branch. - * Doesn't "rebalance" the leading condition because this runs way before expression compaction. - */ -void clean_up_cond_with_else(std::shared_ptr* ir, LinkedObjectFile& file) { - (void)file; - auto cwe = dynamic_cast(ir->get()); - assert(cwe); - for (auto& e : cwe->entries) { - if (e.cleaned) { - continue; - } - auto jump_to_next = get_condition_branch(&e.condition); - assert(jump_to_next.first); - assert(jump_to_next.first->branch_delay.kind == BranchDelay::NOP); - // patch the jump to next with a condition. - auto replacement = - std::make_shared(jump_to_next.first->condition, jump_to_next.first); - replacement->condition.invert(); - *(jump_to_next.second) = replacement; - - // patch the jump at the end of a block. - auto jump_to_end = get_condition_branch(&e.body); - assert(jump_to_end.first); - assert(jump_to_end.first->branch_delay.kind == BranchDelay::NOP); - assert(jump_to_end.first->condition.kind == Condition::ALWAYS); - - // if possible, we just want to remove this from the sequence its in. - // but sometimes there's a case with nothing in it so there is no sequence. - // in this case, we can just replace the branch with a NOP IR to indicate that nothing - // happens in this case, but there was still GOAL code to test for it. - // this happens rarely, as you would expect. - auto as_end_of_sequence = get_condition_branch_as_vector(e.body.get()); - if (as_end_of_sequence.first) { - assert(as_end_of_sequence.second->size() > 1); - as_end_of_sequence.second->pop_back(); - } else { - // In the future we could consider having a more explicit "this case is empty" operator so - // this doesn't get confused with an actual MIPS nop. - *(jump_to_end.second) = std::make_shared(); - } - e.cleaned = true; - } -} - -void clean_up_until_loop(IR_UntilLoop* ir) { - auto condition_branch = get_condition_branch(&ir->condition); - assert(condition_branch.first); - assert(condition_branch.first->branch_delay.kind == BranchDelay::NOP); - auto replacement = - std::make_shared(condition_branch.first->condition, condition_branch.first); - replacement->condition.invert(); - *(condition_branch.second) = replacement; -} - -void clean_up_infinite_while_loop(IR_WhileLoop* ir) { - auto jump = get_condition_branch(&ir->body); - assert(jump.first); - assert(jump.first->branch_delay.kind == BranchDelay::NOP); - assert(jump.first->condition.kind == Condition::ALWAYS); - auto as_end_of_sequence = get_condition_branch_as_vector(ir->body.get()); - if (as_end_of_sequence.first) { - assert(as_end_of_sequence.second->size() > 1); - as_end_of_sequence.second->pop_back(); - } else { - // In the future we could consider having a more explicit "this case is empty" operator so - // this doesn't get confused with an actual MIPS nop. - *(jump.second) = std::make_shared(); - } - ir->cleaned = true; // so we don't try this later... -} - -void clean_up_return(IR_Return* ir) { - auto jump_to_end = get_condition_branch(&ir->return_code); - assert(jump_to_end.first); - assert(jump_to_end.first->branch_delay.kind == BranchDelay::NOP); - assert(jump_to_end.first->condition.kind == Condition::ALWAYS); - auto as_end_of_sequence = get_condition_branch_as_vector(ir->return_code.get()); - if (as_end_of_sequence.first) { - assert(as_end_of_sequence.second->size() > 1); - as_end_of_sequence.second->pop_back(); - } else { - // In the future we could consider having a more explicit "this case is empty" operator so - // this doesn't get confused with an actual MIPS nop. - *(jump_to_end.second) = std::make_shared(); - } -} - -void clean_up_break(IR_Break* ir) { - auto jump_to_end = get_condition_branch(&ir->return_code); - assert(jump_to_end.first); - assert(jump_to_end.first->branch_delay.kind == BranchDelay::NOP); - assert(jump_to_end.first->condition.kind == Condition::ALWAYS); - auto as_end_of_sequence = get_condition_branch_as_vector(ir->return_code.get()); - if (as_end_of_sequence.first) { - assert(as_end_of_sequence.second->size() > 1); - as_end_of_sequence.second->pop_back(); - } else { - // In the future we could consider having a more explicit "this case is empty" operator so - // this doesn't get confused with an actual MIPS nop. - *(jump_to_end.second) = std::make_shared(); - } -} - -/*! - * Does the instruction in the delay slot set a register to false? - * Note. a beql s7, x followed by a or y, x, r0 will count as this. I don't know why but - * GOAL does this on comparisons to false. - */ -bool delay_slot_sets_false(IR_Branch* branch) { - if (branch->branch_delay.kind == BranchDelay::SET_REG_FALSE) { - return true; - } - - if (branch->condition.kind == Condition::FALSE && - branch->branch_delay.kind == BranchDelay::SET_REG_REG) { - auto reg_check = dynamic_cast(branch->condition.src0.get()); - assert(reg_check); - auto reg_read = dynamic_cast(branch->branch_delay.source.get()); - assert(reg_read); - return reg_check->reg == reg_read->reg; - } - - return false; -} - -/*! - * Does the instruction in the delay slot set a register to a truthy value, like in a GOAL - * or form branch? Either it explicitly sets #t, or it tests the value for being not false, - * then uses that - */ -bool delay_slot_sets_truthy(IR_Branch* branch) { - if (branch->branch_delay.kind == BranchDelay::SET_REG_TRUE) { - return true; - } - - if (branch->condition.kind == Condition::TRUTHY && - branch->branch_delay.kind == BranchDelay::SET_REG_REG) { - auto reg_check = dynamic_cast(branch->condition.src0.get()); - assert(reg_check); - auto reg_read = dynamic_cast(branch->branch_delay.source.get()); - assert(reg_read); - return reg_check->reg == reg_read->reg; - } - - return false; -} - -/*! - * Try to convert a short circuit to an and. - */ -bool try_clean_up_sc_as_and(std::shared_ptr& ir, LinkedObjectFile& file) { - (void)file; - Register destination; - std::shared_ptr ir_dest = nullptr; - for (int i = 0; i < int(ir->entries.size()) - 1; i++) { - auto branch = get_condition_branch(&ir->entries.at(i).condition); - assert(branch.first); - if (!delay_slot_sets_false(branch.first)) { - return false; - } - - if (i == 0) { - ir_dest = branch.first->branch_delay.destination; - destination = dynamic_cast(branch.first->branch_delay.destination.get())->reg; - } else { - if (destination != - dynamic_cast(branch.first->branch_delay.destination.get())->reg) { - return false; - } - } - } - - ir->kind = IR_ShortCircuit::AND; - ir->final_result = ir_dest; - auto* dest_reg = dynamic_cast(ir_dest.get()); - assert(dest_reg); - - bool live_out_result = false; - - // now get rid of the branches - for (int i = 0; i < int(ir->entries.size()) - 1; i++) { - auto branch = get_condition_branch(&ir->entries.at(i).condition); - assert(branch.first); - - if (i == 0) { - live_out_result = (branch.first->written_and_unused.find(dest_reg->reg) == - branch.first->written_and_unused.end()); - } else { - bool this_live_out = (branch.first->written_and_unused.find(dest_reg->reg) == - branch.first->written_and_unused.end()); - assert(live_out_result == this_live_out); - } - - auto replacement = std::make_shared(branch.first->condition, branch.first); - replacement->condition.invert(); - *(branch.second) = replacement; - } - - ir->used_as_value = live_out_result; - return true; -} - -/*! - * Try to convert a short circuit to an or. - * Note - this will convert an and to a very strange or, so always use the try as and first. - */ -bool try_clean_up_sc_as_or(std::shared_ptr& ir, LinkedObjectFile& file) { - (void)file; - Register destination; - std::shared_ptr ir_dest = nullptr; - for (int i = 0; i < int(ir->entries.size()) - 1; i++) { - auto branch = get_condition_branch(&ir->entries.at(i).condition); - assert(branch.first); - if (!delay_slot_sets_truthy(branch.first)) { - return false; - } - assert(dynamic_cast(branch.first->branch_delay.destination.get())); - - if (i == 0) { - ir_dest = branch.first->branch_delay.destination; - destination = dynamic_cast(branch.first->branch_delay.destination.get())->reg; - } else { - if (destination != - dynamic_cast(branch.first->branch_delay.destination.get())->reg) { - return false; - } - } - } - - ir->kind = IR_ShortCircuit::OR; - ir->final_result = ir_dest; - auto* dest_reg = dynamic_cast(ir_dest.get()); - assert(dest_reg); - - bool live_out_result = false; - - for (int i = 0; i < int(ir->entries.size()) - 1; i++) { - auto branch = get_condition_branch(&ir->entries.at(i).condition); - assert(branch.first); - if (i == 0) { - live_out_result = (branch.first->written_and_unused.find(dest_reg->reg) == - branch.first->written_and_unused.end()); - } else { - bool this_live_out = (branch.first->written_and_unused.find(dest_reg->reg) == - branch.first->written_and_unused.end()); - assert(live_out_result == this_live_out); - } - auto replacement = std::make_shared(branch.first->condition, branch.first); - *(branch.second) = replacement; - } - - ir->used_as_value = live_out_result; - return true; -} - -void clean_up_sc(std::shared_ptr& ir, LinkedObjectFile& file); - -/*! - * A form like (and x (or y z)) will be recognized as a single SC Vertex by the CFG pass. - * In the case where we fail to clean it up as an AND or an OR, we should attempt splitting. - * Part of the complexity here is that we want to clean up the split recursively so things like - * (and x (or y (and a b))) - * or - * (and x (or y (and a b)) c d (or z)) - * will work correctly. This may require doing more splitting on both sections! - */ -bool try_splitting_nested_sc(std::shared_ptr& ir, LinkedObjectFile& file) { - auto first_branch = get_condition_branch(&ir->entries.front().condition); - assert(first_branch.first); - bool first_is_and = delay_slot_sets_false(first_branch.first); - bool first_is_or = delay_slot_sets_truthy(first_branch.first); - assert(first_is_and != first_is_or); // one or the other but not both! - - int first_different = -1; // the index of the first one that's different. - - for (int i = 1; i < int(ir->entries.size()) - 1; i++) { - auto branch = get_condition_branch(&ir->entries.at(i).condition); - assert(branch.first); - bool is_and = delay_slot_sets_false(branch.first); - bool is_or = delay_slot_sets_truthy(branch.first); - assert(is_and != is_or); - - if (first_different == -1) { - // haven't seen a change yet. - if (first_is_and != is_and) { - // change! - first_different = i; - break; - } - } - } - - assert(first_different != -1); - - std::vector nested_ir; - for (int i = first_different; i < int(ir->entries.size()); i++) { - nested_ir.push_back(ir->entries.at(i)); - } - - auto s = int(ir->entries.size()); - for (int i = first_different; i < s; i++) { - ir->entries.pop_back(); - } - - auto nested_sc = std::make_shared(nested_ir); - clean_up_sc(nested_sc, file); - - // the real trick - IR_ShortCircuit::Entry nested_entry; - nested_entry.condition = nested_sc; - ir->entries.push_back(nested_entry); - - clean_up_sc(ir, file); - - return true; -} - -/*! - * Try to clean up a single short circuit IR. It may get split up into nested IR_ShortCircuits - * if there is a case like (and a (or b c)) - */ -void clean_up_sc(std::shared_ptr& ir, LinkedObjectFile& file) { - (void)file; - assert(ir->entries.size() > 1); - if (!try_clean_up_sc_as_and(ir, file)) { - if (!try_clean_up_sc_as_or(ir, file)) { - if (!try_splitting_nested_sc(ir, file)) { - assert(false); - } - } - } -} - -/*! - * A GOAL comparison which produces a boolean is recognized as a cond-no-else by the CFG analysis. - * But it should not be decompiled as a branching statement. - * This either succeeds or asserts and must be called with with something that can be converted - * successfully - */ -void convert_cond_no_else_to_compare(std::shared_ptr* ir) { - auto cne = dynamic_cast(ir->get()); - assert(cne); - auto condition = get_condition_branch(&cne->entries.front().condition); - assert(condition.first); - auto body = dynamic_cast(cne->entries.front().body.get()); - assert(body); - auto dst = body->dst; - auto src = dynamic_cast(body->src.get()); - assert(src->name == "#f"); - assert(cne->entries.size() == 1); - - auto condition_as_single = dynamic_cast(cne->entries.front().condition.get()); - if (condition_as_single) { - auto replacement = std::make_shared( - IR_Set::REG_64, dst, - std::make_shared(condition.first->condition, condition.first)); - *ir = replacement; - } else { - auto condition_as_seq = dynamic_cast(cne->entries.front().condition.get()); - assert(condition_as_seq); - if (condition_as_seq) { - auto replacement = std::make_shared(); - replacement->forms = condition_as_seq->forms; - assert(condition.second == &condition_as_seq->forms.back()); - replacement->forms.pop_back(); - replacement->forms.push_back(std::make_shared( - IR_Set::REG_64, dst, - std::make_shared(condition.first->condition, condition.first))); - *ir = replacement; - } - } -} - -void clean_up_cond_no_else_final(IR_Cond* cne, LinkedObjectFile& file) { - (void)cne; - (void)file; - for (size_t idx = 0; idx < cne->entries.size(); idx++) { - auto& entry = cne->entries.at(idx); - if (entry.false_destination != nullptr) { - auto* fr = dynamic_cast(entry.false_destination.get()); - assert(fr); - cne->final_destination = fr->reg; - } else { - assert(false); - } - } - - auto last_branch = - dynamic_cast(cne->entries.back().original_condition_branch.get()); - assert(last_branch); - cne->used_as_value = last_branch->written_and_unused.find(cne->final_destination) == - last_branch->written_and_unused.end(); - - // check that all other delay slot writes are unused. - for (size_t i = 0; i < cne->entries.size() - 1; i++) { - auto branch = - dynamic_cast(cne->entries.at(i).original_condition_branch.get()); - auto reg = dynamic_cast(cne->entries.at(i).false_destination.get()); - assert(reg); - assert(branch); - assert(branch->written_and_unused.find(reg->reg) != branch->written_and_unused.end()); - } -} - -/*! - * Replace internal branches inside a CondNoElse IR. - * If possible will simplify the entire expression into a comparison operation if possible. - * Will record which registers are set to false in branch delay slots. - * The exact behavior here isn't really clear to me. It's possible that these delay set false - * were disabled in cases where the result of the cond was none, or was a number or something. - * But it generally seems inconsistent. The expression propagation step will have to deal with - * this. - */ -void clean_up_cond_no_else(std::shared_ptr* ir, LinkedObjectFile& file) { - (void)file; - auto cne = dynamic_cast(ir->get()); - assert(cne); - for (size_t idx = 0; idx < cne->entries.size(); idx++) { - auto& e = cne->entries.at(idx); - if (e.cleaned) { - continue; - } - - auto jump_to_next = get_condition_branch(&e.condition); - assert(jump_to_next.first); - - if (jump_to_next.first->branch_delay.kind == BranchDelay::SET_REG_TRUE && - cne->entries.size() == 1) { - convert_cond_no_else_to_compare(ir); - return; - } else { - assert(jump_to_next.first->branch_delay.kind == BranchDelay::SET_REG_FALSE || - jump_to_next.first->branch_delay.kind == BranchDelay::NOP); - assert(jump_to_next.first->condition.kind != Condition::ALWAYS); - - if (jump_to_next.first->branch_delay.kind == BranchDelay::SET_REG_FALSE) { - assert(!e.false_destination); - e.false_destination = jump_to_next.first->branch_delay.destination; - assert(e.false_destination); - } - - e.original_condition_branch = *jump_to_next.second; - - auto replacement = - std::make_shared(jump_to_next.first->condition, jump_to_next.first); - replacement->condition.invert(); - *(jump_to_next.second) = replacement; - e.cleaned = true; - - if (idx != cne->entries.size() - 1) { - auto jump_to_end = get_condition_branch(&e.body); - assert(jump_to_end.first); - assert(jump_to_end.first->branch_delay.kind == BranchDelay::NOP); - assert(jump_to_end.first->condition.kind == Condition::ALWAYS); - auto as_end_of_sequence = get_condition_branch_as_vector(e.body.get()); - if (as_end_of_sequence.first) { - assert(as_end_of_sequence.second->size() > 1); - as_end_of_sequence.second->pop_back(); - } else { - *(jump_to_end.second) = std::make_shared(); - } - } - } - } - - // bool has_any_falses = false; - // Register false_reg; - // for (size_t idx = 0; idx < cne->entries.size(); idx++) { - // auto& entry = cne->entries.at(idx); - // if (idx == 0) { - // has_any_falses = entry.false_destination != nullptr; - // if (has_any_falses) { - // auto* as_reg = dynamic_cast(entry.false_destination.get()); - // assert(as_reg); - // false_reg = as_reg->reg; - // } - // } else { - // if (has_any_falses) { - // if (idx == cne->entries.size() - 1) { - // assert(entry.false_destination == nullptr); - // } else { - // auto* as_reg = dynamic_cast(entry.false_destination.get()); - // assert(as_reg); - // assert(as_reg->reg == false_reg); - // } - // } else { - // if (entry.false_destination != nullptr) { - // printf("BAD set of %s\n", entry.false_destination->print(file).c_str()); - // printf("%s\n", entry.condition->print(file).c_str()); - // } - // assert(entry.false_destination == nullptr); - // } - // } - // } -} - -/*! - * Match for a (set! reg (math reg reg)) form - */ -bool is_int_math_3(IR* ir, - MatchParam kind, - MatchParam dst, - MatchParam src0, - MatchParam src1, - Register* dst_out = nullptr, - Register* src0_out = nullptr, - Register* src1_out = nullptr) { - // should be a set reg to int math 2 ir - auto set = dynamic_cast(ir); - if (!set) { - return false; - } - - // destination should be a register - auto dest = dynamic_cast(set->dst.get()); - if (!dest || dst != dest->reg) { - return false; - } - - auto math = dynamic_cast(set->src.get()); - if (!math || kind != math->kind) { - return false; - } - - auto arg0 = dynamic_cast(math->arg0.get()); - auto arg1 = dynamic_cast(math->arg1.get()); - - if (!arg0 || src0 != arg0->reg || !arg1 || src1 != arg1->reg) { - return false; - } - - // it's a match! - if (dst_out) { - *dst_out = dest->reg; - } - - if (src0_out) { - *src0_out = arg0->reg; - } - - if (src1_out) { - *src1_out = arg1->reg; - } - return true; -} - -bool is_int_math_2(IR* ir, - MatchParam kind, - MatchParam dst, - MatchParam src0, - Register* dst_out = nullptr, - Register* src0_out = nullptr) { - // should be a set reg to int math 2 ir - auto set = dynamic_cast(ir); - if (!set) { - return false; - } - - // destination should be a register - auto dest = dynamic_cast(set->dst.get()); - if (!dest || dst != dest->reg) { - return false; - } - - auto math = dynamic_cast(set->src.get()); - if (!math || kind != math->kind) { - return false; - } - - auto arg = dynamic_cast(math->arg.get()); - - if (!arg || src0 != arg->reg) { - return false; - } - - // it's a match! - if (dst_out) { - *dst_out = dest->reg; - } - - if (src0_out) { - *src0_out = arg->reg; - } - - return true; -} - -/*! - * Are these IR's both the same register? False if either is not a register. - */ -bool is_same_reg(IR* a, IR* b) { - auto ar = dynamic_cast(a); - auto br = dynamic_cast(b); - return ar && br && ar->reg == br->reg; -} - -/*! - * Try to convert this SC Vertex into an abs (integer). - * Will return a converted abs IR if successful, or nullptr if its not possible - */ -std::shared_ptr try_sc_as_abs(Function& f, LinkedObjectFile& file, ShortCircuit* vtx) { - if (vtx->entries.size() != 1) { - return nullptr; - } - - auto b0 = dynamic_cast(vtx->entries.at(0)); - if (!b0) { - return nullptr; - } - - auto b0_ptr = cfg_to_ir(f, file, b0); - auto b0_ir = dynamic_cast(b0_ptr.get()); - - IR_Branch* branch = nullptr; - std::shared_ptr branch_sp = nullptr; - if (b0_ir) { - branch_sp = b0_ir->forms.back(); - } else { - branch_sp = b0_ptr; - } - branch = dynamic_cast(branch_sp.get()); - - if (!branch) { - return nullptr; - } - - // check the branch instruction - if (!branch->likely || branch->condition.kind != Condition::LESS_THAN_ZERO || - branch->branch_delay.kind != BranchDelay::NEGATE) { - return nullptr; - } - - auto input = branch->condition.src0; - auto output = branch->branch_delay.destination; - - assert(is_same_reg(input.get(), branch->branch_delay.source.get())); - - if (b0_ir->forms.size() == 1) { - // this is probably fine but happens to not occur in anything we try yet. - assert(false); - } else { - // remove the branch - b0_ir->forms.pop_back(); - // add the ash - b0_ir->forms.push_back(std::make_shared( - IR_Set::REG_64, output, - std::make_shared(IR_IntMath1::ABS, input, - std::dynamic_pointer_cast(branch_sp)))); - - return b0_ptr; - } - - return nullptr; -} - -/*! - * Attempt to convert a short circuit expression into an arithmetic shift. - * GOAL's shift function accepts positive/negative numbers to determine the direction - * of the shift. - */ -std::shared_ptr try_sc_as_ash(Function& f, LinkedObjectFile& file, ShortCircuit* vtx) { - if (vtx->entries.size() != 2) { - return nullptr; - } - - // todo, I think b0 could possibly be something more complicated, depending on how we order. - auto b0 = dynamic_cast(vtx->entries.at(0)); - auto b1 = dynamic_cast(vtx->entries.at(1)); - if (!b0 || !b1) { - return nullptr; - } - - // todo, seems possible to be a single op instead of a begin... - auto b0_ptr = cfg_to_ir(f, file, b0); - auto b0_ir = dynamic_cast(b0_ptr.get()); - - auto b1_ptr = cfg_to_ir(f, file, b1); - auto b1_ir = dynamic_cast(b1_ptr.get()); - - if (!b0_ir || !b1_ir) { - return nullptr; - } - - auto branch_sp = b0_ir->forms.back(); - auto branch = dynamic_cast(branch_sp.get()); - if (!branch || b1_ir->forms.size() != 2) { - return nullptr; - } - - // check the branch instruction - if (!branch->likely || branch->condition.kind != Condition::GEQ_ZERO_SIGNED || - branch->branch_delay.kind != BranchDelay::DSLLV) { - return nullptr; - } - - /* - * bgezl s5, L109 ; s5 is the shift amount - dsllv a0, a0, s5 ; a0 is both input and output here - - dsubu a1, r0, s5 ; a1 is a temp here - dsrav a0, a0, a1 ; a0 is both input and output here - */ - - auto sa_in = dynamic_cast(branch->condition.src0.get()); - assert(sa_in); - auto result = dynamic_cast(branch->branch_delay.destination.get()); - auto value_in = dynamic_cast(branch->branch_delay.source.get()); - auto sa_in2 = dynamic_cast(branch->branch_delay.source2.get()); - assert(result && value_in && sa_in2); - assert(sa_in->reg == sa_in2->reg); - - auto dsubu_candidate = b1_ir->forms.at(0); - auto dsrav_candidate = b1_ir->forms.at(1); - - Register clobber; - // if (!is_int_math_3(dsubu_candidate.get(), IR_IntMath2::SUB, {}, make_gpr(Reg::R0), sa_in->reg, - // &clobber)) { - // return nullptr; - // } - if (!is_int_math_2(dsubu_candidate.get(), IR_IntMath1::NEG, {}, sa_in->reg, &clobber)) { - return nullptr; - } - - assert(result); - assert(value_in); - - bool is_arith = is_int_math_3(dsrav_candidate.get(), IR_IntMath2::RIGHT_SHIFT_ARITH, result->reg, - value_in->reg, clobber); - bool is_logical = is_int_math_3(dsrav_candidate.get(), IR_IntMath2::RIGHT_SHIFT_LOGIC, - result->reg, value_in->reg, clobber); - - if (!is_arith && !is_logical) { - return nullptr; - } - - std::shared_ptr clobber_ir = nullptr; - auto dsubu_set = dynamic_cast(dsubu_candidate.get()); - auto dsrav_set = dynamic_cast(dsrav_candidate.get()); - if (clobber != result->reg) { - clobber_ir = dsubu_set->dst; - } - - std::shared_ptr dest_ir = branch->branch_delay.destination; - std::shared_ptr shift_ir = branch->condition.src0; - std::shared_ptr value_ir = dynamic_cast(dsrav_set->src.get())->arg0; - if (b0_ir->forms.size() == 1) { - // this is probably fine but happens to not occur in anything we try yet. - assert(false); - } else { - // remove the branch - b0_ir->forms.pop_back(); - // add the ash - b0_ir->forms.push_back(std::make_shared( - IR_Set::REG_64, dest_ir, - std::make_shared(shift_ir, value_ir, clobber_ir, - std::dynamic_pointer_cast(branch_sp), - std::dynamic_pointer_cast(dsubu_candidate), - std::dynamic_pointer_cast(dsrav_candidate), is_arith))); - return b0_ptr; - } - - return nullptr; -} - -/*! - * Try to convert a short circuiting expression into a "type-of" expression. - * We do this before attempting the normal and/or expressions. - */ -std::shared_ptr try_sc_as_type_of(Function& f, LinkedObjectFile& file, ShortCircuit* vtx) { - // the assembly looks like this: - /* - dsll32 v1, a0, 29 ;; (set! v1 (shl a0 61)) - beql v1, r0, L60 ;; (bl! (= v1 r0) L60 (unknown-branch-delay)) - lw v1, binteger(s7) - - bgtzl v1, L60 ;; (bl! (>0.s v1) L60 (unknown-branch-delay)) - lw v1, pair(s7) - - lwu v1, -4(a0) ;; (set! v1 (l.wu (+.i a0 -4))) - L60: - */ - - // some of these checks may be a little bit overkill but it's a nice way to sanity check that - // we have actually decoded everything correctly. - if (vtx->entries.size() != 3) { - return nullptr; - } - - auto b0 = dynamic_cast(vtx->entries.at(0)); - auto b1 = dynamic_cast(vtx->entries.at(1)); - auto b2 = dynamic_cast(vtx->entries.at(2)); - - if (!b0 || !b1 || !b2) { - return nullptr; - } - - auto b0_ptr = cfg_to_ir(f, file, b0); - auto b0_ir = dynamic_cast(b0_ptr.get()); - - auto b1_ptr = cfg_to_ir(f, file, b1); - auto b1_ir = dynamic_cast(b1_ptr.get()); - - auto b2_ptr = cfg_to_ir(f, file, b2); - auto b2_ir = dynamic_cast(b2_ptr.get()); - if (!b0_ir || !b1_ir || !b2_ir) { - return nullptr; - } - - auto set_shift = dynamic_cast(b0_ir->forms.at(b0_ir->forms.size() - 2).get()); - if (!set_shift) { - return nullptr; - } - - auto temp_reg0 = dynamic_cast(set_shift->dst.get()); - if (!temp_reg0) { - return nullptr; - } - - auto shift = dynamic_cast(set_shift->src.get()); - if (!shift || shift->kind != IR_IntMath2::LEFT_SHIFT) { - return nullptr; - } - auto src_reg = dynamic_cast(shift->arg0.get()); - auto sa = dynamic_cast(shift->arg1.get()); - if (!src_reg || !sa || sa->value != 61) { - return nullptr; - } - - auto first_branch = dynamic_cast(b0_ir->forms.back().get()); - auto second_branch = b1_ir; - auto else_case = b2_ir; - - if (!first_branch || first_branch->branch_delay.kind != BranchDelay::SET_BINTEGER || - first_branch->condition.kind != Condition::ZERO || !first_branch->likely) { - return nullptr; - } - auto temp_reg = dynamic_cast(first_branch->condition.src0.get()); - assert(temp_reg); - assert(temp_reg->reg == temp_reg0->reg); - auto dst_reg = dynamic_cast(first_branch->branch_delay.destination.get()); - assert(dst_reg); - - if (!second_branch || second_branch->branch_delay.kind != BranchDelay::SET_PAIR || - second_branch->condition.kind != Condition::GREATER_THAN_ZERO_SIGNED || - !second_branch->likely) { - return nullptr; - } - - // check we agree on destination register. - auto dst_reg2 = dynamic_cast(second_branch->branch_delay.destination.get()); - assert(dst_reg2->reg == dst_reg->reg); - - // else case is a lwu to grab the type from a basic - assert(else_case); - auto dst_reg3 = dynamic_cast(else_case->dst.get()); - assert(dst_reg3); - assert(dst_reg3->reg == dst_reg->reg); - auto load_op = dynamic_cast(else_case->src.get()); - if (!load_op || load_op->kind != IR_Load::UNSIGNED || load_op->size != 4) { - return nullptr; - } - auto load_loc = dynamic_cast(load_op->location.get()); - if (!load_loc || load_loc->kind != IR_IntMath2::ADD) { - return nullptr; - } - auto src_reg3 = dynamic_cast(load_loc->arg0.get()); - auto offset = dynamic_cast(load_loc->arg1.get()); - if (!src_reg3 || !offset) { - return nullptr; - } - - assert(src_reg3->reg == src_reg->reg); - assert(offset->value == -4); - - std::shared_ptr clobber = nullptr; - if (temp_reg->reg != dst_reg->reg) { - clobber = first_branch->condition.src0; - } - if (b0_ir->forms.size() == 2) { - return std::make_shared(IR_Set::REG_64, else_case->dst, - std::make_shared(shift->arg0, clobber)); - } else { - // remove the branch - b0_ir->forms.pop_back(); - // remove the shift - b0_ir->forms.pop_back(); - // add the type-of - b0_ir->forms.push_back(std::make_shared( - IR_Set::REG_64, else_case->dst, std::make_shared(shift->arg0, clobber))); - return b0_ptr; - } -} - -std::shared_ptr merge_cond_else_with_sc_cond(CondWithElse* cwe, - const std::shared_ptr& else_ir, - Function& f, - LinkedObjectFile& file) { - auto as_seq = dynamic_cast(else_ir.get()); - if (!as_seq || as_seq->forms.size() != 2) { - return nullptr; - } - - auto first = dynamic_cast(as_seq->forms.at(0).get()); - auto second = dynamic_cast(as_seq->forms.at(1).get()); - if (!first || !second) { - return nullptr; - } - - std::vector entries; - for (auto& x : cwe->entries) { - IR_Cond::Entry e; - e.condition = cfg_to_ir(f, file, x.condition); - e.body = cfg_to_ir(f, file, x.body); - entries.push_back(std::move(e)); - } - - auto first_condition = std::make_shared(); - first_condition->forms.push_back(as_seq->forms.at(0)); - first_condition->forms.push_back(second->entries.front().condition); - - second->entries.front().condition = first_condition; - - for (auto& x : second->entries) { - entries.push_back(x); - } - std::shared_ptr result = std::make_shared(entries); - clean_up_cond_no_else(&result, file); - return result; -} - -/*! - * Main CFG vertex to IR conversion. Will pull basic IR ops from the provided function as needed. - */ -std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx) { - if (dynamic_cast(vtx)) { - auto* bv = dynamic_cast(vtx); - auto& block = f.basic_blocks.at(bv->block_id); - std::vector> irs; - IR* last = nullptr; - for (int instr = block.start_word; instr < block.end_word; instr++) { - auto got = f.get_basic_op_at_instr(instr); - if (got.get() == last) { - continue; - } - last = got.get(); - irs.push_back(got); - } - - if (irs.size() == 1) { - return irs.front(); - } else { - return std::make_shared(irs); - } - - } else if (dynamic_cast(vtx)) { - auto* sv = dynamic_cast(vtx); - - std::vector> irs; - insert_cfg_into_list(f, file, &irs, sv); - - return std::make_shared(irs); - } else if (dynamic_cast(vtx)) { - auto wvtx = dynamic_cast(vtx); - auto result = std::make_shared(cfg_to_ir(f, file, wvtx->condition), - cfg_to_ir(f, file, wvtx->body)); - return result; - } else if (dynamic_cast(vtx)) { - auto wvtx = dynamic_cast(vtx); - auto result = std::make_shared(cfg_to_ir(f, file, wvtx->condition), - cfg_to_ir(f, file, wvtx->body)); - clean_up_until_loop(result.get()); - return result; - } else if (dynamic_cast(vtx)) { - auto wvtx = dynamic_cast(vtx); - auto result = - std::make_shared(cfg_to_ir(f, file, wvtx->block), std::make_shared()); - clean_up_until_loop(result.get()); - return result; - } else if (dynamic_cast(vtx)) { - auto wvtx = dynamic_cast(vtx); - auto result = std::make_shared( - std::make_shared(Condition(Condition::ALWAYS, nullptr, nullptr, nullptr), - nullptr), - cfg_to_ir(f, file, wvtx->block)); - clean_up_infinite_while_loop(result.get()); - return result; - } else if (dynamic_cast(vtx)) { - auto* cvtx = dynamic_cast(vtx); - - // the cfg analysis pass may recognize some things out of order, which can cause - // fake nesting. This is actually a problem at this point because it can turn a normal - // cond into a cond with else, which emits different instructions. This attempts to recognize - // an else which is actually more cases and compacts it into a single statement. At this point - // I don't know if this is sufficient to catch all cases. it may even recognize the wrong - // thing in some cases... maybe we should check the delay slot instead? - auto else_ir = cfg_to_ir(f, file, cvtx->else_vtx); - auto fancy_compact_result = merge_cond_else_with_sc_cond(cvtx, else_ir, f, file); - if (fancy_compact_result) { - return fancy_compact_result; - } - - // this case is disabled because I _think_ it is now properly handled elsewhere. - if (false && dynamic_cast(else_ir.get())) { - auto extra_cond = dynamic_cast(else_ir.get()); - std::vector entries; - for (auto& x : cvtx->entries) { - IR_Cond::Entry e; - e.condition = cfg_to_ir(f, file, x.condition); - e.body = cfg_to_ir(f, file, x.body); - entries.push_back(std::move(e)); - } - for (auto& x : extra_cond->entries) { - entries.push_back(x); - } - std::shared_ptr result = std::make_shared(entries); - clean_up_cond_no_else(&result, file); - return result; - } else { - std::vector entries; - for (auto& x : cvtx->entries) { - IR_CondWithElse::Entry e; - e.condition = cfg_to_ir(f, file, x.condition); - e.body = cfg_to_ir(f, file, x.body); - entries.push_back(std::move(e)); - } - std::shared_ptr result = std::make_shared(entries, else_ir); - clean_up_cond_with_else(&result, file); - return result; - } - } else if (dynamic_cast(vtx)) { - auto* svtx = dynamic_cast(vtx); - // try as a type of expression first - auto as_type_of = try_sc_as_type_of(f, file, svtx); - if (as_type_of) { - return as_type_of; - } - - auto as_ash = try_sc_as_ash(f, file, svtx); - if (as_ash) { - return as_ash; - } - - auto as_abs = try_sc_as_abs(f, file, svtx); - if (as_abs) { - return as_abs; - } - - if (svtx->entries.size() == 1) { - throw std::runtime_error("Weird short circuit form."); - } - // now try as a normal and/or - std::vector entries; - for (auto& x : svtx->entries) { - IR_ShortCircuit::Entry e; - e.condition = cfg_to_ir(f, file, x); - entries.push_back(e); - } - auto result = std::make_shared(entries); - clean_up_sc(result, file); - return result; - } else if (dynamic_cast(vtx)) { - auto* cvtx = dynamic_cast(vtx); - std::vector entries; - for (auto& x : cvtx->entries) { - IR_Cond::Entry e; - e.condition = cfg_to_ir(f, file, x.condition); - e.body = cfg_to_ir(f, file, x.body); - entries.push_back(std::move(e)); - } - std::shared_ptr result = std::make_shared(entries); - clean_up_cond_no_else(&result, file); - return result; - } else if (dynamic_cast(vtx)) { - auto* cvtx = dynamic_cast(vtx); - auto result = std::make_shared(cfg_to_ir(f, file, cvtx->body), - cfg_to_ir(f, file, cvtx->unreachable_block)); - clean_up_return(result.get()); - return result; - } else if (dynamic_cast(vtx)) { - auto* cvtx = dynamic_cast(vtx); - auto result = std::make_shared(cfg_to_ir(f, file, cvtx->body), - cfg_to_ir(f, file, cvtx->unreachable_block)); - clean_up_break(result.get()); - return result; - } - - throw std::runtime_error("not yet implemented IR conversion."); - return nullptr; -} - -/*! - * Post processing pass to clean up while loops - annoyingly the block before a while loop - * has a jump to the condition branch that we need to remove. This currently happens after all - * conversion but this may need to be revisited depending on the final order of simplifications. - */ -void clean_up_while_loops(IR_Begin* sequence, LinkedObjectFile& file) { - (void)file; - std::vector to_remove; // the list of branches to remove by index in this sequence - for (size_t i = 0; i < sequence->forms.size(); i++) { - auto* form_as_while = dynamic_cast(sequence->forms.at(i).get()); - if (form_as_while && !form_as_while->cleaned) { - assert(i != 0); - auto prev_as_branch = dynamic_cast(sequence->forms.at(i - 1).get()); - assert(prev_as_branch); - // printf("got while intro branch %s\n", prev_as_branch->print(file).c_str()); - // this should be an always jump. We'll assume that the CFG builder successfully checked - // the brach destination, but we will check the condition. - assert(prev_as_branch->condition.kind == Condition::ALWAYS); - assert(prev_as_branch->branch_delay.kind == BranchDelay::NOP); - to_remove.push_back(i - 1); - - // now we should try to find the condition branch: - - auto condition_branch = get_condition_branch(&form_as_while->condition); - - assert(condition_branch.first); - assert(condition_branch.first->branch_delay.kind == BranchDelay::NOP); - // printf("got while condition branch %s\n", condition_branch.first->print(file).c_str()); - auto replacement = - std::make_shared(condition_branch.first->condition, condition_branch.first); - *(condition_branch.second) = replacement; - } - } - - // remove the implied forward always branches. - for (int i = int(to_remove.size()); i-- > 0;) { - auto idx = to_remove.at(i); - assert(dynamic_cast(sequence->forms.at(idx).get())); - sequence->forms.erase(sequence->forms.begin() + idx); - } -} -} // namespace - -/*! - * Use a control flow graph to build a single IR representing a function. - * This should be done after basic ops are added and before typing, variable splitting, and - * expression compaction. - */ -std::shared_ptr build_cfg_ir(Function& function, - ControlFlowGraph& cfg, - LinkedObjectFile& file) { - // printf("build cfg ir\n"); - if (!cfg.is_fully_resolved()) { - return nullptr; - } - - try { - auto top_level = cfg.get_single_top_level(); - // and possibly annotate the IR control flow structure so that we can determine if its and/or - // or whatever. This may require rejecting a huge number of inline assembly functions, and - // possibly resolving the min/max/ash issue. - // auto ir = cfg_to_ir(function, file, top_level); - auto ir = std::make_shared(); - insert_cfg_into_list(function, file, &ir->forms, top_level); - auto all_children = ir->get_all_ir(file); - all_children.push_back(ir); - for (auto& child : all_children) { - auto as_begin = dynamic_cast(child.get()); - if (as_begin) { - clean_up_while_loops(as_begin, file); - } - - auto as_cond_no_else = dynamic_cast(child.get()); - if (as_cond_no_else) { - clean_up_cond_no_else_final(as_cond_no_else, file); - } - } - return ir; - } catch (std::runtime_error& e) { - return nullptr; - } -} -} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/CfgBuilder.h b/decompiler/IR/CfgBuilder.h deleted file mode 100644 index 7592e887a6..0000000000 --- a/decompiler/IR/CfgBuilder.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include - -namespace decompiler { -class IR; -class Function; -class LinkedObjectFile; -class ControlFlowGraph; - -std::shared_ptr build_cfg_ir(Function& function, ControlFlowGraph& cfg, LinkedObjectFile& file); -} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index 0572c0c234..ae12a58ba9 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -49,64 +49,6 @@ void add_regs_to_str(const T& regs, std::string& str) { } } // namespace -std::string IR_Atomic::print_with_reguse(const LinkedObjectFile& file) const { - std::string result = print(file); - if (result.length() < 40) { - result.append(40 - result.length(), ' '); - } - result += " ;;"; - if (!write_regs.empty()) { - result += "write: ["; - add_regs_to_str(write_regs, result); - result += "] "; - } - if (!read_regs.empty()) { - result += "read: ["; - add_regs_to_str(read_regs, result); - result += "] "; - } - if (!clobber_regs.empty()) { - result += "clobber: ["; - add_regs_to_str(clobber_regs, result); - result += "] "; - } - if (!consumed.empty()) { - result += "consumed: ["; - add_regs_to_str(consumed, result); - result += "] "; - } - return result; -} - -std::string IR_Atomic::print_with_types(const TypeState& init_types, - const LinkedObjectFile& file) const { - std::string result; - - for (auto& warning : warnings) { - result += ";; warn: " + warning + "\n"; - } - result += print(file); - if (result.length() < 40) { - result.append(40 - result.length(), ' '); - } - result += " ;; "; - - auto read_mask = regs_to_gpr_mask(read_regs); - auto write_mask = regs_to_gpr_mask(write_regs); - - result += fmt::format("[{}] -> [{}]", init_types.print_gpr_masked(read_mask), - end_types.print_gpr_masked(write_mask)); - - if (!consumed.empty()) { - result += "c:"; - for (auto x : consumed) { - result += " "; - result += x.to_charp(); - } - } - return result; -} - goos::Object IR_Failed::to_form(const LinkedObjectFile& file) const { (void)file; return pretty_print::build_list("INVALID-OPERATION"); @@ -996,189 +938,6 @@ void IR_Breakpoint_Atomic::get_children(std::vector>* output (void)output; } -goos::Object IR_Begin::to_form(const LinkedObjectFile& file) const { - if (forms.size() == 1 && inline_single_begins) { - return forms.front()->to_form(file); - } - std::vector list; - list.push_back(pretty_print::to_symbol("begin")); - for (auto& x : forms) { - list.push_back(x->to_form(file)); - } - return pretty_print::build_list(list); -} - -void IR_Begin::get_children(std::vector>* output) const { - for (auto& x : forms) { - output->push_back(x); - } -} - -namespace { -void print_inlining_begin(std::vector* output, IR* ir, const LinkedObjectFile& file) { - auto as_begin = dynamic_cast(ir); - if (as_begin) { - for (auto& x : as_begin->forms) { - output->push_back(x->to_form(file)); - } - } else { - output->push_back(ir->to_form(file)); - } -} - -bool is_single_expression(IR* in) { - return !dynamic_cast(in); -} -} // namespace - -goos::Object IR_WhileLoop::to_form(const LinkedObjectFile& file) const { - std::vector list; - list.push_back(pretty_print::to_symbol("while")); - list.push_back(condition->to_form(file)); - print_inlining_begin(&list, body.get(), file); - return pretty_print::build_list(list); -} - -void IR_WhileLoop::get_children(std::vector>* output) const { - output->push_back(condition); - output->push_back(body); -} - -goos::Object IR_UntilLoop::to_form(const LinkedObjectFile& file) const { - std::vector list; - list.push_back(pretty_print::to_symbol("until")); - list.push_back(condition->to_form(file)); - print_inlining_begin(&list, body.get(), file); - return pretty_print::build_list(list); -} - -void IR_UntilLoop::get_children(std::vector>* output) const { - output->push_back(condition); - output->push_back(body); -} - -goos::Object IR_CondWithElse::to_form(const LinkedObjectFile& file) const { - // for now we only turn it into an if statement if both cases won't require a begin at the top - // level. I think it is more common to write these as a two-case cond instead of an if with begin. - if (entries.size() == 1 && is_single_expression(entries.front().body.get()) && - is_single_expression(else_ir.get())) { - std::vector list; - list.push_back(pretty_print::to_symbol("if")); - list.push_back(entries.front().condition->to_form(file)); - list.push_back(entries.front().body->to_form(file)); - list.push_back(else_ir->to_form(file)); - return pretty_print::build_list(list); - } else { - std::vector list; - list.push_back(pretty_print::to_symbol("cond")); - for (auto& e : entries) { - std::vector entry; - entry.push_back(e.condition->to_form(file)); - print_inlining_begin(&entry, e.body.get(), file); - list.push_back(pretty_print::build_list(entry)); - } - std::vector else_form; - else_form.push_back(pretty_print::to_symbol("else")); - print_inlining_begin(&else_form, else_ir.get(), file); - list.push_back(pretty_print::build_list(else_form)); - return pretty_print::build_list(list); - } -} - -void IR_CondWithElse::get_children(std::vector>* output) const { - for (auto& e : entries) { - output->push_back(e.condition); - output->push_back(e.body); - } - output->push_back(else_ir); -} - -goos::Object IR_GetRuntimeType::to_form(const LinkedObjectFile& file) const { - std::vector list = {pretty_print::to_symbol("type-of"), object->to_form(file)}; - return pretty_print::build_list(list); -} - -void IR_GetRuntimeType::get_children(std::vector>* output) const { - output->push_back(object); -} - -goos::Object IR_Cond::to_form(const LinkedObjectFile& file) const { - if (entries.size() == 1 && is_single_expression(entries.front().body.get())) { - // print as an if statement if we can put the body in a single form. - std::vector list; - list.push_back(pretty_print::to_symbol("if")); - list.push_back(entries.front().condition->to_form(file)); - list.push_back(entries.front().body->to_form(file)); - return pretty_print::build_list(list); - } else if (entries.size() == 1) { - // turn into a when if the body requires multiple forms - // todo check to see if the condition starts with a NOT and this can be simplified to an - // unless. - std::vector list; - list.push_back(pretty_print::to_symbol("when")); - list.push_back(entries.front().condition->to_form(file)); - print_inlining_begin(&list, entries.front().body.get(), file); - return pretty_print::build_list(list); - } else { - std::vector list; - list.push_back(pretty_print::to_symbol("cond")); - for (auto& e : entries) { - std::vector entry; - entry.push_back(e.condition->to_form(file)); - print_inlining_begin(&entry, e.body.get(), file); - list.push_back(pretty_print::build_list(entry)); - } - return pretty_print::build_list(list); - } -} - -void IR_Cond::get_children(std::vector>* output) const { - for (auto& e : entries) { - output->push_back(e.condition); - output->push_back(e.body); - } -} - -goos::Object IR_ShortCircuit::to_form(const LinkedObjectFile& file) const { - std::vector forms; - switch (kind) { - case UNKNOWN: - forms.push_back(pretty_print::to_symbol("unknown-sc")); - break; - case AND: - forms.push_back(pretty_print::to_symbol("and")); - break; - case OR: - forms.push_back(pretty_print::to_symbol("or")); - break; - default: - assert(false); - } - for (auto& x : entries) { - forms.push_back(x.condition->to_form(file)); - } - return pretty_print::build_list(forms); -} - -void IR_ShortCircuit::get_children(std::vector>* output) const { - for (auto& x : entries) { - output->push_back(x.condition); - if (x.output) { - output->push_back(x.output); - } - } -} - -goos::Object IR_Ash::to_form(const LinkedObjectFile& file) const { - return pretty_print::build_list(pretty_print::to_symbol(is_signed ? "ash.si" : "ash.ui"), - value->to_form(file), shift_amount->to_form(file)); -} - -void IR_Ash::get_children(std::vector>* output) const { - output->push_back(value); - output->push_back(shift_amount); -} - goos::Object IR_AsmOp::to_form(const LinkedObjectFile& file) const { std::vector forms; forms.push_back(pretty_print::to_symbol(name)); @@ -1240,29 +999,4 @@ void IR_AsmReg::get_children(std::vector>* output) const { (void)output; } -goos::Object IR_Return::to_form(const LinkedObjectFile& file) const { - std::vector forms; - forms.push_back(pretty_print::to_symbol("return")); - forms.push_back(pretty_print::build_list(return_code->to_form(file))); - forms.push_back(pretty_print::build_list(dead_code->to_form(file))); - return pretty_print::build_list(forms); -} - -void IR_Return::get_children(std::vector>* output) const { - output->push_back(return_code); - output->push_back(dead_code); -} - -goos::Object IR_Break::to_form(const LinkedObjectFile& file) const { - std::vector forms; - forms.push_back(pretty_print::to_symbol("break")); // todo break destination... - forms.push_back(pretty_print::build_list(return_code->to_form(file))); - forms.push_back(pretty_print::build_list(dead_code->to_form(file))); - return pretty_print::build_list(forms); -} - -void IR_Break::get_children(std::vector>* output) const { - output->push_back(return_code); - output->push_back(dead_code); -} } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index 5d71448d57..07955c6c70 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -27,29 +27,6 @@ class IR { std::string print(const LinkedObjectFile& file) const; virtual void get_children(std::vector>* output) const = 0; bool is_basic_op = false; - virtual TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts); - - // update the expression stack - virtual bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - (void)stack; - (void)file; - throw std::runtime_error("expression_stack NYI for " + print(file)); - } - - // update myself to use consumed registers from the stack. - virtual bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - (void)consume; - (void)stack; - throw std::runtime_error("update_from_stack NYI for " + print(file)); - } - - virtual std::unordered_set get_consumed(LinkedObjectFile& file) { - throw std::runtime_error("get_consumed NYI for " + print(file)); - } virtual ~IR() = default; }; @@ -62,12 +39,6 @@ class IR_Atomic : public virtual IR { TypeState end_types; // types at the end of this instruction std::vector warnings; void warn(const std::string& str) { warnings.emplace_back(str); } - - virtual void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts); - std::string print_with_types(const TypeState& init_types, const LinkedObjectFile& file) const; - std::string print_with_reguse(const LinkedObjectFile& file) const; }; class IR_Failed : public virtual IR { @@ -89,9 +60,6 @@ class IR_Register : public virtual IR { void get_children(std::vector>* output) const override; Register reg; int instr_idx = -1; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; }; class IR_Set : public virtual IR { @@ -112,7 +80,6 @@ class IR_Set : public virtual IR { : kind(_kind), dst(std::move(_dst)), src(std::move(_src)) {} goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; std::shared_ptr dst, src; std::shared_ptr clobber = nullptr; @@ -127,10 +94,6 @@ class IR_Set_Atomic : public IR_Set, public IR_Atomic { template void update_reginfo_self(int n_dest, int n_src, int n_clobber); void update_reginfo_regreg(); - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; }; class IR_IntMath2; @@ -158,9 +121,6 @@ class IR_Store_Atomic : public IR_Set_Atomic { int size; goos::Object to_form(const LinkedObjectFile& file) const override; void update_reginfo_self(int n_dest, int n_src, int n_clobber); - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; }; class IR_Symbol : public virtual IR { @@ -169,17 +129,6 @@ class IR_Symbol : public virtual IR { std::string name; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override { - (void)consume; - (void)stack; - (void)file; - return true; - } }; class IR_SymbolValue : public virtual IR { @@ -188,17 +137,6 @@ class IR_SymbolValue : public virtual IR { std::string name; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override { - (void)consume; - (void)stack; - (void)file; - return true; - } }; class IR_EmptyPair : public virtual IR { @@ -206,17 +144,6 @@ class IR_EmptyPair : public virtual IR { explicit IR_EmptyPair() = default; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override { - (void)consume; - (void)stack; - (void)file; - return true; - } }; class IR_StaticAddress : public virtual IR { @@ -225,12 +152,6 @@ class IR_StaticAddress : public virtual IR { int label_id = -1; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; }; class IR_Load : public virtual IR { @@ -243,12 +164,6 @@ class IR_Load : public virtual IR { std::shared_ptr location; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; // this load_path stuff is just for debugging and shouldn't be used as part of the real // decompilation. @@ -272,12 +187,6 @@ class IR_FloatMath2 : public virtual IR { std::shared_ptr arg0, arg1; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; }; class IR_FloatMath1 : public virtual IR { @@ -287,12 +196,6 @@ class IR_FloatMath1 : public virtual IR { std::shared_ptr arg; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; }; class IR_IntMath2 : public virtual IR { @@ -321,12 +224,6 @@ class IR_IntMath2 : public virtual IR { std::shared_ptr arg0, arg1; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; }; class IR_IntMath1 : public virtual IR { @@ -341,13 +238,6 @@ class IR_IntMath1 : public virtual IR { std::shared_ptr abs_op = nullptr; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - std::unordered_set get_consumed(LinkedObjectFile& file) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; }; class IR_Call : public virtual IR { @@ -364,10 +254,6 @@ class IR_Call : public virtual IR { class IR_Call_Atomic : public virtual IR_Call, public IR_Atomic { public: IR_Call_Atomic() = default; - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; }; class IR_IntegerConstant : public virtual IR { @@ -376,17 +262,6 @@ class IR_IntegerConstant : public virtual IR { explicit IR_IntegerConstant(int64_t _value) : value(_value) {} goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override { - (void)consume; - (void)stack; - (void)file; - return true; - } }; struct BranchDelay { @@ -492,9 +367,6 @@ class IR_Branch_Atomic : public virtual IR_Branch, public IR_Atomic { : IR_Branch(std::move(_condition), _dest_label_idx, std::move(_branch_delay), _likely) {} // note - counts only for the condition. void update_reginfo_self(int n_dst, int n_src, int n_clobber); - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; }; class IR_Compare : public virtual IR { @@ -512,14 +384,6 @@ class IR_Compare : public virtual IR { goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; - std::unordered_set get_consumed(LinkedObjectFile& file) override; }; class IR_Nop : public virtual IR { @@ -527,15 +391,11 @@ class IR_Nop : public virtual IR { IR_Nop() = default; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; }; class IR_Nop_Atomic : public IR_Nop, public IR_Atomic { public: IR_Nop_Atomic() = default; - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; }; class IR_Suspend_Atomic : public virtual IR, public IR_Atomic { @@ -543,14 +403,6 @@ class IR_Suspend_Atomic : public virtual IR, public IR_Atomic { IR_Suspend_Atomic() = default; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override { - (void)stack; - (void)file; - return true; - } }; class IR_Breakpoint_Atomic : public virtual IR_Atomic { @@ -558,147 +410,6 @@ class IR_Breakpoint_Atomic : public virtual IR_Atomic { IR_Breakpoint_Atomic() = default; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override { - (void)stack; - (void)file; - return true; - } -}; - -class IR_Begin : public virtual IR { - public: - IR_Begin() = default; - explicit IR_Begin(const std::vector>& _forms) : forms(std::move(_forms)) {} - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; - std::vector> forms; -}; - -class IR_WhileLoop : public virtual IR { - public: - IR_WhileLoop(std::shared_ptr _condition, std::shared_ptr _body) - : condition(std::move(_condition)), body(std::move(_body)) {} - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; - std::shared_ptr condition, body; - bool cleaned = false; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; -}; - -class IR_UntilLoop : public virtual IR { - public: - IR_UntilLoop(std::shared_ptr _condition, std::shared_ptr _body) - : condition(std::move(_condition)), body(std::move(_body)) {} - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; - std::shared_ptr condition, body; -}; - -class IR_CondWithElse : public virtual IR { - public: - struct Entry { - std::shared_ptr condition = nullptr; - std::shared_ptr body = nullptr; - bool cleaned = false; - }; - std::vector entries; - std::shared_ptr else_ir; - IR_CondWithElse(std::vector _entries, std::shared_ptr _else_ir) - : entries(std::move(_entries)), else_ir(std::move(_else_ir)) {} - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; -}; - -// this one doesn't have an else statement. Will return false if none of the cases are taken. -class IR_Cond : public virtual IR { - public: - struct Entry { - std::shared_ptr condition = nullptr; - std::shared_ptr body = nullptr; - std::shared_ptr false_destination = nullptr; - std::shared_ptr original_condition_branch = nullptr; - bool cleaned = false; - }; - Register final_destination; - bool used_as_value = false; - std::vector entries; - explicit IR_Cond(std::vector _entries) : entries(std::move(_entries)) {} - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; -}; - -// this will work on pairs, bintegers, or basics -class IR_GetRuntimeType : public virtual IR { - public: - std::shared_ptr object, clobber; - IR_GetRuntimeType(std::shared_ptr _object, std::shared_ptr _clobber) - : object(std::move(_object)), clobber(std::move(_clobber)) {} - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; - std::unordered_set get_consumed(LinkedObjectFile& file) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; -}; - -class IR_ShortCircuit : public virtual IR { - public: - struct Entry { - std::shared_ptr condition = nullptr; - // in the case where there's no else, each delay slot will write #f to the "output" register. - // this can be with an or , s7, r0 - std::shared_ptr output = nullptr; - bool is_output_trick = false; - bool cleaned = false; - }; - - enum Kind { UNKNOWN, AND, OR } kind = UNKNOWN; - - std::shared_ptr final_result = nullptr; // the register that the final result goes in. - std::vector entries; - std::optional used_as_value = std::nullopt; - - explicit IR_ShortCircuit(std::vector _entries) : entries(std::move(_entries)) {} - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; -}; - -class IR_Ash : public virtual IR { - public: - std::shared_ptr shift_amount, value, clobber; - std::shared_ptr branch_op, sub_op, shift_op; - bool is_signed = true; - IR_Ash(std::shared_ptr _shift_amount, - std::shared_ptr _value, - std::shared_ptr _clobber, - std::shared_ptr _branch_op, - std::shared_ptr _sub_op, - std::shared_ptr _shift_op, - bool _is_signed) - : shift_amount(std::move(_shift_amount)), - value(std::move(_value)), - clobber(std::move(_clobber)), - branch_op(std::move(_branch_op)), - sub_op(std::move(_sub_op)), - shift_op(std::move(_shift_op)), - is_signed(_is_signed) { - assert(sub_op); - assert(shift_op); - assert(branch_op); - } - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; - std::unordered_set get_consumed(LinkedObjectFile& file) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; }; class IR_AsmOp : public virtual IR { @@ -711,16 +422,12 @@ class IR_AsmOp : public virtual IR { IR_AsmOp(std::string _name) : name(std::move(_name)) {} goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; }; class IR_AsmOp_Atomic : public virtual IR_AsmOp, public IR_Atomic { public: IR_AsmOp_Atomic(std::string _name) : IR_AsmOp(std::move(_name)) {} void set_reg_info(); - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; }; class IR_CMoveF : public virtual IR { @@ -731,12 +438,6 @@ class IR_CMoveF : public virtual IR { : src(std::move(_src)), on_zero(_on_zero) {} goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; - bool update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) override; }; class IR_AsmReg : public virtual IR { @@ -747,24 +448,5 @@ class IR_AsmReg : public virtual IR { void get_children(std::vector>* output) const override; }; -class IR_Return : public virtual IR { - public: - std::shared_ptr return_code; - std::shared_ptr dead_code; - IR_Return(std::shared_ptr _return_code, std::shared_ptr _dead_code) - : return_code(std::move(_return_code)), dead_code(std::move(_dead_code)) {} - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; -}; - -class IR_Break : public virtual IR { - public: - std::shared_ptr return_code; - std::shared_ptr dead_code; - IR_Break(std::shared_ptr _return_code, std::shared_ptr _dead_code) - : return_code(std::move(_return_code)), dead_code(std::move(_dead_code)) {} - goos::Object to_form(const LinkedObjectFile& file) const override; - void get_children(std::vector>* output) const override; -}; } // namespace decompiler #endif // JAK_IR_H diff --git a/decompiler/IR/IR_ExpressionStack.cpp b/decompiler/IR/IR_ExpressionStack.cpp deleted file mode 100644 index 1d56e973a8..0000000000 --- a/decompiler/IR/IR_ExpressionStack.cpp +++ /dev/null @@ -1,453 +0,0 @@ -#include -#include "IR.h" -#include "decompiler/Function/ExpressionStack.h" - -namespace decompiler { -bool IR_Set_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - // first determine the type of the set. - switch (kind) { - case IR_Set::REG_64: - case IR_Set::LOAD: - case IR_Set::GPR_TO_FPR: // TODO - this should probably not be invisible. - case IR_Set::FPR_TO_GPR64: - case IR_Set::REG_FLT: - case IR_Set::SYM_LOAD: { - // normal 64-bit GPR set! - // first, we update our source to substitute in more complicated expressions. - auto src_as_reg = dynamic_cast(src.get()); - if (src_as_reg) { - // we're reading a register. Let's find out if it's safe to directly copy it's value. - if (consumed.find(src_as_reg->reg) != consumed.end()) { - // yep. Let's read it off of the stack. - src = stack.get(src_as_reg->reg); - } - } else { - // our source is some expression. we need to make sure the expression is up-to-date. - src->update_from_stack(consumed, stack, file); - } - - // next, we tell the stack the value of the register we just set - auto dest_reg = dynamic_cast(dst.get()); - assert(dest_reg); - // sequence point if not a register -> register set. - stack.set(dest_reg->reg, src, !src_as_reg); - return true; - } - - case IR_Set::STORE: - case IR_Set::SYM_STORE: { - auto src_as_reg = dynamic_cast(src.get()); - if (src_as_reg) { - // we're reading a register. Let's find out if it's safe to directly copy it's value. - if (consumed.find(src_as_reg->reg) != consumed.end()) { - // yep. Let's read it off of the stack. - src = stack.get(src_as_reg->reg); - } - } else { - // our source is some expression. we need to make sure the expression is up-to-date. - src->update_from_stack(consumed, stack, file); - } - stack.add_no_set(std::make_shared(*this), true); - return true; - } - - break; - default: - throw std::runtime_error("IR_Set_Atomic::expression_stack NYI for " + print(file)); - } -} - -bool IR_Set::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - // first determine the type of the set. - switch (kind) { - case IR_Set::REG_64: - case IR_Set::LOAD: - case IR_Set::GPR_TO_FPR: // TODO - this should probably not be invisible. - case IR_Set::FPR_TO_GPR64: - case IR_Set::REG_FLT: { - // normal 64-bit GPR set! - // first, we update our source to substitute in more complicated expressions. - auto consumed = src->get_consumed(file); - auto src_as_reg = dynamic_cast(src.get()); - if (src_as_reg) { - // an annoying special case. - if (consumed.find(src_as_reg->reg) != consumed.end()) { - // we consume it. - src = stack.get(src_as_reg->reg); - } - } else { - src->update_from_stack(consumed, stack, file); - } - - // next, we tell the stack the value of the register we just set - auto dest_reg = dynamic_cast(dst.get()); - assert(dest_reg); - stack.set(dest_reg->reg, src, !src_as_reg); - return true; - } - - break; - default: - throw std::runtime_error("IR_Set_Atomic::expression_stack NYI for " + print(file)); - } -} - -bool IR_Call_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - (void)file; - if (!call_type_set) { - throw std::runtime_error("Call type is unknown on an IR_Call_Atomic"); - } - - const Reg::Gpr arg_regs[8] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, - Reg::T0, Reg::T1, Reg::T2, Reg::T3}; - int nargs = int(call_type.arg_count()) - 1; - // printf("%s\n", stack.print(file).c_str()); - // get all arguments. - for (int i = nargs; i-- > 0;) { - args.push_back(stack.get(Register(Reg::GPR, arg_regs[i]))); - } - args.push_back(stack.get(Register(Reg::GPR, Reg::T9))); - std::reverse(args.begin(), args.end()); - - auto return_type = call_type.get_arg(call_type.arg_count() - 1); - // bleh... - stack.set(Register(Reg::GPR, Reg::V0), std::make_shared(*this), true); - - return true; -} - -bool IR_UntilLoop::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - (void)stack; - (void)file; - stack.add_no_set(std::make_shared(*this), true); - return true; -} - -namespace { -void update_from_stack_helper(std::shared_ptr* ir, - const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - auto as_reg = dynamic_cast(ir->get()); - if (as_reg) { - if (consume.find(as_reg->reg) != consume.end()) { - *ir = stack.get(as_reg->reg); - } - } else { - (*ir)->update_from_stack(consume, stack, file); - } -} -} // namespace - -bool IR_Compare::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - if (condition.kind != Condition::ALWAYS) { - assert(root_op); - // auto consumed = root_op->get_consumed(file); - auto& consumed = root_op->consumed; - switch (condition.num_args()) { - case 0: - break; - case 1: - update_from_stack_helper(&condition.src0, consumed, stack, file); - break; - case 2: - update_from_stack_helper(&condition.src1, consumed, stack, file); - update_from_stack_helper(&condition.src0, consumed, stack, file); - break; - default: - assert(false); - } - } - - stack.add_no_set(std::make_shared(*this), true); - return true; -} - -bool IR_Compare::update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - if (condition.kind != Condition::ALWAYS) { - switch (condition.num_args()) { - case 0: - break; - case 1: - update_from_stack_helper(&condition.src0, consume, stack, file); - break; - case 2: - update_from_stack_helper(&condition.src1, consume, stack, file); - update_from_stack_helper(&condition.src0, consume, stack, file); - break; - default: - assert(false); - } - } - return true; -} - -bool IR_ShortCircuit::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - (void)file; - // this one is weird. All forms but the last implicitly set final_destination. - // the last form should somewhere set final_destination, but due to tricky coloring we - // can't identify this 100% of the time. - // so we settle for something like: - // (set! result (or ... (begin (blah) (set! result x) (blah)))) - // in the future, we may want to handle this a little bit better, at least in the obvious cases. - - assert(final_result); - assert(used_as_value.has_value()); - - if (used_as_value.value()) { - auto dest_reg = dynamic_cast(final_result.get()); - - // try as a set - auto last_entry_as_set = dynamic_cast(entries.back().condition.get()); - if (last_entry_as_set) { - auto sd = last_entry_as_set->dst; - auto sd_as_reg = dynamic_cast(sd.get()); - if (sd_as_reg && sd_as_reg->reg == dest_reg->reg) { - entries.back().condition = last_entry_as_set->src; - stack.set(dest_reg->reg, std::make_shared(*this), true); - return true; - } - } - - // try as the last thing in a begin. - auto last_entry_as_begin = dynamic_cast(entries.back().condition.get()); - if (last_entry_as_begin) { - last_entry_as_set = dynamic_cast(last_entry_as_begin->forms.back().get()); - if (last_entry_as_set) { - auto sd = last_entry_as_set->dst; - auto sd_as_reg = dynamic_cast(sd.get()); - if (sd_as_reg && sd_as_reg->reg == dest_reg->reg) { - entries.back().condition = last_entry_as_set->src; - stack.set(dest_reg->reg, std::make_shared(*this), true); - return true; - } - } - } - - // nope. if we have something like (and x (if a b c)), we may need to explictly add an - // evaluation of the if's result. - auto new_last_entry = std::make_shared(); - new_last_entry->forms.push_back(entries.back().condition); - new_last_entry->forms.push_back(std::make_shared(dest_reg->reg, -1)); - entries.back().condition = new_last_entry; - - stack.set(dest_reg->reg, std::make_shared(*this), true); - return true; - - // throw std::runtime_error("Last entry in short circuit was bad: " + - // entries.back().condition->print(file)); - } else { - stack.add_no_set(std::make_shared(*this), true); - return true; - } -} - -bool IR_Cond::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - if (used_as_value) { - // we have to make sure that all of the bodies evaluate to the value stored in the - // final_destination register. - - for (auto& entry : entries) { - IR* current_ir = entry.body.get(); - while (dynamic_cast(current_ir)) { - current_ir = dynamic_cast(current_ir)->forms.back().get(); - } - auto as_set = dynamic_cast(current_ir); - if (as_set) { - auto sd = as_set->dst; - auto sd_as_reg = dynamic_cast(sd.get()); - if (sd_as_reg && sd_as_reg->reg == final_destination) { - // yep! it's okay. set!'s evaluate to the thing they are setting. - continue; - } - } - throw std::runtime_error("IR_Cond used as value didn't work for reg " + - final_destination.to_string() + "\n" + entry.body->print(file)); - } - return true; - } else { - (void)file; - stack.add_no_set(std::make_shared(*this), true); - return true; - } -} - -bool IR_WhileLoop::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - (void)file; - // while loops are never "used by value" yet, but this is okay because they don't - // do any tricks in delay slots like IR_Cond's do. - stack.add_no_set(std::make_shared(*this), true); - return true; -} - -bool IR_AsmOp::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - (void)file; - // we only fall back to asm ops if we don't understand the GOAL code, or if the original code - // used inline assembly. In these cases, we create a sequence point here. - stack.add_no_set(std::make_shared(*this), true); - return true; -} - -bool IR_CondWithElse::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - (void)file; - // cond with else are never "used by value" yet, but this is okay because they don't - // do any tricks in delay slots like IR_Cond's do. - stack.add_no_set(std::make_shared(*this), true); - return true; -} - -bool IR_Load::update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - update_from_stack_helper(&location, consume, stack, file); - return true; -} - -bool IR_StaticAddress::update_from_stack( - const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - (void)consume; - (void)stack; - (void)file; - return true; -} - -bool IR_FloatMath2::update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - if (kind == DIV) { - for (auto reg : {&arg1, &arg0}) { - auto as_reg = dynamic_cast(reg->get()); - if (as_reg) { - if (consume.find(as_reg->reg) != consume.end()) { - *reg = stack.get(as_reg->reg); - } - } else { - (*reg)->update_from_stack(consume, stack, file); - } - } - } else { - for (auto reg : {&arg1, &arg0}) { - auto as_reg = dynamic_cast(reg->get()); - if (as_reg) { - if (consume.find(as_reg->reg) != consume.end()) { - *reg = stack.get(as_reg->reg); - } - } else { - (*reg)->update_from_stack(consume, stack, file); - } - } - } - - return true; -} - -bool IR_IntMath2::update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - for (auto reg : {&arg1, &arg0}) { - auto as_reg = dynamic_cast(reg->get()); - if (as_reg) { - if (consume.find(as_reg->reg) != consume.end()) { - *reg = stack.get(as_reg->reg); - } - } else { - (*reg)->update_from_stack(consume, stack, file); - } - } - return true; -} - -std::unordered_set IR_Ash::get_consumed(LinkedObjectFile& file) { - (void)file; - // first get the set of read registers... - auto value_as_reg = dynamic_cast(value.get()); - auto sa_as_reg = dynamic_cast(shift_amount.get()); - if (!sa_as_reg || !value_as_reg) { - // consume nobody. - // todo - is this actually right? If not, this is "safe", but might lead to ugly code. - return {}; - } - - std::unordered_set result; - - for (auto& op : {branch_op, sub_op, shift_op}) { - for (auto& reg : {value_as_reg->reg, sa_as_reg->reg}) { - if (op->consumed.find(reg) != op->consumed.end()) { - result.insert(reg); - } - } - } - - return result; -} - -bool IR_Ash::update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - for (auto x : {&value, &shift_amount}) { - update_from_stack_helper(x, consume, stack, file); - } - return true; -} - -std::unordered_set IR_IntMath1::get_consumed(LinkedObjectFile& file) { - if (kind == ABS) { - assert(abs_op); - return abs_op->consumed; - } else { - throw std::runtime_error("IR_IntMath1::get_consumed NYI for " + print(file)); - } -} - -bool IR_IntMath1::update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - update_from_stack_helper(&arg, consume, stack, file); - return true; -} - -bool IR_GetRuntimeType::update_from_stack( - const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - update_from_stack_helper(&object, consume, stack, file); - return true; -} - -std::unordered_set IR_GetRuntimeType::get_consumed( - LinkedObjectFile& file) { - // todo, this can actually consume stuff. - (void)file; - return {}; -} - -std::unordered_set IR_Compare::get_consumed(LinkedObjectFile& file) { - // todo, this can actually consume stuff. - (void)file; - return {}; -} - -bool IR_Nop::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { - (void)stack; - (void)file; - return true; -} - -bool IR_CMoveF::update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - update_from_stack_helper(&src, consume, stack, file); - return true; -} - -bool IR_FloatMath1::update_from_stack(const std::unordered_set& consume, - ExpressionStack& stack, - LinkedObjectFile& file) { - update_from_stack_helper(&arg, consume, stack, file); - return true; -} -} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/IR_TypeAnalysis.cpp b/decompiler/IR/IR_TypeAnalysis.cpp deleted file mode 100644 index f56303abe6..0000000000 --- a/decompiler/IR/IR_TypeAnalysis.cpp +++ /dev/null @@ -1,950 +0,0 @@ -#include "IR.h" -#include "decompiler/util/DecompilerTypeSystem.h" -#include "third-party/fmt/core.h" -#include "common/goos/Object.h" -#include "decompiler/util/TP_Type.h" -#include "decompiler/ObjectFile/LinkedObjectFile.h" - -namespace decompiler { -namespace { -// bool is_plain_type(const TP_Type& type, const TypeSpec& ts) { -// return type.as_typespec() == ts; -//} -// -// bool is_integer_type(const TP_Type& type) { -// return is_plain_type(type, TypeSpec("int")) || is_plain_type(type, TypeSpec("uint")); -//} -// -///*! -// * If first arg is unsigned, make the result unsigned. -// * Otherwise signed. This is the default GOAL behavior I guess. -// * This strips away any fancy stuff like [uint x 4] -// */ -// TP_Type get_int_type(const TP_Type& one) { -// if (is_plain_type(one, TypeSpec("uint"))) { -// return TP_Type(one.as_typespec()); -// } else { -// return TP_Type(TypeSpec("int")); -// } -//} -// - -bool tc(DecompilerTypeSystem& dts, const TypeSpec& expected, const TP_Type& actual) { - return dts.ts.typecheck(expected, actual.typespec(), "", false, false); -} - -bool is_int_or_uint(DecompilerTypeSystem& dts, const TP_Type& type) { - return tc(dts, TypeSpec("int"), type) || tc(dts, TypeSpec("uint"), type); -} - -struct RegOffset { - Register reg; - std::shared_ptr reg_ir; - int offset; -}; - -bool get_as_reg_offset(const IR* ir, RegOffset* out) { - auto as_reg = dynamic_cast(ir); - if (as_reg) { - out->reg = as_reg->reg; - out->reg_ir = std::make_shared(*as_reg); - out->offset = 0; - return true; - } - - auto as_math = dynamic_cast(ir); - if (as_math && as_math->kind == IR_IntMath2::ADD) { - auto first_as_reg = dynamic_cast(as_math->arg0.get()); - auto second_as_const = dynamic_cast(as_math->arg1.get()); - if (first_as_reg && second_as_const) { - out->reg = first_as_reg->reg; - out->offset = second_as_const->value; - out->reg_ir = std::dynamic_pointer_cast(as_math->arg0); - return true; - } - } - return false; -} - -RegClass get_reg_kind(const Register& r) { - switch (r.get_kind()) { - case Reg::GPR: - return RegClass::GPR_64; - case Reg::FPR: - return RegClass::FLOAT; - default: - assert(false); - } -} -} // namespace - -/*! - * Default implementation of propagate types, throw an NYI error. - */ -void IR_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)dts; - throw std::runtime_error( - fmt::format("Could not propagate types for {}, not yet implemented", print(file))); -} - -/*! - * Default implementation of get_expression_type. - */ -TP_Type IR::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)dts; - throw std::runtime_error( - fmt::format("Could not get expression types for {}, not yet implemented", print(file))); -} - -/*! - * Propagate types through a set! operation. - */ -void IR_Set_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - // pass through types - end_types = input; - // modify as needed - switch (kind) { - case IR_Set::REG_64: - case IR_Set::LOAD: - case IR_Set::GPR_TO_FPR: - case IR_Set::FPR_TO_GPR64: - case IR_Set::REG_FLT: - case IR_Set::SYM_LOAD: { - // all these should set a register. - auto as_reg = dynamic_cast(dst.get()); - assert(as_reg); - // get the type of the source, - auto t = src->get_expression_type(input, file, dts); - // set the type of the register. - end_types.get(as_reg->reg) = t; - } break; - - case IR_Set::SYM_STORE: { - auto as_reg = dynamic_cast(dst.get()); - assert(!as_reg); - return; - } - default: - throw std::runtime_error(fmt::format( - "Could not propagate types through IR_Set_Atomic, kind not handled {}", print(file))); - } -} - -/*! - * Get the type of a register. - */ -TP_Type IR_Register::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)file; - (void)dts; - return input.get(reg); -} - -TP_Type IR_Load::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - clear_load_path(); - - //////////////////// - // STATIC - //////////////////// - auto as_static = dynamic_cast(location.get()); - if (as_static) { - // todo - we should map out static data and use an actual type system lookup to figure this out. - // but for now, this is probably good enough. - if (kind == FLOAT) { - // loading static data with a FLOAT kind load (lwc1), assume result is a float. - return TP_Type::make_from_ts(dts.ts.make_typespec("float")); - } - - if (size == 8) { - // 8 byte integer constants are always loaded from a static pool - // this could technically hide loading a different type from inside of a static basic. - return TP_Type::make_from_ts(dts.ts.make_typespec("uint")); - } - } - - /////////////////////////////////////// - // REGISTER + OFFSET (possibly 0) - /////////////////////////////////////// - RegOffset ro; - if (get_as_reg_offset(location.get(), &ro)) { - auto& input_type = input.get(ro.reg); - - if (input_type.kind == TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD && ro.offset >= 16 && - (ro.offset & 3) == 0 && size == 4 && kind == UNSIGNED) { - // method get of fixed type - auto type_name = input_type.get_type_objects_typespec().base_type(); - auto method_id = (ro.offset - 16) / 4; - auto method_info = dts.ts.lookup_method(type_name, method_id); - auto method_type = method_info.type.substitute_for_method_call(type_name); - if (type_name == "object" && method_id == GOAL_NEW_METHOD) { - // remember that we're an object new. - return TP_Type::make_object_new(method_type); - } - return TP_Type::make_from_ts(method_type); - } - - if (input_type.kind == TP_Type::Kind::TYPESPEC && input_type.typespec() == TypeSpec("type") && - ro.offset >= 16 && (ro.offset & 3) == 0 && size == 4 && kind == UNSIGNED) { - // method get of an unknown type. We assume the most general "object" type. - auto method_id = (ro.offset - 16) / 4; - auto method_info = dts.ts.lookup_method("object", method_id); - if (method_id != GOAL_NEW_METHOD && method_id != GOAL_RELOC_METHOD) { - // this can get us the wrong thing for `new` methods. And maybe relocate? - return TP_Type::make_from_ts(method_info.type.substitute_for_method_call("object")); - } - } - - if (input_type.typespec() == TypeSpec("pointer")) { - // we got a plain pointer. let's just assume we're loading an integer. - // perhaps we should disable this feature by default on 4-byte loads if we're getting - // lots of false positives for loading pointers from plain pointers. - - switch (kind) { - case UNSIGNED: - switch (size) { - case 1: - case 2: - case 4: - case 8: - return TP_Type::make_from_ts(TypeSpec("uint")); - default: - break; - } - break; - case SIGNED: - switch (size) { - case 1: - case 2: - case 4: - case 8: - return TP_Type::make_from_ts(TypeSpec("int")); - default: - break; - } - break; - case FLOAT: - return TP_Type::make_from_ts(TypeSpec("float")); - default: - assert(false); - } - } - - if (input_type.kind == TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT) { - FieldReverseLookupInput rd_in; - DerefKind dk; - dk.is_store = false; - dk.reg_kind = get_reg_kind(ro.reg); - dk.sign_extend = kind == SIGNED; - dk.size = size; - rd_in.deref = dk; - rd_in.base_type = input_type.get_obj_plus_const_mult_typespec(); - rd_in.stride = input_type.get_multiplier(); - rd_in.offset = ro.offset; - auto rd = dts.ts.reverse_field_lookup(rd_in); - - if (rd.success) { - load_path_set = true; - load_path_addr_of = rd.addr_of; - load_path_base = ro.reg_ir; - for (auto& x : rd.tokens) { - load_path.push_back(x.print()); - } - return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type)); - } - } - - if (input_type.kind == TP_Type::Kind::TYPESPEC && ro.offset == -4 && kind == UNSIGNED && - size == 4 && ro.reg.get_kind() == Reg::GPR) { - // get type of basic likely, but misrecognized as an object. - // occurs often in typecase-like structures because other possible types are - // "stripped". - load_path_base = ro.reg_ir; - load_path_addr_of = false; - load_path.push_back("type"); - load_path_set = true; - - return TP_Type::make_type_object(input_type.typespec().base_type()); - } - // - // if (input_type.as_typespec() == TypeSpec("object") && ro.offset == -4 && kind == - // UNSIGNED - // && - // size == 4 && ro.reg.get_kind() == Reg::GPR) { - // // get type of basic likely, but misrecognized as an object. - // // occurs often in typecase-like structures because other possible types are - // "stripped". return TP_Type(TypeSpec("type")); - // } - // - - if (input_type.kind == TP_Type::Kind::DYNAMIC_METHOD_ACCESS && ro.offset == 16) { - // access method vtable. The input is type + (4 * method), and the 16 is the offset - // of method 0. - return TP_Type::make_from_ts(TypeSpec("function")); - } - // Assume we're accessing a field of an object. - FieldReverseLookupInput rd_in; - DerefKind dk; - dk.is_store = false; - dk.reg_kind = get_reg_kind(ro.reg); - dk.sign_extend = kind == SIGNED; - dk.size = size; - rd_in.deref = dk; - rd_in.base_type = input_type.typespec(); - rd_in.stride = 0; - rd_in.offset = ro.offset; - auto rd = dts.ts.reverse_field_lookup(rd_in); - - // only error on failure if "pair" is disabled. otherwise it might be a pair. - if (!rd.success && !dts.type_prop_settings.allow_pair) { - printf("input type is %s, offset is %d, sign %d size %d\n", rd_in.base_type.print().c_str(), - rd_in.offset, rd_in.deref.value().sign_extend, rd_in.deref.value().size); - throw std::runtime_error( - fmt::format("Could not get type of load: {}. Reverse Deref Failed.", print(file))); - } - - if (rd.success) { - load_path_set = true; - load_path_addr_of = rd.addr_of; - load_path_base = ro.reg_ir; - for (auto& x : rd.tokens) { - load_path.push_back(x.print()); - } - return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type)); - } - - // rd failed, try as pair. - if (dts.type_prop_settings.allow_pair) { - // we are strict here - only permit pair-type loads from object or pair. - // object is permitted for stuff like association lists where the car is also a pair. - if (kind == SIGNED && size == 4 && - (input_type.typespec() == TypeSpec("object") || - input_type.typespec() == TypeSpec("pair"))) { - // these rules are of course not always correct or the most specific, but it's the best - // we can do. - if (ro.offset == 2) { - // cdr = another pair. - return TP_Type::make_from_ts(TypeSpec("pair")); - } else if (ro.offset == -2) { - // car = some object. - return TP_Type::make_from_ts(TypeSpec("object")); - } - } - } - } - - throw std::runtime_error(fmt::format("Could not get type of load: {}. Not handled: {}", - print(file), location->print(file))); -} - -TP_Type IR_FloatMath2::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - - // regardless of input types, the output is going to be a float. - // todo - if we ever support meters we should do something better here. - switch (kind) { - case DIV: - case MUL: - case ADD: - case SUB: - case MIN: - case MAX: - return TP_Type::make_from_ts(dts.ts.make_typespec("float")); - default: - assert(false); - } -} - -TP_Type IR_FloatMath1::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - (void)dts; - // FLOAT_TO_INT, INT_TO_FLOAT, ABS, NEG, SQRT - switch (kind) { - case FLOAT_TO_INT: - return TP_Type::make_from_ts(TypeSpec("int")); - case INT_TO_FLOAT: - case ABS: - case NEG: - case SQRT: - return TP_Type::make_from_ts(TypeSpec("float")); - default: - assert(false); - } -} - -TP_Type IR_IntMath2::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - auto arg0_type = arg0->get_expression_type(input, file, dts); - auto arg1_type = arg1->get_expression_type(input, file, dts); - - // special cases for integers - switch (kind) { - case LEFT_SHIFT: - // multiply! - { - auto as_const = dynamic_cast(arg1.get()); - if (as_const && is_int_or_uint(dts, arg0_type)) { - assert(as_const->value >= 0); - assert(as_const->value < 64); - return TP_Type::make_from_product((1ull << as_const->value)); - } - break; - } - - case MUL_SIGNED: { - if (arg0_type.is_integer_constant() && is_int_or_uint(dts, arg1_type)) { - return TP_Type::make_from_product(arg0_type.get_integer_constant()); - } - } break; - - case ADD: - if (arg0_type.is_product_with(4) && tc(dts, TypeSpec("type"), arg1_type)) { - // dynamic access into the method array with shift, add, offset-load - // no need to track the type because we don't know the method index anyway. - return TP_Type::make_partial_dyanmic_vtable_access(); - } - break; - - default: - break; - } - - if (arg0_type == arg1_type && is_int_or_uint(dts, arg0_type)) { - // both are the same type and both are int/uint, so we assume that we're doing integer math. - // we strip off any weird things like multiplication or integer constant. - return TP_Type::make_from_ts(arg0_type.typespec()); - } - - if (is_int_or_uint(dts, arg0_type) && is_int_or_uint(dts, arg1_type)) { - // usually we would want to use arg0's type as the "winning" type. - // but we use arg1's if arg0 is an integer constant - // in either case, strip off weird stuff. - if (arg0_type.is_integer_constant() && !arg1_type.is_integer_constant()) { - return TP_Type::make_from_ts(arg1_type.typespec()); - } - return TP_Type::make_from_ts(arg0_type.typespec()); - } - - if (tc(dts, TypeSpec("binteger"), arg0_type) && is_int_or_uint(dts, arg1_type)) { - return TP_Type::make_from_ts(TypeSpec("binteger")); - } - - // special cases for non-integers - if ((arg0_type.typespec() == TypeSpec("object") || arg0_type.typespec() == TypeSpec("pair")) && - (arg1_type.is_integer_constant(62) || arg1_type.is_integer_constant(61))) { - // boxed object tag trick. - return TP_Type::make_from_ts(TypeSpec("int")); - } - - // - // if (is_integer_type(arg0_type) && is_integer_type(arg1_type)) { - // // case where both arguments are integers. - // // in this case we assume we're actually doing math. - // switch (kind) { - // case ADD: - // case SUB: - // case AND: - // case OR: - // case NOR: - // case XOR: - // // we don't know if we're signed or unsigned. so let's just go with the first type. - // return get_int_type(arg0_type); - // case MUL_SIGNED: - // case DIV_SIGNED: - // case RIGHT_SHIFT_ARITH: - // case MOD_SIGNED: - // case MIN_SIGNED: - // case MAX_SIGNED: - // // result is going to be signed, regardless of inputs. - // return TP_Type(TypeSpec("int")); - // - // case MUL_UNSIGNED: - // case RIGHT_SHIFT_LOGIC: - // // result is going to be unsigned, regardless of inputs. - // return TP_Type(TypeSpec("uint")); - // - // case LEFT_SHIFT: { - // // multiply! - // auto as_const = dynamic_cast(arg1.get()); - // if (as_const) { - // // shift by constant integer. could be accessing the method array. - // TP_Type result; - // result.kind = TP_Type::PRODUCT; - // result.ts = get_int_type(arg0_type).ts; - // result.multiplier = (1 << as_const->value); - // return result; - // } else { - // // normal variable shift. - // return get_int_type(arg0_type); - // } - // } - // default: - // break; - // } - // } - // - // - auto a1_const = dynamic_cast(arg1.get()); - if (a1_const && kind == ADD && arg0_type.kind == TP_Type::Kind::TYPESPEC) { - // access a field. - FieldReverseLookupInput rd_in; - rd_in.deref = std::nullopt; - rd_in.stride = 0; - rd_in.offset = a1_const->value; - rd_in.base_type = arg0_type.typespec(); - auto rd = dts.ts.reverse_field_lookup(rd_in); - - if (rd.success) { - // todo, load path. - return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type)); - } - } - // - // if (kind == ADD && is_integer_type(arg0_type) && arg1_type.kind == TP_Type::OBJECT_OF_TYPE) - // { - // // product + object with multiplier 1 (access array of bytes for example) - // TP_Type result; - // result.kind = TP_Type::OBJ_PLUS_PRODUCT; - // result.ts = arg1_type.as_typespec(); - // result.multiplier = 1; - // return result; - // } - // - if (kind == ADD && arg0_type.is_product() && arg1_type.kind == TP_Type::Kind::TYPESPEC) { - return TP_Type::make_object_plus_product(arg1_type.typespec(), arg0_type.get_multiplier()); - } - - if (kind == ADD && arg1_type.is_product() && arg0_type.kind == TP_Type::Kind::TYPESPEC) { - return TP_Type::make_object_plus_product(arg0_type.typespec(), arg1_type.get_multiplier()); - } - - if (kind == ADD && arg0_type.typespec().base_type() == "pointer" && - tc(dts, TypeSpec("integer"), arg1_type)) { - // plain pointer plus integer = plain pointer - return TP_Type::make_from_ts(TypeSpec("pointer")); - } - - if (kind == ADD && arg1_type.typespec().base_type() == "pointer" && - tc(dts, TypeSpec("integer"), arg0_type)) { - // plain pointer plus integer = plain pointer - return TP_Type::make_from_ts(TypeSpec("pointer")); - } - - if (tc(dts, TypeSpec("structure"), arg1_type) && !dynamic_cast(arg0.get()) && - is_int_or_uint(dts, arg0_type)) { - if (arg1_type.typespec() == TypeSpec("symbol") && - arg0_type.is_integer_constant(SYM_INFO_OFFSET + POINTER_SIZE)) { - // symbol -> GOAL String - return TP_Type::make_from_ts(dts.ts.make_pointer_typespec("string")); - } else { - // byte access of offset array field trick. - // arg1 holds a structure. - // arg0 is an integer in a register. - return TP_Type::make_object_plus_product(arg1_type.typespec(), 1); - } - } - - if (kind == AND) { - // base case for and. Just get an integer. - return TP_Type::make_from_ts(TypeSpec("int")); - } - - // - // if (kind == ADD && - // dts.ts.typecheck(TypeSpec("pointer"), arg0_type.as_typespec(), "", false, false) && - // is_integer_type(arg1_type)) { - // return arg0_type; - // } - // - // if ((kind == ADD || kind == AND) && - // dts.ts.typecheck(TypeSpec("pointer"), arg1_type.as_typespec(), "", false, false) && - // is_integer_type(arg0_type)) { - // return arg1_type; - // } - // - // if (kind == ADD && - // dts.ts.typecheck(TypeSpec("binteger"), arg0_type.as_typespec(), "", false, false) && - // is_integer_type(arg1_type)) { - // return arg0_type; - // } - // - if (kind == SUB && tc(dts, TypeSpec("pointer"), arg0_type) && - tc(dts, TypeSpec("pointer"), arg1_type)) { - return TP_Type::make_from_ts(TypeSpec("int")); - } - - throw std::runtime_error( - fmt::format("Can't get_expression_type on this IR_IntMath2: {}, args {} and {}", print(file), - arg0_type.print(), arg1_type.print())); -} - -void BranchDelay::type_prop(TypeState& output, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - // (void)dts; - switch (kind) { - case DSLLV: { - // I believe this is only used in ash. We ignore the shift amount's type and just look - // at the input value. If it's a uint/int based type, we just return uint/int (not the type) - // this will kill any weird stuff like product, etc. - // if it's not an integer type, it's currently an error. - auto dst = dynamic_cast(destination.get()); - assert(dst); - auto src = dynamic_cast(source.get()); - assert(src); - if (tc(dts, TypeSpec("uint"), output.get(src->reg))) { - output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("uint")); - } else if (tc(dts, TypeSpec("int"), output.get(src->reg))) { - output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("int")); - } else { - throw std::runtime_error("BranchDelay::type_prop DSLLV for src " + - output.get(src->reg).print()); - } - } break; - case NEGATE: { - auto dst = dynamic_cast(destination.get()); - assert(dst); - // to match the behavior in IntMath1, assume signed when negating. - output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("int")); - } break; - case SET_REG_FALSE: { - auto dst = dynamic_cast(destination.get()); - assert(dst); - output.get(dst->reg) = TP_Type::make_false(); - } break; - case SET_REG_REG: { - auto dst = dynamic_cast(destination.get()); - assert(dst); - auto src = dynamic_cast(source.get()); - assert(src); - output.get(dst->reg) = output.get(src->reg); - break; - } - case SET_REG_TRUE: { - auto dst = dynamic_cast(destination.get()); - assert(dst); - output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("symbol")); - } break; - - case SET_BINTEGER: { - auto dst = dynamic_cast(destination.get()); - assert(dst); - output.get(dst->reg) = TP_Type::make_type_object(TypeSpec("binteger")); - } break; - - case SET_PAIR: { - auto dst = dynamic_cast(destination.get()); - assert(dst); - output.get(dst->reg) = TP_Type::make_type_object(TypeSpec("pair")); - } break; - - case NOP: - break; - - default: - throw std::runtime_error("Unhandled branch delay in type_prop: " + to_form(file).print()); - } -} - -void IR_Branch_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - // pass through types - end_types = input; - branch_delay.type_prop(end_types, file, dts); - // todo clobbers. -} - -TP_Type IR_IntMath1::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)dts; - auto arg_type = arg->get_expression_type(input, file, dts); - if (is_int_or_uint(dts, arg_type)) { - switch (kind) { - case NEG: - // if we negate a thing, let's just make it a signed integer. - return TP_Type::make_from_ts(TypeSpec("int")); - case ABS: - // if we take the absolute value of a thing, just make it signed. - return TP_Type::make_from_ts(TypeSpec("int")); - case NOT: - // otherwise, make it int/uint as needed (this works because we check is_int_or_uint - // above) - return TP_Type::make_from_ts(arg_type.typespec()); - } - } - - throw std::runtime_error("IR_IntMath1::get_expression_type case not handled: " + - to_form(file).print() + " " + arg_type.print()); -} - -TP_Type IR_SymbolValue::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - if (name == "#f") { - // if we ever read the false symbol, it should contain the false symbol as its value. - return TP_Type::make_false(); - } else if (name == "__START-OF-TABLE__") { - // another annoying special case. We have a fake symbol called __START-OF-TABLE__ - // which actually means that you get the first address in the symbol table. - // it's not really a linked symbol, but the basic op builder represents it as one. - return TP_Type::make_from_ts(TypeSpec("pointer")); - } - - // look up the type of the symbol - auto type = dts.symbol_types.find(name); - if (type == dts.symbol_types.end()) { - throw std::runtime_error("Don't have the type of symbol " + name); - } - - if (type->second == TypeSpec("type")) { - // if we get a type by symbol, we should remember which type we got it from. - return TP_Type::make_type_object(TypeSpec(name)); - } - - // otherwise, just return a normal typespec - return TP_Type::make_from_ts(type->second); -} - -TP_Type IR_Symbol::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - (void)dts; - if (name == "#f") { - return TP_Type::make_false(); - } - - return TP_Type::make_from_ts(TypeSpec("symbol")); -} - -TP_Type IR_IntegerConstant::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - (void)dts; - return TP_Type::make_from_integer(value); -} - -TP_Type IR_Compare::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - (void)dts; - // really a boolean. - return TP_Type::make_from_ts(TypeSpec("symbol")); -} - -void IR_Nop_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)file; - (void)dts; - end_types = input; -} - -void IR_Suspend_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)file; - (void)dts; - end_types = input; -} - -void IR_Call_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)file; - (void)dts; - const Reg::Gpr arg_regs[8] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, - Reg::T0, Reg::T1, Reg::T2, Reg::T3}; - const Reg::Gpr goal_function_clobber_regs[] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, - Reg::T0, Reg::T1, Reg::T2, Reg::T3, - Reg::T4, Reg::V1, Reg::T9}; - end_types = input; - - auto in_tp = input.get(Register(Reg::GPR, Reg::T9)); - if (in_tp.kind == TP_Type::Kind::OBJECT_NEW_METHOD && - !dts.type_prop_settings.current_method_type.empty()) { - // calling object new method. Set the result to a new object of our type - end_types.get(Register(Reg::GPR, Reg::V0)) = - TP_Type::make_from_ts(dts.type_prop_settings.current_method_type); - // update the call type - call_type = in_tp.get_method_new_object_typespec(); - call_type.get_arg(call_type.arg_count() - 1) = - TypeSpec(dts.type_prop_settings.current_method_type); - call_type_set = true; - return; - } - - auto in_type = in_tp.typespec(); - - if (in_type.base_type() != "function") { - throw std::runtime_error("Called something that wasn't a function: " + in_type.print()); - } - - if (in_type.arg_count() < 1) { - throw std::runtime_error("Called a function, but we don't know its type"); - } - - if (in_type.arg_count() == 2 && in_type.get_arg(0) == TypeSpec("_varargs_")) { - // we're calling a varags function, which is format. We can determine the argument count - // by looking at the format string, if we can get it. - auto arg_type = input.get(Register(Reg::GPR, Reg::A1)); - if (arg_type.is_constant_string() || arg_type.is_format_string()) { - int arg_count = -1; - - if (arg_type.is_constant_string()) { - auto& str = arg_type.get_string(); - arg_count = dts.get_format_arg_count(str); - } else { - // is format string. - arg_count = arg_type.get_format_string_arg_count(); - } - - TypeSpec format_call_type("function"); - format_call_type.add_arg(TypeSpec("object")); // destination - format_call_type.add_arg(TypeSpec("string")); // format string - for (int i = 0; i < arg_count; i++) { - format_call_type.add_arg(TypeSpec("object")); - } - format_call_type.add_arg(TypeSpec("object")); - arg_count += 2; // for destination and format string. - call_type = format_call_type; - call_type_set = true; - - end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg()); - - // we can also update register usage here. - read_regs.clear(); - read_regs.emplace_back(Reg::GPR, Reg::T9); - for (int i = 0; i < arg_count; i++) { - read_regs.emplace_back(Reg::GPR, arg_regs[i]); - } - - for (auto reg : goal_function_clobber_regs) { - end_types.get(Register(Reg::GPR, reg)) = TP_Type::make_uninitialized(); - } - return; - } else { - throw std::runtime_error("Failed to get string for _varags_ call, got " + arg_type.print()); - } - } - // set the call type! - call_type = in_type; - call_type_set = true; - - end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg()); - - // we can also update register usage here. - read_regs.clear(); - read_regs.emplace_back(Reg::GPR, Reg::T9); - - for (uint32_t i = 0; i < in_type.arg_count() - 1; i++) { - read_regs.emplace_back(Reg::GPR, arg_regs[i]); - } - for (auto reg : goal_function_clobber_regs) { - end_types.get(Register(Reg::GPR, reg)) = TP_Type::make_uninitialized(); - } -} - -void IR_Store_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)file; - (void)dts; - end_types = input; -} - -TP_Type IR_StaticAddress::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)dts; - // todo - we should map out static data and use a real type system lookup here. - - auto label = file.labels.at(label_id); - // strings are 16-byte aligned, but functions are 8 byte aligned? - if ((label.offset & 7) == BASIC_OFFSET) { - // it's a basic! probably. - const auto& word = file.words_by_seg.at(label.target_segment).at((label.offset - 4) / 4); - if (word.kind == LinkedWord::TYPE_PTR) { - if (word.symbol_name == "string") { - return TP_Type::make_from_string(file.get_goal_string_by_label(label)); - } else { - // otherwise, some other static basic. - return TP_Type::make_from_ts(TypeSpec(word.symbol_name)); - } - } - } else if ((label.offset & 7) == PAIR_OFFSET) { - return TP_Type::make_from_ts(TypeSpec("pair")); - } - - throw std::runtime_error("IR_StaticAddress couldn't figure out the type: " + label.name); -} - -void IR_AsmOp_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)file; - (void)dts; - auto dst_reg = dynamic_cast(dst.get()); - end_types = input; - if (dst_reg) { - if (name == "daddu") { - end_types.get(dst_reg->reg) = TP_Type::make_from_ts(TypeSpec("uint")); - } - } -} - -void IR_Breakpoint_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)file; - (void)dts; - end_types = input; -} - -TP_Type IR_EmptyPair::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - (void)dts; - // GOAL's empty pair is actually a pair type, containing the empty pair as the car and cdr - return TP_Type::make_from_ts(TypeSpec("pair")); -} - -TP_Type IR_CMoveF::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - (void)dts; - return TP_Type::make_from_ts(TypeSpec("symbol")); -} -} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/AtomicOp.cpp b/decompiler/IR2/AtomicOp.cpp index 7746800bb6..e7da601489 100644 --- a/decompiler/IR2/AtomicOp.cpp +++ b/decompiler/IR2/AtomicOp.cpp @@ -1242,7 +1242,9 @@ void CallOp::collect_vars(VariableSet& vars) const { vars.insert(e); } - vars.insert(m_return_var); + if (m_call_type_set && m_call_type.last_arg() != TypeSpec("none")) { + vars.insert(m_return_var); + } } ///////////////////////////// diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index 16d7059f03..9f692cdd39 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -769,76 +769,6 @@ std::string LinkedObjectFile::print_disassembly() { return result; } -std::string LinkedObjectFile::print_type_analysis_debug() { - std::string result; - - assert(segments <= 3); - for (int seg = segments; seg-- > 0;) { - // segment header - result += ";------------------------------------------\n; "; - result += segment_names[seg]; - result += "\n;------------------------------------------\n\n"; - - // functions - for (auto& func : functions_by_seg.at(seg)) { - result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; - result += "; .function " + func.guessed_name.to_string() + "\n"; - result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; - if (!func.warnings.empty()) { - result += ";; WARNING:\n" + func.warnings + "\n"; - } - - for (auto& block : func.basic_blocks) { - result += "\n"; - if (!block.label_name.empty()) { - result += block.label_name + ":\n"; - } - - TypeState* init_types = &block.init_types; - for (int i = block.start_basic_op; i < block.end_basic_op; i++) { - result += " "; - // result += func.basic_ops.at(i)->print_with_reguse(*this); - // result += func.basic_ops.at(i)->print(*this); - if (func.attempted_type_analysis) { - result += fmt::format("[{:3d}] ", i); - auto& op = func.basic_ops.at(i); - result += op->print_with_types(*init_types, *this); - - // temporary debug load path print - auto op_as_set = dynamic_cast(op.get()); - if (op_as_set) { - auto op_as_load = dynamic_cast(op_as_set->src.get()); - if (op_as_load && op_as_load->load_path_set) { - if (op_as_load->load_path_addr_of) { - result += " (&->"; - } else { - result += " (->"; - } - result += ' '; - result += op_as_load->load_path_base->print(*this); - for (auto& tok : op_as_load->load_path) { - result += ' '; - result += tok; - } - result += ')'; - } - } - - result += "\n"; - init_types = &func.basic_ops.at(i)->end_types; - } else { - result += fmt::format("[{:3d}] ", i); - result += func.basic_ops.at(i)->print(*this); - result += "\n"; - } - } - } - } - } - - return result; -} - /*! * Hacky way to get a GOAL string object */ diff --git a/decompiler/ObjectFile/LinkedObjectFile.h b/decompiler/ObjectFile/LinkedObjectFile.h index 3ecc68134f..e89a2fe45b 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.h +++ b/decompiler/ObjectFile/LinkedObjectFile.h @@ -51,7 +51,6 @@ class LinkedObjectFile { void process_fp_relative_links(); std::string print_scripts(); std::string print_disassembly(); - std::string print_type_analysis_debug(); bool has_any_functions(); void append_word_to_string(std::string& dest, const LinkedWord& word) const; std::string to_asm_json(const std::string& obj_file_name); diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index 3fcb94a735..63155d1d42 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -22,7 +22,6 @@ #include "common/util/FileUtil.h" #include "decompiler/Function/BasicBlocks.h" #include "decompiler/IR/BasicOpBuilder.h" -#include "decompiler/IR/CfgBuilder.h" #include "decompiler/Function/TypeInspector.h" #include "common/log/log.h" #include "third-party/json.hpp" @@ -533,31 +532,6 @@ void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool d // printf("\n"); } -void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir, - const std::string& suffix) { - lg::info("- Writing debug type analysis..."); - Timer timer; - uint32_t total_bytes = 0, total_files = 0; - - for_each_obj([&](ObjectFileData& obj) { - if (obj.linked_data.has_any_functions()) { - auto file_text = obj.linked_data.print_type_analysis_debug(); - auto file_name = - file_util::combine_path(output_dir, obj.to_unique_name() + suffix + "_dbt.asm"); - - total_bytes += file_text.size(); - file_util::write_text_file(file_name, file_text); - total_files++; - } - }); - - lg::info("Wrote functions dumps:"); - lg::info(" Total {} files", total_files); - lg::info(" Total {} MB", total_bytes / ((float)(1u << 20u))); - lg::info(" Total {} ms ({:.3f} MB/sec)", timer.getMs(), - total_bytes / ((1u << 20u) * timer.getSeconds())); -} - /*! * Dump disassembly for object files containing code. Data zones will also be dumped. */ @@ -744,7 +718,6 @@ void ObjectFileDB::analyze_functions_ir1() { Timer timer; int total_functions = 0; - int resolved_cfg_functions = 0; const auto& config = get_config(); // Step 1 - analyze the "top level" or "login" code for each object file. @@ -803,27 +776,13 @@ void ObjectFileDB::analyze_functions_ir1() { func.warnings += ";; this function exists in multiple non-identical object files\n"; } }); - /* - for (const auto& kv : duplicated_functions) { - printf("Function %s is found in non-identical object files:\n", kv.first.c_str()); - for (const auto& obj : kv.second) { - printf(" %s\n", obj.c_str()); - } - } - */ int total_trivial_cfg_functions = 0; int total_named_functions = 0; int total_basic_ops = 0; int total_failed_basic_ops = 0; - int total_reginfo_ops = 0; int asm_funcs = 0; - int non_asm_funcs = 0; - int successful_cfg_irs = 0; - int successful_type_analysis = 0; - int attempted_type_analysis = 0; - int bad_type_analysis = 0; // didn't attempt because we didn't know how + attempted but failed std::map> unresolved_by_length; @@ -833,11 +792,6 @@ void ObjectFileDB::analyze_functions_ir1() { // Main Pass over each function... for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { total_functions++; - // if (func.guessed_name.to_string() != "sort") { - // return; - // } - // printf("in %s from %s\n", func.guessed_name.to_string().c_str(), - // data.to_unique_name().c_str()); // first, find basic blocks. auto blocks = find_blocks_in_function(data.linked_data, segment_id, func); @@ -872,7 +826,6 @@ void ObjectFileDB::analyze_functions_ir1() { } total_basic_ops += func.get_basic_op_count(); total_failed_basic_ops += func.get_failed_basic_op_count(); - total_reginfo_ops += func.get_reginfo_basic_op_count(); // if we got an inspect method, inspect it. if (func.is_inspect_method) { @@ -880,157 +833,10 @@ void ObjectFileDB::analyze_functions_ir1() { all_type_defs += ";; " + data.to_unique_name() + "\n"; all_type_defs += result.print_as_deftype() + "\n"; } - - // Combine basic ops + CFG to build a nested IR - // register usage first, so we can tell if the SC's if's are used by value. - func.run_reg_usage(); - func.ir = build_cfg_ir(func, *func.cfg, data.linked_data); - non_asm_funcs++; - if (func.ir) { - successful_cfg_irs++; - } - - if (func.cfg->is_fully_resolved()) { - resolved_cfg_functions++; - } else { - lg::warn("Function {} from {} failed cfg ir", func.guessed_name.to_string(), - data.to_unique_name()); - } - - // type analysis - - if (get_config().function_type_prop) { - auto hints = get_config().type_hints_by_function_by_idx[func.guessed_name.to_string()]; - if (get_config().no_type_analysis_functions_by_name.find(func.guessed_name.to_string()) == - get_config().no_type_analysis_functions_by_name.end()) { - if (func.guessed_name.kind == FunctionName::FunctionKind::GLOBAL) { - // we're a global named function. This means we're stored in a symbol - auto kv = dts.symbol_types.find(func.guessed_name.function_name); - if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) { - if (kv->second.base_type() != "function") { - lg::error("Found a function named {} but the symbol has type {}", - func.guessed_name.to_string(), kv->second.print()); - assert(false); - } - // GOOD! - func.type = kv->second; - func.attempted_type_analysis = true; - attempted_type_analysis++; - // lg::info("Type Analysis on {} {}", func.guessed_name.to_string(), - // kv->second.print()); - if (func.run_type_analysis(kv->second, dts, data.linked_data, hints)) { - successful_type_analysis++; - } else { - // bad, failed. - bad_type_analysis++; - } - } else { - // bad, don't know global type - bad_type_analysis++; - } - } else if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) { - // it's a method. - try { - auto info = - dts.ts.lookup_method(func.guessed_name.type_name, func.guessed_name.method_id); - if (info.type.arg_count() >= 1) { - if (info.type.base_type() != "function") { - lg::error("Found a method named {} but the symbol has type {}", - func.guessed_name.to_string(), info.type.print()); - assert(false); - } - // GOOD! - func.type = info.type.substitute_for_method_call(func.guessed_name.type_name); - func.attempted_type_analysis = true; - attempted_type_analysis++; - // lg::info("Type Analysis on {} {}", - // func.guessed_name.to_string(), - // func.type.print()); - if (func.run_type_analysis(func.type, dts, data.linked_data, hints)) { - successful_type_analysis++; - } else { - bad_type_analysis++; - } - } else { - // not enough type info - bad_type_analysis++; - } - - } catch (std::runtime_error& e) { - // failed to lookup method info - bad_type_analysis++; - } - } else if (func.guessed_name.kind == FunctionName::FunctionKind::TOP_LEVEL_INIT) { - attempted_type_analysis++; - func.type = dts.ts.make_function_typespec({}, "none"); - func.attempted_type_analysis = true; - if (func.run_type_analysis(func.type, dts, data.linked_data, hints)) { - successful_type_analysis++; - } else { - // failed - bad_type_analysis++; - } - } else if (func.guessed_name.kind == FunctionName::FunctionKind::UNIDENTIFIED) { - auto obj_name = data.to_unique_name(); - // try looking up the object - const auto& map = get_config().anon_function_types_by_obj_by_id; - auto obj_kv = map.find(obj_name); - if (obj_kv != map.end()) { - auto func_kv = obj_kv->second.find(func.guessed_name.get_anon_id()); - if (func_kv != obj_kv->second.end()) { - attempted_type_analysis++; - func.type = dts.parse_type_spec(func_kv->second); - func.attempted_type_analysis = true; - if (func.run_type_analysis(func.type, dts, data.linked_data, hints)) { - successful_type_analysis++; - } else { - // tried, but failed. - bad_type_analysis++; - } - } else { - // no id - bad_type_analysis++; - } - } else { - // no object in map - bad_type_analysis++; - } - } else { - // unsupported function kind - bad_type_analysis++; - } - - if (!func.attempted_type_analysis) { - func.warnings.append(";; Failed to try type analysis\n"); - } - } else { - func.warnings.append(";; Marked as no type analysis in config\n"); - } - } } else { asm_funcs++; func.warnings.append(";; Assembly Function. Analysis passes were not attempted.\n"); } - - if (func.basic_blocks.size() > 1 && !func.suspected_asm) { - if (func.cfg->is_fully_resolved()) { - } else { - unresolved_by_length[func.end_word - func.start_word].push_back( - func.guessed_name.to_string()); - } - } - - if (!func.suspected_asm && func.basic_blocks.size() <= 1) { - total_trivial_cfg_functions++; - } - - if (!func.guessed_name.empty()) { - total_named_functions++; - } - - // if (func.guessed_name.to_string() == "reset-and-call") { - // assert(false); - // } }); lg::info("Found {} functions ({} with no control flow)", total_functions, @@ -1039,58 +845,9 @@ void ObjectFileDB::analyze_functions_ir1() { 100.f * float(total_named_functions) / float(total_functions)); lg::info("Excluding {} asm functions", asm_funcs); lg::info("Found {} basic blocks in {:.3f} ms", total_basic_blocks, timer.getMs()); - lg::info(" {}/{} functions passed cfg analysis stage ({:.3f}%)", resolved_cfg_functions, - non_asm_funcs, 100.f * float(resolved_cfg_functions) / float(non_asm_funcs)); int successful_basic_ops = total_basic_ops - total_failed_basic_ops; lg::info(" {}/{} basic ops converted successfully ({:.3f}%)", successful_basic_ops, total_basic_ops, 100.f * float(successful_basic_ops) / float(total_basic_ops)); - lg::info(" {}/{} basic ops with reginfo ({:.3f}%)", total_reginfo_ops, total_basic_ops, - 100.f * float(total_reginfo_ops) / float(total_basic_ops)); - lg::info(" {}/{} cfgs converted to ir ({:.3f}%)", successful_cfg_irs, non_asm_funcs, - 100.f * float(successful_cfg_irs) / float(non_asm_funcs)); - lg::info(" {}/{} functions attempted type analysis ({:.2f}%)", attempted_type_analysis, - non_asm_funcs, 100.f * float(attempted_type_analysis) / float(non_asm_funcs)); - lg::info(" {}/{} functions that attempted type analysis succeeded ({:.2f}%)", - successful_type_analysis, attempted_type_analysis, - 100.f * float(successful_type_analysis) / float(attempted_type_analysis)); - lg::info(" {}/{} functions passed type analysis ({:.2f}%)", successful_type_analysis, - non_asm_funcs, 100.f * float(successful_type_analysis) / float(non_asm_funcs)); - lg::info( - " {} functions were supposed to do type analysis but either failed or didn't know their " - "types.\n", - bad_type_analysis); - - // for (auto& kv : unresolved_by_length) { - // printf("LEN %d\n", kv.first); - // for (auto& x : kv.second) { - // printf(" %s\n", x.c_str()); - // } - // } -} - -void ObjectFileDB::analyze_expressions() { - lg::info("- Analyzing Expressions..."); - Timer timer; - int attempts = 0; - int success = 0; - bool had_failure = false; - for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { - (void)segment_id; - - if (/*!had_failure &&*/ func.attempted_type_analysis) { - attempts++; - lg::info("Analyze {}", func.guessed_name.to_string()); - if (func.build_expression(data.linked_data)) { - success++; - } else { - func.warnings.append(";; Expression analysis failed.\n"); - had_failure = true; - } - } - }); - - lg::info(" {}/{} functions passed expression building ({:.2f}%)\n", success, attempts, - 100.f * float(success) / float(attempts)); } void ObjectFileDB::dump_raw_objects(const std::string& output_dir) { diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index a05f641ab8..dd302fecd7 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -64,7 +64,6 @@ class ObjectFileDB { bool write_json, const std::string& file_suffix = ""); - void write_debug_type_analysis(const std::string& output_dir, const std::string& suffix = ""); void analyze_functions_ir1(); void analyze_functions_ir2(const std::string& output_dir); void ir2_top_level_pass(); @@ -81,7 +80,6 @@ class ObjectFileDB { std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg); void process_tpages(); - void analyze_expressions(); std::string process_game_count_file(); std::string process_game_text_files(); diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index 6ff676577a..87b40a698f 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -61,7 +61,7 @@ "write_disassembly":true, "write_hex_near_instructions":false, - "run_ir2":false, + "run_ir2":true, // if false, skips printing disassembly of object with functions, as these are usually large (~1 GB) and not interesting yet. "disassemble_objects_without_functions":false, diff --git a/decompiler/main.cpp b/decompiler/main.cpp index 09299188fd..dc66e13c94 100644 --- a/decompiler/main.cpp +++ b/decompiler/main.cpp @@ -70,12 +70,6 @@ int main(int argc, char** argv) { if (get_config().write_disassembly) { db.write_disassembly(out_folder, get_config().disassemble_objects_without_functions, get_config().write_func_json); - db.write_debug_type_analysis(out_folder); - } - - if (get_config().analyze_expressions) { - db.analyze_expressions(); - db.write_disassembly(out_folder, false, false, "_expr"); } }