diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index 45434b53d7..efdf1b9c0a 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -107,7 +107,7 @@ std::string TypeSystem::get_runtime_type(const TypeSpec& ts) { /*! * Get information about what happens if you dereference an object of given type */ -DerefInfo TypeSystem::get_deref_info(const TypeSpec& ts) { +DerefInfo TypeSystem::get_deref_info(const TypeSpec& ts) const { DerefInfo info; if (!ts.has_single_arg()) { @@ -193,7 +193,7 @@ bool TypeSystem::partially_defined_type_exists(const std::string& name) const { * return type. */ TypeSpec TypeSystem::make_function_typespec(const std::vector& arg_types, - const std::string& return_type) { + const std::string& return_type) const { auto result = make_typespec("function"); for (auto& x : arg_types) { result.add_arg(make_typespec(x)); @@ -205,28 +205,28 @@ TypeSpec TypeSystem::make_function_typespec(const std::vector& arg_ /*! * Create a TypeSpec for a pointer to a type. */ -TypeSpec TypeSystem::make_pointer_typespec(const std::string& type) { +TypeSpec TypeSystem::make_pointer_typespec(const std::string& type) const { return make_pointer_typespec(make_typespec(type)); } /*! * Create a TypeSpec for a pointer to a type. */ -TypeSpec TypeSystem::make_pointer_typespec(const TypeSpec& type) { +TypeSpec TypeSystem::make_pointer_typespec(const TypeSpec& type) const { return TypeSpec("pointer", {type}); } /*! * Create a TypeSpec for an inline-array of type */ -TypeSpec TypeSystem::make_inline_array_typespec(const std::string& type) { +TypeSpec TypeSystem::make_inline_array_typespec(const std::string& type) const { return make_inline_array_typespec(make_typespec(type)); } /*! 
* Create a TypeSpec for an inline-array of type */ -TypeSpec TypeSystem::make_inline_array_typespec(const TypeSpec& type) { +TypeSpec TypeSystem::make_inline_array_typespec(const TypeSpec& type) const { return TypeSpec("inline-array", {type}); } @@ -466,7 +466,7 @@ void TypeSystem::assert_method_id(const std::string& type_name, * and how to access it. */ FieldLookupInfo TypeSystem::lookup_field_info(const std::string& type_name, - const std::string& field_name) { + const std::string& field_name) const { FieldLookupInfo info; info.field = lookup_field(type_name, field_name); @@ -764,7 +764,7 @@ int TypeSystem::get_next_method_id(Type* type) { /*! * Lookup a field of a type by name */ -Field TypeSystem::lookup_field(const std::string& type_name, const std::string& field_name) { +Field TypeSystem::lookup_field(const std::string& type_name, const std::string& field_name) const { auto type = get_type_of_type(type_name); Field field; if (!type->lookup_field(field_name, &field)) { @@ -1088,4 +1088,169 @@ TypeSpec coerce_to_reg_type(const TypeSpec& in) { } return in; +} + +bool debug_reverse_deref = false; + +/*! + * Todo: + * - I suspect inlined basics will be off by 4-bytes, depending on where the basic field starts. + * - Inline array is not yet implemented. + */ +bool TypeSystem::reverse_deref(const ReverseDerefInputInfo& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const { + if (!input.mem_deref) { + assert(input.load_size == 0); + } + if (debug_reverse_deref) { + fmt::print("Reverse Deref Type {} Offset {} Deref {} Load Size {} Signed {}\n", + input.input_type.print(), input.offset, input.mem_deref, input.load_size, + input.sign_extend); + } + + if (input.offset == 0 && !input.mem_deref) { + // base case, we are here! 
+ *addr_of = false; + return true; + } + + auto base_input_type = input.input_type.base_type(); + if (base_input_type == "pointer") { + auto di = get_deref_info(input.input_type); + int closest_index = input.offset / di.stride; + int offset_into_elt = input.offset - (closest_index * di.stride); + auto base_type = di.result_type; + + ReverseDerefInfo::DerefToken token; + token.kind = ReverseDerefInfo::DerefToken::INDEX; + token.index = closest_index; + + assert(di.mem_deref); + if (offset_into_elt == 0) { + if (input.mem_deref) { + path->push_back(token); + *addr_of = false; + *result_type = base_type; + return true; + } else { + path->push_back(token); + *addr_of = true; + *result_type = make_pointer_typespec(base_type); + return true; + } + } else { + return false; + } + + } else if (base_input_type == "inline-array") { + if (debug_reverse_deref) { + fmt::print("Got inline-array case\n"); + } + // todo + return false; + } else { + auto type_info = lookup_type(input.input_type); + auto structure_type = dynamic_cast(type_info); + if (!structure_type) { + if (debug_reverse_deref) { + fmt::print("Failed structure type check\n"); + } + return false; + } + auto corrected_offset = input.offset + type_info->get_offset(); + for (auto& field : structure_type->fields()) { + auto field_deref = lookup_field_info(type_info->get_name(), field.name()); + if (debug_reverse_deref) { + fmt::print("Offset is {}, {} try field {} {} which is {}, {}\n", corrected_offset, + corrected_offset + input.load_size, field.name(), field_deref.type.print(), + field.offset(), field.offset() + get_size_in_type(field)); + } + if (corrected_offset >= field.offset() && (corrected_offset + std::max(1, input.load_size) <= + field.offset() + get_size_in_type(field) || + field.is_dynamic())) { + if (debug_reverse_deref) { + fmt::print(" ok, using field {}\n", field.name()); + } + // we are somewhere in this field! 
+ int offset_into_field = corrected_offset - field.offset(); + + ReverseDerefInfo::DerefToken token; + token.kind = ReverseDerefInfo::DerefToken::FIELD; + token.name = field.name(); + + if (offset_into_field == 0) { + if (field_deref.needs_deref) { + if (input.mem_deref) { + // perfect match to a field requiring a deref, which we have. + TypeSpec loc_type = make_pointer_typespec(field_deref.type); + auto di = get_deref_info(loc_type); + if (di.load_size == input.load_size && di.sign_extend == input.sign_extend) { + path->push_back(token); + *addr_of = false; + *result_type = field_deref.type; + return true; + } else { + return false; + } + } else { + // we didn't deref the field, so it's an addr of + path->push_back(token); + *addr_of = true; + *result_type = make_pointer_typespec(field_deref.type); + return true; + } + } else { + // field doesn't need deref to access. + if (input.mem_deref) { + // but we did deref... + // let's look deeper in this field. + path->push_back(token); + ReverseDerefInputInfo r_input = input; + r_input.offset = offset_into_field; + r_input.input_type = field_deref.type; + return reverse_deref(r_input, path, addr_of, result_type); + } else { + // and we didn't deref. + path->push_back(token); + *result_type = field_deref.type; + *addr_of = false; + return true; + } + } + } else { + // we are partially inside of a field here. + if (field_deref.needs_deref) { + // hmm.. shouldn't be possible + if (debug_reverse_deref) { + fmt::print("Failed extra deref case: {}.\n", field.print()); + } + return false; + } else { + // we should try again. 
+ path->push_back(token); + ReverseDerefInputInfo r_input = input; + r_input.offset = offset_into_field; + r_input.input_type = field_deref.type; + return reverse_deref(r_input, path, addr_of, result_type); + } + } + } + } + } + + if (debug_reverse_deref) { + fmt::print("Failed (reached end)\n"); + } + return false; +} + +ReverseDerefInfo TypeSystem::get_reverse_deref_info(const ReverseDerefInputInfo& input) const { + if (!input.mem_deref) { + assert(input.load_size == 0); + } + ReverseDerefInfo result; + result.success = reverse_deref(input, &result.deref_path, &result.addr_of, &result.result_type); + return result; } \ No newline at end of file diff --git a/common/type_system/TypeSystem.h b/common/type_system/TypeSystem.h index 8d31139477..013a18ab9d 100644 --- a/common/type_system/TypeSystem.h +++ b/common/type_system/TypeSystem.h @@ -29,6 +29,28 @@ struct DerefInfo { TypeSpec result_type; }; +struct ReverseDerefInfo { + struct DerefToken { + enum Kind { INDEX, FIELD } kind; + std::string name; + int index; + }; + + TypeSpec result_type; + std::vector deref_path; + bool success = false; + bool addr_of = false; +}; + +struct ReverseDerefInputInfo { + int offset = -1; + bool mem_deref = false; + RegKind reg = RegKind::INVALID; + int load_size = -1; + bool sign_extend = false; + TypeSpec input_type; +}; + class TypeSystem { public: TypeSystem(); @@ -39,18 +61,19 @@ class TypeSystem { void forward_declare_type_as_structure(const std::string& name); std::string get_runtime_type(const TypeSpec& ts); - DerefInfo get_deref_info(const TypeSpec& ts); + DerefInfo get_deref_info(const TypeSpec& ts) const; + ReverseDerefInfo get_reverse_deref_info(const ReverseDerefInputInfo& input) const; bool fully_defined_type_exists(const std::string& name) const; bool partially_defined_type_exists(const std::string& name) const; TypeSpec make_typespec(const std::string& name) const; TypeSpec make_function_typespec(const std::vector& arg_types, - const std::string& return_type); + const 
std::string& return_type) const; - TypeSpec make_pointer_typespec(const std::string& type); - TypeSpec make_pointer_typespec(const TypeSpec& type); - TypeSpec make_inline_array_typespec(const std::string& type); - TypeSpec make_inline_array_typespec(const TypeSpec& type); + TypeSpec make_pointer_typespec(const std::string& type) const; + TypeSpec make_pointer_typespec(const TypeSpec& type) const; + TypeSpec make_inline_array_typespec(const std::string& type) const; + TypeSpec make_inline_array_typespec(const TypeSpec& type) const; Type* lookup_type(const TypeSpec& ts) const; Type* lookup_type(const std::string& name) const; @@ -71,7 +94,8 @@ class TypeSystem { MethodInfo lookup_new_method(const std::string& type_name); void assert_method_id(const std::string& type_name, const std::string& method_name, int id); - FieldLookupInfo lookup_field_info(const std::string& type_name, const std::string& field_name); + FieldLookupInfo lookup_field_info(const std::string& type_name, + const std::string& field_name) const; void assert_field_offset(const std::string& type_name, const std::string& field_name, int offset); int add_field_to_type(StructureType* type, const std::string& field_name, @@ -96,7 +120,7 @@ class TypeSystem { * Get a type by name and cast to a child class of Type*. Must succeed. 
*/ template - T* get_type_of_type(const std::string& type_name) { + T* get_type_of_type(const std::string& type_name) const { auto x = lookup_type(type_name); T* result = dynamic_cast(x); if (!result) { @@ -109,11 +133,15 @@ class TypeSystem { TypeSpec lowest_common_ancestor(const std::vector& types); private: + bool reverse_deref(const ReverseDerefInputInfo& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const; std::string lca_base(const std::string& a, const std::string& b); bool typecheck_base_types(const std::string& expected, const std::string& actual) const; int get_size_in_type(const Field& field) const; int get_alignment_in_type(const Field& field); - Field lookup_field(const std::string& type_name, const std::string& field_name); + Field lookup_field(const std::string& type_name, const std::string& field_name) const; StructureType* add_builtin_structure(const std::string& parent, const std::string& type_name, bool boxed = false); diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index ef18e73cb8..a78de4336c 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -8,15 +8,17 @@ add_executable(decompiler ObjectFile/LinkedObjectFileCreation.cpp ObjectFile/LinkedObjectFile.cpp Function/Function.cpp + Function/TypeAnalysis.cpp util/FileIO.cpp config.cpp util/DecompilerTypeSystem.cpp Function/BasicBlocks.cpp Disasm/InstructionMatching.cpp - Function/CfgVtx.cpp Function/CfgVtx.h + Function/CfgVtx.cpp IR/BasicOpBuilder.cpp IR/CfgBuilder.cpp - IR/IR.cpp) + IR/IR.cpp + IR/IR_TypeAnalysis.cpp) target_link_libraries(decompiler goos diff --git a/decompiler/Disasm/Register.h b/decompiler/Disasm/Register.h index d6b48c3b16..2578397176 100644 --- a/decompiler/Disasm/Register.h +++ b/decompiler/Disasm/Register.h @@ -140,6 +140,10 @@ class Register { bool operator==(const Register& other) const; bool operator!=(const Register& other) const; + struct hash { + auto operator()(const Register& x) const { return 
std::hash()(x.id); } + }; + private: uint16_t id = -1; }; diff --git a/decompiler/Function/BasicBlocks.h b/decompiler/Function/BasicBlocks.h index 38eefd2f45..8b4f4e26d7 100644 --- a/decompiler/Function/BasicBlocks.h +++ b/decompiler/Function/BasicBlocks.h @@ -12,6 +12,10 @@ struct BasicBlock { int start_word; int end_word; + std::vector pred; + int succ_ft = -1; + int succ_branch = -1; + BasicBlock(int _start_word, int _end_word) : start_word(_start_word), end_word(_end_word) {} }; diff --git a/decompiler/Function/CfgVtx.cpp b/decompiler/Function/CfgVtx.cpp index 0362affe76..09accf7291 100644 --- a/decompiler/Function/CfgVtx.cpp +++ b/decompiler/Function/CfgVtx.cpp @@ -1692,31 +1692,41 @@ const std::vector& ControlFlowGraph::create_blocks(int count) { /*! * Setup pred/succ for a block which falls through to the next. */ -void ControlFlowGraph::link_fall_through(BlockVtx* first, BlockVtx* second) { +void ControlFlowGraph::link_fall_through(BlockVtx* first, + BlockVtx* second, + std::vector& blocks) { assert(!first->succ_ft); // don't want to overwrite something by accident. // can only fall through to the next code in memory. assert(first->next == second); assert(second->prev == first); first->succ_ft = second; + assert(blocks.at(first->block_id).succ_ft == -1); + blocks.at(first->block_id).succ_ft = second->block_id; if (!second->has_pred(first)) { // if a block can block fall through and branch to the same block, we want to avoid adding // it as a pred twice. This is rare, but does happen and makes sense with likely branches // which only run the delay slot when taken. second->pred.push_back(first); + blocks.at(second->block_id).pred.push_back(first->block_id); } } /*! * Setup pred/succ for a block which branches to second. 
*/ -void ControlFlowGraph::link_branch(BlockVtx* first, BlockVtx* second) { +void ControlFlowGraph::link_branch(BlockVtx* first, + BlockVtx* second, + std::vector& blocks) { assert(!first->succ_branch); - first->succ_branch = second; + assert(blocks.at(first->block_id).succ_branch == -1); + blocks.at(first->block_id).succ_branch = second->block_id; + if (!second->has_pred(first)) { // see comment in link_fall_through second->pred.push_back(first); + blocks.at(second->block_id).pred.push_back(first->block_id); } } @@ -1756,7 +1766,7 @@ std::shared_ptr build_cfg(const LinkedObjectFile& file, int se if (b.end_word - b.start_word < 2) { // there's no room for a branch here, fall through to the end if (not_last) { - cfg->link_fall_through(blocks.at(i), blocks.at(i + 1)); + cfg->link_fall_through(blocks.at(i), blocks.at(i + 1), func.basic_blocks); } } else { // might be a branch @@ -1790,7 +1800,7 @@ std::shared_ptr build_cfg(const LinkedObjectFile& file, int se } assert(block_target != -1); - cfg->link_branch(blocks.at(i), blocks.at(block_target)); + cfg->link_branch(blocks.at(i), blocks.at(block_target), func.basic_blocks); if (branch_always) { // don't continue to the next one @@ -1798,13 +1808,13 @@ std::shared_ptr build_cfg(const LinkedObjectFile& file, int se } else { // not an always branch if (not_last) { - cfg->link_fall_through(blocks.at(i), blocks.at(i + 1)); + cfg->link_fall_through(blocks.at(i), blocks.at(i + 1), func.basic_blocks); } } } else { // not a branch at all if (not_last) { - cfg->link_fall_through(blocks.at(i), blocks.at(i + 1)); + cfg->link_fall_through(blocks.at(i), blocks.at(i + 1), func.basic_blocks); } } } diff --git a/decompiler/Function/CfgVtx.h b/decompiler/Function/CfgVtx.h index 7031c37218..66fbd218c8 100644 --- a/decompiler/Function/CfgVtx.h +++ b/decompiler/Function/CfgVtx.h @@ -279,8 +279,8 @@ class ControlFlowGraph { void flag_early_exit(const std::vector& blocks); const std::vector& create_blocks(int count); - void 
link_fall_through(BlockVtx* first, BlockVtx* second); - void link_branch(BlockVtx* first, BlockVtx* second); + void link_fall_through(BlockVtx* first, BlockVtx* second, std::vector& blocks); + void link_branch(BlockVtx* first, BlockVtx* second, std::vector& blocks); bool find_cond_w_else(); bool find_cond_n_else(); diff --git a/decompiler/Function/Function.cpp b/decompiler/Function/Function.cpp index 96233104fb..65e769830b 100644 --- a/decompiler/Function/Function.cpp +++ b/decompiler/Function/Function.cpp @@ -576,6 +576,10 @@ std::shared_ptr Function::get_basic_op_at_instr(int idx) { return basic_ops.at(instruction_to_basic_op.at(idx)); } +const TypeMap& Function::get_typemap_by_instr_idx(int idx) { + return basic_op_typemaps.at(instruction_to_basic_op.at(idx)); +} + int Function::get_basic_op_count() { return basic_ops.size(); } diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index 5e8ddfe711..4feba2f238 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -9,6 +9,7 @@ #include "BasicBlocks.h" #include "CfgVtx.h" #include "decompiler/IR/IR.h" +#include "common/type_system/TypeSpec.h" class DecompilerTypeSystem; @@ -60,6 +61,11 @@ struct FunctionName { } }; +class BasicOpTypeInfo { + public: + std::unordered_map all_reg_types; +}; + class Function { public: Function(int _start_word, int _end_word); @@ -68,10 +74,15 @@ class Function { void find_method_defs(LinkedObjectFile& file); void add_basic_op(std::shared_ptr op, int start_instr, int end_instr); bool has_basic_ops() { return !basic_ops.empty(); } + bool has_typemaps() { return !basic_op_typemaps.empty(); } bool instr_starts_basic_op(int idx); std::shared_ptr get_basic_op_at_instr(int idx); + const TypeMap& get_typemap_by_instr_idx(int idx); int get_basic_op_count(); int get_failed_basic_op_count(); + void run_type_analysis(const TypeSpec& my_type, + DecompilerTypeSystem& dts, + LinkedObjectFile& file); std::shared_ptr ir = nullptr; @@ -129,6 +140,7 
@@ class Function { private: void check_epilogue(const LinkedObjectFile& file); std::vector> basic_ops; + std::vector basic_op_typemaps; std::unordered_map instruction_to_basic_op; std::unordered_map basic_op_to_instruction; }; diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp new file mode 100644 index 0000000000..9046a633e0 --- /dev/null +++ b/decompiler/Function/TypeAnalysis.cpp @@ -0,0 +1,165 @@ +/*! + * @file TypeAnalysis.cpp + * This is the first attempt to do GOAL type analysis. + * + * This approach is based purely on registers/basic blocks, and not GOAL variables or expressions. + * This was chosen because it should (maybe) be more successful at things like + * (format #t "blah" (if a b c) (and b c)) + * where there is branching in between the load of format and the actual function call. + * + * We use IR Basic Ops instead of MIPS instructions to do the type propagation because there are + * often weird intermediate results in between instructions within the same IR basic op + * that we don't care about. + * + * The basic idea is to "keep propagating types until nothing changes." + * When there are two ways to get to the same spot, and the types there are different, we take + * the lowest common ancestor of the types. + */ + +#include +#include "Function.h" +#include "decompiler/util/DecompilerTypeSystem.h" +#include "decompiler/Disasm/InstructionMatching.h" + +namespace { +/*! + * Modify the combined type map to be the lowest common ancestor of combined and add for shared + * regs. Currently combined will also be updated to contain the union of unshared registers. + * + * Returns if combined was changed. 
+ */ +bool lca_tm(TypeMap& combined, const TypeMap& add, DecompilerTypeSystem& dts) { + bool changed = false; + for (auto& kv : add) { + auto existing = combined.find(kv.first); + if (existing == combined.end()) { + changed = true; + combined[kv.first] = kv.second; + } else { + auto candidate = dts.ts.lowest_common_ancestor(kv.second, existing->second); + if (candidate != existing->second) { + changed = true; + combined[kv.first] = candidate; + } + } + } + return changed; +} + +/*! + * Debug print a TypeMap. + */ +void print_tm(const TypeMap& tm) { + for (int i = 0; i < 32; i++) { + auto gpr = Register(Reg::RegisterKind::GPR, i); + auto kv = tm.find(gpr); + if (kv != tm.end()) { + fmt::print("{}: {}, ", gpr.to_charp(), kv->second.print()); + } + } + fmt::print("\n"); +} +} // namespace + +/*! + * Main Type Analysis Algorithm. + */ +void Function::run_type_analysis(const TypeSpec& my_type, + DecompilerTypeSystem& dts, + LinkedObjectFile& file) { + if (!has_basic_ops()) { + fmt::print("run_type_analysis failed because function {} has no basic ops\n", + guessed_name.to_string()); + return; + } + std::vector typemap_out; + typemap_out.resize(basic_ops.size()); + + // can only run if our type makes sense and has arguments. + assert(my_type.base_type() == "function"); + assert(my_type.arg_count() > 0); + + int n_args = int(my_type.arg_count()) - 1; + auto& return_type = my_type.get_arg(int(my_type.arg_count()) - 1); + + // all types at the entrance of each basic block. + std::vector bb_entry_types; + bb_entry_types.resize(basic_blocks.size()); + + // We run the algorithm in rounds. If nothing changes after running a round, we are done. + // In each round, we only visit each block once. + // It's not clear if this is the most efficient approach, but it is an easy way to be sure to + // hit everything. + + // the list of blocks that should be visited in this round. 
+ std::vector to_visit; + + // this list of blocks we have already visited, and should not visit again until the next round. + std::set visited; + + // Initialize for the first round. + // start by visiting the first block + to_visit.push_back(0); + + // the argument registers for GOAL (todo, common register utils for GOAL) + std::vector arg_regs = {make_gpr(Reg::A0), make_gpr(Reg::A1), make_gpr(Reg::A2), + make_gpr(Reg::A3), make_gpr(Reg::T0), make_gpr(Reg::T1), + make_gpr(Reg::T2), make_gpr(Reg::T3)}; + + // set up entry types for the first block + for (int i = 0; i < n_args; i++) { + bb_entry_types.at(0)[arg_regs.at(i)] = my_type.get_arg(i); + } + // print_tm(bb_entry_types.at(0)); + + bool changed = true; // did we change anything in this round? + int round = 0; // what round are we currently running + while (changed) { + changed = false; + fmt::print("--Starting round {}\n", round); + while (!to_visit.empty()) { + int block_id = to_visit.back(); + visited.insert(block_id); + auto& block = basic_blocks.at(block_id); + to_visit.pop_back(); + fmt::print("-Visit {}\n", block_id); + + TypeMap current_types = bb_entry_types.at(block_id); + + // basic blocks are in terms of instructions, but we want to do our logic on basic ops + for (int i = block.start_word; i < block.end_word; i++) { + if (instr_starts_basic_op(i)) { + auto basic_op = get_basic_op_at_instr(i); + fmt::print("-Attempt prop on {}\n", basic_op->print(file)); + auto basic_idx = instruction_to_basic_op.at(i); + typemap_out.at(basic_idx) = current_types; + if (!basic_op->update_types(typemap_out.at(basic_idx), dts, file)) { + fmt::print("ERROR: Giving up on type analysis, could not prop types on {}\n", + basic_op->print(file)); + return; + } + current_types = typemap_out.at(basic_idx); + } + } + + // prop to succ blocks + for (auto succ : {block.succ_branch, block.succ_ft}) { + if (succ != -1) { + if (lca_tm(bb_entry_types.at(succ), current_types, dts)) { + changed = true; // need another round + 
fmt::print("Block {} entry types are now ", succ); + print_tm(bb_entry_types.at(succ)); + + if (visited.find(succ) == visited.end()) { + to_visit.push_back(succ); + } + } + } + } + } + + round++; + } + + basic_op_typemaps = std::move(typemap_out); +} diff --git a/decompiler/IR/BasicOpBuilder.cpp b/decompiler/IR/BasicOpBuilder.cpp index adc8d25e4b..f70bfad09d 100644 --- a/decompiler/IR/BasicOpBuilder.cpp +++ b/decompiler/IR/BasicOpBuilder.cpp @@ -516,8 +516,14 @@ std::shared_ptr try_daddu(Instruction& instr, int idx) { } std::shared_ptr try_dsubu(Instruction& instr, int idx) { - if (is_gpr_3(instr, InstructionKind::DSUBU, {}, {}, {}) && + if (is_gpr_3(instr, InstructionKind::DSUBU, {}, make_gpr(Reg::R0), {}) && !instr.get_src(0).is_reg(make_gpr(Reg::S7)) && !instr.get_src(1).is_reg(make_gpr(Reg::S7))) { + return make_set( + IR_Set::REG_64, make_reg(instr.get_dst(0).get_reg(), idx), + std::make_shared(IR_IntMath1::NEG, make_reg(instr.get_src(1).get_reg(), idx))); + } else if (is_gpr_3(instr, InstructionKind::DSUBU, {}, {}, {}) && + !instr.get_src(0).is_reg(make_gpr(Reg::S7)) && + !instr.get_src(1).is_reg(make_gpr(Reg::S7))) { return make_set( IR_Set::REG_64, make_reg(instr.get_dst(0).get_reg(), idx), std::make_shared(IR_IntMath2::SUB, make_reg(instr.get_src(0).get_reg(), idx), diff --git a/decompiler/IR/CfgBuilder.cpp b/decompiler/IR/CfgBuilder.cpp index 6d628d3f47..71d8b6a7ed 100644 --- a/decompiler/IR/CfgBuilder.cpp +++ b/decompiler/IR/CfgBuilder.cpp @@ -553,6 +553,47 @@ bool is_int_math_3(IR* ir, return true; } +bool is_int_math_2(IR* ir, + MatchParam kind, + MatchParam dst, + MatchParam src0, + Register* dst_out = nullptr, + Register* src0_out = nullptr) { + // should be a set reg to int math 2 ir + auto set = dynamic_cast(ir); + if (!set) { + return false; + } + + // destination should be a register + auto dest = dynamic_cast(set->dst.get()); + if (!dest || dst != dest->reg) { + return false; + } + + auto math = dynamic_cast(set->src.get()); + if (!math || kind 
!= math->kind) { + return false; + } + + auto arg = dynamic_cast(math->arg.get()); + + if (!arg || src0 != arg->reg) { + return false; + } + + // it's a match! + if (dst_out) { + *dst_out = dest->reg; + } + + if (src0_out) { + *src0_out = arg->reg; + } + + return true; +} + /*! * Are these IR's both the same register? False if either is not a register. */ @@ -670,8 +711,11 @@ std::shared_ptr try_sc_as_ash(Function& f, LinkedObjectFile& file, ShortCirc auto dsrav_candidate = b1_ir->forms.at(1); Register clobber; - if (!is_int_math_3(dsubu_candidate.get(), IR_IntMath2::SUB, {}, make_gpr(Reg::R0), sa_in->reg, - &clobber)) { + // if (!is_int_math_3(dsubu_candidate.get(), IR_IntMath2::SUB, {}, make_gpr(Reg::R0), sa_in->reg, + // &clobber)) { + // return nullptr; + // } + if (!is_int_math_2(dsubu_candidate.get(), IR_IntMath1::NEG, {}, sa_in->reg, &clobber)) { return nullptr; } diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index 425e8719c1..7985c288e9 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -26,6 +26,13 @@ std::string IR::print(const LinkedObjectFile& file) const { return pretty_print::to_string(to_form(file)); } +bool IR::update_types(TypeMap& reg_types, DecompilerTypeSystem& dts, LinkedObjectFile& file) const { + (void)reg_types; + (void)dts; + (void)file; + return false; +} + goos::Object IR_Failed::to_form(const LinkedObjectFile& file) const { (void)file; return pretty_print::build_list("INVALID-OPERATION"); @@ -283,6 +290,9 @@ goos::Object IR_IntMath1::to_form(const LinkedObjectFile& file) const { case ABS: math_operator = "abs.si"; break; + case NEG: + math_operator = "-.i"; + break; default: assert(false); } diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index 979c94ff3c..604e3434f4 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -5,8 +5,13 @@ #include #include "decompiler/Disasm/Register.h" #include "common/goos/PrettyPrinter.h" +#include "common/type_system/TypeSpec.h" class LinkedObjectFile; +class 
DecompilerTypeSystem; + +// Map of what type is in each register. +using TypeMap = std::unordered_map; class IR { public: @@ -14,6 +19,13 @@ class IR { std::vector> get_all_ir(LinkedObjectFile& file) const; std::string print(const LinkedObjectFile& file) const; virtual void get_children(std::vector>* output) const = 0; + virtual bool update_types(TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file) const; + virtual bool get_type_of_expr(const TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + TypeSpec* out) const; bool is_basic_op = false; }; @@ -30,6 +42,10 @@ class IR_Register : public IR { IR_Register(Register _reg, int _instr_idx) : reg(_reg), instr_idx(_instr_idx) {} goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + bool get_type_of_expr(const TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + TypeSpec* out) const override; Register reg; int instr_idx = -1; }; @@ -51,6 +67,9 @@ class IR_Set : public IR { : kind(_kind), dst(std::move(_dst)), src(std::move(_src)) {} goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + bool update_types(TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file) const override; std::shared_ptr dst, src; std::shared_ptr clobber = nullptr; }; @@ -70,6 +89,10 @@ class IR_Symbol : public IR { std::string name; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + bool get_type_of_expr(const TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + TypeSpec* out) const override; }; class IR_SymbolValue : public IR { @@ -78,6 +101,10 @@ class IR_SymbolValue : public IR { std::string name; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + bool 
get_type_of_expr(const TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + TypeSpec* out) const override; }; class IR_StaticAddress : public IR { @@ -98,6 +125,10 @@ class IR_Load : public IR { std::shared_ptr location; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + bool get_type_of_expr(const TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + TypeSpec* out) const override; }; class IR_FloatMath2 : public IR { @@ -108,6 +139,10 @@ class IR_FloatMath2 : public IR { std::shared_ptr arg0, arg1; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + bool get_type_of_expr(const TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + TypeSpec* out) const override; }; class IR_FloatMath1 : public IR { @@ -145,15 +180,23 @@ class IR_IntMath2 : public IR { std::shared_ptr arg0, arg1; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + bool get_type_of_expr(const TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + TypeSpec* out) const override; }; class IR_IntMath1 : public IR { public: - enum Kind { NOT, ABS } kind; + enum Kind { NOT, ABS, NEG } kind; IR_IntMath1(Kind _kind, std::shared_ptr _arg) : kind(_kind), arg(std::move(_arg)) {} std::shared_ptr arg; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + bool get_type_of_expr(const TypeMap& reg_types, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + TypeSpec* out) const override; }; class IR_Call : public IR { @@ -169,6 +212,10 @@ class IR_IntegerConstant : public IR { explicit IR_IntegerConstant(int64_t _value) : value(_value) {} goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* 
output) const override;
+  bool get_type_of_expr(const TypeMap& reg_types,
+                        DecompilerTypeSystem& dts,
+                        LinkedObjectFile& file,
+                        TypeSpec* out) const override;
 };
 
 struct BranchDelay {
@@ -256,6 +303,9 @@ class IR_Branch : public IR {
 
   goos::Object to_form(const LinkedObjectFile& file) const override;
   void get_children(std::vector>* output) const override;
+  virtual bool update_types(TypeMap& reg_types,
+                            DecompilerTypeSystem& dts,
+                            LinkedObjectFile& file) const;
 };
 
 class IR_Compare : public IR {
@@ -266,6 +316,10 @@ class IR_Compare : public IR {
 
   goos::Object to_form(const LinkedObjectFile& file) const override;
   void get_children(std::vector>* output) const override;
+  bool get_type_of_expr(const TypeMap& reg_types,
+                        DecompilerTypeSystem& dts,
+                        LinkedObjectFile& file,
+                        TypeSpec* out) const override;
 };
 
 class IR_Nop : public IR {
diff --git a/decompiler/IR/IR_TypeAnalysis.cpp b/decompiler/IR/IR_TypeAnalysis.cpp
new file mode 100644
index 0000000000..e5d5640f83
--- /dev/null
+++ b/decompiler/IR/IR_TypeAnalysis.cpp
@@ -0,0 +1,370 @@
+#include <cassert>
+
+#include <third-party/fmt/core.h>
+#include "IR.h"
+#include "decompiler/util/DecompilerTypeSystem.h"
+
+/*!
+ * Fallback type lookup for IR nodes that do not know how to compute their own type.
+ * @return always false; *out is not written.
+ */
+bool IR::get_type_of_expr(const TypeMap& reg_types,
+                          DecompilerTypeSystem& dts,
+                          LinkedObjectFile& file,
+                          TypeSpec* out) const {
+  (void)reg_types;
+  (void)dts;
+  (void)file;
+  (void)out;
+  return false;
+}
+
+/*!
+ * A register's type is whatever the type map currently records for it.
+ * @return true (and sets *out) if reg has an entry in reg_types, false otherwise.
+ */
+bool IR_Register::get_type_of_expr(const TypeMap& reg_types,
+                                   DecompilerTypeSystem& dts,
+                                   LinkedObjectFile& file,
+                                   TypeSpec* out) const {
+  (void)dts;
+  (void)file;
+  const auto kv = reg_types.find(reg);
+  if (kv == reg_types.end()) {
+    return false;
+  }
+  *out = kv->second;
+  return true;
+}
+
+/*!
+ * Propagate the type of the source expression into the destination register.
+ * @return false, leaving reg_types untouched, if the destination is not a register or the type of
+ *         the source cannot be determined.
+ */
+bool IR_Set::update_types(TypeMap& reg_types,
+                          DecompilerTypeSystem& dts,
+                          LinkedObjectFile& file) const {
+  auto dest_as_reg = dynamic_cast<IR_Register*>(dst.get());
+  if (!dest_as_reg) {
+    return false;
+  }
+
+  TypeSpec src_type;
+  if (!src->get_type_of_expr(reg_types, dts, file, &src_type)) {
+    return false;
+  }
+  reg_types[dest_as_reg->reg] = src_type;
+  return true;
+}
+
+/*!
+ * Type of the value produced by a load.
+ * Loads from a static location into a float register are reported as "float". Everything else is
+ * resolved by asking the type system what a load of `size` bytes at offset 0 of the location's
+ * type yields.
+ * @return false if the location's type is unknown or the type system can't resolve the access.
+ */
+bool IR_Load::get_type_of_expr(const TypeMap& reg_types,
+                               DecompilerTypeSystem& dts,
+                               LinkedObjectFile& file,
+                               TypeSpec* out) const {
+  // NOTE(review): cast target assumed to be IR_StaticAddress - confirm against IR.h.
+  auto loc_as_static = dynamic_cast<IR_StaticAddress*>(location.get());
+  if (loc_as_static && kind == FLOAT) {
+    // this will need to get upgraded once we have good support for static data.
+    // but for now we will do a "best guess" that should cover common cases.
+    // the FLOAT kind is for an instruction that loads directly into a floating point register.
+    // so we know it's either float or a child of float.
+    // this can be two cases, loading a floating point constant, or loading a float from
+    // a static object. In either case, we don't yet have enough information to get a more
+    // specific type, so "float" is a safe fallback.
+    *out = dts.ts.make_typespec("float");
+    return true;
+  }
+
+  TypeSpec loc_type;
+  if (!location->get_type_of_expr(reg_types, dts, file, &loc_type)) {
+    return false;
+  }
+
+  ReverseDerefInputInfo info;
+  info.mem_deref = true;
+  info.input_type = loc_type;
+  info.offset = 0;
+  info.load_size = size;
+  info.sign_extend = kind == SIGNED;
+  switch (kind) {
+    case UNSIGNED:
+    case SIGNED:
+      info.reg = RegKind::GPR_64;
+      break;
+    case FLOAT:
+      info.reg = RegKind::FLOAT;
+      break;
+    default:
+      // unknown load kind. bail out so builds without asserts never use an unset info.reg
+      assert(false);
+      return false;
+  }
+
+  auto result = dts.ts.get_reverse_deref_info(info);
+  if (!result.success) {
+    return false;
+  }
+  *out = result.result_type;
+  return true;
+}
+
+namespace {
+/*!
+ * Run the type system's typecheck with the type called `name` as the first type and `type` as the
+ * second, passing an empty string and false for the remaining three arguments.
+ */
+bool is_type(const TypeSpec& type, const std::string& name, TypeSystem& ts) {
+  return ts.typecheck(ts.make_typespec(name), type, "", false, false);
+}
+
+/*! is_type check against "float". */
+bool is_float(const TypeSpec& type, TypeSystem& ts) {
+  return is_type(type, "float", ts);
+}
+
+/*! is_type check against "int" or "uint". */
+bool is_int_or_uint(const TypeSpec& type, TypeSystem& ts) {
+  return is_type(type, "int", ts) || is_type(type, "uint", ts);
+}
+
+}  // namespace
+
+/*!
+ * Two-argument float math always produces a float, provided both arguments are floats.
+ * @return false if either argument has an unknown type or is not a float.
+ */
+bool IR_FloatMath2::get_type_of_expr(const TypeMap& reg_types,
+                                     DecompilerTypeSystem& dts,
+                                     LinkedObjectFile& file,
+                                     TypeSpec* out) const {
+  // to be paranoid, we should check that our arguments are both floats.
+  for (auto& arg : {arg0, arg1}) {
+    TypeSpec arg_type;
+    if (!arg->get_type_of_expr(reg_types, dts, file, &arg_type)) {
+      return false;
+    }
+    if (!is_float(arg_type, dts.ts)) {
+      return false;
+    }
+  }
+
+  *out = dts.ts.make_typespec("float");
+  return true;
+}
+
+/*!
+ * Type of a two-argument integer operation.
+ * - both arguments int/uint: the result takes the type of arg0.
+ * - otherwise, an ADD whose arg1 is an integer constant is treated as address arithmetic and is
+ *   resolved through the type system's reverse deref lookup (no memory access, offset = constant).
+ * @return false if either argument's type is unknown or neither case applies.
+ */
+bool IR_IntMath2::get_type_of_expr(const TypeMap& reg_types,
+                                   DecompilerTypeSystem& dts,
+                                   LinkedObjectFile& file,
+                                   TypeSpec* out) const {
+  TypeSpec arg0_type;
+  TypeSpec arg1_type;
+
+  // special case for subtraction with r0
+  //  auto arg0_as_reg = dynamic_cast<IR_Register*>(arg0.get());
+  //  if(arg0_as_reg && arg0_as_reg->reg == make_gpr(Reg::R0) && kind == SUB) {
+  //    if (!arg1->get_type_of_expr(reg_types, dts, file, &arg1_type)) {
+  //      return false;
+  //    }
+  //
+  //    if(is_int_or_uint(arg1_type, dts.ts)) {
+  //      *out = arg1_type;
+  //      return true;
+  //    }
+  //  }
+
+  if (!arg0->get_type_of_expr(reg_types, dts, file, &arg0_type)) {
+    return false;
+  }
+
+  if (!arg1->get_type_of_expr(reg_types, dts, file, &arg1_type)) {
+    return false;
+  }
+
+  if (is_int_or_uint(arg0_type, dts.ts) && is_int_or_uint(arg1_type, dts.ts)) {
+    // the arg0 wins
+    *out = arg0_type;
+    return true;
+  }
+
+  auto arg1_as_int = dynamic_cast<IR_IntegerConstant*>(arg1.get());
+  if (kind == ADD && arg1_as_int) {
+    // it's a memory thing...
+    ReverseDerefInputInfo info;
+    info.mem_deref = false;
+    info.input_type = arg0_type;
+    info.offset = arg1_as_int->value;
+    info.load_size = 0;
+    info.sign_extend = false;
+    info.reg = RegKind::GPR_64;
+    auto result = dts.ts.get_reverse_deref_info(info);
+    if (result.success) {
+      *out = result.result_type;
+      return true;
+    }
+  }
+
+  //  auto arg0_as_int = dynamic_cast<IR_IntegerConstant*>(arg0.get());
+  //  if (kind == ADD && arg0_as_int) {
+  //    // it's a memory thing...
+  //    ReverseDerefInputInfo info;
+  //    info.mem_deref = false;
+  //    info.input_type = arg1_type;
+  //    info.offset = arg0_as_int->value;
+  //    info.load_size = 0;
+  //    info.sign_extend = false;
+  //    info.reg = RegKind::GPR_64;
+  //    auto result = dts.ts.get_reverse_deref_info(info);
+  //    if (result.success) {
+  //      *out = result.result_type;
+  //      return true;
+  //    } else {
+  //      fmt::print("deref fail!\n");
+  //    }
+  //  }
+
+  return false;
+}
+
+/*!
+ * A one-argument integer operation on an int/uint keeps the argument's type.
+ * @return false if the argument's type is unknown or is not an int/uint.
+ */
+bool IR_IntMath1::get_type_of_expr(const TypeMap& reg_types,
+                                   DecompilerTypeSystem& dts,
+                                   LinkedObjectFile& file,
+                                   TypeSpec* out) const {
+  TypeSpec arg_type;
+  if (!arg->get_type_of_expr(reg_types, dts, file, &arg_type)) {
+    return false;
+  }
+  if (!is_int_or_uint(arg_type, dts.ts)) {
+    return false;
+  }
+  *out = arg_type;
+  return true;
+}
+
+/*!
+ * Apply the effect of the branch delay slot to the type map.
+ * @return true if the delay slot was handled, false if its kind is not supported or the registers
+ *         it refers to could not be resolved.
+ */
+bool IR_Branch::update_types(TypeMap& reg_types,
+                             DecompilerTypeSystem& dts,
+                             LinkedObjectFile& file) const {
+  (void)file;
+  switch (branch_delay.kind) {
+    case BranchDelay::NOP:
+      return true;
+    case BranchDelay::DSLLV:
+    case BranchDelay::NEGATE: {
+      auto dst_as_reg = dynamic_cast<IR_Register*>(branch_delay.destination.get());
+      if (!dst_as_reg) {
+        return false;
+      }
+      reg_types[dst_as_reg->reg] = dts.ts.make_typespec("int");  // todo?
+      return true;
+    }
+    case BranchDelay::SET_REG_FALSE: {
+      auto dst_as_reg = dynamic_cast<IR_Register*>(branch_delay.destination.get());
+      if (!dst_as_reg) {
+        return false;
+      }
+      // this probably will break a lot of things when using the result of an if.
+      reg_types[dst_as_reg->reg] = dts.ts.make_typespec("basic");  // todo?
+      return true;
+    }
+    case BranchDelay::SET_REG_REG: {
+      // this probably will break a lot of things when using the result of an if.
+      auto dst_as_reg = dynamic_cast<IR_Register*>(branch_delay.destination.get());
+      auto src_as_reg = dynamic_cast<IR_Register*>(branch_delay.source.get());
+      if (!dst_as_reg || !src_as_reg) {
+        return false;
+      }
+      auto src_kv = reg_types.find(src_as_reg->reg);
+      if (src_kv == reg_types.end()) {
+        return false;
+      }
+      // copy the source type out first, so that inserting the destination can't disturb the
+      // entry we are reading from. reuses the lookup above instead of searching again.
+      const TypeSpec src_type = src_kv->second;
+      reg_types[dst_as_reg->reg] = src_type;
+      return true;
+    }
+    default:
+      return false;
+  }
+}
+
+/*!
+ * Symbols are always reported as type "symbol".
+ */
+bool IR_Symbol::get_type_of_expr(const TypeMap& reg_types,
+                                 DecompilerTypeSystem& dts,
+                                 LinkedObjectFile& file,
+                                 TypeSpec* out) const {
+  (void)reg_types;
+  (void)file;
+  *out = dts.ts.make_typespec("symbol");
+  return true;
+}
+
+/*!
+ * The type of a symbol's value is looked up by name in dts.symbol_types.
+ * @return false if no type is recorded for this symbol.
+ */
+bool IR_SymbolValue::get_type_of_expr(const TypeMap& reg_types,
+                                      DecompilerTypeSystem& dts,
+                                      LinkedObjectFile& file,
+                                      TypeSpec* out) const {
+  (void)reg_types;
+  (void)file;
+  const auto kv = dts.symbol_types.find(name);
+  if (kv == dts.symbol_types.end()) {
+    return false;
+  }
+  *out = kv->second;
+  return true;
+}
+
+/*!
+ * Integer constants are always reported as "int".
+ */
+bool IR_IntegerConstant::get_type_of_expr(const TypeMap& reg_types,
+                                          DecompilerTypeSystem& dts,
+                                          LinkedObjectFile& file,
+                                          TypeSpec* out) const {
+  (void)reg_types;
+  (void)file;
+  *out = dts.ts.make_typespec("int");
+  return true;
+}
+
+/*!
+ * Comparisons are always reported as "symbol".
+ */
+bool IR_Compare::get_type_of_expr(const TypeMap& reg_types,
+                                  DecompilerTypeSystem& dts,
+                                  LinkedObjectFile& file,
+                                  TypeSpec* out) const {
+  (void)reg_types;
+  (void)file;
+  // always returns #t or #f
+  *out = dts.ts.make_typespec("symbol");
+  return true;
+}
diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp
index 92f671970f..8d93990763 100644
--- a/decompiler/ObjectFile/LinkedObjectFile.cpp
+++ b/decompiler/ObjectFile/LinkedObjectFile.cpp
@@ -2,11 +2,13 @@
  * @file LinkedObjectFile.cpp
  * An object file's data with linking information included.
*/
-#include "LinkedObjectFile.h"
+
 #include
 #include
 #include
 #include
+#include "third-party/fmt/format.h"
+#include "LinkedObjectFile.h"
 #include "decompiler/Disasm/InstructionDecode.h"
 #include "decompiler/config.h"
 
@@ -556,9 +558,10 @@ std::string LinkedObjectFile::print_disassembly() {
           auto& word = words_by_seg[seg].at(func.start_word + i);
           append_word_to_string(result, word);
         } else {
+          // print basic op stuff
           if (func.has_basic_ops() && func.instr_starts_basic_op(i)) {
-            if (line.length() < 40) {
-              line.append(40 - line.length(), ' ');
+            if (line.length() < 30) {
+              line.append(30 - line.length(), ' ');
             }
             line += ";; " + func.get_basic_op_at_instr(i)->print(*this);
             for (int iidx = 0; iidx < instr.n_src; iidx++) {
@@ -569,6 +572,31 @@ std::string LinkedObjectFile::print_disassembly() {
               }
             }
           }
+
+          // print type map: every GPR/FPR with a known type, starting at column 60
+          if (func.has_typemaps()) {
+            if (line.length() < 60) {
+              line.append(60 - line.length(), ' ');
+            }
+            line += " tm: ";
+            const auto& tm = func.get_typemap_by_instr_idx(i);
+            bool added = false;
+            for (auto reg_kind : {Reg::RegisterKind::GPR, Reg::RegisterKind::FPR}) {
+              for (int reg_idx = 0; reg_idx < 32; reg_idx++) {
+                auto reg = Register(reg_kind, reg_idx);
+                auto kv = tm.find(reg);
+                if (kv != tm.end()) {
+                  added = true;
+                  line += fmt::format("{}: {}, ", reg.to_charp(), kv->second.print());
+                }
+              }
+            }
+
+            if (added) {
+              // drop the ", " left behind by the last entry
+              line.erase(line.size() - 2);
+            }
+          }
         }
         result += line + "\n";
       }
diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp
index 1d4735dd90..8a1d5d8fde 100644
--- a/decompiler/ObjectFile/ObjectFileDB.cpp
+++ b/decompiler/ObjectFile/ObjectFileDB.cpp
@@ -602,6 +602,7 @@ void ObjectFileDB::analyze_functions() {
   int asm_funcs = 0;
   int non_asm_funcs = 0;
   int successful_cfg_irs = 0;
+  int successful_type_analysis = 0;
   std::map> unresolved_by_length;
 
   if (get_config().find_basic_blocks) {
@@ -615,8 +616,15 @@ void ObjectFileDB::analyze_functions() {
       total_functions++;
 
       if (!func.suspected_asm) {
+        // run analysis
+
+        // first, find the prologue/epilogue
         func.analyze_prologue(data.linked_data);
+
+        // build a control flow graph
         func.cfg = build_cfg(data.linked_data, segment_id, func);
+
+        // convert individual basic blocks to sequences of IR Basic Ops
         for (auto& block : func.basic_blocks) {
           if (block.end_word > block.start_word) {
             add_basic_ops_to_block(&func, block, &data.linked_data);
@@ -625,6 +633,7 @@ void ObjectFileDB::analyze_functions() {
         total_basic_ops += func.get_basic_op_count();
         total_failed_basic_ops += func.get_failed_basic_op_count();
 
+        // Combine basic ops + CFG to build a nested IR
         func.ir = build_cfg_ir(func, *func.cfg, data.linked_data);
         non_asm_funcs++;
         if (func.ir) {
@@ -634,6 +643,26 @@ void ObjectFileDB::analyze_functions() {
         if (func.cfg->is_fully_resolved()) {
           resolved_cfg_functions++;
         }
+
+        // type analysis
+        if (func.guessed_name.kind == FunctionName::FunctionKind::GLOBAL) {
+          // we're a global named function. This means we're stored in a symbol
+          auto kv = dts.symbol_types.find(func.guessed_name.function_name);
+          if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) {
+            if (kv->second.base_type() != "function") {
+              spdlog::error("Found a function named {} but the symbol has type {}",
+                            func.guessed_name.to_string(), kv->second.print());
+              assert(false);
+            } else {  // GOOD! (the else keeps assert-free builds from analyzing a non-function)
+              spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(),
+                           kv->second.print());
+              func.run_type_analysis(kv->second, dts, data.linked_data);
+              if (func.has_typemaps()) {
+                successful_type_analysis++;
+              }
+            }
+          }
+        }
       } else {
         asm_funcs++;
       }
@@ -670,8 +699,11 @@ void ObjectFileDB::analyze_functions() {
   int successful_basic_ops = total_basic_ops - total_failed_basic_ops;
   spdlog::info(" {}/{} basic ops converted successfully ({}%)", successful_basic_ops,
                total_basic_ops, 100.f * float(successful_basic_ops) / float(total_basic_ops));
-  spdlog::info(" {}/{} cfgs converted to ir ({}%)\n", successful_cfg_irs, non_asm_funcs,
+  spdlog::info(" {}/{} cfgs converted to ir ({}%)", successful_cfg_irs, non_asm_funcs,
                100.f * float(successful_cfg_irs) / float(non_asm_funcs));
+  spdlog::info(" {}/{} functions passed type analysis ({:.2f}%)\n", successful_type_analysis,
+               non_asm_funcs,  // report 0% rather than nan when there was nothing to analyze
+               non_asm_funcs ? 100.f * float(successful_type_analysis) / float(non_asm_funcs) : 0.f);
 
   // for (auto& kv : unresolved_by_length) {
   //   printf("LEN %d\n", kv.first);
diff --git a/decompiler/config/all-types.gc b/decompiler/config/all-types.gc
index ba53994d70..ba13bb2c1c 100644
--- a/decompiler/config/all-types.gc
+++ b/decompiler/config/all-types.gc
@@ -14,9 +14,82 @@
 ;; SYMBOLS
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+;; BUILTIN
+(define-extern #f symbol)
+(define-extern #t symbol)
+
 ;; GCOMMON
 (define-extern identity (function object object))
+(define-extern 1/ (function float float))
 (define-extern + (function int int int))
+(define-extern - (function int int int))
+(define-extern * (function int int int))
+(define-extern / (function int int int))
+(define-extern ash (function int int int))
+(define-extern mod (function int int int))
+(define-extern rem (function int int int))
+(define-extern abs (function int int))
+(define-extern min (function int int int))
+(define-extern max (function int int int))
+(define-extern logior (function int int int))
+(define-extern logand (function int int int))
+(define-extern lognor (function int int int))
+(define-extern logxor (function int int int))
+(define-extern lognot (function int int))
+(define-extern false-func (function basic))
+(define-extern true-func (function basic))
+;; format
+;; vec4s
+;; vec4s method 3
+;; vec4s method 2
+;; bfloat
+;; bfloat method 3
+;; bfloat method 2
+;; type method 5
+;; basic-type?
+;; type-type?
+;; find-parent-method
+;; pair method 4
+;; pair method 5
+;; last
+;; member
+;; nmember
+;; assoc
+;; assoce
+;; nassoc
+;; nassoce
+;; append!
+;; delete!
+;; delete-car!
+;; insert-cons!
+;; sort
+;; inline-array-class
+;; inline-array-class method 3
+;; inline-array-class method 0
+;; inline-array-class method 4
+;; inline-array-class method 5
+;; array method 0
+;; array method 2
+;; array method 3
+;; array method 4
+;; array method 5
+;; mem-copy!
+;; qmem-copy<-!
+;; qmem-copy->!
+;; mem-set32!
+;; mem-or!
+;; quad-copy!
+;; fact
+;; print-column
+;; print
+;; printl
+;; inspect
+;; mem-print
+;; *trace-list*
+;; print-tree-bitmask
+;; breakpoint-range-set!
+;; valid?
+
 
 (define-extern function type)
 
diff --git a/test/test_type_system.cpp b/test/test_type_system.cpp
index b1e47cd535..b38d1daefb 100644
--- a/test/test_type_system.cpp
+++ b/test/test_type_system.cpp
@@ -316,4 +316,81 @@ TEST(TypeSystem, lca) {
             "(pointer object)");
 }
 
+// A 4-byte load at offset -4 of a string should resolve to its "type" field.
+TEST(TypeSystem, DecompLookupsTypeOfBasic) {
+  TypeSystem ts;
+  ts.add_builtin_types();
+
+  auto string_type = ts.make_typespec("string");
+
+  ReverseDerefInputInfo input;
+  input.input_type = string_type;
+  input.mem_deref = true;
+  input.reg = RegKind::GPR_64;
+  input.load_size = 4;
+  input.sign_extend = false;
+  input.offset = -4;
+
+  auto result = ts.get_reverse_deref_info(input);
+  EXPECT_TRUE(result.success);
+  EXPECT_FALSE(result.addr_of);
+  EXPECT_TRUE(result.result_type == ts.make_typespec("type"));
+  EXPECT_EQ(result.deref_path.size(), 1u);
+  EXPECT_EQ(result.deref_path.at(0).name, "type");
+}
+
+// Offsets into a type should resolve to method-table entries, both as loads and as addresses.
+TEST(TypeSystem, DecompLookupsMethod) {
+  TypeSystem ts;
+  ts.add_builtin_types();
+
+  auto type_type = ts.make_typespec("type");
+
+  ReverseDerefInputInfo input;
+  input.input_type = type_type;
+  input.mem_deref = true;
+  input.reg = RegKind::GPR_64;
+  input.load_size = 4;
+  input.sign_extend = false;
+  input.offset = 16;  // should be method 0, new.
+
+  auto result = ts.get_reverse_deref_info(input);
+  EXPECT_TRUE(result.success);
+  EXPECT_FALSE(result.addr_of);
+  EXPECT_TRUE(result.result_type == ts.make_typespec("function"));
+  EXPECT_EQ(result.deref_path.size(), 2u);
+  EXPECT_EQ(result.deref_path.at(0).name, "method-table");
+  EXPECT_EQ(result.deref_path.at(1).index, 0);
+
+  input.input_type = type_type;
+  input.mem_deref = true;
+  input.reg = RegKind::GPR_64;
+  input.load_size = 4;
+  input.sign_extend = false;
+  input.offset = 24;  // should be method 2
+
+  result = ts.get_reverse_deref_info(input);
+  EXPECT_TRUE(result.success);
+  EXPECT_FALSE(result.addr_of);
+  EXPECT_TRUE(result.result_type == ts.make_typespec("function"));
+  EXPECT_EQ(result.deref_path.size(), 2u);
+  EXPECT_EQ(result.deref_path.at(0).name, "method-table");
+  EXPECT_EQ(result.deref_path.at(1).index, 2);
+
+  input.input_type = type_type;
+  input.mem_deref = false;
+  input.reg = RegKind::GPR_64;
+  input.load_size = 0;
+  input.sign_extend = false;
+  input.offset = 24;  // should be method 2
+
+  result = ts.get_reverse_deref_info(input);
+  EXPECT_TRUE(result.success);
+  EXPECT_TRUE(result.addr_of);
+  EXPECT_TRUE(result.result_type == ts.make_pointer_typespec("function"));
+  EXPECT_EQ(result.deref_path.size(), 2u);
+  EXPECT_EQ(result.deref_path.at(0).name, "method-table");
+  EXPECT_EQ(result.deref_path.at(1).index, 2);
+}
+
 // TODO - a big test to make sure all the builtin types are what we expect.