diff --git a/common/goal_constants.h b/common/goal_constants.h index 91f5f25062..b484be1e99 100644 --- a/common/goal_constants.h +++ b/common/goal_constants.h @@ -10,6 +10,7 @@ constexpr s32 PAIR_OFFSET = 2; constexpr int POINTER_SIZE = 4; constexpr int BASIC_OFFSET = 4; constexpr int STRUCTURE_ALIGNMENT = 16; +constexpr int ARRAY_DATA_OFFSET = 12; // not including type tag constexpr s32 GOAL_MAX_SYMBOLS = 0x2000; constexpr s32 SYM_INFO_OFFSET = 0xff34; diff --git a/common/type_system/CMakeLists.txt b/common/type_system/CMakeLists.txt index 271fcd92b4..2236c564ea 100644 --- a/common/type_system/CMakeLists.txt +++ b/common/type_system/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(type_system TypeSystem.cpp Type.cpp TypeSpec.cpp - deftype.cpp) + deftype.cpp + TypeFieldLookup.cpp) target_link_libraries(type_system fmt goos) \ No newline at end of file diff --git a/common/type_system/TypeFieldLookup.cpp b/common/type_system/TypeFieldLookup.cpp new file mode 100644 index 0000000000..813b906c3e --- /dev/null +++ b/common/type_system/TypeFieldLookup.cpp @@ -0,0 +1,461 @@ +/*! + * @file TypeFieldLookup.cpp + * Reverse field lookup used in the decompiler. + */ + +#include "third-party/fmt/core.h" +#include "TypeSystem.h" + +namespace { +// debug prints for the reverse lookup +bool debug_reverse_lookup = false; + +/*! + * Is the actual dereference compatible with the expected? + */ +bool deref_matches(const DerefInfo& expected, const DerefKind& actual, bool is_integer) { + assert(expected.mem_deref); + assert(expected.can_deref); + if (actual.is_store || actual.size >= 8 || !is_integer) { + // don't check sign extension + return expected.load_size == actual.size; + } else { + return expected.load_size == actual.size && expected.sign_extend == actual.sign_extend; + } +} +} // namespace + +/*! + * Convert a Token in a field path to a string for debugging. + */ +std::string FieldReverseLookupOutput::Token::print() const { + switch (kind) { + case Kind::FIELD: + return name; + case Kind::CONSTANT_IDX: + return std::to_string(idx); + case Kind::VAR_IDX: + return "__VAR__"; + default: + assert(false); + } +} + +/*! + * Main reverse lookup. Check the success field of the result to see if it was successful. + * Will return the type of result as well as the path taken to get there. + * The path can be arbitrarily long because we could be looking through nested inline structures. + * The result is _always_ an actual dereference and there is no way for this to return "no deref". + * The "offset" should always be the actual memory offset in the load instruction. + * This is the "offset into memory" - "boxed offset" + */ +FieldReverseLookupOutput TypeSystem::reverse_field_lookup( + const FieldReverseLookupInput& input) const { + if (debug_reverse_lookup) { + fmt::print("reverse_field_lookup on {} offset {} deref {} stride {}\n", input.base_type.print(), + input.offset, input.deref.has_value(), input.stride); + } + FieldReverseLookupOutput result; + result.success = try_reverse_lookup(input, &result.tokens, &result.addr_of, &result.result_type); + // todo check for only one var lookup. + return result; +} + +/*! + * Reverse lookup helper. Returns true if successful. It's okay to call this an have it fail. + * Will set path/addr_of/result_type if successful. + */ +bool TypeSystem::try_reverse_lookup(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const { + if (debug_reverse_lookup) { + fmt::print(" try_reverse_lookup on {} offset {} deref {} stride {}\n", input.base_type.print(), + input.offset, input.deref.has_value(), input.stride); + } + + auto base_input_type = input.base_type.base_type(); + if (base_input_type == "pointer") { + return try_reverse_lookup_pointer(input, path, addr_of, result_type); + } else if (base_input_type == "inline-array") { + return try_reverse_lookup_inline_array(input, path, addr_of, result_type); + } else if (base_input_type == "array" && input.base_type.has_single_arg()) { + return try_reverse_lookup_array(input, path, addr_of, result_type); + } else { + return try_reverse_lookup_other(input, path, addr_of, result_type); + } + return false; +} + +/*! + * Handle a dereference of a pointer. This can be: + * - just dereferencing a pointer + * - accessing a variable element of a pointer-style array + * - getting the address of a variable element of a pointer-style array + * - accessing a constant element of a pointer-style array + * - getting the address of a constant element of a pointer-style array + */ +bool TypeSystem::try_reverse_lookup_pointer(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const { + if (!input.base_type.has_single_arg()) { + return false; + } + auto di = get_deref_info(input.base_type); + bool is_integer = + typecheck(TypeSpec("integer"), input.base_type.get_single_arg(), "", false, false); + assert(di.mem_deref); // it's accessing a pointer. + auto elt_type = di.result_type; + if (input.stride) { + // variable access to the array. + if (input.stride != di.stride) { + // mismatched array strides, fail! + return false; + } + if (input.offset != 0) { + // can't access within an element of a pointer array. + // todo - this could be some sort of constant folding for the next operation. + return false; + } + + FieldReverseLookupOutput::Token token; + token.kind = FieldReverseLookupOutput::Token::Kind::VAR_IDX; + path->push_back(token); + if (input.deref.has_value()) { + if (deref_matches(di, input.deref.value(), is_integer)) { + // access element of array + *addr_of = false; + *result_type = elt_type; + return true; + } else { + // this isn't the right type of dereference. + return false; + } + } else { + // get the address of a variable indexed element of a pointer-style array + *addr_of = true; + *result_type = make_pointer_typespec(elt_type); + return true; + } + } else { + // either access array or just plain deref a pointer. + int elt_idx = input.offset / di.stride; + int offset_into_elt = input.offset - (elt_idx * di.stride); + if (offset_into_elt) { + // should line up correctly. + return false; + } + + FieldReverseLookupOutput::Token token; + token.kind = FieldReverseLookupOutput::Token::Kind::CONSTANT_IDX; + token.idx = elt_idx; + if (input.deref.has_value()) { + if (!deref_matches(di, input.deref.value(), is_integer)) { + // this isn't the right type of dereference + return false; + } + + // always push back an index so we're never ambiguous. + path->push_back(token); + + // access constant idx element of array + *addr_of = false; + *result_type = elt_type; + return true; + } else { + // we want (&-> arr 0) + path->push_back(token); + // get address of constant idx element of array + *addr_of = true; + *result_type = make_pointer_typespec(elt_type); + return true; + } + } +} + +/*! + * Handle a dereference with an "array" type. + * This has two cases: + * - accessing a field of the array class, not including the array data. Like any other structure. + * - accessing the data array part of the array class, very similar to the pointer-style array. + */ +bool TypeSystem::try_reverse_lookup_array(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const { + // type should be (array elt-type) + if (!input.base_type.has_single_arg()) { + return false; + } + + if (input.offset < ARRAY_DATA_OFFSET) { + // we are accessing a field in an array so we can treat this like any other structure. + // this will add the basic offset, so we can pass in the input unchanged. + return try_reverse_lookup_other(input, path, addr_of, result_type); + } + + // this is the data type - (pointer elt-type). this is stored at an offset of ARRAY_DATA_OFFSET. + auto array_data_type = make_pointer_typespec(input.base_type.get_single_arg()); + auto di = get_deref_info(array_data_type); + bool is_integer = + typecheck(TypeSpec("integer"), input.base_type.get_single_arg(), "", false, false); + assert(di.mem_deref); // it's accessing a pointer. + auto elt_type = di.result_type; + if (input.stride) { + if (input.offset != ARRAY_DATA_OFFSET) { + // might be constant propagated other offsets here? + return false; + } + + // variable access to the array. + if (input.stride != di.stride) { + // mismatched array strides, fail! + return false; + } + + FieldReverseLookupOutput::Token token; + token.kind = FieldReverseLookupOutput::Token::Kind::VAR_IDX; + path->push_back(token); + if (input.deref.has_value()) { + if (deref_matches(di, input.deref.value(), is_integer)) { + // access element of array + *addr_of = false; + *result_type = elt_type; + return true; + } else { + // this isn't the right type of dereference. + return false; + } + } else { + // get the address of a variable indexed element of a pointer-style array + *addr_of = true; + *result_type = make_pointer_typespec(elt_type); + return true; + } + } else { + // either access array or just plain deref a pointer. + int elt_idx = (input.offset - ARRAY_DATA_OFFSET) / di.stride; + int offset_into_elt = (input.offset - ARRAY_DATA_OFFSET) - (elt_idx * di.stride); + if (offset_into_elt) { + // should line up correctly. + return false; + } + + FieldReverseLookupOutput::Token token; + token.kind = FieldReverseLookupOutput::Token::Kind::CONSTANT_IDX; + token.idx = elt_idx; + // always put array index, even if it's zero. + path->push_back(token); + if (input.deref.has_value()) { + if (!deref_matches(di, input.deref.value(), is_integer)) { + // this isn't the right type of dereference + return false; + } + + // access constant idx element of array + *addr_of = false; + *result_type = elt_type; + return true; + } else { + // get address of constant idx element of array + *addr_of = true; + *result_type = make_pointer_typespec(elt_type); + return true; + } + } +} + +/*! + * Handle a reverse deref of an inline-array. This assumes that the array contains inlined + * reference type objects. It can handle + * - get a reference object (variable idx) + * - get something inside an object (variable idx) + * - get a constant idx reference object (we pick this over just getting the array for idx = 0) + * - get something inside a constant idx reference object + */ +bool TypeSystem::try_reverse_lookup_inline_array(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const { + auto di = get_deref_info(input.base_type); + assert(di.can_deref); + assert(!di.mem_deref); // if we make integer arrays allowed to be inline-array, this will break. + + if (input.stride) { + if (input.stride != di.stride) { + return false; + } + + if (input.offset >= di.stride) { + return false; + } + + // variable lookup. + FieldReverseLookupOutput::Token token; + token.kind = FieldReverseLookupOutput::Token::Kind::VAR_IDX; + path->push_back(token); + + if (input.offset == 0 && !input.deref.has_value()) { + *addr_of = false; + *result_type = di.result_type; + return true; + } + + FieldReverseLookupInput next_input; + next_input.deref = input.deref; + next_input.stride = 0; + next_input.offset = input.offset; + next_input.base_type = di.result_type; + return try_reverse_lookup(next_input, path, addr_of, result_type); + } else { + // constant lookup, or accessing within the first one + // which element we are in + int elt_idx = input.offset / di.stride; + // how many bytes into the element we look + int offset_into_elt = input.offset - (elt_idx * di.stride); + // the expected number of bytes into the element we would look to grab a ref to the elt. + int expected_offset_into_elt = lookup_type(di.result_type)->get_offset(); + + FieldReverseLookupOutput::Token token; + token.kind = FieldReverseLookupOutput::Token::Kind::CONSTANT_IDX; + token.idx = elt_idx; + + if (offset_into_elt == expected_offset_into_elt && !input.deref.has_value()) { + // just get an element (possibly zero, and we want to include the 0 if so) + // for the degenerate inline-array case, it seems more likely that we get the zeroth object + // rather than the array? Either way, this code should be compatible with both approaches. + path->push_back(token); + *addr_of = false; + *result_type = di.result_type; + return true; + } + + // otherwise access within the element + if (elt_idx != 0) { + path->push_back(token); + } + FieldReverseLookupInput next_input; + next_input.deref = input.deref; + next_input.stride = 0; + // try_reverse_lookup expects "offset_into_field - boxed_offset" + next_input.offset = offset_into_elt - expected_offset_into_elt; + next_input.base_type = di.result_type; + return try_reverse_lookup(next_input, path, addr_of, result_type); + } +} + +/*! + * Handle a deref for fields of a structure. + * - Access a field which requires mem deref. + * - Get address of a field which requires mem deref. + */ +bool TypeSystem::try_reverse_lookup_other(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const { + auto type_info = lookup_type(input.base_type); + auto structure_type = dynamic_cast(type_info); + if (!structure_type) { + return false; + } + + auto corrected_offset = input.offset + type_info->get_offset(); + // loop over fields. We may need to try multiple fields. + for (auto& field : structure_type->fields()) { + auto field_deref = lookup_field_info(type_info->get_name(), field.name()); + + // how many bytes do we look at? In the case where we're just getting an address, we assume + // one byte, so we'll always pass the size check. + auto effective_load_size = 1; + if (input.deref.has_value()) { + effective_load_size = input.deref->size; + } + + if (corrected_offset >= field.offset() && + (corrected_offset + effective_load_size <= field.offset() + get_size_in_type(field) || + field.is_dynamic())) { + // the field size looks okay. + int offset_into_field = corrected_offset - field.offset(); + + FieldReverseLookupOutput::Token token; + token.kind = FieldReverseLookupOutput::Token::Kind::FIELD; + token.name = field.name(); + + if (field_deref.needs_deref) { + if (offset_into_field == 0) { + if (input.deref.has_value()) { + // needs deref, offset is 0, did a deref. + // Check the deref is right... + // (pointer ) + TypeSpec loc_type = make_pointer_typespec(field_deref.type); + auto di = get_deref_info(loc_type); + bool is_integer = typecheck(TypeSpec("integer"), field_deref.type, "", false, false); + if (!deref_matches(di, input.deref.value(), is_integer)) { + continue; // try another field! + } + // it's a match, just access the field like normal! + if (input.stride) { + continue; + } + path->push_back(token); + *addr_of = false; + *result_type = field_deref.type; + return true; + } else { + // needs a deref, offset is 0, didn't do a deref. + // we're taking the address + if (input.stride) { + continue; + } + path->push_back(token); + *addr_of = true; + *result_type = make_pointer_typespec(field_deref.type); + return true; + } + } else { + if (input.deref.has_value()) { + // needs deref, offset != 0, did a deref. + // try a different field. + continue; + } else { + // needs deref, offset != 0, didn't deref. + // try a different field + continue; + } + } + } else { + // no deref needed + int expected_offset_into_field = 0; + if (field.is_inline()) { + expected_offset_into_field = lookup_type(field.type())->get_offset(); + } + if (offset_into_field == expected_offset_into_field && !input.deref.has_value()) { + // get the inline field. + if (input.stride) { + continue; + } + path->push_back(token); + *result_type = field_deref.type; + *addr_of = false; + return true; + } else { + FieldReverseLookupInput next_input; + next_input.deref = input.deref; + next_input.offset = offset_into_field - expected_offset_into_field; + next_input.stride = input.stride; + next_input.base_type = field_deref.type; + auto old_path = *path; + path->push_back(token); + if (try_reverse_lookup(next_input, path, addr_of, result_type)) { + return true; + } else { + *path = old_path; + continue; + } + } + } + } + } + return false; +} \ No newline at end of file diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index 20bafcceeb..ad2c32f561 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -126,6 +126,10 @@ DerefInfo TypeSystem::get_deref_info(const TypeSpec& ts) const { info.reg = RegKind::GPR_64; info.mem_deref = true; + if (typecheck(TypeSpec("float"), ts, "", false, false)) { + info.reg = RegKind::FLOAT; + } + if (ts.base_type() == "inline-array") { auto result_type = lookup_type(ts.get_single_arg()); auto result_structure_type = dynamic_cast(result_type); diff --git a/common/type_system/TypeSystem.h b/common/type_system/TypeSystem.h index ea3cc90d22..7fc0b4ec15 100644 --- a/common/type_system/TypeSystem.h +++ b/common/type_system/TypeSystem.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "TypeSpec.h" #include "Type.h" @@ -73,6 +74,38 @@ struct ReverseDerefInputInfo { TypeSpec input_type; }; +/*! + * A description of a dereference (size + sign extend) + */ +struct DerefKind { + bool is_store = false; // when true, the sign extension shouldn't matter + int size = -1; // how many bytes + bool sign_extend = false; // for loads only (4 bytes and under), do we sign extend? + RegKind reg_kind = RegKind::INVALID; +}; + +struct FieldReverseLookupInput { + std::optional deref = std::nullopt; // if we actually access memory + int offset = 0; // if we apply a constant offset + int stride = 0; // if we are doing a + (idx * stride) + TypeSpec base_type; // the type of the thing we're accessing +}; + +struct FieldReverseLookupOutput { + struct Token { + enum class Kind { FIELD, CONSTANT_IDX, VAR_IDX } kind; + std::string name; + int idx; + + std::string print() const; + }; + + bool success = false; + bool addr_of = false; // do we take the address of this result? + TypeSpec result_type; + std::vector tokens; +}; + class TypeSystem { public: TypeSystem(); @@ -85,6 +118,7 @@ class TypeSystem { DerefInfo get_deref_info(const TypeSpec& ts) const; ReverseDerefInfo get_reverse_deref_info(const ReverseDerefInputInfo& input) const; + FieldReverseLookupOutput reverse_field_lookup(const FieldReverseLookupInput& input) const; bool fully_defined_type_exists(const std::string& name) const; bool partially_defined_type_exists(const std::string& name) const; @@ -171,6 +205,26 @@ class TypeSystem { std::vector* path, bool* addr_of, TypeSpec* result_type) const; + bool try_reverse_lookup(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const; + bool try_reverse_lookup_pointer(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const; + bool try_reverse_lookup_inline_array(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const; + bool try_reverse_lookup_array(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const; + bool try_reverse_lookup_other(const FieldReverseLookupInput& input, + std::vector* path, + bool* addr_of, + TypeSpec* result_type) const; std::string lca_base(const std::string& a, const std::string& b); bool typecheck_base_types(const std::string& expected, const std::string& actual) const; int get_size_in_type(const Field& field) const; diff --git a/decompiler/IR/BasicOpBuilder.cpp b/decompiler/IR/BasicOpBuilder.cpp index d315bbe9c0..c88419c85a 100644 --- a/decompiler/IR/BasicOpBuilder.cpp +++ b/decompiler/IR/BasicOpBuilder.cpp @@ -1005,14 +1005,26 @@ std::shared_ptr try_sw(Instruction& instr, int idx) { op->update_reginfo_self(0, 2, 0); return op; } else { - auto op = std::make_shared( - IR_Store_Atomic::INTEGER, - std::make_shared( - IR_IntMath2::ADD, make_reg(instr.get_src(2).get_reg(), idx), - std::make_shared(instr.get_src(1).get_imm())), - make_reg(instr.get_src(0).get_reg(), idx), 4); - op->update_reginfo_self(0, 2, 0); - return op; + if (instr.get_src(0).is_reg(make_gpr(Reg::S7))) { + // store false + auto op = std::make_shared( + IR_Store_Atomic::Kind::INTEGER, + std::make_shared( + IR_IntMath2::ADD, make_reg(instr.get_src(2).get_reg(), idx), + std::make_shared(instr.get_src(1).get_imm())), + make_sym("#f"), 4); + op->update_reginfo_self(0, 1, 0); + return op; + } else { + auto op = std::make_shared( + IR_Store_Atomic::INTEGER, + std::make_shared( + IR_IntMath2::ADD, make_reg(instr.get_src(2).get_reg(), idx), + std::make_shared(instr.get_src(1).get_imm())), + make_reg(instr.get_src(0).get_reg(), idx), 4); + op->update_reginfo_self(0, 2, 0); + return op; + } } } return nullptr; diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index e81a6b13c3..cea9791217 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -517,9 +517,9 @@ class IR_Breakpoint_Atomic : public virtual IR_Atomic { IR_Breakpoint_Atomic() = default; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - // void propagate_types(const TypeState& input, - // const LinkedObjectFile& file, - // DecompilerTypeSystem& dts) override; + void propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_Begin : public virtual IR { @@ -655,9 +655,9 @@ class IR_AsmOp_Atomic : public virtual IR_AsmOp, public IR_Atomic { public: IR_AsmOp_Atomic(std::string _name) : IR_AsmOp(std::move(_name)) {} void set_reg_info(); - // void propagate_types(const TypeState& input, - // const LinkedObjectFile& file, - // DecompilerTypeSystem& dts) override; + void propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_CMoveF : public virtual IR { diff --git a/decompiler/IR/IR_TypeAnalysis.cpp b/decompiler/IR/IR_TypeAnalysis.cpp index d085e683b3..22c19ce34f 100644 --- a/decompiler/IR/IR_TypeAnalysis.cpp +++ b/decompiler/IR/IR_TypeAnalysis.cpp @@ -193,93 +193,88 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, return TP_Type::make_from_typespec(method_type); } - // if (input_type.kind == TP_Type::OBJECT_OF_TYPE && - // input_type.as_typespec() == TypeSpec("type") && ro.offset >= 16 && (ro.offset & 3) == - // 0 - // && size == 4 && kind == UNSIGNED) { - // // method get of dynamic type. - // auto method_id = (ro.offset - 16) / 4; - // auto method_info = dts.ts.lookup_method("object", method_id); - // return TP_Type(method_info.type.substitute_for_method_call("object")); - // } - // - // if (input_type.kind == TP_Type::OBJECT_OF_TYPE && - // input_type.as_typespec() == TypeSpec("pointer")) { - // // we got a plain pointer. let's just assume we're loading an integer. - // // perhaps we should disable this feature by default on 4-byte loads if we're getting - // // lots of false positives for loading pointers from plain pointers. - // - // // todo, load_path - // switch (kind) { - // case UNSIGNED: - // switch (size) { - // case 1: - // return TP_Type(TypeSpec("uint")); - // case 2: - // return TP_Type(TypeSpec("uint")); - // case 4: - // return TP_Type(TypeSpec("uint")); - // case 8: - // return TP_Type(TypeSpec("uint")); - // case 16: - // return TP_Type(TypeSpec("uint")); - // default: - // assert(false); - // } - // break; - // case SIGNED: - // switch (size) { - // case 1: - // return TP_Type(TypeSpec("int")); - // case 2: - // return TP_Type(TypeSpec("int")); - // case 4: - // return TP_Type(TypeSpec("int")); - // case 8: - // return TP_Type(TypeSpec("int")); - // case 16: - // return TP_Type(TypeSpec("int")); - // default: - // assert(false); - // } - // break; - // case FLOAT: - // return TP_Type(TypeSpec("float")); - // default: - // assert(false); - // } - // } - // + if (input_type.kind == TP_Type::Kind::TYPESPEC && input_type.typespec() == TypeSpec("type") && + ro.offset >= 16 && (ro.offset & 3) == 0 && size == 4 && kind == UNSIGNED) { + // method get of an unknown type. We assume the most general "object" type. + auto method_id = (ro.offset - 16) / 4; + auto method_info = dts.ts.lookup_method("object", method_id); + if (method_id != GOAL_NEW_METHOD && method_id != GOAL_RELOC_METHOD) { + // this can get us the wrong thing for `new` methods. And maybe relocate? + return TP_Type::make_from_typespec(method_info.type.substitute_for_method_call("object")); + } + } + + if (input_type.typespec() == TypeSpec("pointer")) { + // we got a plain pointer. let's just assume we're loading an integer. + // perhaps we should disable this feature by default on 4-byte loads if we're getting + // lots of false positives for loading pointers from plain pointers. + + switch (kind) { + case UNSIGNED: + switch (size) { + case 1: + case 2: + case 4: + case 8: + return TP_Type::make_from_typespec(TypeSpec("uint")); + default: + break; + } + break; + case SIGNED: + switch (size) { + case 1: + case 2: + case 4: + case 8: + return TP_Type::make_from_typespec(TypeSpec("int")); + default: + break; + } + break; + case FLOAT: + return TP_Type::make_from_typespec(TypeSpec("float")); + default: + assert(false); + } + } - // } else - // if (input_type.kind == TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT) { - // note, we discard and completely ignore the stride here. - ReverseDerefInputInfo rd_in; - rd_in.mem_deref = true; - rd_in.input_type = input_type.get_obj_plus_const_mult_typespec(); - rd_in.reg = get_reg_kind(ro.reg); // bleh + FieldReverseLookupInput rd_in; + DerefKind dk; + dk.is_store = false; + dk.reg_kind = get_reg_kind(ro.reg); + dk.sign_extend = kind == SIGNED; + dk.size = size; + rd_in.deref = dk; + rd_in.base_type = input_type.get_obj_plus_const_mult_typespec(); + rd_in.stride = input_type.get_multiplier(); rd_in.offset = ro.offset; - rd_in.sign_extend = kind == SIGNED; - rd_in.load_size = size; - auto rd = dts.ts.get_reverse_deref_info(rd_in); + auto rd = dts.ts.reverse_field_lookup(rd_in); if (rd.success) { + load_path_set = true; + load_path_addr_of = rd.addr_of; + load_path_base = ro.reg_ir; + for (auto& x : rd.tokens) { + load_path.push_back(x.print()); + } return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type)); } } - // } else { - // if (input_type.kind == TP_Type::OBJECT_OF_TYPE && ro.offset == -4 && kind == UNSIGNED - // && - // size == 4 && ro.reg.get_kind() == Reg::GPR) { - // // get type of basic likely, but misrecognized as an object. - // // occurs often in typecase-like structures because other possible types are - // "stripped". load_path_base = ro.reg_ir; load_path_addr_of = false; - // load_path.push_back("type"); - // load_path_set = true; - // - // return TP_Type::make_type_object(input_type.as_typespec().base_type()); - // } + + if (input_type.kind == TP_Type::Kind::TYPESPEC && ro.offset == -4 && kind == UNSIGNED && + size == 4 && ro.reg.get_kind() == Reg::GPR) { + // get type of basic likely, but misrecognized as an object. + // occurs often in typecase-like structures because other possible types are + // "stripped". + load_path_base = ro.reg_ir; + load_path_addr_of = false; + load_path.push_back("type"); + load_path_set = true; + + return TP_Type::make_type_object(input_type.typespec().base_type()); + } // // if (input_type.as_typespec() == TypeSpec("object") && ro.offset == -4 && kind == // UNSIGNED @@ -297,20 +292,22 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, return TP_Type::make_from_typespec(TypeSpec("function")); } // Assume we're accessing a field of an object. - ReverseDerefInputInfo rd_in; - rd_in.mem_deref = true; - rd_in.input_type = input_type.typespec(); - rd_in.reg = get_reg_kind(ro.reg); + FieldReverseLookupInput rd_in; + DerefKind dk; + dk.is_store = false; + dk.reg_kind = get_reg_kind(ro.reg); + dk.sign_extend = kind == SIGNED; + dk.size = size; + rd_in.deref = dk; + rd_in.base_type = input_type.typespec(); + rd_in.stride = 0; rd_in.offset = ro.offset; - rd_in.sign_extend = kind == SIGNED; - rd_in.load_size = size; - - auto rd = dts.ts.get_reverse_deref_info(rd_in); + auto rd = dts.ts.reverse_field_lookup(rd_in); // only error on failure if "pair" is disabled. otherwise it might be a pair. if (!rd.success && !dts.type_prop_settings.allow_pair) { - printf("input type is %s, offset is %d, sign %d size %d\n", rd_in.input_type.print().c_str(), - rd_in.offset, rd_in.sign_extend, rd_in.load_size); + printf("input type is %s, offset is %d, sign %d size %d\n", rd_in.base_type.print().c_str(), + rd_in.offset, rd_in.deref.value().sign_extend, rd_in.deref.value().size); throw std::runtime_error( fmt::format("Could not get type of load: {}. Reverse Deref Failed.", print(file))); } @@ -319,7 +316,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, load_path_set = true; load_path_addr_of = rd.addr_of; load_path_base = ro.reg_ir; - for (auto& x : rd.deref_path) { + for (auto& x : rd.tokens) { load_path.push_back(x.print()); } return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type)); @@ -410,6 +407,12 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, break; } + case MUL_SIGNED: { + if (arg0_type.is_integer_constant() && is_int_or_uint(dts, arg1_type)) { + return TP_Type::make_from_product(arg0_type.get_integer_constant()); + } + } break; + case ADD: if (arg0_type.is_product_with(4) && tc(dts, TypeSpec("type"), arg1_type)) { // dynamic access into the method array with shift, add, offset-load @@ -438,9 +441,13 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, return TP_Type::make_from_typespec(arg0_type.typespec()); } + if (tc(dts, TypeSpec("binteger"), arg0_type) && is_int_or_uint(dts, arg1_type)) { + return TP_Type::make_from_typespec(TypeSpec("binteger")); + } + // special cases for non-integers if ((arg0_type.typespec() == TypeSpec("object") || arg0_type.typespec() == TypeSpec("pair")) && - arg1_type.is_integer_constant(62)) { + (arg1_type.is_integer_constant(62) || arg1_type.is_integer_constant(61))) { // boxed object tag trick. return TP_Type::make_from_typespec(TypeSpec("int")); } @@ -493,20 +500,21 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, // } // // - // auto a1_const = dynamic_cast(arg1.get()); - // if (a1_const && kind == ADD && arg0_type.kind == TP_Type::OBJECT_OF_TYPE) { - // // access a field. - // ReverseDerefInputInfo rd_in; - // rd_in.mem_deref = false; - // rd_in.input_type = arg0_type.as_typespec(); - // rd_in.offset = a1_const->value; - // rd_in.load_size = 0; - // auto rd = dts.ts.get_reverse_deref_info(rd_in); - // - // if (rd.success) { - // return TP_Type(coerce_to_reg_type(rd.result_type)); - // } - // } + auto a1_const = dynamic_cast(arg1.get()); + if (a1_const && kind == ADD && arg0_type.kind == TP_Type::Kind::TYPESPEC) { + // access a field. + FieldReverseLookupInput rd_in; + rd_in.deref = std::nullopt; + rd_in.stride = 0; + rd_in.offset = a1_const->value; + rd_in.base_type = arg0_type.typespec(); + auto rd = dts.ts.reverse_field_lookup(rd_in); + + if (rd.success) { + // todo, load path. + return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type)); + } + } // // if (kind == ADD && is_integer_type(arg0_type) && arg1_type.kind == TP_Type::OBJECT_OF_TYPE) // { @@ -522,6 +530,22 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, return TP_Type::make_object_plus_product(arg1_type.typespec(), arg0_type.get_multiplier()); } + if (kind == ADD && arg1_type.is_product() && arg0_type.kind == TP_Type::Kind::TYPESPEC) { + return TP_Type::make_object_plus_product(arg0_type.typespec(), arg1_type.get_multiplier()); + } + + if (kind == ADD && arg0_type.typespec() == TypeSpec("pointer") && + tc(dts, TypeSpec("integer"), arg1_type)) { + // plain pointer plus integer = plain pointer + return TP_Type::make_from_typespec(TypeSpec("pointer")); + } + + if (kind == ADD && arg1_type.typespec() == TypeSpec("pointer") && + tc(dts, TypeSpec("integer"), arg0_type)) { + // plain pointer plus integer = plain pointer + return TP_Type::make_from_typespec(TypeSpec("pointer")); + } + // byte access of offset array field trick. // arg1 holds a structure. // arg0 is an integer in a register. @@ -530,6 +554,11 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, return TP_Type::make_object_plus_product(arg1_type.typespec(), 1); } + if (kind == AND) { + // base case for and. Just get an integer. + return TP_Type::make_from_typespec(TypeSpec("int")); + } + // // if (kind == ADD && // dts.ts.typecheck(TypeSpec("pointer"), arg0_type.as_typespec(), "", false, false) && @@ -549,11 +578,10 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, // return arg0_type; // } // - // if (kind == SUB && - // dts.ts.typecheck(TypeSpec("pointer"), arg0_type.as_typespec(), "", false, false) && - // dts.ts.typecheck(TypeSpec("pointer"), arg1_type.as_typespec(), "", false, false)) { - // return TP_Type(TypeSpec("int")); - // } + if (kind == SUB && tc(dts, TypeSpec("pointer"), arg0_type) && + tc(dts, TypeSpec("pointer"), arg1_type)) { + return TP_Type::make_from_typespec(TypeSpec("int")); + } throw std::runtime_error( fmt::format("Can't get_expression_type on this IR_IntMath2: {}, args {} and {}", print(file), @@ -859,29 +887,29 @@ TP_Type IR_StaticAddress::get_expression_type(const TypeState& input, throw std::runtime_error("IR_StaticAddress couldn't figure out the type: " + label.name); } -// -// void IR_AsmOp_Atomic::propagate_types(const TypeState& input, -// const LinkedObjectFile& file, -// DecompilerTypeSystem& dts) { -// (void)file; -// (void)dts; -// auto dst_reg = dynamic_cast(dst.get()); -// end_types = input; -// if (dst_reg) { -// if (name == "daddu") { -// end_types.get(dst_reg->reg) = TP_Type(TypeSpec("uint")); -// } -// } -//} -// -// void IR_Breakpoint_Atomic::propagate_types(const TypeState& input, -// const LinkedObjectFile& file, -// DecompilerTypeSystem& dts) { -// (void)file; -// (void)dts; -// end_types = input; -//} -// + +void IR_AsmOp_Atomic::propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)file; + (void)dts; + auto dst_reg = dynamic_cast(dst.get()); + end_types = input; + if (dst_reg) { + if (name == "daddu") { + end_types.get(dst_reg->reg) = TP_Type::make_from_typespec(TypeSpec("uint")); + } + } +} + +void IR_Breakpoint_Atomic::propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)file; + (void)dts; + end_types = input; +} + TP_Type IR_EmptyPair::get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) { diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index d0b25986b8..4944ab71ac 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -800,7 +800,29 @@ std::string LinkedObjectFile::print_type_analysis_debug() { // result += func.basic_ops.at(i)->print(*this); if (func.attempted_type_analysis) { result += fmt::format("[{:3d}] ", i); - result += func.basic_ops.at(i)->print_with_types(*init_types, *this); + auto& op = func.basic_ops.at(i); + result += op->print_with_types(*init_types, *this); + + // temporary debug load path print + auto op_as_set = dynamic_cast(op.get()); + if (op_as_set) { + auto op_as_load = dynamic_cast(op_as_set->src.get()); + if (op_as_load && op_as_load->load_path_set) { + if (op_as_load->load_path_addr_of) { + result += " (&->"; + } else { + result += " (->"; + } + result += ' '; + result += op_as_load->load_path_base->print(*this); + for (auto& tok : op_as_load->load_path) { + result += ' '; + result += tok; + } + result += ')'; + } + } + result += "\n"; init_types = &func.basic_ops.at(i)->end_types; } else { diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index f40d301b30..d1823cfacc 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -90,6 +90,8 @@ "no_type_analysis_functions_by_name":[ "(method 2 vec4s)", // 128-bit bitfield. "(method 3 vec4s)", // 128-bit bitfield + "qmem-copy<-!", // 128-bit loads and stores + "qmem-copy->!", // 128-bit loads and stores "reset-and-call", // stack manipulation "(method 10 cpu-thread)" // loading saved regs off of the stack. ], diff --git a/decompiler/config/jak1_ntsc_black_label/type_hints.jsonc b/decompiler/config/jak1_ntsc_black_label/type_hints.jsonc index 2a00faf862..148d724a01 100644 --- a/decompiler/config/jak1_ntsc_black_label/type_hints.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/type_hints.jsonc @@ -1,4 +1,36 @@ { + "(method 2 array)":[ + [23, ["gp", "(array int32)"]], + [43, ["gp", "(array uint32)"]], + [63, ["gp", "(array int64)"]], + [83, ["gp", "(array uint64)"]], + [102, ["gp", "(array int8)"]], + [121, ["gp", "(array uint8)"]], + [141, ["gp", "(array int16)"]], + [161, ["gp", "(array uint16)"]], + [185, ["gp", "(array uint128)"]], + [203, ["gp", "(array int32)"]], + [222, ["gp", "(array float)"]], + [231, ["gp", "(array float)"]], + [248, ["gp", "(array basic)"]], + [257, ["gp", "(array basic)"]] + ], + + "(method 3 array)":[ + [44, ["gp", "(array int32)"]], + [62, ["gp", "(array uint32)"]], + [80, ["gp", "(array int64)"]], + [98, ["gp", "(array uint64)"]], + [115, ["gp", "(array int8)"]], + [132, ["gp", "(array int8)"]], // bug in game + [150, ["gp", "(array int16)"]], + [168, ["gp", "(array uint16)"]], + [190, ["gp", "(array uint128)"]], + [203, ["gp", "(array int32)"]], + [225, ["gp", "(array float)"]], + [242, ["gp", "(array basic)"]] + ], + "(method 2 handle)":[ [10, ["a3", "process"]], [11, ["v1", "int"]], diff --git a/decompiler/util/DecompilerTypeSystem.cpp b/decompiler/util/DecompilerTypeSystem.cpp index 849fd73769..7943ad3132 100644 --- a/decompiler/util/DecompilerTypeSystem.cpp +++ b/decompiler/util/DecompilerTypeSystem.cpp @@ -262,7 +262,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add result_type = TP_Type::make_from_typespec(TypeSpec("string")); } - *changed = (result_type == existing); + *changed = (result_type != existing); return result_type; } diff --git a/decompiler/util/TP_Type.h b/decompiler/util/TP_Type.h index 2a3169f9ef..dfb154ef36 100644 --- a/decompiler/util/TP_Type.h +++ b/decompiler/util/TP_Type.h @@ -229,6 +229,11 @@ class TP_Type { return m_int; } + uint64_t get_integer_constant() const { + assert(kind == Kind::INTEGER_CONSTANT); + return m_int; + } + private: TypeSpec m_ts; std::string m_str; diff --git a/goalc/compiler/compilation/Type.cpp b/goalc/compiler/compilation/Type.cpp index 4dc0cf9e67..ced2734375 100644 --- a/goalc/compiler/compilation/Type.cpp +++ b/goalc/compiler/compilation/Type.cpp @@ -538,7 +538,7 @@ Val* Compiler::compile_deref(const goos::Object& form, const goos::Object& _rest assert(di.mem_deref); assert(di.can_deref); // the total offset is 12 + stride * idx - auto offset = compile_integer(12, env)->to_gpr(env); + auto offset = compile_integer(ARRAY_DATA_OFFSET, env)->to_gpr(env); auto stride = compile_integer(di.stride, env)->to_gpr(env); env->emit(std::make_unique(IntegerMathKind::IMUL_32, stride, index_value)); env->emit_ir(IntegerMathKind::ADD_64, offset, stride); diff --git a/test/test_type_system.cpp b/test/test_type_system.cpp index c257004c2d..44442eb13b 100644 --- a/test/test_type_system.cpp +++ b/test/test_type_system.cpp @@ -2,6 +2,7 @@ #include "common/type_system/TypeSystem.h" #include "common/goos/Reader.h" #include "common/type_system/deftype.h" +#include "common/goos/ParseHelpers.h" TEST(TypeSystem, Construction) { // test that we can add all builtin types without any type errors @@ -322,21 +323,23 @@ TEST(TypeSystem, DecompLookupsTypeOfBasic) { ts.add_builtin_types(); auto string_type = ts.make_typespec("string"); - - ReverseDerefInputInfo input; - input.input_type = string_type; - input.mem_deref = true; - input.reg = RegKind::GPR_64; - input.load_size = 4; - input.sign_extend = false; + FieldReverseLookupInput input; + input.stride = 0; + input.base_type = string_type; input.offset = -4; + DerefKind dk; + dk.size = 4; + dk.sign_extend = false; + dk.is_store = false; + dk.reg_kind = RegKind::GPR_64; + input.deref = dk; + auto result = ts.reverse_field_lookup(input); - auto result = ts.get_reverse_deref_info(input); EXPECT_TRUE(result.success); EXPECT_FALSE(result.addr_of); EXPECT_TRUE(result.result_type == ts.make_typespec("type")); - EXPECT_EQ(result.deref_path.size(), 1); - EXPECT_EQ(result.deref_path.at(0).name, "type"); + EXPECT_EQ(result.tokens.size(), 1); + EXPECT_EQ(result.tokens.at(0).name, "type"); } TEST(TypeSystem, DecompLookupsMethod) { @@ -345,51 +348,54 @@ TEST(TypeSystem, DecompLookupsMethod) { auto type_type = ts.make_typespec("type"); - ReverseDerefInputInfo input; - input.input_type = type_type; - input.mem_deref = true; - input.reg = RegKind::GPR_64; - input.load_size = 4; - input.sign_extend = false; - input.offset = 16; // should be method 0, new. + FieldReverseLookupInput input; + input.stride = 0; + input.base_type = type_type; + input.offset = 16; + DerefKind dk; + dk.size = 4; + dk.sign_extend = false; + dk.is_store = false; + dk.reg_kind = RegKind::GPR_64; + input.deref = dk; + auto result = ts.reverse_field_lookup(input); - auto result = ts.get_reverse_deref_info(input); EXPECT_TRUE(result.success); EXPECT_FALSE(result.addr_of); EXPECT_TRUE(result.result_type == ts.make_typespec("function")); - EXPECT_EQ(result.deref_path.size(), 2); - EXPECT_EQ(result.deref_path.at(0).name, "method-table"); - EXPECT_EQ(result.deref_path.at(1).index, 0); + EXPECT_EQ(result.tokens.size(), 2); + EXPECT_EQ(result.tokens.at(0).name, "method-table"); + EXPECT_EQ(result.tokens.at(1).idx, 0); - input.input_type = type_type; - input.mem_deref = true; - input.reg = RegKind::GPR_64; - input.load_size = 4; - input.sign_extend = false; - input.offset = 24; // should be method 2 + input.stride = 0; + input.base_type = type_type; + input.offset = 24; + dk.size = 4; + dk.sign_extend = false; + dk.is_store = false; + dk.reg_kind = RegKind::GPR_64; + input.deref = dk; + result = ts.reverse_field_lookup(input); - result = ts.get_reverse_deref_info(input); EXPECT_TRUE(result.success); EXPECT_FALSE(result.addr_of); EXPECT_TRUE(result.result_type == ts.make_typespec("function")); - EXPECT_EQ(result.deref_path.size(), 2); - EXPECT_EQ(result.deref_path.at(0).name, "method-table"); - EXPECT_EQ(result.deref_path.at(1).index, 2); + EXPECT_EQ(result.tokens.size(), 2); + EXPECT_EQ(result.tokens.at(0).name, "method-table"); + EXPECT_EQ(result.tokens.at(1).idx, 2); - input.input_type = type_type; - input.mem_deref = false; - input.reg = RegKind::GPR_64; - input.load_size = 0; - input.sign_extend = false; - input.offset = 24; // should be method 2 + input.stride = 0; + input.base_type = type_type; + input.offset = 24; + input.deref = std::nullopt; + result = ts.reverse_field_lookup(input); - result = ts.get_reverse_deref_info(input); EXPECT_TRUE(result.success); EXPECT_TRUE(result.addr_of); EXPECT_TRUE(result.result_type == ts.make_pointer_typespec("function")); - EXPECT_EQ(result.deref_path.size(), 2); - EXPECT_EQ(result.deref_path.at(0).name, "method-table"); - EXPECT_EQ(result.deref_path.at(1).index, 2); + EXPECT_EQ(result.tokens.size(), 2); + EXPECT_EQ(result.tokens.at(0).name, "method-table"); + EXPECT_EQ(result.tokens.at(1).idx, 2); } TEST(Deftype, deftype) {