diff --git a/.github/workflows/linux-workflow.yaml b/.github/workflows/linux-workflow.yaml new file mode 100644 index 0000000000..c72db11797 --- /dev/null +++ b/.github/workflows/linux-workflow.yaml @@ -0,0 +1,26 @@ +name: Linux Workflow +on: [push] + +jobs: + build: + name: Build and Test Project (Linux) + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v2 + - name: Get Package Dependencies + run: sudo apt-get update && sudo apt-get install -y gcc make cmake build-essential g++ nasm clang-format + - name: Initialize Submodules + run: git submodule update --init --recursive + - name: Build Project with CMake + run: | + mkdir build + cd build + cmake .. + make -j$(nproc) + - name: Test Project with gTest + run: ./test.sh + - name: Check Clang-Formatting + run: | + chmod +x ./third-party/run-clang-format/run-clang-format.py + ./third-party/run-clang-format/run-clang-format.py -r common decompiler game goalc test --color always diff --git a/.gitmodules b/.gitmodules index 3f127b847c..522084739f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "third-party/googletest"] path = third-party/googletest - url = https://github.com/google/googletest.git + url = https://github.com/google/googletest.git \ No newline at end of file diff --git a/README.md b/README.md index 0fdd8465f2..1a6a7ef923 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Jak Project +![Linux Workflow](https://github.com/water111/jak-project/workflows/Linux%20Workflow/badge.svg?branch=master) ## Table of Contents diff --git a/common/goal_constants.h b/common/goal_constants.h index bf9239e322..88d6896e2f 100644 --- a/common/goal_constants.h +++ b/common/goal_constants.h @@ -1,16 +1,24 @@ #ifndef JAK_GOAL_CONSTANTS_H #define JAK_GOAL_CONSTANTS_H +#include "common_types.h" + +constexpr s32 BINTEGER_OFFSET = 0; +constexpr s32 PAIR_OFFSET = 2; constexpr int POINTER_SIZE = 4; constexpr int BASIC_OFFSET = 4; constexpr int STRUCTURE_ALIGNMENT = 16; -enum class RegKind { - GPR_64, -
FLOAT, - INT_128, - FLOAT_4X, - INVALID -}; +enum class RegKind { GPR_64, FLOAT, INT_128, FLOAT_4X, INVALID }; + +constexpr u32 GOAL_NEW_METHOD = 0; // method ID of GOAL new +constexpr u32 GOAL_DEL_METHOD = 1; // method ID of GOAL delete +constexpr u32 GOAL_PRINT_METHOD = 2; // method ID of GOAL print +constexpr u32 GOAL_INSPECT_METHOD = 3; // method ID of GOAL inspect +constexpr u32 GOAL_LENGTH_METHOD = 4; // method ID of GOAL length +constexpr u32 GOAL_ASIZE_METHOD = 5; // method ID of GOAL size +constexpr u32 GOAL_COPY_METHOD = 6; // method ID of GOAL copy +constexpr u32 GOAL_RELOC_METHOD = 7; // method ID of GOAL relocate +constexpr u32 GOAL_MEMUSAGE_METHOD = 8; // method ID of GOAL mem-usage #endif // JAK_GOAL_CONSTANTS_H diff --git a/common/symbols.h b/common/symbols.h index 750e430d87..a16cfcf543 100644 --- a/common/symbols.h +++ b/common/symbols.h @@ -6,75 +6,76 @@ #ifndef JAK1_SYMBOLS_H #define JAK1_SYMBOLS_H -constexpr int FIX_SYM_EMPTY_CAR = -0xc; +constexpr int FIX_SYM_EMPTY_CAR = -0xc; constexpr int FIX_SYM_EMPTY_PAIR = -0xa; -constexpr int FIX_SYM_EMPTY_CDR = -0x8; -constexpr int FIX_SYM_FALSE = 0x0; // GOAL boolean #f (note that this is equal to the $s7 register) -constexpr int FIX_SYM_TRUE = 0x8; // GOAL boolean #t +constexpr int FIX_SYM_EMPTY_CDR = -0x8; +constexpr int FIX_SYM_FALSE = 0x0; // GOAL boolean #f (note that this is equal to the $s7 register) +constexpr int FIX_SYM_TRUE = 0x8; // GOAL boolean #t // types -constexpr int FIX_SYM_FUNCTION_TYPE = 0x10; // GOAL type of function -constexpr int FIX_SYM_BASIC_TYPE = 0x18; // GOAL structure type with type tag -constexpr int FIX_SYM_STRING_TYPE = 0x20; // GOAL string type (gstring) -constexpr int FIX_SYM_SYMBOL_TYPE = 0x28; // GOAL symbol type -constexpr int FIX_SYM_TYPE_TYPE = 0x30; // GOAL type of type -constexpr int FIX_SYM_OBJECT_TYPE = 0x38; // GOAL parent type of all types -constexpr int FIX_SYM_LINK_BLOCK = 0x40; // GOAL type of link-block (used by linker, but seems to be unused by GOAL) 
-constexpr int FIX_SYM_INTEGER_TYPE = 0x48; // GOAL integer parent type, assumes unboxed -constexpr int FIX_SYM_SINTEGER_TYPE = 0x50; // GOAL signed integer parent type, assumes unboxed -constexpr int FIX_SYM_UINTEGER_TYPE = 0x58; // GOAL unsinged integer parent type, assumes unboxed -constexpr int FIX_SYM_BINTEGER_TYPE = 0x60; // GOAL "boxed integer" type -constexpr int FIX_SYM_INT8_TYPE = 0x68; // GOAL 8-bit signed integer -constexpr int FIX_SYM_INT16_TYPE = 0x70; // ... -constexpr int FIX_SYM_INT32_TYPE = 0x78; // ... -constexpr int FIX_SYM_INT64_TYPE = 0x80; // ... -constexpr int FIX_SYM_INT128_TYPE = 0x88; // GOAL 128-bit integer type, behaves strangely -constexpr int FIX_SYM_UINT8_TYPE = 0x90; // GOAL 8-bit unsigned integer -constexpr int FIX_SYM_UINT16_TYPE = 0x98; // ... -constexpr int FIX_SYM_UINT32_TYPE = 0xA0; // ... -constexpr int FIX_SYM_UINT64_TYPE = 0xA8; // ... -constexpr int FIX_SYM_UINT128_TYPE = 0xB0; // ... -constexpr int FIX_SYM_FLOAT_TYPE = 0xB8; // GOAL 32-bit floating point type -constexpr int FIX_SYM_PROCESS_TREE_TYPE = 0xC0; // GOAL process-tree type. Used in the gkernel -constexpr int FIX_SYM_PROCESS_TYPE = 0xC8; // GOAL process type -constexpr int FIX_SYM_THREAD_TYPE = 0xD0; // GOAL thread type -constexpr int FIX_SYM_STRUCTURE_TYPE = 0xD8; // GOAL structure type. 
Any type with fields -constexpr int FIX_SYM_PAIR_TYPE = 0xE0; // GOAL pair type -constexpr int FIX_SYM_POINTER_TYPE = 0xE8; // GOAL pointer type (32-bit) -constexpr int FIX_SYM_NUMBER_TYPE = 0xF0; // GOAL number type (parent of integer/float types) -constexpr int FIX_SYM_ARRAY_TYPE = 0xF8; // GOAL array type -constexpr int FIX_SYM_VU_FUNCTION_TYPE = 0x100; // GOAL vu-function type -constexpr int FIX_SYM_CONNECTABLE_TYPE = 0x108; // GOAL connectable -constexpr int FIX_SYM_STACK_FRAME_TYPE = 0x110; // GOAL stack-frame -constexpr int FIX_SYM_FILE_STREAM_TYPE = 0x118; // GOAL file-stream -constexpr int FIX_SYM_KHEAP = 0x120; // GOAL kheap +constexpr int FIX_SYM_FUNCTION_TYPE = 0x10; // GOAL type of function +constexpr int FIX_SYM_BASIC_TYPE = 0x18; // GOAL structure type with type tag +constexpr int FIX_SYM_STRING_TYPE = 0x20; // GOAL string type (gstring) +constexpr int FIX_SYM_SYMBOL_TYPE = 0x28; // GOAL symbol type +constexpr int FIX_SYM_TYPE_TYPE = 0x30; // GOAL type of type +constexpr int FIX_SYM_OBJECT_TYPE = 0x38; // GOAL parent type of all types +constexpr int FIX_SYM_LINK_BLOCK = + 0x40; // GOAL type of link-block (used by linker, but seems to be unused by GOAL) +constexpr int FIX_SYM_INTEGER_TYPE = 0x48; // GOAL integer parent type, assumes unboxed +constexpr int FIX_SYM_SINTEGER_TYPE = 0x50; // GOAL signed integer parent type, assumes unboxed +constexpr int FIX_SYM_UINTEGER_TYPE = 0x58; // GOAL unsigned integer parent type, assumes unboxed +constexpr int FIX_SYM_BINTEGER_TYPE = 0x60; // GOAL "boxed integer" type +constexpr int FIX_SYM_INT8_TYPE = 0x68; // GOAL 8-bit signed integer +constexpr int FIX_SYM_INT16_TYPE = 0x70; // ... +constexpr int FIX_SYM_INT32_TYPE = 0x78; // ... +constexpr int FIX_SYM_INT64_TYPE = 0x80; // ... +constexpr int FIX_SYM_INT128_TYPE = 0x88; // GOAL 128-bit integer type, behaves strangely +constexpr int FIX_SYM_UINT8_TYPE = 0x90; // GOAL 8-bit unsigned integer +constexpr int FIX_SYM_UINT16_TYPE = 0x98; // ...
+constexpr int FIX_SYM_UINT32_TYPE = 0xA0; // ... +constexpr int FIX_SYM_UINT64_TYPE = 0xA8; // ... +constexpr int FIX_SYM_UINT128_TYPE = 0xB0; // ... +constexpr int FIX_SYM_FLOAT_TYPE = 0xB8; // GOAL 32-bit floating point type +constexpr int FIX_SYM_PROCESS_TREE_TYPE = 0xC0; // GOAL process-tree type. Used in the gkernel +constexpr int FIX_SYM_PROCESS_TYPE = 0xC8; // GOAL process type +constexpr int FIX_SYM_THREAD_TYPE = 0xD0; // GOAL thread type +constexpr int FIX_SYM_STRUCTURE_TYPE = 0xD8; // GOAL structure type. Any type with fields +constexpr int FIX_SYM_PAIR_TYPE = 0xE0; // GOAL pair type +constexpr int FIX_SYM_POINTER_TYPE = 0xE8; // GOAL pointer type (32-bit) +constexpr int FIX_SYM_NUMBER_TYPE = 0xF0; // GOAL number type (parent of integer/float types) +constexpr int FIX_SYM_ARRAY_TYPE = 0xF8; // GOAL array type +constexpr int FIX_SYM_VU_FUNCTION_TYPE = 0x100; // GOAL vu-function type +constexpr int FIX_SYM_CONNECTABLE_TYPE = 0x108; // GOAL connectable +constexpr int FIX_SYM_STACK_FRAME_TYPE = 0x110; // GOAL stack-frame +constexpr int FIX_SYM_FILE_STREAM_TYPE = 0x118; // GOAL file-stream +constexpr int FIX_SYM_KHEAP = 0x120; // GOAL kheap // GOAL functions -constexpr int FIX_SYM_NOTHING_FUNC = 0x128; // GOAL nothing-func (does nothing) -constexpr int FIX_SYM_DEL_BASIC_FUNC = 0x130; // GOAL delete-basic function +constexpr int FIX_SYM_NOTHING_FUNC = 0x128; // GOAL nothing-func (does nothing) +constexpr int FIX_SYM_DEL_BASIC_FUNC = 0x130; // GOAL delete-basic function // GOAL allocation symbols (?) -constexpr int FIX_SYM_STATIC = 0x138; // GOAL 'static -constexpr int FIX_SYM_GLOBAL_HEAP = 0x140; // GOAL 'global -constexpr int FIX_SYM_DEBUG_HEAP = 0x148; // GOAL 'debug -constexpr int FIX_SYM_LOADING_LEVEL = 0x150; // ?? -constexpr int FIX_SYM_LOADING_PACKAGE = 0x158; // ?? -constexpr int FIX_SYM_PROCESS_LEVEL_HEAP = 0x160; // ?? 
-constexpr int FIX_SYM_STACK = 0x168; // GOAL 'stack -constexpr int FIX_SYM_SCRATCH = 0x170; // GOAL 'scratch +constexpr int FIX_SYM_STATIC = 0x138; // GOAL 'static +constexpr int FIX_SYM_GLOBAL_HEAP = 0x140; // GOAL 'global +constexpr int FIX_SYM_DEBUG_HEAP = 0x148; // GOAL 'debug +constexpr int FIX_SYM_LOADING_LEVEL = 0x150; // ?? +constexpr int FIX_SYM_LOADING_PACKAGE = 0x158; // ?? +constexpr int FIX_SYM_PROCESS_LEVEL_HEAP = 0x160; // ?? +constexpr int FIX_SYM_STACK = 0x168; // GOAL 'stack +constexpr int FIX_SYM_SCRATCH = 0x170; // GOAL 'scratch // GOAL random stuff -constexpr int FIX_SYM_SCRATCH_TOP = 0x178; // GOAL *scratch-top* -constexpr int FIX_SYM_ZERO_FUNC = 0x180; // GOAL zero-func (returns 0x0 in $v0 register) -constexpr int FIX_SYM_ASIZE_OF_BASIC_FUNC = 0x188; // GOAL asize-of-basic function -constexpr int FIX_SYM_COPY_BASIC_FUNC = 0x190; // GOAL copy-basic function -constexpr int FIX_SYM_LEVEL = 0x198; // ?? -constexpr int FIX_SYM_ART_GROUP = 0x1a0; // ?? -constexpr int FIX_SYM_TX_PAGE_DIR = 0x1a8; // ?? -constexpr int FIX_SYM_TX_PAGE = 0x1b0; // ?? -constexpr int FIX_SYM_SOUND = 0x1b8; // ?? -constexpr int FIX_SYM_DGO = 0x1c0; // ?? -constexpr int FIX_SYM_TOP_LEVEL = 0x1c8; // ?? -constexpr int FIX_FIXED_SYM_END_OFFSET = 0x1d0; +constexpr int FIX_SYM_SCRATCH_TOP = 0x178; // GOAL *scratch-top* +constexpr int FIX_SYM_ZERO_FUNC = 0x180; // GOAL zero-func (returns 0x0 in $v0 register) +constexpr int FIX_SYM_ASIZE_OF_BASIC_FUNC = 0x188; // GOAL asize-of-basic function +constexpr int FIX_SYM_COPY_BASIC_FUNC = 0x190; // GOAL copy-basic function +constexpr int FIX_SYM_LEVEL = 0x198; // ?? +constexpr int FIX_SYM_ART_GROUP = 0x1a0; // ?? +constexpr int FIX_SYM_TX_PAGE_DIR = 0x1a8; // ?? +constexpr int FIX_SYM_TX_PAGE = 0x1b0; // ?? +constexpr int FIX_SYM_SOUND = 0x1b8; // ?? +constexpr int FIX_SYM_DGO = 0x1c0; // ?? +constexpr int FIX_SYM_TOP_LEVEL = 0x1c8; // ?? 
+constexpr int FIX_FIXED_SYM_END_OFFSET = 0x1d0; #endif // JAK1_SYMBOLS_H diff --git a/common/type_system/Type.cpp b/common/type_system/Type.cpp index 3dd0fe1a80..0c68f48d86 100644 --- a/common/type_system/Type.cpp +++ b/common/type_system/Type.cpp @@ -108,6 +108,11 @@ std::string Type::get_name() const { } std::string Type::get_runtime_name() const { + if (!m_allow_in_runtime) { + fmt::print("[TypeSystem] Tried to use type {} as a runtime type, which is not allowed.\n", + get_name()); + throw std::runtime_error("get_runtime_name"); + } return m_runtime_name; } @@ -137,7 +142,7 @@ bool Type::is_equal(const Type& other) const { * parents. */ bool Type::has_parent() const { - return m_parent != "object" && !m_parent.empty(); + return m_name != "object" && !m_parent.empty(); } /*! @@ -218,52 +223,53 @@ std::string Type::print_method_info() const { } ///////////// -// NoneType +// NullType ///////////// -// Special Type representing nothing. -// it's an error to try to do anything with None. +// Special Type for both "none" and "_type_" types +// it's an error to try to do anything with Null. 
-NoneType::NoneType() : Type("", "none", false) {} +NullType::NullType(std::string name) : Type("", std::move(name), false) {} -bool NoneType::is_reference() const { - throw std::runtime_error("is_reference called on NoneType"); +bool NullType::is_reference() const { + throw std::runtime_error("is_reference called on NullType"); } -int NoneType::get_load_size() const { - throw std::runtime_error("get_load_size called on NoneType"); +int NullType::get_load_size() const { + throw std::runtime_error("get_load_size called on NullType"); } -bool NoneType::get_load_signed() const { - throw std::runtime_error("get_load_size called on NoneType"); +bool NullType::get_load_signed() const { + throw std::runtime_error("get_load_signed called on NullType"); } -int NoneType::get_size_in_memory() const { - throw std::runtime_error("get_size_in_memory called on NoneType"); +int NullType::get_size_in_memory() const { + throw std::runtime_error("get_size_in_memory called on NullType"); } -RegKind NoneType::get_preferred_reg_kind() const { - throw std::runtime_error("get_preferred_reg_kind called on NoneType"); +RegKind NullType::get_preferred_reg_kind() const { + throw std::runtime_error("get_preferred_reg_kind called on NullType"); } -int NoneType::get_offset() const { - throw std::runtime_error("get_offset called on NoneType"); +int NullType::get_offset() const { + throw std::runtime_error("get_offset called on NullType"); } -int NoneType::get_in_memory_alignment() const { - throw std::runtime_error("get_in_memory_alignment called on NoneType"); +int NullType::get_in_memory_alignment() const { + throw std::runtime_error("get_in_memory_alignment called on NullType"); } -int NoneType::get_inline_array_alignment() const { - throw std::runtime_error("get_inline_array_alignment called on NoneType"); +int NullType::get_inline_array_alignment() const { + throw std::runtime_error("get_inline_array_alignment called on NullType"); } -std::string NoneType::print() const { - return "none"; +std::string NullType::print() const { + return m_name; } -bool
NoneType::operator==(const Type& other) const { - // there should be only one none type, so this is safe. +bool NullType::operator==(const Type& other) const { + // any redefinition by the user should be invalid, so this will always return false unless + // you're calling it on the same object. return this == &other; } @@ -484,7 +490,7 @@ void StructureType::override_size_in_memory(int size) { } int StructureType::get_offset() const { - return 0; + return m_offset; } int StructureType::get_in_memory_alignment() const { diff --git a/common/type_system/Type.h b/common/type_system/Type.h index 5ce8dd69d0..a8c3474a3e 100644 --- a/common/type_system/Type.h +++ b/common/type_system/Type.h @@ -68,6 +68,8 @@ class Type { const MethodInfo& add_new_method(const MethodInfo& info); std::string print_method_info() const; + void disallow_in_runtime() { m_allow_in_runtime = false; } + virtual ~Type() = default; protected: @@ -79,6 +81,7 @@ class Type { std::string m_parent; // the parent type (is empty for none and object) std::string m_name; + bool m_allow_in_runtime = true; std::string m_runtime_name; bool m_is_boxed = false; // does this have runtime type information? }; @@ -87,9 +90,9 @@ class Type { - * Used only for "none" - this is a type that the compiler can use for "this has no value". - * Attempting to do anything with a NoneType is an error. + * Special type used for both "none" and "_type_". + * Attempting to do anything with a NullType is an error. */ -class NoneType : public Type { +class NullType : public Type { public: - NoneType(); + explicit NullType(std::string name); bool is_reference() const override; int get_load_size() const override; bool get_load_signed() const override; @@ -100,7 +103,7 @@ class NoneType : public Type { int get_in_memory_alignment() const override; std::string print() const override; bool operator==(const Type& other) const override; - ~NoneType() = default; + ~NullType() = default; }; /*!
@@ -217,10 +220,12 @@ class StructureType : public ReferenceType { int get_in_memory_alignment() const override; int get_inline_array_alignment() const override; bool lookup_field(const std::string& name, Field* out); + bool is_dynamic() const { return m_dynamic; } ~StructureType() = default; protected: friend class TypeSystem; + void override_offset(int offset) { m_offset = offset; } void override_size_in_memory( int size); // only to be used for setting up weird types like "structure" void add_field(const Field& f, int new_size_in_mem) { @@ -234,6 +239,7 @@ class StructureType : public ReferenceType { bool m_dynamic = false; int m_size_in_mem = 0; bool m_pack = false; + int m_offset = 0; }; class BasicType : public StructureType { diff --git a/common/type_system/TypeSpec.cpp b/common/type_system/TypeSpec.cpp index f52cd558c3..c78def6cd8 100644 --- a/common/type_system/TypeSpec.cpp +++ b/common/type_system/TypeSpec.cpp @@ -34,4 +34,13 @@ bool TypeSpec::operator==(const TypeSpec& other) const { } return true; +} + +TypeSpec TypeSpec::substitute_for_method_call(const std::string& method_type) const { + TypeSpec result; + result.m_type = (m_type == "_type_") ? 
method_type : m_type; + for (const auto& x : m_arguments) { + result.m_arguments.push_back(x.substitute_for_method_call(method_type)); + } + return result; } \ No newline at end of file diff --git a/common/type_system/TypeSpec.h b/common/type_system/TypeSpec.h index cb42ba788b..04a0431b08 100644 --- a/common/type_system/TypeSpec.h +++ b/common/type_system/TypeSpec.h @@ -33,12 +33,18 @@ class TypeSpec { void add_arg(const TypeSpec& ts) { m_arguments.push_back(ts); } const std::string base_type() const { return m_type; } + + bool has_single_arg() const { return m_arguments.size() == 1; } + const TypeSpec& get_single_arg() const { assert(m_arguments.size() == 1); return m_arguments.front(); } + TypeSpec substitute_for_method_call(const std::string& method_type) const; + private: + friend class TypeSystem; std::string m_type; std::vector m_arguments; }; diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index 7922eb90d1..3b61135bf5 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -6,8 +6,9 @@ #include TypeSystem::TypeSystem() { - // the "none" type is included by default. - add_type("none", std::make_unique()); + // the "none" and "_type_" types are included by default. + add_type("none", std::make_unique("none")); + add_type("_type_", std::make_unique("_type_")); } /*! @@ -40,7 +41,7 @@ Type* TypeSystem::add_type(const std::string& name, std::unique_ptr type) // newly defined! // none/object get to skip these checks because they are roots. 
- if (name != "object" && name != "none") { + if (name != "object" && name != "none" && name != "_type_") { if (m_forward_declared_types.find(type->get_parent()) != m_forward_declared_types.end()) { fmt::print("[TypeSystem] Type {} has incompletely defined parent {}\n", type->get_name(), type->get_parent()); @@ -84,11 +85,24 @@ std::string TypeSystem::get_runtime_type(const TypeSpec& ts) { DerefInfo TypeSystem::get_deref_info(const TypeSpec& ts) { DerefInfo info; + if (!ts.has_single_arg()) { + // not enough info. + info.can_deref = false; + return info; + } + // default to GPR info.reg = RegKind::GPR_64; info.mem_deref = true; if (ts.base_type() == "inline-array") { + auto result_type = lookup_type(ts.get_single_arg()); + auto result_structure_type = dynamic_cast(result_type); + if (!result_structure_type || result_structure_type->is_dynamic()) { + info.can_deref = false; + return info; + } + // it's an inline array of structures. We can "dereference". But really we don't do a memory // dereference, we just add stride*idx to the pointer. info.can_deref = true; // deref operators should work... 
@@ -96,7 +110,6 @@ DerefInfo TypeSystem::get_deref_info(const TypeSpec& ts) { info.result_type = ts.get_single_arg(); // what we're an inline-array of info.sign_extend = false; // not applicable anyway - auto result_type = lookup_type(info.result_type); if (result_type->is_reference()) { info.stride = align(result_type->get_size_in_memory(), result_type->get_inline_array_alignment()); @@ -112,11 +125,13 @@ DerefInfo TypeSystem::get_deref_info(const TypeSpec& ts) { // in memory, an array of pointers info.stride = POINTER_SIZE; info.sign_extend = false; + info.load_size = POINTER_SIZE; } else { // an array of values, which should be loaded in the correct way to the correct register info.stride = result_type->get_size_in_memory(); info.sign_extend = result_type->get_load_signed(); info.reg = result_type->get_preferred_reg_kind(); + info.load_size = result_type->get_load_size(); assert(result_type->get_size_in_memory() == result_type->get_load_size()); } } else { @@ -188,7 +203,7 @@ TypeSpec TypeSystem::make_inline_array_typespec(const TypeSpec& type) { * possible, don't store a Type* and store a TypeSpec instead. The TypeSpec can then be used with * lookup_type to find the most up-to-date type information. */ -Type* TypeSystem::lookup_type(const std::string& name) { +Type* TypeSystem::lookup_type(const std::string& name) const { auto kv = m_types.find(name); if (kv != m_types.end()) { return kv->second.get(); @@ -209,7 +224,7 @@ Type* TypeSystem::lookup_type(const std::string& name) { * possible, don't store a Type* and store a TypeSpec instead. The TypeSpec can then be used with * lookup_type to find the most up-to-date type information. */ -Type* TypeSystem::lookup_type(const TypeSpec& ts) { +Type* TypeSystem::lookup_type(const TypeSpec& ts) const { return lookup_type(ts.base_type()); } @@ -488,7 +503,7 @@ int TypeSystem::add_field_to_type(StructureType* type, * Add types which are built-in to GOAL. 
*/ void TypeSystem::add_builtin_types() { - // some of the basic types having confusing circular dependencies, so this is done manually. + // some of the basic types have confusing circular dependencies, so this is done manually. // there are no inlined things so its ok to do some things out of order because the actual size // doesn't really matter. @@ -506,48 +521,53 @@ void TypeSystem::add_builtin_types() { auto link_block_type = add_builtin_basic("basic", "link-block"); auto kheap_type = add_builtin_structure("structure", "kheap"); auto array_type = add_builtin_basic("basic", "array"); - auto pair_type = add_builtin_structure("object", "pair"); + auto pair_type = add_builtin_structure("object", "pair", true); auto process_tree_type = add_builtin_basic("basic", "process-tree"); auto process_type = add_builtin_basic("process-tree", "process"); auto thread_type = add_builtin_basic("basic", "thread"); auto connectable_type = add_builtin_structure("structure", "connectable"); auto stack_frame_type = add_builtin_basic("basic", "stack-frame"); auto file_stream_type = add_builtin_basic("basic", "file-stream"); - auto pointer_type = add_builtin_value_type("object", "pointer", 4); - auto number_type = add_builtin_value_type("object", "number", 8); // sign extend? - auto float_type = add_builtin_value_type("number", "float", 4, false, false, RegKind::FLOAT); - auto integer_type = add_builtin_value_type("number", "integer", 8, false, false); // sign extend? - auto binteger_type = - add_builtin_value_type("integer", "binteger", 8, true, false); // sign extend? 
- auto sinteger_type = add_builtin_value_type("integer", "sinteger", 8, false, true); - auto int8_type = add_builtin_value_type("sinteger", "int8", 1, false, true); - auto int16_type = add_builtin_value_type("sinteger", "int16", 2, false, true); - auto int32_type = add_builtin_value_type("sinteger", "int32", 4, false, true); - auto int64_type = add_builtin_value_type("sinteger", "int64", 8, false, true); - auto int128_type = - add_builtin_value_type("sinteger", "int128", 16, false, true, RegKind::INT_128); - auto uinteger_type = add_builtin_value_type("integer", "uinteger", 8); - auto uint8_type = add_builtin_value_type("uinteger", "uint8", 1); - auto uint16_type = add_builtin_value_type("uinteger", "uint16", 2); - auto uint32_type = add_builtin_value_type("uinteger", "uint32", 4); - auto uint64_type = add_builtin_value_type("uinteger", "uint64", 81); - auto uint128_type = - add_builtin_value_type("uinteger", "uint128", 16, false, false, RegKind::INT_128); + add_builtin_value_type("object", "pointer", 4); + auto inline_array_type = add_builtin_value_type("object", "inline-array", 4); + inline_array_type->set_runtime_type("pointer"); + + add_builtin_value_type("object", "number", 8); // sign extend? + add_builtin_value_type("number", "float", 4, false, false, RegKind::FLOAT); + add_builtin_value_type("number", "integer", 8, false, false); // sign extend? + add_builtin_value_type("integer", "binteger", 8, true, false); // sign extend? 
+ add_builtin_value_type("integer", "sinteger", 8, false, true); + add_builtin_value_type("sinteger", "int8", 1, false, true); + add_builtin_value_type("sinteger", "int16", 2, false, true); + add_builtin_value_type("sinteger", "int32", 4, false, true); + add_builtin_value_type("sinteger", "int64", 8, false, true); + add_builtin_value_type("sinteger", "int128", 16, false, true, RegKind::INT_128); + add_builtin_value_type("integer", "uinteger", 8); + add_builtin_value_type("uinteger", "uint8", 1); + add_builtin_value_type("uinteger", "uint16", 2); + add_builtin_value_type("uinteger", "uint32", 4); + add_builtin_value_type("uinteger", "uint64", 8); + add_builtin_value_type("uinteger", "uint128", 16, false, false, RegKind::INT_128); + + auto int_type = add_builtin_value_type("integer", "int", 8, false, true); + int_type->disallow_in_runtime(); + auto uint_type = add_builtin_value_type("uinteger", "uint", 8, false, false); + uint_type->disallow_in_runtime(); // Methods and Fields // OBJECT - add_method(obj_type, "new", make_function_typespec({"symbol", "type", "int32"}, "object")); - add_method(obj_type, "delete", make_function_typespec({"object"}, "none")); - add_method(obj_type, "print", make_function_typespec({"object"}, "object")); - add_method(obj_type, "inspect", make_function_typespec({"object"}, "object")); + add_method(obj_type, "new", make_function_typespec({"symbol", "type", "int32"}, "_type_")); + add_method(obj_type, "delete", make_function_typespec({"_type_"}, "none")); + add_method(obj_type, "print", make_function_typespec({"_type_"}, "_type_")); + add_method(obj_type, "inspect", make_function_typespec({"_type_"}, "_type_")); add_method(obj_type, "length", -
- add_method(obj_type, "asize-of", make_function_typespec({"object"}, "int32")); - add_method(obj_type, "copy", make_function_typespec({"object", "symbol"}, "object")); - add_method(obj_type, "relocate", make_function_typespec({"object", "int32"}, "object")); + make_function_typespec({"_type_"}, "int32")); // todo - this integer type? + add_method(obj_type, "asize-of", make_function_typespec({"_type_"}, "int32")); + add_method(obj_type, "copy", make_function_typespec({"_type_", "symbol"}, "_type_")); + add_method(obj_type, "relocate", make_function_typespec({"_type_", "int32"}, "_type_")); add_method(obj_type, "mem-usage", - make_function_typespec({"object"}, "int32")); // todo - this is a guess. + make_function_typespec({"_type_"}, "int32")); // todo - this is a guess. // STRUCTURE // structure new doesn't support dynamic sizing, which is kinda weird - it grabs the size from @@ -591,6 +611,38 @@ void TypeSystem::add_builtin_types() { // VU FUNCTION // don't inherit + add_field_to_type(vu_function_type, "length", make_typespec("int32")); // todo integer type + add_field_to_type(vu_function_type, "origin", make_typespec("pointer")); // todo sign extend? + add_field_to_type(vu_function_type, "qlength", make_typespec("int32")); // todo integer type + + // link block + builtin_structure_inherit(link_block_type); + add_field_to_type(link_block_type, "allocated-length", + make_typespec("int32")); // todo integer type + add_field_to_type(link_block_type, "version", make_typespec("int32")); // todo integer type + // there's probably some dynamically sized stuff after this... 
+ + // kheap + add_field_to_type(kheap_type, "base", make_typespec("pointer")); + add_field_to_type(kheap_type, "top", make_typespec("pointer")); + add_field_to_type(kheap_type, "current", make_typespec("pointer")); + add_field_to_type(kheap_type, "top-base", make_typespec("pointer")); + + // todo + (void)array_type; + + // pair + pair_type->override_offset(2); + add_field_to_type(pair_type, "car", make_typespec("object")); + add_field_to_type(pair_type, "cdr", make_typespec("object")); + + // todo, with kernel + (void)process_tree_type; + (void)process_type; + (void)thread_type; + (void)connectable_type; + (void)stack_frame_type; + (void)file_stream_type; } /*! @@ -625,23 +677,6 @@ int TypeSystem::get_next_method_id(Type* type) { } } -/*! - * For debugging, todo remove. - */ -int TypeSystem::manual_add_field_to_type(StructureType* type, - const std::string& field_name, - const TypeSpec& field_type, - int offset, - int size, - int alignment) { - Field field(field_name, field_type); - field.set_alignment(alignment); - field.set_offset(offset); - int new_size = type->get_size_in_memory() + size; - type->add_field(field, new_size); - return offset; -} - /*! * Lookup a field of a type by name */ @@ -725,8 +760,9 @@ int TypeSystem::get_size_in_type(const Field& field) { * things in the wrong order. */ StructureType* TypeSystem::add_builtin_structure(const std::string& parent, - const std::string& type_name) { - add_type(type_name, std::make_unique(parent, type_name)); + const std::string& type_name, + bool boxed) { + add_type(type_name, std::make_unique(parent, type_name, boxed)); return get_type_of_type(type_name); } @@ -759,4 +795,172 @@ ValueType* TypeSystem::add_builtin_value_type(const std::string& parent, */ void TypeSystem::builtin_structure_inherit(StructureType* st) { st->inherit(get_type_of_type(st->get_parent())); +} + +/*! + * Main compile-time type check! 
+ * @param expected - the expected type + * @param actual - the actual type (can be more specific) + * @param error_source_name - optional, can provide a name for where the error comes from + * @param print_on_error - print a message explaining the type error, if there is one + * @param throw_on_error - throw a std::runtime_error on failure if set. + * @return if the type check passes + */ +bool TypeSystem::typecheck(const TypeSpec& expected, + const TypeSpec& actual, + const std::string& error_source_name, + bool print_on_error, + bool throw_on_error) const { + bool success = true; + // first, typecheck the base types: + if (!typecheck_base_types(expected.base_type(), actual.base_type())) { + success = false; + } + + // next argument checks: + if (expected.m_arguments.size() == actual.m_arguments.size()) { + for (size_t i = 0; i < expected.m_arguments.size(); i++) { + // don't print/throw because the error would be confusing. Better to fail only the + // outer most check and print a single error message. + if (!typecheck(expected.m_arguments[i], actual.m_arguments[i], "", false, false)) { + success = false; + break; + } + } + } else { + // different sizes of arguments. + if (expected.m_arguments.empty()) { + // we expect zero arguments, but got some. The actual type is more specific, so this is fine. + } else { + // different sizes, and we expected arguments. No good! + success = false; + } + } + + if (!success) { + if (print_on_error) { + if (error_source_name.empty()) { + fmt::print("[TypeSystem] Got type \"{}\" when expecting \"{}\"\n", actual.print(), + expected.print()); + } else { + fmt::print("[TypeSystem] For {}, got type \"{}\" when expecting \"{}\"\n", + error_source_name, actual.print(), expected.print()); + } + } + + if (throw_on_error) { + throw std::runtime_error("typecheck failed"); + } + } + + return success; +} + +/*! + * Is actual of type expected? For base types. 
+ */ +bool TypeSystem::typecheck_base_types(const std::string& expected, + const std::string& actual) const { + // just to make sure it exists. (note - could there be a case when it just has to be forward + // declared, but not defined?) + lookup_type(expected); + + if (expected == actual) { + lookup_type(actual); // make sure it exists + return true; + } + + std::string actual_name = actual; + auto actual_type = lookup_type(actual_name); + while (actual_type->has_parent()) { + actual_name = actual_type->get_parent(); + actual_type = lookup_type(actual_name); + + if (expected == actual_name) { + return true; + } + } + + return false; +} + +/*! + * Get a path from type to object. + */ +std::vector TypeSystem::get_path_up_tree(const std::string& type) { + auto parent = lookup_type(type)->get_parent(); + std::vector path = {type}; + path.push_back(parent); + auto parent_type = lookup_type(parent); + + while (parent_type->has_parent()) { + parent = parent_type->get_parent(); + parent_type = lookup_type(parent); + path.push_back(parent); + } + + return path; +} + +/*! + * Lowest common ancestor of two base types. + */ +std::string TypeSystem::lca_base(const std::string& a, const std::string& b) { + if (a == b) { + return a; + } + + auto a_up = get_path_up_tree(a); + auto b_up = get_path_up_tree(b); + + int ai = a_up.size() - 1; + int bi = b_up.size() - 1; + + std::string* result = nullptr; + while (ai >= 0 && bi >= 0) { + if (a_up.at(ai) == b_up.at(bi)) { + result = &a_up.at(ai); + } else { + break; + } + ai--; + bi--; + } + + assert(result); + return *result; +} + +/*! + * Lowest common ancestor of two typespecs. Will recursively apply to arguments, if compatible. + * Otherwise arguments are stripped off. + * In a situation like lca("(a b)", "(c d)"), the result will be + * (lca(a, c) lca(b, d)).
+ */ +TypeSpec TypeSystem::lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b) { + auto result = make_typespec(lca_base(a.base_type(), b.base_type())); + if (!a.m_arguments.empty() && !b.m_arguments.empty() && + a.m_arguments.size() == b.m_arguments.size()) { + // recursively add arguments + for (size_t i = 0; i < a.m_arguments.size(); i++) { + result.add_arg(lowest_common_ancestor(a.m_arguments.at(i), b.m_arguments.at(i))); + } + } + return result; +} + +/*! + * Lowest common ancestor of multiple (or at least one) type. + */ +TypeSpec TypeSystem::lowest_common_ancestor(const std::vector& types) { + assert(!types.empty()); + if (types.size() == 1) { + return types.front(); + } + + auto result = lowest_common_ancestor(types.at(0), types.at(1)); + for (size_t i = 2; i < types.size(); i++) { + result = lowest_common_ancestor(result, types.at(i)); + } + return result; } \ No newline at end of file diff --git a/common/type_system/TypeSystem.h b/common/type_system/TypeSystem.h index 71a865588b..722ca515ae 100644 --- a/common/type_system/TypeSystem.h +++ b/common/type_system/TypeSystem.h @@ -23,6 +23,7 @@ struct DerefInfo { bool sign_extend = false; RegKind reg = RegKind::INVALID; int stride = -1; + int load_size = -1; TypeSpec result_type; }; @@ -45,8 +46,8 @@ class TypeSystem { TypeSpec make_inline_array_typespec(const std::string& type); TypeSpec make_inline_array_typespec(const TypeSpec& type); - Type* lookup_type(const TypeSpec& ts); - Type* lookup_type(const std::string& name); + Type* lookup_type(const TypeSpec& ts) const; + Type* lookup_type(const std::string& name) const; MethodInfo add_method(Type* type, const std::string& method_name, const TypeSpec& ts); MethodInfo add_new_method(Type* type, const TypeSpec& ts); @@ -67,6 +68,12 @@ class TypeSystem { void add_builtin_types(); std::string print_all_type_information() const; + bool typecheck(const TypeSpec& expected, + const TypeSpec& actual, + const std::string& error_source_name = "", + bool 
print_on_error = true, + bool throw_on_error = true) const; + std::vector get_path_up_tree(const std::string& type); /*! * Get a type by name and cast to a child class of Type*. Must succeed. @@ -81,19 +88,19 @@ class TypeSystem { return result; } + TypeSpec lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b); + TypeSpec lowest_common_ancestor(const std::vector& types); + private: + std::string lca_base(const std::string& a, const std::string& b); + bool typecheck_base_types(const std::string& expected, const std::string& actual) const; int get_size_in_type(const Field& field); int get_alignment_in_type(const Field& field); Field lookup_field(const std::string& type_name, const std::string& field_name); int get_next_method_id(Type* type); - int manual_add_field_to_type(StructureType* type, - const std::string& field_name, - const TypeSpec& field_type, - int offset, - int size, - int alignment); - - StructureType* add_builtin_structure(const std::string& parent, const std::string& type_name); + StructureType* add_builtin_structure(const std::string& parent, + const std::string& type_name, + bool boxed = false); BasicType* add_builtin_basic(const std::string& parent, const std::string& type_name); ValueType* add_builtin_value_type(const std::string& parent, const std::string& type_name, @@ -104,7 +111,6 @@ class TypeSystem { void builtin_structure_inherit(StructureType* st); std::unordered_map> m_types; - std::unordered_map m_global_types; std::unordered_set m_forward_declared_types; std::vector> m_old_types; diff --git a/common/type_system/type_util.h b/common/type_system/type_util.h index 4e87474578..fcefbdd2ad 100644 --- a/common/type_system/type_util.h +++ b/common/type_system/type_util.h @@ -1,9 +1,9 @@ #ifndef JAK_TYPE_UTIL_H #define JAK_TYPE_UTIL_H -template +template T align(T current, T alignment, T offset = 0) { - while((current % alignment) != 0) { + while ((current % alignment) != 0) { current++; } return current + offset; diff --git 
a/common/versions.h b/common/versions.h index 74b1ec0169..5ecb742ac8 100644 --- a/common/versions.h +++ b/common/versions.h @@ -12,7 +12,7 @@ namespace versions { // language version constexpr s32 GOAL_VERSION_MAJOR = 2; constexpr s32 GOAL_VERSION_MINOR = 6; -} +} // namespace versions // GOAL kernel version constexpr int KERNEL_VERSION_MAJOR = 2; diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index d8bfeba202..c972496229 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -9,18 +9,18 @@ struct FunctionName { enum class FunctionKind { - UNIDENTIFIED, // hasn't been identified yet. - GLOBAL, // global named function + UNIDENTIFIED, // hasn't been identified yet. + GLOBAL, // global named function METHOD, TOP_LEVEL_INIT, } kind = FunctionKind::UNIDENTIFIED; - std::string function_name; // only applicable for GLOBAL - std::string type_name; // only applicable for METHOD - int method_id = -1; // only applicable for METHOD + std::string function_name; // only applicable for GLOBAL + std::string type_name; // only applicable for METHOD + int method_id = -1; // only applicable for METHOD std::string to_string() const { - switch(kind) { + switch (kind) { case FunctionKind::GLOBAL: return function_name; case FunctionKind::METHOD: @@ -34,13 +34,9 @@ struct FunctionName { } } - bool empty() const { - return kind == FunctionKind::UNIDENTIFIED; - } + bool empty() const { return kind == FunctionKind::UNIDENTIFIED; } - void set_as_top_level() { - kind = FunctionKind::TOP_LEVEL_INIT; - } + void set_as_top_level() { kind = FunctionKind::TOP_LEVEL_INIT; } void set_as_global(std::string name) { kind = FunctionKind::GLOBAL; diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index d02fc3bf31..be9eaa1de7 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -90,7 +90,7 @@ Function& 
LinkedObjectFile::get_function_at_label(int label_id) { } assert(false); - return functions_by_seg.front().front(); // to avoid error + return functions_by_seg.front().front(); // to avoid error } /*! @@ -520,7 +520,7 @@ std::string LinkedObjectFile::print_disassembly() { result += "; .function " + func.guessed_name.to_string() + "\n"; result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; result += func.prologue.to_string(2) + "\n"; - if(!func.warnings.empty()) { + if (!func.warnings.empty()) { result += "Warnings: " + func.warnings + "\n"; } @@ -581,11 +581,11 @@ std::string LinkedObjectFile::print_disassembly() { // } // hack - if(func.cfg && !func.cfg->is_fully_resolved()) { + if (func.cfg && !func.cfg->is_fully_resolved()) { result += func.cfg->to_dot(); result += "\n"; } - if(func.cfg) { + if (func.cfg) { result += func.cfg->to_form_string() + "\n"; // To debug block stuff. @@ -614,7 +614,6 @@ std::string LinkedObjectFile::print_disassembly() { */ } - result += "\n\n\n"; } @@ -636,7 +635,6 @@ std::string LinkedObjectFile::print_disassembly() { if (word.kind == LinkedWord::TYPE_PTR && word.symbol_name == "string") { result += "; " + get_goal_string(seg, i) + "\n"; - } } } diff --git a/decompiler/ObjectFile/LinkedObjectFile.h b/decompiler/ObjectFile/LinkedObjectFile.h index 4b87dbe345..f897b86475 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.h +++ b/decompiler/ObjectFile/LinkedObjectFile.h @@ -15,7 +15,6 @@ #include "decompiler/Function/Function.h" #include "decompiler/util/LispPrint.h" - /*! * A label to a location in this object file. * Doesn't have to be word aligned. @@ -23,14 +22,14 @@ struct Label { std::string name; int target_segment; - int offset; // in bytes + int offset; // in bytes }; /*! * An object file's data with linking information included. 
*/ class LinkedObjectFile { -public: + public: LinkedObjectFile() = default; void set_segment_count(int n_segs); void push_back_word_to_segment(uint32_t word, int segment); @@ -38,8 +37,15 @@ public: int get_label_at(int seg, int offset) const; bool label_points_to_code(int label_id) const; bool pointer_link_word(int source_segment, int source_offset, int dest_segment, int dest_offset); - void pointer_link_split_word(int source_segment, int source_hi_offset, int source_lo_offset, int dest_segment, int dest_offset); - void symbol_link_word(int source_segment, int source_offset, const char* name, LinkedWord::Kind kind); + void pointer_link_split_word(int source_segment, + int source_hi_offset, + int source_lo_offset, + int dest_segment, + int dest_offset); + void symbol_link_word(int source_segment, + int source_offset, + const char* name, + LinkedWord::Kind kind); void symbol_link_offset(int source_segment, int source_offset, const char* name); Function& get_function_at_label(int label_id); std::string get_label_name(int label_id) const; @@ -83,7 +89,6 @@ public: uint32_t n_fp_reg_use = 0; uint32_t n_fp_reg_use_resolved = 0; - void add(const Stats& other) { total_code_bytes += other.total_code_bytes; total_v2_code_bytes += other.total_v2_code_bytes; @@ -116,9 +121,9 @@ public: std::vector> functions_by_seg; std::vector