diff --git a/.github/workflows/linux-workflow.yaml b/.github/workflows/linux-workflow.yaml index 70b97b91dc..d243ff83e1 100644 --- a/.github/workflows/linux-workflow.yaml +++ b/.github/workflows/linux-workflow.yaml @@ -18,9 +18,10 @@ jobs: matrix: os: [ubuntu-20.04] config: [Debug] # TODO - Eventually we need to make a Release Config + compiler: [clang, gcc] experimental: [false] - name: ${{ matrix.config }} + name: ${{ matrix.config }}-${{ matrix.compiler }} runs-on: ${{ matrix.os }} continue-on-error: ${{ matrix.experimental }} # Set some sort of timeout in the event of run-away builds. We are limited on concurrent jobs so, get rid of them. @@ -44,7 +45,6 @@ jobs: key: submodules-${{ hashFiles('./.gitmodules') }} path: | ./third-party/googletest - ./third-party/spdlog ./third-party/zydis ./.git/modules/ @@ -53,6 +53,7 @@ jobs: run: git submodule update --init --recursive --jobs 2 - name: Prepare Artifact Git Info + id: git-vars shell: bash run: | echo "##[set-output name=branch;]${GITHUB_REF#refs/heads/}" @@ -66,10 +67,9 @@ jobs: echo "##[set-output name=short-sha;]$(git rev-parse --short "$GITHUB_SHA")" fi echo "##[set-output name=artifact-metadata;]${ARTIFACT_NAME}" - id: git-vars - name: Get Package Dependencies - run: sudo apt install build-essential cmake ccache gcc g++ lcov make nasm + run: sudo apt install build-essential cmake ccache clang gcc g++ lcov make nasm # # -- SETUP CCACHE - https://cristianadam.eu/20200113/speeding-up-c-plus-plus-github-actions-using-ccache/ # - name: Prepare ccache timestamp @@ -87,9 +87,18 @@ jobs: # restore-keys: | # ${{ matrix.config }}-ccache- - - name: CMake Generation + - name: CMake Generation # run: cmake -D CMAKE_C_COMPILER_LAUNCHER=ccache -D CMAKE_CXX_COMPILER_LAUNCHER=ccache -B build -DCODE_COVERAGE=ON - run: cmake -B build -DCODE_COVERAGE=ON + run: | + if [ "${{ matrix.compiler }}" == 'clang' ]; then + export CC=clang + export CXX=clang++ + cmake -B build -DCODE_COVERAGE=ON -DASAN_BUILD=ON + else + export CC=gcc + 
export CXX=g++ + cmake -B build -DCODE_COVERAGE=ON + fi - name: Build Project working-directory: ./build @@ -106,9 +115,15 @@ jobs: run: make -j4 - name: Run Tests - run: ./test_code_coverage.sh + run: | + if [ "${{ matrix.compiler }}" == 'clang' ]; then + ./test.sh + else + ./test_code_coverage.sh + fi - name: Coveralls + if: ${{ matrix.compiler != 'clang' }} # only the gcc job runs test_code_coverage.sh uses: coverallsapp/github-action@master continue-on-error: true # Sometimes Coveralls has intermittent problems, and codecoverage isn't critical to our success with: diff --git a/.github/workflows/windows-workflow.yaml b/.github/workflows/windows-workflow.yaml index 1987733dc2..df9d0f0c34 100644 --- a/.github/workflows/windows-workflow.yaml +++ b/.github/workflows/windows-workflow.yaml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: os: [windows-2019] - config: [Debug] # TODO - Eventually we need to make a Release Config + config: [Release] experimental: [false] name: ${{ matrix.config }} @@ -44,7 +44,6 @@ jobs: key: submodules-${{ hashFiles('./.gitmodules') }} path: | ./third-party/googletest - ./third-party/spdlog ./third-party/zydis ./.git/modules/ @@ -76,7 +75,7 @@ jobs: run: | call "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars64.bat" cmake --version - cmake -B build -DCMAKE_BUILD_TYPE=Debug -G "NMake Makefiles" . + cmake -B build -DCMAKE_BUILD_TYPE=Release -G "NMake Makefiles" . 
- name: Build Project working-directory: ./build diff --git a/.gitignore b/.gitignore index c3c13dd1bb..00620b5359 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ cmake-build-debug/* build/* decompiler_out/* logs/* +log/* \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 4b8c68fa22..bed025c79f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,6 @@ [submodule "third-party/googletest"] path = third-party/googletest url = https://github.com/google/googletest.git -[submodule "third-party/spdlog"] - path = third-party/spdlog - url = https://github.com/gabime/spdlog.git [submodule "third-party/zydis"] path = third-party/zydis url = https://github.com/zyantific/zydis.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ce0804460..7fa5af6ec1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,7 +44,7 @@ IF (WIN32) ENDIF () IF (ASAN_BUILD) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -O1") message(STATUS "Doing ASAN build") ENDIF () @@ -64,28 +64,8 @@ include_directories(./) include_directories(SYSTEM third-party/inja) -# build spdlog as a shared library to improve compile times -# adding this as a SYSTEM include suppresses all the terrible warnings in spdlog -include_directories(SYSTEM third-party/spdlog/include) -# this makes spdlog generate a shared library that we can link against -set(SPDLOG_BUILD_SHARED ON CACHE BOOL "a" FORCE) -# this makes the spdlog includes not use the header only version, making compiling faster -add_definitions(-DSPDLOG_COMPILED_LIB) - -# build goos -add_subdirectory(common/goos) - -# build type_system library for compiler/decompiler -add_subdirectory(common/type_system) - -# build common_util library -add_subdirectory(common/util) - -# build cross platform socket library -add_subdirectory(common/cross_sockets) - -# build cross platform debug library -add_subdirectory(common/cross_os_debug) +# build common library 
+add_subdirectory(common) # build decompiler add_subdirectory(decompiler) @@ -108,9 +88,6 @@ add_subdirectory(third-party/minilzo) # build format library add_subdirectory(third-party/fmt) -# build spdlog library -add_subdirectory(third-party/spdlog) - # build zydis third party library for disassembling x86 option(ZYDIS_BUILD_TOOLS "" OFF) option(ZYDIS_BUILD_EXAMPLES "" OFF) diff --git a/CMakeSettings.json b/CMakeSettings.json index fb6ae5c28e..f00f49bebe 100644 --- a/CMakeSettings.json +++ b/CMakeSettings.json @@ -1,40 +1,42 @@ { - "configurations": [ - { - "name": "Debug", - "generator": "Ninja", - "configurationType": "Debug", - "inheritEnvironments": [ "msvc_x64_x64" ], - "buildRoot": "${projectDir}\\out\\build\\${name}", - "installRoot": "${projectDir}\\out\\install\\${name}", - "cmakeCommandArgs": "", - "buildCommandArgs": "", - "ctestCommandArgs": "", - "variables": [ - { - "name": "INSTALL_GTEST", - "value": "True", - "type": "BOOL" - } - ] - }, - { - "name": "x64-Release", - "generator": "Ninja", - "configurationType": "Release", - "inheritEnvironments": [ "msvc_x64_x64" ], - "buildRoot": "${projectDir}\\out\\build\\${name}", - "installRoot": "${projectDir}\\out\\install\\${name}", - "cmakeCommandArgs": "", - "buildCommandArgs": "", - "ctestCommandArgs": "", - "variables": [ - { - "name": "INSTALL_GTEST", - "value": "True", - "type": "BOOL" - } - ] - } - ] + "configurations": [ + { + "name": "Debug", + "generator": "Ninja", + "configurationType": "Debug", + "inheritEnvironments": [ "msvc_x64_x64" ], + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "", + "addressSanitizerEnabled": true, + "ctestCommandArgs": "", + "variables": [ + { + "name": "INSTALL_GTEST", + "value": "True", + "type": "BOOL" + } + ] + }, + { + "name": "Release", + "generator": "Ninja", + "configurationType": "RelWithDebInfo", + "buildRoot": "${projectDir}\\out\\build\\${name}", + 
"installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "", + "addressSanitizerEnabled": true, + "ctestCommandArgs": "", + "inheritEnvironments": [ "msvc_x64_x64" ], + "variables": [ + { + "name": "INSTALL_GTEST", + "value": "True", + "type": "BOOL" + } + ] + } + ] } \ No newline at end of file diff --git a/README.md b/README.md index 9d64bfcdcf..eabd8b2437 100644 --- a/README.md +++ b/README.md @@ -3,16 +3,20 @@ ![Linux](https://github.com/water111/jak-project/workflows/Linux/badge.svg) ![Windows](https://github.com/water111/jak-project/workflows/Windows/badge.svg) [![Coverage Status](https://coveralls.io/repos/github/water111/jak-project/badge.svg?branch=master)](https://coveralls.io/github/water111/jak-project?branch=master) +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/7c3cdc07523f43aca3433484ebc62ff9)](https://www.codacy.com/gh/water111/jak-project/dashboard?utm_source=github.com&utm_medium=referral&utm_content=xTVaser/jak-project&utm_campaign=Badge_Grade) ## Table of Contents -- [Project Description](#project-description) - [Table of Contents](#table-of-contents) +- [Project Description](#project-description) - [Getting Started - Linux (Ubuntu)](#getting-started---linux-ubuntu) - [Getting Started - Windows](#getting-started---windows) - [Project Layout](#project-layout) +- [Directory Layout](#directory-layout) +- [More Documentation](#more-documentation) +- [ASan Build](#asan-build) ## Project Description @@ -161,7 +165,6 @@ The final component is the "runtime", located in `game`. This is the part of the - `mman`: Windows library used to emulate `mmap` on Linux - `run-clang-format`: Utility to check and enforce code formatting - `run-clang-tidy` - - `spdlog`: Logging library - `zydis`: x86-64 disassembler used in the OpenGOAL debugger - `json`: A JSON library - `linenoise`: Used for the REPL input. Support history and useful editing shortcuts. 
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt new file mode 100644 index 0000000000..02a6f48f63 --- /dev/null +++ b/common/CMakeLists.txt @@ -0,0 +1,28 @@ +add_library(common + SHARED + cross_os_debug/xdbg.cpp + cross_sockets/xsocket.cpp + goos/Interpreter.cpp + goos/Object.cpp + goos/ParseHelpers.cpp + goos/PrettyPrinter.cpp + goos/Reader.cpp + goos/TextDB.cpp + log/log.cpp + type_system/deftype.cpp + type_system/Type.cpp + type_system/TypeFieldLookup.cpp + type_system/TypeSpec.cpp + type_system/TypeSystem.cpp + util/DgoWriter.cpp + util/FileUtil.cpp + util/Timer.cpp + ) + +target_link_libraries(common fmt) + +IF(WIN32) + target_link_libraries(common wsock32 ws2_32) +ELSE() + target_link_libraries(common stdc++fs) +ENDIF() \ No newline at end of file diff --git a/common/cross_os_debug/CMakeLists.txt b/common/cross_os_debug/CMakeLists.txt deleted file mode 100644 index b29f36f8d5..0000000000 --- a/common/cross_os_debug/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_library(cross_os_debug SHARED - xdbg.cpp) - -target_link_libraries(cross_os_debug fmt) \ No newline at end of file diff --git a/common/cross_os_debug/xdbg.cpp b/common/cross_os_debug/xdbg.cpp index 50fc908af0..0124f2c0ed 100644 --- a/common/cross_os_debug/xdbg.cpp +++ b/common/cross_os_debug/xdbg.cpp @@ -92,32 +92,35 @@ bool attach_and_break(const ThreadID& tid) { */ bool check_stopped(const ThreadID& tid, SignalInfo* out) { int status; - if (waitpid(tid.id, &status, WNOHANG) < 0) { + int rv = waitpid(tid.id, &status, WNOHANG); + if (rv < 0) { printf("[Debugger] Failed to waitpid: %s.\n", strerror(errno)); // assert(false); // todo, temp because I think we should never hit this. 
return false; } - if (WIFSTOPPED(status)) { - auto sig = WSTOPSIG(status); - if (out) { - switch (sig) { - case SIGSEGV: - out->kind = SignalInfo::SEGFAULT; - break; - case SIGFPE: - out->kind = SignalInfo::MATH_EXCEPTION; - break; - case SIGTRAP: - out->kind = SignalInfo::BREAK; - break; + if (rv > 0) { + // status has actually changed + if (WIFSTOPPED(status)) { + auto sig = WSTOPSIG(status); + if (out) { + switch (sig) { + case SIGSEGV: + out->kind = SignalInfo::SEGFAULT; + break; + case SIGFPE: + out->kind = SignalInfo::MATH_EXCEPTION; + break; + case SIGTRAP: + out->kind = SignalInfo::BREAK; + break; - default: - out->kind = SignalInfo::UNKNOWN; + default: + out->kind = SignalInfo::UNKNOWN; + } } + return true; } - - return true; } return false; diff --git a/common/cross_sockets/CMakeLists.txt b/common/cross_sockets/CMakeLists.txt deleted file mode 100644 index f97ddd0f83..0000000000 --- a/common/cross_sockets/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_library(cross_sockets - SHARED - "xsocket.h" - "xsocket.cpp") - -IF (WIN32) - # set stuff for windows - target_link_libraries(cross_sockets wsock32 ws2_32) -ELSE() - # set stuff for other systems - target_link_libraries(cross_sockets) -ENDIF() diff --git a/common/goos/CMakeLists.txt b/common/goos/CMakeLists.txt deleted file mode 100644 index 2000a6f58e..0000000000 --- a/common/goos/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ - -IF (WIN32) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2") -ELSE() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") -ENDIF() - -add_library(goos SHARED Object.cpp ParseHelpers.cpp TextDB.cpp Reader.cpp Interpreter.cpp PrettyPrinter.cpp ParseHelpers.cpp ParseHelpers.h) -target_link_libraries(goos common_util fmt) \ No newline at end of file diff --git a/common/log/log.cpp b/common/log/log.cpp new file mode 100644 index 0000000000..0313852700 --- /dev/null +++ b/common/log/log.cpp @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include "third-party/fmt/color.h" +#include 
"log.h" + +namespace lg { +struct Logger { + Logger() = default; + + bool initialized = false; + FILE* fp = nullptr; + level stdout_log_level = level::trace; + level file_log_level = level::trace; + level flush_level = level::trace; + std::mutex mutex; + + ~Logger() { + // will run when program exits. + if (fp) { + fclose(fp); + } + } +}; + +Logger gLogger; + +namespace internal { +const char* log_level_names[] = {"trace", "debug", "info", "warn", "error", "die"}; +const fmt::color log_colors[] = {fmt::color::gray, fmt::color::turquoise, fmt::color::light_green, + fmt::color::yellow, fmt::color::red, fmt::color::hot_pink}; + +// Write one already-formatted message, prefixed with a timestamp and the level name, to the log +// file and/or stdout depending on the configured levels. A level::die message exits the process. +void log_message(level log_level, LogTime& now, const char* message) { +#ifdef __linux__ + char date_time_buffer[128]; + time_t now_seconds = now.tv.tv_sec; + auto now_milliseconds = now.tv.tv_usec / 1000; + // localtime() returns shared static storage and the logger mutex is not held yet, so use the + // re-entrant localtime_r() with a caller-owned buffer. 
+ struct tm local_time = {}; + localtime_r(&now_seconds, &local_time); + strftime(date_time_buffer, 128, "%Y-%m-%d %H:%M:%S", &local_time); + std::string date_string = fmt::format("[{}:{:03d}]", date_time_buffer, now_milliseconds); +#else + char date_time_buffer[128]; + strftime(date_time_buffer, 128, "%Y-%m-%d %H:%M:%S", localtime(&now.tim)); + std::string date_string = fmt::format("[{}]", date_time_buffer); +#endif + + { + std::lock_guard lock(gLogger.mutex); + if (gLogger.fp && log_level >= gLogger.file_log_level) { + // log to file + std::string file_string = + fmt::format("{} [{}] {}\n", date_string, log_level_names[int(log_level)], message); + fwrite(file_string.c_str(), file_string.length(), 1, gLogger.fp); + if (log_level >= gLogger.flush_level) { + fflush(gLogger.fp); + } + } + + if (log_level >= gLogger.stdout_log_level) { + fmt::print("{} [", date_string); + fmt::print(fg(log_colors[int(log_level)]), "{}", log_level_names[int(log_level)]); + fmt::print("] {}\n", message); + if (log_level >= gLogger.flush_level) { + fflush(stdout); + } + } + } + + if (log_level == level::die) { + exit(-1); + } +} +} // namespace internal + +void set_file(const std::string& filename) { + 
assert(!gLogger.fp); + gLogger.fp = fopen(filename.c_str(), "w"); + assert(gLogger.fp); +} + +void set_flush_level(level log_level) { + gLogger.flush_level = log_level; +} + +void set_file_level(level log_level) { + gLogger.file_log_level = log_level; +} + +void set_stdout_level(level log_level) { + gLogger.stdout_log_level = log_level; +} + +void set_max_debug_levels() { + gLogger.flush_level = level::trace; + gLogger.stdout_log_level = level::trace; + gLogger.file_log_level = level::trace; +} + +void initialize() { + assert(!gLogger.initialized); + gLogger.initialized = true; +} + +void finish() { + { + std::lock_guard lock(gLogger.mutex); + if (gLogger.fp) { + fclose(gLogger.fp); + gLogger.fp = nullptr; + } + } +} + +} // namespace lg \ No newline at end of file diff --git a/common/log/log.h b/common/log/log.h new file mode 100644 index 0000000000..5de1cca8e4 --- /dev/null +++ b/common/log/log.h @@ -0,0 +1,80 @@ +#pragma once + +#include + +#ifdef __linux__ +#include +#endif +#include +#include "third-party/fmt/core.h" + +namespace lg { + +#ifdef __linux__ +struct LogTime { + timeval tv; +}; +#else +struct LogTime { + time_t tim; +}; +#endif + +// Logging API +enum class level { trace = 0, debug = 1, info = 2, warn = 3, error = 4, die = 5 }; + +namespace internal { +// log implementation stuff, not to be called by the user +void log_message(level log_level, LogTime& now, const char* message); +} // namespace internal + +void set_file(const std::string& filename); +void set_flush_level(level log_level); +void set_file_level(level log_level); +void set_stdout_level(level log_level); +void set_max_debug_levels(); +void initialize(); +void finish(); + +template +void log(level log_level, const std::string& format, Args&&... 
args) { + LogTime now; +#ifdef __linux__ + gettimeofday(&now.tv, nullptr); +#else + now.tim = time(nullptr); +#endif + std::string formatted_message = fmt::format(format, std::forward(args)...); + internal::log_message(log_level, now, formatted_message.c_str()); +} + +template +void trace(const std::string& format, Args&&... args) { + log(level::trace, format, std::forward(args)...); +} + +template +void debug(const std::string& format, Args&&... args) { + log(level::debug, format, std::forward(args)...); +} + +template +void info(const std::string& format, Args&&... args) { + log(level::info, format, std::forward(args)...); +} + +template +void warn(const std::string& format, Args&&... args) { + log(level::warn, format, std::forward(args)...); +} + +template +void error(const std::string& format, Args&&... args) { + log(level::error, format, std::forward(args)...); +} + +template +void die(const std::string& format, Args&&... args) { + log(level::die, format, std::forward(args)...); +} +} // namespace lg diff --git a/common/type_system/CMakeLists.txt b/common/type_system/CMakeLists.txt deleted file mode 100644 index 2236c564ea..0000000000 --- a/common/type_system/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_library(type_system - SHARED - TypeSystem.cpp - Type.cpp - TypeSpec.cpp - deftype.cpp - TypeFieldLookup.cpp) - -target_link_libraries(type_system fmt goos) \ No newline at end of file diff --git a/common/util/CMakeLists.txt b/common/util/CMakeLists.txt deleted file mode 100644 index b2b4e92f32..0000000000 --- a/common/util/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_library(common_util - SHARED - FileUtil.cpp - DgoWriter.cpp - Timer.cpp) - -IF(UNIX) -target_link_libraries(common_util stdc++fs) -ENDIF() diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index 8dd17f2878..7306b7dd07 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -1,38 +1,59 @@ -add_executable(decompiler - main.cpp - ObjectFile/ObjectFileDB.cpp 
- Disasm/Instruction.cpp - Disasm/InstructionDecode.cpp - Disasm/OpcodeInfo.cpp - Disasm/Register.cpp - ObjectFile/LinkedObjectFileCreation.cpp - ObjectFile/LinkedObjectFile.cpp - Function/Function.cpp - config.cpp - util/DecompilerTypeSystem.cpp - Function/BasicBlocks.cpp - Disasm/InstructionMatching.cpp - Function/CfgVtx.cpp - IR/BasicOpBuilder.cpp - IR/CfgBuilder.cpp - IR/IR.cpp - Function/TypeInspector.cpp - data/tpage.cpp +add_library( + decomp + SHARED + data/game_count.cpp data/game_text.cpp data/StrFileReader.cpp - data/game_count.cpp - Function/TypeAnalysis.cpp - IR/IR_TypeAnalysis.cpp - util/TP_Type.cpp - Function/RegUsage.cpp + data/tpage.cpp + + Disasm/Instruction.cpp + Disasm/InstructionDecode.cpp + Disasm/InstructionMatching.cpp + Disasm/InstructionParser.cpp + Disasm/OpcodeInfo.cpp + Disasm/Register.cpp + + Function/BasicBlocks.cpp + Function/CfgVtx.cpp Function/ExpressionBuilder.cpp Function/ExpressionStack.cpp - IR/IR_ExpressionStack.cpp) + Function/Function.cpp + Function/RegUsage.cpp + Function/TypeAnalysis.cpp + Function/TypeInspector.cpp + + IR/BasicOpBuilder.cpp + IR/CfgBuilder.cpp + IR/IR.cpp + IR/IR_ExpressionStack.cpp + IR/IR_TypeAnalysis.cpp + + IR2/AtomicOp.cpp + IR2/AtomicOpBuilder.cpp + IR2/Env.cpp + + ObjectFile/LinkedObjectFile.cpp + ObjectFile/LinkedObjectFileCreation.cpp + ObjectFile/ObjectFileDB.cpp + + util/DecompilerTypeSystem.cpp + util/TP_Type.cpp + + config.cpp +) + +target_link_libraries(decomp + minilzo + common + fmt + ) + +add_executable(decompiler + main.cpp + ) target_link_libraries(decompiler - goos + decomp + common minilzo - common_util - type_system - spdlog fmt) diff --git a/decompiler/Disasm/DecompilerLabel.h b/decompiler/Disasm/DecompilerLabel.h new file mode 100644 index 0000000000..a6a150ccde --- /dev/null +++ b/decompiler/Disasm/DecompilerLabel.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace decompiler { +/*! + * A label to a location in an object file. + * Doesn't have to be word aligned. 
+ */ +struct DecompilerLabel { + std::string name; + int target_segment; + int offset; // in bytes +}; +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Disasm/Instruction.cpp b/decompiler/Disasm/Instruction.cpp index bde9c6f6ec..6317ff20c7 100644 --- a/decompiler/Disasm/Instruction.cpp +++ b/decompiler/Disasm/Instruction.cpp @@ -8,17 +8,18 @@ #include "decompiler/ObjectFile/LinkedObjectFile.h" #include +namespace decompiler { /*! * Convert atom to a string for disassembly. */ -std::string InstructionAtom::to_string(const LinkedObjectFile& file) const { +std::string InstructionAtom::to_string(const std::vector& labels) const { switch (kind) { case REGISTER: return reg.to_string(); case IMM: return std::to_string(imm); case LABEL: - return file.get_label_name(label_id); + return labels.at(label_id).name; case VU_ACC: return "acc"; case VU_Q: @@ -115,6 +116,32 @@ bool InstructionAtom::is_link_or_label() const { return kind == IMM_SYM || kind == LABEL; } +/*! + * Compare two atoms: they are equal when the kinds match and the value used by that kind matches. + */ +bool InstructionAtom::operator==(const InstructionAtom& other) const { + if (kind != other.kind) { + return false; + } + switch (kind) { + case REGISTER: + return reg == other.reg; + case IMM: + return imm == other.imm; + case LABEL: + return label_id == other.label_id; + case IMM_SYM: + return sym == other.sym; + case VU_ACC: + case VU_Q: + return true; + default: + // unhandled kind: a bug. Return anyway so NDEBUG builds stay well-defined. + assert(false); + return false; + } +} + /*! * Convert just the name of the opcode to a string, omitting src/dst, but including * suffixes (interlock, broadcasts and destination) @@ -169,7 +189,7 @@ std::string Instruction::op_name_to_string() const { /*! * Convert entire instruction to a string. 
*/ -std::string Instruction::to_string(const LinkedObjectFile& file) const { +std::string Instruction::to_string(const std::vector& labels) const { auto& info = gOpcodeInfo[(int)kind]; auto result = op_name_to_string(); @@ -178,33 +198,33 @@ std::string Instruction::to_string(const LinkedObjectFile& file) const { assert(n_dst == 0); assert(n_src == 3); result += " "; - result += src[0].to_string(file); + result += src[0].to_string(labels); result += ", "; - result += src[1].to_string(file); + result += src[1].to_string(labels); result += "("; - result += src[2].to_string(file); + result += src[2].to_string(labels); result += ")"; } else if (info.is_load) { assert(n_dst == 1); assert(n_src == 2); result += " "; - result += dst[0].to_string(file); + result += dst[0].to_string(labels); result += ", "; - result += src[0].to_string(file); + result += src[0].to_string(labels); result += "("; - result += src[1].to_string(file); + result += src[1].to_string(labels); result += ")"; } else { // for instructions that aren't a store or load, the dest/sources are comma separated. 
bool end_comma = false; for (uint8_t i = 0; i < n_dst; i++) { - result += " " + dst[i].to_string(file) + ","; + result += " " + dst[i].to_string(labels) + ","; end_comma = true; } for (uint8_t i = 0; i < n_src; i++) { - result += " " + src[i].to_string(file) + ","; + result += " " + src[i].to_string(labels) + ","; end_comma = true; } @@ -312,3 +332,25 @@ int Instruction::get_label_target() const { } return result; } + +bool Instruction::operator==(const Instruction& other) const { + if (kind != other.kind || n_src != other.n_src || n_dst != other.n_dst || + cop2_dest != other.cop2_dest || cop2_bc != other.cop2_bc || il != other.il) { + return false; + } + + for (int i = 0; i < n_dst; i++) { + if (dst[i] != other.dst[i]) { + return false; + } + } + + for (int i = 0; i < n_src; i++) { + if (src[i] != other.src[i]) { + return false; + } + } + + return true; +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Disasm/Instruction.h b/decompiler/Disasm/Instruction.h index c97ce621e5..a9b652263c 100644 --- a/decompiler/Disasm/Instruction.h +++ b/decompiler/Disasm/Instruction.h @@ -9,10 +9,12 @@ #ifndef NEXT_INSTRUCTION_H #define NEXT_INSTRUCTION_H +#include #include "OpcodeInfo.h" #include "Register.h" -class LinkedObjectFile; +namespace decompiler { +struct DecompilerLabel; constexpr int MAX_INSTRUCTION_SOURCE = 3; constexpr int MAX_INTRUCTION_DEST = 1; @@ -41,7 +43,7 @@ struct InstructionAtom { int get_label() const; std::string get_sym() const; - std::string to_string(const LinkedObjectFile& file) const; + std::string to_string(const std::vector& labels) const; bool is_link_or_label() const; bool is_reg() const { return kind == REGISTER; } @@ -51,11 +53,13 @@ struct InstructionAtom { bool is_reg(Register r) const { return kind == REGISTER && reg == r; } + bool operator==(const InstructionAtom& other) const; + bool operator!=(const InstructionAtom& other) const { return !((*this) == other); } + private: int32_t imm; int label_id; Register 
reg; - std::string sym; }; @@ -66,7 +70,7 @@ class Instruction { InstructionKind kind = InstructionKind::UNKNOWN; std::string op_name_to_string() const; - std::string to_string(const LinkedObjectFile& file) const; + std::string to_string(const std::vector& labels) const; bool is_valid() const; void add_src(InstructionAtom& a); @@ -89,10 +93,13 @@ class Instruction { int get_label_target() const; + bool operator==(const Instruction& other) const; + bool operator!=(const Instruction& other) const { return !((*this) == other); } + // extra fields for some COP2 instructions. uint8_t cop2_dest = 0xff; // 0xff indicates "don't print dest" uint8_t cop2_bc = 0xff; // 0xff indicates "don't print bc" uint8_t il = 0xff; // 0xff indicates "don't print il" }; - +} // namespace decompiler #endif // NEXT_INSTRUCTION_H diff --git a/decompiler/Disasm/InstructionDecode.cpp b/decompiler/Disasm/InstructionDecode.cpp index ba2c83da91..2a98258d2f 100644 --- a/decompiler/Disasm/InstructionDecode.cpp +++ b/decompiler/Disasm/InstructionDecode.cpp @@ -8,6 +8,7 @@ #include #include "decompiler/ObjectFile/LinkedObjectFile.h" +namespace decompiler { // utility class to extract fields of an opcode. 
struct OpcodeFields { OpcodeFields(uint32_t _data) : data(_data) {} @@ -1171,3 +1172,4 @@ Instruction decode_instruction(LinkedWord& word, LinkedObjectFile& file, int seg return i; } +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Disasm/InstructionDecode.h b/decompiler/Disasm/InstructionDecode.h index 137234eb2b..45a2991637 100644 --- a/decompiler/Disasm/InstructionDecode.h +++ b/decompiler/Disasm/InstructionDecode.h @@ -11,9 +11,10 @@ #include "Instruction.h" +namespace decompiler { class LinkedWord; class LinkedObjectFile; Instruction decode_instruction(LinkedWord& word, LinkedObjectFile& file, int seg_id, int word_id); - +} // namespace decompiler #endif // NEXT_INSTRUCTIONDECODE_H diff --git a/decompiler/Disasm/InstructionMatching.cpp b/decompiler/Disasm/InstructionMatching.cpp index a05c26f92c..4cb66a4fd4 100644 --- a/decompiler/Disasm/InstructionMatching.cpp +++ b/decompiler/Disasm/InstructionMatching.cpp @@ -6,6 +6,7 @@ #include #include "InstructionMatching.h" +namespace decompiler { /*! * Check if the given instruction stores a GPR with the specified parameters. 
*/ @@ -348,3 +349,4 @@ bool is_always_branch(const Instruction& instr) { return false; } +} // namespace decompiler diff --git a/decompiler/Disasm/InstructionMatching.h b/decompiler/Disasm/InstructionMatching.h index 850cf7a0ce..2f14fabe0a 100644 --- a/decompiler/Disasm/InstructionMatching.h +++ b/decompiler/Disasm/InstructionMatching.h @@ -11,6 +11,7 @@ #include "Instruction.h" #include "decompiler/util/MatchParam.h" +namespace decompiler { bool is_no_link_gpr_store(const Instruction& instr, MatchParam size, MatchParam src, @@ -56,5 +57,5 @@ Register make_fpr(int fpr); bool is_branch(const Instruction& instr, MatchParam likely); bool is_always_branch(const Instruction& instr); - +} // namespace decompiler #endif // JAK_DISASSEMBLER_INSTRUCTIONMATCHING_H diff --git a/decompiler/Disasm/InstructionParser.cpp b/decompiler/Disasm/InstructionParser.cpp new file mode 100644 index 0000000000..b626c0469b --- /dev/null +++ b/decompiler/Disasm/InstructionParser.cpp @@ -0,0 +1,307 @@ +#include +#include +#include +#include "common/common_types.h" +#include "InstructionParser.h" + +namespace decompiler { +InstructionParser::InstructionParser() { + init_opcode_info(); + + // we only support a subset of the total instructions. These are common used and don't have + // strange formatting. 
+ int added = 0; + for (auto i : {InstructionKind::DADDIU, InstructionKind::ADDIU, InstructionKind::SLTI, + InstructionKind::SLTIU, InstructionKind::SB, InstructionKind::SH, + InstructionKind::SW, InstructionKind::SD, InstructionKind::SQ, + InstructionKind::LB, InstructionKind::LBU, InstructionKind::LH, + InstructionKind::LHU, InstructionKind::LW, InstructionKind::LWU, + InstructionKind::LD, InstructionKind::LQ, InstructionKind::LDR, + InstructionKind::LDL, InstructionKind::LWL, InstructionKind::LWR, + InstructionKind::DADDU, InstructionKind::SUBU, InstructionKind::ADDU, + InstructionKind::DSUBU, InstructionKind::MULT3, InstructionKind::MULTU3, + InstructionKind::AND, InstructionKind::OR, InstructionKind::NOR, + InstructionKind::XOR, InstructionKind::MOVN, InstructionKind::MOVZ, + InstructionKind::SLT, InstructionKind::SLTU, InstructionKind::SLL, + InstructionKind::SRA, InstructionKind::SRL, InstructionKind::DSLL, + InstructionKind::DSLL32, InstructionKind::DSRA, InstructionKind::DSRA32, + InstructionKind::DSRL, InstructionKind::DSRL32, InstructionKind::DSRAV, + InstructionKind::SLLV, InstructionKind::DSLLV, InstructionKind::DSRLV, + InstructionKind::DIV, InstructionKind::DIVU, InstructionKind::ORI, + InstructionKind::XORI, InstructionKind::ANDI, InstructionKind::LUI, + InstructionKind::JALR, InstructionKind::JR, InstructionKind::LWC1, + InstructionKind::SWC1, InstructionKind::ADDS, InstructionKind::SUBS, + InstructionKind::MULS, InstructionKind::DIVS, InstructionKind::MINS, + InstructionKind::MAXS, InstructionKind::MADDS, InstructionKind::MSUBS, + InstructionKind::RSQRTS, InstructionKind::ABSS, InstructionKind::NEGS, + InstructionKind::CVTSW, InstructionKind::CVTWS, InstructionKind::MOVS, + InstructionKind::SQRTS, InstructionKind::CLTS, InstructionKind::CLES, + InstructionKind::CEQS, InstructionKind::BC1F, InstructionKind::BC1T, + InstructionKind::BEQ, InstructionKind::BNE, InstructionKind::BEQL, + InstructionKind::BNEL, InstructionKind::BC1FL, 
InstructionKind::BC1TL, + InstructionKind::BLTZ, InstructionKind::BGEZ, InstructionKind::BLEZ, + InstructionKind::BGTZ, InstructionKind::BLTZL, InstructionKind::BGTZL, + InstructionKind::BGEZL}) { + auto& info = gOpcodeInfo[int(i)]; + if (info.defined) { + m_opcode_name_lookup[info.name] = int(i); + added++; + } + } + assert(added == int(m_opcode_name_lookup.size())); +} + +namespace { +std::string get_until_space(std::string& instr) { + assert(!instr.empty()); + size_t i; + for (i = 0; i < instr.length(); i++) { + if (instr[i] == ' ') { + break; + } + } + auto name = instr.substr(0, i); + if (i == instr.length()) { + instr.clear(); + } else { + instr = instr.substr(i + 1); + } + return name; +} + +std::string get_comma_separated(std::string& instr) { + assert(!instr.empty()); + auto arg = get_until_space(instr); + if (instr.empty()) { + assert(arg.back() != ','); + } else { + assert(arg.back() == ','); + arg.pop_back(); + } + return arg; +} + +std::string get_before_paren(std::string& instr) { + size_t i; + for (i = 0; i < instr.length(); i++) { + if (instr[i] == '(') { + auto result = instr.substr(0, i); + instr = instr.substr(i); + return result; + } + } + assert(false); +} + +std::string get_in_paren(std::string& instr) { + assert(instr.length() > 2); + assert(instr.front() == '('); + size_t i; + for (i = 0; i < instr.length(); i++) { + if (instr[i] == ')') { + auto result = instr.substr(1, i - 1); + if (i == instr.length()) { + instr.clear(); + } else { + instr = instr.substr(i + 1); + } + return result; + } + } + assert(false); +} + +bool is_integer(const std::string& str) { + assert(!str.empty()); + char* end; + std::strtol(str.c_str(), &end, 10); + return end == str.c_str() + str.length(); +} + +int parse_integer(const std::string& str) { + assert(!str.empty()); + char* end; + int result = std::strtol(str.c_str(), &end, 10); + assert(end == str.c_str() + str.length()); + return result; +} + +std::vector string_to_lines(const std::string& str) { + 
std::vector result; + std::string::size_type i; + std::string::size_type start = 0; + while (true) { + i = str.find('\n', start); + if (i == std::string::npos) { + if (start < str.length()) { + result.push_back(str.substr(start)); + } + return result; + } else { + result.push_back(str.substr(start, i - start)); + start = i + 1; + } + } +} + +} // namespace + +Instruction InstructionParser::parse_single_instruction( + std::string str, + const std::vector& labels) { + auto name = get_until_space(str); + auto lookup = m_opcode_name_lookup.find(name); + if (lookup == m_opcode_name_lookup.end()) { + throw std::runtime_error("InstructionParser cannot handle opcode " + name); + } + + Instruction instr; + instr.kind = InstructionKind(lookup->second); + auto& info = gOpcodeInfo[lookup->second]; + for (u8 i = 0; i < info.step_count; i++) { + auto& step = info.steps[i]; + switch (step.decode) { + case DecodeType::GPR: { + std::string gpr_name; + if ((info.is_store || info.is_load) && i == 2) { + gpr_name = get_in_paren(str); + } else { + gpr_name = get_comma_separated(str); + } + + Register reg(gpr_name); + assert(reg.get_kind() == Reg::GPR); + InstructionAtom atom; + atom.set_reg(reg); + if (step.is_src) { + instr.add_src(atom); + } else { + instr.add_dst(atom); + } + } break; + + case DecodeType::FPR: { + auto reg_name = get_comma_separated(str); + Register reg(reg_name); + assert(reg.get_kind() == Reg::FPR); + InstructionAtom atom; + atom.set_reg(reg); + if (step.is_src) { + instr.add_src(atom); + } else { + instr.add_dst(atom); + } + } break; + + case DecodeType::IMM: { + InstructionAtom atom; + std::string atom_str; + if ((info.is_store || info.is_load) && i == 1) { + // number before paren + atom_str = get_before_paren(str); + } else { + atom_str = get_comma_separated(str); + } + + if (is_integer(atom_str)) { + auto amt = parse_integer(atom_str); + atom.set_imm(amt); + } else { + atom.set_sym(atom_str); + } + if (step.is_src) { + instr.add_src(atom); + } else { + 
instr.add_dst(atom); + } + + } break; + + case DecodeType::BRANCH_TARGET: { + auto label = get_comma_separated(str); + auto f = std::find_if(labels.begin(), labels.end(), + [&](const DecompilerLabel& l) { return l.name == label; }); + assert(f != labels.end()); + auto idx = f - labels.begin(); + InstructionAtom atom; + atom.set_label(idx); + if (step.is_src) { + instr.add_src(atom); + } else { + instr.add_dst(atom); + } + } break; + default: + assert(false); + } + } + + assert(str.empty()); + return instr; +} + +ParsedProgram InstructionParser::parse_program(const std::string& str) { + ParsedProgram program; + auto lines = string_to_lines(str); + int byte_offset = 0; + // first pass + for (auto& line : lines) { + // strip off leading white space + size_t i; + for (i = 0; i < line.length(); i++) { + if (line[i] != ' ') { + line = line.substr(i); + break; + } + } + + if (line.empty()) { + continue; + } + + if (line.front() == 'L') { + if (line.back() == ':') { + line.pop_back(); + } else { + assert(false); + } + DecompilerLabel label; + label.target_segment = 0; + label.offset = byte_offset; + label.name = line; + program.labels.push_back(label); + } else { + byte_offset += 4; + } + } + + // second pass + for (auto& line : lines) { + if (!line.empty() && line.front() != 'L') { + program.instructions.push_back(parse_single_instruction(line, program.labels)); + } + } + + return program; +} + +std::string ParsedProgram::print() { + std::string result; + + int offset = 0; + for (auto& instr : instructions) { + for (auto& label : labels) { + if (label.offset == offset) { + result += label.name; + result += ":\n"; + } + } + result += ' '; + result += ' '; + result += instr.to_string(labels); + result += '\n'; + offset += 4; + } + return result; +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Disasm/InstructionParser.h b/decompiler/Disasm/InstructionParser.h new file mode 100644 index 0000000000..bd328f139e --- /dev/null +++ 
b/decompiler/Disasm/InstructionParser.h @@ -0,0 +1,29 @@ +/*! + * The InstructionParser converts a string like "daddu a0, s7, r0" into an Instruction. + * It is used to generate test sequences of instructions for decompiler algorithms. + */ + +#pragma once + +#include +#include +#include "Instruction.h" +#include "DecompilerLabel.h" + +namespace decompiler { +struct ParsedProgram { + std::vector labels; + std::vector instructions; + std::string print(); +}; + +class InstructionParser { + public: + InstructionParser(); + Instruction parse_single_instruction(std::string str, const std::vector& labels); + ParsedProgram parse_program(const std::string& str); + + private: + std::unordered_map m_opcode_name_lookup; +}; +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Disasm/OpcodeInfo.cpp b/decompiler/Disasm/OpcodeInfo.cpp index 4ae837be23..f016692c3b 100644 --- a/decompiler/Disasm/OpcodeInfo.cpp +++ b/decompiler/Disasm/OpcodeInfo.cpp @@ -6,8 +6,13 @@ #include "OpcodeInfo.h" #include +namespace decompiler { OpcodeInfo gOpcodeInfo[(uint32_t)InstructionKind::EE_OP_MAX]; +namespace { +bool opcodes_initialized = false; +} + typedef InstructionKind IK; typedef FieldType FT; typedef DecodeType DT; @@ -130,6 +135,9 @@ static OpcodeInfo& cd_dacc_svfs_svft(OpcodeInfo& info) { } void init_opcode_info() { + if (opcodes_initialized) { + return; + } gOpcodeInfo[0].name = ";; ??????"; // RT, RS, SIMM @@ -444,6 +452,7 @@ void init_opcode_info() { // for the UNKNOWN op which shouldn't be valid. 
total_count--; assert(total_count == valid_count); + opcodes_initialized = true; } void OpcodeInfo::step(DecodeStep& s) { @@ -501,4 +510,5 @@ OpcodeInfo& OpcodeInfo::dst_vf(FieldType field) { OpcodeInfo& OpcodeInfo::dst_vi(FieldType field) { return dst(field, DT::VI); -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Disasm/OpcodeInfo.h b/decompiler/Disasm/OpcodeInfo.h index 23413bbcad..2d89684917 100644 --- a/decompiler/Disasm/OpcodeInfo.h +++ b/decompiler/Disasm/OpcodeInfo.h @@ -10,6 +10,7 @@ #include +namespace decompiler { enum class InstructionKind { UNKNOWN, @@ -342,12 +343,12 @@ struct OpcodeInfo { OpcodeInfo& dst_vf(FieldType field); OpcodeInfo& dst_vi(FieldType field); - uint8_t step_count; + uint8_t step_count = 0; DecodeStep steps[MAX_DECODE_STEPS]; }; extern OpcodeInfo gOpcodeInfo[(uint32_t)InstructionKind::EE_OP_MAX]; void init_opcode_info(); - +} // namespace decompiler #endif // NEXT_OPCODEINFO_H diff --git a/decompiler/Disasm/Register.cpp b/decompiler/Disasm/Register.cpp index 17ed886671..05b9c8631b 100644 --- a/decompiler/Disasm/Register.cpp +++ b/decompiler/Disasm/Register.cpp @@ -7,6 +7,24 @@ #include #include +namespace decompiler { +namespace Reg { +// register which may hold GOAL local variables + +// clang-format off +const bool allowed_local_gprs[Reg::MAX_GPR] = { + false /*R0*/, false /*AT*/, true /*V0*/, true /*V1*/, + true /*A0*/, true /*A1*/, true /*A2*/, true /*A3*/, + true /*T0*/, true /*T1*/, true /*T2*/, true /*T3*/, + true /*T4*/, true /*T5*/, true /*T6*/, true /*T7*/, + true /*S0*/, true /*S1*/, true /*S2*/, true /*S3*/, + true /*S4*/, true /*S5*/, false /*S6*/, false /*S7*/, + true /*T8*/, true /*T9*/, false /*K0*/, false /*K1*/, + true /*GP*/, true /*SP*/, false /*FP*/, false /*RA*/ +}; +// clang-format on +} // namespace Reg + //////////////////////////// // Register Name Constants //////////////////////////// @@ -233,4 +251,5 @@ bool Register::operator==(const 
Register& other) const { bool Register::operator!=(const Register& other) const { return id != other.id; -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Disasm/Register.h b/decompiler/Disasm/Register.h index 4f1ba1dc36..122faf18c4 100644 --- a/decompiler/Disasm/Register.h +++ b/decompiler/Disasm/Register.h @@ -11,6 +11,7 @@ #include #include +namespace decompiler { // Namespace for register name constants namespace Reg { enum RegisterKind { @@ -120,6 +121,9 @@ enum Vi { CMSAR1 = 31, MAX_COP2 = 32 }; + +const extern bool allowed_local_gprs[Reg::MAX_GPR]; + } // namespace Reg // Representation of a register. Uses a 32-bit integer internally. @@ -148,5 +152,5 @@ class Register { private: uint16_t id = -1; }; - +} // namespace decompiler #endif // NEXT_REGISTER_H diff --git a/decompiler/Function/BasicBlocks.cpp b/decompiler/Function/BasicBlocks.cpp index 6c9850998f..82745acd2c 100644 --- a/decompiler/Function/BasicBlocks.cpp +++ b/decompiler/Function/BasicBlocks.cpp @@ -4,6 +4,7 @@ #include "decompiler/ObjectFile/LinkedObjectFile.h" #include "decompiler/Disasm/InstructionMatching.h" +namespace decompiler { /*! * Find all basic blocks in a function. * All delay slot instructions are grouped with the branch instruction. 
@@ -48,4 +49,5 @@ std::vector find_blocks_in_function(const LinkedObjectFile& file, } return basic_blocks; -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/BasicBlocks.h b/decompiler/Function/BasicBlocks.h index 452e1833c8..457c60f470 100644 --- a/decompiler/Function/BasicBlocks.h +++ b/decompiler/Function/BasicBlocks.h @@ -7,6 +7,7 @@ #include "decompiler/util/DecompilerTypeSystem.h" #include "decompiler/util/TP_Type.h" +namespace decompiler { class LinkedObjectFile; class Function; @@ -48,3 +49,4 @@ struct BlockTopologicalSort { std::vector find_blocks_in_function(const LinkedObjectFile& file, int seg, const Function& func); +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/CfgVtx.cpp b/decompiler/Function/CfgVtx.cpp index 005084def7..22f6114288 100644 --- a/decompiler/Function/CfgVtx.cpp +++ b/decompiler/Function/CfgVtx.cpp @@ -5,6 +5,7 @@ #include "CfgVtx.h" #include "Function.h" +namespace decompiler { ///////////////////////////////////////// /// CfgVtx ///////////////////////////////////////// @@ -1912,3 +1913,4 @@ std::shared_ptr build_cfg(const LinkedObjectFile& file, int se return cfg; } +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/CfgVtx.h b/decompiler/Function/CfgVtx.h index 2ba28be0c3..b630ec7aef 100644 --- a/decompiler/Function/CfgVtx.h +++ b/decompiler/Function/CfgVtx.h @@ -11,6 +11,7 @@ namespace goos { class Object; } +namespace decompiler { /*! * In v, find an item equal to old, and replace it with replace. * Will throw an error is there is not exactly one thing equal to old. 
@@ -351,5 +352,5 @@ class ControlFlowGraph { class LinkedObjectFile; class Function; std::shared_ptr build_cfg(const LinkedObjectFile& file, int seg, Function& func); - +} // namespace decompiler #endif // JAK_DISASSEMBLER_CFGVTX_H diff --git a/decompiler/Function/ExpressionBuilder.cpp b/decompiler/Function/ExpressionBuilder.cpp index b49faddf62..f7fdd6fb41 100644 --- a/decompiler/Function/ExpressionBuilder.cpp +++ b/decompiler/Function/ExpressionBuilder.cpp @@ -2,6 +2,7 @@ #include "decompiler/IR/IR.h" #include "ExpressionStack.h" +namespace decompiler { namespace { bool expressionize_begin(IR_Begin* begin, LinkedObjectFile& file) { ExpressionStack stack; @@ -55,4 +56,5 @@ bool Function::build_expression(LinkedObjectFile& file) { } return true; -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/ExpressionStack.cpp b/decompiler/Function/ExpressionStack.cpp index 88eff57668..6135362335 100644 --- a/decompiler/Function/ExpressionStack.cpp +++ b/decompiler/Function/ExpressionStack.cpp @@ -1,6 +1,7 @@ #include "third-party/fmt/core.h" #include "ExpressionStack.h" +namespace decompiler { std::string ExpressionStack::StackEntry::print(LinkedObjectFile& file) { return fmt::format("d: {} s: {} | {} <- {}", display, sequence_point, destination.has_value() ? destination.value().to_charp() : "N/A", @@ -107,4 +108,5 @@ ExpressionStack::StackEntry& ExpressionStack::get_display_stack_top() { } } assert(false); -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/ExpressionStack.h b/decompiler/Function/ExpressionStack.h index 8669111367..281254d9a0 100644 --- a/decompiler/Function/ExpressionStack.h +++ b/decompiler/Function/ExpressionStack.h @@ -6,6 +6,7 @@ #include "decompiler/Disasm/Register.h" #include "decompiler/util/TP_Type.h" +namespace decompiler { /*! 
* An ExpressionStack is used to track partial expressions when rebuilding the tree structure of * GOAL code. Linear sequences of operations are added onto the expression stack. @@ -33,4 +34,5 @@ class ExpressionStack { bool display_stack_empty(); StackEntry& get_display_stack_top(); -}; \ No newline at end of file +}; +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/Function.cpp b/decompiler/Function/Function.cpp index cf38418282..71d7d2a352 100644 --- a/decompiler/Function/Function.cpp +++ b/decompiler/Function/Function.cpp @@ -1,13 +1,14 @@ #include #include #include "Function.h" -#include "third-party/spdlog/include/spdlog/spdlog.h" +#include "common/log/log.h" #include "decompiler/Disasm/InstructionMatching.h" #include "decompiler/ObjectFile/LinkedObjectFile.h" #include "decompiler/util/DecompilerTypeSystem.h" #include "TypeInspector.h" #include "decompiler/IR/IR.h" +namespace decompiler { namespace { std::vector gpr_backups = {make_gpr(Reg::GP), make_gpr(Reg::S5), make_gpr(Reg::S4), make_gpr(Reg::S3), make_gpr(Reg::S2), make_gpr(Reg::S1), @@ -70,8 +71,8 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { // storing stack pointer on the stack is done by some ASM kernel functions if (instr.kind == InstructionKind::SW && instr.get_src(0).get_reg() == make_gpr(Reg::SP)) { printf("[Warning] %s Suspected ASM function based on this instruction in prologue: %s\n", - guessed_name.to_string().c_str(), instr.to_string(file).c_str()); - warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n"; + guessed_name.to_string().c_str(), instr.to_string(file.labels).c_str()); + warnings += ";; Flagged as ASM function because of " + instr.to_string(file.labels) + "\n"; suspected_asm = true; return; } @@ -92,9 +93,9 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { // storing s7 on the stack is done by interrupt handlers, which we probably don't want to // support if (instr.kind == 
InstructionKind::SD && instr.get_src(0).get_reg() == make_gpr(Reg::S7)) { - spdlog::warn("{} Suspected ASM function based on this instruction in prologue: {}\n", - guessed_name.to_string(), instr.to_string(file)); - warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n"; + lg::warn("{} Suspected ASM function based on this instruction in prologue: {}\n", + guessed_name.to_string(), instr.to_string(file.labels)); + warnings += ";; Flagged as ASM function because of " + instr.to_string(file.labels) + "\n"; suspected_asm = true; return; } @@ -164,9 +165,9 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { suspected_asm = true; printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n", guessed_name.to_string().c_str(), - instructions.at(idx + i).to_string(file).c_str()); + instructions.at(idx + i).to_string(file.labels).c_str()); warnings += ";; Suspected asm function due to stack store: " + - instructions.at(idx + i).to_string(file) + "\n"; + instructions.at(idx + i).to_string(file.labels) + "\n"; return; } } @@ -194,9 +195,9 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { suspected_asm = true; printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n", guessed_name.to_string().c_str(), - instructions.at(idx + i).to_string(file).c_str()); + instructions.at(idx + i).to_string(file.labels).c_str()); warnings += ";; Suspected asm function due to stack store: " + - instructions.at(idx + i).to_string(file) + "\n"; + instructions.at(idx + i).to_string(file.labels) + "\n"; return; } } @@ -643,7 +644,7 @@ void Function::find_type_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts) // done! 
// fmt::print("Got type {} parent {}\n", type_name, parent_type); dts.add_type_parent(type_name, parent_type); - Label flag_label = file.labels.at(label_idx); + DecompilerLabel flag_label = file.labels.at(label_idx); u64 word = file.read_data_word(flag_label); flag_label.offset += 4; u64 word2 = file.read_data_word(flag_label); @@ -744,4 +745,5 @@ BlockTopologicalSort Function::bb_topo_sort() { } return result; -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index 9d0bfbfa48..36c6dd1e9d 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -15,6 +15,7 @@ #include "common/type_system/TypeSpec.h" #include "decompiler/config.h" +namespace decompiler { class DecompilerTypeSystem; class IR_Atomic; class IR; @@ -158,5 +159,5 @@ class Function { std::unordered_map instruction_to_basic_op; std::unordered_map basic_op_to_instruction; }; - +} // namespace decompiler #endif // NEXT_FUNCTION_H diff --git a/decompiler/Function/RegUsage.cpp b/decompiler/Function/RegUsage.cpp index 580ddfc49b..61bcd991d0 100644 --- a/decompiler/Function/RegUsage.cpp +++ b/decompiler/Function/RegUsage.cpp @@ -1,6 +1,7 @@ #include "Function.h" #include "decompiler/IR/IR.h" +namespace decompiler { namespace { bool in_set(RegSet& set, const Register& obj) { return set.find(obj) != set.end(); @@ -170,4 +171,5 @@ void Function::run_reg_usage() { } } } -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp index b9ec4897b6..339acaa328 100644 --- a/decompiler/Function/TypeAnalysis.cpp +++ b/decompiler/Function/TypeAnalysis.cpp @@ -3,6 +3,7 @@ #include "third-party/fmt/core.h" #include "decompiler/config.h" +namespace decompiler { namespace { TypeState construct_initial_typestate(const TypeSpec& f_ts) { TypeState result; @@ -132,4 +133,5 
@@ bool Function::run_type_analysis(const TypeSpec& my_type, } return true; -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/TypeInspector.cpp b/decompiler/Function/TypeInspector.cpp index f2a8ba4f18..34c3c255ca 100644 --- a/decompiler/Function/TypeInspector.cpp +++ b/decompiler/Function/TypeInspector.cpp @@ -8,6 +8,7 @@ #include "common/type_system/deftype.h" #include "decompiler/IR/IR.h" +namespace decompiler { namespace { struct FieldPrint { char format = '\0'; @@ -843,4 +844,5 @@ std::string TypeInspectorResult::print_as_deftype() { result.append(")\n"); return result; -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/Function/TypeInspector.h b/decompiler/Function/TypeInspector.h index 4381687ec6..a11315964e 100644 --- a/decompiler/Function/TypeInspector.h +++ b/decompiler/Function/TypeInspector.h @@ -8,10 +8,12 @@ #include #include "common/common_types.h" +class Field; + +namespace decompiler { class Function; class DecompilerTypeSystem; class LinkedObjectFile; -class Field; struct TypeInspectorResult { bool success = false; @@ -34,3 +36,4 @@ TypeInspectorResult inspect_inspect_method(Function& inspect, const std::string& type_name, DecompilerTypeSystem& dts, LinkedObjectFile& file); +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/BasicOpBuilder.cpp b/decompiler/IR/BasicOpBuilder.cpp index 7f8f337557..273c4facd6 100644 --- a/decompiler/IR/BasicOpBuilder.cpp +++ b/decompiler/IR/BasicOpBuilder.cpp @@ -11,9 +11,11 @@ #include "decompiler/Function/Function.h" #include "decompiler/Function/BasicBlocks.h" #include "decompiler/Disasm/InstructionMatching.h" +#include "decompiler/ObjectFile/LinkedObjectFile.h" #include "decompiler/IR/IR.h" #include "common/symbols.h" +namespace decompiler { namespace { /////////////////////////////// @@ -135,7 +137,7 @@ std::shared_ptr to_asm_automatic(const 
std::string& str, Instruction& } if (instr.n_src >= 3) { - result->src1 = instr_atom_to_ir(instr.get_src(2), idx); + result->src2 = instr_atom_to_ir(instr.get_src(2), idx); } result->set_reg_info(); @@ -2520,7 +2522,7 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec // everything failed if (!result) { // temp hack for debug: - printf("Instruction -> BasicOp failed on %s\n", i.to_string(*file).c_str()); + printf("Instruction -> BasicOp failed on %s\n", i.to_string(file->labels).c_str()); func->add_basic_op(std::make_shared(), instr, instr + 1); } else { if (!func->contains_asm_ops && dynamic_cast(result.get())) { @@ -2536,3 +2538,4 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec } } } +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/BasicOpBuilder.h b/decompiler/IR/BasicOpBuilder.h index b7a2b553d8..d20b06c850 100644 --- a/decompiler/IR/BasicOpBuilder.h +++ b/decompiler/IR/BasicOpBuilder.h @@ -6,8 +6,10 @@ #pragma once +namespace decompiler { class Function; struct BasicBlock; class LinkedObjectFile; -void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjectFile* file); \ No newline at end of file +void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjectFile* file); +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/CfgBuilder.cpp b/decompiler/IR/CfgBuilder.cpp index 4414800e8e..df4066587d 100644 --- a/decompiler/IR/CfgBuilder.cpp +++ b/decompiler/IR/CfgBuilder.cpp @@ -7,6 +7,7 @@ #include "decompiler/Disasm/InstructionMatching.h" #include "decompiler/IR/IR.h" +namespace decompiler { namespace { std::shared_ptr cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx); @@ -1278,3 +1279,4 @@ std::shared_ptr build_cfg_ir(Function& function, return nullptr; } } +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/CfgBuilder.h b/decompiler/IR/CfgBuilder.h 
index b15b08b626..7592e887a6 100644 --- a/decompiler/IR/CfgBuilder.h +++ b/decompiler/IR/CfgBuilder.h @@ -2,9 +2,11 @@ #include +namespace decompiler { class IR; class Function; class LinkedObjectFile; class ControlFlowGraph; -std::shared_ptr build_cfg_ir(Function& function, ControlFlowGraph& cfg, LinkedObjectFile& file); \ No newline at end of file +std::shared_ptr build_cfg_ir(Function& function, ControlFlowGraph& cfg, LinkedObjectFile& file); +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index fda1196fa6..8c4edd2688 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -3,6 +3,7 @@ #include "common/goos/PrettyPrinter.h" #include "third-party/fmt/core.h" +namespace decompiler { // hack to print out reverse deref paths on loads to help with debugging load stuff. bool enable_hack_load_path_print = false; // hack to print (begin x) as x to make debug output easier to read. @@ -1273,4 +1274,5 @@ goos::Object IR_Break::to_form(const LinkedObjectFile& file) const { void IR_Break::get_children(std::vector>* output) const { output->push_back(return_code); output->push_back(dead_code); -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index ba4c22bbc3..3c9198d1d5 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -11,14 +11,15 @@ #include "decompiler/util/DecompilerTypeSystem.h" #include "decompiler/util/TP_Type.h" -class LinkedObjectFile; -class DecompilerTypeSystem; -class ExpressionStack; - namespace goos { class Object; } +namespace decompiler { +class LinkedObjectFile; +class DecompilerTypeSystem; +class ExpressionStack; + class IR { public: virtual goos::Object to_form(const LinkedObjectFile& file) const = 0; @@ -765,5 +766,5 @@ class IR_Break : public virtual IR { goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; }; - 
+} // namespace decompiler #endif // JAK_IR_H diff --git a/decompiler/IR/IR_ExpressionStack.cpp b/decompiler/IR/IR_ExpressionStack.cpp index f11992aa4f..1d56e973a8 100644 --- a/decompiler/IR/IR_ExpressionStack.cpp +++ b/decompiler/IR/IR_ExpressionStack.cpp @@ -2,6 +2,7 @@ #include "IR.h" #include "decompiler/Function/ExpressionStack.h" +namespace decompiler { bool IR_Set_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { // first determine the type of the set. switch (kind) { @@ -448,4 +449,5 @@ bool IR_FloatMath1::update_from_stack(const std::unordered_set +#include +#include "third-party/fmt/core.h" +#include "common/goos/PrettyPrinter.h" +#include "decompiler/ObjectFile/LinkedObjectFile.h" +#include "AtomicOp.h" + +namespace decompiler { +///////////////////////////// +// VARIABLE +///////////////////////////// + +Variable::Variable(Mode mode, Register reg, int atomic_idx, bool allow_all) + : m_mode(mode), m_reg(reg), m_atomic_idx(atomic_idx) { + // make sure we're using a valid GPR. + if (reg.get_kind() == Reg::GPR && !allow_all) { + assert(Reg::allowed_local_gprs[reg.get_gpr()] || reg.get_gpr() == Reg::S6); + } +} + +std::string Variable::to_string(const Env* env, Print mode) const { + switch (mode) { + case Print::AS_REG: + return m_reg.to_string(); + case Print::FULL: + return fmt::format("{}-{:03d}-{}", m_reg.to_charp(), m_atomic_idx, + m_mode == Mode::READ ? 
'r' : 'w'); + case Print::AS_VARIABLE: + return env->get_variable_name(m_reg, m_atomic_idx); + case Print::AUTOMATIC: + if (env->has_local_vars()) { + return env->get_variable_name(m_reg, m_atomic_idx); + } else { + return m_reg.to_string(); + } + default: + assert(false); + } +} + +bool Variable::operator==(const Variable& other) const { + return m_mode == other.m_mode && m_reg == other.m_reg && m_atomic_idx && other.m_atomic_idx; +} + +bool Variable::operator!=(const Variable& other) const { + return !((*this) == other); +} + +///////////////////////////// +// AtomicOp +///////////////////////////// +AtomicOp::AtomicOp(int my_idx) : m_my_idx(my_idx) {} + +std::string AtomicOp::to_string(const std::vector& labels, const Env* env) { + return pretty_print::to_string(to_form(labels, env)); +} +bool AtomicOp::operator!=(const AtomicOp& other) const { + return !((*this) == other); +} + +///////////////////////////// +// SimpleAtom +///////////////////////////// + +SimpleAtom SimpleAtom::make_var(const Variable& var) { + SimpleAtom result; + result.m_kind = Kind::VARIABLE; + result.m_variable = var; + return result; +} + +SimpleAtom SimpleAtom::make_sym_ptr(const std::string& name) { + SimpleAtom result; + result.m_kind = Kind::SYMBOL_PTR; + result.m_string = name; + return result; +} + +SimpleAtom SimpleAtom::make_sym_val(const std::string& name) { + SimpleAtom result; + result.m_kind = Kind::SYMBOL_VAL; + result.m_string = name; + return result; +} + +SimpleAtom SimpleAtom::make_empty_list() { + SimpleAtom result; + result.m_kind = Kind::EMPTY_LIST; + return result; +} + +SimpleAtom SimpleAtom::make_int_constant(s64 value) { + SimpleAtom result; + result.m_int = value; + return result; +} + +goos::Object SimpleAtom::to_form(const std::vector& labels, const Env* env) const { + switch (m_kind) { + case Kind::VARIABLE: + return pretty_print::to_symbol(m_variable.to_string(env)); + case Kind::INTEGER_CONSTANT: + return pretty_print::to_symbol(std::to_string(m_int)); + 
case Kind::SYMBOL_PTR: + return pretty_print::to_symbol(fmt::format("'{}", m_string)); + case Kind::SYMBOL_VAL: + return pretty_print::to_symbol(m_string); + case Kind::STATIC_ADDRESS: + return pretty_print::to_symbol(labels.at(m_int).name); + default: + assert(false); + return {}; + } +} + +bool SimpleAtom::operator==(const SimpleAtom& other) const { + if (other.m_kind != m_kind) { + return false; + } + + switch (m_kind) { + case Kind::VARIABLE: + return m_variable == other.m_variable; + case Kind::INTEGER_CONSTANT: + return m_int == other.m_int; + case Kind::SYMBOL_VAL: + case Kind::SYMBOL_PTR: + return m_string == other.m_string; + case Kind::EMPTY_LIST: + return true; + case Kind::STATIC_ADDRESS: + return m_int == other.m_int; + default: + assert(false); + return false; + } +} + +void SimpleAtom::get_regs(std::vector* out) const { + if (is_var()) { + out->push_back(var().reg()); + } +} + +///////////////////////////// +// SimpleExpression +///////////////////////////// + +namespace { +std::string get_simple_expression_op_name(SimpleExpression::Kind kind) { + switch (kind) { + case SimpleExpression::Kind::DIV_S: + return "/.s"; + case SimpleExpression::Kind::MUL_S: + return "*.s"; + case SimpleExpression::Kind::ADD_S: + return "+.s"; + case SimpleExpression::Kind::SUB_S: + return "-.s"; + case SimpleExpression::Kind::MIN_S: + return "min.s"; + case SimpleExpression::Kind::MAX_S: + return "max.s"; + case SimpleExpression::Kind::FLOAT_TO_INT: + return "f2i"; + case SimpleExpression::Kind::INT_TO_FLOAT: + return "i2f"; + case SimpleExpression::Kind::ABS_S: + return "abs.s"; + case SimpleExpression::Kind::NEG_S: + return "neg.s"; + case SimpleExpression::Kind::SQRT_S: + return "sqrt.s"; + case SimpleExpression::Kind::ADD: + return "+"; + case SimpleExpression::Kind::SUB: + return "-"; + case SimpleExpression::Kind::MUL_SIGNED: + return "*.si"; + case SimpleExpression::Kind::DIV_SIGNED: + return "/.si"; + case SimpleExpression::Kind::MOD_SIGNED: + return "%.si"; + 
case SimpleExpression::Kind::DIV_UNSIGNED: + return "/.ui"; + case SimpleExpression::Kind::MOD_UNSIGNED: + return "%.ui"; + case SimpleExpression::Kind::OR: + return "logior"; + case SimpleExpression::Kind::AND: + return "logand"; + case SimpleExpression::Kind::NOR: + return "lognor"; + case SimpleExpression::Kind::XOR: + return "logxor"; + case SimpleExpression::Kind::LEFT_SHIFT: + return "shl"; + case SimpleExpression::Kind::RIGHT_SHIFT_ARITH: + return "sra"; + case SimpleExpression::Kind::RIGHT_SHIFT_LOGIC: + return "srl"; + case SimpleExpression::Kind::MUL_UNSIGNED: + return "*.ui"; + case SimpleExpression::Kind::NOT: + return "lognot"; + case SimpleExpression::Kind::NEG: + return "-"; + default: + assert(false); + } +} + +int get_simple_expression_arg_count(SimpleExpression::Kind kind) { + switch (kind) { + case SimpleExpression::Kind::IDENTITY: + return 1; + case SimpleExpression::Kind::DIV_S: + case SimpleExpression::Kind::MUL_S: + case SimpleExpression::Kind::ADD_S: + case SimpleExpression::Kind::SUB_S: + case SimpleExpression::Kind::MIN_S: + case SimpleExpression::Kind::MAX_S: + return 2; + case SimpleExpression::Kind::FLOAT_TO_INT: + case SimpleExpression::Kind::INT_TO_FLOAT: + case SimpleExpression::Kind::ABS_S: + case SimpleExpression::Kind::NEG_S: + case SimpleExpression::Kind::SQRT_S: + return 1; + case SimpleExpression::Kind::ADD: + case SimpleExpression::Kind::SUB: + case SimpleExpression::Kind::MUL_SIGNED: + case SimpleExpression::Kind::DIV_SIGNED: + case SimpleExpression::Kind::MOD_SIGNED: + case SimpleExpression::Kind::DIV_UNSIGNED: + case SimpleExpression::Kind::MOD_UNSIGNED: + case SimpleExpression::Kind::OR: + case SimpleExpression::Kind::AND: + case SimpleExpression::Kind::NOR: + case SimpleExpression::Kind::XOR: + case SimpleExpression::Kind::LEFT_SHIFT: + case SimpleExpression::Kind::RIGHT_SHIFT_ARITH: + case SimpleExpression::Kind::RIGHT_SHIFT_LOGIC: + case SimpleExpression::Kind::MUL_UNSIGNED: + return 2; + case 
SimpleExpression::Kind::NOT: + case SimpleExpression::Kind::NEG: + return 1; + default: + assert(false); + } +} +} // namespace + +SimpleExpression::SimpleExpression(Kind kind, const SimpleAtom& arg0) : n_args(1) { + m_args[0] = arg0; + m_kind = kind; + assert(get_simple_expression_arg_count(kind) == 1); +} + +SimpleExpression::SimpleExpression(Kind kind, const SimpleAtom& arg0, const SimpleAtom& arg1) + : n_args(2) { + m_args[0] = arg0; + m_args[1] = arg1; + m_kind = kind; + assert(get_simple_expression_arg_count(kind) == 2); +} + +goos::Object SimpleExpression::to_form(const std::vector& labels, + const Env* env) const { + std::vector forms; + if (m_kind == Kind::IDENTITY) { + // we are "identity" so just pass through the atom + assert(args() == 1); + return get_arg(0).to_form(labels, env); + } else { + forms.push_back(pretty_print::to_symbol(get_simple_expression_op_name(m_kind))); + for (int i = 0; i < args(); i++) { + forms.push_back(get_arg(i).to_form(labels, env)); + } + return pretty_print::build_list(forms); + } +} + +bool SimpleExpression::operator==(const SimpleExpression& other) const { + if (m_kind != other.m_kind) { + return false; + } + assert(args() == other.args()); + for (int i = 0; i < args(); i++) { + if (other.get_arg(i) != get_arg(i)) { + return false; + } + } + return true; +} + +void SimpleExpression::get_regs(std::vector* out) const { + for (s8 i = 0; i < args(); i++) { + get_arg(i).get_regs(out); + } +} + +///////////////////////////// +// SetVarOp +///////////////////////////// + +goos::Object SetVarOp::to_form(const std::vector& labels, const Env* env) const { + return pretty_print::build_list(pretty_print::to_symbol("set!"), + pretty_print::to_symbol(m_dst.to_string(env)), + m_src.to_form(labels, env)); +} + +bool SetVarOp::operator==(const AtomicOp& other) const { + if (typeid(SetVarOp) != typeid(other)) { + return false; + } + auto po = dynamic_cast(&other); + assert(po); + return m_dst == po->m_dst && m_src == po->m_src; +} + +bool 
SetVarOp::is_variable_set() const { + return true; +} + +/*! + * A SetVarOp is a sequence point unless its source is an identity expression holding a variable + * whose register kind is the same as the destination's register kind. + */ +bool SetVarOp::is_sequence_point() const { + if (m_src.is_identity()) { + auto& atom = m_src.get_arg(0); + if (atom.is_var()) { + if (atom.var().reg().get_kind() == m_dst.reg().get_kind()) { + // if we're setting a register equal to another register of the same kind. + // todo - this may also be a non-sequence point operation moving a float to a GPR? + return false; + } + } + } + return true; +} + +Variable SetVarOp::get_set_destination() const { + return m_dst; +} + +std::unique_ptr SetVarOp::get_set_source_as_expr() const { + throw std::runtime_error("get_set_source_as_expr NYI for SetVarOp"); +} + +std::unique_ptr SetVarOp::get_as_expr() const { + throw std::runtime_error("get_as_expr NYI for SetVarOp"); +} + +void SetVarOp::update_register_info() { + m_write_regs.push_back(m_dst.reg()); + m_src.get_regs(&m_read_regs); +} + +///////////////////////////// +// AsmOp +///////////////////////////// + +/*! + * Wrap a single instruction (at most one destination and three sources, both asserted). + * A register destination is stored as a WRITE Variable and each register source as a READ Variable + * in the slot with the same index; the trailing "true" is the Variable constructor's allow_all + * argument. Operands that are not registers leave their slot empty. + */ +AsmOp::AsmOp(Instruction instr, int my_idx) : AtomicOp(my_idx), m_instr(std::move(instr)) { + assert(m_instr.n_dst <= 1); + if (m_instr.n_dst == 1) { + auto& dst = m_instr.get_dst(0); + if (dst.is_reg()) { + m_dst = Variable(Variable::Mode::WRITE, dst.get_reg(), my_idx, true); + } + } + + assert(m_instr.n_src <= 3); + for (int i = 0; i < m_instr.n_src; i++) { + auto& src = m_instr.get_src(i); + if (src.is_reg()) { + m_src[i] = Variable(Variable::Mode::READ, src.get_reg(), my_idx, true); + } + } +} + +/*! + * Build a list starting with "." followed by the op name, then the destination (if any), then + * every source in order. An operand that has a Variable is printed through the Variable; any other + * operand is printed as the instruction atom at that same position. + */ +goos::Object AsmOp::to_form(const std::vector& labels, const Env* env) const { + std::vector forms; + forms.push_back(pretty_print::to_symbol("."
+ m_instr.op_name_to_string())); + assert(m_instr.n_dst <= 1); + + if (m_instr.n_dst == 1) { + if (m_dst.has_value()) { + // then print it as a variable + forms.push_back(pretty_print::to_symbol(m_dst.value().to_string(env))); + } else { + // print the atom + forms.push_back(pretty_print::to_symbol(m_instr.get_dst(0).to_string(labels))); + } + } + + assert(m_instr.n_src <= 3); + for (int i = 0; i < m_instr.n_src; i++) { + if (m_src[i].has_value()) { + forms.push_back(pretty_print::to_symbol(m_src[i].value().to_string(env))); + } else { + // print the i-th source atom (must use the loop index, not a fixed position) + forms.push_back(pretty_print::to_symbol(m_instr.get_src(i).to_string(labels))); + } + } + + return pretty_print::build_list(forms); +} + +/*! + * Equal only to another AsmOp with the same instruction, destination and all three source slots. + */ +bool AsmOp::operator==(const AtomicOp& other) const { + if (typeid(AsmOp) != typeid(other)) { + return false; + } + + auto po = dynamic_cast(&other); + assert(po); + + return (m_instr == po->m_instr) && (m_dst == po->m_dst) && (m_src[0] == po->m_src[0]) && + (m_src[1] == po->m_src[1]) && (m_src[2] == po->m_src[2]); +} + +bool AsmOp::is_variable_set() const { + return false; +} + +bool AsmOp::is_sequence_point() const { + return true; +} + +Variable AsmOp::get_set_destination() const { + throw std::runtime_error("AsmOp cannot be treated as a set! operation"); +} + +std::unique_ptr AsmOp::get_set_source_as_expr() const { + throw std::runtime_error("AsmOp cannot be treated as a set!
operation"); +} + +std::unique_ptr AsmOp::get_as_expr() const { + throw std::runtime_error("AsmOp::get_as_expr is not implemented."); +} + +/*! + * Report the destination as written and every source as read, but only for the operands that + * have a Variable stored in m_dst / m_src. + */ +void AsmOp::update_register_info() { + if (m_dst.has_value()) { + m_write_regs.push_back(m_dst->reg()); + } + + for (auto& src : m_src) { + if (src.has_value()) { + m_read_regs.push_back(src->reg()); + } + } +} + +///////////////////////////// +// Condition +///////////////////////////// + +namespace { +/*! + * Name used when printing a condition of the given kind. + * Throws std::runtime_error for a kind that is not handled (after asserting in debug builds). + */ +std::string get_condition_kind_name(IR2_Condition::Kind kind) { + switch (kind) { + case IR2_Condition::Kind::NOT_EQUAL: + return "!="; + case IR2_Condition::Kind::EQUAL: + return "="; + case IR2_Condition::Kind::LESS_THAN_SIGNED: + return "<.si"; + case IR2_Condition::Kind::LESS_THAN_UNSIGNED: + return "<.ui"; + case IR2_Condition::Kind::GREATER_THAN_SIGNED: + return ">.si"; + case IR2_Condition::Kind::GREATER_THAN_UNSIGNED: + return ">.ui"; + case IR2_Condition::Kind::LEQ_SIGNED: + return "<=.si"; + case IR2_Condition::Kind::GEQ_SIGNED: + return ">=.si"; + case IR2_Condition::Kind::LEQ_UNSIGNED: + return "<=.ui"; + case IR2_Condition::Kind::GEQ_UNSIGNED: + return ">=.ui"; + case IR2_Condition::Kind::ZERO: + return "zero?"; + case IR2_Condition::Kind::NONZERO: + return "nonzero?"; + case IR2_Condition::Kind::FALSE: + return "not"; + case IR2_Condition::Kind::TRUTHY: + return "truthy"; + case IR2_Condition::Kind::ALWAYS: + return "#t"; + case IR2_Condition::Kind::NEVER: + return "#f"; + case IR2_Condition::Kind::FLOAT_EQUAL: + return "=.s"; + case IR2_Condition::Kind::FLOAT_NOT_EQUAL: + return "!=.s"; + case IR2_Condition::Kind::FLOAT_LESS_THAN: + return "<.s"; + case IR2_Condition::Kind::FLOAT_GEQ: + return ">=.s"; + case IR2_Condition::Kind::FLOAT_GREATER_THAN: + return ">.s"; + case IR2_Condition::Kind::FLOAT_LEQ: + return "<=.s"; + case IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED: + return ">0.si"; + case IR2_Condition::Kind::GEQ_ZERO_SIGNED: + return ">=0.si"; + case IR2_Condition::Kind::LESS_THAN_ZERO: + return "<0.si";
+ case IR2_Condition::Kind::LEQ_ZERO_SIGNED: + return "<=0.si"; + default: + assert(false); + // assert is compiled out with NDEBUG: never fall off the end of a non-void function + throw std::runtime_error("get_condition_kind_name: unhandled IR2_Condition::Kind"); + } +} + +/*! + * Number of source variables (2, 1 or 0) used by a condition of the given kind. + * Throws std::runtime_error for a kind that is not handled (after asserting in debug builds). + */ +int get_condition_num_args(IR2_Condition::Kind kind) { + switch (kind) { + case IR2_Condition::Kind::NOT_EQUAL: + case IR2_Condition::Kind::EQUAL: + case IR2_Condition::Kind::LESS_THAN_SIGNED: + case IR2_Condition::Kind::LESS_THAN_UNSIGNED: + case IR2_Condition::Kind::GREATER_THAN_SIGNED: + case IR2_Condition::Kind::GREATER_THAN_UNSIGNED: + case IR2_Condition::Kind::LEQ_SIGNED: + case IR2_Condition::Kind::GEQ_SIGNED: + case IR2_Condition::Kind::LEQ_UNSIGNED: + case IR2_Condition::Kind::GEQ_UNSIGNED: + case IR2_Condition::Kind::FLOAT_EQUAL: + case IR2_Condition::Kind::FLOAT_NOT_EQUAL: + case IR2_Condition::Kind::FLOAT_LESS_THAN: + case IR2_Condition::Kind::FLOAT_GEQ: + case IR2_Condition::Kind::FLOAT_GREATER_THAN: + case IR2_Condition::Kind::FLOAT_LEQ: + return 2; + case IR2_Condition::Kind::ZERO: + case IR2_Condition::Kind::NONZERO: + case IR2_Condition::Kind::FALSE: + case IR2_Condition::Kind::TRUTHY: + case IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED: + case IR2_Condition::Kind::GEQ_ZERO_SIGNED: + case IR2_Condition::Kind::LESS_THAN_ZERO: + case IR2_Condition::Kind::LEQ_ZERO_SIGNED: + return 1; + case IR2_Condition::Kind::ALWAYS: + case IR2_Condition::Kind::NEVER: + return 0; + default: + assert(false); + // assert is compiled out with NDEBUG: never fall off the end of a non-void function + throw std::runtime_error("get_condition_num_args: unhandled IR2_Condition::Kind"); + } +} + +/*! + * Kind of the logically opposite condition (EQUAL <-> NOT_EQUAL, LESS_THAN <-> GEQ, ...). + * Throws std::runtime_error for a kind that is not handled (after asserting in debug builds). + */ +IR2_Condition::Kind get_condition_opposite(IR2_Condition::Kind kind) { + switch (kind) { + case IR2_Condition::Kind::NOT_EQUAL: + return IR2_Condition::Kind::EQUAL; + case IR2_Condition::Kind::EQUAL: + return IR2_Condition::Kind::NOT_EQUAL; + case IR2_Condition::Kind::LESS_THAN_SIGNED: + return IR2_Condition::Kind::GEQ_SIGNED; + case IR2_Condition::Kind::GREATER_THAN_SIGNED: + return IR2_Condition::Kind::LEQ_SIGNED; + case IR2_Condition::Kind::LEQ_SIGNED: + return IR2_Condition::Kind::GREATER_THAN_SIGNED; + case IR2_Condition::Kind::GEQ_SIGNED: + return IR2_Condition::Kind::LESS_THAN_SIGNED; + case IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED: +
return IR2_Condition::Kind::LEQ_ZERO_SIGNED; + case IR2_Condition::Kind::LEQ_ZERO_SIGNED: + return IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED; + case IR2_Condition::Kind::LESS_THAN_ZERO: + return IR2_Condition::Kind::GEQ_ZERO_SIGNED; + case IR2_Condition::Kind::GEQ_ZERO_SIGNED: + return IR2_Condition::Kind::LESS_THAN_ZERO; + case IR2_Condition::Kind::LESS_THAN_UNSIGNED: + return IR2_Condition::Kind::GEQ_UNSIGNED; + case IR2_Condition::Kind::GREATER_THAN_UNSIGNED: + return IR2_Condition::Kind::LEQ_UNSIGNED; + case IR2_Condition::Kind::LEQ_UNSIGNED: + return IR2_Condition::Kind::GREATER_THAN_UNSIGNED; + case IR2_Condition::Kind::GEQ_UNSIGNED: + return IR2_Condition::Kind::LESS_THAN_UNSIGNED; + case IR2_Condition::Kind::ZERO: + return IR2_Condition::Kind::NONZERO; + case IR2_Condition::Kind::NONZERO: + return IR2_Condition::Kind::ZERO; + case IR2_Condition::Kind::FALSE: + return IR2_Condition::Kind::TRUTHY; + case IR2_Condition::Kind::TRUTHY: + return IR2_Condition::Kind::FALSE; + case IR2_Condition::Kind::ALWAYS: + return IR2_Condition::Kind::NEVER; + case IR2_Condition::Kind::NEVER: + return IR2_Condition::Kind::ALWAYS; + case IR2_Condition::Kind::FLOAT_EQUAL: + return IR2_Condition::Kind::FLOAT_NOT_EQUAL; + case IR2_Condition::Kind::FLOAT_NOT_EQUAL: + return IR2_Condition::Kind::FLOAT_EQUAL; + case IR2_Condition::Kind::FLOAT_LESS_THAN: + return IR2_Condition::Kind::FLOAT_GEQ; + case IR2_Condition::Kind::FLOAT_GEQ: + return IR2_Condition::Kind::FLOAT_LESS_THAN; + case IR2_Condition::Kind::FLOAT_GREATER_THAN: + return IR2_Condition::Kind::FLOAT_LEQ; + case IR2_Condition::Kind::FLOAT_LEQ: + return IR2_Condition::Kind::FLOAT_GREATER_THAN; + default: + assert(false); + // assert is compiled out with NDEBUG: never fall off the end of a non-void function + throw std::runtime_error("get_condition_opposite: unhandled IR2_Condition::Kind"); + } +} +} // namespace + +/*! + * Condition without source variables. Asserts that the kind takes zero arguments. + */ +IR2_Condition::IR2_Condition(Kind kind) : m_kind(kind) { + assert(get_condition_num_args(m_kind) == 0); +} + +/*! + * Condition on a single source variable. Asserts that the kind takes exactly one argument. + */ +IR2_Condition::IR2_Condition(Kind kind, const Variable& src0) : m_kind(kind) { + m_src[0] = src0; + assert(get_condition_num_args(m_kind) == 1); +} +
+/*! + * Condition comparing two source variables. Asserts that the kind takes exactly two arguments. + */ +IR2_Condition::IR2_Condition(Kind kind, const Variable& src0, const Variable& src1) : m_kind(kind) { + m_src[0] = src0; + m_src[1] = src1; + assert(get_condition_num_args(m_kind) == 2); +} + +/*! + * Replace the kind with its opposite. The source variables are left unchanged. + */ +void IR2_Condition::invert() { + m_kind = get_condition_opposite(m_kind); +} + +/*! + * Equal when the kinds match and the sources actually used by that kind compare equal. + */ +bool IR2_Condition::operator==(const IR2_Condition& other) const { + if (m_kind == other.m_kind) { + for (int i = 0; i < get_condition_num_args(m_kind); i++) { + if (m_src[i] != other.m_src[i]) { + return false; + } + } + return true; + } else { + return false; + } +} + +/*! + * Build a list made of the condition's name followed by each source variable it uses. + * The labels argument is not used. + */ +goos::Object IR2_Condition::to_form(const std::vector& labels, + const Env* env) const { + (void)labels; + std::vector forms; + forms.push_back(pretty_print::to_symbol(get_condition_kind_name(m_kind))); + for (int i = 0; i < get_condition_num_args(m_kind); i++) { + forms.push_back(pretty_print::to_symbol(m_src[i].to_string(env))); + } + return pretty_print::build_list(forms); +} + +/*! + * Append the register of each source variable used by this kind to out. + */ +void IR2_Condition::get_regs(std::vector* out) const { + for (int i = 0; i < get_condition_num_args(m_kind); i++) { + out->push_back(m_src[i].reg()); + } +} + +///////////////////////////// +// SetVarConditionOp +///////////////////////////// + +SetVarConditionOp::SetVarConditionOp(Variable dst, IR2_Condition condition, int my_idx) + : AtomicOp(my_idx), m_dst(dst), m_condition(condition) {} + +/*! + * Build the list (set! dst condition) from the destination's string and the condition's form. + */ +goos::Object SetVarConditionOp::to_form(const std::vector& labels, + const Env* env) const { + return pretty_print::build_list(pretty_print::to_symbol("set!"), + pretty_print::to_symbol(m_dst.to_string(env)), + m_condition.to_form(labels, env)); +} + +/*! + * Equal only to another SetVarConditionOp with the same destination and condition. + */ +bool SetVarConditionOp::operator==(const AtomicOp& other) const { + if (typeid(SetVarConditionOp) != typeid(other)) { + return false; + } + + auto po = dynamic_cast(&other); + assert(po); + return m_dst == po->m_dst && m_condition == po->m_condition; +} + +bool SetVarConditionOp::is_variable_set() const { + return true; +} + +bool SetVarConditionOp::is_sequence_point() const { + return true; +} + +Variable
SetVarConditionOp::get_set_destination() const { + return m_dst; +} + +std::unique_ptr SetVarConditionOp::get_set_source_as_expr() const { + throw std::runtime_error("SetVarConditionOp::get_source_as_expr is not yet implemented."); +} + +std::unique_ptr SetVarConditionOp::get_as_expr() const { + throw std::runtime_error("SetVarConditionOp::get_as_expr is not yet implemented."); +} + +/*! + * The destination register is written; the registers of the condition's sources are read. + */ +void SetVarConditionOp::update_register_info() { + m_write_regs.push_back(m_dst.reg()); + m_condition.get_regs(&m_read_regs); +} + +///////////////////////////// +// StoreOp +///////////////////////////// + +StoreOp::StoreOp(SimpleExpression addr, SimpleAtom value, int my_idx) + : AtomicOp(my_idx), m_addr(std::move(addr)), m_value(std::move(value)) {} + +/*! + * Build the list (store! addr value) from the forms of the address expression and the value. + */ +goos::Object StoreOp::to_form(const std::vector& labels, const Env* env) const { + return pretty_print::build_list(pretty_print::to_symbol("store!"), m_addr.to_form(labels, env), + m_value.to_form(labels, env)); +} + +/*! + * Equal only to another StoreOp with the same address expression and the same value. + */ +bool StoreOp::operator==(const AtomicOp& other) const { + if (typeid(StoreOp) != typeid(other)) { + return false; + } + + auto po = dynamic_cast(&other); + assert(po); + + return m_addr == po->m_addr && m_value == po->m_value; +} + +bool StoreOp::is_variable_set() const { + return false; +} + +bool StoreOp::is_sequence_point() const { + return true; +} + +Variable StoreOp::get_set_destination() const { + throw std::runtime_error("StoreOp cannot be treated as a set! operation"); +} + +std::unique_ptr StoreOp::get_set_source_as_expr() const { + throw std::runtime_error("StoreOp cannot be treated as a set!
operation"); +} + +std::unique_ptr StoreOp::get_as_expr() const { + throw std::runtime_error("StoreOp::get_as_expr is not yet implemented"); +} + +/*! + * A store reports no written registers: the registers of the address expression and of the stored + * value are all added to the read list. + */ +void StoreOp::update_register_info() { + m_addr.get_regs(&m_read_regs); + m_value.get_regs(&m_read_regs); +} + +///////////////////////////// +// LoadVarOp +///////////////////////////// + +LoadVarOp::LoadVarOp(Variable dst, SimpleExpression src, int my_idx) + : AtomicOp(my_idx), m_dst(dst), m_src(std::move(src)) {} + +/*! + * Build the list (set! dst src) from the destination's string and the source expression's form. + */ +goos::Object LoadVarOp::to_form(const std::vector& labels, const Env* env) const { + return pretty_print::build_list(pretty_print::to_symbol("set!"), + pretty_print::to_symbol(m_dst.to_string(env)), + m_src.to_form(labels, env)); +} + +/*! + * Equal only to another LoadVarOp with the same destination and the same source expression. + */ +bool LoadVarOp::operator==(const AtomicOp& other) const { + if (typeid(LoadVarOp) != typeid(other)) { + return false; + } + + auto po = dynamic_cast(&other); + assert(po); + return m_dst == po->m_dst && m_src == po->m_src; +} + +bool LoadVarOp::is_variable_set() const { + return true; +} + +bool LoadVarOp::is_sequence_point() const { + return true; +} + +Variable LoadVarOp::get_set_destination() const { + return m_dst; +} + +std::unique_ptr LoadVarOp::get_set_source_as_expr() const { + throw std::runtime_error("LoadVarOp::get_set_source_as_expr is not yet implemented"); +} + +std::unique_ptr LoadVarOp::get_as_expr() const { + throw std::runtime_error("LoadVarOp::get_as_expr is not yet implemented"); +} + +/*! + * The registers of the source expression are read; the destination register is written. + */ +void LoadVarOp::update_register_info() { + m_src.get_regs(&m_read_regs); + m_write_regs.push_back(m_dst.reg()); +} + +///////////////////////////// +// IR2_BranchDelay +///////////////////////////// + +/*! + * Delay slot without variables. The only kind accepted (asserted) is NOP. + */ +IR2_BranchDelay::IR2_BranchDelay(Kind kind) : m_kind(kind) { + assert(m_kind == Kind::NOP); +} + +/*! + * Delay slot that only writes one variable: SET_REG_FALSE, SET_REG_TRUE, SET_BINTEGER or SET_PAIR. + * Asserts that var0 is in WRITE mode. + */ +IR2_BranchDelay::IR2_BranchDelay(Kind kind, Variable var0) : m_kind(kind) { + assert(m_kind == Kind::SET_REG_FALSE || m_kind == Kind::SET_REG_TRUE || + m_kind == Kind::SET_BINTEGER || m_kind == Kind::SET_PAIR); + assert(var0.mode() == Variable::Mode::WRITE); + m_var[0] = var0; +}
+ +/*! + * Delay slot that writes var0 from var1: NEGATE or SET_REG_REG. + * Asserts that var0 is in WRITE mode and var1 is in READ mode. + */ +IR2_BranchDelay::IR2_BranchDelay(Kind kind, Variable var0, Variable var1) : m_kind(kind) { + assert(m_kind == Kind::NEGATE || m_kind == Kind::SET_REG_REG); + assert(var0.mode() == Variable::Mode::WRITE); + assert(var1.mode() == Variable::Mode::READ); + m_var[0] = var0; + m_var[1] = var1; +} + +/*! + * Delay slot that writes var0 from var1 and var2. The only kind accepted (asserted) is DSLLV. + * Asserts that var0 is in WRITE mode and var1 / var2 are in READ mode. + */ +IR2_BranchDelay::IR2_BranchDelay(Kind kind, Variable var0, Variable var1, Variable var2) + : m_kind(kind) { + assert(m_kind == Kind::DSLLV); + assert(var0.mode() == Variable::Mode::WRITE); + assert(var1.mode() == Variable::Mode::READ); + assert(var2.mode() == Variable::Mode::READ); + m_var[0] = var0; + m_var[1] = var1; + m_var[2] = var2; +} + +/*! + * Print the delay slot: NOP becomes (nop!), every other kind becomes a (set! ...) list. + * The labels argument is not used. + * Throws std::runtime_error for a kind that is not handled (after asserting in debug builds). + */ +goos::Object IR2_BranchDelay::to_form(const std::vector& labels, + const Env* env) const { + (void)labels; + switch (m_kind) { + case Kind::NOP: + return pretty_print::build_list("nop!"); + case Kind::SET_REG_FALSE: + assert(m_var[0].has_value()); + return pretty_print::build_list("set!", m_var[0]->to_string(env), "#f"); + case Kind::SET_REG_TRUE: + assert(m_var[0].has_value()); + return pretty_print::build_list("set!", m_var[0]->to_string(env), "#t"); + case Kind::SET_REG_REG: + assert(m_var[0].has_value()); + assert(m_var[1].has_value()); + return pretty_print::build_list("set!", m_var[0]->to_string(env), m_var[1]->to_string(env)); + case Kind::SET_BINTEGER: + assert(m_var[0].has_value()); + return pretty_print::build_list("set!", m_var[0]->to_string(env), "binteger"); + case Kind::SET_PAIR: + assert(m_var[0].has_value()); + return pretty_print::build_list("set!", m_var[0]->to_string(env), "pair"); + case Kind::DSLLV: + assert(m_var[0].has_value()); + assert(m_var[1].has_value()); + assert(m_var[2].has_value()); + return pretty_print::build_list( + "set!", m_var[0]->to_string(env), + pretty_print::build_list("dsllv", m_var[1]->to_string(env), m_var[2]->to_string(env))); + case Kind::NEGATE: + assert(m_var[0].has_value()); + assert(m_var[1].has_value()); + return pretty_print::build_list("set!", m_var[0]->to_string(env), +
pretty_print::build_list("-", m_var[1]->to_string(env))); + default: + assert(false); + // assert is compiled out with NDEBUG: never fall off the end of a non-void function + throw std::runtime_error("IR2_BranchDelay::to_form: unhandled kind"); + } +} + +/*! + * Equal when all three variable slots and the kind compare equal. + */ +bool IR2_BranchDelay::operator==(const IR2_BranchDelay& other) const { + for (int i = 0; i < 3; i++) { + if (m_var[i] != other.m_var[i]) { + return false; + } + } + return m_kind == other.m_kind; +} + +/*! + * Append the registers used by the delay slot: m_var[0] goes to write, m_var[1] and m_var[2] + * (for the kinds that use them) go to read. NOP adds nothing. + */ +void IR2_BranchDelay::get_regs(std::vector* write, std::vector* read) const { + switch (m_kind) { + case Kind::NOP: + break; + case Kind::SET_REG_FALSE: + case Kind::SET_REG_TRUE: + case Kind::SET_BINTEGER: + case Kind::SET_PAIR: + write->push_back(m_var[0]->reg()); + break; + case Kind::SET_REG_REG: + case Kind::NEGATE: + write->push_back(m_var[0]->reg()); + read->push_back(m_var[1]->reg()); + break; + case Kind::DSLLV: + write->push_back(m_var[0]->reg()); + read->push_back(m_var[1]->reg()); + read->push_back(m_var[2]->reg()); + break; + default: + assert(false); + } +} + +///////////////////////////// +// BranchOp +///////////////////////////// + +BranchOp::BranchOp(bool likely, + IR2_Condition condition, + int label, + IR2_BranchDelay branch_delay, + int my_idx) + : AtomicOp(my_idx), + m_likely(likely), + m_condition(std::move(condition)), + m_label(label), + m_branch_delay(branch_delay) {} + +/*! + * Build a list made of "bl!" (likely) or "b!", the condition, the name of the target label + * (looked up in labels with at(), using m_label as the index) and the delay slot. + */ +goos::Object BranchOp::to_form(const std::vector& labels, const Env* env) const { + std::vector forms; + + if (m_likely) { + forms.push_back(pretty_print::to_symbol("bl!")); + } else { + forms.push_back(pretty_print::to_symbol("b!")); + } + + forms.push_back(m_condition.to_form(labels, env)); + forms.push_back(pretty_print::to_symbol(labels.at(m_label).name)); + forms.push_back(m_branch_delay.to_form(labels, env)); + + return pretty_print::build_list(forms); +} + +/*! + * Equal only to another BranchOp with the same likely flag, condition, label and delay slot. + */ +bool BranchOp::operator==(const AtomicOp& other) const { + if (typeid(BranchOp) != typeid(other)) { + return false; + } + + auto po = dynamic_cast(&other); + assert(po); + return m_likely == po->m_likely && m_condition == po->m_condition && m_label == po->m_label && + m_branch_delay == po->m_branch_delay; +} + +bool
BranchOp::is_variable_set() const { + return false; +} + +bool BranchOp::is_sequence_point() const { + return true; +} + +Variable BranchOp::get_set_destination() const { + throw std::runtime_error("BranchOp cannot be treated as a set! operation"); +} + +std::unique_ptr BranchOp::get_set_source_as_expr() const { + throw std::runtime_error("BranchOp cannot be treated as a set! operation"); +} + +std::unique_ptr BranchOp::get_as_expr() const { + throw std::runtime_error("BranchOp::get_as_expr is not yet implemented"); +} + +/*! + * The condition's sources are read; the delay slot contributes both written and read registers. + */ +void BranchOp::update_register_info() { + m_condition.get_regs(&m_read_regs); + m_branch_delay.get_regs(&m_write_regs, &m_read_regs); +} + +///////////////////////////// +// SpecialOp +///////////////////////////// + +SpecialOp::SpecialOp(Kind kind, int my_idx) : AtomicOp(my_idx), m_kind(kind) {} + +/*! + * Print as a single symbol: nop!, break! or suspend. Neither labels nor env is used. + * Throws std::runtime_error for a kind that is not handled (after asserting in debug builds). + */ +goos::Object SpecialOp::to_form(const std::vector& labels, const Env* env) const { + (void)labels; + (void)env; + switch (m_kind) { + case Kind::NOP: + return pretty_print::to_symbol("nop!"); + case Kind::BREAK: + return pretty_print::to_symbol("break!"); + case Kind::SUSPEND: + return pretty_print::to_symbol("suspend"); + default: + assert(false); + // assert is compiled out with NDEBUG: never fall off the end of a non-void function + throw std::runtime_error("SpecialOp::to_form: unhandled kind"); + } +} + +/*! + * Equal only to another SpecialOp of the same kind. + */ +bool SpecialOp::operator==(const AtomicOp& other) const { + if (typeid(SpecialOp) != typeid(other)) { + return false; + } + + auto po = dynamic_cast(&other); + assert(po); + + return m_kind == po->m_kind; +} + +bool SpecialOp::is_variable_set() const { + return false; +} + +bool SpecialOp::is_sequence_point() const { + return true; +} + +Variable SpecialOp::get_set_destination() const { + throw std::runtime_error("SpecialOp cannot be treated as a set! operation"); +} + +std::unique_ptr SpecialOp::get_set_source_as_expr() const { + throw std::runtime_error("SpecialOp cannot be treated as a set!
operation"); +} + +std::unique_ptr SpecialOp::get_as_expr() const { + throw std::runtime_error("SpecialOp::get_as_expr not yet implemented"); +} + +/*! + * A SpecialOp reports no read or written registers. + */ +void SpecialOp::update_register_info() {} + +///////////////////////////// +// CallOp +///////////////////////////// + +CallOp::CallOp(int my_idx) : AtomicOp(my_idx) {} + +/*! + * Print as the one-element list (call!). Neither labels nor env is used. + */ +goos::Object CallOp::to_form(const std::vector& labels, const Env* env) const { + (void)labels; + (void)env; + return pretty_print::build_list("call!"); +} + +/*! + * A CallOp stores no state of its own here, so it is equal to any other CallOp. + */ +bool CallOp::operator==(const AtomicOp& other) const { + if (typeid(CallOp) != typeid(other)) { + return false; + } + + auto po = dynamic_cast(&other); + assert(po); + return true; +} + +bool CallOp::is_variable_set() const { + return false; +} + +bool CallOp::is_sequence_point() const { + return true; +} + +Variable CallOp::get_set_destination() const { + throw std::runtime_error("CallOp cannot be treated as a set! operation"); +} + +std::unique_ptr CallOp::get_set_source_as_expr() const { + throw std::runtime_error("CallOp cannot be treated as a set! operation"); +} + +std::unique_ptr CallOp::get_as_expr() const { + throw std::runtime_error("CallOp::get_as_expr not yet implemented"); +} + +/*! + * Always throws: the registers used by a call are not known at this stage. + */ +void CallOp::update_register_info() { + throw std::runtime_error("CallOp::update_register_info cannot be done until types are known"); +} + +///////////////////////////// +// ConditionalMoveFalseOp +///////////////////////////// + +ConditionalMoveFalseOp::ConditionalMoveFalseOp(Variable dst, Variable src, bool on_zero, int my_idx) + : AtomicOp(my_idx), m_dst(dst), m_src(src), m_on_zero(on_zero) {} + +/*! + * Print as (cmove-#f-zero dst src) or (cmove-#f-nonzero dst src) depending on m_on_zero. + * The labels argument is not used. + */ +goos::Object ConditionalMoveFalseOp::to_form(const std::vector& labels, + const Env* env) const { + (void)labels; + return pretty_print::build_list(m_on_zero ?
"cmove-#f-zero" : "cmove-#f-nonzero", + m_dst.to_string(env), m_src.to_string(env)); +} + +/*! + * Equal only to another ConditionalMoveFalseOp with the same destination, source and on_zero flag. + */ +bool ConditionalMoveFalseOp::operator==(const AtomicOp& other) const { + if (typeid(ConditionalMoveFalseOp) != typeid(other)) { + return false; + } + + auto po = dynamic_cast(&other); + assert(po); + return m_dst == po->m_dst && m_src == po->m_src && m_on_zero == po->m_on_zero; +} + +bool ConditionalMoveFalseOp::is_variable_set() const { + return false; +} + +bool ConditionalMoveFalseOp::is_sequence_point() const { + return true; +} + +Variable ConditionalMoveFalseOp::get_set_destination() const { + throw std::runtime_error("ConditionalMoveFalseOp cannot be treated as a set! operation"); +} + +std::unique_ptr ConditionalMoveFalseOp::get_set_source_as_expr() const { + throw std::runtime_error("ConditionalMoveFalseOp cannot be treated as a set! operation"); +} + +std::unique_ptr ConditionalMoveFalseOp::get_as_expr() const { + throw std::runtime_error("ConditionalMoveFalseOp::get_as_expr is not yet implemented"); +} + +/*! + * The destination register is reported as written and the source register as read, even though + * is_variable_set() returns false for this op. + */ +void ConditionalMoveFalseOp::update_register_info() { + m_write_regs.push_back(m_dst.reg()); + m_read_regs.push_back(m_src.reg()); +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h new file mode 100644 index 0000000000..8ab07a8df5 --- /dev/null +++ b/decompiler/IR2/AtomicOp.h @@ -0,0 +1,544 @@ +#pragma once + +#include +#include +#include +#include "common/goos/Object.h" +#include "decompiler/Disasm/Register.h" +#include "decompiler/Disasm/Instruction.h" +#include "Env.h" + +namespace decompiler { +class Expr; + +/*! + * A "Variable" represents a register at a given instruction index. + * The register can either be a GOAL local variable or a GOAL register used in inline assembly. + * Because OpenGOAL's registers don't one-to-one map to GOAL registers, GOAL "inline assembly + * registers" will become OpenGOAL variables, and are treated similarly to variables in + * decompilation.
+ * + * In the earlier parts of decompilation, this just behaves like a register in all cases. + * But in later parts registers can be mapped to real local variables with types. A variable can + * look itself up in an environment to determine what "local variable" it is. + * + * Note: a variable is _not_ allowed to be R0, AT, S7, K0, K1, FP, or RA by default, as these + * can never hold normal GOAL locals. Inline assembly may use these, but you must set the allow_all + * flag to true in the constructor of Variable to indicate this is what you really want. + * + * Note: access to the process pointer (s6) is handled as a variable. As a result, you may always + * use s6 as a variable. + */ +class Variable { + public: + enum class Mode : u8 { + READ, // represents value of the variable at the beginning of the instruction + WRITE // represents value of the variable at the end of the instruction + }; + + Variable() = default; + Variable(Mode mode, Register reg, int atomic_idx, bool allow_all = false); + + enum class Print { + AS_REG, // print as a PS2 register name + FULL, // print as a register name, plus an index, plus read or write + AS_VARIABLE, // print local variable name, error if impossible + AUTOMATIC, // print as variable, but if that's not possible print as reg. + }; + + std::string to_string(const Env* env, Print mode = Print::AUTOMATIC) const; + + bool operator==(const Variable& other) const; + bool operator!=(const Variable& other) const; + + const Register& reg() const { return m_reg; } + Mode mode() const { return m_mode; } + int idx() const { return m_atomic_idx; } + + private: + Mode m_mode = Mode::READ; // do we represent a read or a write? + Register m_reg; // the EE register + int m_atomic_idx = -1; // the index in the function's list of AtomicOps +}; + +/*! + * An atomic operation represents a single operation from the point of view of the IR2 system. + * Each IR2 op is one or more instructions.
+ * Each function can be represented as a list of AtomicOps. These are stored in exactly the same + * order as the instructions appear. + * + * The AtomicOps use SimpleAtom and SimpleExpression. These are extremely limited versions of + * the full IR2 expression system, but are much easier to work with because they are less general + * and can't be nested infinitely. They also have features specific to the AtomicOp system that are + * not required for full expressions. The full expression system will later convert these into the + * more complicated expressions. + * + * The types of AtomicOp are: + * ConditionalMoveFalseOp + * CallOp + * SpecialOp + * BranchOp + * LoadVarOp + * StoreOp + * SetVarConditionOp + * AsmOp + * SetVarOp + */ +class AtomicOp { + public: + explicit AtomicOp(int my_idx); + std::string to_string(const std::vector& labels, const Env* env); + virtual goos::Object to_form(const std::vector& labels, + const Env* env) const = 0; + virtual bool operator==(const AtomicOp& other) const = 0; + bool operator!=(const AtomicOp& other) const; + + // determine if this is a (set! thing) form. These will be handled differently in expression + // building. + virtual bool is_variable_set() const = 0; + + // determine if this is a GOAL "sequence point". + // non-sequence point instructions may be out of order from the point of view of the expression + // stack. + virtual bool is_sequence_point() const = 0; + + // get the variable being set by this operation. Only call this if is_variable_set returns true. + virtual Variable get_set_destination() const = 0; + + // get the value of the variable being set, as an expression. Only call this if is_variable_set + // returns true. + virtual std::unique_ptr get_set_source_as_expr() const = 0; + + // convert me to an expression. If I'm a set!, this will produce a (set! x y), which may be + // undesirable when expression stacking.
+ virtual std::unique_ptr get_as_expr() const = 0; + + // figure out what registers are read and written in this AtomicOp and update read_regs, + // write_regs, and clobber_regs. It's expected that these have duplicates if a register appears + // in the original instructions multiple times. Ex: "and v0, v1, v1" would end up putting v1 in + // read twice. + virtual void update_register_info() = 0; + + const std::vector& read_regs() { return m_read_regs; } + const std::vector& write_regs() { return m_write_regs; } + const std::vector& clobber_regs() { return m_clobber_regs; } + + protected: + int m_my_idx = -1; + + // the register values that are read (at the start of this op) + std::vector m_read_regs; + // the registers that have actual values written into them (at the end of this op) + std::vector m_write_regs; + // the registers which have junk written into them. + std::vector m_clobber_regs; +}; + +/*! + * A SimpleAtom has a value: a variable, an integer constant, a symbol pointer, a symbol value, + * the empty list, or a static address. In some cases it can be set. + */ +class SimpleAtom { + public: + enum class Kind : u8 { + VARIABLE, + INTEGER_CONSTANT, + SYMBOL_PTR, + SYMBOL_VAL, + EMPTY_LIST, + STATIC_ADDRESS, + INVALID + }; + + SimpleAtom() = default; + static SimpleAtom make_var(const Variable& var); + static SimpleAtom make_sym_ptr(const std::string& name); + static SimpleAtom make_sym_val(const std::string& name); + static SimpleAtom make_empty_list(); + static SimpleAtom make_int_constant(s64 value); + goos::Object to_form(const std::vector& labels, const Env* env) const; + + bool is_var() const { return m_kind == Kind::VARIABLE; } + const Variable& var() const { + assert(is_var()); + return m_variable; + } + bool is_int() const { return m_kind == Kind::INTEGER_CONSTANT; }; + bool is_sym_ptr() const { return m_kind == Kind::SYMBOL_PTR; }; + bool is_sym_val() const { return m_kind == Kind::SYMBOL_VAL; }; + bool is_empty_list() const { return m_kind == Kind::EMPTY_LIST; }; + bool is_static_addr() const { return m_kind == Kind::STATIC_ADDRESS; }; + bool operator==(const
SimpleAtom& other) const; + bool operator!=(const SimpleAtom& other) const { return !((*this) == other); } + void get_regs(std::vector* out) const; + + private: + Kind m_kind = Kind::INVALID; + std::string m_string; // for symbol ptr and symbol val + s64 m_int = 0; // for integer constant and static address label id + Variable m_variable; +}; + +/*! + * A "simple expression" can be used within an AtomicOp. + * AtomicOps are often made up of very few instructions, so these expressions are quite simple and + * can't nest. There is an "operation" and some arguments. There are no side effects of a + * SimpleExpression. The side effects will be captured by the AtomicOp. + * + * Note - there is an expression kind called identity which takes one argument and uses that + * argument as an expression. + */ +class SimpleExpression { + public: + enum class Kind : u8 { + INVALID, + IDENTITY, + DIV_S, + MUL_S, + ADD_S, + SUB_S, + MIN_S, + MAX_S, + FLOAT_TO_INT, + INT_TO_FLOAT, + ABS_S, + NEG_S, + SQRT_S, + ADD, + SUB, + MUL_SIGNED, + DIV_SIGNED, + MOD_SIGNED, + DIV_UNSIGNED, + MOD_UNSIGNED, + OR, + AND, + NOR, + XOR, + LEFT_SHIFT, + RIGHT_SHIFT_ARITH, + RIGHT_SHIFT_LOGIC, + MUL_UNSIGNED, + NOT, + NEG + }; + + // how many arguments? + int args() const { return n_args; } + // argument by index; asserts that idx is below args() + const SimpleAtom& get_arg(int idx) const { + assert(idx < args()); + return m_args[idx]; + } + Kind kind() const { return m_kind; } + + SimpleExpression(Kind kind, const SimpleAtom& arg0); + SimpleExpression(Kind kind, const SimpleAtom& arg0, const SimpleAtom& arg1); + goos::Object to_form(const std::vector& labels, const Env* env) const; + bool operator==(const SimpleExpression& other) const; + bool is_identity() const { return m_kind == Kind::IDENTITY; } + void get_regs(std::vector* out) const; + + private: + Kind m_kind = Kind::INVALID; + SimpleAtom m_args[2]; // only the first n_args entries are used + s8 n_args = -1; // set to 1 or 2 by the constructors +}; + +/*!
+ * Set a variable equal to a Simple Expression + */ +class SetVarOp : public AtomicOp { + public: + SetVarOp(const Variable& dst, const SimpleExpression& src, int my_idx) + : AtomicOp(my_idx), m_dst(dst), m_src(src) { + assert(my_idx == dst.idx()); + } + virtual goos::Object to_form(const std::vector& labels, + const Env* env) const override; + bool operator==(const AtomicOp& other) const override; + bool is_variable_set() const override; + bool is_sequence_point() const override; + Variable get_set_destination() const override; + std::unique_ptr get_set_source_as_expr() const override; + std::unique_ptr get_as_expr() const override; + void update_register_info() override; + + private: + Variable m_dst; + SimpleExpression m_src; +}; + +/*! + * An AsmOp represents a single inline assembly instruction. This is used when the BasicOpBuilder + * pass decides that an instruction could not have been generated from high-level GOAL code, and + * instead must be due to inline assembly. + * + * Each AsmOp stores the instruction it uses, as well as "Variable"s for each register used. + */ +class AsmOp : public AtomicOp { + public: + AsmOp(Instruction instr, int my_idx); + goos::Object to_form(const std::vector& labels, const Env* env) const override; + bool operator==(const AtomicOp& other) const override; + bool is_variable_set() const override; + bool is_sequence_point() const override; + Variable get_set_destination() const override; + std::unique_ptr get_set_source_as_expr() const override; + std::unique_ptr get_as_expr() const override; + void update_register_info() override; + + private: + Instruction m_instr; + std::optional m_dst; + std::optional m_src[3]; +}; + +/*! + * A condition represents something that can generate a 0 or 1 based on a check or comparison. + * This can be used as a branch condition in BranchOp + * This can be used as a condition in an SetVarConditionOp, which sets a variable to a GOAL boolean. 
+ * Sometimes a SetVarConditionOp gets spread across many, many instructions, in which case it is
+ * not correctly detected here.
+ */
+class IR2_Condition {
+ public:
+  enum class Kind {
+    NOT_EQUAL,
+    EQUAL,
+    LESS_THAN_SIGNED,
+    GREATER_THAN_SIGNED,
+    LEQ_SIGNED,
+    GEQ_SIGNED,
+    GREATER_THAN_ZERO_SIGNED,
+    LEQ_ZERO_SIGNED,
+    LESS_THAN_ZERO,
+    GEQ_ZERO_SIGNED,
+    LESS_THAN_UNSIGNED,
+    GREATER_THAN_UNSIGNED,
+    LEQ_UNSIGNED,
+    GEQ_UNSIGNED,
+    ZERO,
+    NONZERO,
+    FALSE,  // NOTE(review): would clash with a FALSE macro (e.g. from <windows.h>) - confirm
+    TRUTHY,
+    ALWAYS,
+    NEVER,
+    FLOAT_EQUAL,
+    FLOAT_NOT_EQUAL,
+    FLOAT_LESS_THAN,
+    FLOAT_GEQ,
+    FLOAT_LEQ,
+    FLOAT_GREATER_THAN,
+    INVALID
+  };
+
+  explicit IR2_Condition(Kind kind);                                     // no source variables
+  IR2_Condition(Kind kind, const Variable& src0);                        // one source variable
+  IR2_Condition(Kind kind, const Variable& src0, const Variable& src1);  // two source variables
+
+  void invert();  // defined elsewhere; presumably flips the kind to its opposite - confirm
+  bool operator==(const IR2_Condition& other) const;
+  bool operator!=(const IR2_Condition& other) const { return !((*this) == other); }
+  goos::Object to_form(const std::vector& labels, const Env* env) const;
+  void get_regs(std::vector* out) const;
+
+ private:
+  Kind m_kind = Kind::INVALID;
+  Variable m_src[2];  // room for the (at most two) sources accepted by the constructors
+};
+
+/*!
+ * Set a variable to a GOAL boolean, based off of a condition.
+ */
+class SetVarConditionOp : public AtomicOp {
+ public:
+  SetVarConditionOp(Variable dst, IR2_Condition condition, int my_idx);
+  goos::Object to_form(const std::vector& labels, const Env* env) const override;
+  bool operator==(const AtomicOp& other) const override;
+  bool is_variable_set() const override;
+  bool is_sequence_point() const override;
+  Variable get_set_destination() const override;
+  std::unique_ptr get_set_source_as_expr() const override;
+  std::unique_ptr get_as_expr() const override;
+  void update_register_info() override;
+
+ private:
+  Variable m_dst;             // the variable that receives the boolean (per the class comment)
+  IR2_Condition m_condition;  // the condition the boolean is based on
+};
+
+/*!
+ * Store an Atom into a memory location.
+ * Note - this is _not_ considered a set! form because you are not setting the value of a
+ * register which can be expression-compacted.
+ */
+class StoreOp : public AtomicOp {
+ public:
+  StoreOp(SimpleExpression addr, SimpleAtom value, int my_idx);
+  goos::Object to_form(const std::vector& labels, const Env* env) const override;
+  bool operator==(const AtomicOp& other) const override;
+  bool is_variable_set() const override;
+  bool is_sequence_point() const override;
+  Variable get_set_destination() const override;
+  std::unique_ptr get_set_source_as_expr() const override;
+  std::unique_ptr get_as_expr() const override;
+  void update_register_info() override;
+
+ private:
+  SimpleExpression m_addr;  // expression for the memory location being written
+  SimpleAtom m_value;       // the atom being stored
+};
+
+/*!
+ * Load a value into a variable.
+ * This is treated as a set! form.
+ */
+class LoadVarOp : public AtomicOp {
+ public:
+  LoadVarOp(Variable dst, SimpleExpression src, int my_idx);
+  goos::Object to_form(const std::vector& labels, const Env* env) const override;
+  bool operator==(const AtomicOp& other) const override;
+  bool is_variable_set() const override;
+  bool is_sequence_point() const override;
+  Variable get_set_destination() const override;
+  std::unique_ptr get_set_source_as_expr() const override;
+  std::unique_ptr get_as_expr() const override;
+  void update_register_info() override;
+
+ private:
+  Variable m_dst;          // the variable that receives the loaded value
+  SimpleExpression m_src;  // presumably the address expression to load from - confirm
+};
+
+/*!
+ * This represents one of the possible instructions that can go in a branch delay slot.
+ * These will be "absorbed" into higher level structures, but for the purpose of printing AtomicOps,
+ * it will be nice to have these print like expressions.
+ *
+ * These are always part of the branch op.
+ */
+class IR2_BranchDelay {
+ public:
+  enum class Kind {
+    NOP,
+    SET_REG_FALSE,
+    SET_REG_TRUE,
+    SET_REG_REG,
+    SET_BINTEGER,
+    SET_PAIR,
+    DSLLV,
+    NEGATE
+  };
+
+  explicit IR2_BranchDelay(Kind kind);                                      // no variables
+  IR2_BranchDelay(Kind kind, Variable var0);                                // one variable
+  IR2_BranchDelay(Kind kind, Variable var0, Variable var1);                 // two variables
+  IR2_BranchDelay(Kind kind, Variable var0, Variable var1, Variable var2);  // three variables
+  goos::Object to_form(const std::vector& labels, const Env* env) const;
+  bool operator==(const IR2_BranchDelay& other) const;
+  void get_regs(std::vector* write, std::vector* read) const;  // separate write/read outputs
+
+ private:
+  std::optional m_var[3];  // optional: the constructors take between zero and three variables
+  Kind m_kind;  // no default initializer; NOTE(review): confirm every constructor sets it
+};
+
+/*!
+ * This represents a combination of a condition + a branch + the branch delay slot.
+ * This is considered as a single operation.
+ */
+class BranchOp : public AtomicOp {
+ public:
+  BranchOp(bool likely,
+           IR2_Condition condition,
+           int label,
+           IR2_BranchDelay branch_delay,
+           int my_idx);
+  goos::Object to_form(const std::vector& labels, const Env* env) const override;
+  bool operator==(const AtomicOp& other) const override;
+  bool is_variable_set() const override;
+  bool is_sequence_point() const override;
+  Variable get_set_destination() const override;
+  std::unique_ptr get_set_source_as_expr() const override;
+  std::unique_ptr get_as_expr() const override;
+  void update_register_info() override;
+
+ private:
+  bool m_likely = false;
+  IR2_Condition m_condition;       // the condition part of the op
+  int m_label = -1;                // presumably an index into the labels vector - confirm
+  IR2_BranchDelay m_branch_delay;  // the delay slot part of the op
+};
+
+/*!
+ * A "special" op has no arguments.
+ * The kinds are NOP, BREAK, and SUSPEND.
+ */
+class SpecialOp : public AtomicOp {
+ public:
+  enum class Kind {
+    NOP,
+    BREAK,
+    SUSPEND,
+  };
+
+  SpecialOp(Kind kind, int my_idx);
+  goos::Object to_form(const std::vector& labels, const Env* env) const override;
+  bool operator==(const AtomicOp& other) const override;
+  bool is_variable_set() const override;
+  bool is_sequence_point() const override;
+  Variable get_set_destination() const override;
+  std::unique_ptr get_set_source_as_expr() const override;
+  std::unique_ptr get_as_expr() const override;
+  void update_register_info() override;
+
+ private:
+  Kind m_kind;  // no default initializer; the only constructor takes a kind
+};
+
+/*!
+ * Represents a function call.
+ * This has so many special cases and exceptions that it is separate from SpecialOp.
+ */
+class CallOp : public AtomicOp {
+ public:
+  explicit CallOp(int my_idx);  // explicit: a bare int must not silently convert to a CallOp
+  goos::Object to_form(const std::vector& labels, const Env* env) const override;
+  bool operator==(const AtomicOp& other) const override;
+  bool is_variable_set() const override;
+  bool is_sequence_point() const override;
+  Variable get_set_destination() const override;
+  std::unique_ptr get_set_source_as_expr() const override;
+  std::unique_ptr get_as_expr() const override;
+  void update_register_info() override;
+};
+
+/*!
+ * Unfortunately the original GOAL compiler does something weird when compiling (zero? x) or (not
+ * (zero? x)) when the result needs to be stored in a GOAL boolean (not in a branch condition). It
+ * first does a (set! result #t), then (possibly) a bunch of code to evaluate x, then does a
+ * conditional move (movn/movz). As a result, we can't recognize this as a Condition in the
+ * AtomicOp pass. Instead we'll recognize it as a (set! result #t) .... (cmove result flag) where
+ * flag is checked to be 0 or not. It's weird because all of the other similar cases get this
+ * right.
+ *
+ * Note - this isn't considered a variable set. It's "conditional set" so it needs to be
+ * handled separately. Unfortunately.
+ */
+class ConditionalMoveFalseOp : public AtomicOp {
+ public:
+  ConditionalMoveFalseOp(Variable dst, Variable src, bool on_zero, int my_idx);
+  goos::Object to_form(const std::vector& labels, const Env* env) const override;
+  bool operator==(const AtomicOp& other) const override;
+  bool is_variable_set() const override;
+  bool is_sequence_point() const override;
+  Variable get_set_destination() const override;
+  std::unique_ptr get_set_source_as_expr() const override;
+  std::unique_ptr get_as_expr() const override;
+  void update_register_info() override;
+
+ private:
+  Variable m_dst, m_src;
+  bool m_on_zero;
+};
+}  // namespace decompiler
\ No newline at end of file
diff --git a/decompiler/IR2/AtomicOpBuilder.cpp b/decompiler/IR2/AtomicOpBuilder.cpp
new file mode 100644
index 0000000000..5e2e81a207
--- /dev/null
+++ b/decompiler/IR2/AtomicOpBuilder.cpp
@@ -0,0 +1,161 @@
+#include "AtomicOpBuilder.h"
+#include "common/log/log.h"
+#include "decompiler/Function/BasicBlocks.h"
+#include "decompiler/Function/Function.h"
+
+namespace decompiler {
+
+namespace {
+
+// Variable for register reg in WRITE mode, tagged with op index idx.
+Variable make_dst_var(Register reg, int idx) {
+  return Variable(Variable::Mode::WRITE, reg, idx);
+}
+
+// Variable for register reg in READ mode, tagged with op index idx.
+Variable make_src_var(Register reg, int idx) {
+  return Variable(Variable::Mode::READ, reg, idx);
+}
+
+// Same as make_src_var, wrapped in a SimpleAtom.
+SimpleAtom make_src_atom(Register reg, int idx) {
+  return SimpleAtom::make_var(make_src_var(reg, idx));
+}
+
+/*!
+ * Convert a single instruction in the form instr dest_reg, src_reg, src_reg
+ * to an atomic op of (set! dst_reg (op src_reg src_reg))
+ * Like daddu a0, a1, a2
+ */
+void make_3reg_op(const Instruction& instr,
+                  SimpleExpression::Kind kind,
+                  int idx,
+                  std::unique_ptr& result) {
+  auto dst = make_dst_var(instr.get_dst(0).get_reg(), idx);
+  auto src0 = make_src_atom(instr.get_src(0).get_reg(), idx);
+  auto src1 = make_src_atom(instr.get_src(1).get_reg(), idx);
+  result = std::make_unique(dst, SimpleExpression(kind, src0, src1), idx);
+}
+
+/*!
+ * Convert a single "and" instruction. Always succeeds.
+ */
+bool convert_and_1(const Instruction& i0, int idx, std::unique_ptr& result) {
+  // and reg, reg, reg:
+  make_3reg_op(i0, SimpleExpression::Kind::AND, idx, result);
+  return true;
+}
+
+/*!
+ * Try to convert a single instruction to an atomic op.
+ * Returns false, leaving result untouched, if the instruction kind isn't handled yet.
+ */
+bool convert_1(const Instruction& i0, int idx, std::unique_ptr& result) {
+  switch (i0.kind) {
+    case InstructionKind::AND:
+      return convert_and_1(i0, idx, result);
+    default:
+      return false;
+  }
+}
+
+}  // namespace
+
+/*!
+ * Convert an entire basic block and add the results to a FunctionAtomicOps.
+ * Updates the mapping between blocks, instructions, and atomic ops as needed.
+ * @param begin_idx : the index of the first instruction for the block
+ * @param begin : the start of the instructions for the block
+ * @param end : the end of the instructions for the block
+ * @param labels : label names for the function, used for error prints on failed conversions
+ * @param container : the container to add to
+ */
+void convert_block_to_atomic_ops(int begin_idx,
+                                 std::vector::const_iterator begin,
+                                 std::vector::const_iterator end,
+                                 const std::vector& labels,
+                                 FunctionAtomicOps* container) {
+  container->block_id_to_first_atomic_op.push_back(container->ops.size());
+  // instr aliases the by-value parameter begin, so advancing it consumes the block.
+  for (auto& instr = begin; instr < end;) {
+    // how many instructions can we look at, at most?
+    int n_instr = end - instr;
+    // how many instructions did we use?
+    int length = 0;
+    // what is the index of the atomic op we would add
+    int op_idx = int(container->ops.size());
+
+    bool converted = false;
+    std::unique_ptr op;
+
+    if (n_instr >= 4) {
+      // try 4 instructions
+    }
+
+    if (!converted && n_instr >= 3) {
+      // try 3 instructions
+    }
+
+    if (!converted && n_instr >= 2) {
+      // try 2 instructions
+    }
+
+    if (!converted) {
+      // try 1 instruction
+      if (convert_1(*instr, op_idx, op)) {
+        converted = true;
+        length = 1;
+      }
+    }
+
+    if (!converted) {
+      // try assembly fallback.
+    }
+
+    if (!converted) {
+      // failed!
+      lg::die("Failed to convert instruction {} to an atomic op", instr->to_string(labels));
+    }
+
+    assert(converted && length && op);
+    // add mappings. here begin_idx is the index of the first instruction of this op.
+    container->atomic_op_to_instruction[container->ops.size()] = begin_idx;
+    for (int i = 0; i < length; i++) {
+      container->instruction_to_basic_op[begin_idx + i] = container->ops.size();
+    }
+    // add
+    op->update_register_info();
+    container->ops.emplace_back(std::move(op));
+    // advance both the iterator and the matching instruction index, otherwise every op after
+    // the first would be mapped onto the first instruction of the block.
+    instr += length;
+    begin_idx += length;
+  }
+  container->block_id_to_end_atomic_op.push_back(container->ops.size());
+}
+
+/*!
+ * Convert an entire function to AtomicOps.
+ * Blocks without instructions get -1 for both their first and end op index.
+ */
+FunctionAtomicOps convert_function_to_atomic_ops(const Function& func,
+                                                 const std::vector& labels) {
+  FunctionAtomicOps result;
+
+  for (const auto& block : func.basic_blocks) {
+    // we should only consider the blocks which actually have instructions:
+    if (block.end_word > block.start_word) {
+      auto begin = func.instructions.begin() + block.start_word;
+      auto end = func.instructions.begin() + block.end_word;
+      convert_block_to_atomic_ops(block.start_word, begin, end, labels, &result);
+    } else {
+      result.block_id_to_first_atomic_op.push_back(-1);
+      result.block_id_to_end_atomic_op.push_back(-1);
+    }
+  }
+
+  assert(func.basic_blocks.size() == result.block_id_to_end_atomic_op.size());
+  assert(func.basic_blocks.size() == result.block_id_to_first_atomic_op.size());
+  return result;
+}
+}  // namespace decompiler
\ No newline at end of file
diff --git a/decompiler/IR2/AtomicOpBuilder.h
b/decompiler/IR2/AtomicOpBuilder.h
new file mode 100644
index 0000000000..046cca8f13
--- /dev/null
+++ b/decompiler/IR2/AtomicOpBuilder.h
@@ -0,0 +1,47 @@
+#pragma once
+#include
+#include "AtomicOp.h"
+
+namespace decompiler {
+class Function;
+struct BasicBlock;
+class LinkedObjectFile;
+
+/*!
+ * A collection of Atomic Ops in a function, plus the index maps filled in while converting it.
+ */
+struct FunctionAtomicOps {
+  // the actual ops, stored in the correct order
+  std::vector> ops;
+
+  // mappings from instructions to atomic ops and back (both sides are indices)
+  std::unordered_map instruction_to_basic_op;   // instruction index -> index into ops
+  std::unordered_map atomic_op_to_instruction;  // index into ops -> instruction index
+
+  // map from basic block to the index of the first op (-1 if the block has no instructions)
+  std::vector block_id_to_first_atomic_op;
+  // map from basic block to the index of the last op + 1 (-1 if the block has no instructions)
+  std::vector block_id_to_end_atomic_op;
+};
+
+/*!
+ * Convert an entire basic block and add the results to a FunctionAtomicOps.
+ * Updates the mapping between blocks, instructions, and atomic ops as needed.
+ * @param begin_idx : the index of the first instruction for the block
+ * @param begin : the start of the instructions for the block
+ * @param end : the end of the instructions for the block
+ * @param labels : label names for the function, used for error prints on failed conversions
+ * @param container : the container to add to
+ */
+void convert_block_to_atomic_ops(int begin_idx,
+                                 std::vector::const_iterator begin,
+                                 std::vector::const_iterator end,
+                                 const std::vector& labels,
+                                 FunctionAtomicOps* container);
+
+/*!
+ * Convert an entire function to AtomicOps
+ */
+FunctionAtomicOps convert_function_to_atomic_ops(const Function& func,
+                                                 const std::vector& labels);
+}  // namespace decompiler
\ No newline at end of file
diff --git a/decompiler/IR2/Env.cpp b/decompiler/IR2/Env.cpp
new file mode 100644
index 0000000000..c0c2af1003
--- /dev/null
+++ b/decompiler/IR2/Env.cpp
@@ -0,0 +1,13 @@
+#include
+#include "Env.h"
+
+namespace decompiler {
+/*!
+ * Not implemented yet: ignores both arguments and always throws std::runtime_error.
+ */
+std::string Env::get_variable_name(Register reg, int atomic_idx) const {
+  (void)reg;
+  (void)atomic_idx;
+  throw std::runtime_error("Env::get_variable_name not yet implemented.");
+}
+}  // namespace decompiler
\ No newline at end of file
diff --git a/decompiler/IR2/Env.h b/decompiler/IR2/Env.h
new file mode 100644
index 0000000000..98d71dc401
--- /dev/null
+++ b/decompiler/IR2/Env.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include
+#include "decompiler/Disasm/Register.h"
+
+namespace decompiler {
+/*!
+ * An "environment" for a single function.
+ * This contains data for an entire function, like which registers are live when, the types of
+ * values in registers, and local variable names. This does not actually store IR itself, just
+ * shared data that all IR can look at. The concept is somewhat similar to Env in the compiler.
+ */
+class Env {
+ public:
+  bool has_local_vars() const { return m_has_local_vars; }
+  std::string get_variable_name(Register reg, int atomic_idx) const;  // defined in Env.cpp
+
+ private:
+  bool m_has_local_vars = false;  // nothing declared in this class changes it yet
+};
+}  // namespace decompiler
\ No newline at end of file
diff --git a/decompiler/IR2/IR2.h b/decompiler/IR2/IR2.h
new file mode 100644
index 0000000000..83d077667b
--- /dev/null
+++ b/decompiler/IR2/IR2.h
@@ -0,0 +1,11 @@
+#pragma once
+
+namespace decompiler {
+/*!
+ * Empty for now: no members are declared yet.
+ */
+class IR2 {
+ public:
+ private:
+};
+}  // namespace decompiler
\ No newline at end of file
diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp
index 4944ab71ac..aad6fe6964 100644
--- a/decompiler/ObjectFile/LinkedObjectFile.cpp
+++ b/decompiler/ObjectFile/LinkedObjectFile.cpp
@@ -13,9 +13,10 @@
 #include "decompiler/Disasm/InstructionDecode.h"
 #include "decompiler/config.h"
 #include "third-party/json.hpp"
-#include "third-party/spdlog/include/spdlog/spdlog.h"
+#include "common/log/log.h"
 #include "common/goos/PrettyPrinter.h"
 
+namespace decompiler {
 /*!
  * Set the number of segments in this object file.
  * This can only be done once, and must be done before adding any words.
@@ -45,7 +46,7 @@ int LinkedObjectFile::get_label_id_for(int seg, int offset) { if (kv == label_per_seg_by_offset.at(seg).end()) { // create a new label int id = labels.size(); - Label label; + DecompilerLabel label; label.target_segment = seg; label.offset = offset; label.name = "L" + std::to_string(id); @@ -498,7 +499,7 @@ void LinkedObjectFile::process_fp_relative_links() { } break; default: - printf("unknown fp using op: %s\n", instr.to_string(*this).c_str()); + printf("unknown fp using op: %s\n", instr.to_string(labels).c_str()); assert(false); } } @@ -518,7 +519,7 @@ std::string LinkedObjectFile::to_asm_json(const std::string& obj_file_name) { auto& func = functions_by_seg.at(seg).at(fi); auto fname = func.guessed_name.to_string(); if (functions_seen.find(fname) != functions_seen.end()) { - spdlog::warn( + lg::warn( "Function {} appears multiple times in the same object file {} - it cannot be uniquely " "referenced from config", func.guessed_name.to_string(), obj_file_name); @@ -544,7 +545,7 @@ std::string LinkedObjectFile::to_asm_json(const std::string& obj_file_name) { } auto& instr = func.instructions.at(i); op["id"] = i; - op["asm_op"] = instr.to_string(*this); + op["asm_op"] = instr.to_string(labels); if (func.has_basic_ops() && func.instr_starts_basic_op(i)) { op["basic_op"] = func.get_basic_op_at_instr(i)->print(*this); @@ -608,7 +609,7 @@ std::string LinkedObjectFile::print_function_disassembly(Function& func, } auto& instr = func.instructions.at(i); - std::string line = " " + instr.to_string(*this); + std::string line = " " + instr.to_string(labels); if (write_hex) { if (line.length() < 60) { @@ -1053,14 +1054,15 @@ goos::Object LinkedObjectFile::to_form_script_object(int seg, return result; } -u32 LinkedObjectFile::read_data_word(const Label& label) { +u32 LinkedObjectFile::read_data_word(const DecompilerLabel& label) { assert(0 == (label.offset % 4)); auto& word = words_by_seg.at(label.target_segment).at(label.offset / 4); assert(word.kind == 
LinkedWord::Kind::PLAIN_DATA); return word.data; } -std::string LinkedObjectFile::get_goal_string_by_label(const Label& label) const { +std::string LinkedObjectFile::get_goal_string_by_label(const DecompilerLabel& label) const { assert(0 == (label.offset % 4)); return get_goal_string(label.target_segment, (label.offset / 4) - 1, false); -} \ No newline at end of file +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/ObjectFile/LinkedObjectFile.h b/decompiler/ObjectFile/LinkedObjectFile.h index d15809a89f..9ad369cae6 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.h +++ b/decompiler/ObjectFile/LinkedObjectFile.h @@ -14,19 +14,11 @@ #include #include #include "LinkedWord.h" +#include "decompiler/Disasm/DecompilerLabel.h" #include "decompiler/Function/Function.h" #include "common/common_types.h" -/*! - * A label to a location in this object file. - * Doesn't have to be word aligned. - */ -struct Label { - std::string name; - int target_segment; - int offset; // in bytes -}; - +namespace decompiler { /*! * An object file's data with linking information included. */ @@ -69,8 +61,8 @@ class LinkedObjectFile { const std::string& extra_name); std::string print_asm_function_disassembly(const std::string& my_name); - u32 read_data_word(const Label& label); - std::string get_goal_string_by_label(const Label& label) const; + u32 read_data_word(const DecompilerLabel& label); + std::string get_goal_string_by_label(const DecompilerLabel& label) const; struct Stats { uint32_t total_code_bytes = 0; @@ -131,7 +123,7 @@ class LinkedObjectFile { std::vector> words_by_seg; std::vector offset_of_data_zone_by_seg; std::vector> functions_by_seg; - std::vector