goalc: Get CodeTester tests passing on Arm64 (only targetting macOS atm) (#3290)

This PR does the following:
- Designs a mechanism by which arm64 instructions can be encoded and
emitted
- Dispatch our higher-level instruction emitting calls to either x86 or
arm64 instructions depending on what the compiler is set to (defaults to
x86)
- Bare minimum scaffolding to get the arm64 instructions successfully
executing atleast on apple silicon
- Implement enough instructions to get the codetester test suite passing
on arm
This commit is contained in:
Tyler Wilding
2026-03-30 20:20:47 -04:00
committed by GitHub
parent 40cc1f0ae7
commit 64bcd8c030
72 changed files with 17001 additions and 9598 deletions
+1 -1
View File
@@ -57,6 +57,6 @@ jobs:
name: "🍎 MacOS"
uses: ./.github/workflows/macos-build-arm.yaml
with:
cmakePreset: "Release-macos-x86_64-clang"
cmakePreset: "Release-macos-arm64-clang"
cachePrefix: ""
secrets: inherit
+2 -2
View File
@@ -79,7 +79,7 @@ jobs:
uploadArtifacts: true
secrets: inherit
build_macos_arm:
build_macos_arm_rosetta:
needs:
- cut_release
name: "🍎 MacOS"
@@ -98,7 +98,7 @@ jobs:
- build_windows_clang
- build_linux_clang
- build_macos_intel
- build_macos_arm
- build_macos_arm_rosetta
name: "Upload Artifacts"
runs-on: ubuntu-latest
steps:
+21 -3
View File
@@ -5,13 +5,31 @@
"version": "0.2.0",
"configurations": [
{
"name": "run python script",
"name": "Run C++ Tests LLDB",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/goalc-test",
"args": [
"--gtest_brief=0",
"--gtest_filter=*CodeTester*",
"--gtest_break_on_failure"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "lldb"
},
{
"name": "Append File Docs",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/scripts/ci/lint-characters.py",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}",
"args": ["--fix"]
"args": [
"--fix"
]
},
]
}
}
+12 -4
View File
@@ -36,6 +36,13 @@ endif()
# a more recent issue - https://github.com/libsdl-org/SDL/issues/12078
if (APPLE)
enable_language(OBJC)
execute_process(
COMMAND xcrun --show-sdk-path
OUTPUT_VARIABLE MACOSX_SYSROOT
OUTPUT_STRIP_TRAILING_WHITESPACE
)
# Tell CMake to use it
set(CMAKE_OSX_SYSROOT "${MACOSX_SYSROOT}" CACHE PATH "macOS SDK path" FORCE)
endif()
# Setup compiler flags
@@ -127,11 +134,12 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
-Wsign-promo \
-fdiagnostics-color=always"
)
# pin to AVX for macOS, hopefully all macOS runners have atleast this architecture
# technically speaking, SSE4 is the cutoff for Apple Silicon so...only a matter of time!
if(NOT CMAKE_CXX_COMPILER_TARGET STREQUAL "arm64-apple-darwin")
# TODO - make a proper flag for arm compiling
if (CMAKE_APPLE_SILICON_PROCESSOR STREQUAL "x86_64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mcrc")
endif()
# additional c++ flags for release mode for our projects
+17 -1
View File
@@ -62,12 +62,22 @@
"name": "base-linux-debug",
"hidden": true,
"inherits": "base",
"binaryDir": "${sourceDir}/build/Release/bin",
"binaryDir": "${sourceDir}/build/Debug/bin",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"CMAKE_INSTALL_PREFIX": "${sourceDir}/build/install/${presetName}"
}
},
{
"name": "base-macos-debug",
"hidden": true,
"inherits": "base",
"binaryDir": "${sourceDir}/build/Debug/bin",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}"
}
},
{
"name": "base-macos-release",
"hidden": true,
@@ -174,6 +184,12 @@
"description": "Build with Clang as Release without Debug Symbols",
"inherits": ["base-linux-release", "base-clang"]
},
{
"name": "Debug-macos-arm64-clang",
"displayName": "MacOS ARM64 Debug (clang)",
"description": "Build for ARM64 with Clang as Debug",
"inherits": ["base-macos-arm64", "base-macos-debug", "base-clang"]
},
{
"name": "Release-macos-arm64-clang",
"displayName": "MacOS ARM64 Release (clang)",
+3
View File
@@ -237,3 +237,6 @@ tasks:
type-test:
cmds:
- cmd: '{{.GOALCTEST_BIN_RELEASE_DIR}}/goalc-test --gtest_brief=0 --gtest_filter="*{{.TYPE_CONSISTENCY_TEST_FILTER}}*" --gtest_break_on_failure'
tests-filtered:
cmds:
- cmd: '{{.GOALCTEST_BIN_RELEASE_DIR}}/goalc-test --gtest_brief=0 --gtest_filter="*{{.FILTER}}*" --gtest_break_on_failure'
+1 -6
View File
@@ -3,14 +3,9 @@
#include <algorithm>
#include <functional>
#ifndef __aarch64__
#include "xmmintrin.h"
#else
#include "third-party/sse2neon/sse2neon.h"
#endif
#include "common/log/log.h"
#include "common/util/Assert.h"
#include "common/util/simd_util.h"
namespace tfrag3 {
+4 -4
View File
@@ -10,17 +10,16 @@ u32 crc32(const u8* data, size_t size);
#ifdef __aarch64__
#include <arm_acle.h>
// Computes CRC32C
inline u32 crc32(const u8* data, size_t size) {
u32 result = 0xffffffff;
while (size >= 4) {
u32 x;
memcpy(&x, data, 4);
result = __crc32cw(result, *reinterpret_cast<const u32*>(data));
data += 4;
size -= 4;
result = __crc32w(result, x);
}
while (size) {
result = __crc32b(result, *data);
result = __crc32cb(result, *data);
data++;
size--;
}
@@ -28,6 +27,7 @@ inline u32 crc32(const u8* data, size_t size) {
}
#else
#include <immintrin.h>
// Computes CRC32C
inline u32 crc32(const u8* data, size_t size) {
u32 result = 0xffffffff;
while (size >= 4) {
+1
View File
@@ -47,6 +47,7 @@ void __cpuidex(int result[4], int eax, int ecx) {
: "0"(eax), "2"(ecx));
}
#else
// TODO ARM - implement ARM64 detection, check for NEON instead of AVX
// for now, just return 0's.
void __cpuidex(int result[4], int eax, int ecx) {
lg::warn("cpuid not implemented on this platform");
+7
View File
@@ -0,0 +1,7 @@
#pragma once
#ifndef __aarch64__
#include <immintrin.h>
#else
#include "third-party/sse2neon/sse2neon.h"
#endif
+1 -1
View File
@@ -129,7 +129,7 @@ ExtractorErrorCode compile(const fs::path& iso_data_path, const std::string& dat
// Determine which config to use from the database
const auto version_info = get_version_info_or_default(iso_data_path);
Compiler compiler(game_name_to_version(version_info.game_name));
Compiler compiler(game_name_to_version(version_info.game_name), emitter::InstructionSet::X86);
compiler.make_system().set_constant("*iso-data*", absolute(iso_data_path).string());
compiler.make_system().set_constant("*use-iso-data-path*", true);
file_util::set_iso_data_dir(absolute(iso_data_path));
+1 -2
View File
@@ -12,8 +12,7 @@ if(ARM64_ARCH)
set(OG_ASM_FUNCS_FILE kernel/asm_funcs_arm64.s)
enable_language(ASM)
set(CMAKE_ASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_SOURCE_FILE_EXTENSIONS} s)
# set(CMAKE_ASM_COMPILE_OBJECT "${CMAKE_ASM_COMPILER} -o <OBJECT> <SOURCE>")
set_source_files_properties(${OG_ASM_FUNCS_FILE} PROPERTIES COMPILE_FLAGS "-g")
set_source_files_properties(${OG_ASM_FUNCS_FILE} PROPERTIES COMPILE_FLAGS "-arch arm64 -g")
else()
set(OG_ASM_FUNCS_FILE kernel/asm_funcs_x86_64.asm)
enable_language(ASM_NASM)
+1 -6
View File
@@ -1,15 +1,10 @@
#pragma once
#include <cfloat>
#ifdef __aarch64__
#include "third-party/sse2neon/sse2neon.h"
#else
#include <immintrin.h>
#endif
#include "common/common_types.h"
#include "common/math/Vector.h"
#include "common/util/Assert.h"
#include "common/util/simd_util.h"
enum class Mask {
NONE = 0,
@@ -1,10 +1,7 @@
#include "SkyBlendCPU.h"
#ifndef __aarch64__
#include <immintrin.h>
#endif
#include "common/util/os.h"
#include "common/util/simd_util.h"
#include "game/graphics/opengl_renderer/AdgifHandler.h"
@@ -711,7 +711,6 @@ void Tie3::envmap_second_pass_draw(const Tree& tree,
void Tie3::draw_debug_window() {
ImGui::Checkbox("envmap 2nd draw", &m_draw_envmap_second_draw);
ImGui::SliderFloat("envmap str", &m_envmap_strength, 0, 2);
ImGui::Checkbox("Fast ToD", &m_use_fast_time_of_day);
ImGui::SameLine();
ImGui::Checkbox("All Visible", &m_debug_all_visible);
ImGui::Checkbox("Hide Wind", &m_hide_wind);
@@ -150,7 +150,6 @@ class Tie3 : public BucketRenderer {
static constexpr int TIME_OF_DAY_COLOR_COUNT = 8192;
bool m_has_level = false;
bool m_use_fast_time_of_day = true;
bool m_debug_all_visible = false;
bool m_hide_wind = false;
bool m_draw_envmap_second_draw = true;
@@ -2,13 +2,8 @@
#include "background_common.h"
#ifdef __aarch64__
#include "third-party/sse2neon/sse2neon.h"
#else
#include <immintrin.h>
#endif
#include "common/util/os.h"
#include "common/util/simd_util.h"
#include "game/graphics/opengl_renderer/BucketRenderer.h"
#include "game/graphics/pipelines/opengl.h"
@@ -329,9 +324,6 @@ void interp_time_of_day_slow(const math::Vector<s32, 4> itimes[4],
void interp_time_of_day(const math::Vector<s32, 4> itimes[4],
const tfrag3::PackedTimeOfDay& packed_colors,
math::Vector<u8, 4>* out) {
#ifdef __aarch64__
interp_time_of_day_slow(itimes, packed_colors, out);
#else
math::Vector<u16, 4> weights[8];
for (int component = 0; component < 8; component++) {
int quad_idx = component / 2;
@@ -483,7 +475,6 @@ void interp_time_of_day(const math::Vector<s32, 4> itimes[4],
_mm_storel_epi64((__m128i*)(&out[color_quad * 4 + 2]), result);
}
}
#endif
}
bool sphere_in_view_ref(const math::Vector4f& sphere, const math::Vector4f* planes) {
@@ -1,13 +1,8 @@
#include "Merc2.h"
#ifdef __aarch64__
#include "third-party/sse2neon/sse2neon.h"
#else
#include <xmmintrin.h>
#endif
#include "common/global_profiler/GlobalProfiler.h"
#include "common/util/fnv.h"
#include "common/util/simd_util.h"
#include "game/graphics/opengl_renderer/EyeRenderer.h"
#include "game/graphics/opengl_renderer/background/background_common.h"
+7 -6
View File
@@ -27,7 +27,6 @@
#include "common/global_profiler/GlobalProfiler.h"
#include "common/goal_constants.h"
#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include "common/versions/versions.h"
#include "game/external/discord.h"
@@ -55,9 +54,6 @@
#include "game/kernel/jak3/klisten.h"
#include "game/kernel/jak3/kscheme.h"
#include "game/kernel/jakx/kboot.h"
#include "game/kernel/jakx/kdgo.h"
#include "game/kernel/jakx/klisten.h"
#include "game/kernel/jakx/kscheme.h"
#include "game/overlord/common/fake_iso.h"
#include "game/overlord/common/iso.h"
#include "game/overlord/common/sbank.h"
@@ -70,7 +66,6 @@
#include "game/overlord/jak1/overlord.h"
#include "game/overlord/jak1/ramdisk.h"
#include "game/overlord/jak1/srpc.h"
#include "game/overlord/jak1/ssound.h"
#include "game/overlord/jak1/stream.h"
#include "game/overlord/jak2/dma.h"
#include "game/overlord/jak2/iso_cd.h"
@@ -82,7 +77,6 @@
#include "game/overlord/jak2/stream.h"
#include "game/overlord/jak2/streamlist.h"
#include "game/overlord/jak2/vag.h"
#include "game/overlord/jak3/init.h"
#include "game/overlord/jak3/overlord.h"
#include "game/system/Deci2Server.h"
#include "game/system/iop_thread.h"
@@ -155,6 +149,13 @@ void deci2_runner(SystemThreadInterface& iface) {
void ee_runner(SystemThreadInterface& iface) {
prof().root_event();
// Allocate Main RAM. Must have execute enabled.
// TODO Apple Silicon - You cannot make a page be RWX,
// or more specifically it can't be both writable and executable at the same time
//
// https://github.com/zherczeg/sljit/issues/99
//
// The solution to this is to flip-flop between permissions, or perhaps have two threads
// one that has writing permission, and another with executable permission
if (EE_MEM_LOW_MAP) {
g_ee_main_mem =
(u8*)mmap((void*)0x10000000, EE_MAIN_MEM_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE,
-1
View File
@@ -1,4 +1,3 @@
#include <filesystem>
#include <iostream>
#include <sstream>
+1 -1
View File
@@ -391,7 +391,7 @@ extern const InputBindingGroups DEFAULT_MOUSE_BINDS;
// So there are some potential solutions but this doesn't feel high priority and this was always an
// issue.
struct CommandBinding {
enum Source { CONTROLLER, KEYBOARD, MOUSE };
enum class Source { CONTROLLER, KEYBOARD, MOUSE };
u32 host_key;
InputModifiers modifiers;
+5
View File
@@ -4,6 +4,9 @@ add_library(compiler
emitter/ObjectFileData.cpp
emitter/ObjectGenerator.cpp
emitter/Register.cpp
emitter/IGen.cpp
emitter/IGenARM64.cpp
emitter/IGenX86.cpp
debugger/disassemble.cpp
build_level/common/build_level.cpp
build_actor/common/animation_processing.cpp
@@ -66,6 +69,8 @@ add_library(compiler
build_actor/jak3/build_actor.cpp
debugger/Debugger.cpp
debugger/DebugInfo.cpp
emitter/IGenX86.cpp
emitter/IGenARM64.cpp
listener/Listener.cpp
listener/MemoryMap.cpp
make/MakeSystem.cpp
+70 -42
View File
@@ -7,6 +7,7 @@
#include "CodeGenerator.h"
#include <stdexcept>
#include <unordered_set>
#include "IR.h"
@@ -18,8 +19,11 @@
using namespace emitter;
CodeGenerator::CodeGenerator(FileEnv* env, DebugInfo* debug_info, GameVersion version)
: m_gen(version), m_fe(env), m_debug_info(debug_info) {}
CodeGenerator::CodeGenerator(FileEnv* env,
DebugInfo* debug_info,
GameVersion version,
InstructionSet instruction_set)
: m_gen(version, instruction_set), m_fe(env), m_debug_info(debug_info) {}
/*!
* Generate an object file.
@@ -62,9 +66,21 @@ std::vector<u8> CodeGenerator::run(const TypeSystem* ts) {
void CodeGenerator::do_function(FunctionEnv* env, int f_idx) {
if (env->is_asm_func) {
do_asm_function(env, f_idx, env->asm_func_saved_regs);
if (m_gen.instr_set() == InstructionSet::X86) {
do_asm_function_x86(env, f_idx, env->asm_func_saved_regs);
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
do_asm_function_arm64(env, f_idx, env->asm_func_saved_regs);
} else {
throw std::runtime_error("CodeGenerator::do_function, instruction set not supported");
}
} else {
do_goal_function(env, f_idx);
if (m_gen.instr_set() == InstructionSet::X86) {
do_goal_function_x86(env, f_idx);
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
do_goal_function_arm64(env, f_idx);
} else {
throw std::runtime_error("CodeGenerator::do_function, instruction set not supported");
}
}
}
@@ -72,7 +88,7 @@ void CodeGenerator::do_function(FunctionEnv* env, int f_idx) {
* Add instructions to the function, specified by index.
* Generates prologues / epilogues.
*/
void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
void CodeGenerator::do_goal_function_x86(FunctionEnv* env, int f_idx) {
bool use_new_xmms = true;
auto* debug = &m_debug_info->function_by_name(env->name());
@@ -88,7 +104,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
// count how many xmm's we have to backup
int n_xmm_backups = 0;
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_xmm()) {
if (saved_reg.is_xmm(m_gen.instr_set())) {
n_xmm_backups++;
}
}
@@ -100,14 +116,15 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
if (n_xmm_backups > 0) {
// offset the stack
stack_offset += xmm_backup_stack_offset;
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm(RSP, xmm_backup_stack_offset),
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm(m_gen, RSP, xmm_backup_stack_offset),
InstructionInfo::Kind::PROLOGUE);
// back up xmms
int i = 0;
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_xmm()) {
if (saved_reg.is_xmm(m_gen.instr_set())) {
int offset = i * XMM_SIZE;
m_gen.add_instr_no_ir(f_rec, IGen::store128_xmm128_reg_offset(RSP, saved_reg, offset),
m_gen.add_instr_no_ir(f_rec,
IGen::store128_xmm128_reg_offset(m_gen, RSP, saved_reg, offset),
InstructionInfo::Kind::PROLOGUE);
i++;
}
@@ -116,10 +133,10 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
} else {
// back up xmms (currently not aligned)
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_xmm()) {
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE),
if (saved_reg.is_xmm(m_gen.instr_set())) {
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(m_gen, RSP, XMM_SIZE),
InstructionInfo::Kind::PROLOGUE);
m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg),
m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_simd128(m_gen, RSP, saved_reg),
InstructionInfo::Kind::PROLOGUE);
stack_offset += XMM_SIZE;
}
@@ -128,8 +145,9 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
// back up gprs
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_gpr()) {
m_gen.add_instr_no_ir(f_rec, IGen::push_gpr64(saved_reg), InstructionInfo::Kind::PROLOGUE);
if (saved_reg.is_gpr(m_gen.instr_set())) {
m_gen.add_instr_no_ir(f_rec, IGen::push_gpr64(m_gen, saved_reg),
InstructionInfo::Kind::PROLOGUE);
stack_offset += GPR_SIZE;
}
}
@@ -152,7 +170,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
} else {
// otherwise to an extra push, and remember so we can do an extra pop later on.
bonus_push = true;
m_gen.add_instr_no_ir(f_rec, IGen::push_gpr64(ri.get_saved_gpr(0)),
m_gen.add_instr_no_ir(f_rec, IGen::push_gpr64(m_gen, ri.get_saved_gpr(0)),
InstructionInfo::Kind::PROLOGUE);
}
stack_offset += 8;
@@ -162,7 +180,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
// do manual stack offset.
if (manually_added_stack_offset) {
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm(RSP, manually_added_stack_offset),
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm(m_gen, RSP, manually_added_stack_offset),
InstructionInfo::Kind::PROLOGUE);
}
}
@@ -178,20 +196,20 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
auto& bonus = allocs.stack_ops.at(ir_idx);
for (auto& op : bonus.ops) {
if (op.load) {
if (op.reg.is_gpr() && op.reg_class == RegClass::GPR_64) {
if (op.reg.is_gpr(m_gen.instr_set()) && op.reg_class == RegClass::GPR_64) {
// todo, s8 or 0 offset if possible?
m_gen.add_instr(IGen::load64_gpr64_plus_s32(
op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, RSP),
m_gen, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, RSP),
i_rec);
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) {
} else if (op.reg.is_xmm(m_gen.instr_set()) && op.reg_class == RegClass::FLOAT) {
// load xmm32 off of the stack
m_gen.add_instr(IGen::load_reg_offset_xmm32(
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
m_gen, op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else if (op.reg.is_xmm() &&
} else if (op.reg.is_xmm(m_gen.instr_set()) &&
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) {
m_gen.add_instr(IGen::load128_xmm128_reg_offset(
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
m_gen, op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else {
ASSERT(false);
@@ -200,25 +218,25 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
}
// do the actual op
ir->do_codegen(&m_gen, allocs, i_rec);
ir->do_codegen_x86(&m_gen, allocs, i_rec);
// store things back on the stack if needed.
for (auto& op : bonus.ops) {
if (op.store) {
if (op.reg.is_gpr() && op.reg_class == RegClass::GPR_64) {
if (op.reg.is_gpr(m_gen.instr_set()) && op.reg_class == RegClass::GPR_64) {
// todo, s8 or 0 offset if possible?
m_gen.add_instr(IGen::store64_gpr64_plus_s32(
RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, op.reg),
m_gen, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, op.reg),
i_rec);
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) {
} else if (op.reg.is_xmm(m_gen.instr_set()) && op.reg_class == RegClass::FLOAT) {
// store xmm32 on the stack
m_gen.add_instr(IGen::store_reg_offset_xmm32(
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
m_gen, RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else if (op.reg.is_xmm() &&
} else if (op.reg.is_xmm(m_gen.instr_set()) &&
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) {
m_gen.add_instr(IGen::store128_xmm128_reg_offset(
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
m_gen, RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else {
ASSERT(false);
@@ -231,21 +249,22 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
if (manually_added_stack_offset || allocs.needs_aligned_stack_for_spills ||
env->needs_aligned_stack()) {
if (manually_added_stack_offset) {
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm(RSP, manually_added_stack_offset),
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm(m_gen, RSP, manually_added_stack_offset),
InstructionInfo::Kind::EPILOGUE);
}
if (bonus_push) {
ASSERT(!manually_added_stack_offset);
m_gen.add_instr_no_ir(f_rec, IGen::pop_gpr64(ri.get_saved_gpr(0)),
m_gen.add_instr_no_ir(f_rec, IGen::pop_gpr64(m_gen, ri.get_saved_gpr(0)),
InstructionInfo::Kind::EPILOGUE);
}
}
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
auto& saved_reg = allocs.used_saved_regs.at(i);
if (saved_reg.is_gpr()) {
m_gen.add_instr_no_ir(f_rec, IGen::pop_gpr64(saved_reg), InstructionInfo::Kind::EPILOGUE);
if (saved_reg.is_gpr(m_gen.instr_set())) {
m_gen.add_instr_no_ir(f_rec, IGen::pop_gpr64(m_gen, saved_reg),
InstructionInfo::Kind::EPILOGUE);
}
}
@@ -254,33 +273,38 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
int j = n_xmm_backups;
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
auto& saved_reg = allocs.used_saved_regs.at(i);
if (saved_reg.is_xmm()) {
if (saved_reg.is_xmm(m_gen.instr_set())) {
j--;
int offset = j * XMM_SIZE;
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_reg_offset(saved_reg, RSP, offset),
m_gen.add_instr_no_ir(f_rec,
IGen::load128_xmm128_reg_offset(m_gen, saved_reg, RSP, offset),
InstructionInfo::Kind::EPILOGUE);
}
}
ASSERT(j == 0);
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm(RSP, xmm_backup_stack_offset),
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm(m_gen, RSP, xmm_backup_stack_offset),
InstructionInfo::Kind::EPILOGUE);
}
} else {
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
auto& saved_reg = allocs.used_saved_regs.at(i);
if (saved_reg.is_xmm()) {
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP),
if (saved_reg.is_xmm(m_gen.instr_set())) {
m_gen.add_instr_no_ir(f_rec, IGen::load128_simd128_gpr64(m_gen, saved_reg, RSP),
InstructionInfo::Kind::EPILOGUE);
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE),
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(m_gen, RSP, XMM_SIZE),
InstructionInfo::Kind::EPILOGUE);
}
}
}
m_gen.add_instr_no_ir(f_rec, IGen::ret(), InstructionInfo::Kind::EPILOGUE);
m_gen.add_instr_no_ir(f_rec, IGen::ret(m_gen), InstructionInfo::Kind::EPILOGUE);
}
void CodeGenerator::do_asm_function(FunctionEnv* env, int f_idx, bool allow_saved_regs) {
void CodeGenerator::do_goal_function_arm64(FunctionEnv* env, int f_idx) {
throw std::runtime_error("NYI - CodeGenerator::do_goal_function_arm64");
}
void CodeGenerator::do_asm_function_x86(FunctionEnv* env, int f_idx, bool allow_saved_regs) {
auto f_rec = m_gen.get_existing_function_record(f_idx);
const auto& allocs = env->alloc_result();
@@ -316,6 +340,10 @@ void CodeGenerator::do_asm_function(FunctionEnv* env, int f_idx, bool allow_save
}
// do the actual op
ir->do_codegen(&m_gen, allocs, i_rec);
ir->do_codegen_x86(&m_gen, allocs, i_rec);
}
}
void CodeGenerator::do_asm_function_arm64(FunctionEnv* env, int f_idx, bool allow_saved_regs) {
throw std::runtime_error("NYI - CodeGenerator::do_asm_function");
}
+8 -3
View File
@@ -18,14 +18,19 @@ class TypeSystem;
class CodeGenerator {
public:
CodeGenerator(FileEnv* env, DebugInfo* debug_info, GameVersion version);
CodeGenerator(FileEnv* env,
DebugInfo* debug_info,
GameVersion version,
emitter::InstructionSet instruction_set);
std::vector<u8> run(const TypeSystem* ts);
emitter::ObjectGeneratorStats get_obj_stats() const { return m_gen.get_stats(); }
private:
void do_function(FunctionEnv* env, int f_idx);
void do_goal_function(FunctionEnv* env, int f_idx);
void do_asm_function(FunctionEnv* env, int f_idx, bool allow_saved_regs);
void do_goal_function_x86(FunctionEnv* env, int f_idx);
void do_goal_function_arm64(FunctionEnv* env, int f_idx);
void do_asm_function_x86(FunctionEnv* env, int f_idx, bool allow_saved_regs);
void do_asm_function_arm64(FunctionEnv* env, int f_idx, bool allow_saved_regs);
emitter::ObjectGenerator m_gen;
FileEnv* m_fe = nullptr;
DebugInfo* m_debug_info = nullptr;
+6 -2
View File
@@ -10,6 +10,8 @@
#include "common/link_types.h"
#include "common/util/FileUtil.h"
#include "goalc/compiler/CodeGenerator.h"
#include "goalc/emitter/InstructionSet.h"
#include "goalc/make/Tools.h"
#include "goalc/regalloc/Allocator.h"
#include "goalc/regalloc/Allocator_v2.h"
@@ -19,10 +21,12 @@
using namespace goos;
Compiler::Compiler(GameVersion version,
emitter::InstructionSet instr_set,
const std::optional<REPL::Config> repl_config,
const std::string& user_profile,
std::unique_ptr<REPL::Wrapper> repl)
: m_version(version),
m_instr_set(instr_set),
m_goos(user_profile),
m_debugger(&m_listener, &m_goos.reader, version),
m_make(repl_config, user_profile),
@@ -307,7 +311,7 @@ std::vector<u8> Compiler::codegen_object_file(FileEnv* env) {
try {
auto debug_info = &m_debugger.get_debug_info_for_object(env->name());
debug_info->clear();
CodeGenerator gen(env, debug_info, m_version);
CodeGenerator gen(env, debug_info, m_version, m_instr_set);
bool ok = true;
auto result = gen.run(&m_ts);
for (auto& f : env->functions()) {
@@ -331,7 +335,7 @@ bool Compiler::codegen_and_disassemble_object_file(FileEnv* env,
bool omit_ir) {
auto debug_info = &m_debugger.get_debug_info_for_object(env->name());
debug_info->clear();
CodeGenerator gen(env, debug_info, m_version);
CodeGenerator gen(env, debug_info, m_version, m_instr_set);
*data_out = gen.run(&m_ts);
bool ok = true;
*asm_out = debug_info->disassemble_all_functions(&ok, &m_goos.reader, omit_ir);
+3
View File
@@ -16,6 +16,7 @@
#include "goalc/compiler/symbol_info.h"
#include "goalc/data_compiler/game_text_common.h"
#include "goalc/debugger/Debugger.h"
#include "goalc/emitter/InstructionSet.h"
#include "goalc/emitter/Register.h"
#include "goalc/listener/Listener.h"
#include "goalc/make/MakeSystem.h"
@@ -46,6 +47,7 @@ struct GlobalConstantInfo {
class Compiler {
public:
Compiler(GameVersion version,
emitter::InstructionSet instr_set,
const std::optional<REPL::Config> repl_config = {},
const std::string& user_profile = "#f",
std::unique_ptr<REPL::Wrapper> repl = nullptr);
@@ -118,6 +120,7 @@ class Compiler {
private:
GameVersion m_version;
emitter::InstructionSet m_instr_set;
TypeSystem m_ts;
std::unique_ptr<GlobalEnv> m_global_env = nullptr;
std::unique_ptr<None> m_none = nullptr;
+579 -309
View File
File diff suppressed because it is too large Load Diff
+253 -127
View File
@@ -2,9 +2,9 @@
#include <string>
#include "CodeGenerator.h"
#include "Val.h"
#include "goalc/compiler/Label.h"
#include "goalc/emitter/ObjectGenerator.h"
#include "goalc/emitter/Register.h"
#include "goalc/regalloc/allocator_interface.h"
@@ -13,9 +13,12 @@ class IR {
public:
virtual std::string print() = 0;
virtual RegAllocInstr to_rai() = 0;
virtual void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) = 0;
virtual void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) = 0;
virtual void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) = 0;
virtual void add_constraints(std::vector<IRegConstraint>* constraints, int my_id) {
(void)constraints;
(void)my_id;
@@ -29,9 +32,12 @@ class IR_Return : public IR {
std::string print() override;
RegAllocInstr to_rai() override;
void add_constraints(std::vector<IRegConstraint>* constraints, int my_id) override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
const RegVal* value() { return m_value; }
protected:
@@ -45,9 +51,12 @@ class IR_LoadConstant64 : public IR {
IR_LoadConstant64(const RegVal* dest, u64 value);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dest = nullptr;
@@ -59,9 +68,12 @@ class IR_LoadSymbolPointer : public IR {
IR_LoadSymbolPointer(const RegVal* dest, std::string name);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dest = nullptr;
@@ -73,9 +85,12 @@ class IR_SetSymbolValue : public IR {
IR_SetSymbolValue(const SymbolVal* dest, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const SymbolVal* m_dest = nullptr;
@@ -87,9 +102,12 @@ class IR_GetSymbolValue : public IR {
IR_GetSymbolValue(const RegVal* dest, const SymbolVal* src, bool sext);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dest = nullptr;
@@ -102,9 +120,12 @@ class IR_RegSet : public IR {
IR_RegSet(const RegVal* dest, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dest = nullptr;
@@ -120,9 +141,12 @@ class IR_FunctionCall : public IR {
std::optional<emitter::Register> ret_reg);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void add_constraints(std::vector<IRegConstraint>* constraints, int my_id) override;
protected:
@@ -138,9 +162,12 @@ class IR_RegValAddr : public IR {
IR_RegValAddr(const RegVal* dest, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dest = nullptr;
@@ -152,9 +179,12 @@ class IR_StaticVarAddr : public IR {
IR_StaticVarAddr(const RegVal* dest, const StaticObject* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dest = nullptr;
@@ -166,9 +196,12 @@ class IR_StaticVarLoad : public IR {
IR_StaticVarLoad(const RegVal* dest, const StaticObject* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dest = nullptr;
@@ -180,9 +213,12 @@ class IR_FunctionAddr : public IR {
IR_FunctionAddr(const RegVal* dest, FunctionEnv* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dest = nullptr;
@@ -216,9 +252,12 @@ class IR_IntegerMath : public IR {
IR_IntegerMath(IntegerMathKind kind, RegVal* dest, u8 shift_amount);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
IntegerMathKind get_kind() const { return m_kind; }
protected:
@@ -235,9 +274,12 @@ class IR_FloatMath : public IR {
IR_FloatMath(FloatMathKind kind, RegVal* dest, RegVal* arg);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
FloatMathKind get_kind() const { return m_kind; }
protected:
@@ -265,9 +307,12 @@ class IR_GotoLabel : public IR {
explicit IR_GotoLabel(const Label* dest);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const Label* m_dest = nullptr;
@@ -279,9 +324,12 @@ class IR_ConditionalBranch : public IR {
IR_ConditionalBranch(const Condition& condition, Label _label);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void mark_as_resolved() { m_resolved = true; }
Condition condition;
@@ -296,9 +344,12 @@ class IR_Null : public IR {
IR_Null() = default;
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
};
class IR_ValueReset : public IR {
@@ -306,9 +357,12 @@ class IR_ValueReset : public IR {
IR_ValueReset(std::vector<RegVal*> args);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
std::vector<RegVal*> m_args;
@@ -319,9 +373,12 @@ class IR_FloatToInt : public IR {
IR_FloatToInt(const RegVal* dest, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_dest = nullptr;
@@ -333,9 +390,12 @@ class IR_IntToFloat : public IR {
IR_IntToFloat(const RegVal* dest, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_dest = nullptr;
@@ -347,9 +407,12 @@ class IR_GetStackAddr : public IR {
IR_GetStackAddr(const RegVal* dest, int slot);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_dest = nullptr;
@@ -361,9 +424,12 @@ class IR_Nop : public IR {
IR_Nop();
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
};
class IR_Asm : public IR {
@@ -384,9 +450,12 @@ class IR_LoadConstOffset : public IR_Asm {
bool use_coloring = true);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_dest = nullptr;
@@ -404,9 +473,12 @@ class IR_StoreConstOffset : public IR_Asm {
bool use_coloring = true);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_value = nullptr;
@@ -420,9 +492,12 @@ class IR_AsmRet : public IR_Asm {
IR_AsmRet(bool use_coloring);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
};
class IR_AsmPush : public IR_Asm {
@@ -430,9 +505,12 @@ class IR_AsmPush : public IR_Asm {
IR_AsmPush(bool use_coloring, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_src = nullptr;
@@ -443,9 +521,12 @@ class IR_AsmPop : public IR_Asm {
IR_AsmPop(bool use_coloring, const RegVal* dst);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_dst = nullptr;
@@ -456,9 +537,12 @@ class IR_AsmSub : public IR_Asm {
IR_AsmSub(bool use_coloring, const RegVal* dst, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_dst = nullptr;
@@ -470,9 +554,12 @@ class IR_AsmAdd : public IR_Asm {
IR_AsmAdd(bool use_coloring, const RegVal* dst, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_dst = nullptr;
@@ -484,9 +571,12 @@ class IR_AsmFNop : public IR_Asm {
IR_AsmFNop();
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
};
class IR_AsmFWait : public IR_Asm {
@@ -494,9 +584,12 @@ class IR_AsmFWait : public IR_Asm {
IR_AsmFWait();
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
};
class IR_GetSymbolValueAsm : public IR_Asm {
@@ -504,9 +597,12 @@ class IR_GetSymbolValueAsm : public IR_Asm {
IR_GetSymbolValueAsm(bool use_coloring, const RegVal* dest, std::string sym_name, bool sext);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dest = nullptr;
@@ -519,9 +615,12 @@ class IR_JumpReg : public IR_Asm {
IR_JumpReg(bool use_coloring, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_src = nullptr;
@@ -532,9 +631,12 @@ class IR_RegSetAsm : public IR_Asm {
IR_RegSetAsm(bool use_color, const RegVal* dst, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;
@@ -551,9 +653,12 @@ class IR_VFMath3Asm : public IR_Asm {
Kind kind);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;
@@ -594,9 +699,12 @@ class IR_Int128Math3Asm : public IR_Asm {
Kind kind);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;
@@ -615,9 +723,12 @@ class IR_Int128Math2Asm : public IR_Asm {
std::optional<int64_t> = std::nullopt);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;
@@ -632,9 +743,12 @@ class IR_VFMath2Asm : public IR_Asm {
IR_VFMath2Asm(bool use_color, const RegVal* dst, const RegVal* src, Kind kind);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;
@@ -647,9 +761,12 @@ class IR_BlendVF : public IR_Asm {
IR_BlendVF(bool use_color, const RegVal* dst, const RegVal* src1, const RegVal* src2, u8 mask);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;
@@ -666,9 +783,12 @@ class IR_SplatVF : public IR_Asm {
const emitter::Register::VF_ELEMENT element);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;
@@ -681,9 +801,12 @@ class IR_SwizzleVF : public IR_Asm {
IR_SwizzleVF(bool use_color, const RegVal* dst, const RegVal* src, const u8 m_controlBytes);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;
@@ -696,9 +819,12 @@ class IR_SqrtVF : public IR_Asm {
IR_SqrtVF(bool use_color, const RegVal* dst, const RegVal* src);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
void do_codegen_arm64(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;
+7 -5
View File
@@ -192,7 +192,8 @@ Val* Compiler::compile_lambda(const goos::Object& form, const goos::Object& rest
IRegConstraint constr;
constr.instr_idx = 0; // constraint at function start
auto ireg_arg = new_func_env->make_ireg(
lambda.params.at(i).type, arg_regs.at(i).is_gpr() ? RegClass::GPR_64 : RegClass::INT_128);
lambda.params.at(i).type,
arg_regs.at(i).is_gpr(m_instr_set) ? RegClass::GPR_64 : RegClass::INT_128);
ireg_arg->mark_as_settable();
constr.ireg = ireg_arg->ireg();
constr.desired_register = arg_regs.at(i);
@@ -230,7 +231,8 @@ Val* Compiler::compile_lambda(const goos::Object& form, const goos::Object& rest
for (u32 i = 0; i < lambda.params.size(); i++) {
auto ireg = new_func_env->make_ireg(
lambda.params.at(i).type, arg_regs.at(i).is_gpr() ? RegClass::GPR_64 : RegClass::INT_128);
lambda.params.at(i).type,
arg_regs.at(i).is_gpr(m_instr_set) ? RegClass::GPR_64 : RegClass::INT_128);
ireg->mark_as_settable();
if (!new_func_env->params.insert({m_goos.intern_ptr(lambda.params.at(i).name), ireg})
.second) {
@@ -608,7 +610,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form,
auto cc = get_function_calling_convention(function->type(), m_ts);
RegClass ret_reg_class = RegClass::GPR_64;
if (cc.return_reg && cc.return_reg->is_xmm()) {
if (cc.return_reg && cc.return_reg->is_128bit_simd(m_instr_set)) {
ret_reg_class = RegClass::INT_128;
}
@@ -641,8 +643,8 @@ Val* Compiler::compile_real_function_call(const goos::Object& form,
for (int i = 0; i < (int)args.size(); i++) {
const auto& arg = args.at(i);
auto reg = cc.arg_regs.at(i);
arg_outs.push_back(
env->make_ireg(arg->type(), reg.is_xmm() ? RegClass::INT_128 : RegClass::GPR_64));
arg_outs.push_back(env->make_ireg(
arg->type(), reg.is_128bit_simd(m_instr_set) ? RegClass::INT_128 : RegClass::GPR_64));
arg_outs.back()->mark_as_settable();
env->emit_ir<IR_RegSet>(form, arg_outs.back(), arg);
}
+5 -3
View File
@@ -572,7 +572,8 @@ Val* Compiler::compile_defmethod(const goos::Object& form, const goos::Object& _
IRegConstraint constr;
constr.instr_idx = 0; // constraint at function start
auto ireg_arg = new_func_env->make_ireg(
lambda.params.at(i).type, arg_regs.at(i).is_gpr() ? RegClass::GPR_64 : RegClass::INT_128);
lambda.params.at(i).type,
arg_regs.at(i).is_gpr(m_instr_set) ? RegClass::GPR_64 : RegClass::INT_128);
ireg_arg->mark_as_settable();
constr.ireg = ireg_arg->ireg();
constr.desired_register = arg_regs.at(i);
@@ -609,8 +610,9 @@ Val* Compiler::compile_defmethod(const goos::Object& form, const goos::Object& _
func_block_env->emit_ir<IR_ValueReset>(form, reset_args_for_coloring);
for (u32 i = 0; i < lambda.params.size(); i++) {
auto ireg = new_func_env->make_ireg(
lambda.params.at(i).type, arg_regs.at(i).is_gpr() ? RegClass::GPR_64 : RegClass::INT_128);
auto ireg = new_func_env->make_ireg(lambda.params.at(i).type, arg_regs.at(i).is_gpr(m_instr_set)
? RegClass::GPR_64
: RegClass::INT_128);
ireg->mark_as_settable();
if (!new_func_env->params.insert({m_goos.intern_ptr(lambda.params.at(i).name), ireg}).second) {
throw_compiler_error(form, "defmethod has multiple arguments named {}",
+2 -3
View File
@@ -2,9 +2,8 @@
#include "common/goos/Reader.h"
#include "Zydis/Zydis.h"
#include "goalc/compiler/Env.h"
#include "goalc/compiler/IR.h"
#include "Zydis/Decoder.h"
#include "Zydis/Formatter.h"
#include "fmt/color.h"
#include "fmt/format.h"
+5 -3
View File
@@ -2,6 +2,7 @@
#include <memory>
#include <string>
#include <variant>
#include <vector>
#include "common/common_types.h"
@@ -17,14 +18,13 @@ class HeapObject;
} // namespace goos
struct InstructionInfo {
emitter::Instruction instruction; //! the actual x86 instruction
emitter::Instruction instruction;
enum class Kind { PROLOGUE, IR, EPILOGUE } kind;
int ir_idx = -1;
int offset = -1;
InstructionInfo(const emitter::Instruction& _instruction, Kind _kind)
: instruction(_instruction), kind(_kind) {}
InstructionInfo(const emitter::Instruction& _instruction, Kind _kind, int _ir_idx)
: instruction(_instruction), kind(_kind), ir_idx(_ir_idx) {}
};
@@ -43,4 +43,6 @@ std::string disassemble_x86_function(
const std::vector<std::string>& ir_strings,
bool* had_failure,
bool print_whole_function,
bool omit_ir);
bool omit_ir);
// TODO ARM64 - disassemble arm64 functions as well
+103 -25
View File
@@ -6,7 +6,12 @@
* The CodeTester can't be used for tests requiring the full GOAL language/linking.
*/
#include <stdexcept>
#include "common/common_types.h"
#include "goalc/emitter/Instruction.h"
#include "goalc/emitter/Register.h"
#ifdef OS_POSIX
#include <sys/mman.h>
#elif _WIN32
@@ -18,11 +23,12 @@
#include "CodeTester.h"
#include "IGen.h"
#include "fmt/format.h"
namespace emitter {
CodeTester::CodeTester() : m_info(RegisterInfo::make_register_info()) {}
CodeTester::CodeTester() : m_info(RegisterInfo::make_register_info()), m_gen(GameVersion::Jak1) {}
CodeTester::CodeTester(InstructionSet instruction_set)
: m_info(RegisterInfo::make_register_info()), m_gen(GameVersion::Jak1, instruction_set) {}
/*!
* Convert to a string for comparison against an assembler or tests.
@@ -50,27 +56,37 @@ std::string CodeTester::dump_to_hex_string(bool nospace) {
/*!
* Add an instruction to the buffer.
*/
void CodeTester::emit(const Instruction& instr) {
code_buffer_size += instr.emit(code_buffer + code_buffer_size);
void CodeTester::emit(const emitter::Instruction& instr) {
u8* start = code_buffer + code_buffer_size;
code_buffer_size += instr.emit(start);
ASSERT(code_buffer_size <= code_buffer_capacity);
}
/*!
* Add a return instruction to the buffer.
*/
void CodeTester::emit_return() {
emit(IGen::ret());
emit(IGen::ret(m_gen));
}
/*!
* Pop all GPRs off of the stack. Optionally exclude rax.
* Pops RSP always, which is weird, but doesn't cause issues.
*/
void CodeTester::emit_pop_all_gprs(bool exclude_rax) {
for (int i = 16; i-- > 0;) {
if (i != RAX || !exclude_rax) {
emit(IGen::pop_gpr64(i));
void CodeTester::emit_pop_all_gprs(bool exclude_return_register) {
if (m_gen.instr_set() == InstructionSet::X86) {
for (int i = 16; i-- > 0;) {
if (i != RAX || !exclude_return_register) {
emit(IGen::pop_gpr64(m_gen, i));
}
}
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 31; i-- > 0;) {
if (i != X0 || !exclude_return_register) {
emit(IGen::pop_gpr64(m_gen, i));
}
}
} else {
throw std::runtime_error("CodeTester::emit_pop_all_gprs unhandled instruction set");
}
}
@@ -78,34 +94,62 @@ void CodeTester::emit_pop_all_gprs(bool exclude_rax) {
* Push all GPRs onto the stack. Optionally exclude RAX.
* Pushes RSP always, which is weird, but doesn't cause issues.
*/
void CodeTester::emit_push_all_gprs(bool exclude_rax) {
for (int i = 0; i < 16; i++) {
if (i != RAX || !exclude_rax) {
emit(IGen::push_gpr64(i));
void CodeTester::emit_push_all_gprs(bool exclude_return_register) {
if (m_gen.instr_set() == InstructionSet::X86) {
for (int i = 0; i < 16; i++) {
if (i != RAX || !exclude_return_register) {
emit(IGen::push_gpr64(m_gen, i));
}
}
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 0; i < 31; i++) {
if (i != X0 || !exclude_return_register) {
emit(IGen::push_gpr64(m_gen, i));
}
}
} else {
throw std::runtime_error("CodeTester::emit_push_all_gprs unhandled instruction set");
}
}
/*!
* Push all xmm registers (all 128-bits) to the stack.
*/
void CodeTester::emit_push_all_xmms() {
emit(IGen::sub_gpr64_imm8s(RSP, 8));
for (int i = 0; i < 16; i++) {
emit(IGen::sub_gpr64_imm8s(RSP, 16));
emit(IGen::store128_gpr64_xmm128(RSP, XMM0 + i));
void CodeTester::emit_push_all_simd() {
if (m_gen.instr_set() == InstructionSet::X86) {
emit(IGen::sub_gpr64_imm8s(m_gen, RSP, 8));
for (int i = 0; i < 16; i++) {
emit(IGen::sub_gpr64_imm8s(m_gen, RSP, 16));
emit(IGen::store128_gpr64_simd128(m_gen, RSP, XMM0 + i));
}
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 0; i < 16; i++) {
emit(IGen::sub_gpr64_imm8s(m_gen, SP, 16));
emit(IGen::store128_gpr64_simd128(m_gen, SP, Q0 + i));
}
} else {
throw std::runtime_error("CodeTester::emit_push_all_simd unhandled instruction set");
}
}
/*!
* Pop all xmm registers (all 128-bits) from the stack
*/
void CodeTester::emit_pop_all_xmms() {
for (int i = 0; i < 16; i++) {
emit(IGen::load128_xmm128_gpr64(XMM0 + i, RSP));
emit(IGen::add_gpr64_imm8s(RSP, 16));
void CodeTester::emit_pop_all_simd() {
if (m_gen.instr_set() == InstructionSet::X86) {
for (int i = 0; i < 16; i++) {
emit(IGen::load128_simd128_gpr64(m_gen, XMM0 + i, RSP));
emit(IGen::add_gpr64_imm8s(m_gen, RSP, 16));
}
emit(IGen::add_gpr64_imm8s(m_gen, RSP, 8));
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 0; i < 16; i++) {
emit(IGen::load128_simd128_gpr64(m_gen, Q0 + i, SP));
emit(IGen::add_gpr64_imm8s(m_gen, SP, 16));
}
} else {
throw std::runtime_error("CodeTester::emit_pop_all_simd unhandled instruction set");
}
emit(IGen::add_gpr64_imm8s(RSP, 8));
}
/*!
@@ -119,8 +163,23 @@ void CodeTester::clear() {
* Execute the buffered code with no arguments, return the value of RAX.
*/
u64 CodeTester::execute() {
#if defined(__aarch64__)
// allegedly needed because ARM requires flushing after writing new instructions
// on x86 it does nothing
__builtin___clear_cache((char*)code_buffer, (char*)code_buffer + code_buffer_size);
#endif
// clang-format off
#if defined(__APPLE__) && defined(__aarch64__)
// TODO - we may need to switch to using pthread_jit_write_protect_np
// there may also be issues if multiple threasd are involved
// but this seems to work so keep it simple until something proves otherwise.
mprotect(code_buffer, code_buffer_capacity, PROT_EXEC | PROT_READ);
auto ret = ((u64(*)())code_buffer)();
mprotect(code_buffer, code_buffer_capacity, PROT_WRITE | PROT_READ);
return ret;
#else
return ((u64(*)())code_buffer)();
#endif
// clang-format on
}
@@ -130,7 +189,14 @@ u64 CodeTester::execute() {
*/
u64 CodeTester::execute(u64 in0, u64 in1, u64 in2, u64 in3) {
// clang-format off
#if defined(__APPLE__) && defined(__aarch64__)
mprotect(code_buffer, code_buffer_capacity, PROT_EXEC | PROT_READ);
auto ret = ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3);
mprotect(code_buffer, code_buffer_capacity, PROT_WRITE | PROT_READ);
return ret;
#else
return ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3);
#endif
// clang-format on
}
@@ -138,8 +204,20 @@ u64 CodeTester::execute(u64 in0, u64 in1, u64 in2, u64 in3) {
* Allocate a code buffer of the given size.
*/
void CodeTester::init_code_buffer(int capacity) {
// TODO Apple Silicon - You cannot make a page be RWX,
// or more specifically it can't be both writable and executable at the same time
//
// https://github.com/zherczeg/sljit/issues/99
//
// The solution to this is to flip-flop between permissions, or perhaps have two threads
// one that has writing permission, and another with executable permission
#if defined(__APPLE__) && defined(__aarch64__)
code_buffer = (u8*)mmap(nullptr, capacity, PROT_WRITE | PROT_READ,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_JIT, 0, 0);
#else
code_buffer = (u8*)mmap(nullptr, capacity, PROT_EXEC | PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
#endif
if (code_buffer == (u8*)(-1)) {
ASSERT_MSG(false, "[CodeTester] Failed to map memory!");
}
+15 -12
View File
@@ -8,9 +8,6 @@
* The CodeTester can't be used for tests requiring the full GOAL language/linking.
*/
#ifndef JAK_CODETESTER_H
#define JAK_CODETESTER_H
#include <cstring>
#include <stdexcept>
#include <string>
@@ -20,16 +17,28 @@
#include "common/common_types.h"
#include "goalc/emitter/InstructionSet.h"
#include "goalc/emitter/ObjectGenerator.h"
namespace emitter {
class CodeTester {
private:
int code_buffer_size = 0;
int code_buffer_capacity = 0;
u8* code_buffer = nullptr;
RegisterInfo m_info;
ObjectGenerator m_gen;
public:
CodeTester();
CodeTester(InstructionSet instruction_set);
std::string dump_to_hex_string(bool nospace = false);
ObjectGenerator generator() const { return m_gen; }
void init_code_buffer(int capacity);
void emit_push_all_gprs(bool exclude_rax = false);
void emit_pop_all_gprs(bool exclude_rax = false);
void emit_push_all_xmms();
void emit_pop_all_xmms();
void emit_push_all_simd();
void emit_pop_all_simd();
void emit_return();
void emit(const Instruction& instr);
u64 execute();
@@ -64,6 +73,7 @@ class CodeTester {
* Should allow emitter tests which run code to do the right thing on windows.
*/
Register get_c_abi_arg_reg(int i) {
// TODO ARM64 - x86 specific
#ifdef _WIN32
switch (i) {
case 0:
@@ -128,12 +138,5 @@ class CodeTester {
void clear();
~CodeTester();
private:
int code_buffer_size = 0;
int code_buffer_capacity = 0;
u8* code_buffer = nullptr;
RegisterInfo m_info;
};
} // namespace emitter
#endif // JAK_CODETESTER_H
File diff suppressed because it is too large Load Diff
+967 -2740
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+803
View File
@@ -0,0 +1,803 @@
#pragma once
#include "goalc/emitter/Instruction.h"
#include "goalc/emitter/Register.h"
namespace emitter {
namespace IGen {
namespace ARM64 {
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// MOVES
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Move data from src to dst. Moves all 64-bits of the GPR.
*/
InstructionARM64 mov_gpr64_gpr64(Register dst, Register src);
/*!
* Move a 64-bit constant into a register.
*/
InstructionARM64 mov_gpr64_u64(Register dst, uint64_t val);
/*!
* Move a 32-bit constant into a register. Zeros the upper 32 bits.
*/
InstructionARM64 mov_gpr64_u32(Register dst, uint64_t val);
/*!
* Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits.
* When possible prefer mov_gpr64_u32. (use this only for negative values...)
* This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64.
*/
InstructionARM64 mov_gpr64_s32(Register dst, int64_t val);
/*!
* Move 32-bits of xmm to 32 bits of gpr (no sign extension).
*/
InstructionARM64 movd_gpr32_xmm32(Register dst, Register src);
/*!
* Move 32-bits of gpr to 32-bits of xmm (no sign extension)
*/
InstructionARM64 movd_xmm32_gpr32(Register dst, Register src);
/*!
* Move 64-bits of xmm to 64 bits of gpr (no sign extension).
*/
InstructionARM64 movq_gpr64_xmm64(Register dst, Register src);
/*!
* Move 64-bits of gpr to 64-bits of xmm (no sign extension)
*/
InstructionARM64 movq_xmm64_gpr64(Register dst, Register src);
/*!
* Move 32-bits between xmm's
*/
InstructionARM64 mov_xmm32_xmm32(Register dst, Register src);
// todo - GPR64 -> XMM64 (zext)
// todo - XMM -> GPR64
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// GOAL Loads and Stores
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* movsx dst, BYTE PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionARM64 load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionARM64 store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value);
InstructionARM64 load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionARM64 load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register value,
s64 offset);
/*!
* movzx dst, BYTE PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionARM64 load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionARM64 load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
/*!
* movsx dst, WORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionARM64 load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionARM64 store16_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value);
InstructionARM64 store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionARM64 store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionARM64 load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
/*!
* movzx dst, WORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionARM64 load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionARM64 load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
/*!
* movsxd dst, DWORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionARM64 load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionARM64 store32_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value);
InstructionARM64 load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionARM64 load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register value,
s64 offset);
/*!
* movzxd dst, DWORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionARM64 load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionARM64 load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
/*!
* mov dst, QWORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionARM64 load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionARM64 store64_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value);
InstructionARM64 load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionARM64 load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionARM64 store_goal_vf(Register addr, Register value, Register off, s64 offset);
InstructionARM64 store_goal_gpr(Register addr, Register value, Register off, int offset, int size);
InstructionARM64 load_goal_xmm128(Register dst, Register addr, Register off, int offset);
/*!
* Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register.
* This will pick the appropriate fancy addressing mode instruction.
*/
InstructionARM64 load_goal_gpr(Register dst,
Register addr,
Register off,
int offset,
int size,
bool sign_extend);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// LOADS n' STORES - XMM32
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
InstructionARM64 store32_xmm32_gpr64_plus_gpr64(Register addr1, Register addr2, Register xmm_value);
InstructionARM64 load32_xmm32_gpr64_plus_gpr64(Register simd_dest, Register addr1, Register addr2);
InstructionARM64 store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register xmm_value,
s64 offset);
InstructionARM64 load32_xmm32_gpr64_plus_gpr64_plus_s8(Register simd_dest,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register xmm_value,
s64 offset);
InstructionARM64 lea_reg_plus_off32(Register dest, Register base, s64 offset);
InstructionARM64 lea_reg_plus_off8(Register dest, Register base, s64 offset);
InstructionARM64 lea_reg_plus_off(Register dest, Register base, s64 offset);
InstructionARM64 store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset);
InstructionARM64 store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset);
InstructionARM64 load32_xmm32_gpr64_plus_gpr64_plus_s32(Register simd_dest,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 load32_xmm32_gpr64_plus_s32(Register simd_dest, Register base, s64 offset);
InstructionARM64 load32_xmm32_gpr64_plus_s8(Register simd_dest, Register base, s64 offset);
InstructionARM64 load_goal_xmm32(Register simd_dest, Register addr, Register off, s64 offset);
InstructionARM64 store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset);
InstructionARM64 store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset);
InstructionARM64 load_reg_offset_xmm32(Register simd_dest, Register base, s64 offset);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// LOADS n' STORES - XMM128
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Store a 128-bit xmm into an address stored in a register, no offset
*/
InstructionARM64 store128_gpr64_simd128(Register gpr_addr, Register xmm_value);
InstructionARM64 store128_gpr64_simd128_s32(Register gpr_addr, Register xmm_value, s64 offset);
InstructionARM64 store128_gpr64_simd128_s8(Register gpr_addr, Register xmm_value, s64 offset);
InstructionARM64 load128_simd128_gpr64(Register simd_dest, Register gpr_addr);
InstructionARM64 load128_simd128_gpr64_s32(Register simd_dest, Register gpr_addr, s64 offset);
InstructionARM64 load128_simd128_gpr64_s8(Register simd_dest, Register gpr_addr, s64 offset);
InstructionARM64 load128_xmm128_reg_offset(Register simd_dest, Register base, s64 offset);
InstructionARM64 store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// RIP loads and stores
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
InstructionARM64 load64_rip_s32(Register dest, s64 offset);
InstructionARM64 load32s_rip_s32(Register dest, s64 offset);
InstructionARM64 load32u_rip_s32(Register dest, s64 offset);
InstructionARM64 load16u_rip_s32(Register dest, s64 offset);
InstructionARM64 load16s_rip_s32(Register dest, s64 offset);
InstructionARM64 load8u_rip_s32(Register dest, s64 offset);
InstructionARM64 load8s_rip_s32(Register dest, s64 offset);
InstructionARM64 static_load(Register dest, s64 offset, int size, bool sign_extend);
InstructionARM64 store64_rip_s32(Register src, s64 offset);
InstructionARM64 store32_rip_s32(Register src, s64 offset);
InstructionARM64 store16_rip_s32(Register src, s64 offset);
InstructionARM64 store8_rip_s32(Register src, s64 offset);
InstructionARM64 static_store(Register value, s64 offset, int size);
InstructionARM64 static_addr(Register dst, s64 offset);
InstructionARM64 static_load_xmm32(Register simd_dest, s64 offset);
InstructionARM64 static_store_xmm32(Register xmm_value, s64 offset);
// TODO, special load/stores of 128 bit values.
// TODO, consider specialized stack loads and stores?
InstructionARM64 load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg);
/*!
* Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset.
*/
InstructionARM64 store64_gpr64_plus_s32(Register addr, int32_t offset, Register value);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// FUNCTION STUFF
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Function return. Pops the 64-bit return address (real) off the stack and jumps to it.
*/
InstructionARM64 ret();
/*!
* Instruction to push gpr (64-bits) onto the stack
*/
InstructionARM64 push_gpr64(Register reg);
/*!
* Instruction to pop 64 bit gpr from the stack
*/
InstructionARM64 pop_gpr64(Register reg);
/*!
* Call a function stored in a 64-bit gpr
*/
InstructionARM64 call_r64(Register reg_);
/*!
* Jump to an x86-64 address stored in a 64-bit gpr.
*/
InstructionARM64 jmp_r64(Register reg_);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// INTEGER MATH
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
InstructionARM64 sub_gpr64_imm8s(Register reg, int64_t imm);
InstructionARM64 sub_gpr64_imm32s(Register reg, int64_t imm);
InstructionARM64 add_gpr64_imm8s(Register reg, int64_t v);
InstructionARM64 add_gpr64_imm32s(Register reg, int64_t v);
InstructionARM64 add_gpr64_imm(Register reg, int64_t imm);
InstructionARM64 sub_gpr64_imm(Register reg, int64_t imm);
InstructionARM64 add_gpr64_gpr64(Register dst, Register src);
InstructionARM64 sub_gpr64_gpr64(Register dst, Register src);
/*!
* Multiply gprs (32-bit, signed).
* (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply)
*/
InstructionARM64 imul_gpr32_gpr32(Register dst, Register src);
/*!
* Multiply gprs (64-bit, signed).
* DANGER - this treats all operands as 64-bit. This is not like the EE.
*/
InstructionARM64 imul_gpr64_gpr64(Register dst, Register src);
/*!
* Divide (idiv, 32 bit)
*/
InstructionARM64 idiv_gpr32(Register reg);
InstructionARM64 unsigned_div_gpr32(Register reg);
/*!
* Convert doubleword to quadword for division.
*/
InstructionARM64 cdq();
/*!
* Move from gpr32 to gpr64, with sign extension.
* Needed for multiplication/divsion madness.
*/
InstructionARM64 movsx_r64_r32(Register dst, Register src);
/*!
* Compare gpr64. This sets the flags for the jumps.
* todo UNTESTED
*/
InstructionARM64 cmp_gpr64_gpr64(Register a, Register b);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// BIT STUFF
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Or of two gprs
*/
InstructionARM64 or_gpr64_gpr64(Register dst, Register src);
/*!
* And of two gprs
*/
InstructionARM64 and_gpr64_gpr64(Register dst, Register src);
/*!
* Xor of two gprs
*/
InstructionARM64 xor_gpr64_gpr64(Register dst, Register src);
/*!
* Bitwise not a gpr
*/
InstructionARM64 not_gpr64(Register reg);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// SHIFTS
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Shift 64-bit gpr left by CL register
*/
InstructionARM64 shl_gpr64_cl(Register reg);
/*!
* Shift 64-bit gpr right (logical) by CL register
*/
InstructionARM64 shr_gpr64_cl(Register reg);
/*!
* Shift 64-bit gpr right (arithmetic) by CL register
*/
InstructionARM64 sar_gpr64_cl(Register reg);
/*!
* Shift 64-ptr left (logical) by the constant shift amount "sa".
*/
InstructionARM64 shl_gpr64_u8(Register reg, uint8_t sa);
/*!
* Shift 64-ptr right (logical) by the constant shift amount "sa".
*/
InstructionARM64 shr_gpr64_u8(Register reg, uint8_t sa);
/*!
* Shift 64-ptr right (arithmetic) by the constant shift amount "sa".
*/
InstructionARM64 sar_gpr64_u8(Register reg, uint8_t sa);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// CONTROL FLOW
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Jump, 32-bit constant offset. The offset is by default 0 and must be patched later.
*/
InstructionARM64 jmp_32();
/*!
* Jump if equal.
*/
InstructionARM64 je_32();
/*!
* Jump not equal.
*/
InstructionARM64 jne_32();
/*!
* Jump less than or equal.
*/
InstructionARM64 jle_32();
/*!
* Jump greater than or equal.
*/
InstructionARM64 jge_32();
/*!
* Jump less than
*/
InstructionARM64 jl_32();
/*!
* Jump greater than
*/
InstructionARM64 jg_32();
/*!
* Jump below or equal
*/
InstructionARM64 jbe_32();
/*!
* Jump above or equal
*/
InstructionARM64 jae_32();
/*!
* Jump below
*/
InstructionARM64 jb_32();
/*!
* Jump above
*/
InstructionARM64 ja_32();
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// FLOAT MATH
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Compare two floats and set flag register for jump (ucomiss)
*/
InstructionARM64 cmp_flt_flt(Register a, Register b);
InstructionARM64 sqrts_xmm(Register dst, Register src);
/*!
* Multiply two floats in xmm's
*/
InstructionARM64 mulss_xmm_xmm(Register dst, Register src);
/*!
* Divide two floats in xmm's
*/
InstructionARM64 divss_xmm_xmm(Register dst, Register src);
/*!
* Subtract two floats in xmm's
*/
InstructionARM64 subss_xmm_xmm(Register dst, Register src);
/*!
* Add two floats in xmm's
*/
InstructionARM64 addss_xmm_xmm(Register dst, Register src);
/*!
* Floating point minimum.
*/
InstructionARM64 minss_xmm_xmm(Register dst, Register src);
/*!
* Floating point maximum.
*/
InstructionARM64 maxss_xmm_xmm(Register dst, Register src);
/*!
* Convert GPR int32 to XMM float (single precision)
*/
InstructionARM64 int32_to_float(Register dst, Register src);
/*!
* Convert XMM float to GPR int32(single precision) (truncate)
*/
InstructionARM64 float_to_int32(Register dst, Register src);
InstructionARM64 nop();
// TODO - rsqrt / abs / sqrt
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// UTILITIES
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* A "null" instruction. This instruction does not generate any bytes
* but can be referred to by a label. Useful to insert in place of a real instruction
* if the real instruction has been optimized out.
*/
InstructionARM64 null();
/////////////////////////////
// AVX (VF - Vector Float) //
/////////////////////////////
InstructionARM64 nop_vf();
InstructionARM64 wait_vf();
InstructionARM64 mov_vf_vf(Register dst, Register src);
InstructionARM64 loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionARM64 loadvf_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 loadvf_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2);
InstructionARM64 storevf_gpr64_plus_gpr64_plus_s8(Register value,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 storevf_gpr64_plus_gpr64_plus_s32(Register value,
Register addr1,
Register addr2,
s64 offset);
InstructionARM64 loadvf_rip_plus_s32(Register dest, s64 offset);
// TODO - rip relative loads and stores.
InstructionARM64 blend_vf(Register dst, Register src1, Register src2, u8 mask);
InstructionARM64 shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw);
/*
Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved.
Here's a brief run-down:
- 8-bits / 4 groups of 2 bits
- Right-to-left, each group is used to determine which element in `src` gets copied into
`dst`'s element (W->X).
- GROUP OPTIONS
- 00b - Copy the least-significant element (X)
- 01b - Copy the second element (from the right) (Y)
- 10b - Copy the third element (from the right) (Z)
- 11b - Copy the most significant element (W)
Examples
; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land)
SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions
> (1.5, 1.5, 1.5, 1.5)
SHUFPS xmm1, xmm1, 0x39 ; Rotate right
> (4.5, 1.5, 2.5, 3.5)
*/
InstructionARM64 swizzle_vf(Register dst, Register src, u8 controlBytes);
/*
Splats a single element in 'src' to all elements in 'dst'
For example (pseudocode):
xmm1 = (1.5, 2.5, 3.5, 4.5)
xmm2 = (1, 2, 3, 4)
splat_vf(xmm1, xmm2, XMM_ELEMENT::X);
xmm1 = (4, 4, 4, 4)
*/
InstructionARM64 splat_vf(Register dst, Register src, Register::VF_ELEMENT element);
InstructionARM64 xor_vf(Register dst, Register src1, Register src2);
InstructionARM64 sub_vf(Register dst, Register src1, Register src2);
InstructionARM64 add_vf(Register dst, Register src1, Register src2);
InstructionARM64 mul_vf(Register dst, Register src1, Register src2);
InstructionARM64 max_vf(Register dst, Register src1, Register src2);
InstructionARM64 min_vf(Register dst, Register src1, Register src2);
InstructionARM64 div_vf(Register dst, Register src1, Register src2);
InstructionARM64 sqrt_vf(Register dst, Register src);
InstructionARM64 itof_vf(Register dst, Register src);
InstructionARM64 ftoi_vf(Register dst, Register src);
InstructionARM64 pw_sra(Register dst, Register src, u8 imm);
InstructionARM64 pw_srl(Register dst, Register src, u8 imm);
InstructionARM64 ph_srl(Register dst, Register src, u8 imm);
InstructionARM64 pw_sll(Register dst, Register src, u8 imm);
InstructionARM64 ph_sll(Register dst, Register src, u8 imm);
InstructionARM64 parallel_add_byte(Register dst, Register src0, Register src1);
InstructionARM64 parallel_bitwise_or(Register dst, Register src0, Register src1);
InstructionARM64 parallel_bitwise_xor(Register dst, Register src0, Register src1);
InstructionARM64 parallel_bitwise_and(Register dst, Register src0, Register src1);
// Reminder - a word in MIPS = 32bits = a DWORD in x86
// MIPS || x86
// -----------------------
// byte || byte
// halfword || word
// word || dword
// doubleword || quadword
// -- Unpack High Data Instructions
InstructionARM64 pextub_swapped(Register dst, Register src0, Register src1);
InstructionARM64 pextuh_swapped(Register dst, Register src0, Register src1);
InstructionARM64 pextuw_swapped(Register dst, Register src0, Register src1);
// -- Unpack Low Data Instructions
InstructionARM64 pextlb_swapped(Register dst, Register src0, Register src1);
InstructionARM64 pextlh_swapped(Register dst, Register src0, Register src1);
InstructionARM64 pextlw_swapped(Register dst, Register src0, Register src1);
// Equal to than comparison as 16 bytes (8 bits)
InstructionARM64 parallel_compare_e_b(Register dst, Register src0, Register src1);
// Equal to than comparison as 8 halfwords (16 bits)
InstructionARM64 parallel_compare_e_h(Register dst, Register src0, Register src1);
// Equal to than comparison as 4 words (32 bits)
InstructionARM64 parallel_compare_e_w(Register dst, Register src0, Register src1);
// Greater than comparison as 16 bytes (8 bits)
InstructionARM64 parallel_compare_gt_b(Register dst, Register src0, Register src1);
// Greater than comparison as 8 halfwords (16 bits)
InstructionARM64 parallel_compare_gt_h(Register dst, Register src0, Register src1);
// Greater than comparison as 4 words (32 bits)
InstructionARM64 parallel_compare_gt_w(Register dst, Register src0, Register src1);
InstructionARM64 vpunpcklqdq(Register dst, Register src0, Register src1);
InstructionARM64 pcpyld_swapped(Register dst, Register src0, Register src1);
InstructionARM64 pcpyud(Register dst, Register src0, Register src1);
InstructionARM64 vpsubd(Register dst, Register src0, Register src1);
InstructionARM64 vpsrldq(Register dst, Register src, u8 imm);
InstructionARM64 vpslldq(Register dst, Register src, u8 imm);
InstructionARM64 vpshuflw(Register dst, Register src, u8 imm);
InstructionARM64 vpshufhw(Register dst, Register src, u8 imm);
InstructionARM64 vpackuswb(Register dst, Register src0, Register src1);
} // namespace ARM64
} // namespace IGen
} // namespace emitter
File diff suppressed because it is too large Load Diff
+803
View File
@@ -0,0 +1,803 @@
#pragma once
#include "goalc/emitter/Instruction.h"
#include "goalc/emitter/Register.h"
namespace emitter {
namespace IGen {
namespace X86 {
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// MOVES
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Move data from src to dst. Moves all 64-bits of the GPR.
*/
InstructionX86 mov_gpr64_gpr64(Register dst, Register src);
/*!
* Move a 64-bit constant into a register.
*/
InstructionX86 mov_gpr64_u64(Register dst, uint64_t val);
/*!
* Move a 32-bit constant into a register. Zeros the upper 32 bits.
*/
InstructionX86 mov_gpr64_u32(Register dst, uint64_t val);
/*!
* Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits.
* When possible prefer mov_gpr64_u32. (use this only for negative values...)
* This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64.
*/
InstructionX86 mov_gpr64_s32(Register dst, int64_t val);
/*!
* Move 32-bits of xmm to 32 bits of gpr (no sign extension).
*/
InstructionX86 movd_gpr32_xmm32(Register dst, Register src);
/*!
* Move 32-bits of gpr to 32-bits of xmm (no sign extension)
*/
InstructionX86 movd_xmm32_gpr32(Register dst, Register src);
/*!
* Move 64-bits of xmm to 64 bits of gpr (no sign extension).
*/
InstructionX86 movq_gpr64_xmm64(Register dst, Register src);
/*!
* Move 64-bits of gpr to 64-bits of xmm (no sign extension)
*/
InstructionX86 movq_xmm64_gpr64(Register dst, Register src);
/*!
* Move 32-bits between xmm's
*/
InstructionX86 mov_xmm32_xmm32(Register dst, Register src);
// todo - GPR64 -> XMM64 (zext)
// todo - XMM -> GPR64
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// GOAL Loads and Stores
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* movsx dst, BYTE PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionX86 load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionX86 store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value);
InstructionX86 load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionX86 load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register value,
s64 offset);
/*!
* movzx dst, BYTE PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionX86 load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionX86 load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
/*!
* movsx dst, WORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionX86 load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionX86 store16_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value);
InstructionX86 store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionX86 store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionX86 load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
/*!
* movzx dst, WORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionX86 load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionX86 load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
/*!
* movsxd dst, DWORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionX86 load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionX86 store32_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value);
InstructionX86 load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionX86 load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register value,
s64 offset);
/*!
* movzxd dst, DWORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionX86 load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionX86 load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
/*!
* mov dst, QWORD PTR [addr1 + addr2]
* addr1 and addr2 have to be different registers.
* Cannot use rsp.
*/
InstructionX86 load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionX86 store64_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value);
InstructionX86 load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionX86 load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register value,
s64 offset);
InstructionX86 store_goal_vf(Register addr, Register value, Register off, s64 offset);
InstructionX86 store_goal_gpr(Register addr, Register value, Register off, int offset, int size);
InstructionX86 load_goal_xmm128(Register dst, Register addr, Register off, int offset);
/*!
* Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register.
* This will pick the appropriate fancy addressing mode instruction.
*/
InstructionX86 load_goal_gpr(Register dst,
Register addr,
Register off,
int offset,
int size,
bool sign_extend);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// LOADS n' STORES - XMM32
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
InstructionX86 store32_xmm32_gpr64_plus_gpr64(Register addr1, Register addr2, Register xmm_value);
InstructionX86 load32_xmm32_gpr64_plus_gpr64(Register simd_dest, Register addr1, Register addr2);
InstructionX86 store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1,
Register addr2,
Register xmm_value,
s64 offset);
InstructionX86 load32_xmm32_gpr64_plus_gpr64_plus_s8(Register simd_dest,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1,
Register addr2,
Register xmm_value,
s64 offset);
InstructionX86 lea_reg_plus_off32(Register dest, Register base, s64 offset);
InstructionX86 lea_reg_plus_off8(Register dest, Register base, s64 offset);
InstructionX86 lea_reg_plus_off(Register dest, Register base, s64 offset);
InstructionX86 store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset);
InstructionX86 store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset);
InstructionX86 load32_xmm32_gpr64_plus_gpr64_plus_s32(Register simd_dest,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 load32_xmm32_gpr64_plus_s32(Register simd_dest, Register base, s64 offset);
InstructionX86 load32_xmm32_gpr64_plus_s8(Register simd_dest, Register base, s64 offset);
InstructionX86 load_goal_xmm32(Register simd_dest, Register addr, Register off, s64 offset);
InstructionX86 store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset);
InstructionX86 store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset);
InstructionX86 load_reg_offset_xmm32(Register simd_dest, Register base, s64 offset);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// LOADS n' STORES - XMM128
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Store a 128-bit xmm into an address stored in a register, no offset
*/
InstructionX86 store128_gpr64_simd128(Register gpr_addr, Register xmm_value);
InstructionX86 store128_gpr64_simd128_s32(Register gpr_addr, Register xmm_value, s64 offset);
InstructionX86 store128_gpr64_simd128_s8(Register gpr_addr, Register xmm_value, s64 offset);
InstructionX86 load128_simd128_gpr64(Register simd_dest, Register gpr_addr);
InstructionX86 load128_simd128_gpr64_s32(Register simd_dest, Register gpr_addr, s64 offset);
InstructionX86 load128_simd128_gpr64_s8(Register simd_dest, Register gpr_addr, s64 offset);
InstructionX86 load128_xmm128_reg_offset(Register simd_dest, Register base, s64 offset);
InstructionX86 store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// RIP loads and stores
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
InstructionX86 load64_rip_s32(Register dest, s64 offset);
InstructionX86 load32s_rip_s32(Register dest, s64 offset);
InstructionX86 load32u_rip_s32(Register dest, s64 offset);
InstructionX86 load16u_rip_s32(Register dest, s64 offset);
InstructionX86 load16s_rip_s32(Register dest, s64 offset);
InstructionX86 load8u_rip_s32(Register dest, s64 offset);
InstructionX86 load8s_rip_s32(Register dest, s64 offset);
InstructionX86 static_load(Register dest, s64 offset, int size, bool sign_extend);
InstructionX86 store64_rip_s32(Register src, s64 offset);
InstructionX86 store32_rip_s32(Register src, s64 offset);
InstructionX86 store16_rip_s32(Register src, s64 offset);
InstructionX86 store8_rip_s32(Register src, s64 offset);
InstructionX86 static_store(Register value, s64 offset, int size);
InstructionX86 static_addr(Register dst, s64 offset);
InstructionX86 static_load_xmm32(Register simd_dest, s64 offset);
InstructionX86 static_store_xmm32(Register xmm_value, s64 offset);
// TODO, special load/stores of 128 bit values.
// TODO, consider specialized stack loads and stores?
InstructionX86 load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg);
/*!
* Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset.
*/
InstructionX86 store64_gpr64_plus_s32(Register addr, int32_t offset, Register value);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// FUNCTION STUFF
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Function return. Pops the 64-bit return address (real) off the stack and jumps to it.
*/
InstructionX86 ret();
/*!
* Instruction to push gpr (64-bits) onto the stack
*/
InstructionX86 push_gpr64(Register reg);
/*!
* Instruction to pop 64 bit gpr from the stack
*/
InstructionX86 pop_gpr64(Register reg);
/*!
* Call a function stored in a 64-bit gpr
*/
InstructionX86 call_r64(Register reg_);
/*!
* Jump to an x86-64 address stored in a 64-bit gpr.
*/
InstructionX86 jmp_r64(Register reg_);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// INTEGER MATH
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
InstructionX86 sub_gpr64_imm8s(Register reg, int64_t imm);
InstructionX86 sub_gpr64_imm32s(Register reg, int64_t imm);
InstructionX86 add_gpr64_imm8s(Register reg, int64_t v);
InstructionX86 add_gpr64_imm32s(Register reg, int64_t v);
InstructionX86 add_gpr64_imm(Register reg, int64_t imm);
InstructionX86 sub_gpr64_imm(Register reg, int64_t imm);
InstructionX86 add_gpr64_gpr64(Register dst, Register src);
InstructionX86 sub_gpr64_gpr64(Register dst, Register src);
/*!
* Multiply gprs (32-bit, signed).
* (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply)
*/
InstructionX86 imul_gpr32_gpr32(Register dst, Register src);
/*!
* Multiply gprs (64-bit, signed).
* DANGER - this treats all operands as 64-bit. This is not like the EE.
*/
InstructionX86 imul_gpr64_gpr64(Register dst, Register src);
/*!
* Divide (idiv, 32 bit)
*/
InstructionX86 idiv_gpr32(Register reg);
InstructionX86 unsigned_div_gpr32(Register reg);
/*!
* Convert doubleword to quadword for division.
*/
InstructionX86 cdq();
/*!
* Move from gpr32 to gpr64, with sign extension.
* Needed for multiplication/divsion madness.
*/
InstructionX86 movsx_r64_r32(Register dst, Register src);
/*!
* Compare gpr64. This sets the flags for the jumps.
* todo UNTESTED
*/
InstructionX86 cmp_gpr64_gpr64(Register a, Register b);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// BIT STUFF
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Or of two gprs
*/
InstructionX86 or_gpr64_gpr64(Register dst, Register src);
/*!
* And of two gprs
*/
InstructionX86 and_gpr64_gpr64(Register dst, Register src);
/*!
* Xor of two gprs
*/
InstructionX86 xor_gpr64_gpr64(Register dst, Register src);
/*!
* Bitwise not a gpr
*/
InstructionX86 not_gpr64(Register reg);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// SHIFTS
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Shift 64-bit gpr left by CL register
*/
InstructionX86 shl_gpr64_cl(Register reg);
/*!
* Shift 64-bit gpr right (logical) by CL register
*/
InstructionX86 shr_gpr64_cl(Register reg);
/*!
* Shift 64-bit gpr right (arithmetic) by CL register
*/
InstructionX86 sar_gpr64_cl(Register reg);
/*!
* Shift 64-ptr left (logical) by the constant shift amount "sa".
*/
InstructionX86 shl_gpr64_u8(Register reg, uint8_t sa);
/*!
* Shift 64-ptr right (logical) by the constant shift amount "sa".
*/
InstructionX86 shr_gpr64_u8(Register reg, uint8_t sa);
/*!
* Shift 64-ptr right (arithmetic) by the constant shift amount "sa".
*/
InstructionX86 sar_gpr64_u8(Register reg, uint8_t sa);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// CONTROL FLOW
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Jump, 32-bit constant offset. The offset is by default 0 and must be patched later.
*/
InstructionX86 jmp_32();
/*!
* Jump if equal.
*/
InstructionX86 je_32();
/*!
* Jump not equal.
*/
InstructionX86 jne_32();
/*!
* Jump less than or equal.
*/
InstructionX86 jle_32();
/*!
* Jump greater than or equal.
*/
InstructionX86 jge_32();
/*!
* Jump less than
*/
InstructionX86 jl_32();
/*!
* Jump greater than
*/
InstructionX86 jg_32();
/*!
* Jump below or equal
*/
InstructionX86 jbe_32();
/*!
* Jump above or equal
*/
InstructionX86 jae_32();
/*!
* Jump below
*/
InstructionX86 jb_32();
/*!
* Jump above
*/
InstructionX86 ja_32();
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// FLOAT MATH
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Compare two floats and set flag register for jump (ucomiss)
*/
InstructionX86 cmp_flt_flt(Register a, Register b);
InstructionX86 sqrts_xmm(Register dst, Register src);
/*!
* Multiply two floats in xmm's
*/
InstructionX86 mulss_xmm_xmm(Register dst, Register src);
/*!
* Divide two floats in xmm's
*/
InstructionX86 divss_xmm_xmm(Register dst, Register src);
/*!
* Subtract two floats in xmm's
*/
InstructionX86 subss_xmm_xmm(Register dst, Register src);
/*!
* Add two floats in xmm's
*/
InstructionX86 addss_xmm_xmm(Register dst, Register src);
/*!
* Floating point minimum.
*/
InstructionX86 minss_xmm_xmm(Register dst, Register src);
/*!
* Floating point maximum.
*/
InstructionX86 maxss_xmm_xmm(Register dst, Register src);
/*!
* Convert GPR int32 to XMM float (single precision)
*/
InstructionX86 int32_to_float(Register dst, Register src);
/*!
* Convert XMM float to GPR int32(single precision) (truncate)
*/
InstructionX86 float_to_int32(Register dst, Register src);
InstructionX86 nop();
// TODO - rsqrt / abs / sqrt
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// UTILITIES
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* A "null" instruction. This instruction does not generate any bytes
* but can be referred to by a label. Useful to insert in place of a real instruction
* if the real instruction has been optimized out.
*/
InstructionX86 null();
/////////////////////////////
// AVX (VF - Vector Float) //
/////////////////////////////
InstructionX86 nop_vf();
InstructionX86 wait_vf();
InstructionX86 mov_vf_vf(Register dst, Register src);
InstructionX86 loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2);
InstructionX86 loadvf_gpr64_plus_gpr64_plus_s8(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 loadvf_gpr64_plus_gpr64_plus_s32(Register dst,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2);
InstructionX86 storevf_gpr64_plus_gpr64_plus_s8(Register value,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 storevf_gpr64_plus_gpr64_plus_s32(Register value,
Register addr1,
Register addr2,
s64 offset);
InstructionX86 loadvf_rip_plus_s32(Register dest, s64 offset);
// TODO - rip relative loads and stores.
InstructionX86 blend_vf(Register dst, Register src1, Register src2, u8 mask);
InstructionX86 shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw);
/*
Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved.
Here's a brief run-down:
- 8-bits / 4 groups of 2 bits
- Right-to-left, each group is used to determine which element in `src` gets copied into
`dst`'s element (W->X).
- GROUP OPTIONS
- 00b - Copy the least-significant element (X)
- 01b - Copy the second element (from the right) (Y)
- 10b - Copy the third element (from the right) (Z)
- 11b - Copy the most significant element (W)
Examples
; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land)
SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions
> (1.5, 1.5, 1.5, 1.5)
SHUFPS xmm1, xmm1, 0x39 ; Rotate right
> (4.5, 1.5, 2.5, 3.5)
*/
InstructionX86 swizzle_vf(Register dst, Register src, u8 controlBytes);
/*
Splats a single element in 'src' to all elements in 'dst'
For example (pseudocode):
xmm1 = (1.5, 2.5, 3.5, 4.5)
xmm2 = (1, 2, 3, 4)
splat_vf(xmm1, xmm2, XMM_ELEMENT::X);
xmm1 = (4, 4, 4, 4)
*/
InstructionX86 splat_vf(Register dst, Register src, Register::VF_ELEMENT element);
InstructionX86 xor_vf(Register dst, Register src1, Register src2);
InstructionX86 sub_vf(Register dst, Register src1, Register src2);
InstructionX86 add_vf(Register dst, Register src1, Register src2);
InstructionX86 mul_vf(Register dst, Register src1, Register src2);
InstructionX86 max_vf(Register dst, Register src1, Register src2);
InstructionX86 min_vf(Register dst, Register src1, Register src2);
InstructionX86 div_vf(Register dst, Register src1, Register src2);
InstructionX86 sqrt_vf(Register dst, Register src);
InstructionX86 itof_vf(Register dst, Register src);
InstructionX86 ftoi_vf(Register dst, Register src);
InstructionX86 pw_sra(Register dst, Register src, u8 imm);
InstructionX86 pw_srl(Register dst, Register src, u8 imm);
InstructionX86 ph_srl(Register dst, Register src, u8 imm);
InstructionX86 pw_sll(Register dst, Register src, u8 imm);
InstructionX86 ph_sll(Register dst, Register src, u8 imm);
InstructionX86 parallel_add_byte(Register dst, Register src0, Register src1);
InstructionX86 parallel_bitwise_or(Register dst, Register src0, Register src1);
InstructionX86 parallel_bitwise_xor(Register dst, Register src0, Register src1);
InstructionX86 parallel_bitwise_and(Register dst, Register src0, Register src1);
// Reminder - a word in MIPS = 32bits = a DWORD in x86
// MIPS || x86
// -----------------------
// byte || byte
// halfword || word
// word || dword
// doubleword || quadword
// -- Unpack High Data Instructions
InstructionX86 pextub_swapped(Register dst, Register src0, Register src1);
InstructionX86 pextuh_swapped(Register dst, Register src0, Register src1);
InstructionX86 pextuw_swapped(Register dst, Register src0, Register src1);
// -- Unpack Low Data Instructions
InstructionX86 pextlb_swapped(Register dst, Register src0, Register src1);
InstructionX86 pextlh_swapped(Register dst, Register src0, Register src1);
InstructionX86 pextlw_swapped(Register dst, Register src0, Register src1);
// Equal to than comparison as 16 bytes (8 bits)
InstructionX86 parallel_compare_e_b(Register dst, Register src0, Register src1);
// Equal to than comparison as 8 halfwords (16 bits)
InstructionX86 parallel_compare_e_h(Register dst, Register src0, Register src1);
// Equal to than comparison as 4 words (32 bits)
InstructionX86 parallel_compare_e_w(Register dst, Register src0, Register src1);
// Greater than comparison as 16 bytes (8 bits)
InstructionX86 parallel_compare_gt_b(Register dst, Register src0, Register src1);
// Greater than comparison as 8 halfwords (16 bits)
InstructionX86 parallel_compare_gt_h(Register dst, Register src0, Register src1);
// Greater than comparison as 4 words (32 bits)
InstructionX86 parallel_compare_gt_w(Register dst, Register src0, Register src1);
InstructionX86 vpunpcklqdq(Register dst, Register src0, Register src1);
InstructionX86 pcpyld_swapped(Register dst, Register src0, Register src1);
InstructionX86 pcpyud(Register dst, Register src0, Register src1);
InstructionX86 vpsubd(Register dst, Register src0, Register src1);
InstructionX86 vpsrldq(Register dst, Register src, u8 imm);
InstructionX86 vpslldq(Register dst, Register src, u8 imm);
InstructionX86 vpshuflw(Register dst, Register src, u8 imm);
InstructionX86 vpshufhw(Register dst, Register src, u8 imm);
InstructionX86 vpackuswb(Register dst, Register src0, Register src1);
} // namespace X86
} // namespace IGen
} // namespace emitter
+149 -20
View File
@@ -1,12 +1,111 @@
#pragma once
#ifndef JAK_INSTRUCTION_H
#define JAK_INSTRUCTION_H
#include <cstring>
#include <variant>
#include "common/common_types.h"
#include "common/util/Assert.h"
namespace emitter {
/*!
* A high-level description of a opcode. It can emit itself.
*/
template <typename InstructionType>
struct InstructionImpl {
/*!
* Emit into a buffer and return how many bytes written (can be zero)
*/
u8 emit(u8* buffer) const { return static_cast<const InstructionType*>(this)->emit(buffer); }
// TODO - the below might only be relevant for X86, in which case
// they can eventually leave this parent type
// and at that point, things can likely be simplified
//
// For now, just trying to make things compile / work
u8 length() const { return static_cast<const InstructionType*>(this)->length(); }
int get_imm_size() const { return static_cast<const InstructionType*>(this)->get_imm_size(); }
int get_disp_size() const { return static_cast<const InstructionType*>(this)->get_disp_size(); }
int offset_of_imm() const { return static_cast<const InstructionType*>(this)->offset_of_imm(); }
int offset_of_disp() const { return static_cast<const InstructionType*>(this)->offset_of_disp(); }
};
namespace ARM64 {
struct Field {
u32 bits;
constexpr explicit Field(u32 v) : bits(v) {}
};
constexpr u32 Base(u32 value, u32 width) {
return value << (32 - width);
}
constexpr Field Rd(u32 x) {
return Field{(x & 31) << 0};
}
constexpr Field Rt(u32 x) {
return Field{(x & 31) << 0};
}
constexpr Field Rn(u32 x) {
return Field{(x & 31) << 5};
}
constexpr Field Rm(u32 x) {
return Field{(x & 31) << 16};
}
constexpr Field Imm6(u32 x) {
return Field{(x & 0b111111) << 10};
}
constexpr Field Imm9(s32 x) {
return Field{(static_cast<uint32_t>(x) & 0b111111111) << 12};
}
constexpr Field Imm12(u32 x) {
ASSERT(x >= 0 && x <= 4095);
return Field{(static_cast<uint32_t>(x) & 0b111111111111) << 10};
}
} // namespace ARM64
struct InstructionARM64 : InstructionImpl<InstructionARM64> {
// The ARM instruction stream is a sequence of word-aligned words. Each ARM instruction is a
// single 32-bit word in that stream.
// Info:
// - https://yurichev.com/mirrors/ARMv8-A_Architecture_Reference_Manual_(Issue_A.a).pdf
// - https://www.scs.stanford.edu/~zyedidia/arm64/
// - https://armconverter.com/?lock=arm64&code=STR+X0,+[SP,+%23-8]!
u32 encoding;
InstructionARM64() = delete;
template <typename... Fs>
constexpr InstructionARM64(uint32_t base, Fs... fields) : encoding((base | ... | fields.bits)) {
static_assert((std::is_same_v<Fs, emitter::ARM64::Field> && ...),
"All operands must be Field types");
}
uint8_t emit(uint8_t* buffer) const {
memcpy(buffer, &encoding, 4);
return 4;
}
uint8_t length() const { return 4; }
int get_imm_size() const { return 0; }
int offset_of_imm() const { return 0; }
int offset_of_disp() const { return 0; }
int get_disp_size() const { return 0; }
};
/*!
* The ModRM byte
*/
@@ -133,13 +232,7 @@ struct VEX2 {
: R(r), reg_id(_reg_id), prefix(_prefix), L(l) {}
};
/*!
* A high-level description of an x86-64 opcode. It can emit itself.
*/
struct Instruction {
Instruction(uint8_t opcode) : op(opcode) {}
uint8_t op;
struct InstructionX86 : InstructionImpl<InstructionX86> {
enum Flags {
kOp2Set = (1 << 0),
kOp3Set = (1 << 1),
@@ -151,23 +244,27 @@ struct Instruction {
kSetImm = (1 << 7),
};
InstructionX86(u8 opcode) : op(opcode) {}
u8 op;
u8 m_flags = 0;
uint8_t op2;
u8 op2;
uint8_t op3;
u8 op3;
u8 n_vex = 0;
uint8_t vex[3] = {0, 0, 0};
u8 vex[3] = {0, 0, 0};
// the rex byte
uint8_t m_rex = 0;
u8 m_rex = 0;
// the modrm byte
uint8_t m_modrm = 0;
u8 m_modrm = 0;
// the sib byte
uint8_t m_sib = 0;
u8 m_sib = 0;
// the displacement
Imm disp;
@@ -924,9 +1021,6 @@ struct Instruction {
return offset;
}
/*!
* Emit into a buffer and return how many bytes written (can be zero)
*/
uint8_t emit(uint8_t* buffer) const {
if (m_flags & kIsNull)
return 0;
@@ -1015,6 +1109,41 @@ struct Instruction {
return count;
}
};
} // namespace emitter
#endif // JAK_INSTRUCTION_H
class Instruction {
public:
using Variant = std::variant<InstructionX86, InstructionARM64>;
Variant instr;
Instruction() = delete;
template <typename T>
Instruction(T v) : instr(std::move(v)) {}
u8 emit(u8* buffer) const {
return std::visit([&](auto const& i) { return i.emit(buffer); }, instr);
}
u8 length() const {
return std::visit([](auto const& i) { return i.length(); }, instr);
}
int get_imm_size() const {
return std::visit([](auto const& i) { return i.get_imm_size(); }, instr);
}
int get_disp_size() const {
return std::visit([](auto const& i) { return i.get_disp_size(); }, instr);
}
int offset_of_imm() const {
return std::visit([](auto const& i) { return i.offset_of_imm(); }, instr);
}
int offset_of_disp() const {
return std::visit([](auto const& i) { return i.offset_of_disp(); }, instr);
}
};
} // namespace emitter
+5
View File
@@ -0,0 +1,5 @@
#pragma once
namespace emitter {
enum class InstructionSet { X86, ARM64 };
};
+6 -4
View File
@@ -1,6 +1,6 @@
/*!
* @file ObjectGenerator.cpp
* Tool to build GOAL object files. Will eventually support v3 and v4.
* Tool to build GOAL object files.
*
* There are 5 steps:
* 1. The user adds static data / instructions and specifies links.
@@ -21,11 +21,13 @@
#include "goalc/debugger/DebugInfo.h"
#include "fmt/format.h"
namespace emitter {
ObjectGenerator::ObjectGenerator(GameVersion version) : m_version(version) {}
ObjectGenerator::ObjectGenerator(GameVersion version)
: m_version(version), m_instruction_set(InstructionSet::X86) {}
ObjectGenerator::ObjectGenerator(GameVersion version, InstructionSet instr_set)
: m_version(version), m_instruction_set(instr_set) {}
/*!
* Build an object file with the v3 format.
+5
View File
@@ -15,6 +15,7 @@
#include "common/versions/versions.h"
#include "goalc/debugger/DebugInfo.h"
#include "goalc/emitter/InstructionSet.h"
struct FunctionDebugInfo;
class TypeSystem;
@@ -64,6 +65,7 @@ struct ObjectGeneratorStats {
class ObjectGenerator {
public:
ObjectGenerator(GameVersion version);
ObjectGenerator(GameVersion version, InstructionSet instr_set);
ObjectFileData generate_data_v3(const TypeSystem* ts);
FunctionRecord add_function_to_seg(int seg,
FunctionDebugInfo* debug,
@@ -99,6 +101,8 @@ class ObjectGenerator {
GameVersion version() const { return m_version; }
InstructionSet instr_set() const { return m_instruction_set; }
private:
void handle_temp_static_type_links(int seg);
void handle_temp_jump_links(int seg);
@@ -209,6 +213,7 @@ class ObjectGenerator {
template <typename T>
using seg_map = std::array<std::map<std::string, std::vector<T>>, N_SEG>;
GameVersion m_version;
InstructionSet m_instruction_set;
// final data
seg_vector<u8> m_data_by_seg;
+39 -26
View File
@@ -13,6 +13,8 @@
#include "common/goal_constants.h"
#include "common/util/Assert.h"
#include "goalc/emitter/InstructionSet.h"
namespace emitter {
enum class HWRegKind : u8 { GPR, XMM, INVALID };
@@ -60,9 +62,6 @@ enum X86_REG : s8 {
XMM15, // saved
};
// TODO - i think it'll be better to make some sort of abstraction
// mapping between x86 and arm, but just using this enum as a place to prototype
// the registers to use.
enum ARM64_REG : s8 {
X0, // arg 0, caller-saved RDI
X1, // arg 1, caller-saved RSI
@@ -104,7 +103,7 @@ enum ARM64_REG : s8 {
// quadword registers, equivalent to XMMs
// the convention in arm64 is the callee preserves all Q values
// at the same time though, the caller should not depend on this convention!
Q0,
Q0 = 0,
Q1,
Q2,
Q3,
@@ -119,23 +118,7 @@ enum ARM64_REG : s8 {
Q12,
Q13,
Q14,
Q15,
Q16,
Q17,
Q18,
Q19,
Q20,
Q21,
Q22,
Q23,
Q24,
Q25,
Q26,
Q27,
Q28,
Q29,
Q30,
Q31
Q15
};
class Register {
@@ -145,14 +128,44 @@ class Register {
// intentionally not explicit so we can use X86_REGs in place of Registers
Register(int id) : m_id(id) {}
bool is_xmm() const { return m_id >= XMM0 && m_id <= XMM15; }
bool is_128bit_simd(emitter::InstructionSet instr_set) const {
if (instr_set == emitter::InstructionSet::X86) {
return m_id >= XMM0 && m_id <= XMM15;
} else if (instr_set == emitter::InstructionSet::ARM64) {
return m_id >= Q0 && m_id <= Q15;
} else {
ASSERT_MSG(false, "is_128bit_simd: instruction set not supported");
}
}
bool is_gpr() const { return m_id >= RAX && m_id <= R15; }
bool is_xmm(emitter::InstructionSet instr_set) const {
if (instr_set == emitter::InstructionSet::X86) {
return m_id >= XMM0 && m_id <= XMM15;
} else if (instr_set == emitter::InstructionSet::ARM64) {
return false;
} else {
ASSERT_MSG(false, "is_xmm: instruction set not supported");
}
}
int hw_id() const {
if (is_xmm()) {
bool is_gpr(emitter::InstructionSet instr_set) const {
if (instr_set == emitter::InstructionSet::X86) {
return m_id >= RAX && m_id <= R15;
} else if (instr_set == emitter::InstructionSet::ARM64) {
return (m_id >= X0 && m_id <= X30) || m_id == SP;
} else {
ASSERT_MSG(false, "is_gpr: instruction set not supported");
}
}
int hw_id(emitter::InstructionSet instr_set) const {
// ARM64 does not require the concept of a hw_id
if (instr_set != emitter::InstructionSet::X86) {
ASSERT_MSG(false, "hw_id is only applicable for x86");
}
if (is_xmm(instr_set)) {
return m_id - XMM0;
} else if (is_gpr()) {
} else if (is_gpr(instr_set)) {
return m_id - RAX;
} else {
ASSERT(false);
+3 -3
View File
@@ -103,7 +103,7 @@ int main(int argc, char** argv) {
// if a command is provided on the command line, no REPL just run the compiler on it
try {
if (!cmd.empty()) {
compiler = std::make_unique<Compiler>(game_version);
compiler = std::make_unique<Compiler>(game_version, emitter::InstructionSet::X86);
compiler->run_front_end_on_string(cmd);
return 0;
}
@@ -130,7 +130,7 @@ int main(int argc, char** argv) {
// the compiler may throw an exception if it fails to load its standard library.
try {
compiler = std::make_unique<Compiler>(
game_version, std::make_optional(repl_config), username,
game_version, emitter::InstructionSet::X86, std::make_optional(repl_config), username,
std::make_unique<REPL::Wrapper>(username, repl_config, startup_file, nrepl_server_ok));
// Start nREPL Server if it spun up successfully
if (nrepl_server_ok) {
@@ -158,7 +158,7 @@ int main(int argc, char** argv) {
compiler->save_repl_history();
}
compiler = std::make_unique<Compiler>(
game_version, std::make_optional(repl_config), username,
game_version, emitter::InstructionSet::X86, std::make_optional(repl_config), username,
std::make_unique<REPL::Wrapper>(username, repl_config, startup_file, nrepl_server_ok));
status = ReplStatus::OK;
}
+6 -4
View File
@@ -3,6 +3,7 @@
#include "common/versions/versions.h"
#include "goalc/compiler/Compiler.h"
#include "goalc/emitter/InstructionSet.h"
int main(int argc, char** argv) {
// logging
@@ -27,16 +28,17 @@ int main(int argc, char** argv) {
std::unique_ptr<Compiler> compiler;
ReplStatus status = ReplStatus::OK;
try {
compiler = std::make_unique<Compiler>(game_version, std::nullopt, "",
std::make_unique<REPL::Wrapper>(game_version));
compiler = std::make_unique<Compiler>(game_version, emitter::InstructionSet::X86, std::nullopt,
"", std::make_unique<REPL::Wrapper>(game_version));
while (status != ReplStatus::WANT_EXIT) {
if (status == ReplStatus::WANT_RELOAD) {
lg::info("Reloading compiler...");
if (compiler) {
compiler->save_repl_history();
}
compiler = std::make_unique<Compiler>(game_version, std::nullopt, "",
std::make_unique<REPL::Wrapper>(game_version));
compiler =
std::make_unique<Compiler>(game_version, emitter::InstructionSet::X86, std::nullopt, "",
std::make_unique<REPL::Wrapper>(game_version));
status = ReplStatus::OK;
}
std::string input_from_stdin = compiler->get_repl_input();
+3 -2
View File
@@ -298,8 +298,9 @@ void Workspace::start_tracking_file(const LSPSpec::DocumentUri& file_uri,
const std::string progress_title =
fmt::format("Compiling {}", version_to_game_name_external(game_version.value()));
m_requester.send_progress_create_request(progress_title, "compiling project", -1);
m_compiler_instances.emplace(game_version.value(),
std::make_unique<Compiler>(game_version.value()));
m_compiler_instances.emplace(
game_version.value(),
std::make_unique<Compiler>(game_version.value(), emitter::InstructionSet::X86));
try {
// TODO - this should happen on a separate thread so the LSP is not blocking during this
// lengthy step
-2
View File
@@ -9,7 +9,6 @@ add_executable(goalc-test
${CMAKE_CURRENT_LIST_DIR}/test_kernel_jak1.cpp
${CMAKE_CURRENT_LIST_DIR}/all_jak1_symbols.cpp
${CMAKE_CURRENT_LIST_DIR}/test_type_system.cpp
${CMAKE_CURRENT_LIST_DIR}/test_CodeTester.cpp
${CMAKE_CURRENT_LIST_DIR}/test_emitter.cpp
${CMAKE_CURRENT_LIST_DIR}/test_emitter_avx.cpp
${CMAKE_CURRENT_LIST_DIR}/test_common_util.cpp
@@ -17,7 +16,6 @@ add_executable(goalc-test
${CMAKE_CURRENT_LIST_DIR}/test_math.cpp
${CMAKE_CURRENT_LIST_DIR}/test_zstd.cpp
${CMAKE_CURRENT_LIST_DIR}/test_zydis.cpp
${CMAKE_CURRENT_LIST_DIR}/goalc/test_goal_kernel.cpp
${CMAKE_CURRENT_LIST_DIR}/decompiler/FormRegressionTest.cpp
${CMAKE_CURRENT_LIST_DIR}/decompiler/test_AtomicOpBuilder.cpp
${CMAKE_CURRENT_LIST_DIR}/decompiler/test_FormBeforeExpressions.cpp
+39 -15
View File
@@ -1,19 +1,43 @@
set(GOALC_TEST_CASES
${CMAKE_CURRENT_LIST_DIR}/test_arithmetic.cpp
${CMAKE_CURRENT_LIST_DIR}/test_collections.cpp
${CMAKE_CURRENT_LIST_DIR}/test_compiler.cpp
${CMAKE_CURRENT_LIST_DIR}/test_control_statements.cpp
${CMAKE_CURRENT_LIST_DIR}/test_debugger.cpp
${CMAKE_CURRENT_LIST_DIR}/test_game_no_debug.cpp
${CMAKE_CURRENT_LIST_DIR}/test_goal_kernel.cpp
${CMAKE_CURRENT_LIST_DIR}/test_goal_kernel2.cpp
${CMAKE_CURRENT_LIST_DIR}/test_goal_kernel3.cpp
${CMAKE_CURRENT_LIST_DIR}/test_jak2_compiler.cpp
${CMAKE_CURRENT_LIST_DIR}/test_variables.cpp
${CMAKE_CURRENT_LIST_DIR}/test_with_game.cpp
${CMAKE_CURRENT_LIST_DIR}/test_type_consistency.cpp
${CMAKE_CURRENT_LIST_DIR}/test_vector_float.cpp
# TODO - order matters, unfortunately, the kernel tests write to the filesystem and
# other tests depend on that, solve that someday, maybe
if(CMAKE_APPLE_SILICON_PROCESSOR STREQUAL "arm64")
set(GOALC_TEST_CASES
# ${CMAKE_CURRENT_LIST_DIR}/test_goal_kernel.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_goal_kernel2.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_goal_kernel3.cpp
${CMAKE_CURRENT_LIST_DIR}/test_CodeTester.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_arithmetic.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_collections.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_compiler.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_control_statements.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_debugger.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_game_no_debug.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_jak2_compiler.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_variables.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_with_game.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_type_consistency.cpp
# ${CMAKE_CURRENT_LIST_DIR}/test_vector_float.cpp
)
else()
set(GOALC_TEST_CASES
${CMAKE_CURRENT_LIST_DIR}/test_goal_kernel.cpp
${CMAKE_CURRENT_LIST_DIR}/test_goal_kernel2.cpp
${CMAKE_CURRENT_LIST_DIR}/test_goal_kernel3.cpp
${CMAKE_CURRENT_LIST_DIR}/test_CodeTester.cpp
${CMAKE_CURRENT_LIST_DIR}/test_arithmetic.cpp
${CMAKE_CURRENT_LIST_DIR}/test_collections.cpp
${CMAKE_CURRENT_LIST_DIR}/test_compiler.cpp
${CMAKE_CURRENT_LIST_DIR}/test_control_statements.cpp
${CMAKE_CURRENT_LIST_DIR}/test_debugger.cpp
${CMAKE_CURRENT_LIST_DIR}/test_game_no_debug.cpp
${CMAKE_CURRENT_LIST_DIR}/test_jak2_compiler.cpp
${CMAKE_CURRENT_LIST_DIR}/test_variables.cpp
${CMAKE_CURRENT_LIST_DIR}/test_with_game.cpp
${CMAKE_CURRENT_LIST_DIR}/test_type_consistency.cpp
${CMAKE_CURRENT_LIST_DIR}/test_vector_float.cpp
)
endif()
set(GOALC_TEST_FRAMEWORK_SOURCES
${CMAKE_CURRENT_LIST_DIR}/framework/test_runner.cpp
+378
View File
@@ -0,0 +1,378 @@
/*!
* @file test_CodeTester.cpp
* Tests for the CodeTester, a tool for testing the emitter by emitting code and running it
* from within the test application.
*
* These tests should just make sure the basic functionality of CodeTester works, and that it
* can generate prologues/epilogues, and execute them without crashing.
*/
#include "goalc/emitter/CodeTester.h"
#include "goalc/emitter/IGen.h"
#include "goalc/emitter/InstructionSet.h"
#include "goalc/emitter/Register.h"
#include "gtest/gtest.h"
using namespace emitter;
TEST(CodeTester, prologue_x86) {
CodeTester tester;
tester.init_code_buffer(256);
tester.emit_push_all_gprs();
// check we generate the right code for pushing all gpr's
EXPECT_EQ(tester.dump_to_hex_string(),
"50 51 52 53 54 55 56 57 41 50 41 51 41 52 41 53 41 54 41 55 41 56 41 57");
}
TEST(CodeTester, prologue_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
// tester.emit(IGen::push_gpr64(tester.generator(), ARM64_REG::X0));
// EXPECT_EQ(tester.dump_to_hex_string(), "e0 8f 1f f8");
tester.emit_push_all_gprs();
// check we generate the right code for pushing all gpr's
EXPECT_EQ(tester.dump_to_hex_string(),
"e0 0f 1f f8 e1 0f 1f f8 e2 0f 1f f8 e3 0f 1f f8 e4 0f 1f f8 e5 0f 1f f8 e6 0f 1f f8 "
"e7 0f 1f f8 e8 0f 1f f8 e9 0f 1f f8 ea 0f 1f f8 eb 0f 1f f8 ec 0f 1f f8 ed 0f 1f f8 "
"ee 0f 1f f8 ef 0f 1f f8 f0 0f 1f f8 f1 0f 1f f8 f2 0f 1f f8 f3 0f 1f f8 f4 0f 1f f8 "
"f5 0f 1f f8 f6 0f 1f f8 f7 0f 1f f8 f8 0f 1f f8 f9 0f 1f f8 fa 0f 1f f8 fb 0f 1f f8 "
"fc 0f 1f f8 fd 0f 1f f8 fe 0f 1f f8");
}
TEST(CodeTester, epilogue_x86) {
CodeTester tester;
tester.init_code_buffer(256);
tester.emit_pop_all_gprs();
// check we generate the right code for popping all gpr's
EXPECT_EQ(tester.dump_to_hex_string(),
"41 5f 41 5e 41 5d 41 5c 41 5b 41 5a 41 59 41 58 5f 5e 5d 5c 5b 5a 59 58");
}
TEST(CodeTester, epilogue_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
tester.emit_pop_all_gprs();
// check we generate the right code for popping all gpr's
EXPECT_EQ(tester.dump_to_hex_string(),
"fe 07 41 f8 fd 07 41 f8 fc 07 41 f8 fb 07 41 f8 fa 07 41 f8 f9 07 41 f8 f8 07 41 f8 "
"f7 07 41 f8 f6 07 41 f8 f5 07 41 f8 f4 07 41 f8 f3 07 41 f8 f2 07 41 f8 f1 07 41 f8 "
"f0 07 41 f8 ef 07 41 f8 ee 07 41 f8 ed 07 41 f8 ec 07 41 f8 eb 07 41 f8 ea 07 41 f8 "
"e9 07 41 f8 e8 07 41 f8 e7 07 41 f8 e6 07 41 f8 e5 07 41 f8 e4 07 41 f8 e3 07 41 f8 "
"e2 07 41 f8 e1 07 41 f8 e0 07 41 f8");
}
TEST(CodeTester, sub_gpr64_imm8_x86) {
CodeTester tester;
tester.init_code_buffer(256);
for (int i = 0; i < 16; i++) {
tester.emit(IGen::sub_gpr64_imm8s(tester.generator(), i, -1));
}
EXPECT_EQ(tester.dump_to_hex_string(true),
"4883E8FF4883E9FF4883EAFF4883EBFF4883ECFF4883EDFF4883EEFF4883EFFF4983E8FF4983E9FF4983EA"
"FF4983EBFF4983ECFF4983EDFF4983EEFF4983EFFF");
}
TEST(CodeTester, sub_gpr64_imm8_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
for (int i = 0; i < 31; i++) {
tester.emit(IGen::sub_gpr64_imm8s(tester.generator(), i, -1));
}
EXPECT_EQ(tester.dump_to_hex_string(true),
"0004009121040091420400916304009184040091A5040091C6040091E704009108050091290500914A0500"
"916B0500918C050091AD050091CE050091EF0500911006009131060091520600917306009194060091B506"
"0091D6060091F706009118070091390700915A0700917B0700919C070091BD070091DE070091");
}
TEST(CodeTester, add_gpr64_imm8_x86) {
CodeTester tester;
tester.init_code_buffer(256);
for (int i = 0; i < 16; i++) {
tester.emit(IGen::add_gpr64_imm8s(tester.generator(), i, -1));
}
EXPECT_EQ(tester.dump_to_hex_string(true),
"4883C0FF4883C1FF4883C2FF4883C3FF4883C4FF4883C5FF4883C6FF4883C7FF4983C0FF4983C1FF4983C2"
"FF4983C3FF4983C4FF4983C5FF4983C6FF4983C7FF");
}
TEST(CodeTester, add_gpr64_imm8_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
for (int i = 0; i < 31; i++) {
tester.emit(IGen::add_gpr64_imm8s(tester.generator(), i, -1));
}
EXPECT_EQ(tester.dump_to_hex_string(true),
"000400D1210400D1420400D1630400D1840400D1A50400D1C60400D1E70400D1080500D1290500D14A0500"
"D16B0500D18C0500D1AD0500D1CE0500D1EF0500D1100600D1310600D1520600D1730600D1940600D1B506"
"00D1D60600D1F70600D1180700D1390700D15A0700D17B0700D19C0700D1BD0700D1DE0700D1");
}
TEST(CodeTester, simd_store_128_x86) {
CodeTester tester;
tester.init_code_buffer(256);
// movdqa [rbx], xmm3
// movdqa [r14], xmm3
// movdqa [rbx], xmm14
// movdqa [r14], xmm13
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), RBX, XMM3));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), R14, XMM3));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), RBX, XMM14));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), R14, XMM13));
EXPECT_EQ(tester.dump_to_hex_string(),
"66 0f 7f 1b 66 41 0f 7f 1e 66 44 0f 7f 33 66 45 0f 7f 2e");
tester.clear();
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), RSP, XMM1));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 0c 24"); // requires SIB byte.
tester.clear();
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), R12, XMM13));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 2c 24"); // requires SIB byte and REX byte
tester.clear();
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), RBP, XMM1));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 4d 00");
tester.clear();
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), RBP, XMM11));
EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 7f 5d 00");
tester.clear();
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), R13, XMM2));
EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 7f 55 00");
tester.clear();
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), R13, XMM12));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 65 00");
}
TEST(CodeTester, simd_store_128_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, Q3));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, Q3));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, Q14));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, Q13));
EXPECT_EQ(tester.dump_to_hex_string(), "43 00 80 3d c3 01 80 3d 4e 00 80 3d cd 01 80 3d");
}
TEST(CodeTester, xmm_load_128_x86) {
CodeTester tester;
tester.init_code_buffer(256);
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM3, RBX));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM3, R14));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM14, RBX));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM13, R14));
EXPECT_EQ(tester.dump_to_hex_string(),
"66 0f 6f 1b 66 41 0f 6f 1e 66 44 0f 6f 33 66 45 0f 6f 2e");
tester.clear();
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM1, RSP));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 0c 24"); // requires SIB byte.
tester.clear();
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM13, R12));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 2c 24"); // requires SIB byte and REX byte
tester.clear();
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM1, RBP));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 4d 00");
tester.clear();
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM11, RBP));
EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 6f 5d 00");
tester.clear();
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM2, R13));
EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 6f 55 00");
tester.clear();
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), XMM12, R13));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 65 00");
}
TEST(CodeTester, xmm_load_128_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q3, X1));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q3, X14));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q14, X1));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q13, X14));
EXPECT_EQ(tester.dump_to_hex_string(), "23 00 c0 3d c3 01 c0 3d 2e 00 c0 3d cd 01 c0 3d");
}
// These tests actually execute the code, you cannot execute arm64 code on x86 and vise versa
// so these tests have to be conditional based on the platform unfortunately.
TEST(CodeTester, execute_push_pop_simd_x86) {
CodeTester tester;
tester.init_code_buffer(512);
tester.emit_push_all_simd();
tester.emit_pop_all_simd();
tester.emit_return();
EXPECT_EQ(
tester.dump_to_hex_string(),
"48 83 ec 08 48 83 ec 10 66 0f 7f 04 24 48 83 ec 10 66 0f 7f 0c 24 48 83 ec 10 66 0f 7f 14 "
"24 48 83 ec 10 66 0f 7f 1c 24 48 83 ec 10 66 0f 7f 24 24 48 83 ec 10 66 0f 7f 2c 24 48 83 "
"ec 10 66 0f 7f 34 24 48 83 ec 10 66 0f 7f 3c 24 48 83 ec 10 66 44 0f 7f 04 24 48 83 ec 10 "
"66 44 0f 7f 0c 24 48 83 ec 10 66 44 0f 7f 14 24 48 83 ec 10 66 44 0f 7f 1c 24 48 83 ec 10 "
"66 44 0f 7f 24 24 48 83 ec 10 66 44 0f 7f 2c 24 48 83 ec 10 66 44 0f 7f 34 24 48 83 ec 10 "
"66 44 0f 7f 3c 24 66 0f 6f 04 24 48 83 c4 10 66 0f 6f 0c 24 48 83 c4 10 66 0f 6f 14 24 48 "
"83 c4 10 66 0f 6f 1c 24 48 83 c4 10 66 0f 6f 24 24 48 83 c4 10 66 0f 6f 2c 24 48 83 c4 10 "
"66 0f 6f 34 24 48 83 c4 10 66 0f 6f 3c 24 48 83 c4 10 66 44 0f 6f 04 24 48 83 c4 10 66 44 "
"0f 6f 0c 24 48 83 c4 10 66 44 0f 6f 14 24 48 83 c4 10 66 44 0f 6f 1c 24 48 83 c4 10 66 44 "
"0f 6f 24 24 48 83 c4 10 66 44 0f 6f 2c 24 48 83 c4 10 66 44 0f 6f 34 24 48 83 c4 10 66 44 "
"0f 6f 3c 24 48 83 c4 10 48 83 c4 08 c3");
#ifndef __aarch64__
tester.execute();
#endif
}
TEST(CodeTester, execute_push_pop_simd_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(512);
tester.emit_push_all_simd();
tester.emit_pop_all_simd();
tester.emit_return();
EXPECT_EQ(
tester.dump_to_hex_string(),
"ff 43 00 d1 e0 03 80 3d ff 43 00 d1 e1 03 80 3d ff 43 00 d1 e2 03 80 3d ff 43 00 d1 e3 03 "
"80 3d ff 43 00 d1 e4 03 80 3d ff 43 00 d1 e5 03 80 3d ff 43 00 d1 e6 03 80 3d ff 43 00 d1 "
"e7 03 80 3d ff 43 00 d1 e8 03 80 3d ff 43 00 d1 e9 03 80 3d ff 43 00 d1 ea 03 80 3d ff 43 "
"00 d1 eb 03 80 3d ff 43 00 d1 ec 03 80 3d ff 43 00 d1 ed 03 80 3d ff 43 00 d1 ee 03 80 3d "
"ff 43 00 d1 ef 03 80 3d e0 03 c0 3d ff 43 00 91 e1 03 c0 3d ff 43 00 91 e2 03 c0 3d ff 43 "
"00 91 e3 03 c0 3d ff 43 00 91 e4 03 c0 3d ff 43 00 91 e5 03 c0 3d ff 43 00 91 e6 03 c0 3d "
"ff 43 00 91 e7 03 c0 3d ff 43 00 91 e8 03 c0 3d ff 43 00 91 e9 03 c0 3d ff 43 00 91 ea 03 "
"c0 3d ff 43 00 91 eb 03 c0 3d ff 43 00 91 ec 03 c0 3d ff 43 00 91 ed 03 c0 3d ff 43 00 91 "
"ee 03 c0 3d ff 43 00 91 ef 03 c0 3d ff 43 00 91 c0 03 5f d6");
#ifdef __aarch64__
tester.execute();
#endif
}
TEST(CodeTester, execute_push_pop_all_the_things_x86) {
CodeTester tester;
tester.init_code_buffer(512);
tester.emit_push_all_simd();
tester.emit_push_all_gprs();
// ...
tester.emit_pop_all_gprs();
tester.emit_pop_all_simd();
tester.emit_return();
EXPECT_EQ(tester.dump_to_hex_string(),
"48 83 ec 08 48 83 ec 10 66 0f 7f 04 24 48 83 ec 10 66 0f 7f 0c 24 48 83 ec 10 66 0f "
"7f 14 24 48 83 ec 10 66 0f 7f 1c 24 48 83 ec 10 66 0f 7f 24 24 48 83 ec 10 66 0f 7f "
"2c 24 48 83 ec 10 66 0f 7f 34 24 48 83 ec 10 66 0f 7f 3c 24 48 83 ec 10 66 44 0f 7f "
"04 24 48 83 ec 10 66 44 0f 7f 0c 24 48 83 ec 10 66 44 0f 7f 14 24 48 83 ec 10 66 44 "
"0f 7f 1c 24 48 83 ec 10 66 44 0f 7f 24 24 48 83 ec 10 66 44 0f 7f 2c 24 48 83 ec 10 "
"66 44 0f 7f 34 24 48 83 ec 10 66 44 0f 7f 3c 24 50 51 52 53 54 55 56 57 41 50 41 51 "
"41 52 41 53 41 54 41 55 41 56 41 57 41 5f 41 5e 41 5d 41 5c 41 5b 41 5a 41 59 41 58 "
"5f 5e 5d 5c 5b 5a 59 58 66 0f 6f 04 24 48 83 c4 10 66 0f 6f 0c 24 48 83 c4 10 66 0f "
"6f 14 24 48 83 c4 10 66 0f 6f 1c 24 48 83 c4 10 66 0f 6f 24 24 48 83 c4 10 66 0f 6f "
"2c 24 48 83 c4 10 66 0f 6f 34 24 48 83 c4 10 66 0f 6f 3c 24 48 83 c4 10 66 44 0f 6f "
"04 24 48 83 c4 10 66 44 0f 6f 0c 24 48 83 c4 10 66 44 0f 6f 14 24 48 83 c4 10 66 44 "
"0f 6f 1c 24 48 83 c4 10 66 44 0f 6f 24 24 48 83 c4 10 66 44 0f 6f 2c 24 48 83 c4 10 "
"66 44 0f 6f 34 24 48 83 c4 10 66 44 0f 6f 3c 24 48 83 c4 10 48 83 c4 08 c3");
#ifndef __aarch64__
tester.execute();
#endif
}
TEST(CodeTester, execute_push_pop_all_the_things_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(512);
tester.emit_push_all_simd();
tester.emit_push_all_gprs();
// ...
tester.emit_pop_all_gprs();
tester.emit_pop_all_simd();
tester.emit_return();
EXPECT_EQ(
tester.dump_to_hex_string(),
"ff 43 00 d1 e0 03 80 3d ff 43 00 d1 e1 03 80 3d ff 43 00 d1 e2 03 80 3d ff 43 00 d1 e3 03 "
"80 3d ff 43 00 d1 e4 03 80 3d ff 43 00 d1 e5 03 80 3d ff 43 00 d1 e6 03 80 3d ff 43 00 d1 "
"e7 03 80 3d ff 43 00 d1 e8 03 80 3d ff 43 00 d1 e9 03 80 3d ff 43 00 d1 ea 03 80 3d ff 43 "
"00 d1 eb 03 80 3d ff 43 00 d1 ec 03 80 3d ff 43 00 d1 ed 03 80 3d ff 43 00 d1 ee 03 80 3d "
"ff 43 00 d1 ef 03 80 3d e0 0f 1f f8 e1 0f 1f f8 e2 0f 1f f8 e3 0f 1f f8 e4 0f 1f f8 e5 0f "
"1f f8 e6 0f 1f f8 e7 0f 1f f8 e8 0f 1f f8 e9 0f 1f f8 ea 0f 1f f8 eb 0f 1f f8 ec 0f 1f f8 "
"ed 0f 1f f8 ee 0f 1f f8 ef 0f 1f f8 f0 0f 1f f8 f1 0f 1f f8 f2 0f 1f f8 f3 0f 1f f8 f4 0f "
"1f f8 f5 0f 1f f8 f6 0f 1f f8 f7 0f 1f f8 f8 0f 1f f8 f9 0f 1f f8 fa 0f 1f f8 fb 0f 1f f8 "
"fc 0f 1f f8 fd 0f 1f f8 fe 0f 1f f8 fe 07 41 f8 fd 07 41 f8 fc 07 41 f8 fb 07 41 f8 fa 07 "
"41 f8 f9 07 41 f8 f8 07 41 f8 f7 07 41 f8 f6 07 41 f8 f5 07 41 f8 f4 07 41 f8 f3 07 41 f8 "
"f2 07 41 f8 f1 07 41 f8 f0 07 41 f8 ef 07 41 f8 ee 07 41 f8 ed 07 41 f8 ec 07 41 f8 eb 07 "
"41 f8 ea 07 41 f8 e9 07 41 f8 e8 07 41 f8 e7 07 41 f8 e6 07 41 f8 e5 07 41 f8 e4 07 41 f8 "
"e3 07 41 f8 e2 07 41 f8 e1 07 41 f8 e0 07 41 f8 e0 03 c0 3d ff 43 00 91 e1 03 c0 3d ff 43 "
"00 91 e2 03 c0 3d ff 43 00 91 e3 03 c0 3d ff 43 00 91 e4 03 c0 3d ff 43 00 91 e5 03 c0 3d "
"ff 43 00 91 e6 03 c0 3d ff 43 00 91 e7 03 c0 3d ff 43 00 91 e8 03 c0 3d ff 43 00 91 e9 03 "
"c0 3d ff 43 00 91 ea 03 c0 3d ff 43 00 91 eb 03 c0 3d ff 43 00 91 ec 03 c0 3d ff 43 00 91 "
"ed 03 c0 3d ff 43 00 91 ee 03 c0 3d ff 43 00 91 ef 03 c0 3d ff 43 00 91 c0 03 5f d6");
#ifdef __aarch64__
tester.execute();
#endif
}
TEST(CodeTester, execute_return_x86) {
CodeTester tester;
tester.init_code_buffer(256);
// test creating a function which simply returns
tester.emit_return();
EXPECT_EQ(tester.dump_to_hex_string(), "c3");
// and execute it!
#ifndef __aarch64__
tester.execute();
#endif
}
TEST(CodeTester, execute_return_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
// test creating a function which simply returns
tester.emit(IGen::add_gpr64_imm8s(tester.generator(), ARM64_REG::X0, 1));
tester.emit(IGen::ret(tester.generator()));
EXPECT_EQ(tester.dump_to_hex_string(), "00 04 00 91 c0 03 5f d6");
// and execute it!
#ifdef __aarch64__
tester.execute();
#endif
}
TEST(CodeTester, execute_push_pop_gprs_x86) {
CodeTester tester;
tester.init_code_buffer(256);
// test we can push/pop gprs without crashing.
tester.emit_push_all_gprs();
tester.emit_pop_all_gprs();
tester.emit_return();
EXPECT_EQ(tester.dump_to_hex_string(),
"50 51 52 53 54 55 56 57 41 50 41 51 41 52 41 53 41 54 41 55 41 56 41 57 41 5f 41 5e "
"41 5d 41 5c 41 5b 41 5a 41 59 41 58 5f 5e 5d 5c 5b 5a 59 58 c3");
#ifndef __aarch64__
tester.execute();
#endif
}
TEST(CodeTester, execute_push_pop_gprs_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
// test we can push/pop gprs without crashing.
tester.emit_push_all_gprs();
tester.emit_pop_all_gprs();
tester.emit_return();
EXPECT_EQ(tester.dump_to_hex_string(),
"e0 0f 1f f8 e1 0f 1f f8 e2 0f 1f f8 e3 0f 1f f8 e4 0f 1f f8 e5 0f 1f f8 e6 0f 1f f8 "
"e7 0f 1f f8 e8 0f 1f f8 e9 0f 1f f8 ea 0f 1f f8 eb 0f 1f f8 ec 0f 1f f8 ed 0f 1f f8 "
"ee 0f 1f f8 ef 0f 1f f8 f0 0f 1f f8 f1 0f 1f f8 f2 0f 1f f8 f3 0f 1f f8 f4 0f 1f f8 "
"f5 0f 1f f8 f6 0f 1f f8 f7 0f 1f f8 f8 0f 1f f8 f9 0f 1f f8 fa 0f 1f f8 fb 0f 1f f8 "
"fc 0f 1f f8 fd 0f 1f f8 fe 0f 1f f8 fe 07 41 f8 fd 07 41 f8 fc 07 41 f8 fb 07 41 f8 "
"fa 07 41 f8 f9 07 41 f8 f8 07 41 f8 f7 07 41 f8 f6 07 41 f8 f5 07 41 f8 f4 07 41 f8 "
"f3 07 41 f8 f2 07 41 f8 f1 07 41 f8 f0 07 41 f8 ef 07 41 f8 ee 07 41 f8 ed 07 41 f8 "
"ec 07 41 f8 eb 07 41 f8 ea 07 41 f8 e9 07 41 f8 e8 07 41 f8 e7 07 41 f8 e6 07 41 f8 "
"e5 07 41 f8 e4 07 41 f8 e3 07 41 f8 e2 07 41 f8 e1 07 41 f8 e0 07 41 f8 c0 03 5f d6");
#ifdef __aarch64__
tester.execute();
#endif
}
+1 -3
View File
@@ -1,5 +1,3 @@
// https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#value-parameterized-tests
#include <chrono>
#include <iostream>
#include <random>
@@ -119,7 +117,7 @@ class ArithmeticTests : public testing::TestWithParam<IntegerParam> {
// Called before the first test in this test suite.
static void SetUpTestSuite() {
runtime_thread = std::make_unique<std::thread>(std::thread(GoalTest::runtime_no_kernel_jak1));
compiler = std::make_unique<Compiler>(GameVersion::Jak1);
compiler = std::make_unique<Compiler>(GameVersion::Jak1, emitter::InstructionSet::X86);
runner = std::make_unique<GoalTest::CompilerTestRunner>();
runner->c = compiler.get();
}
+1 -1
View File
@@ -14,7 +14,7 @@ class CollectionTests : public testing::TestWithParam<CollectionParam> {
public:
static void SetUpTestSuite() {
runtime_thread = std::make_unique<std::thread>(std::thread(GoalTest::runtime_no_kernel_jak1));
compiler = std::make_unique<Compiler>(GameVersion::Jak1);
compiler = std::make_unique<Compiler>(GameVersion::Jak1, emitter::InstructionSet::X86);
runner = std::make_unique<GoalTest::CompilerTestRunner>();
runner->c = compiler.get();
}
+2 -2
View File
@@ -2,6 +2,6 @@
#include "gtest/gtest.h"
TEST(CompilerAndRuntime, ConstructCompiler) {
Compiler compiler1(GameVersion::Jak1);
Compiler compiler2(GameVersion::Jak2);
Compiler compiler1(GameVersion::Jak1, emitter::InstructionSet::X86);
Compiler compiler2(GameVersion::Jak2, emitter::InstructionSet::X86);
}
+2 -2
View File
@@ -1,8 +1,8 @@
#include <string>
#include <thread>
#include "game/runtime.h"
#include "goalc/compiler/Compiler.h"
#include "goalc/emitter/InstructionSet.h"
#include "gtest/gtest.h"
#include "test/goalc/framework/test_runner.h"
@@ -14,7 +14,7 @@ class ControlStatementTests : public testing::TestWithParam<ControlStatementPara
public:
static void SetUpTestSuite() {
runtime_thread = std::make_unique<std::thread>(std::thread(GoalTest::runtime_no_kernel_jak1));
compiler = std::make_unique<Compiler>(GameVersion::Jak1);
compiler = std::make_unique<Compiler>(GameVersion::Jak1, emitter::InstructionSet::X86);
runner = std::make_unique<GoalTest::CompilerTestRunner>();
runner->c = compiler.get();
}
+6 -6
View File
@@ -34,7 +34,7 @@ void connect_compiler_and_debugger(Compiler& compiler, bool do_break) {
}
} // namespace
TEST(Jak1Debugger, DebuggerBasicConnect) {
Compiler compiler(GameVersion::Jak1);
Compiler compiler(GameVersion::Jak1, emitter::InstructionSet::X86);
// evidently you can't ptrace threads in your own process, so we need to run the runtime in a
// separate process.
if (!fork()) {
@@ -51,7 +51,7 @@ TEST(Jak1Debugger, DebuggerBasicConnect) {
}
TEST(Jak1Debugger, DebuggerBreakAndContinue) {
Compiler compiler(GameVersion::Jak1);
Compiler compiler(GameVersion::Jak1, emitter::InstructionSet::X86);
// evidently you can't ptrace threads in your own process, so we need to run the runtime in a
// separate process.
if (!fork()) {
@@ -73,7 +73,7 @@ TEST(Jak1Debugger, DebuggerBreakAndContinue) {
}
TEST(Jak1Debugger, DebuggerReadMemory) {
Compiler compiler(GameVersion::Jak1);
Compiler compiler(GameVersion::Jak1, emitter::InstructionSet::X86);
// evidently you can't ptrace threads in your own process, so we need to run the runtime in a
// separate process.
if (!fork()) {
@@ -97,7 +97,7 @@ TEST(Jak1Debugger, DebuggerReadMemory) {
}
TEST(Jak1Debugger, DebuggerWriteMemory) {
Compiler compiler(GameVersion::Jak1);
Compiler compiler(GameVersion::Jak1, emitter::InstructionSet::X86);
// evidently you can't ptrace threads in your own process, so we need to run the runtime in a
// separate process.
if (!fork()) {
@@ -128,7 +128,7 @@ TEST(Jak1Debugger, DebuggerWriteMemory) {
}
TEST(Jak1Debugger, Symbol) {
Compiler compiler(GameVersion::Jak1);
Compiler compiler(GameVersion::Jak1, emitter::InstructionSet::X86);
// evidently you can't ptrace threads in your own process, so we need to run the runtime in a
// separate process.
if (!fork()) {
@@ -160,7 +160,7 @@ TEST(Jak1Debugger, Symbol) {
TEST(Jak1Debugger, SimpleBreakpoint) {
try {
Compiler compiler(GameVersion::Jak1);
Compiler compiler(GameVersion::Jak1, emitter::InstructionSet::X86);
if (!fork()) {
GoalTest::runtime_no_kernel_jak1();
+1 -1
View File
@@ -5,7 +5,7 @@
#include "test/goalc/framework/test_runner.h"
TEST(Jak1NoDebugSegment, Init) {
Compiler compiler(GameVersion::Jak1);
Compiler compiler(GameVersion::Jak1, emitter::InstructionSet::X86);
compiler.run_front_end_on_string("(build-kernel)");
std::thread runtime_thread = std::thread(GoalTest::runtime_with_kernel_no_debug_segment);
+1 -1
View File
@@ -37,7 +37,7 @@ class Jak1KernelTest : public testing::Test {
void TearDown() {}
struct SharedCompiler {
SharedCompiler(GameVersion v) : compiler(v) {}
SharedCompiler(GameVersion v) : compiler(v, emitter::InstructionSet::X86) {}
std::thread runtime_thread;
Compiler compiler;
GoalTest::CompilerTestRunner runner;
+1 -1
View File
@@ -38,7 +38,7 @@ class Jak2KernelTest : public testing::Test {
void TearDown() {}
struct SharedCompiler {
SharedCompiler(GameVersion v) : compiler(v) {}
SharedCompiler(GameVersion v) : compiler(v, emitter::InstructionSet::X86) {}
std::thread runtime_thread;
Compiler compiler;
GoalTest::CompilerTestRunner runner;
+1 -1
View File
@@ -38,7 +38,7 @@ class Jak3KernelTest : public testing::Test {
void TearDown() {}
struct SharedCompiler {
SharedCompiler(GameVersion v) : compiler(v) {}
SharedCompiler(GameVersion v) : compiler(v, emitter::InstructionSet::X86) {}
std::thread runtime_thread;
Compiler compiler;
GoalTest::CompilerTestRunner runner;
+1 -1
View File
@@ -11,7 +11,7 @@ class Jak2GoalcTests : public testing::TestWithParam<Jak2Param> {
public:
static void SetUpTestSuite() {
runtime_thread = std::make_unique<std::thread>(std::thread(GoalTest::runtime_no_kernel_jak2));
compiler = std::make_unique<Compiler>(GameVersion::Jak2);
compiler = std::make_unique<Compiler>(GameVersion::Jak2, emitter::InstructionSet::X86);
runner = std::make_unique<GoalTest::CompilerTestRunner>();
runner->c = compiler.get();
}
+8 -8
View File
@@ -22,7 +22,7 @@ void add_jak3_expected_type_mismatches(Compiler& /*c*/) {}
void add_jakx_expected_type_mismatches(Compiler& /*c*/) {}
TEST(Jak1TypeConsistency, MANUAL_TEST_TypeConsistencyWithBuildFirst) {
Compiler compiler(GameVersion::Jak1);
Compiler compiler(GameVersion::Jak1, emitter::InstructionSet::X86);
compiler.enable_throw_on_redefines();
add_common_expected_type_mismatches(compiler);
add_jak1_expected_type_mismatches(compiler);
@@ -31,7 +31,7 @@ TEST(Jak1TypeConsistency, MANUAL_TEST_TypeConsistencyWithBuildFirst) {
}
TEST(Jak2TypeConsistency, MANUAL_TEST_TypeConsistencyWithBuildFirst) {
Compiler compiler(GameVersion::Jak2);
Compiler compiler(GameVersion::Jak2, emitter::InstructionSet::X86);
compiler.enable_throw_on_redefines();
add_common_expected_type_mismatches(compiler);
add_jak2_expected_type_mismatches(compiler);
@@ -40,7 +40,7 @@ TEST(Jak2TypeConsistency, MANUAL_TEST_TypeConsistencyWithBuildFirst) {
}
TEST(Jak3TypeConsistency, TypeConsistencyWithBuildFirst) {
Compiler compiler(GameVersion::Jak3);
Compiler compiler(GameVersion::Jak3, emitter::InstructionSet::X86);
compiler.enable_throw_on_redefines();
add_common_expected_type_mismatches(compiler);
add_jak3_expected_type_mismatches(compiler);
@@ -49,7 +49,7 @@ TEST(Jak3TypeConsistency, TypeConsistencyWithBuildFirst) {
}
// TEST(JakXTypeConsistency, TypeConsistencyWithBuildFirst) {
// Compiler compiler(GameVersion::JakX);
// Compiler compiler(GameVersion::JakX, emitter::InstructionSet::X86);
// compiler.enable_throw_on_redefines();
// add_common_expected_type_mismatches(compiler);
// add_jakx_expected_type_mismatches(compiler);
@@ -58,7 +58,7 @@ TEST(Jak3TypeConsistency, TypeConsistencyWithBuildFirst) {
// }
TEST(Jak1TypeConsistency, TypeConsistency) {
Compiler compiler(GameVersion::Jak1);
Compiler compiler(GameVersion::Jak1, emitter::InstructionSet::X86);
compiler.enable_throw_on_redefines();
add_common_expected_type_mismatches(compiler);
add_jak1_expected_type_mismatches(compiler);
@@ -67,7 +67,7 @@ TEST(Jak1TypeConsistency, TypeConsistency) {
}
TEST(Jak2TypeConsistency, TypeConsistency) {
Compiler compiler(GameVersion::Jak2);
Compiler compiler(GameVersion::Jak2, emitter::InstructionSet::X86);
compiler.enable_throw_on_redefines();
add_common_expected_type_mismatches(compiler);
add_jak2_expected_type_mismatches(compiler);
@@ -76,7 +76,7 @@ TEST(Jak2TypeConsistency, TypeConsistency) {
}
TEST(Jak3TypeConsistency, TypeConsistency) {
Compiler compiler(GameVersion::Jak3);
Compiler compiler(GameVersion::Jak3, emitter::InstructionSet::X86);
compiler.enable_throw_on_redefines();
add_common_expected_type_mismatches(compiler);
add_jak3_expected_type_mismatches(compiler);
@@ -85,7 +85,7 @@ TEST(Jak3TypeConsistency, TypeConsistency) {
}
// TEST(JakXTypeConsistency, TypeConsistency) {
// Compiler compiler(GameVersion::JakX);
// Compiler compiler(GameVersion::JakX, emitter::InstructionSet::X86);
// compiler.enable_throw_on_redefines();
// add_common_expected_type_mismatches(compiler);
// add_jakx_expected_type_mismatches(compiler);
+1 -1
View File
@@ -32,7 +32,7 @@ class VariableTests : public testing::TestWithParam<VariableParam> {
void TearDown() {}
struct SharedCompiler {
SharedCompiler(GameVersion version) : compiler(version) {}
SharedCompiler(GameVersion version) : compiler(version, emitter::InstructionSet::X86) {}
std::thread runtime_thread;
Compiler compiler;
GoalTest::CompilerTestRunner runner;
+1 -1
View File
@@ -51,7 +51,7 @@ class WithMinimalGameTests : public ::testing::Test {
void TearDown() {}
struct SharedCompiler {
SharedCompiler(GameVersion v) : compiler(v) {}
SharedCompiler(GameVersion v) : compiler(v, emitter::InstructionSet::X86) {}
std::thread runtime_thread;
Compiler compiler;
GoalTest::CompilerTestRunner runner;
+1 -1
View File
@@ -49,7 +49,7 @@ class WithGameTests : public ::testing::Test {
void TearDown() {}
struct SharedCompiler {
SharedCompiler(GameVersion v) : compiler(v) {}
SharedCompiler(GameVersion v) : compiler(v, emitter::InstructionSet::X86) {}
std::thread runtime_thread;
Compiler compiler;
GoalTest::CompilerTestRunner runner;
+2 -62
View File
@@ -28,13 +28,11 @@
"DGO/DAR.DGO",
"DGO/TIT.DGO"
],
"skip_compile_files": [
"timer", // accessing timer regs
"display", // interrupt handlers
"target-snowball" // screwed up labels, likely cut content
],
"skip_compile_functions": [
/// GCOMMON
// these functions are not implemented by the compiler in OpenGOAL, but are in GOAL.
@@ -47,212 +45,154 @@
"breakpoint-range-set!",
// inline assembly
"valid?",
/// GKERNEL
// asm
"(method 10 process)",
"(method 14 dead-pool)",
/// GSTATE
"enter-state", // stack pointer asm
/// MATH
"rand-vu-init",
"rand-vu",
"rand-vu-nostep", // random hardware
// trig
"sin-rad", // fpu acc
"cos-rad", // fpu acc
"atan-series-rad", // fpu acc
/// VECTOR-H
"(method 3 vector)", // this function appears twice, which confuses the compiler.
"vector4-dot", // fpu acc
"(method 3 profile-frame)", // double definition.
// dma-disasm
"disasm-dma-list", // missing a single cast :(
// math camera
"transform-point-vector!",
"transform-point-qword!",
"transform-point-vector-scale!",
// display-h
"put-draw-env",
// geometry
"calculate-basis-functions-vector!", // asm requiring manual rewrite
"curve-evaluate!", // asm requiring manual rewrite
"point-in-triangle-cross", // logior on floats manual fixup
// texture
"(method 9 texture-page-dir)", // multiplication on pointers
"adgif-shader<-texture-with-update!", // misrecognized bitfield stuff.
// asm
"invalidate-cache-line",
// stats-h
"(method 11 perf-stat)",
"(method 12 perf-stat)",
// sprite-distorter
"sprite-draw-distorters", // uses clipping flag.
// sync-info
"(method 15 sync-info)", // needs display stuff first
"(method 15 sync-info-eased)", // needs display stuff first
"(method 15 sync-info-paused)", // needs display stuff first
// sparticle
"lookup-part-group-pointer-by-name", // address of element in array issue
// ripple - calls an asm function
"ripple-execute",
"get-task-status",
"print-game-text-scaled", // float/int, looks like a bug in original code?
// aligner - return-from-thread, currently not supported
"(method 9 align-control)",
// stat collection
"start-perf-stat-collection",
"end-perf-stat-collection",
// double definition
"(method 3 game-save)",
// new stack boxed array
"update-time-of-day",
// weird asm, was rewritten
"close-sky-buffer",
// float to int
"(method 10 bsp-header)",
// multiply defined.
"(method 3 sprite-aux-list)",
// camera
"slave-set-rotation!",
"v-slrp2!",
"v-slrp3!", // vector-dot involving the stack
// function returning float with a weird cast.
"debug-menu-item-var-make-float",
// decompiler BUG
"level-hint-task-process",
"(method 26 level)",
"(method 9 level)",
"(method 10 level)", // asm
// cam-states
"cam-los-collide", // vector-dot involving the stack
// cam-layout
"cam-layout-save-cam-trans", // temporary, im sure this can be fixed
// anim-tester
"(method 3 anim-tester)",
"anim-tester-save-object-seqs", // anim-tester -- new basic on the stack
// default-menu
"all-texture-tweak-adjust", // dynamic-field access placeholder case TODO
"debug-menu-make-instance-menu", // also disabled
// joint
"(method 9 art-mesh-geo)", // PLACEHOLDER array access
"flatten-joint-control-to-spr",
"make-joint-jump-tables",
"(method 5 art-joint-anim)", // defined identically twice in the same file...probably a bug?
// process-drawable
"fill-skeleton-cache", // cache dxwbin
"execute-math-engine", // handle casts -- was fixed manually
// ambient
"ambient-type-music", // IR_StoreConstOffset::do_codegen can't handle this (c {} sz {})
"ambient-type-music", // IR_StoreConstOffset::do_codegen_x86 can't handle this (c {} sz {})
// main
"display-loop",
"on",
// target-handler
"target-generic-event-handler", // return type forced to none
// shadow-cpu-h
"(method 3 shadow-edge)", // defined twice in the same file, one is wrong and old
// sky - these are skipped and not used
"sky-draw",
"sky-upload",
"sky-add-frame-data",
// drawable
"vis-cull", // unsupported asm
"draw-instance-info", // skipped for now, debug only
"foreground-engine-execute",
"real-main-draw-hook", // dma handling not complete
// generic-obs
"command-get-process", // handle casts
// navigate
"end-collect-nav",
"start-collect-nav",
// appears twice
"(method 9 drawable-tree-instance-tie)",
"(method 11 drawable-tree-instance-tie)",
"(method 12 drawable-tree-instance-tie)",
"(method 13 drawable-tree-instance-tie)",
"ray-triangle-intersect", // requires SLL implementation
"(method 51 snow-bunny)", // bitfield problem
"ice-cube-default-event-handler", // return casted to none issue
"(method 51 ice-cube)", // bitfield problem
"(method 13 collide-mesh)", // scratchpad sadness
"(method 10 collide-mesh)", // collide-mesh-cache-tri handling
// not in use in PC port
"tie-near-init-engine",
"tie-near-end-buffer",
"(method 19 process-drawable)",
"curve-evaluate!",
"generic-reset-buffers",
"generic-merc-execute-all",
/// COLLIDE-EDGE-GRAB
"(method 9 edge-grab-info)", // asm
/// COLLIDE-SHAPE-RIDER
// type mess
"(method 22 collide-shape-prim-mesh)",
/// COLLIDE-REACTION-TARGET
"poly-find-nearest-edge",
/// GLIST
// i dont even want to know
"glst-find-node-by-name",
"glst-length-of-longest-name",
"race-time-save"
],
"skip_compile_states": {
"cam-master-active": [
"event"
@@ -267,4 +207,4 @@
"code" // dead code not analyzed properly after a loop
]
}
}
}
+1 -1
View File
@@ -104,7 +104,7 @@ OfflineTestCompileResult compile(OfflineTestDecompiler& dc,
const OfflineTestWorkGroup& work_group,
const OfflineTestConfig& config) {
OfflineTestCompileResult result;
Compiler compiler(game_name_to_version(config.game_name));
Compiler compiler(game_name_to_version(config.game_name), emitter::InstructionSet::X86);
compiler.run_front_end_on_file(
{"decompiler", "config", game_name_to_all_types[config.game_name]});
-233
View File
@@ -1,233 +0,0 @@
/*!
* @file test_CodeTester.cpp
* Tests for the CodeTester, a tool for testing the emitter by emitting code and running it
* from within the test application.
*
* These tests should just make sure the basic functionality of CodeTester works, and that it
* can generate prologues/epilogues, and execute them without crashing.
*/
#include "goalc/emitter/CodeTester.h"
#include "goalc/emitter/IGen.h"
#include "gtest/gtest.h"
using namespace emitter;
TEST(CodeTester, prologue) {
CodeTester tester;
tester.init_code_buffer(256);
tester.emit_push_all_gprs();
// check we generate the right code for pushing all gpr's
EXPECT_EQ(tester.dump_to_hex_string(),
"50 51 52 53 54 55 56 57 41 50 41 51 41 52 41 53 41 54 41 55 41 56 41 57");
}
TEST(CodeTester, epilogue) {
CodeTester tester;
tester.init_code_buffer(256);
tester.emit_pop_all_gprs();
// check we generate the right code for popping all gpr's
EXPECT_EQ(tester.dump_to_hex_string(),
"41 5f 41 5e 41 5d 41 5c 41 5b 41 5a 41 59 41 58 5f 5e 5d 5c 5b 5a 59 58");
}
TEST(CodeTester, execute_return) {
CodeTester tester;
tester.init_code_buffer(256);
// test creating a function which simply returns
tester.emit_return();
// and execute it!
tester.execute();
}
TEST(CodeTester, execute_push_pop_gprs) {
CodeTester tester;
tester.init_code_buffer(256);
// test we can push/pop gprs without crashing.
tester.emit_push_all_gprs();
tester.emit_pop_all_gprs();
tester.emit_return();
tester.execute();
}
TEST(CodeTester, xmm_store_128) {
CodeTester tester;
tester.init_code_buffer(256);
// movdqa [rbx], xmm3
// movdqa [r14], xmm3
// movdqa [rbx], xmm14
// movdqa [r14], xmm13
tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM3));
tester.emit(IGen::store128_gpr64_xmm128(R14, XMM3));
tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM14));
tester.emit(IGen::store128_gpr64_xmm128(R14, XMM13));
EXPECT_EQ(tester.dump_to_hex_string(),
"66 0f 7f 1b 66 41 0f 7f 1e 66 44 0f 7f 33 66 45 0f 7f 2e");
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(RSP, XMM1));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 0c 24"); // requires SIB byte.
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(R12, XMM13));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 2c 24"); // requires SIB byte and REX byte
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM1));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 4d 00");
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM11));
EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 7f 5d 00");
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(R13, XMM2));
EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 7f 55 00");
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(R13, XMM12));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 65 00");
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM3));
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM3));
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM14));
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM13));
// EXPECT_EQ(tester.dump_to_hex_string(),
// "f3 0f 7f 1b f3 41 0f 7f 1e f3 44 0f 7f 33 f3 45 0f 7f 2e");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(RSP, XMM1));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 0c 24"); // requires SIB byte.
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(R12, XMM13));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 2c 24"); // requires SIB byte and REX
// byte
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM1));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 4d 00");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM11));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 7f 5d 00");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM2));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 7f 55 00");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM12));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 65 00");
}
TEST(CodeTester, sub_gpr64_imm8) {
CodeTester tester;
tester.init_code_buffer(256);
for (int i = 0; i < 16; i++) {
tester.emit(IGen::sub_gpr64_imm8s(i, -1));
}
EXPECT_EQ(tester.dump_to_hex_string(true),
"4883E8FF4883E9FF4883EAFF4883EBFF4883ECFF4883EDFF4883EEFF4883EFFF4983E8FF4983E9FF4983EA"
"FF4983EBFF4983ECFF4983EDFF4983EEFF4983EFFF");
}
TEST(CodeTester, add_gpr64_imm8) {
CodeTester tester;
tester.init_code_buffer(256);
for (int i = 0; i < 16; i++) {
tester.emit(IGen::add_gpr64_imm8s(i, -1));
}
EXPECT_EQ(tester.dump_to_hex_string(true),
"4883C0FF4883C1FF4883C2FF4883C3FF4883C4FF4883C5FF4883C6FF4883C7FF4983C0FF4983C1FF4983C2"
"FF4983C3FF4983C4FF4983C5FF4983C6FF4983C7FF");
}
TEST(CodeTester, xmm_load_128) {
CodeTester tester;
tester.init_code_buffer(256);
// tester.emit(IGen::load128_xmm128_gpr64(XMM3, RBX));
// tester.emit(IGen::load128_xmm128_gpr64(XMM3, R14));
// tester.emit(IGen::load128_xmm128_gpr64(XMM14, RBX));
// tester.emit(IGen::load128_xmm128_gpr64(XMM13, R14));
// EXPECT_EQ(tester.dump_to_hex_string(),
// "f3 0f 6f 1b f3 41 0f 6f 1e f3 44 0f 6f 33 f3 45 0f 6f 2e");
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM1, RSP));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 0c 24"); // requires SIB byte.
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM13, R12));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 2c 24"); // requires SIB byte and REX
// byte
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM1, RBP));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 4d 00");
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM11, RBP));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 6f 5d 00");
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM2, R13));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 6f 55 00");
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM12, R13));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 65 00");
tester.emit(IGen::load128_xmm128_gpr64(XMM3, RBX));
tester.emit(IGen::load128_xmm128_gpr64(XMM3, R14));
tester.emit(IGen::load128_xmm128_gpr64(XMM14, RBX));
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R14));
EXPECT_EQ(tester.dump_to_hex_string(),
"66 0f 6f 1b 66 41 0f 6f 1e 66 44 0f 6f 33 66 45 0f 6f 2e");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RSP));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 0c 24"); // requires SIB byte.
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R12));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 2c 24"); // requires SIB byte and REX byte
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RBP));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 4d 00");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM11, RBP));
EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 6f 5d 00");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM2, R13));
EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 6f 55 00");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM12, R13));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 65 00");
}
TEST(CodeTester, push_pop_xmms) {
CodeTester tester;
tester.init_code_buffer(512);
tester.emit_push_all_xmms();
tester.emit_pop_all_xmms();
tester.emit_return();
tester.execute();
}
TEST(CodeTester, push_pop_all_the_things) {
CodeTester tester;
tester.init_code_buffer(512);
tester.emit_push_all_xmms();
tester.emit_push_all_gprs();
// ...
tester.emit_pop_all_gprs();
tester.emit_pop_all_xmms();
tester.emit_return();
tester.execute();
}
+3903 -3901
View File
File diff suppressed because it is too large Load Diff
+337 -329
View File
File diff suppressed because it is too large Load Diff
+3637 -1631
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -40,7 +40,7 @@ third-party/libtinyfiledialogs:
alternatives:
- https://github.com/btzy/nativefiledialog-extended (only file dialog support though!)
third-party/sse2neon:
git: https://github.com/DLTcollab/sse2neon/commit/2eede22be8c5922e44616260c5eab728e3c5e26f
git: https://github.com/DLTcollab/sse2neon/releases/tag/v1.9.1
license: MIT
third-party/curl:
git: https://github.com/curl/curl/tree/curl-8_3_0