Files
Tyler Wilding 64bcd8c030 goalc: Get CodeTester tests passing on Arm64 (only targetting macOS atm) (#3290)
This PR does the following:
- Designs a mechanism by which arm64 instructions can be encoded and
emitted
- Dispatch our higher-level instruction emitting calls to either x86 or
arm64 instructions depending on what the compiler is set to (defaults to
x86)
- Bare minimum scaffolding to get the arm64 instructions successfully
executing atleast on apple silicon
- Implement enough instructions to get the codetester test suite passing
on arm
2026-03-30 20:20:47 -04:00

235 lines
7.0 KiB
C++

/*!
* @file CodeTester.cpp
* The CodeTester is a utility to run the output of the compiler as part of a unit test.
* This is effective for tests which try all combinations of registers, etc.
*
* The CodeTester can't be used for tests requiring the full GOAL language/linking.
*/
#include <stdexcept>
#include "common/common_types.h"
#include "goalc/emitter/Instruction.h"
#include "goalc/emitter/Register.h"
#ifdef OS_POSIX
#include <sys/mman.h>
#elif _WIN32
#include "third-party/mman/mman.h"
#endif
#include <cstdio>
#include "CodeTester.h"
#include "IGen.h"
namespace emitter {
CodeTester::CodeTester() : m_info(RegisterInfo::make_register_info()), m_gen(GameVersion::Jak1) {}
CodeTester::CodeTester(InstructionSet instruction_set)
: m_info(RegisterInfo::make_register_info()), m_gen(GameVersion::Jak1, instruction_set) {}
/*!
* Convert to a string for comparison against an assembler or tests.
*/
std::string CodeTester::dump_to_hex_string(bool nospace) {
std::string result;
char buff[32];
for (int i = 0; i < code_buffer_size; i++) {
if (nospace) {
sprintf(buff, "%02X", code_buffer[i]);
} else {
sprintf(buff, "%02x ", code_buffer[i]);
}
result += buff;
}
// remove trailing space
if (!nospace && !result.empty()) {
result.pop_back();
}
return result;
}
/*!
* Add an instruction to the buffer.
*/
void CodeTester::emit(const emitter::Instruction& instr) {
u8* start = code_buffer + code_buffer_size;
code_buffer_size += instr.emit(start);
ASSERT(code_buffer_size <= code_buffer_capacity);
}
/*!
* Add a return instruction to the buffer.
*/
void CodeTester::emit_return() {
emit(IGen::ret(m_gen));
}
/*!
* Pop all GPRs off of the stack. Optionally exclude rax.
* Pops RSP always, which is weird, but doesn't cause issues.
*/
void CodeTester::emit_pop_all_gprs(bool exclude_return_register) {
if (m_gen.instr_set() == InstructionSet::X86) {
for (int i = 16; i-- > 0;) {
if (i != RAX || !exclude_return_register) {
emit(IGen::pop_gpr64(m_gen, i));
}
}
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 31; i-- > 0;) {
if (i != X0 || !exclude_return_register) {
emit(IGen::pop_gpr64(m_gen, i));
}
}
} else {
throw std::runtime_error("CodeTester::emit_pop_all_gprs unhandled instruction set");
}
}
/*!
* Push all GPRs onto the stack. Optionally exclude RAX.
* Pushes RSP always, which is weird, but doesn't cause issues.
*/
void CodeTester::emit_push_all_gprs(bool exclude_return_register) {
if (m_gen.instr_set() == InstructionSet::X86) {
for (int i = 0; i < 16; i++) {
if (i != RAX || !exclude_return_register) {
emit(IGen::push_gpr64(m_gen, i));
}
}
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 0; i < 31; i++) {
if (i != X0 || !exclude_return_register) {
emit(IGen::push_gpr64(m_gen, i));
}
}
} else {
throw std::runtime_error("CodeTester::emit_push_all_gprs unhandled instruction set");
}
}
/*!
* Push all xmm registers (all 128-bits) to the stack.
*/
void CodeTester::emit_push_all_simd() {
if (m_gen.instr_set() == InstructionSet::X86) {
emit(IGen::sub_gpr64_imm8s(m_gen, RSP, 8));
for (int i = 0; i < 16; i++) {
emit(IGen::sub_gpr64_imm8s(m_gen, RSP, 16));
emit(IGen::store128_gpr64_simd128(m_gen, RSP, XMM0 + i));
}
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 0; i < 16; i++) {
emit(IGen::sub_gpr64_imm8s(m_gen, SP, 16));
emit(IGen::store128_gpr64_simd128(m_gen, SP, Q0 + i));
}
} else {
throw std::runtime_error("CodeTester::emit_push_all_simd unhandled instruction set");
}
}
/*!
* Pop all xmm registers (all 128-bits) from the stack
*/
void CodeTester::emit_pop_all_simd() {
if (m_gen.instr_set() == InstructionSet::X86) {
for (int i = 0; i < 16; i++) {
emit(IGen::load128_simd128_gpr64(m_gen, XMM0 + i, RSP));
emit(IGen::add_gpr64_imm8s(m_gen, RSP, 16));
}
emit(IGen::add_gpr64_imm8s(m_gen, RSP, 8));
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 0; i < 16; i++) {
emit(IGen::load128_simd128_gpr64(m_gen, Q0 + i, SP));
emit(IGen::add_gpr64_imm8s(m_gen, SP, 16));
}
} else {
throw std::runtime_error("CodeTester::emit_pop_all_simd unhandled instruction set");
}
}
/*!
* Remove everything from the code buffer
*/
void CodeTester::clear() {
code_buffer_size = 0;
}
/*!
* Execute the buffered code with no arguments, return the value of RAX.
*/
u64 CodeTester::execute() {
#if defined(__aarch64__)
// allegedly needed because ARM requires flushing after writing new instructions
// on x86 it does nothing
__builtin___clear_cache((char*)code_buffer, (char*)code_buffer + code_buffer_size);
#endif
// clang-format off
#if defined(__APPLE__) && defined(__aarch64__)
// TODO - we may need to switch to using pthread_jit_write_protect_np
// there may also be issues if multiple threasd are involved
// but this seems to work so keep it simple until something proves otherwise.
mprotect(code_buffer, code_buffer_capacity, PROT_EXEC | PROT_READ);
auto ret = ((u64(*)())code_buffer)();
mprotect(code_buffer, code_buffer_capacity, PROT_WRITE | PROT_READ);
return ret;
#else
return ((u64(*)())code_buffer)();
#endif
// clang-format on
}
/*!
* Execute code buffer with arguments. Use get_c_abi_arg to figure out which registers the
* arguments will appear in (will handle windows/linux differences)
*/
u64 CodeTester::execute(u64 in0, u64 in1, u64 in2, u64 in3) {
// clang-format off
#if defined(__APPLE__) && defined(__aarch64__)
mprotect(code_buffer, code_buffer_capacity, PROT_EXEC | PROT_READ);
auto ret = ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3);
mprotect(code_buffer, code_buffer_capacity, PROT_WRITE | PROT_READ);
return ret;
#else
return ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3);
#endif
// clang-format on
}
/*!
* Allocate a code buffer of the given size.
*/
void CodeTester::init_code_buffer(int capacity) {
// TODO Apple Silicon - You cannot make a page be RWX,
// or more specifically it can't be both writable and executable at the same time
//
// https://github.com/zherczeg/sljit/issues/99
//
// The solution to this is to flip-flop between permissions, or perhaps have two threads
// one that has writing permission, and another with executable permission
#if defined(__APPLE__) && defined(__aarch64__)
code_buffer = (u8*)mmap(nullptr, capacity, PROT_WRITE | PROT_READ,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_JIT, 0, 0);
#else
code_buffer = (u8*)mmap(nullptr, capacity, PROT_EXEC | PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
#endif
if (code_buffer == (u8*)(-1)) {
ASSERT_MSG(false, "[CodeTester] Failed to map memory!");
}
code_buffer_capacity = capacity;
code_buffer_size = 0;
}
CodeTester::~CodeTester() {
if (code_buffer_capacity) {
munmap(code_buffer, code_buffer_capacity);
}
}
} // namespace emitter