goalc: Implement the bulk of ARM64 instructions from x86 (#4318)

Fewer than 100 instructions are left to implement, the vast majority of
them being loads and stores. These will likely be knocked out quickly,
but they require a more involved implementation than simply
translating the instructions: several need multiple instructions, and others
may need reserved registers (x16 or x17 are common for this purpose).

This is a good milestone to get something pushed to master.
This commit is contained in:
Tyler Wilding
2026-06-22 22:25:49 -04:00
committed by GitHub
parent f9a7fba6e2
commit 8daf33492e
17 changed files with 5359 additions and 4587 deletions
+3 -1
View File
@@ -714,7 +714,9 @@ bool break_now(const ThreadID& tid) {
/*!
 * Request that the thread identified by tid continue running.
 * This implementation does no work: tid is not used and the result is always false.
 * NOTE(review): looks like a placeholder for a platform without debugger support - confirm.
 */
bool cont_now(const ThreadID& tid) {
return false;
}
bool open_memory(const ThreadID& tid, MemoryHandle* out);
/*!
 * Open a memory handle for the thread identified by tid.
 * This implementation does no work: neither tid nor out is touched and the result is always false.
 * NOTE(review): looks like a placeholder for a platform without debugger support - confirm.
 */
bool open_memory(const ThreadID& tid, MemoryHandle* out) {
return false;
}
/*!
 * Close a memory handle previously associated with the thread identified by tid.
 * This implementation does no work: neither tid nor handle is touched and the result is always
 * false.
 * NOTE(review): looks like a placeholder for a platform without debugger support - confirm.
 */
bool close_memory(const ThreadID& tid, MemoryHandle* handle) {
return false;
}
+2 -1
View File
@@ -5,7 +5,8 @@
#include "common/versions/versions.h"
#include "goalc/data_compiler/DataObjectGenerator.h"
#include <fmt/chrono.h>
#include "fmt/chrono.h"
std::string get_current_time_and_date() {
auto const now = std::chrono::floor<std::chrono::seconds>(std::chrono::system_clock::now());
+1 -1
View File
@@ -3,13 +3,13 @@
#include "common/util/gltf_util.h"
#include "decompiler/extractor/extractor_util.h"
#include "decompiler/level_extractor/extract_collide_frags.h"
#include "decompiler/level_extractor/extract_merc.h"
#include "goalc/build_level/collide/jak2/collide.h"
#include "goalc/build_level/common/Tfrag.h"
#include "goalc/build_level/jak2/Entity.h"
#include "goalc/build_level/jak2/FileInfo.h"
#include "goalc/build_level/jak2/LevelFile.h"
#include <decompiler/level_extractor/extract_collide_frags.h>
namespace jak2 {
bool run_build_level(const std::string& input_file,
+1 -1
View File
@@ -1,13 +1,13 @@
#include "build_level.h"
#include "decompiler/extractor/extractor_util.h"
#include "decompiler/level_extractor/extract_collide_frags.h"
#include "decompiler/level_extractor/extract_merc.h"
#include "goalc/build_level/collide/jak3/collide.h"
#include "goalc/build_level/common/Tfrag.h"
#include "goalc/build_level/jak3/Entity.h"
#include "goalc/build_level/jak3/FileInfo.h"
#include "goalc/build_level/jak3/LevelFile.h"
#include <decompiler/level_extractor/extract_collide_frags.h>
namespace jak3 {
bool run_build_level(const std::string& input_file,
+40 -35
View File
@@ -121,7 +121,7 @@ void regset_common(emitter::ObjectGenerator* gen,
gen->count_eliminated_move();
gen->add_instr(IGen::null(*gen), irec);
} else {
gen->add_instr(IGen::mov_xmm32_xmm32(*gen, dst_reg, src_reg), irec);
gen->add_instr(IGen::mov_f32_f32(*gen, dst_reg, src_reg), irec);
}
} else if (src_is_xmm128 && dst_is_xmm128) {
if (src_reg == dst_reg) {
@@ -133,20 +133,20 @@ void regset_common(emitter::ObjectGenerator* gen,
}
} else if (src_class == RegClass::FLOAT && dst_class == RegClass::GPR_64) {
// xmm 1x -> gpr
gen->add_instr(IGen::movd_gpr32_xmm32(*gen, dst_reg, src_reg), irec);
gen->add_instr(IGen::movd_gpr32_f32(*gen, dst_reg, src_reg), irec);
// don't forget to sign extend
gen->add_instr(IGen::movsx_r64_r32(*gen, dst_reg, dst_reg), irec);
} else if (src_class == RegClass::GPR_64 && dst_class == RegClass::FLOAT) {
// gpr -> xmm 1x
gen->add_instr(IGen::movd_xmm32_gpr32(*gen, dst_reg, src_reg), irec);
gen->add_instr(IGen::movd_f32_gpr32(*gen, dst_reg, src_reg), irec);
} else if (src_is_xmm128 && dst_class == RegClass::FLOAT) {
gen->add_instr(IGen::mov_xmm32_xmm32(*gen, dst_reg, src_reg), irec);
gen->add_instr(IGen::mov_f32_f32(*gen, dst_reg, src_reg), irec);
} else if (src_class == RegClass::FLOAT && dst_is_xmm128) {
gen->add_instr(IGen::mov_xmm32_xmm32(*gen, dst_reg, src_reg), irec);
gen->add_instr(IGen::mov_f32_f32(*gen, dst_reg, src_reg), irec);
} else if (src_class == RegClass::GPR_64 && dst_is_xmm128) {
gen->add_instr(IGen::movq_xmm64_gpr64(*gen, dst_reg, src_reg), irec);
gen->add_instr(IGen::movq_f64_gpr64(*gen, dst_reg, src_reg), irec);
} else if (src_is_xmm128 && dst_class == RegClass::GPR_64) {
gen->add_instr(IGen::movq_gpr64_xmm64(*gen, dst_reg, src_reg), irec);
gen->add_instr(IGen::movq_gpr64_f64(*gen, dst_reg, src_reg), irec);
} else {
ASSERT(false); // unhandled move.
}
@@ -256,7 +256,7 @@ void IR_LoadSymbolPointer::do_codegen_x86(emitter::ObjectGenerator* gen,
if (m_name == "#f") {
static_assert(false_symbol_offset() == 0, "false symbol location");
if (dest_reg.is_xmm(gen->instr_set())) {
gen->add_instr(IGen::movq_xmm64_gpr64(*gen, dest_reg, gRegInfo.get_st_reg()), irec);
gen->add_instr(IGen::movq_f64_gpr64(*gen, dest_reg, gRegInfo.get_st_reg()), irec);
} else {
gen->add_instr(IGen::mov_gpr64_gpr64(*gen, dest_reg, gRegInfo.get_st_reg()), irec);
}
@@ -417,7 +417,8 @@ void IR_GotoLabel::do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) {
(void)allocs;
auto instr = gen->add_instr(IGen::jmp_32(*gen), irec);
auto instr = gen->add_instr(IGen::jmp_imm(*gen), irec);
// TODO ARM - have to patch this differently, encoding for the immediate is different
gen->link_instruction_jump(instr, gen->get_future_ir_record_in_same_func(irec, m_dest->idx));
}
@@ -711,15 +712,21 @@ void IR_IntegerMath::do_codegen_x86(emitter::ObjectGenerator* gen,
ASSERT(!m_arg);
break;
case IntegerMathKind::SHLV_64:
gen->add_instr(IGen::shl_gpr64_cl(*gen, get_reg(m_dest, allocs, irec)), irec);
// TODO ARM - register provided but unused on x86
gen->add_instr(IGen::shl_gpr64_reg(*gen, get_reg(m_dest, allocs, irec), 0), irec);
// TODO ARM - x86 forces you to use CL, which is dumb, but the register allocator
// has that logic baked in somewhere
// ARM has no such constraint, so we should be able to use any register for the shift amount
ASSERT(get_reg(m_arg, allocs, irec) == emitter::RCX);
break;
case IntegerMathKind::SHRV_64:
gen->add_instr(IGen::shr_gpr64_cl(*gen, get_reg(m_dest, allocs, irec)), irec);
// TODO ARM - register provided but unused on x86
gen->add_instr(IGen::shr_gpr64_reg(*gen, get_reg(m_dest, allocs, irec), 0), irec);
ASSERT(get_reg(m_arg, allocs, irec) == emitter::RCX);
break;
case IntegerMathKind::SARV_64:
gen->add_instr(IGen::sar_gpr64_cl(*gen, get_reg(m_dest, allocs, irec)), irec);
// TODO ARM - register provided but unused on x86
gen->add_instr(IGen::sar_gpr64_reg(*gen, get_reg(m_dest, allocs, irec), 0), irec);
ASSERT(get_reg(m_arg, allocs, irec) == emitter::RCX);
break;
case IntegerMathKind::SHL_64:
@@ -823,37 +830,37 @@ void IR_FloatMath::do_codegen_x86(emitter::ObjectGenerator* gen,
switch (m_kind) {
case FloatMathKind::DIV_SS:
gen->add_instr(
IGen::divss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
IGen::div_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
irec);
break;
case FloatMathKind::MUL_SS:
gen->add_instr(
IGen::mulss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
IGen::mul_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
irec);
break;
case FloatMathKind::ADD_SS:
gen->add_instr(
IGen::addss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
IGen::add_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
irec);
break;
case FloatMathKind::SUB_SS:
gen->add_instr(
IGen::subss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
IGen::sub_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
irec);
break;
case FloatMathKind::MAX_SS:
gen->add_instr(
IGen::maxss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
IGen::max_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
irec);
break;
case FloatMathKind::MIN_SS:
gen->add_instr(
IGen::minss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
IGen::min_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
irec);
break;
case FloatMathKind::SQRT_SS:
gen->add_instr(
IGen::sqrts_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)), irec);
IGen::sqrt_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)), irec);
break;
default:
ASSERT(false);
@@ -895,7 +902,7 @@ void IR_StaticVarLoad::do_codegen_x86(emitter::ObjectGenerator* gen,
ASSERT(load_info.requires_load == true);
auto instr =
gen->add_instr(IGen::static_load_xmm32(*gen, get_reg(m_dest, allocs, irec), 0), irec);
gen->add_instr(IGen::static_load_f32(*gen, get_reg(m_dest, allocs, irec), 0), irec);
gen->link_instruction_static(instr, m_src->rec, 0);
} else if (m_dest->ireg().reg_class == RegClass::VECTOR_FLOAT) {
// we don't check the load info intentionally because we want to allow loading an entire
@@ -966,38 +973,38 @@ void IR_ConditionalBranch::do_codegen_x86(emitter::ObjectGenerator* gen,
ASSERT(m_resolved);
switch (condition.kind) {
case ConditionKind::EQUAL:
jump_instr = IGen::je_32(*gen);
jump_instr = IGen::je_imm(*gen);
break;
case ConditionKind::NOT_EQUAL:
jump_instr = IGen::jne_32(*gen);
jump_instr = IGen::jne_imm(*gen);
break;
case ConditionKind::LEQ:
if (condition.is_signed) {
jump_instr = IGen::jle_32(*gen);
jump_instr = IGen::jle_imm(*gen);
} else {
jump_instr = IGen::jbe_32(*gen);
jump_instr = IGen::jbe_imm(*gen);
}
break;
case ConditionKind::GEQ:
if (condition.is_signed) {
jump_instr = IGen::jge_32(*gen);
jump_instr = IGen::jge_imm(*gen);
} else {
jump_instr = IGen::jae_32(*gen);
jump_instr = IGen::jae_imm(*gen);
}
break;
case ConditionKind::LT:
if (condition.is_signed) {
jump_instr = IGen::jl_32(*gen);
jump_instr = IGen::jl_imm(*gen);
} else {
jump_instr = IGen::jb_32(*gen);
jump_instr = IGen::jb_imm(*gen);
}
break;
case ConditionKind::GT:
if (condition.is_signed) {
jump_instr = IGen::jg_32(*gen);
jump_instr = IGen::jg_imm(*gen);
} else {
jump_instr = IGen::ja_32(*gen);
jump_instr = IGen::ja_imm(*gen);
}
break;
default:
@@ -1005,7 +1012,7 @@ void IR_ConditionalBranch::do_codegen_x86(emitter::ObjectGenerator* gen,
}
if (condition.is_float) {
gen->add_instr(IGen::cmp_flt_flt(*gen, get_reg(condition.a, allocs, irec),
gen->add_instr(IGen::cmp_f32_f32(*gen, get_reg(condition.a, allocs, irec),
get_reg(condition.b, allocs, irec)),
irec);
} else {
@@ -1210,8 +1217,7 @@ void IR_FloatToInt::do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) {
gen->add_instr(
IGen::float_to_int32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_src, allocs, irec)),
irec);
IGen::f32_to_int32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_src, allocs, irec)), irec);
gen->add_instr(
IGen::movsx_r64_r32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_dest, allocs, irec)),
irec);
@@ -1244,8 +1250,7 @@ void IR_IntToFloat::do_codegen_x86(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) {
gen->add_instr(
IGen::int32_to_float(*gen, get_reg(m_dest, allocs, irec), get_reg(m_src, allocs, irec)),
irec);
IGen::int32_to_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_src, allocs, irec)), irec);
}
void IR_IntToFloat::do_codegen_arm64(emitter::ObjectGenerator* gen,
+3 -2
View File
@@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "goalc/emitter/Instruction.h"
#include "goalc/emitter/InstructionSet.h"
#include "goalc/emitter/Register.h"
#ifdef OS_POSIX
#include <sys/mman.h>
@@ -125,7 +126,7 @@ void CodeTester::emit_push_all_simd() {
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 0; i < 16; i++) {
emit(IGen::sub_gpr64_imm8s(m_gen, SP, 16));
emit(IGen::store128_gpr64_simd128(m_gen, SP, Q0 + i));
emit(IGen::store128_gpr64_simd128(m_gen, SP, V0 + i));
}
} else {
throw std::runtime_error("CodeTester::emit_push_all_simd unhandled instruction set");
@@ -144,7 +145,7 @@ void CodeTester::emit_pop_all_simd() {
emit(IGen::add_gpr64_imm8s(m_gen, RSP, 8));
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
for (int i = 0; i < 16; i++) {
emit(IGen::load128_simd128_gpr64(m_gen, Q0 + i, SP));
emit(IGen::load128_simd128_gpr64(m_gen, V0 + i, SP));
emit(IGen::add_gpr64_imm8s(m_gen, SP, 16));
}
} else {
+73 -73
View File
@@ -31,24 +31,24 @@ Instruction mov_gpr64_s32(const ObjectGenerator& gen, Register dst, int64_t val)
IGEN_DISPATCH(mov_gpr64_s32, dst, val);
}
Instruction movd_gpr32_xmm32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(movd_gpr32_xmm32, dst, src);
Instruction movd_gpr32_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(movd_gpr32_f32, dst, src);
}
Instruction movd_xmm32_gpr32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(movd_xmm32_gpr32, dst, src);
Instruction movd_f32_gpr32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(movd_f32_gpr32, dst, src);
}
Instruction movq_gpr64_xmm64(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(movq_gpr64_xmm64, dst, src);
Instruction movq_gpr64_f64(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(movq_gpr64_f64, dst, src);
}
Instruction movq_xmm64_gpr64(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(movq_xmm64_gpr64, dst, src);
Instruction movq_f64_gpr64(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(movq_f64_gpr64, dst, src);
}
Instruction mov_xmm32_xmm32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(mov_xmm32_xmm32, dst, src);
Instruction mov_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(mov_f32_f32, dst, src);
}
Instruction load8s_gpr64_gpr64_plus_gpr64(const ObjectGenerator& gen,
@@ -516,31 +516,31 @@ Instruction store128_xmm128_reg_offset(const ObjectGenerator& gen,
}
Instruction load64_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
IGEN_DISPATCH(load64_rip_s32, dest, offset);
IGEN_DISPATCH(load64_pcRel_s32, dest, offset);
}
Instruction load32s_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
IGEN_DISPATCH(load32s_rip_s32, dest, offset);
IGEN_DISPATCH(load32s_pcRel_s32, dest, offset);
}
Instruction load32u_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
IGEN_DISPATCH(load32u_rip_s32, dest, offset);
IGEN_DISPATCH(load32u_pcRel_s32, dest, offset);
}
Instruction load16u_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
IGEN_DISPATCH(load16u_rip_s32, dest, offset);
IGEN_DISPATCH(load16u_pcRel_s32, dest, offset);
}
Instruction load16s_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
IGEN_DISPATCH(load16s_rip_s32, dest, offset);
IGEN_DISPATCH(load16s_pcRel_s32, dest, offset);
}
Instruction load8u_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
IGEN_DISPATCH(load8u_rip_s32, dest, offset);
IGEN_DISPATCH(load8u_pcRel_s32, dest, offset);
}
Instruction load8s_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
IGEN_DISPATCH(load8s_rip_s32, dest, offset);
IGEN_DISPATCH(load8s_pcRel_s32, dest, offset);
}
Instruction static_load(const ObjectGenerator& gen,
@@ -552,19 +552,19 @@ Instruction static_load(const ObjectGenerator& gen,
}
Instruction store64_rip_s32(const ObjectGenerator& gen, Register src, s64 offset) {
IGEN_DISPATCH(store64_rip_s32, src, offset);
IGEN_DISPATCH(store64_pcRel_s32, src, offset);
}
Instruction store32_rip_s32(const ObjectGenerator& gen, Register src, s64 offset) {
IGEN_DISPATCH(store32_rip_s32, src, offset);
IGEN_DISPATCH(store32_pcRel_s32, src, offset);
}
Instruction store16_rip_s32(const ObjectGenerator& gen, Register src, s64 offset) {
IGEN_DISPATCH(store16_rip_s32, src, offset);
IGEN_DISPATCH(store16_pcRel_s32, src, offset);
}
Instruction store8_rip_s32(const ObjectGenerator& gen, Register src, s64 offset) {
IGEN_DISPATCH(store8_rip_s32, src, offset);
IGEN_DISPATCH(store8_pcRel_s32, src, offset);
}
Instruction static_store(const ObjectGenerator& gen, Register value, s64 offset, int size) {
@@ -575,12 +575,12 @@ Instruction static_addr(const ObjectGenerator& gen, Register dst, s64 offset) {
IGEN_DISPATCH(static_addr, dst, offset);
}
Instruction static_load_xmm32(const ObjectGenerator& gen, Register simd_dest, s64 offset) {
IGEN_DISPATCH(static_load_xmm32, simd_dest, offset);
Instruction static_load_f32(const ObjectGenerator& gen, Register simd_dest, s64 offset) {
IGEN_DISPATCH(static_load_f32, simd_dest, offset);
}
Instruction static_store_xmm32(const ObjectGenerator& gen, Register xmm_value, s64 offset) {
IGEN_DISPATCH(static_store_xmm32, xmm_value, offset);
Instruction static_store_f32(const ObjectGenerator& gen, Register xmm_value, s64 offset) {
IGEN_DISPATCH(static_store_f32, xmm_value, offset);
}
Instruction load64_gpr64_plus_s32(const ObjectGenerator& gen,
@@ -693,16 +693,16 @@ Instruction not_gpr64(const ObjectGenerator& gen, Register reg) {
IGEN_DISPATCH(not_gpr64, reg);
}
Instruction shl_gpr64_cl(const ObjectGenerator& gen, Register reg) {
IGEN_DISPATCH(shl_gpr64_cl, reg);
Instruction shl_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg) {
IGEN_DISPATCH(shl_gpr64_reg, reg, shift_reg);
}
Instruction shr_gpr64_cl(const ObjectGenerator& gen, Register reg) {
IGEN_DISPATCH(shr_gpr64_cl, reg);
Instruction shr_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg) {
IGEN_DISPATCH(shr_gpr64_reg, reg, shift_reg);
}
Instruction sar_gpr64_cl(const ObjectGenerator& gen, Register reg) {
IGEN_DISPATCH(sar_gpr64_cl, reg);
Instruction sar_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg) {
IGEN_DISPATCH(sar_gpr64_reg, reg, shift_reg);
}
Instruction shl_gpr64_u8(const ObjectGenerator& gen, Register reg, uint8_t sa) {
@@ -717,88 +717,88 @@ Instruction sar_gpr64_u8(const ObjectGenerator& gen, Register reg, uint8_t sa) {
IGEN_DISPATCH(sar_gpr64_u8, reg, sa);
}
Instruction jmp_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(jmp_32);
Instruction jmp_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(jmp_imm);
}
Instruction je_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(je_32);
Instruction je_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(je_imm);
}
Instruction jne_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(jne_32);
Instruction jne_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(jne_imm);
}
Instruction jle_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(jle_32);
Instruction jle_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(jle_imm);
}
Instruction jge_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(jge_32);
Instruction jge_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(jge_imm);
}
Instruction jl_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(jl_32);
Instruction jl_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(jl_imm);
}
Instruction jg_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(jg_32);
Instruction jg_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(jg_imm);
}
Instruction jbe_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(jbe_32);
Instruction jbe_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(jbe_imm);
}
Instruction jae_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(jae_32);
Instruction jae_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(jae_imm);
}
Instruction jb_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(jb_32);
Instruction jb_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(jb_imm);
}
Instruction ja_32(const ObjectGenerator& gen) {
IGEN_DISPATCH(ja_32);
Instruction ja_imm(const ObjectGenerator& gen) {
IGEN_DISPATCH(ja_imm);
}
Instruction cmp_flt_flt(const ObjectGenerator& gen, Register a, Register b) {
IGEN_DISPATCH(cmp_flt_flt, a, b);
Instruction cmp_f32_f32(const ObjectGenerator& gen, Register a, Register b) {
IGEN_DISPATCH(cmp_f32_f32, a, b);
}
Instruction sqrts_xmm(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(sqrts_xmm, dst, src);
Instruction sqrt_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(sqrt_f32, dst, src);
}
Instruction mulss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(mulss_xmm_xmm, dst, src);
Instruction mul_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(mul_f32_f32, dst, src);
}
Instruction divss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(divss_xmm_xmm, dst, src);
Instruction div_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(div_f32_f32, dst, src);
}
Instruction subss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(subss_xmm_xmm, dst, src);
Instruction sub_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(sub_f32_f32, dst, src);
}
Instruction addss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(addss_xmm_xmm, dst, src);
Instruction add_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(add_f32_f32, dst, src);
}
Instruction minss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(minss_xmm_xmm, dst, src);
Instruction min_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(min_f32_f32, dst, src);
}
Instruction maxss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(maxss_xmm_xmm, dst, src);
Instruction max_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(max_f32_f32, dst, src);
}
Instruction int32_to_float(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(int32_to_float, dst, src);
Instruction int32_to_f32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(int32_to_f32, dst, src);
}
Instruction float_to_int32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(float_to_int32, dst, src);
Instruction f32_to_int32(const ObjectGenerator& gen, Register dst, Register src) {
IGEN_DISPATCH(f32_to_int32, dst, src);
}
Instruction nop(const ObjectGenerator& gen) {
+42 -40
View File
@@ -35,27 +35,27 @@ Instruction mov_gpr64_s32(const ObjectGenerator& gen, Register dst, int64_t val)
/*!
* Move 32-bits of xmm to 32 bits of gpr (no sign extension).
*/
Instruction movd_gpr32_xmm32(const ObjectGenerator& gen, Register dst, Register src);
Instruction movd_gpr32_f32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Move 32-bits of gpr to 32-bits of xmm (no sign extension)
*/
Instruction movd_xmm32_gpr32(const ObjectGenerator& gen, Register dst, Register src);
Instruction movd_f32_gpr32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Move 64-bits of xmm to 64 bits of gpr (no sign extension).
*/
Instruction movq_gpr64_xmm64(const ObjectGenerator& gen, Register dst, Register src);
Instruction movq_gpr64_f64(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Move 64-bits of gpr to 64-bits of xmm (no sign extension)
*/
Instruction movq_xmm64_gpr64(const ObjectGenerator& gen, Register dst, Register src);
Instruction movq_f64_gpr64(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Move 32-bits between xmm's
*/
Instruction mov_xmm32_xmm32(const ObjectGenerator& gen, Register dst, Register src);
Instruction mov_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
// todo - GPR64 -> XMM64 (zext)
// todo - XMM -> GPR64
@@ -486,9 +486,9 @@ Instruction static_store(const ObjectGenerator& gen, Register value, s64 offset,
Instruction static_addr(const ObjectGenerator& gen, Register dst, s64 offset);
Instruction static_load_xmm32(const ObjectGenerator& gen, Register simd_dest, s64 offset);
Instruction static_load_f32(const ObjectGenerator& gen, Register simd_dest, s64 offset);
Instruction static_store_xmm32(const ObjectGenerator& gen, Register xmm_value, s64 offset);
Instruction static_store_f32(const ObjectGenerator& gen, Register xmm_value, s64 offset);
// TODO, special load/stores of 128 bit values.
@@ -618,19 +618,21 @@ Instruction not_gpr64(const ObjectGenerator& gen, Register reg);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Shift 64-bit gpr left by CL register
* Shift 64-bit gpr left by a shift amount in a register (ie. forced to be CL register on x86)
*/
Instruction shl_gpr64_cl(const ObjectGenerator& gen, Register reg);
Instruction shl_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg);
/*!
* Shift 64-bit gpr right (logical) by CL register
* Shift 64-bit gpr right (logical) by a shift amount in a register (ie. forced to be CL register on
* x86)
*/
Instruction shr_gpr64_cl(const ObjectGenerator& gen, Register reg);
Instruction shr_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg);
/*!
* Shift 64-bit gpr right (arithmetic) by CL register
* Shift 64-bit gpr right (arithmetic) by a shift amount in a register (ie. forced to be CL register
* on x86)
*/
Instruction sar_gpr64_cl(const ObjectGenerator& gen, Register reg);
Instruction sar_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg);
/*!
* Shift 64-bit gpr left (logical) by the constant shift amount "sa".
@@ -654,57 +656,57 @@ Instruction sar_gpr64_u8(const ObjectGenerator& gen, Register reg, uint8_t sa);
/*!
* Jump, 32-bit constant offset. The offset is by default 0 and must be patched later.
*/
Instruction jmp_32(const ObjectGenerator& gen);
Instruction jmp_imm(const ObjectGenerator& gen);
/*!
* Jump if equal.
*/
Instruction je_32(const ObjectGenerator& gen);
Instruction je_imm(const ObjectGenerator& gen);
/*!
* Jump not equal.
*/
Instruction jne_32(const ObjectGenerator& gen);
Instruction jne_imm(const ObjectGenerator& gen);
/*!
* Jump less than or equal.
*/
Instruction jle_32(const ObjectGenerator& gen);
Instruction jle_imm(const ObjectGenerator& gen);
/*!
* Jump greater than or equal.
*/
Instruction jge_32(const ObjectGenerator& gen);
Instruction jge_imm(const ObjectGenerator& gen);
/*!
* Jump less than
*/
Instruction jl_32(const ObjectGenerator& gen);
Instruction jl_imm(const ObjectGenerator& gen);
/*!
* Jump greater than
*/
Instruction jg_32(const ObjectGenerator& gen);
Instruction jg_imm(const ObjectGenerator& gen);
/*!
* Jump below or equal
*/
Instruction jbe_32(const ObjectGenerator& gen);
Instruction jbe_imm(const ObjectGenerator& gen);
/*!
* Jump above or equal
*/
Instruction jae_32(const ObjectGenerator& gen);
Instruction jae_imm(const ObjectGenerator& gen);
/*!
* Jump below
*/
Instruction jb_32(const ObjectGenerator& gen);
Instruction jb_imm(const ObjectGenerator& gen);
/*!
* Jump above
*/
Instruction ja_32(const ObjectGenerator& gen);
Instruction ja_imm(const ObjectGenerator& gen);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// FLOAT MATH
@@ -713,49 +715,49 @@ Instruction ja_32(const ObjectGenerator& gen);
/*!
* Compare two floats and set flag register for jump (ucomiss)
*/
Instruction cmp_flt_flt(const ObjectGenerator& gen, Register a, Register b);
Instruction cmp_f32_f32(const ObjectGenerator& gen, Register a, Register b);
Instruction sqrts_xmm(const ObjectGenerator& gen, Register dst, Register src);
Instruction sqrt_f32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Multiply two floats in xmm's
* Multiply two floats in f32's
*/
Instruction mulss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
Instruction mul_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Divide two floats in xmm's
* Divide two floats in f32's
*/
Instruction divss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
Instruction div_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Subtract two floats in xmm's
* Subtract two floats in f32's
*/
Instruction subss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
Instruction sub_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Add two floats in xmm's
* Add two floats in f32's
*/
Instruction addss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
Instruction add_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Floating point minimum.
*/
Instruction minss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
Instruction min_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Floating point maximum.
*/
Instruction maxss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
Instruction max_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Convert GPR int32 to XMM float (single precision)
* Convert GPR int32 to float (single precision)
*/
Instruction int32_to_float(const ObjectGenerator& gen, Register dst, Register src);
Instruction int32_to_f32(const ObjectGenerator& gen, Register dst, Register src);
/*!
* Convert XMM float to GPR int32(single precision) (truncate)
* Convert float (single precision) to GPR int32 (truncate)
*/
Instruction float_to_int32(const ObjectGenerator& gen, Register dst, Register src);
Instruction f32_to_int32(const ObjectGenerator& gen, Register dst, Register src);
Instruction nop(const ObjectGenerator& gen);
File diff suppressed because it is too large Load Diff
+50 -48
View File
@@ -34,27 +34,27 @@ InstructionARM64 mov_gpr64_s32(Register dst, int64_t val);
/*!
* Move 32-bits of xmm to 32 bits of gpr (no sign extension).
*/
InstructionARM64 movd_gpr32_xmm32(Register dst, Register src);
InstructionARM64 movd_gpr32_f32(Register dst, Register src);
/*!
* Move 32-bits of gpr to 32-bits of xmm (no sign extension)
*/
InstructionARM64 movd_xmm32_gpr32(Register dst, Register src);
InstructionARM64 movd_f32_gpr32(Register dst, Register src);
/*!
* Move 64-bits of xmm to 64 bits of gpr (no sign extension).
*/
InstructionARM64 movq_gpr64_xmm64(Register dst, Register src);
InstructionARM64 movq_gpr64_f64(Register dst, Register src);
/*!
* Move 64-bits of gpr to 64-bits of xmm (no sign extension)
*/
InstructionARM64 movq_xmm64_gpr64(Register dst, Register src);
InstructionARM64 movq_f64_gpr64(Register dst, Register src);
/*!
* Move 32-bits between xmm's
*/
InstructionARM64 mov_xmm32_xmm32(Register dst, Register src);
InstructionARM64 mov_f32_f32(Register dst, Register src);
// todo - GPR64 -> XMM64 (zext)
// todo - XMM -> GPR64
@@ -323,37 +323,37 @@ InstructionARM64 store128_xmm128_reg_offset(Register base, Register xmm_val, s64
// RIP loads and stores
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
InstructionARM64 load64_rip_s32(Register dest, s64 offset);
InstructionARM64 load64_pcRel_s32(Register dest, s64 offset);
InstructionARM64 load32s_rip_s32(Register dest, s64 offset);
InstructionARM64 load32s_pcRel_s32(Register dest, s64 offset);
InstructionARM64 load32u_rip_s32(Register dest, s64 offset);
InstructionARM64 load32u_pcRel_s32(Register dest, s64 offset);
InstructionARM64 load16u_rip_s32(Register dest, s64 offset);
InstructionARM64 load16u_pcRel_s32(Register dest, s64 offset);
InstructionARM64 load16s_rip_s32(Register dest, s64 offset);
InstructionARM64 load16s_pcRel_s32(Register dest, s64 offset);
InstructionARM64 load8u_rip_s32(Register dest, s64 offset);
InstructionARM64 load8u_pcRel_s32(Register dest, s64 offset);
InstructionARM64 load8s_rip_s32(Register dest, s64 offset);
InstructionARM64 load8s_pcRel_s32(Register dest, s64 offset);
InstructionARM64 static_load(Register dest, s64 offset, int size, bool sign_extend);
InstructionARM64 store64_rip_s32(Register src, s64 offset);
InstructionARM64 store64_pcRel_s32(Register src, s64 offset);
InstructionARM64 store32_rip_s32(Register src, s64 offset);
InstructionARM64 store32_pcRel_s32(Register src, s64 offset);
InstructionARM64 store16_rip_s32(Register src, s64 offset);
InstructionARM64 store16_pcRel_s32(Register src, s64 offset);
InstructionARM64 store8_rip_s32(Register src, s64 offset);
InstructionARM64 store8_pcRel_s32(Register src, s64 offset);
InstructionARM64 static_store(Register value, s64 offset, int size);
InstructionARM64 static_addr(Register dst, s64 offset);
InstructionARM64 static_load_xmm32(Register simd_dest, s64 offset);
InstructionARM64 static_load_f32(Register simd_dest, s64 offset);
InstructionARM64 static_store_xmm32(Register xmm_value, s64 offset);
InstructionARM64 static_store_f32(Register xmm_value, s64 offset);
// TODO, special load/stores of 128 bit values.
@@ -477,19 +477,21 @@ InstructionARM64 not_gpr64(Register reg);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Shift 64-bit gpr left by CL register
* Shift 64-bit gpr left by a shift amount in a register (ie. forced to be CL register on x86)
*/
InstructionARM64 shl_gpr64_cl(Register reg);
InstructionARM64 shl_gpr64_reg(Register reg, Register shift_reg);
/*!
* Shift 64-bit gpr right (logical) by CL register
* Shift 64-bit gpr right (logical) by a shift amount in a register (ie. forced to be CL register on
* x86)
*/
InstructionARM64 shr_gpr64_cl(Register reg);
InstructionARM64 shr_gpr64_reg(Register reg, Register shift_reg);
/*!
* Shift 64-bit gpr right (arithmetic) by CL register
* Shift 64-bit gpr right (arithmetic) by a shift amount in a register (ie. forced to be CL register
* on x86)
*/
InstructionARM64 sar_gpr64_cl(Register reg);
InstructionARM64 sar_gpr64_reg(Register reg, Register shift_reg);
/*!
* Shift 64-bit gpr left (logical) by the constant shift amount "sa".
@@ -513,57 +515,57 @@ InstructionARM64 sar_gpr64_u8(Register reg, uint8_t sa);
/*!
* Jump, 32-bit constant offset. The offset is by default 0 and must be patched later.
*/
InstructionARM64 jmp_32();
InstructionARM64 jmp_imm();
/*!
* Jump if equal.
*/
InstructionARM64 je_32();
InstructionARM64 je_imm();
/*!
* Jump not equal.
*/
InstructionARM64 jne_32();
InstructionARM64 jne_imm();
/*!
* Jump less than or equal.
*/
InstructionARM64 jle_32();
InstructionARM64 jle_imm();
/*!
* Jump greater than or equal.
*/
InstructionARM64 jge_32();
InstructionARM64 jge_imm();
/*!
* Jump less than
*/
InstructionARM64 jl_32();
InstructionARM64 jl_imm();
/*!
* Jump greater than
*/
InstructionARM64 jg_32();
InstructionARM64 jg_imm();
/*!
* Jump below or equal
*/
InstructionARM64 jbe_32();
InstructionARM64 jbe_imm();
/*!
* Jump above or equal
*/
InstructionARM64 jae_32();
InstructionARM64 jae_imm();
/*!
* Jump below
*/
InstructionARM64 jb_32();
InstructionARM64 jb_imm();
/*!
* Jump above
*/
InstructionARM64 ja_32();
InstructionARM64 ja_imm();
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// FLOAT MATH
@@ -572,49 +574,49 @@ InstructionARM64 ja_32();
/*!
* Compare two floats and set flag register for jump (ucomiss)
*/
InstructionARM64 cmp_flt_flt(Register a, Register b);
InstructionARM64 cmp_f32_f32(Register a, Register b);
InstructionARM64 sqrts_xmm(Register dst, Register src);
InstructionARM64 sqrt_f32(Register dst, Register src);
/*!
* Multiply two floats in xmm's
*/
InstructionARM64 mulss_xmm_xmm(Register dst, Register src);
InstructionARM64 mul_f32_f32(Register dst, Register src);
/*!
* Divide two floats in xmm's
*/
InstructionARM64 divss_xmm_xmm(Register dst, Register src);
InstructionARM64 div_f32_f32(Register dst, Register src);
/*!
* Subtract two floats in xmm's
*/
InstructionARM64 subss_xmm_xmm(Register dst, Register src);
InstructionARM64 sub_f32_f32(Register dst, Register src);
/*!
* Add two floats in xmm's
*/
InstructionARM64 addss_xmm_xmm(Register dst, Register src);
InstructionARM64 add_f32_f32(Register dst, Register src);
/*!
* Floating point minimum.
*/
InstructionARM64 minss_xmm_xmm(Register dst, Register src);
InstructionARM64 min_f32_f32(Register dst, Register src);
/*!
* Floating point maximum.
*/
InstructionARM64 maxss_xmm_xmm(Register dst, Register src);
InstructionARM64 max_f32_f32(Register dst, Register src);
/*!
* Convert GPR int32 to XMM float (single precision)
* Convert GPR int32 to float (single precision)
*/
InstructionARM64 int32_to_float(Register dst, Register src);
InstructionARM64 int32_to_f32(Register dst, Register src);
/*!
* Convert XMM float to GPR int32(single precision) (truncate)
* Convert float to GPR int32(single precision) (truncate)
*/
InstructionARM64 float_to_int32(Register dst, Register src);
InstructionARM64 f32_to_int32(Register dst, Register src);
InstructionARM64 nop();
@@ -800,4 +802,4 @@ InstructionARM64 vpshufhw(Register dst, Register src, u8 imm);
InstructionARM64 vpackuswb(Register dst, Register src0, Register src1);
} // namespace ARM64
} // namespace IGen
} // namespace emitter
} // namespace emitter
+58 -54
View File
@@ -59,7 +59,7 @@ InstructionX86 mov_gpr64_s32(Register dst, int64_t val) {
return instr;
}
InstructionX86 movd_gpr32_xmm32(Register dst, Register src) {
InstructionX86 movd_gpr32_f32(Register dst, Register src) {
ASSERT(dst.is_gpr(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0x66);
@@ -70,7 +70,7 @@ InstructionX86 movd_gpr32_xmm32(Register dst, Register src) {
return instr;
}
InstructionX86 movd_xmm32_gpr32(Register dst, Register src) {
InstructionX86 movd_f32_gpr32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_gpr(instr_set));
InstructionX86 instr(0x66);
@@ -81,7 +81,7 @@ InstructionX86 movd_xmm32_gpr32(Register dst, Register src) {
return instr;
}
InstructionX86 movq_gpr64_xmm64(Register dst, Register src) {
InstructionX86 movq_gpr64_f64(Register dst, Register src) {
ASSERT(dst.is_gpr(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0x66);
@@ -92,7 +92,7 @@ InstructionX86 movq_gpr64_xmm64(Register dst, Register src) {
return instr;
}
InstructionX86 movq_xmm64_gpr64(Register dst, Register src) {
InstructionX86 movq_f64_gpr64(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_gpr(instr_set));
InstructionX86 instr(0x66);
@@ -103,7 +103,7 @@ InstructionX86 movq_xmm64_gpr64(Register dst, Register src) {
return instr;
}
InstructionX86 mov_xmm32_xmm32(Register dst, Register src) {
InstructionX86 mov_f32_f32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0xf3);
@@ -1155,7 +1155,7 @@ InstructionX86 store128_xmm128_reg_offset(Register base, Register xmm_val, s64 o
}
}
InstructionX86 load64_rip_s32(Register dest, s64 offset) {
InstructionX86 load64_pcRel_s32(Register dest, s64 offset) {
ASSERT(dest.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
InstructionX86 instr(0x8b);
@@ -1163,7 +1163,7 @@ InstructionX86 load64_rip_s32(Register dest, s64 offset) {
return instr;
}
InstructionX86 load32s_rip_s32(Register dest, s64 offset) {
InstructionX86 load32s_pcRel_s32(Register dest, s64 offset) {
ASSERT(dest.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
InstructionX86 instr(0x63);
@@ -1171,7 +1171,7 @@ InstructionX86 load32s_rip_s32(Register dest, s64 offset) {
return instr;
}
InstructionX86 load32u_rip_s32(Register dest, s64 offset) {
InstructionX86 load32u_pcRel_s32(Register dest, s64 offset) {
ASSERT(dest.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
InstructionX86 instr(0x8b);
@@ -1179,7 +1179,7 @@ InstructionX86 load32u_rip_s32(Register dest, s64 offset) {
return instr;
}
InstructionX86 load16u_rip_s32(Register dest, s64 offset) {
InstructionX86 load16u_pcRel_s32(Register dest, s64 offset) {
ASSERT(dest.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
InstructionX86 instr(0xf);
@@ -1188,7 +1188,7 @@ InstructionX86 load16u_rip_s32(Register dest, s64 offset) {
return instr;
}
InstructionX86 load16s_rip_s32(Register dest, s64 offset) {
InstructionX86 load16s_pcRel_s32(Register dest, s64 offset) {
ASSERT(dest.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
InstructionX86 instr(0xf);
@@ -1197,7 +1197,7 @@ InstructionX86 load16s_rip_s32(Register dest, s64 offset) {
return instr;
}
InstructionX86 load8u_rip_s32(Register dest, s64 offset) {
InstructionX86 load8u_pcRel_s32(Register dest, s64 offset) {
ASSERT(dest.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
InstructionX86 instr(0xf);
@@ -1206,7 +1206,7 @@ InstructionX86 load8u_rip_s32(Register dest, s64 offset) {
return instr;
}
InstructionX86 load8s_rip_s32(Register dest, s64 offset) {
InstructionX86 load8s_pcRel_s32(Register dest, s64 offset) {
ASSERT(dest.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
InstructionX86 instr(0xf);
@@ -1219,33 +1219,33 @@ InstructionX86 static_load(Register dest, s64 offset, int size, bool sign_extend
switch (size) {
case 1:
if (sign_extend) {
return load8s_rip_s32(dest, offset);
return load8s_pcRel_s32(dest, offset);
} else {
return load8u_rip_s32(dest, offset);
return load8u_pcRel_s32(dest, offset);
}
break;
case 2:
if (sign_extend) {
return load16s_rip_s32(dest, offset);
return load16s_pcRel_s32(dest, offset);
} else {
return load16u_rip_s32(dest, offset);
return load16u_pcRel_s32(dest, offset);
}
break;
case 4:
if (sign_extend) {
return load32s_rip_s32(dest, offset);
return load32s_pcRel_s32(dest, offset);
} else {
return load32u_rip_s32(dest, offset);
return load32u_pcRel_s32(dest, offset);
}
break;
case 8:
return load64_rip_s32(dest, offset);
return load64_pcRel_s32(dest, offset);
default:
ASSERT(false);
}
}
InstructionX86 store64_rip_s32(Register src, s64 offset) {
InstructionX86 store64_pcRel_s32(Register src, s64 offset) {
ASSERT(src.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
@@ -1254,7 +1254,7 @@ InstructionX86 store64_rip_s32(Register src, s64 offset) {
return instr;
}
InstructionX86 store32_rip_s32(Register src, s64 offset) {
InstructionX86 store32_pcRel_s32(Register src, s64 offset) {
ASSERT(src.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
@@ -1263,7 +1263,7 @@ InstructionX86 store32_rip_s32(Register src, s64 offset) {
return instr;
}
InstructionX86 store16_rip_s32(Register src, s64 offset) {
InstructionX86 store16_pcRel_s32(Register src, s64 offset) {
ASSERT(src.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
@@ -1274,7 +1274,7 @@ InstructionX86 store16_rip_s32(Register src, s64 offset) {
return instr;
}
InstructionX86 store8_rip_s32(Register src, s64 offset) {
InstructionX86 store8_pcRel_s32(Register src, s64 offset) {
ASSERT(src.is_gpr(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
@@ -1289,13 +1289,13 @@ InstructionX86 store8_rip_s32(Register src, s64 offset) {
InstructionX86 static_store(Register value, s64 offset, int size) {
switch (size) {
case 1:
return store8_rip_s32(value, offset);
return store8_pcRel_s32(value, offset);
case 2:
return store16_rip_s32(value, offset);
return store16_pcRel_s32(value, offset);
case 4:
return store32_rip_s32(value, offset);
return store32_pcRel_s32(value, offset);
case 8:
return store64_rip_s32(value, offset);
return store64_pcRel_s32(value, offset);
default:
ASSERT(false);
}
@@ -1309,7 +1309,7 @@ InstructionX86 static_addr(Register dst, s64 offset) {
return instr;
}
InstructionX86 static_load_xmm32(Register simd_dest, s64 offset) {
InstructionX86 static_load_f32(Register simd_dest, s64 offset) {
ASSERT(simd_dest.is_xmm(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
@@ -1322,7 +1322,7 @@ InstructionX86 static_load_xmm32(Register simd_dest, s64 offset) {
return instr;
}
InstructionX86 static_store_xmm32(Register xmm_value, s64 offset) {
InstructionX86 static_store_f32(Register xmm_value, s64 offset) {
ASSERT(xmm_value.is_xmm(instr_set));
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
@@ -1569,21 +1569,24 @@ InstructionX86 not_gpr64(Register reg) {
return instr;
}
InstructionX86 shl_gpr64_cl(Register reg) {
InstructionX86 shl_gpr64_reg(Register reg, Register _) {
// x86 is forced to use CL
ASSERT(reg.is_gpr(instr_set));
InstructionX86 instr(0xd3);
instr.set_modrm_and_rex(4, reg.hw_id(instr_set), 3, true);
return instr;
}
InstructionX86 shr_gpr64_cl(Register reg) {
InstructionX86 shr_gpr64_reg(Register reg, Register _) {
// x86 is forced to use CL
ASSERT(reg.is_gpr(instr_set));
InstructionX86 instr(0xd3);
instr.set_modrm_and_rex(5, reg.hw_id(instr_set), 3, true);
return instr;
}
InstructionX86 sar_gpr64_cl(Register reg) {
InstructionX86 sar_gpr64_reg(Register reg, Register _) {
// x86 is forced to use CL
ASSERT(reg.is_gpr(instr_set));
InstructionX86 instr(0xd3);
instr.set_modrm_and_rex(7, reg.hw_id(instr_set), 3, true);
@@ -1614,83 +1617,83 @@ InstructionX86 sar_gpr64_u8(Register reg, uint8_t sa) {
return instr;
}
InstructionX86 jmp_32() {
InstructionX86 jmp_imm() {
InstructionX86 instr(0xe9);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 je_32() {
InstructionX86 je_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x84);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 jne_32() {
InstructionX86 jne_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x85);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 jle_32() {
InstructionX86 jle_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x8e);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 jge_32() {
InstructionX86 jge_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x8d);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 jl_32() {
InstructionX86 jl_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x8c);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 jg_32() {
InstructionX86 jg_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x8f);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 jbe_32() {
InstructionX86 jbe_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x86);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 jae_32() {
InstructionX86 jae_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x83);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 jb_32() {
InstructionX86 jb_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x82);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 ja_32() {
InstructionX86 ja_imm() {
InstructionX86 instr(0x0f);
instr.set_op2(0x87);
instr.set(Imm(4, 0));
return instr;
}
InstructionX86 cmp_flt_flt(Register a, Register b) {
InstructionX86 cmp_f32_f32(Register a, Register b) {
ASSERT(a.is_xmm(instr_set));
ASSERT(b.is_xmm(instr_set));
InstructionX86 instr(0x0f);
@@ -1699,7 +1702,7 @@ InstructionX86 cmp_flt_flt(Register a, Register b) {
return instr;
}
InstructionX86 sqrts_xmm(Register dst, Register src) {
InstructionX86 sqrt_f32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0xf3);
@@ -1710,7 +1713,7 @@ InstructionX86 sqrts_xmm(Register dst, Register src) {
return instr;
}
InstructionX86 mulss_xmm_xmm(Register dst, Register src) {
InstructionX86 mul_f32_f32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0xf3);
@@ -1721,7 +1724,7 @@ InstructionX86 mulss_xmm_xmm(Register dst, Register src) {
return instr;
}
InstructionX86 divss_xmm_xmm(Register dst, Register src) {
InstructionX86 div_f32_f32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0xf3);
@@ -1732,7 +1735,7 @@ InstructionX86 divss_xmm_xmm(Register dst, Register src) {
return instr;
}
InstructionX86 subss_xmm_xmm(Register dst, Register src) {
InstructionX86 sub_f32_f32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0xf3);
@@ -1743,7 +1746,7 @@ InstructionX86 subss_xmm_xmm(Register dst, Register src) {
return instr;
}
InstructionX86 addss_xmm_xmm(Register dst, Register src) {
InstructionX86 add_f32_f32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0xf3);
@@ -1754,7 +1757,7 @@ InstructionX86 addss_xmm_xmm(Register dst, Register src) {
return instr;
}
InstructionX86 minss_xmm_xmm(Register dst, Register src) {
InstructionX86 min_f32_f32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0xf3);
@@ -1765,7 +1768,7 @@ InstructionX86 minss_xmm_xmm(Register dst, Register src) {
return instr;
}
InstructionX86 maxss_xmm_xmm(Register dst, Register src) {
InstructionX86 max_f32_f32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0xf3);
@@ -1776,7 +1779,7 @@ InstructionX86 maxss_xmm_xmm(Register dst, Register src) {
return instr;
}
InstructionX86 int32_to_float(Register dst, Register src) {
InstructionX86 int32_to_f32(Register dst, Register src) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_gpr(instr_set));
InstructionX86 instr(0xf3);
@@ -1787,7 +1790,7 @@ InstructionX86 int32_to_float(Register dst, Register src) {
return instr;
}
InstructionX86 float_to_int32(Register dst, Register src) {
InstructionX86 f32_to_int32(Register dst, Register src) {
ASSERT(dst.is_gpr(instr_set));
ASSERT(src.is_xmm(instr_set));
InstructionX86 instr(0xf3);
@@ -2167,6 +2170,7 @@ InstructionX86 pw_sll(Register dst, Register src, u8 imm) {
instr.set(Imm(1, imm));
return instr;
}
InstructionX86 ph_sll(Register dst, Register src, u8 imm) {
ASSERT(dst.is_xmm(instr_set));
ASSERT(src.is_xmm(instr_set));
@@ -2447,4 +2451,4 @@ InstructionX86 vpackuswb(Register dst, Register src0, Register src1) {
}
} // namespace X86
} // namespace IGen
} // namespace emitter
} // namespace emitter
+52 -50
View File
@@ -34,27 +34,27 @@ InstructionX86 mov_gpr64_s32(Register dst, int64_t val);
/*!
* Move 32-bits of xmm to 32 bits of gpr (no sign extension).
*/
InstructionX86 movd_gpr32_xmm32(Register dst, Register src);
InstructionX86 movd_gpr32_f32(Register dst, Register src);
/*!
* Move 32-bits of gpr to 32-bits of xmm (no sign extension)
*/
InstructionX86 movd_xmm32_gpr32(Register dst, Register src);
InstructionX86 movd_f32_gpr32(Register dst, Register src);
/*!
* Move 64-bits of xmm to 64 bits of gpr (no sign extension).
*/
InstructionX86 movq_gpr64_xmm64(Register dst, Register src);
InstructionX86 movq_gpr64_f64(Register dst, Register src);
/*!
* Move 64-bits of gpr to 64-bits of xmm (no sign extension)
*/
InstructionX86 movq_xmm64_gpr64(Register dst, Register src);
InstructionX86 movq_f64_gpr64(Register dst, Register src);
/*!
* Move 32-bits between xmm's
*/
InstructionX86 mov_xmm32_xmm32(Register dst, Register src);
InstructionX86 mov_f32_f32(Register dst, Register src);
// todo - GPR64 -> XMM64 (zext)
// todo - XMM -> GPR64
@@ -323,37 +323,37 @@ InstructionX86 store128_xmm128_reg_offset(Register base, Register xmm_val, s64 o
// RIP loads and stores
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
InstructionX86 load64_rip_s32(Register dest, s64 offset);
InstructionX86 load64_pcRel_s32(Register dest, s64 offset);
InstructionX86 load32s_rip_s32(Register dest, s64 offset);
InstructionX86 load32s_pcRel_s32(Register dest, s64 offset);
InstructionX86 load32u_rip_s32(Register dest, s64 offset);
InstructionX86 load32u_pcRel_s32(Register dest, s64 offset);
InstructionX86 load16u_rip_s32(Register dest, s64 offset);
InstructionX86 load16u_pcRel_s32(Register dest, s64 offset);
InstructionX86 load16s_rip_s32(Register dest, s64 offset);
InstructionX86 load16s_pcRel_s32(Register dest, s64 offset);
InstructionX86 load8u_rip_s32(Register dest, s64 offset);
InstructionX86 load8u_pcRel_s32(Register dest, s64 offset);
InstructionX86 load8s_rip_s32(Register dest, s64 offset);
InstructionX86 load8s_pcRel_s32(Register dest, s64 offset);
InstructionX86 static_load(Register dest, s64 offset, int size, bool sign_extend);
InstructionX86 store64_rip_s32(Register src, s64 offset);
InstructionX86 store64_pcRel_s32(Register src, s64 offset);
InstructionX86 store32_rip_s32(Register src, s64 offset);
InstructionX86 store32_pcRel_s32(Register src, s64 offset);
InstructionX86 store16_rip_s32(Register src, s64 offset);
InstructionX86 store16_pcRel_s32(Register src, s64 offset);
InstructionX86 store8_rip_s32(Register src, s64 offset);
InstructionX86 store8_pcRel_s32(Register src, s64 offset);
InstructionX86 static_store(Register value, s64 offset, int size);
InstructionX86 static_addr(Register dst, s64 offset);
InstructionX86 static_load_xmm32(Register simd_dest, s64 offset);
InstructionX86 static_load_f32(Register simd_dest, s64 offset);
InstructionX86 static_store_xmm32(Register xmm_value, s64 offset);
InstructionX86 static_store_f32(Register xmm_value, s64 offset);
// TODO, special load/stores of 128 bit values.
@@ -477,19 +477,21 @@ InstructionX86 not_gpr64(Register reg);
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*!
* Shift 64-bit gpr left by CL register
* Shift 64-bit gpr left by a shift amount in a register (ie. forced to be CL register on x86)
*/
InstructionX86 shl_gpr64_cl(Register reg);
InstructionX86 shl_gpr64_reg(Register reg, Register shift_reg);
/*!
* Shift 64-bit gpr right (logical) by CL register
* Shift 64-bit gpr right (logical) by a shift amount in a register (ie. forced to be CL register on
* x86)
*/
InstructionX86 shr_gpr64_cl(Register reg);
InstructionX86 shr_gpr64_reg(Register reg, Register shift_reg);
/*!
* Shift 64-bit gpr right (arithmetic) by CL register
* Shift 64-bit gpr right (arithmetic) by a shift amount in a register (ie. forced to be CL register
* on x86)
*/
InstructionX86 sar_gpr64_cl(Register reg);
InstructionX86 sar_gpr64_reg(Register reg, Register shift_reg);
/*!
* Shift 64-bit gpr left (logical) by the constant shift amount "sa".
@@ -513,57 +515,57 @@ InstructionX86 sar_gpr64_u8(Register reg, uint8_t sa);
/*!
* Jump, 32-bit constant offset. The offset is by default 0 and must be patched later.
*/
InstructionX86 jmp_32();
InstructionX86 jmp_imm();
/*!
* Jump if equal.
*/
InstructionX86 je_32();
InstructionX86 je_imm();
/*!
* Jump not equal.
*/
InstructionX86 jne_32();
InstructionX86 jne_imm();
/*!
* Jump less than or equal.
*/
InstructionX86 jle_32();
InstructionX86 jle_imm();
/*!
* Jump greater than or equal.
*/
InstructionX86 jge_32();
InstructionX86 jge_imm();
/*!
* Jump less than
*/
InstructionX86 jl_32();
InstructionX86 jl_imm();
/*!
* Jump greater than
*/
InstructionX86 jg_32();
InstructionX86 jg_imm();
/*!
* Jump below or equal
*/
InstructionX86 jbe_32();
InstructionX86 jbe_imm();
/*!
* Jump above or equal
*/
InstructionX86 jae_32();
InstructionX86 jae_imm();
/*!
* Jump below
*/
InstructionX86 jb_32();
InstructionX86 jb_imm();
/*!
* Jump above
*/
InstructionX86 ja_32();
InstructionX86 ja_imm();
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// FLOAT MATH
@@ -572,49 +574,49 @@ InstructionX86 ja_32();
/*!
* Compare two floats and set flag register for jump (ucomiss)
*/
InstructionX86 cmp_flt_flt(Register a, Register b);
InstructionX86 cmp_f32_f32(Register a, Register b);
InstructionX86 sqrts_xmm(Register dst, Register src);
InstructionX86 sqrt_f32(Register dst, Register src);
/*!
* Multiply two floats in xmm's
* Multiply two floats in f32's
*/
InstructionX86 mulss_xmm_xmm(Register dst, Register src);
InstructionX86 mul_f32_f32(Register dst, Register src);
/*!
* Divide two floats in xmm's
* Divide two floats in f32's
*/
InstructionX86 divss_xmm_xmm(Register dst, Register src);
InstructionX86 div_f32_f32(Register dst, Register src);
/*!
* Subtract two floats in xmm's
* Subtract two floats in f32's
*/
InstructionX86 subss_xmm_xmm(Register dst, Register src);
InstructionX86 sub_f32_f32(Register dst, Register src);
/*!
* Add two floats in xmm's
* Add two floats in f32's
*/
InstructionX86 addss_xmm_xmm(Register dst, Register src);
InstructionX86 add_f32_f32(Register dst, Register src);
/*!
* Floating point minimum.
*/
InstructionX86 minss_xmm_xmm(Register dst, Register src);
InstructionX86 min_f32_f32(Register dst, Register src);
/*!
* Floating point maximum.
*/
InstructionX86 maxss_xmm_xmm(Register dst, Register src);
InstructionX86 max_f32_f32(Register dst, Register src);
/*!
* Convert GPR int32 to XMM float (single precision)
*/
InstructionX86 int32_to_float(Register dst, Register src);
InstructionX86 int32_to_f32(Register dst, Register src);
/*!
* Convert XMM float to GPR int32(single precision) (truncate)
*/
InstructionX86 float_to_int32(Register dst, Register src);
InstructionX86 f32_to_int32(Register dst, Register src);
InstructionX86 nop();
@@ -800,4 +802,4 @@ InstructionX86 vpshufhw(Register dst, Register src, u8 imm);
InstructionX86 vpackuswb(Register dst, Register src0, Register src1);
} // namespace X86
} // namespace IGen
} // namespace emitter
} // namespace emitter
+139 -17
View File
@@ -1,6 +1,7 @@
#pragma once
#include <cstring>
#include <span>
#include <variant>
#include "common/common_types.h"
@@ -44,59 +45,180 @@ constexpr u32 Base(u32 value, u32 width) {
return value << (32 - width);
}
// TODO - consider passing in the instruction name to make debugging easier when an assertion is
// hit
// TODO NOW - fix below
/*!
 * Compute 2^n as an unsigned 64-bit value by shifting a single set bit left n places.
 * Shift amounts of 64 or more are undefined behavior in C++, so callers must keep n below 64.
 */
constexpr u64 pow2(u64 n) {
  const auto single_bit = 1ull;
  return single_bit << n;
}
/*!
 * Compute 2^n and hand it back as a signed 64-bit value, so it can be negated or compared
 * against signed immediates without mixing signedness.
 * The shift itself is done on an unsigned value and converted to s64 on return.
 */
constexpr s64 pow2s(u64 n) {
  const auto unsigned_result = 1ull << n;
  return unsigned_result;
}
/*!
 * Encode the 2-bit "hw" field, placed at bits [22:21] of the instruction word.
 * Valid inputs are 0..3.
 */
constexpr Field Hw(u32 x) {
  ASSERT(x <= (4 - 1));
  // Keep both low bits: a mask of 4 would clear every value in the valid 0..3 range.
  return Field{(x & 0b11) << 21};
}
/*!
 * Encode the single-bit "sh" field, placed at bit 22 of the instruction word.
 * Valid inputs are 0 and 1.
 */
constexpr Field Sh(u32 x) {
  ASSERT(x >= 0 && x <= (2 - 1));
  const u32 sh_bit = x & 1;
  return Field{sh_bit << 22};
}
/*!
 * Encode the 2-bit "shift" type field, placed at bits [23:22] of the instruction word.
 * Valid inputs are 0..3.
 */
constexpr Field Shift(u32 x) {
  ASSERT(x <= (4 - 1));
  // Keep both low bits: masking with 2 alone would drop bit 0 and alias 1 -> 0 and 3 -> 2.
  return Field{(x & 0b11) << 22};
}
/*!
 * Encode a 5-bit register number into the Rd slot, bits [4:0] of the instruction word.
 * Valid inputs are 0..31.
 */
constexpr Field Rd(u32 x) {
  ASSERT(x >= 0 && x <= (32 - 1));
  const u32 reg_bits = x & 31;
  return Field{reg_bits << 0};
}
/*!
 * Encode a 5-bit register number into the Rt slot, bits [4:0] of the instruction word.
 * This occupies the same bit positions as Rd. Valid inputs are 0..31.
 */
constexpr Field Rt(u32 x) {
  ASSERT(x >= 0 && x <= (32 - 1));
  const u32 reg_bits = x & 31;
  return Field{reg_bits << 0};
}
/*!
 * Encode a 5-bit register number into the Rn slot, bits [9:5] of the instruction word.
 * Valid inputs are 0..31.
 */
constexpr Field Rn(u32 x) {
  ASSERT(x >= 0 && x <= (32 - 1));
  const u32 reg_bits = x & 31;
  return Field{reg_bits << 5};
}
/*!
 * Encode a 5-bit register number into the Rm slot, bits [20:16] of the instruction word.
 * Valid inputs are 0..31.
 */
constexpr Field Rm(u32 x) {
  ASSERT(x >= 0 && x <= (32 - 1));
  const u32 reg_bits = x & 31;
  return Field{reg_bits << 16};
}
/*!
 * Encode a 4-bit unsigned immediate, placed at bits [14:11] of the instruction word.
 * Valid inputs are 0..15.
 */
constexpr Field Imm4(u32 x) {
  // `^` is bitwise XOR in C++, not exponentiation, so the bound is spelled with pow2().
  ASSERT(x <= (pow2(4) - 1));
  // Mask to exactly 4 bits so the value can never spill into bit 15 and above.
  return Field{(x & 0b1111) << 11};
}
/*!
 * Encode a 6-bit unsigned immediate, placed at bits [15:10] of the instruction word.
 * Valid inputs are 0..63.
 */
constexpr Field Imm6(u32 x) {
  // `^` is bitwise XOR in C++, not exponentiation, so the bound is spelled with pow2().
  ASSERT(x <= (pow2(6) - 1));
  return Field{(x & 0b111111) << 10};
}
constexpr Field Imm9(s32 x) {
return Field{(static_cast<uint32_t>(x) & 0b111111111) << 12};
constexpr Field Imm9s(s32 x) {
ASSERT(x >= (pow2s(9 - 1) * -1) && x <= (pow2s(9 - 1) - 1));
return Field{(static_cast<u32>(x) & 0b111111111) << 12};
}
constexpr Field Imm12(u32 x) {
ASSERT(x >= 0 && x <= 4095);
return Field{(static_cast<uint32_t>(x) & 0b111111111111) << 10};
ASSERT(x >= 0 && x <= (pow2(12) - 1));
return Field{(static_cast<u32>(x) & 0b111111111111) << 10};
}
/*!
 * Encode a 16-bit unsigned immediate, placed at bits [20:5] of the instruction word.
 * This is where A64 move-wide and exception-generating instructions keep imm16.
 * Valid inputs are 0..65535.
 */
constexpr Field Imm16(u32 x) {
  ASSERT(x <= (pow2(16) - 1));
  // Shift by 5, not 16: a 16-bit value shifted by 16 would land on the opcode bits [31:16].
  return Field{(x & 0xffffu) << 5};
}
/*!
 * Encode a 26-bit immediate into the lowest bits of the instruction word (shift of 0).
 * The value is taken as already-unsigned bits; inputs must be below 67108864 (2^26).
 */
constexpr Field Imm26(u32 x) {
  ASSERT(x >= 0 && x <= (67108864 - 1));
  const auto masked = static_cast<uint32_t>(x) & 0b11111111111111111111111111;
  return Field{masked << 0};
}
/*!
 * Encode a 19-bit immediate, placed at bits [23:5] of the instruction word.
 * The value is taken as already-unsigned bits; valid inputs are 0..524287.
 */
constexpr Field Imm19(u32 x) {
  // `^` is bitwise XOR in C++, not exponentiation, so the bound is spelled with pow2().
  ASSERT(x <= (pow2(19) - 1));
  return Field{(static_cast<uint32_t>(x) & 0b1111111111111111111) << 5};
}
/*!
 * Encode the 6-bit "imms" field, placed at bits [15:10] of the instruction word.
 * Valid inputs are 0..63.
 */
constexpr Field Imms(u32 x) {
  // `^` is bitwise XOR in C++, not exponentiation, so the bound is spelled with pow2().
  ASSERT(x <= (pow2(6) - 1));
  return Field{(static_cast<uint32_t>(x) & 0b111111) << 10};
}
/*!
 * Encode the 6-bit "immr" field, placed at bits [21:16] of the instruction word.
 * Valid inputs are 0..63.
 */
constexpr Field Immr(u32 x) {
  // `^` is bitwise XOR in C++, not exponentiation, so the bound is spelled with pow2().
  ASSERT(x <= (pow2(6) - 1));
  return Field{(static_cast<uint32_t>(x) & 0b111111) << 16};
}
/*!
 * Encode the 4-bit "immh" field, placed at bits [22:19] of the instruction word.
 * Valid inputs are 0..15.
 */
constexpr Field Immh(u32 x) {
  // `^` is bitwise XOR in C++, not exponentiation, so the bound is spelled with pow2().
  ASSERT(x <= (pow2(4) - 1));
  // Mask to exactly 4 bits so the value can never spill into bit 23 and above.
  return Field{(static_cast<uint32_t>(x) & 0b1111) << 19};
}
/*!
 * Encode the 3-bit "immb" field, placed at bits [18:16] of the instruction word.
 * Valid inputs are 0..7.
 */
constexpr Field Immb(u32 x) {
  // `^` is bitwise XOR in C++, not exponentiation, so the bound is spelled with pow2().
  ASSERT(x <= (pow2(3) - 1));
  // Mask to exactly 3 bits so the value can never spill into the adjacent field at bit 19.
  return Field{(static_cast<uint32_t>(x) & 0b111) << 16};
}
/*!
 * Encode a 4-bit condition code into bits [3:0] of the instruction word.
 * Valid inputs are 0..15.
 */
constexpr Field Cond(u32 x) {
  // `^` is bitwise XOR in C++, not exponentiation, so the bound is spelled with pow2().
  ASSERT(x <= (pow2(4) - 1));
  return Field{(static_cast<uint32_t>(x) & 0b1111) << 0};
}
} // namespace ARM64
struct InstructionARM64 : InstructionImpl<InstructionARM64> {
// The ARM instruction stream is a sequence of word-aligned words. Each ARM instruction is a
// single 32-bit word in that stream.
// Info:
// - https://yurichev.com/mirrors/ARMv8-A_Architecture_Reference_Manual_(Issue_A.a).pdf
// - https://www.scs.stanford.edu/~zyedidia/arm64/
// - https://armconverter.com/?lock=arm64&code=STR+X0,+[SP,+%23-8]!
u32 encoding;
// The ARM instruction stream is a sequence of word-aligned words.
// Each ARM instruction is a single 32-bit word in that stream.
//
// Some x86 instructions are not possible to represent in ARM in a single instruction
// however, in order to not have to overhaul things at the IR level,
// it is preferable to instead allow an instruction to emit multiple instructions if needed
//
// To do so, the instruction can optionally include multiple encodings
// all of which are emitted at once.
static constexpr int kMaxInstrs = 64;
u32 encodings[kMaxInstrs]{};
u8 count = 0;
InstructionARM64() = delete;
// --- single instruction ---
template <typename... Fs>
constexpr InstructionARM64(uint32_t base, Fs... fields) : encoding((base | ... | fields.bits)) {
static_assert((std::is_same_v<Fs, emitter::ARM64::Field> && ...),
"All operands must be Field types");
constexpr InstructionARM64(uint32_t base, Fs... fields) {
static_assert((std::is_same_v<Fs, emitter::ARM64::Field> && ...));
encodings[0] = (base | ... | fields.bits);
count = 1;
}
// --- multi instruction (variadic) ---
/*!
 * Build one instruction by concatenating the encodings of several existing instructions,
 * in argument order. Every argument must itself be an InstructionARM64.
 * The combined number of encodings must not exceed kMaxInstrs.
 */
template <typename... Instrs>
constexpr InstructionARM64(const Instrs&... instrs)
  requires(std::is_same_v<Instrs, InstructionARM64> && ...)
{
  u8 idx = 0;
  auto append = [&](const InstructionARM64& i) {
    for (uint8_t j = 0; j < i.count; ++j) {
      // Each argument may carry several encodings, so guard the fixed-size array on every write.
      ASSERT(idx < kMaxInstrs);
      encodings[idx++] = i.encodings[j];
    }
  };
  (append(instrs), ...);
  count = idx;
}
/*!
 * Build one instruction by concatenating the encodings of every instruction in the span,
 * in order. The combined number of encodings must not exceed kMaxInstrs.
 */
InstructionARM64(std::span<const InstructionARM64> instrs) {
  u8 idx = 0;
  for (const auto& i : instrs) {
    for (uint8_t j = 0; j < i.count; ++j) {
      // The span length is only known at runtime, so guard the fixed-size array on every write.
      ASSERT(idx < kMaxInstrs);
      encodings[idx++] = i.encodings[j];
    }
  }
  count = idx;
}
uint8_t emit(uint8_t* buffer) const {
memcpy(buffer, &encoding, 4);
return 4;
if (count == 1 && encodings[0] == 0) {
return 0;
}
memcpy(buffer, encodings, count * 4);
return count * 4;
}
uint8_t length() const { return 4; }
uint8_t length() const {
if (count == 1 && encodings[0] == 0) {
return 0;
}
return count * 4;
}
// TODO ARM - all placeholders, no idea if this is even relevant, if not, get rid of it all
int get_imm_size() const { return 0; }
int offset_of_imm() const { return 0; }
+46 -23
View File
@@ -81,14 +81,17 @@ enum ARM64_REG : s8 {
X13, // temp, not-saved
X14, // temp, not-saved
X15, // temp, not-saved
X16, // temp, not-saved
X17, // temp, not-saved
// temp, not-saved - Conventionally used for linker/veneer/temporary values (we will reserve this
// one at least)
X16,
// temp, not-saved - Conventionally used for linker/veneer/temporary values
X17,
X18, // temp, not-saved
x19, // saved TODO purpose?, R12
x20, // pp, R13
x21, // st, R14
x22, // offset, TODO purpose?, R15
X19, // saved TODO purpose?, R12
X20, // pp, R13
X21, // st, R14
X22, // offset, TODO purpose?, R15
X23, // unused, callee saved
X24, // unused, callee saved
X25, // unused, callee saved
@@ -103,22 +106,39 @@ enum ARM64_REG : s8 {
// quadword registers, equivalent to XMMs
// the convention in arm64 is the callee preserves all Q values
// at the same time though, the caller should not depend on this convention!
Q0 = 0,
Q1,
Q2,
Q3,
Q4,
Q5,
Q6,
Q7,
Q8,
Q9,
Q10,
Q11,
Q12,
Q13,
Q14,
Q15
V0 = 0,
V1,
V2,
V3,
V4,
V5,
V6,
V7,
V8,
V9,
V10,
V11,
V12,
V13,
V14,
V15,
// TODO ARM - we'll want to check at runtime if the platform has 16 V registers, or 32
V16,
V17,
V18,
V19,
V20,
V21,
V22,
V23,
V24,
V25,
V26,
V27,
V28,
V29,
V30,
V31,
};
class Register {
@@ -128,11 +148,14 @@ class Register {
// intentionally not explicit so we can use X86_REGs in place of Registers
Register(int id) : m_id(id) {}
// TODO ARM64 - this assertion isn't as useful for ARM
// since Q/V registers are not unique in terms of their id
// instead it is the instruction itself that deduces what set of registers to use
bool is_128bit_simd(emitter::InstructionSet instr_set) const {
if (instr_set == emitter::InstructionSet::X86) {
return m_id >= XMM0 && m_id <= XMM15;
} else if (instr_set == emitter::InstructionSet::ARM64) {
return m_id >= Q0 && m_id <= Q15;
return m_id >= V0 && m_id <= V31;
} else {
ASSERT_MSG(false, "is_128bit_simd: instruction set not supported");
}
+28 -32
View File
@@ -150,10 +150,10 @@ TEST(CodeTester, simd_store_128_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, Q3));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, Q3));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, Q14));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, Q13));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, V3));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, V3));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, V14));
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, V13));
EXPECT_EQ(tester.dump_to_hex_string(), "43 00 80 3d c3 01 80 3d 4e 00 80 3d cd 01 80 3d");
}
@@ -197,13 +197,25 @@ TEST(CodeTester, xmm_load_128_arm64) {
CodeTester tester(emitter::InstructionSet::ARM64);
tester.init_code_buffer(256);
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q3, X1));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q3, X14));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q14, X1));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q13, X14));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), V3, X1));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), V3, X14));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), V14, X1));
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), V13, X14));
EXPECT_EQ(tester.dump_to_hex_string(), "23 00 c0 3d c3 01 c0 3d 2e 00 c0 3d cd 01 c0 3d");
}
// Executes the code buffered in `tester`, but only when the instruction set it was
// generated for matches the architecture this test binary was compiled for.
// When the two do not match (or the instruction set is neither ARM64 nor X86),
// nothing is executed and the call is a no-op.
void execute_tester(CodeTester& tester) {
  // Pick, at compile time, the one instruction set this build is allowed to execute.
#ifdef __aarch64__
  const auto runnable_instr_set = InstructionSet::ARM64;
#else
  const auto runnable_instr_set = InstructionSet::X86;
#endif
  if (tester.generator().instr_set() == runnable_instr_set) {
    tester.execute();
  }
}
// These tests actually execute the code. You cannot execute arm64 code on x86 and vice versa,
// so these tests have to be conditional based on the platform, unfortunately.
TEST(CodeTester, execute_push_pop_simd_x86) {
@@ -225,9 +237,7 @@ TEST(CodeTester, execute_push_pop_simd_x86) {
"0f 6f 0c 24 48 83 c4 10 66 44 0f 6f 14 24 48 83 c4 10 66 44 0f 6f 1c 24 48 83 c4 10 66 44 "
"0f 6f 24 24 48 83 c4 10 66 44 0f 6f 2c 24 48 83 c4 10 66 44 0f 6f 34 24 48 83 c4 10 66 44 "
"0f 6f 3c 24 48 83 c4 10 48 83 c4 08 c3");
#ifndef __aarch64__
tester.execute();
#endif
execute_tester(tester);
}
TEST(CodeTester, execute_push_pop_simd_arm64) {
@@ -247,9 +257,7 @@ TEST(CodeTester, execute_push_pop_simd_arm64) {
"ff 43 00 91 e7 03 c0 3d ff 43 00 91 e8 03 c0 3d ff 43 00 91 e9 03 c0 3d ff 43 00 91 ea 03 "
"c0 3d ff 43 00 91 eb 03 c0 3d ff 43 00 91 ec 03 c0 3d ff 43 00 91 ed 03 c0 3d ff 43 00 91 "
"ee 03 c0 3d ff 43 00 91 ef 03 c0 3d ff 43 00 91 c0 03 5f d6");
#ifdef __aarch64__
tester.execute();
#endif
execute_tester(tester);
}
TEST(CodeTester, execute_push_pop_all_the_things_x86) {
@@ -276,9 +284,7 @@ TEST(CodeTester, execute_push_pop_all_the_things_x86) {
"04 24 48 83 c4 10 66 44 0f 6f 0c 24 48 83 c4 10 66 44 0f 6f 14 24 48 83 c4 10 66 44 "
"0f 6f 1c 24 48 83 c4 10 66 44 0f 6f 24 24 48 83 c4 10 66 44 0f 6f 2c 24 48 83 c4 10 "
"66 44 0f 6f 34 24 48 83 c4 10 66 44 0f 6f 3c 24 48 83 c4 10 48 83 c4 08 c3");
#ifndef __aarch64__
tester.execute();
#endif
execute_tester(tester);
}
TEST(CodeTester, execute_push_pop_all_the_things_arm64) {
@@ -310,9 +316,7 @@ TEST(CodeTester, execute_push_pop_all_the_things_arm64) {
"ff 43 00 91 e6 03 c0 3d ff 43 00 91 e7 03 c0 3d ff 43 00 91 e8 03 c0 3d ff 43 00 91 e9 03 "
"c0 3d ff 43 00 91 ea 03 c0 3d ff 43 00 91 eb 03 c0 3d ff 43 00 91 ec 03 c0 3d ff 43 00 91 "
"ed 03 c0 3d ff 43 00 91 ee 03 c0 3d ff 43 00 91 ef 03 c0 3d ff 43 00 91 c0 03 5f d6");
#ifdef __aarch64__
tester.execute();
#endif
execute_tester(tester);
}
TEST(CodeTester, execute_return_x86) {
@@ -322,9 +326,7 @@ TEST(CodeTester, execute_return_x86) {
tester.emit_return();
EXPECT_EQ(tester.dump_to_hex_string(), "c3");
// and execute it!
#ifndef __aarch64__
tester.execute();
#endif
execute_tester(tester);
}
TEST(CodeTester, execute_return_arm64) {
@@ -335,9 +337,7 @@ TEST(CodeTester, execute_return_arm64) {
tester.emit(IGen::ret(tester.generator()));
EXPECT_EQ(tester.dump_to_hex_string(), "00 04 00 91 c0 03 5f d6");
// and execute it!
#ifdef __aarch64__
tester.execute();
#endif
execute_tester(tester);
}
TEST(CodeTester, execute_push_pop_gprs_x86) {
@@ -350,9 +350,7 @@ TEST(CodeTester, execute_push_pop_gprs_x86) {
EXPECT_EQ(tester.dump_to_hex_string(),
"50 51 52 53 54 55 56 57 41 50 41 51 41 52 41 53 41 54 41 55 41 56 41 57 41 5f 41 5e "
"41 5d 41 5c 41 5b 41 5a 41 59 41 58 5f 5e 5d 5c 5b 5a 59 58 c3");
#ifndef __aarch64__
tester.execute();
#endif
execute_tester(tester);
}
TEST(CodeTester, execute_push_pop_gprs_arm64) {
@@ -372,7 +370,5 @@ TEST(CodeTester, execute_push_pop_gprs_arm64) {
"f3 07 41 f8 f2 07 41 f8 f1 07 41 f8 f0 07 41 f8 ef 07 41 f8 ee 07 41 f8 ed 07 41 f8 "
"ec 07 41 f8 eb 07 41 f8 ea 07 41 f8 e9 07 41 f8 e8 07 41 f8 e7 07 41 f8 e6 07 41 f8 "
"e5 07 41 f8 e4 07 41 f8 e3 07 41 f8 e2 07 41 f8 e1 07 41 f8 e0 07 41 f8 c0 03 5f d6");
#ifdef __aarch64__
tester.execute();
#endif
execute_tester(tester);
}
+4061 -3903
View File
File diff suppressed because it is too large Load Diff
+8 -8
View File
@@ -634,20 +634,20 @@ TEST(EmitterAVX, VPSHUFHW) {
// Checks the byte encoding produced by IGen::movq_gpr64_f64 (64-bit move from an XMM
// register into a GPR) for the four combinations of RSP/R12 with XMM3/XMM13.
TEST(EmitterAVX, movq_to_gpr_from_xmm) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  // Exactly four emits: the expected string below holds four 5-byte encodings, so emitting
  // the same moves a second time (e.g. through a differently named generator function)
  // would make the comparison fail.
  tester.emit(IGen::movq_gpr64_f64(tester.generator(), RSP, XMM0 + 3));
  tester.emit(IGen::movq_gpr64_f64(tester.generator(), RSP, XMM0 + 13));
  tester.emit(IGen::movq_gpr64_f64(tester.generator(), R12, XMM0 + 3));
  tester.emit(IGen::movq_gpr64_f64(tester.generator(), R12, XMM0 + 13));
  EXPECT_EQ(tester.dump_to_hex_string(true), "66480F7EDC664C0F7EEC66490F7EDC664D0F7EEC");
}
// Checks the byte encoding produced by IGen::movq_f64_gpr64 (64-bit move from a GPR
// into an XMM register) for the four combinations of XMM3/XMM13 with RSP/R12.
TEST(EmitterAVX, movq_to_xmm_from_gpr) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  // Exactly four emits: the expected string below holds four 5-byte encodings, so emitting
  // the same moves a second time (e.g. through a differently named generator function)
  // would make the comparison fail.
  tester.emit(IGen::movq_f64_gpr64(tester.generator(), XMM0 + 3, RSP));
  tester.emit(IGen::movq_f64_gpr64(tester.generator(), XMM0 + 13, RSP));
  tester.emit(IGen::movq_f64_gpr64(tester.generator(), XMM0 + 3, R12));
  tester.emit(IGen::movq_f64_gpr64(tester.generator(), XMM0 + 13, R12));
  EXPECT_EQ(tester.dump_to_hex_string(true), "66480F6EDC664C0F6EEC66490F6EDC664D0F6EEC");
}