mirror of
https://github.com/open-goal/jak-project
synced 2026-06-23 01:19:57 -04:00
goalc: Implement the bulk of ARM64 instructions from x86 (#4318)
Fewer than 100 instructions are left to implement, the vast majority of them loads and stores. These will likely be knocked out quickly, but they require a more involved implementation than simply translating the instructions: several need multiple instructions, and others may need reserved registers (x16 or x17 are common for this purpose). This is a good milestone to get something pushed to master.
This commit is contained in:
@@ -714,7 +714,9 @@ bool break_now(const ThreadID& tid) {
|
||||
bool cont_now(const ThreadID& tid) {
|
||||
return false;
|
||||
}
|
||||
bool open_memory(const ThreadID& tid, MemoryHandle* out);
|
||||
bool open_memory(const ThreadID& tid, MemoryHandle* out) {
|
||||
return false;
|
||||
}
|
||||
bool close_memory(const ThreadID& tid, MemoryHandle* handle) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
#include "common/versions/versions.h"
|
||||
|
||||
#include "goalc/data_compiler/DataObjectGenerator.h"
|
||||
#include <fmt/chrono.h>
|
||||
|
||||
#include "fmt/chrono.h"
|
||||
|
||||
std::string get_current_time_and_date() {
|
||||
auto const now = std::chrono::floor<std::chrono::seconds>(std::chrono::system_clock::now());
|
||||
|
||||
@@ -3,13 +3,13 @@
|
||||
#include "common/util/gltf_util.h"
|
||||
|
||||
#include "decompiler/extractor/extractor_util.h"
|
||||
#include "decompiler/level_extractor/extract_collide_frags.h"
|
||||
#include "decompiler/level_extractor/extract_merc.h"
|
||||
#include "goalc/build_level/collide/jak2/collide.h"
|
||||
#include "goalc/build_level/common/Tfrag.h"
|
||||
#include "goalc/build_level/jak2/Entity.h"
|
||||
#include "goalc/build_level/jak2/FileInfo.h"
|
||||
#include "goalc/build_level/jak2/LevelFile.h"
|
||||
#include <decompiler/level_extractor/extract_collide_frags.h>
|
||||
|
||||
namespace jak2 {
|
||||
bool run_build_level(const std::string& input_file,
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#include "build_level.h"
|
||||
|
||||
#include "decompiler/extractor/extractor_util.h"
|
||||
#include "decompiler/level_extractor/extract_collide_frags.h"
|
||||
#include "decompiler/level_extractor/extract_merc.h"
|
||||
#include "goalc/build_level/collide/jak3/collide.h"
|
||||
#include "goalc/build_level/common/Tfrag.h"
|
||||
#include "goalc/build_level/jak3/Entity.h"
|
||||
#include "goalc/build_level/jak3/FileInfo.h"
|
||||
#include "goalc/build_level/jak3/LevelFile.h"
|
||||
#include <decompiler/level_extractor/extract_collide_frags.h>
|
||||
|
||||
namespace jak3 {
|
||||
bool run_build_level(const std::string& input_file,
|
||||
|
||||
+40
-35
@@ -121,7 +121,7 @@ void regset_common(emitter::ObjectGenerator* gen,
|
||||
gen->count_eliminated_move();
|
||||
gen->add_instr(IGen::null(*gen), irec);
|
||||
} else {
|
||||
gen->add_instr(IGen::mov_xmm32_xmm32(*gen, dst_reg, src_reg), irec);
|
||||
gen->add_instr(IGen::mov_f32_f32(*gen, dst_reg, src_reg), irec);
|
||||
}
|
||||
} else if (src_is_xmm128 && dst_is_xmm128) {
|
||||
if (src_reg == dst_reg) {
|
||||
@@ -133,20 +133,20 @@ void regset_common(emitter::ObjectGenerator* gen,
|
||||
}
|
||||
} else if (src_class == RegClass::FLOAT && dst_class == RegClass::GPR_64) {
|
||||
// xmm 1x -> gpr
|
||||
gen->add_instr(IGen::movd_gpr32_xmm32(*gen, dst_reg, src_reg), irec);
|
||||
gen->add_instr(IGen::movd_gpr32_f32(*gen, dst_reg, src_reg), irec);
|
||||
// don't forget to sign extend
|
||||
gen->add_instr(IGen::movsx_r64_r32(*gen, dst_reg, dst_reg), irec);
|
||||
} else if (src_class == RegClass::GPR_64 && dst_class == RegClass::FLOAT) {
|
||||
// gpr -> xmm 1x
|
||||
gen->add_instr(IGen::movd_xmm32_gpr32(*gen, dst_reg, src_reg), irec);
|
||||
gen->add_instr(IGen::movd_f32_gpr32(*gen, dst_reg, src_reg), irec);
|
||||
} else if (src_is_xmm128 && dst_class == RegClass::FLOAT) {
|
||||
gen->add_instr(IGen::mov_xmm32_xmm32(*gen, dst_reg, src_reg), irec);
|
||||
gen->add_instr(IGen::mov_f32_f32(*gen, dst_reg, src_reg), irec);
|
||||
} else if (src_class == RegClass::FLOAT && dst_is_xmm128) {
|
||||
gen->add_instr(IGen::mov_xmm32_xmm32(*gen, dst_reg, src_reg), irec);
|
||||
gen->add_instr(IGen::mov_f32_f32(*gen, dst_reg, src_reg), irec);
|
||||
} else if (src_class == RegClass::GPR_64 && dst_is_xmm128) {
|
||||
gen->add_instr(IGen::movq_xmm64_gpr64(*gen, dst_reg, src_reg), irec);
|
||||
gen->add_instr(IGen::movq_f64_gpr64(*gen, dst_reg, src_reg), irec);
|
||||
} else if (src_is_xmm128 && dst_class == RegClass::GPR_64) {
|
||||
gen->add_instr(IGen::movq_gpr64_xmm64(*gen, dst_reg, src_reg), irec);
|
||||
gen->add_instr(IGen::movq_gpr64_f64(*gen, dst_reg, src_reg), irec);
|
||||
} else {
|
||||
ASSERT(false); // unhandled move.
|
||||
}
|
||||
@@ -256,7 +256,7 @@ void IR_LoadSymbolPointer::do_codegen_x86(emitter::ObjectGenerator* gen,
|
||||
if (m_name == "#f") {
|
||||
static_assert(false_symbol_offset() == 0, "false symbol location");
|
||||
if (dest_reg.is_xmm(gen->instr_set())) {
|
||||
gen->add_instr(IGen::movq_xmm64_gpr64(*gen, dest_reg, gRegInfo.get_st_reg()), irec);
|
||||
gen->add_instr(IGen::movq_f64_gpr64(*gen, dest_reg, gRegInfo.get_st_reg()), irec);
|
||||
} else {
|
||||
gen->add_instr(IGen::mov_gpr64_gpr64(*gen, dest_reg, gRegInfo.get_st_reg()), irec);
|
||||
}
|
||||
@@ -417,7 +417,8 @@ void IR_GotoLabel::do_codegen_x86(emitter::ObjectGenerator* gen,
|
||||
const AllocationResult& allocs,
|
||||
emitter::IR_Record irec) {
|
||||
(void)allocs;
|
||||
auto instr = gen->add_instr(IGen::jmp_32(*gen), irec);
|
||||
auto instr = gen->add_instr(IGen::jmp_imm(*gen), irec);
|
||||
// TODO ARM - have to patch this differently, encoding for the immediate is different
|
||||
gen->link_instruction_jump(instr, gen->get_future_ir_record_in_same_func(irec, m_dest->idx));
|
||||
}
|
||||
|
||||
@@ -711,15 +712,21 @@ void IR_IntegerMath::do_codegen_x86(emitter::ObjectGenerator* gen,
|
||||
ASSERT(!m_arg);
|
||||
break;
|
||||
case IntegerMathKind::SHLV_64:
|
||||
gen->add_instr(IGen::shl_gpr64_cl(*gen, get_reg(m_dest, allocs, irec)), irec);
|
||||
// TODO ARM - register provided but unused on x86
|
||||
gen->add_instr(IGen::shl_gpr64_reg(*gen, get_reg(m_dest, allocs, irec), 0), irec);
|
||||
// TODO ARM - x86 forces you to use CL, which is dumb, but the register allocator
|
||||
// has that logic baked in somewhere
|
||||
// ARM has no such constraint, so we should be able to use any register for the shift amount
|
||||
ASSERT(get_reg(m_arg, allocs, irec) == emitter::RCX);
|
||||
break;
|
||||
case IntegerMathKind::SHRV_64:
|
||||
gen->add_instr(IGen::shr_gpr64_cl(*gen, get_reg(m_dest, allocs, irec)), irec);
|
||||
// TODO ARM - register provided but unused on x86
|
||||
gen->add_instr(IGen::shr_gpr64_reg(*gen, get_reg(m_dest, allocs, irec), 0), irec);
|
||||
ASSERT(get_reg(m_arg, allocs, irec) == emitter::RCX);
|
||||
break;
|
||||
case IntegerMathKind::SARV_64:
|
||||
gen->add_instr(IGen::sar_gpr64_cl(*gen, get_reg(m_dest, allocs, irec)), irec);
|
||||
// TODO ARM - register provided but unused on x86
|
||||
gen->add_instr(IGen::sar_gpr64_reg(*gen, get_reg(m_dest, allocs, irec), 0), irec);
|
||||
ASSERT(get_reg(m_arg, allocs, irec) == emitter::RCX);
|
||||
break;
|
||||
case IntegerMathKind::SHL_64:
|
||||
@@ -823,37 +830,37 @@ void IR_FloatMath::do_codegen_x86(emitter::ObjectGenerator* gen,
|
||||
switch (m_kind) {
|
||||
case FloatMathKind::DIV_SS:
|
||||
gen->add_instr(
|
||||
IGen::divss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
IGen::div_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
irec);
|
||||
break;
|
||||
case FloatMathKind::MUL_SS:
|
||||
gen->add_instr(
|
||||
IGen::mulss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
IGen::mul_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
irec);
|
||||
break;
|
||||
case FloatMathKind::ADD_SS:
|
||||
gen->add_instr(
|
||||
IGen::addss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
IGen::add_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
irec);
|
||||
break;
|
||||
case FloatMathKind::SUB_SS:
|
||||
gen->add_instr(
|
||||
IGen::subss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
IGen::sub_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
irec);
|
||||
break;
|
||||
case FloatMathKind::MAX_SS:
|
||||
gen->add_instr(
|
||||
IGen::maxss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
IGen::max_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
irec);
|
||||
break;
|
||||
case FloatMathKind::MIN_SS:
|
||||
gen->add_instr(
|
||||
IGen::minss_xmm_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
IGen::min_f32_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)),
|
||||
irec);
|
||||
break;
|
||||
case FloatMathKind::SQRT_SS:
|
||||
gen->add_instr(
|
||||
IGen::sqrts_xmm(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)), irec);
|
||||
IGen::sqrt_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_arg, allocs, irec)), irec);
|
||||
break;
|
||||
default:
|
||||
ASSERT(false);
|
||||
@@ -895,7 +902,7 @@ void IR_StaticVarLoad::do_codegen_x86(emitter::ObjectGenerator* gen,
|
||||
ASSERT(load_info.requires_load == true);
|
||||
|
||||
auto instr =
|
||||
gen->add_instr(IGen::static_load_xmm32(*gen, get_reg(m_dest, allocs, irec), 0), irec);
|
||||
gen->add_instr(IGen::static_load_f32(*gen, get_reg(m_dest, allocs, irec), 0), irec);
|
||||
gen->link_instruction_static(instr, m_src->rec, 0);
|
||||
} else if (m_dest->ireg().reg_class == RegClass::VECTOR_FLOAT) {
|
||||
// we don't check the load info intentionally because we want to allow loading an entire
|
||||
@@ -966,38 +973,38 @@ void IR_ConditionalBranch::do_codegen_x86(emitter::ObjectGenerator* gen,
|
||||
ASSERT(m_resolved);
|
||||
switch (condition.kind) {
|
||||
case ConditionKind::EQUAL:
|
||||
jump_instr = IGen::je_32(*gen);
|
||||
jump_instr = IGen::je_imm(*gen);
|
||||
break;
|
||||
case ConditionKind::NOT_EQUAL:
|
||||
jump_instr = IGen::jne_32(*gen);
|
||||
jump_instr = IGen::jne_imm(*gen);
|
||||
break;
|
||||
case ConditionKind::LEQ:
|
||||
if (condition.is_signed) {
|
||||
jump_instr = IGen::jle_32(*gen);
|
||||
jump_instr = IGen::jle_imm(*gen);
|
||||
} else {
|
||||
jump_instr = IGen::jbe_32(*gen);
|
||||
jump_instr = IGen::jbe_imm(*gen);
|
||||
}
|
||||
break;
|
||||
case ConditionKind::GEQ:
|
||||
if (condition.is_signed) {
|
||||
jump_instr = IGen::jge_32(*gen);
|
||||
jump_instr = IGen::jge_imm(*gen);
|
||||
} else {
|
||||
jump_instr = IGen::jae_32(*gen);
|
||||
jump_instr = IGen::jae_imm(*gen);
|
||||
}
|
||||
break;
|
||||
|
||||
case ConditionKind::LT:
|
||||
if (condition.is_signed) {
|
||||
jump_instr = IGen::jl_32(*gen);
|
||||
jump_instr = IGen::jl_imm(*gen);
|
||||
} else {
|
||||
jump_instr = IGen::jb_32(*gen);
|
||||
jump_instr = IGen::jb_imm(*gen);
|
||||
}
|
||||
break;
|
||||
case ConditionKind::GT:
|
||||
if (condition.is_signed) {
|
||||
jump_instr = IGen::jg_32(*gen);
|
||||
jump_instr = IGen::jg_imm(*gen);
|
||||
} else {
|
||||
jump_instr = IGen::ja_32(*gen);
|
||||
jump_instr = IGen::ja_imm(*gen);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -1005,7 +1012,7 @@ void IR_ConditionalBranch::do_codegen_x86(emitter::ObjectGenerator* gen,
|
||||
}
|
||||
|
||||
if (condition.is_float) {
|
||||
gen->add_instr(IGen::cmp_flt_flt(*gen, get_reg(condition.a, allocs, irec),
|
||||
gen->add_instr(IGen::cmp_f32_f32(*gen, get_reg(condition.a, allocs, irec),
|
||||
get_reg(condition.b, allocs, irec)),
|
||||
irec);
|
||||
} else {
|
||||
@@ -1210,8 +1217,7 @@ void IR_FloatToInt::do_codegen_x86(emitter::ObjectGenerator* gen,
|
||||
const AllocationResult& allocs,
|
||||
emitter::IR_Record irec) {
|
||||
gen->add_instr(
|
||||
IGen::float_to_int32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_src, allocs, irec)),
|
||||
irec);
|
||||
IGen::f32_to_int32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_src, allocs, irec)), irec);
|
||||
gen->add_instr(
|
||||
IGen::movsx_r64_r32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_dest, allocs, irec)),
|
||||
irec);
|
||||
@@ -1244,8 +1250,7 @@ void IR_IntToFloat::do_codegen_x86(emitter::ObjectGenerator* gen,
|
||||
const AllocationResult& allocs,
|
||||
emitter::IR_Record irec) {
|
||||
gen->add_instr(
|
||||
IGen::int32_to_float(*gen, get_reg(m_dest, allocs, irec), get_reg(m_src, allocs, irec)),
|
||||
irec);
|
||||
IGen::int32_to_f32(*gen, get_reg(m_dest, allocs, irec), get_reg(m_src, allocs, irec)), irec);
|
||||
}
|
||||
|
||||
void IR_IntToFloat::do_codegen_arm64(emitter::ObjectGenerator* gen,
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "goalc/emitter/Instruction.h"
|
||||
#include "goalc/emitter/InstructionSet.h"
|
||||
#include "goalc/emitter/Register.h"
|
||||
#ifdef OS_POSIX
|
||||
#include <sys/mman.h>
|
||||
@@ -125,7 +126,7 @@ void CodeTester::emit_push_all_simd() {
|
||||
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
emit(IGen::sub_gpr64_imm8s(m_gen, SP, 16));
|
||||
emit(IGen::store128_gpr64_simd128(m_gen, SP, Q0 + i));
|
||||
emit(IGen::store128_gpr64_simd128(m_gen, SP, V0 + i));
|
||||
}
|
||||
} else {
|
||||
throw std::runtime_error("CodeTester::emit_push_all_simd unhandled instruction set");
|
||||
@@ -144,7 +145,7 @@ void CodeTester::emit_pop_all_simd() {
|
||||
emit(IGen::add_gpr64_imm8s(m_gen, RSP, 8));
|
||||
} else if (m_gen.instr_set() == InstructionSet::ARM64) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
emit(IGen::load128_simd128_gpr64(m_gen, Q0 + i, SP));
|
||||
emit(IGen::load128_simd128_gpr64(m_gen, V0 + i, SP));
|
||||
emit(IGen::add_gpr64_imm8s(m_gen, SP, 16));
|
||||
}
|
||||
} else {
|
||||
|
||||
+73
-73
@@ -31,24 +31,24 @@ Instruction mov_gpr64_s32(const ObjectGenerator& gen, Register dst, int64_t val)
|
||||
IGEN_DISPATCH(mov_gpr64_s32, dst, val);
|
||||
}
|
||||
|
||||
Instruction movd_gpr32_xmm32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(movd_gpr32_xmm32, dst, src);
|
||||
Instruction movd_gpr32_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(movd_gpr32_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction movd_xmm32_gpr32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(movd_xmm32_gpr32, dst, src);
|
||||
Instruction movd_f32_gpr32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(movd_f32_gpr32, dst, src);
|
||||
}
|
||||
|
||||
Instruction movq_gpr64_xmm64(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(movq_gpr64_xmm64, dst, src);
|
||||
Instruction movq_gpr64_f64(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(movq_gpr64_f64, dst, src);
|
||||
}
|
||||
|
||||
Instruction movq_xmm64_gpr64(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(movq_xmm64_gpr64, dst, src);
|
||||
Instruction movq_f64_gpr64(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(movq_f64_gpr64, dst, src);
|
||||
}
|
||||
|
||||
Instruction mov_xmm32_xmm32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(mov_xmm32_xmm32, dst, src);
|
||||
Instruction mov_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(mov_f32_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction load8s_gpr64_gpr64_plus_gpr64(const ObjectGenerator& gen,
|
||||
@@ -516,31 +516,31 @@ Instruction store128_xmm128_reg_offset(const ObjectGenerator& gen,
|
||||
}
|
||||
|
||||
Instruction load64_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
|
||||
IGEN_DISPATCH(load64_rip_s32, dest, offset);
|
||||
IGEN_DISPATCH(load64_pcRel_s32, dest, offset);
|
||||
}
|
||||
|
||||
Instruction load32s_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
|
||||
IGEN_DISPATCH(load32s_rip_s32, dest, offset);
|
||||
IGEN_DISPATCH(load32s_pcRel_s32, dest, offset);
|
||||
}
|
||||
|
||||
Instruction load32u_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
|
||||
IGEN_DISPATCH(load32u_rip_s32, dest, offset);
|
||||
IGEN_DISPATCH(load32u_pcRel_s32, dest, offset);
|
||||
}
|
||||
|
||||
Instruction load16u_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
|
||||
IGEN_DISPATCH(load16u_rip_s32, dest, offset);
|
||||
IGEN_DISPATCH(load16u_pcRel_s32, dest, offset);
|
||||
}
|
||||
|
||||
Instruction load16s_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
|
||||
IGEN_DISPATCH(load16s_rip_s32, dest, offset);
|
||||
IGEN_DISPATCH(load16s_pcRel_s32, dest, offset);
|
||||
}
|
||||
|
||||
Instruction load8u_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
|
||||
IGEN_DISPATCH(load8u_rip_s32, dest, offset);
|
||||
IGEN_DISPATCH(load8u_pcRel_s32, dest, offset);
|
||||
}
|
||||
|
||||
Instruction load8s_rip_s32(const ObjectGenerator& gen, Register dest, s64 offset) {
|
||||
IGEN_DISPATCH(load8s_rip_s32, dest, offset);
|
||||
IGEN_DISPATCH(load8s_pcRel_s32, dest, offset);
|
||||
}
|
||||
|
||||
Instruction static_load(const ObjectGenerator& gen,
|
||||
@@ -552,19 +552,19 @@ Instruction static_load(const ObjectGenerator& gen,
|
||||
}
|
||||
|
||||
Instruction store64_rip_s32(const ObjectGenerator& gen, Register src, s64 offset) {
|
||||
IGEN_DISPATCH(store64_rip_s32, src, offset);
|
||||
IGEN_DISPATCH(store64_pcRel_s32, src, offset);
|
||||
}
|
||||
|
||||
Instruction store32_rip_s32(const ObjectGenerator& gen, Register src, s64 offset) {
|
||||
IGEN_DISPATCH(store32_rip_s32, src, offset);
|
||||
IGEN_DISPATCH(store32_pcRel_s32, src, offset);
|
||||
}
|
||||
|
||||
Instruction store16_rip_s32(const ObjectGenerator& gen, Register src, s64 offset) {
|
||||
IGEN_DISPATCH(store16_rip_s32, src, offset);
|
||||
IGEN_DISPATCH(store16_pcRel_s32, src, offset);
|
||||
}
|
||||
|
||||
Instruction store8_rip_s32(const ObjectGenerator& gen, Register src, s64 offset) {
|
||||
IGEN_DISPATCH(store8_rip_s32, src, offset);
|
||||
IGEN_DISPATCH(store8_pcRel_s32, src, offset);
|
||||
}
|
||||
|
||||
Instruction static_store(const ObjectGenerator& gen, Register value, s64 offset, int size) {
|
||||
@@ -575,12 +575,12 @@ Instruction static_addr(const ObjectGenerator& gen, Register dst, s64 offset) {
|
||||
IGEN_DISPATCH(static_addr, dst, offset);
|
||||
}
|
||||
|
||||
Instruction static_load_xmm32(const ObjectGenerator& gen, Register simd_dest, s64 offset) {
|
||||
IGEN_DISPATCH(static_load_xmm32, simd_dest, offset);
|
||||
Instruction static_load_f32(const ObjectGenerator& gen, Register simd_dest, s64 offset) {
|
||||
IGEN_DISPATCH(static_load_f32, simd_dest, offset);
|
||||
}
|
||||
|
||||
Instruction static_store_xmm32(const ObjectGenerator& gen, Register xmm_value, s64 offset) {
|
||||
IGEN_DISPATCH(static_store_xmm32, xmm_value, offset);
|
||||
Instruction static_store_f32(const ObjectGenerator& gen, Register xmm_value, s64 offset) {
|
||||
IGEN_DISPATCH(static_store_f32, xmm_value, offset);
|
||||
}
|
||||
|
||||
Instruction load64_gpr64_plus_s32(const ObjectGenerator& gen,
|
||||
@@ -693,16 +693,16 @@ Instruction not_gpr64(const ObjectGenerator& gen, Register reg) {
|
||||
IGEN_DISPATCH(not_gpr64, reg);
|
||||
}
|
||||
|
||||
Instruction shl_gpr64_cl(const ObjectGenerator& gen, Register reg) {
|
||||
IGEN_DISPATCH(shl_gpr64_cl, reg);
|
||||
Instruction shl_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg) {
|
||||
IGEN_DISPATCH(shl_gpr64_reg, reg, shift_reg);
|
||||
}
|
||||
|
||||
Instruction shr_gpr64_cl(const ObjectGenerator& gen, Register reg) {
|
||||
IGEN_DISPATCH(shr_gpr64_cl, reg);
|
||||
Instruction shr_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg) {
|
||||
IGEN_DISPATCH(shr_gpr64_reg, reg, shift_reg);
|
||||
}
|
||||
|
||||
Instruction sar_gpr64_cl(const ObjectGenerator& gen, Register reg) {
|
||||
IGEN_DISPATCH(sar_gpr64_cl, reg);
|
||||
Instruction sar_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg) {
|
||||
IGEN_DISPATCH(sar_gpr64_reg, reg, shift_reg);
|
||||
}
|
||||
|
||||
Instruction shl_gpr64_u8(const ObjectGenerator& gen, Register reg, uint8_t sa) {
|
||||
@@ -717,88 +717,88 @@ Instruction sar_gpr64_u8(const ObjectGenerator& gen, Register reg, uint8_t sa) {
|
||||
IGEN_DISPATCH(sar_gpr64_u8, reg, sa);
|
||||
}
|
||||
|
||||
Instruction jmp_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jmp_32);
|
||||
Instruction jmp_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jmp_imm);
|
||||
}
|
||||
|
||||
Instruction je_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(je_32);
|
||||
Instruction je_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(je_imm);
|
||||
}
|
||||
|
||||
Instruction jne_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jne_32);
|
||||
Instruction jne_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jne_imm);
|
||||
}
|
||||
|
||||
Instruction jle_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jle_32);
|
||||
Instruction jle_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jle_imm);
|
||||
}
|
||||
|
||||
Instruction jge_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jge_32);
|
||||
Instruction jge_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jge_imm);
|
||||
}
|
||||
|
||||
Instruction jl_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jl_32);
|
||||
Instruction jl_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jl_imm);
|
||||
}
|
||||
|
||||
Instruction jg_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jg_32);
|
||||
Instruction jg_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jg_imm);
|
||||
}
|
||||
|
||||
Instruction jbe_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jbe_32);
|
||||
Instruction jbe_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jbe_imm);
|
||||
}
|
||||
|
||||
Instruction jae_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jae_32);
|
||||
Instruction jae_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jae_imm);
|
||||
}
|
||||
|
||||
Instruction jb_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jb_32);
|
||||
Instruction jb_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(jb_imm);
|
||||
}
|
||||
|
||||
Instruction ja_32(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(ja_32);
|
||||
Instruction ja_imm(const ObjectGenerator& gen) {
|
||||
IGEN_DISPATCH(ja_imm);
|
||||
}
|
||||
|
||||
Instruction cmp_flt_flt(const ObjectGenerator& gen, Register a, Register b) {
|
||||
IGEN_DISPATCH(cmp_flt_flt, a, b);
|
||||
Instruction cmp_f32_f32(const ObjectGenerator& gen, Register a, Register b) {
|
||||
IGEN_DISPATCH(cmp_f32_f32, a, b);
|
||||
}
|
||||
|
||||
Instruction sqrts_xmm(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(sqrts_xmm, dst, src);
|
||||
Instruction sqrt_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(sqrt_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction mulss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(mulss_xmm_xmm, dst, src);
|
||||
Instruction mul_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(mul_f32_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction divss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(divss_xmm_xmm, dst, src);
|
||||
Instruction div_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(div_f32_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction subss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(subss_xmm_xmm, dst, src);
|
||||
Instruction sub_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(sub_f32_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction addss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(addss_xmm_xmm, dst, src);
|
||||
Instruction add_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(add_f32_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction minss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(minss_xmm_xmm, dst, src);
|
||||
Instruction min_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(min_f32_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction maxss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(maxss_xmm_xmm, dst, src);
|
||||
Instruction max_f32_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(max_f32_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction int32_to_float(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(int32_to_float, dst, src);
|
||||
Instruction int32_to_f32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(int32_to_f32, dst, src);
|
||||
}
|
||||
|
||||
Instruction float_to_int32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(float_to_int32, dst, src);
|
||||
Instruction f32_to_int32(const ObjectGenerator& gen, Register dst, Register src) {
|
||||
IGEN_DISPATCH(f32_to_int32, dst, src);
|
||||
}
|
||||
|
||||
Instruction nop(const ObjectGenerator& gen) {
|
||||
|
||||
+42
-40
@@ -35,27 +35,27 @@ Instruction mov_gpr64_s32(const ObjectGenerator& gen, Register dst, int64_t val)
|
||||
/*!
|
||||
* Move 32-bits of xmm to 32 bits of gpr (no sign extension).
|
||||
*/
|
||||
Instruction movd_gpr32_xmm32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction movd_gpr32_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 32-bits of gpr to 32-bits of xmm (no sign extension)
|
||||
*/
|
||||
Instruction movd_xmm32_gpr32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction movd_f32_gpr32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 64-bits of xmm to 64 bits of gpr (no sign extension).
|
||||
*/
|
||||
Instruction movq_gpr64_xmm64(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction movq_gpr64_f64(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 64-bits of gpr to 64-bits of xmm (no sign extension)
|
||||
*/
|
||||
Instruction movq_xmm64_gpr64(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction movq_f64_gpr64(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 32-bits between xmm's
|
||||
*/
|
||||
Instruction mov_xmm32_xmm32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction mov_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
// todo - GPR64 -> XMM64 (zext)
|
||||
// todo - XMM -> GPR64
|
||||
@@ -486,9 +486,9 @@ Instruction static_store(const ObjectGenerator& gen, Register value, s64 offset,
|
||||
|
||||
Instruction static_addr(const ObjectGenerator& gen, Register dst, s64 offset);
|
||||
|
||||
Instruction static_load_xmm32(const ObjectGenerator& gen, Register simd_dest, s64 offset);
|
||||
Instruction static_load_f32(const ObjectGenerator& gen, Register simd_dest, s64 offset);
|
||||
|
||||
Instruction static_store_xmm32(const ObjectGenerator& gen, Register xmm_value, s64 offset);
|
||||
Instruction static_store_f32(const ObjectGenerator& gen, Register xmm_value, s64 offset);
|
||||
|
||||
// TODO, special load/stores of 128 bit values.
|
||||
|
||||
@@ -618,19 +618,21 @@ Instruction not_gpr64(const ObjectGenerator& gen, Register reg);
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr left by CL register
|
||||
* Shift 64-bit gpr left by a shift amount in a register (ie. forced to be CL register on x86)
|
||||
*/
|
||||
Instruction shl_gpr64_cl(const ObjectGenerator& gen, Register reg);
|
||||
Instruction shl_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg);
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr right (logical) by CL register
|
||||
* Shift 64-bit gpr right (logical) by a shift amount in a register (ie. forced to be CL register on
|
||||
* x86)
|
||||
*/
|
||||
Instruction shr_gpr64_cl(const ObjectGenerator& gen, Register reg);
|
||||
Instruction shr_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg);
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr right (arithmetic) by CL register
|
||||
* Shift 64-bit gpr right (arithmetic) by a shift amount in a register (ie. forced to be CL register
|
||||
* on x86)
|
||||
*/
|
||||
Instruction sar_gpr64_cl(const ObjectGenerator& gen, Register reg);
|
||||
Instruction sar_gpr64_reg(const ObjectGenerator& gen, Register reg, Register shift_reg);
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr left (logical) by the constant shift amount "sa".
|
||||
@@ -654,57 +656,57 @@ Instruction sar_gpr64_u8(const ObjectGenerator& gen, Register reg, uint8_t sa);
|
||||
/*!
|
||||
* Jump, 32-bit constant offset. The offset is by default 0 and must be patched later.
|
||||
*/
|
||||
Instruction jmp_32(const ObjectGenerator& gen);
|
||||
Instruction jmp_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump if equal.
|
||||
*/
|
||||
Instruction je_32(const ObjectGenerator& gen);
|
||||
Instruction je_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump not equal.
|
||||
*/
|
||||
Instruction jne_32(const ObjectGenerator& gen);
|
||||
Instruction jne_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump less than or equal.
|
||||
*/
|
||||
Instruction jle_32(const ObjectGenerator& gen);
|
||||
Instruction jle_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump greater than or equal.
|
||||
*/
|
||||
Instruction jge_32(const ObjectGenerator& gen);
|
||||
Instruction jge_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump less than
|
||||
*/
|
||||
Instruction jl_32(const ObjectGenerator& gen);
|
||||
Instruction jl_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump greater than
|
||||
*/
|
||||
Instruction jg_32(const ObjectGenerator& gen);
|
||||
Instruction jg_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump below or equal
|
||||
*/
|
||||
Instruction jbe_32(const ObjectGenerator& gen);
|
||||
Instruction jbe_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump above or equal
|
||||
*/
|
||||
Instruction jae_32(const ObjectGenerator& gen);
|
||||
Instruction jae_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump below
|
||||
*/
|
||||
Instruction jb_32(const ObjectGenerator& gen);
|
||||
Instruction jb_imm(const ObjectGenerator& gen);
|
||||
|
||||
/*!
|
||||
* Jump above
|
||||
*/
|
||||
Instruction ja_32(const ObjectGenerator& gen);
|
||||
Instruction ja_imm(const ObjectGenerator& gen);
|
||||
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
// FLOAT MATH
|
||||
@@ -713,49 +715,49 @@ Instruction ja_32(const ObjectGenerator& gen);
|
||||
/*!
|
||||
* Compare two floats and set flag register for jump (ucomiss)
|
||||
*/
|
||||
Instruction cmp_flt_flt(const ObjectGenerator& gen, Register a, Register b);
|
||||
Instruction cmp_f32_f32(const ObjectGenerator& gen, Register a, Register b);
|
||||
|
||||
Instruction sqrts_xmm(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction sqrt_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Multiply two floats in xmm's
|
||||
* Multiply two floats in f32's
|
||||
*/
|
||||
Instruction mulss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction mul_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Divide two floats in xmm's
|
||||
* Divide two floats in f32's
|
||||
*/
|
||||
Instruction divss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction div_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Subtract two floats in xmm's
|
||||
* Subtract two floats in f32's
|
||||
*/
|
||||
Instruction subss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction sub_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Add two floats in xmm's
|
||||
* Add two floats in f32's
|
||||
*/
|
||||
Instruction addss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction add_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Floating point minimum.
|
||||
*/
|
||||
Instruction minss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction min_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Floating point maximum.
|
||||
*/
|
||||
Instruction maxss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction max_f32_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Convert GPR int32 to XMM float (single precision)
|
||||
* Convert GPR int32 to float (single precision)
|
||||
*/
|
||||
Instruction int32_to_float(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction int32_to_f32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Convert XMM float to GPR int32(single precision) (truncate)
|
||||
* Convert float to GPR int32 (single precision) (truncate)
|
||||
*/
|
||||
Instruction float_to_int32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
Instruction f32_to_int32(const ObjectGenerator& gen, Register dst, Register src);
|
||||
|
||||
Instruction nop(const ObjectGenerator& gen);
|
||||
|
||||
|
||||
+752
-298
File diff suppressed because it is too large
Load Diff
+50
-48
@@ -34,27 +34,27 @@ InstructionARM64 mov_gpr64_s32(Register dst, int64_t val);
|
||||
/*!
|
||||
* Move 32-bits of xmm to 32 bits of gpr (no sign extension).
|
||||
*/
|
||||
InstructionARM64 movd_gpr32_xmm32(Register dst, Register src);
|
||||
InstructionARM64 movd_gpr32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 32-bits of gpr to 32-bits of xmm (no sign extension)
|
||||
*/
|
||||
InstructionARM64 movd_xmm32_gpr32(Register dst, Register src);
|
||||
InstructionARM64 movd_f32_gpr32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 64-bits of xmm to 64 bits of gpr (no sign extension).
|
||||
*/
|
||||
InstructionARM64 movq_gpr64_xmm64(Register dst, Register src);
|
||||
InstructionARM64 movq_gpr64_f64(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 64-bits of gpr to 64-bits of xmm (no sign extension)
|
||||
*/
|
||||
InstructionARM64 movq_xmm64_gpr64(Register dst, Register src);
|
||||
InstructionARM64 movq_f64_gpr64(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 32-bits between xmm's
|
||||
*/
|
||||
InstructionARM64 mov_xmm32_xmm32(Register dst, Register src);
|
||||
InstructionARM64 mov_f32_f32(Register dst, Register src);
|
||||
|
||||
// todo - GPR64 -> XMM64 (zext)
|
||||
// todo - XMM -> GPR64
|
||||
@@ -323,37 +323,37 @@ InstructionARM64 store128_xmm128_reg_offset(Register base, Register xmm_val, s64
|
||||
// RIP loads and stores
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
InstructionARM64 load64_rip_s32(Register dest, s64 offset);
|
||||
InstructionARM64 load64_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionARM64 load32s_rip_s32(Register dest, s64 offset);
|
||||
InstructionARM64 load32s_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionARM64 load32u_rip_s32(Register dest, s64 offset);
|
||||
InstructionARM64 load32u_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionARM64 load16u_rip_s32(Register dest, s64 offset);
|
||||
InstructionARM64 load16u_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionARM64 load16s_rip_s32(Register dest, s64 offset);
|
||||
InstructionARM64 load16s_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionARM64 load8u_rip_s32(Register dest, s64 offset);
|
||||
InstructionARM64 load8u_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionARM64 load8s_rip_s32(Register dest, s64 offset);
|
||||
InstructionARM64 load8s_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionARM64 static_load(Register dest, s64 offset, int size, bool sign_extend);
|
||||
|
||||
InstructionARM64 store64_rip_s32(Register src, s64 offset);
|
||||
InstructionARM64 store64_pcRel_s32(Register src, s64 offset);
|
||||
|
||||
InstructionARM64 store32_rip_s32(Register src, s64 offset);
|
||||
InstructionARM64 store32_pcRel_s32(Register src, s64 offset);
|
||||
|
||||
InstructionARM64 store16_rip_s32(Register src, s64 offset);
|
||||
InstructionARM64 store16_pcRel_s32(Register src, s64 offset);
|
||||
|
||||
InstructionARM64 store8_rip_s32(Register src, s64 offset);
|
||||
InstructionARM64 store8_pcRel_s32(Register src, s64 offset);
|
||||
|
||||
InstructionARM64 static_store(Register value, s64 offset, int size);
|
||||
|
||||
InstructionARM64 static_addr(Register dst, s64 offset);
|
||||
|
||||
InstructionARM64 static_load_xmm32(Register simd_dest, s64 offset);
|
||||
InstructionARM64 static_load_f32(Register simd_dest, s64 offset);
|
||||
|
||||
InstructionARM64 static_store_xmm32(Register xmm_value, s64 offset);
|
||||
InstructionARM64 static_store_f32(Register xmm_value, s64 offset);
|
||||
|
||||
// TODO, special load/stores of 128 bit values.
|
||||
|
||||
@@ -477,19 +477,21 @@ InstructionARM64 not_gpr64(Register reg);
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr left by CL register
|
||||
* Shift 64-bit gpr left by a shift amount in a register (ie. forced to be CL register on x86)
|
||||
*/
|
||||
InstructionARM64 shl_gpr64_cl(Register reg);
|
||||
InstructionARM64 shl_gpr64_reg(Register reg, Register shift_reg);
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr right (logical) by CL register
|
||||
* Shift 64-bit gpr right (logical) by a shift amount in a register (ie. forced to be CL register on
|
||||
* x86)
|
||||
*/
|
||||
InstructionARM64 shr_gpr64_cl(Register reg);
|
||||
InstructionARM64 shr_gpr64_reg(Register reg, Register shift_reg);
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr right (arithmetic) by CL register
|
||||
* Shift 64-bit gpr right (arithmetic) by a shift amount in a register (ie. forced to be CL register on
|
||||
* x86)
|
||||
*/
|
||||
InstructionARM64 sar_gpr64_cl(Register reg);
|
||||
InstructionARM64 sar_gpr64_reg(Register reg, Register shift_reg);
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr left (logical) by the constant shift amount "sa".
|
||||
@@ -513,57 +515,57 @@ InstructionARM64 sar_gpr64_u8(Register reg, uint8_t sa);
|
||||
/*!
|
||||
* Jump, 32-bit constant offset. The offset is by default 0 and must be patched later.
|
||||
*/
|
||||
InstructionARM64 jmp_32();
|
||||
InstructionARM64 jmp_imm();
|
||||
|
||||
/*!
|
||||
* Jump if equal.
|
||||
*/
|
||||
InstructionARM64 je_32();
|
||||
InstructionARM64 je_imm();
|
||||
|
||||
/*!
|
||||
* Jump not equal.
|
||||
*/
|
||||
InstructionARM64 jne_32();
|
||||
InstructionARM64 jne_imm();
|
||||
|
||||
/*!
|
||||
* Jump less than or equal.
|
||||
*/
|
||||
InstructionARM64 jle_32();
|
||||
InstructionARM64 jle_imm();
|
||||
|
||||
/*!
|
||||
* Jump greater than or equal.
|
||||
*/
|
||||
InstructionARM64 jge_32();
|
||||
InstructionARM64 jge_imm();
|
||||
|
||||
/*!
|
||||
* Jump less than
|
||||
*/
|
||||
InstructionARM64 jl_32();
|
||||
InstructionARM64 jl_imm();
|
||||
|
||||
/*!
|
||||
* Jump greater than
|
||||
*/
|
||||
InstructionARM64 jg_32();
|
||||
InstructionARM64 jg_imm();
|
||||
|
||||
/*!
|
||||
* Jump below or equal
|
||||
*/
|
||||
InstructionARM64 jbe_32();
|
||||
InstructionARM64 jbe_imm();
|
||||
|
||||
/*!
|
||||
* Jump above or equal
|
||||
*/
|
||||
InstructionARM64 jae_32();
|
||||
InstructionARM64 jae_imm();
|
||||
|
||||
/*!
|
||||
* Jump below
|
||||
*/
|
||||
InstructionARM64 jb_32();
|
||||
InstructionARM64 jb_imm();
|
||||
|
||||
/*!
|
||||
* Jump above
|
||||
*/
|
||||
InstructionARM64 ja_32();
|
||||
InstructionARM64 ja_imm();
|
||||
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
// FLOAT MATH
|
||||
@@ -572,49 +574,49 @@ InstructionARM64 ja_32();
|
||||
/*!
|
||||
* Compare two floats and set flag register for jump (ucomiss)
|
||||
*/
|
||||
InstructionARM64 cmp_flt_flt(Register a, Register b);
|
||||
InstructionARM64 cmp_f32_f32(Register a, Register b);
|
||||
|
||||
InstructionARM64 sqrts_xmm(Register dst, Register src);
|
||||
InstructionARM64 sqrt_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Multiply two floats in xmm's
|
||||
*/
|
||||
InstructionARM64 mulss_xmm_xmm(Register dst, Register src);
|
||||
InstructionARM64 mul_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Divide two floats in xmm's
|
||||
*/
|
||||
InstructionARM64 divss_xmm_xmm(Register dst, Register src);
|
||||
InstructionARM64 div_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Subtract two floats in xmm's
|
||||
*/
|
||||
InstructionARM64 subss_xmm_xmm(Register dst, Register src);
|
||||
InstructionARM64 sub_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Add two floats in xmm's
|
||||
*/
|
||||
InstructionARM64 addss_xmm_xmm(Register dst, Register src);
|
||||
InstructionARM64 add_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Floating point minimum.
|
||||
*/
|
||||
InstructionARM64 minss_xmm_xmm(Register dst, Register src);
|
||||
InstructionARM64 min_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Floating point maximum.
|
||||
*/
|
||||
InstructionARM64 maxss_xmm_xmm(Register dst, Register src);
|
||||
InstructionARM64 max_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Convert GPR int32 to XMM float (single precision)
|
||||
* Convert GPR int32 to float (single precision)
|
||||
*/
|
||||
InstructionARM64 int32_to_float(Register dst, Register src);
|
||||
InstructionARM64 int32_to_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Convert XMM float to GPR int32(single precision) (truncate)
|
||||
* Convert float to GPR int32 (single precision) (truncate)
|
||||
*/
|
||||
InstructionARM64 float_to_int32(Register dst, Register src);
|
||||
InstructionARM64 f32_to_int32(Register dst, Register src);
|
||||
|
||||
InstructionARM64 nop();
|
||||
|
||||
@@ -800,4 +802,4 @@ InstructionARM64 vpshufhw(Register dst, Register src, u8 imm);
|
||||
InstructionARM64 vpackuswb(Register dst, Register src0, Register src1);
|
||||
} // namespace ARM64
|
||||
} // namespace IGen
|
||||
} // namespace emitter
|
||||
} // namespace emitter
|
||||
|
||||
+58
-54
@@ -59,7 +59,7 @@ InstructionX86 mov_gpr64_s32(Register dst, int64_t val) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 movd_gpr32_xmm32(Register dst, Register src) {
|
||||
InstructionX86 movd_gpr32_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_gpr(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0x66);
|
||||
@@ -70,7 +70,7 @@ InstructionX86 movd_gpr32_xmm32(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 movd_xmm32_gpr32(Register dst, Register src) {
|
||||
InstructionX86 movd_f32_gpr32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_gpr(instr_set));
|
||||
InstructionX86 instr(0x66);
|
||||
@@ -81,7 +81,7 @@ InstructionX86 movd_xmm32_gpr32(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 movq_gpr64_xmm64(Register dst, Register src) {
|
||||
InstructionX86 movq_gpr64_f64(Register dst, Register src) {
|
||||
ASSERT(dst.is_gpr(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0x66);
|
||||
@@ -92,7 +92,7 @@ InstructionX86 movq_gpr64_xmm64(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 movq_xmm64_gpr64(Register dst, Register src) {
|
||||
InstructionX86 movq_f64_gpr64(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_gpr(instr_set));
|
||||
InstructionX86 instr(0x66);
|
||||
@@ -103,7 +103,7 @@ InstructionX86 movq_xmm64_gpr64(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 mov_xmm32_xmm32(Register dst, Register src) {
|
||||
InstructionX86 mov_f32_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -1155,7 +1155,7 @@ InstructionX86 store128_xmm128_reg_offset(Register base, Register xmm_val, s64 o
|
||||
}
|
||||
}
|
||||
|
||||
InstructionX86 load64_rip_s32(Register dest, s64 offset) {
|
||||
InstructionX86 load64_pcRel_s32(Register dest, s64 offset) {
|
||||
ASSERT(dest.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
InstructionX86 instr(0x8b);
|
||||
@@ -1163,7 +1163,7 @@ InstructionX86 load64_rip_s32(Register dest, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 load32s_rip_s32(Register dest, s64 offset) {
|
||||
InstructionX86 load32s_pcRel_s32(Register dest, s64 offset) {
|
||||
ASSERT(dest.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
InstructionX86 instr(0x63);
|
||||
@@ -1171,7 +1171,7 @@ InstructionX86 load32s_rip_s32(Register dest, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 load32u_rip_s32(Register dest, s64 offset) {
|
||||
InstructionX86 load32u_pcRel_s32(Register dest, s64 offset) {
|
||||
ASSERT(dest.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
InstructionX86 instr(0x8b);
|
||||
@@ -1179,7 +1179,7 @@ InstructionX86 load32u_rip_s32(Register dest, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 load16u_rip_s32(Register dest, s64 offset) {
|
||||
InstructionX86 load16u_pcRel_s32(Register dest, s64 offset) {
|
||||
ASSERT(dest.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
InstructionX86 instr(0xf);
|
||||
@@ -1188,7 +1188,7 @@ InstructionX86 load16u_rip_s32(Register dest, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 load16s_rip_s32(Register dest, s64 offset) {
|
||||
InstructionX86 load16s_pcRel_s32(Register dest, s64 offset) {
|
||||
ASSERT(dest.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
InstructionX86 instr(0xf);
|
||||
@@ -1197,7 +1197,7 @@ InstructionX86 load16s_rip_s32(Register dest, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 load8u_rip_s32(Register dest, s64 offset) {
|
||||
InstructionX86 load8u_pcRel_s32(Register dest, s64 offset) {
|
||||
ASSERT(dest.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
InstructionX86 instr(0xf);
|
||||
@@ -1206,7 +1206,7 @@ InstructionX86 load8u_rip_s32(Register dest, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 load8s_rip_s32(Register dest, s64 offset) {
|
||||
InstructionX86 load8s_pcRel_s32(Register dest, s64 offset) {
|
||||
ASSERT(dest.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
InstructionX86 instr(0xf);
|
||||
@@ -1219,33 +1219,33 @@ InstructionX86 static_load(Register dest, s64 offset, int size, bool sign_extend
|
||||
switch (size) {
|
||||
case 1:
|
||||
if (sign_extend) {
|
||||
return load8s_rip_s32(dest, offset);
|
||||
return load8s_pcRel_s32(dest, offset);
|
||||
} else {
|
||||
return load8u_rip_s32(dest, offset);
|
||||
return load8u_pcRel_s32(dest, offset);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (sign_extend) {
|
||||
return load16s_rip_s32(dest, offset);
|
||||
return load16s_pcRel_s32(dest, offset);
|
||||
} else {
|
||||
return load16u_rip_s32(dest, offset);
|
||||
return load16u_pcRel_s32(dest, offset);
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (sign_extend) {
|
||||
return load32s_rip_s32(dest, offset);
|
||||
return load32s_pcRel_s32(dest, offset);
|
||||
} else {
|
||||
return load32u_rip_s32(dest, offset);
|
||||
return load32u_pcRel_s32(dest, offset);
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
return load64_rip_s32(dest, offset);
|
||||
return load64_pcRel_s32(dest, offset);
|
||||
default:
|
||||
ASSERT(false);
|
||||
}
|
||||
}
|
||||
|
||||
InstructionX86 store64_rip_s32(Register src, s64 offset) {
|
||||
InstructionX86 store64_pcRel_s32(Register src, s64 offset) {
|
||||
ASSERT(src.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
@@ -1254,7 +1254,7 @@ InstructionX86 store64_rip_s32(Register src, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 store32_rip_s32(Register src, s64 offset) {
|
||||
InstructionX86 store32_pcRel_s32(Register src, s64 offset) {
|
||||
ASSERT(src.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
@@ -1263,7 +1263,7 @@ InstructionX86 store32_rip_s32(Register src, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 store16_rip_s32(Register src, s64 offset) {
|
||||
InstructionX86 store16_pcRel_s32(Register src, s64 offset) {
|
||||
ASSERT(src.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
@@ -1274,7 +1274,7 @@ InstructionX86 store16_rip_s32(Register src, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 store8_rip_s32(Register src, s64 offset) {
|
||||
InstructionX86 store8_pcRel_s32(Register src, s64 offset) {
|
||||
ASSERT(src.is_gpr(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
@@ -1289,13 +1289,13 @@ InstructionX86 store8_rip_s32(Register src, s64 offset) {
|
||||
InstructionX86 static_store(Register value, s64 offset, int size) {
|
||||
switch (size) {
|
||||
case 1:
|
||||
return store8_rip_s32(value, offset);
|
||||
return store8_pcRel_s32(value, offset);
|
||||
case 2:
|
||||
return store16_rip_s32(value, offset);
|
||||
return store16_pcRel_s32(value, offset);
|
||||
case 4:
|
||||
return store32_rip_s32(value, offset);
|
||||
return store32_pcRel_s32(value, offset);
|
||||
case 8:
|
||||
return store64_rip_s32(value, offset);
|
||||
return store64_pcRel_s32(value, offset);
|
||||
default:
|
||||
ASSERT(false);
|
||||
}
|
||||
@@ -1309,7 +1309,7 @@ InstructionX86 static_addr(Register dst, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 static_load_xmm32(Register simd_dest, s64 offset) {
|
||||
InstructionX86 static_load_f32(Register simd_dest, s64 offset) {
|
||||
ASSERT(simd_dest.is_xmm(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
|
||||
@@ -1322,7 +1322,7 @@ InstructionX86 static_load_xmm32(Register simd_dest, s64 offset) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 static_store_xmm32(Register xmm_value, s64 offset) {
|
||||
InstructionX86 static_store_f32(Register xmm_value, s64 offset) {
|
||||
ASSERT(xmm_value.is_xmm(instr_set));
|
||||
ASSERT(offset >= INT32_MIN && offset <= INT32_MAX);
|
||||
|
||||
@@ -1569,21 +1569,24 @@ InstructionX86 not_gpr64(Register reg) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 shl_gpr64_cl(Register reg) {
|
||||
InstructionX86 shl_gpr64_reg(Register reg, Register _) {
|
||||
// x86 is forced to use CL
|
||||
ASSERT(reg.is_gpr(instr_set));
|
||||
InstructionX86 instr(0xd3);
|
||||
instr.set_modrm_and_rex(4, reg.hw_id(instr_set), 3, true);
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 shr_gpr64_cl(Register reg) {
|
||||
InstructionX86 shr_gpr64_reg(Register reg, Register _) {
|
||||
// x86 is forced to use CL
|
||||
ASSERT(reg.is_gpr(instr_set));
|
||||
InstructionX86 instr(0xd3);
|
||||
instr.set_modrm_and_rex(5, reg.hw_id(instr_set), 3, true);
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 sar_gpr64_cl(Register reg) {
|
||||
InstructionX86 sar_gpr64_reg(Register reg, Register _) {
|
||||
// x86 is forced to use CL
|
||||
ASSERT(reg.is_gpr(instr_set));
|
||||
InstructionX86 instr(0xd3);
|
||||
instr.set_modrm_and_rex(7, reg.hw_id(instr_set), 3, true);
|
||||
@@ -1614,83 +1617,83 @@ InstructionX86 sar_gpr64_u8(Register reg, uint8_t sa) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 jmp_32() {
|
||||
InstructionX86 jmp_imm() {
|
||||
InstructionX86 instr(0xe9);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 je_32() {
|
||||
InstructionX86 je_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x84);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 jne_32() {
|
||||
InstructionX86 jne_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x85);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 jle_32() {
|
||||
InstructionX86 jle_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x8e);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 jge_32() {
|
||||
InstructionX86 jge_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x8d);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 jl_32() {
|
||||
InstructionX86 jl_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x8c);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 jg_32() {
|
||||
InstructionX86 jg_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x8f);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 jbe_32() {
|
||||
InstructionX86 jbe_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x86);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 jae_32() {
|
||||
InstructionX86 jae_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x83);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 jb_32() {
|
||||
InstructionX86 jb_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x82);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 ja_32() {
|
||||
InstructionX86 ja_imm() {
|
||||
InstructionX86 instr(0x0f);
|
||||
instr.set_op2(0x87);
|
||||
instr.set(Imm(4, 0));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 cmp_flt_flt(Register a, Register b) {
|
||||
InstructionX86 cmp_f32_f32(Register a, Register b) {
|
||||
ASSERT(a.is_xmm(instr_set));
|
||||
ASSERT(b.is_xmm(instr_set));
|
||||
InstructionX86 instr(0x0f);
|
||||
@@ -1699,7 +1702,7 @@ InstructionX86 cmp_flt_flt(Register a, Register b) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 sqrts_xmm(Register dst, Register src) {
|
||||
InstructionX86 sqrt_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -1710,7 +1713,7 @@ InstructionX86 sqrts_xmm(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 mulss_xmm_xmm(Register dst, Register src) {
|
||||
InstructionX86 mul_f32_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -1721,7 +1724,7 @@ InstructionX86 mulss_xmm_xmm(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 divss_xmm_xmm(Register dst, Register src) {
|
||||
InstructionX86 div_f32_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -1732,7 +1735,7 @@ InstructionX86 divss_xmm_xmm(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 subss_xmm_xmm(Register dst, Register src) {
|
||||
InstructionX86 sub_f32_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -1743,7 +1746,7 @@ InstructionX86 subss_xmm_xmm(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 addss_xmm_xmm(Register dst, Register src) {
|
||||
InstructionX86 add_f32_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -1754,7 +1757,7 @@ InstructionX86 addss_xmm_xmm(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 minss_xmm_xmm(Register dst, Register src) {
|
||||
InstructionX86 min_f32_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -1765,7 +1768,7 @@ InstructionX86 minss_xmm_xmm(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 maxss_xmm_xmm(Register dst, Register src) {
|
||||
InstructionX86 max_f32_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -1776,7 +1779,7 @@ InstructionX86 maxss_xmm_xmm(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 int32_to_float(Register dst, Register src) {
|
||||
InstructionX86 int32_to_f32(Register dst, Register src) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_gpr(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -1787,7 +1790,7 @@ InstructionX86 int32_to_float(Register dst, Register src) {
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 float_to_int32(Register dst, Register src) {
|
||||
InstructionX86 f32_to_int32(Register dst, Register src) {
|
||||
ASSERT(dst.is_gpr(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
InstructionX86 instr(0xf3);
|
||||
@@ -2167,6 +2170,7 @@ InstructionX86 pw_sll(Register dst, Register src, u8 imm) {
|
||||
instr.set(Imm(1, imm));
|
||||
return instr;
|
||||
}
|
||||
|
||||
InstructionX86 ph_sll(Register dst, Register src, u8 imm) {
|
||||
ASSERT(dst.is_xmm(instr_set));
|
||||
ASSERT(src.is_xmm(instr_set));
|
||||
@@ -2447,4 +2451,4 @@ InstructionX86 vpackuswb(Register dst, Register src0, Register src1) {
|
||||
}
|
||||
} // namespace X86
|
||||
} // namespace IGen
|
||||
} // namespace emitter
|
||||
} // namespace emitter
|
||||
|
||||
+52
-50
@@ -34,27 +34,27 @@ InstructionX86 mov_gpr64_s32(Register dst, int64_t val);
|
||||
/*!
|
||||
* Move 32-bits of xmm to 32 bits of gpr (no sign extension).
|
||||
*/
|
||||
InstructionX86 movd_gpr32_xmm32(Register dst, Register src);
|
||||
InstructionX86 movd_gpr32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 32-bits of gpr to 32-bits of xmm (no sign extension)
|
||||
*/
|
||||
InstructionX86 movd_xmm32_gpr32(Register dst, Register src);
|
||||
InstructionX86 movd_f32_gpr32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 64-bits of xmm to 64 bits of gpr (no sign extension).
|
||||
*/
|
||||
InstructionX86 movq_gpr64_xmm64(Register dst, Register src);
|
||||
InstructionX86 movq_gpr64_f64(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 64-bits of gpr to 64-bits of xmm (no sign extension)
|
||||
*/
|
||||
InstructionX86 movq_xmm64_gpr64(Register dst, Register src);
|
||||
InstructionX86 movq_f64_gpr64(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Move 32-bits between xmm's
|
||||
*/
|
||||
InstructionX86 mov_xmm32_xmm32(Register dst, Register src);
|
||||
InstructionX86 mov_f32_f32(Register dst, Register src);
|
||||
|
||||
// todo - GPR64 -> XMM64 (zext)
|
||||
// todo - XMM -> GPR64
|
||||
@@ -323,37 +323,37 @@ InstructionX86 store128_xmm128_reg_offset(Register base, Register xmm_val, s64 o
|
||||
// RIP loads and stores
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
InstructionX86 load64_rip_s32(Register dest, s64 offset);
|
||||
InstructionX86 load64_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionX86 load32s_rip_s32(Register dest, s64 offset);
|
||||
InstructionX86 load32s_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionX86 load32u_rip_s32(Register dest, s64 offset);
|
||||
InstructionX86 load32u_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionX86 load16u_rip_s32(Register dest, s64 offset);
|
||||
InstructionX86 load16u_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionX86 load16s_rip_s32(Register dest, s64 offset);
|
||||
InstructionX86 load16s_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionX86 load8u_rip_s32(Register dest, s64 offset);
|
||||
InstructionX86 load8u_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionX86 load8s_rip_s32(Register dest, s64 offset);
|
||||
InstructionX86 load8s_pcRel_s32(Register dest, s64 offset);
|
||||
|
||||
InstructionX86 static_load(Register dest, s64 offset, int size, bool sign_extend);
|
||||
|
||||
InstructionX86 store64_rip_s32(Register src, s64 offset);
|
||||
InstructionX86 store64_pcRel_s32(Register src, s64 offset);
|
||||
|
||||
InstructionX86 store32_rip_s32(Register src, s64 offset);
|
||||
InstructionX86 store32_pcRel_s32(Register src, s64 offset);
|
||||
|
||||
InstructionX86 store16_rip_s32(Register src, s64 offset);
|
||||
InstructionX86 store16_pcRel_s32(Register src, s64 offset);
|
||||
|
||||
InstructionX86 store8_rip_s32(Register src, s64 offset);
|
||||
InstructionX86 store8_pcRel_s32(Register src, s64 offset);
|
||||
|
||||
InstructionX86 static_store(Register value, s64 offset, int size);
|
||||
|
||||
InstructionX86 static_addr(Register dst, s64 offset);
|
||||
|
||||
InstructionX86 static_load_xmm32(Register simd_dest, s64 offset);
|
||||
InstructionX86 static_load_f32(Register simd_dest, s64 offset);
|
||||
|
||||
InstructionX86 static_store_xmm32(Register xmm_value, s64 offset);
|
||||
InstructionX86 static_store_f32(Register xmm_value, s64 offset);
|
||||
|
||||
// TODO, special load/stores of 128 bit values.
|
||||
|
||||
@@ -477,19 +477,21 @@ InstructionX86 not_gpr64(Register reg);
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr left by CL register
|
||||
* Shift 64-bit gpr left by a shift amount in a register (ie. forced to be CL register on x86)
|
||||
*/
|
||||
InstructionX86 shl_gpr64_cl(Register reg);
|
||||
InstructionX86 shl_gpr64_reg(Register reg, Register shift_reg);
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr right (logical) by CL register
|
||||
* Shift 64-bit gpr right (logical) by a shift amount in a register (ie. forced to be CL register on
|
||||
* x86)
|
||||
*/
|
||||
InstructionX86 shr_gpr64_cl(Register reg);
|
||||
InstructionX86 shr_gpr64_reg(Register reg, Register shift_reg);
|
||||
|
||||
/*!
|
||||
* Shift 64-bit gpr right (arithmetic) by CL register
|
||||
* Shift 64-bit gpr right (arithmetic) by a shift amount in a register (ie. forced to be CL register on
|
||||
* x86)
|
||||
*/
|
||||
InstructionX86 sar_gpr64_cl(Register reg);
|
||||
InstructionX86 sar_gpr64_reg(Register reg, Register shift_reg);
|
||||
|
||||
/*!
|
||||
* Shift 64-ptr left (logical) by the constant shift amount "sa".
|
||||
@@ -513,57 +515,57 @@ InstructionX86 sar_gpr64_u8(Register reg, uint8_t sa);
|
||||
/*!
|
||||
* Jump, 32-bit constant offset. The offset is by default 0 and must be patched later.
|
||||
*/
|
||||
InstructionX86 jmp_32();
|
||||
InstructionX86 jmp_imm();
|
||||
|
||||
/*!
|
||||
* Jump if equal.
|
||||
*/
|
||||
InstructionX86 je_32();
|
||||
InstructionX86 je_imm();
|
||||
|
||||
/*!
|
||||
* Jump not equal.
|
||||
*/
|
||||
InstructionX86 jne_32();
|
||||
InstructionX86 jne_imm();
|
||||
|
||||
/*!
|
||||
* Jump less than or equal.
|
||||
*/
|
||||
InstructionX86 jle_32();
|
||||
InstructionX86 jle_imm();
|
||||
|
||||
/*!
|
||||
* Jump greater than or equal.
|
||||
*/
|
||||
InstructionX86 jge_32();
|
||||
InstructionX86 jge_imm();
|
||||
|
||||
/*!
|
||||
* Jump less than
|
||||
*/
|
||||
InstructionX86 jl_32();
|
||||
InstructionX86 jl_imm();
|
||||
|
||||
/*!
|
||||
* Jump greater than
|
||||
*/
|
||||
InstructionX86 jg_32();
|
||||
InstructionX86 jg_imm();
|
||||
|
||||
/*!
|
||||
* Jump below or equal
|
||||
*/
|
||||
InstructionX86 jbe_32();
|
||||
InstructionX86 jbe_imm();
|
||||
|
||||
/*!
|
||||
* Jump above or equal
|
||||
*/
|
||||
InstructionX86 jae_32();
|
||||
InstructionX86 jae_imm();
|
||||
|
||||
/*!
|
||||
* Jump below
|
||||
*/
|
||||
InstructionX86 jb_32();
|
||||
InstructionX86 jb_imm();
|
||||
|
||||
/*!
|
||||
* Jump above
|
||||
*/
|
||||
InstructionX86 ja_32();
|
||||
InstructionX86 ja_imm();
|
||||
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
// FLOAT MATH
|
||||
@@ -572,49 +574,49 @@ InstructionX86 ja_32();
|
||||
/*!
|
||||
* Compare two floats and set flag register for jump (ucomiss)
|
||||
*/
|
||||
InstructionX86 cmp_flt_flt(Register a, Register b);
|
||||
InstructionX86 cmp_f32_f32(Register a, Register b);
|
||||
|
||||
InstructionX86 sqrts_xmm(Register dst, Register src);
|
||||
InstructionX86 sqrt_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Multiply two floats in xmm's
|
||||
* Multiply two floats in f32's
|
||||
*/
|
||||
InstructionX86 mulss_xmm_xmm(Register dst, Register src);
|
||||
InstructionX86 mul_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Divide two floats in xmm's
|
||||
* Divide two floats in f32's
|
||||
*/
|
||||
InstructionX86 divss_xmm_xmm(Register dst, Register src);
|
||||
InstructionX86 div_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Subtract two floats in xmm's
|
||||
* Subtract two floats in f32's
|
||||
*/
|
||||
InstructionX86 subss_xmm_xmm(Register dst, Register src);
|
||||
InstructionX86 sub_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Add two floats in xmm's
|
||||
* Add two floats in f32's
|
||||
*/
|
||||
InstructionX86 addss_xmm_xmm(Register dst, Register src);
|
||||
InstructionX86 add_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Floating point minimum.
|
||||
*/
|
||||
InstructionX86 minss_xmm_xmm(Register dst, Register src);
|
||||
InstructionX86 min_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Floating point maximum.
|
||||
*/
|
||||
InstructionX86 maxss_xmm_xmm(Register dst, Register src);
|
||||
InstructionX86 max_f32_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Convert GPR int32 to XMM float (single precision)
|
||||
*/
|
||||
InstructionX86 int32_to_float(Register dst, Register src);
|
||||
InstructionX86 int32_to_f32(Register dst, Register src);
|
||||
|
||||
/*!
|
||||
* Convert XMM float to GPR int32(single precision) (truncate)
|
||||
*/
|
||||
InstructionX86 float_to_int32(Register dst, Register src);
|
||||
InstructionX86 f32_to_int32(Register dst, Register src);
|
||||
|
||||
InstructionX86 nop();
|
||||
|
||||
@@ -800,4 +802,4 @@ InstructionX86 vpshufhw(Register dst, Register src, u8 imm);
|
||||
InstructionX86 vpackuswb(Register dst, Register src0, Register src1);
|
||||
} // namespace X86
|
||||
} // namespace IGen
|
||||
} // namespace emitter
|
||||
} // namespace emitter
|
||||
|
||||
+139
-17
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include <span>
|
||||
#include <variant>
|
||||
|
||||
#include "common/common_types.h"
|
||||
@@ -44,59 +45,180 @@ constexpr u32 Base(u32 value, u32 width) {
|
||||
return value << (32 - width);
|
||||
}
|
||||
|
||||
// TODO - consider passing in the instruction name to make debugging easier when an assertion is
|
||||
// hit
|
||||
|
||||
// TODO NOW - fix below
|
||||
// Returns 1 shifted left by n bits (i.e. two to the power n) as a u64.
constexpr u64 pow2(u64 n) {
  constexpr unsigned long long kOne = 1ull;
  return kOne << n;
}
|
||||
|
||||
// Signed-result variant of pow2: computes 1 << n in unsigned arithmetic and
// converts the result to s64.
constexpr s64 pow2s(u64 n) {
  const unsigned long long shifted = 1ull << n;
  return shifted;
}
|
||||
|
||||
// Builds the 2-bit `hw` field, placed at bits [22:21] of the encoding.
// x must be in the range [0, 3].
constexpr Field Hw(u32 x) {
  ASSERT(x <= 0b11);
  // Mask with 0b11 so both field bits survive; masking with 4 would zero
  // every in-range value.
  return Field{(x & 0b11) << 21};
}
|
||||
|
||||
// Builds the 1-bit `sh` field, placed at bit 22 of the encoding.
// x must be 0 or 1.
constexpr Field Sh(u32 x) {
  ASSERT(x < 2);
  const u32 flag = x & 1;
  return Field{flag << 22};
}
|
||||
|
||||
// Builds the 2-bit `shift` field, placed at bits [23:22] of the encoding.
// x must be in the range [0, 3].
constexpr Field Shift(u32 x) {
  ASSERT(x <= 0b11);
  // Mask with 0b11 (not 0b10) so the low bit of the shift type is kept.
  return Field{(x & 0b11) << 22};
}
|
||||
|
||||
// Builds the 5-bit `Rd` register field, placed at bits [4:0] of the encoding.
// x must be a register index in [0, 31].
constexpr Field Rd(u32 x) {
  ASSERT(x < 32);
  const u32 reg = x & 0x1F;
  return Field{reg << 0};
}
|
||||
|
||||
// Builds the 5-bit `Rt` register field, placed at bits [4:0] of the encoding.
// x must be a register index in [0, 31].
constexpr Field Rt(u32 x) {
  ASSERT(x < 32);
  const u32 reg = x & 0x1F;
  return Field{reg << 0};
}
|
||||
|
||||
// Builds the 5-bit `Rn` register field, placed at bits [9:5] of the encoding.
// x must be a register index in [0, 31].
constexpr Field Rn(u32 x) {
  ASSERT(x < 32);
  const u32 reg = x & 0x1F;
  return Field{reg << 5};
}
|
||||
|
||||
// Builds the 5-bit `Rm` register field, placed at bits [20:16] of the encoding.
// x must be a register index in [0, 31].
constexpr Field Rm(u32 x) {
  ASSERT(x < 32);
  const u32 reg = x & 0x1F;
  return Field{reg << 16};
}
|
||||
|
||||
// Builds a 4-bit unsigned immediate field, placed at bits [14:11] of the encoding.
// x must be in the range [0, 15].
constexpr Field Imm4(u32 x) {
  // Note: `2 ^ 4` is XOR in C++ (== 6), not a power; use the literal limit instead.
  ASSERT(x <= 0b1111);
  // Mask to exactly four bits so the value cannot spill into bits 15 and above.
  return Field{(x & 0b1111) << 11};
}
|
||||
|
||||
// Builds a 6-bit unsigned immediate field, placed at bits [15:10] of the encoding.
// x must be in the range [0, 63].
constexpr Field Imm6(u32 x) {
  // Note: `2 ^ 6` is XOR in C++ (== 4), not a power; use the literal limit instead.
  ASSERT(x <= 0b111111);
  return Field{(x & 0b111111) << 10};
}
|
||||
|
||||
constexpr Field Imm9(s32 x) {
|
||||
return Field{(static_cast<uint32_t>(x) & 0b111111111) << 12};
|
||||
constexpr Field Imm9s(s32 x) {
|
||||
ASSERT(x >= (pow2s(9 - 1) * -1) && x <= (pow2s(9 - 1) - 1));
|
||||
return Field{(static_cast<u32>(x) & 0b111111111) << 12};
|
||||
}
|
||||
|
||||
constexpr Field Imm12(u32 x) {
|
||||
ASSERT(x >= 0 && x <= 4095);
|
||||
return Field{(static_cast<uint32_t>(x) & 0b111111111111) << 10};
|
||||
ASSERT(x >= 0 && x <= (pow2(12) - 1));
|
||||
return Field{(static_cast<u32>(x) & 0b111111111111) << 10};
|
||||
}
|
||||
|
||||
// Builds a 16-bit unsigned immediate field, placed at bits [20:5] of the encoding.
// x must be in the range [0, 65535].
constexpr Field Imm16(u32 x) {
  ASSERT(x <= 0xFFFF);
  // A64 encodings that carry an imm16 (e.g. the move-wide-immediate group) put it at
  // bits [20:5]. Shifting by 16 would instead overwrite bits [31:21], which hold the
  // opcode and the hw field.
  return Field{(x & 0xFFFF) << 5};
}
|
||||
|
||||
// Builds a 26-bit unsigned immediate field, placed at bits [25:0] of the encoding.
// x must be in the range [0, 2^26 - 1].
constexpr Field Imm26(u32 x) {
  ASSERT(x < 67108864);
  const u32 masked = static_cast<uint32_t>(x) & 0x3FFFFFF;
  return Field{masked << 0};
}
|
||||
|
||||
// Builds a 19-bit immediate field, placed at bits [23:5] of the encoding.
// x must be in the range [0, 2^19 - 1].
// NOTE(review): the parameter is unsigned, so a negative offset presumably has to be
// reduced to its 19-bit two's-complement form by the caller - confirm against callers.
constexpr Field Imm19(u32 x) {
  // Note: `2 ^ 19` is XOR in C++ (== 17), not a power; compute the limit with a shift.
  ASSERT(x <= ((1u << 19) - 1));
  return Field{(static_cast<uint32_t>(x) & 0b1111111111111111111) << 5};
}
|
||||
|
||||
// Builds the 6-bit `imms` field, placed at bits [15:10] of the encoding.
// x must be in the range [0, 63].
constexpr Field Imms(u32 x) {
  // Note: `2 ^ 6` is XOR in C++ (== 4), not a power; use the literal limit instead.
  ASSERT(x <= 0b111111);
  return Field{(static_cast<uint32_t>(x) & 0b111111) << 10};
}
|
||||
|
||||
// Builds the 6-bit `immr` field, placed at bits [21:16] of the encoding.
// x must be in the range [0, 63].
constexpr Field Immr(u32 x) {
  // Note: `2 ^ 6` is XOR in C++ (== 4), not a power; use the literal limit instead.
  ASSERT(x <= 0b111111);
  return Field{(static_cast<uint32_t>(x) & 0b111111) << 16};
}
|
||||
|
||||
// Builds the 4-bit `immh` field, placed at bits [22:19] of the encoding.
// x must be in the range [0, 15].
constexpr Field Immh(u32 x) {
  // Note: `2 ^ 4` is XOR in C++ (== 6), not a power; use the literal limit instead.
  ASSERT(x <= 0b1111);
  // Mask to exactly four bits so the value cannot spill into bits 23 and above.
  return Field{(static_cast<uint32_t>(x) & 0b1111) << 19};
}
|
||||
|
||||
// Builds the 3-bit `immb` field, placed at bits [18:16] of the encoding.
// x must be in the range [0, 7].
constexpr Field Immb(u32 x) {
  // Note: `2 ^ 3` is XOR in C++ (== 1), not a power; use the literal limit instead.
  ASSERT(x <= 0b111);
  // Mask to exactly three bits so the value cannot spill into bit 19 and above.
  return Field{(static_cast<uint32_t>(x) & 0b111) << 16};
}
|
||||
|
||||
// Builds the 4-bit condition-code field, placed at bits [3:0] of the encoding.
// x must be in the range [0, 15].
constexpr Field Cond(u32 x) {
  // Note: `2 ^ 4` is XOR in C++ (== 6), not a power; use the literal limit instead.
  ASSERT(x <= 0b1111);
  return Field{(static_cast<uint32_t>(x) & 0b1111) << 0};
}
|
||||
} // namespace ARM64
|
||||
|
||||
struct InstructionARM64 : InstructionImpl<InstructionARM64> {
|
||||
// The ARM instruction stream is a sequence of word-aligned words. Each ARM instruction is a
|
||||
// single 32-bit word in that stream.
|
||||
// Info:
|
||||
// - https://yurichev.com/mirrors/ARMv8-A_Architecture_Reference_Manual_(Issue_A.a).pdf
|
||||
// - https://www.scs.stanford.edu/~zyedidia/arm64/
|
||||
// - https://armconverter.com/?lock=arm64&code=STR+X0,+[SP,+%23-8]!
|
||||
u32 encoding;
|
||||
// The ARM instruction stream is a sequence of word-aligned words.
|
||||
// Each ARM instruction is a single 32-bit word in that stream.
|
||||
//
|
||||
// Some x86 instructions are not possible to represent in ARM in a single instruction
|
||||
// however, in order to not have to overhaul things at the IR level,
|
||||
// it feels preferable to instead allow an instruction to emit multiple instructions if needed
|
||||
//
|
||||
// To do so, the instruction can optionally include multiple encodings
|
||||
// all of which are emitted at once.
|
||||
static constexpr int kMaxInstrs = 64;
|
||||
|
||||
u32 encodings[kMaxInstrs]{};
|
||||
u8 count = 0;
|
||||
|
||||
InstructionARM64() = delete;
|
||||
|
||||
// --- single instruction ---
|
||||
template <typename... Fs>
|
||||
constexpr InstructionARM64(uint32_t base, Fs... fields) : encoding((base | ... | fields.bits)) {
|
||||
static_assert((std::is_same_v<Fs, emitter::ARM64::Field> && ...),
|
||||
"All operands must be Field types");
|
||||
constexpr InstructionARM64(uint32_t base, Fs... fields) {
|
||||
static_assert((std::is_same_v<Fs, emitter::ARM64::Field> && ...));
|
||||
encodings[0] = (base | ... | fields.bits);
|
||||
count = 1;
|
||||
}
|
||||
|
||||
// --- multi instruction (variadic) ---
|
||||
template <typename... Instrs>
|
||||
constexpr InstructionARM64(const Instrs&... instrs)
|
||||
requires(std::is_same_v<Instrs, InstructionARM64> && ...)
|
||||
{
|
||||
u8 idx = 0;
|
||||
auto append = [&](const InstructionARM64& i) {
|
||||
for (uint8_t j = 0; j < i.count; ++j) {
|
||||
encodings[idx++] = i.encodings[j];
|
||||
}
|
||||
};
|
||||
(append(instrs), ...);
|
||||
count = idx;
|
||||
}
|
||||
|
||||
// Builds one instruction object by concatenating the encodings of every element of
// `instrs`, in order. Each element may itself hold several encodings.
// The combined number of encodings must not exceed kMaxInstrs.
InstructionARM64(std::span<const InstructionARM64> instrs) {
  u8 idx = 0;
  for (const auto& i : instrs) {
    for (uint8_t j = 0; j < i.count; ++j) {
      // Guard the fixed-size array: without this check an oversized sequence
      // would write past the end of `encodings`.
      ASSERT(idx < kMaxInstrs);
      encodings[idx++] = i.encodings[j];
    }
  }
  count = idx;
}
|
||||
|
||||
uint8_t emit(uint8_t* buffer) const {
|
||||
memcpy(buffer, &encoding, 4);
|
||||
return 4;
|
||||
if (count == 1 && encodings[0] == 0) {
|
||||
return 0;
|
||||
}
|
||||
memcpy(buffer, encodings, count * 4);
|
||||
return count * 4;
|
||||
}
|
||||
|
||||
uint8_t length() const { return 4; }
|
||||
uint8_t length() const {
|
||||
if (count == 1 && encodings[0] == 0) {
|
||||
return 0;
|
||||
}
|
||||
return count * 4;
|
||||
}
|
||||
|
||||
// TODO ARM - all placeholders, no idea if this is even relevant, if not, get rid of it all
|
||||
int get_imm_size() const { return 0; }
|
||||
|
||||
int offset_of_imm() const { return 0; }
|
||||
|
||||
+46
-23
@@ -81,14 +81,17 @@ enum ARM64_REG : s8 {
|
||||
X13, // temp, not-saved
|
||||
X14, // temp, not-saved
|
||||
X15, // temp, not-saved
|
||||
X16, // temp, not-saved
|
||||
X17, // temp, not-saved
|
||||
// temp, not-saved - Conventionally used for linker/veneer/temporary values (we will reserve this
|
||||
// one at least)
|
||||
X16,
|
||||
// temp, not-saved - Conventionally used for linker/veneer/temporary values
|
||||
X17,
|
||||
X18, // temp, not-saved
|
||||
|
||||
x19, // saved TODO purpose?, R12
|
||||
x20, // pp, R13
|
||||
x21, // st, R14
|
||||
x22, // offset, TODO purpose?, R15
|
||||
X19, // saved TODO purpose?, R12
|
||||
X20, // pp, R13
|
||||
X21, // st, R14
|
||||
X22, // offset, TODO purpose?, R15
|
||||
X23, // unused, callee saved
|
||||
X24, // unused, callee saved
|
||||
X25, // unused, callee saved
|
||||
@@ -103,22 +106,39 @@ enum ARM64_REG : s8 {
|
||||
// quadword registers, equivalent to XMMs
|
||||
// the convention in arm64 is the callee preserves all Q values
|
||||
// at the same time though, the caller should not depend on this convention!
|
||||
Q0 = 0,
|
||||
Q1,
|
||||
Q2,
|
||||
Q3,
|
||||
Q4,
|
||||
Q5,
|
||||
Q6,
|
||||
Q7,
|
||||
Q8,
|
||||
Q9,
|
||||
Q10,
|
||||
Q11,
|
||||
Q12,
|
||||
Q13,
|
||||
Q14,
|
||||
Q15
|
||||
V0 = 0,
|
||||
V1,
|
||||
V2,
|
||||
V3,
|
||||
V4,
|
||||
V5,
|
||||
V6,
|
||||
V7,
|
||||
V8,
|
||||
V9,
|
||||
V10,
|
||||
V11,
|
||||
V12,
|
||||
V13,
|
||||
V14,
|
||||
V15,
|
||||
// TODO ARM - we'll want to check at runtime if the platform has 16 V registers, or 32
|
||||
V16,
|
||||
V17,
|
||||
V18,
|
||||
V19,
|
||||
V20,
|
||||
V21,
|
||||
V22,
|
||||
V23,
|
||||
V24,
|
||||
V25,
|
||||
V26,
|
||||
V27,
|
||||
V28,
|
||||
V29,
|
||||
V30,
|
||||
V31,
|
||||
};
|
||||
|
||||
class Register {
|
||||
@@ -128,11 +148,14 @@ class Register {
|
||||
// intentionally not explicit so we can use X86_REGs in place of Registers
|
||||
Register(int id) : m_id(id) {}
|
||||
|
||||
// TODO ARM64 - this assertion isn't as useful for ARM
|
||||
// since Q/V registers are not unique in terms of their id
|
||||
// instead it is the instruction itself that deduces what set of registers to use
|
||||
bool is_128bit_simd(emitter::InstructionSet instr_set) const {
|
||||
if (instr_set == emitter::InstructionSet::X86) {
|
||||
return m_id >= XMM0 && m_id <= XMM15;
|
||||
} else if (instr_set == emitter::InstructionSet::ARM64) {
|
||||
return m_id >= Q0 && m_id <= Q15;
|
||||
return m_id >= V0 && m_id <= V31;
|
||||
} else {
|
||||
ASSERT_MSG(false, "is_128bit_simd: instruction set not supported");
|
||||
}
|
||||
|
||||
@@ -150,10 +150,10 @@ TEST(CodeTester, simd_store_128_arm64) {
|
||||
CodeTester tester(emitter::InstructionSet::ARM64);
|
||||
tester.init_code_buffer(256);
|
||||
|
||||
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, Q3));
|
||||
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, Q3));
|
||||
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, Q14));
|
||||
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, Q13));
|
||||
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, V3));
|
||||
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, V3));
|
||||
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X2, V14));
|
||||
tester.emit(IGen::store128_gpr64_simd128(tester.generator(), X14, V13));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "43 00 80 3d c3 01 80 3d 4e 00 80 3d cd 01 80 3d");
|
||||
}
|
||||
|
||||
@@ -197,13 +197,25 @@ TEST(CodeTester, xmm_load_128_arm64) {
|
||||
CodeTester tester(emitter::InstructionSet::ARM64);
|
||||
tester.init_code_buffer(256);
|
||||
|
||||
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q3, X1));
|
||||
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q3, X14));
|
||||
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q14, X1));
|
||||
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), Q13, X14));
|
||||
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), V3, X1));
|
||||
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), V3, X14));
|
||||
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), V14, X1));
|
||||
tester.emit(IGen::load128_simd128_gpr64(tester.generator(), V13, X14));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "23 00 c0 3d c3 01 c0 3d 2e 00 c0 3d cd 01 c0 3d");
|
||||
}
|
||||
|
||||
// Runs the code held by `tester`, but only when the tester's target instruction set
// matches the architecture this binary was compiled for: ARM64 code is executed on
// __aarch64__ builds, X86 code on all other builds. Any other combination is a no-op.
void execute_tester(CodeTester& tester) {
  const auto target = tester.generator().instr_set();
#ifdef __aarch64__
  if (target == InstructionSet::ARM64) {
    tester.execute();
  }
#else
  if (target == InstructionSet::X86) {
    tester.execute();
  }
#endif
}
|
||||
|
||||
// These tests actually execute the code, you cannot execute arm64 code on x86 and vice versa
|
||||
// so these tests have to be conditional based on the platform unfortunately.
|
||||
TEST(CodeTester, execute_push_pop_simd_x86) {
|
||||
@@ -225,9 +237,7 @@ TEST(CodeTester, execute_push_pop_simd_x86) {
|
||||
"0f 6f 0c 24 48 83 c4 10 66 44 0f 6f 14 24 48 83 c4 10 66 44 0f 6f 1c 24 48 83 c4 10 66 44 "
|
||||
"0f 6f 24 24 48 83 c4 10 66 44 0f 6f 2c 24 48 83 c4 10 66 44 0f 6f 34 24 48 83 c4 10 66 44 "
|
||||
"0f 6f 3c 24 48 83 c4 10 48 83 c4 08 c3");
|
||||
#ifndef __aarch64__
|
||||
tester.execute();
|
||||
#endif
|
||||
execute_tester(tester);
|
||||
}
|
||||
|
||||
TEST(CodeTester, execute_push_pop_simd_arm64) {
|
||||
@@ -247,9 +257,7 @@ TEST(CodeTester, execute_push_pop_simd_arm64) {
|
||||
"ff 43 00 91 e7 03 c0 3d ff 43 00 91 e8 03 c0 3d ff 43 00 91 e9 03 c0 3d ff 43 00 91 ea 03 "
|
||||
"c0 3d ff 43 00 91 eb 03 c0 3d ff 43 00 91 ec 03 c0 3d ff 43 00 91 ed 03 c0 3d ff 43 00 91 "
|
||||
"ee 03 c0 3d ff 43 00 91 ef 03 c0 3d ff 43 00 91 c0 03 5f d6");
|
||||
#ifdef __aarch64__
|
||||
tester.execute();
|
||||
#endif
|
||||
execute_tester(tester);
|
||||
}
|
||||
|
||||
TEST(CodeTester, execute_push_pop_all_the_things_x86) {
|
||||
@@ -276,9 +284,7 @@ TEST(CodeTester, execute_push_pop_all_the_things_x86) {
|
||||
"04 24 48 83 c4 10 66 44 0f 6f 0c 24 48 83 c4 10 66 44 0f 6f 14 24 48 83 c4 10 66 44 "
|
||||
"0f 6f 1c 24 48 83 c4 10 66 44 0f 6f 24 24 48 83 c4 10 66 44 0f 6f 2c 24 48 83 c4 10 "
|
||||
"66 44 0f 6f 34 24 48 83 c4 10 66 44 0f 6f 3c 24 48 83 c4 10 48 83 c4 08 c3");
|
||||
#ifndef __aarch64__
|
||||
tester.execute();
|
||||
#endif
|
||||
execute_tester(tester);
|
||||
}
|
||||
|
||||
TEST(CodeTester, execute_push_pop_all_the_things_arm64) {
|
||||
@@ -310,9 +316,7 @@ TEST(CodeTester, execute_push_pop_all_the_things_arm64) {
|
||||
"ff 43 00 91 e6 03 c0 3d ff 43 00 91 e7 03 c0 3d ff 43 00 91 e8 03 c0 3d ff 43 00 91 e9 03 "
|
||||
"c0 3d ff 43 00 91 ea 03 c0 3d ff 43 00 91 eb 03 c0 3d ff 43 00 91 ec 03 c0 3d ff 43 00 91 "
|
||||
"ed 03 c0 3d ff 43 00 91 ee 03 c0 3d ff 43 00 91 ef 03 c0 3d ff 43 00 91 c0 03 5f d6");
|
||||
#ifdef __aarch64__
|
||||
tester.execute();
|
||||
#endif
|
||||
execute_tester(tester);
|
||||
}
|
||||
|
||||
TEST(CodeTester, execute_return_x86) {
|
||||
@@ -322,9 +326,7 @@ TEST(CodeTester, execute_return_x86) {
|
||||
tester.emit_return();
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "c3");
|
||||
// and execute it!
|
||||
#ifndef __aarch64__
|
||||
tester.execute();
|
||||
#endif
|
||||
execute_tester(tester);
|
||||
}
|
||||
|
||||
TEST(CodeTester, execute_return_arm64) {
|
||||
@@ -335,9 +337,7 @@ TEST(CodeTester, execute_return_arm64) {
|
||||
tester.emit(IGen::ret(tester.generator()));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "00 04 00 91 c0 03 5f d6");
|
||||
// and execute it!
|
||||
#ifdef __aarch64__
|
||||
tester.execute();
|
||||
#endif
|
||||
execute_tester(tester);
|
||||
}
|
||||
|
||||
TEST(CodeTester, execute_push_pop_gprs_x86) {
|
||||
@@ -350,9 +350,7 @@ TEST(CodeTester, execute_push_pop_gprs_x86) {
|
||||
EXPECT_EQ(tester.dump_to_hex_string(),
|
||||
"50 51 52 53 54 55 56 57 41 50 41 51 41 52 41 53 41 54 41 55 41 56 41 57 41 5f 41 5e "
|
||||
"41 5d 41 5c 41 5b 41 5a 41 59 41 58 5f 5e 5d 5c 5b 5a 59 58 c3");
|
||||
#ifndef __aarch64__
|
||||
tester.execute();
|
||||
#endif
|
||||
execute_tester(tester);
|
||||
}
|
||||
|
||||
TEST(CodeTester, execute_push_pop_gprs_arm64) {
|
||||
@@ -372,7 +370,5 @@ TEST(CodeTester, execute_push_pop_gprs_arm64) {
|
||||
"f3 07 41 f8 f2 07 41 f8 f1 07 41 f8 f0 07 41 f8 ef 07 41 f8 ee 07 41 f8 ed 07 41 f8 "
|
||||
"ec 07 41 f8 eb 07 41 f8 ea 07 41 f8 e9 07 41 f8 e8 07 41 f8 e7 07 41 f8 e6 07 41 f8 "
|
||||
"e5 07 41 f8 e4 07 41 f8 e3 07 41 f8 e2 07 41 f8 e1 07 41 f8 e0 07 41 f8 c0 03 5f d6");
|
||||
#ifdef __aarch64__
|
||||
tester.execute();
|
||||
#endif
|
||||
execute_tester(tester);
|
||||
}
|
||||
+4061
-3903
File diff suppressed because it is too large
Load Diff
@@ -634,20 +634,20 @@ TEST(EmitterAVX, VPSHUFHW) {
|
||||
TEST(EmitterAVX, movq_to_gpr_from_xmm) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::movq_gpr64_xmm64(tester.generator(), RSP, XMM0 + 3));
|
||||
tester.emit(IGen::movq_gpr64_xmm64(tester.generator(), RSP, XMM0 + 13));
|
||||
tester.emit(IGen::movq_gpr64_xmm64(tester.generator(), R12, XMM0 + 3));
|
||||
tester.emit(IGen::movq_gpr64_xmm64(tester.generator(), R12, XMM0 + 13));
|
||||
tester.emit(IGen::movq_gpr64_f64(tester.generator(), RSP, XMM0 + 3));
|
||||
tester.emit(IGen::movq_gpr64_f64(tester.generator(), RSP, XMM0 + 13));
|
||||
tester.emit(IGen::movq_gpr64_f64(tester.generator(), R12, XMM0 + 3));
|
||||
tester.emit(IGen::movq_gpr64_f64(tester.generator(), R12, XMM0 + 13));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(true), "66480F7EDC664C0F7EEC66490F7EDC664D0F7EEC");
|
||||
}
|
||||
|
||||
TEST(EmitterAVX, movq_to_xmm_from_gpr) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::movq_xmm64_gpr64(tester.generator(), XMM0 + 3, RSP));
|
||||
tester.emit(IGen::movq_xmm64_gpr64(tester.generator(), XMM0 + 13, RSP));
|
||||
tester.emit(IGen::movq_xmm64_gpr64(tester.generator(), XMM0 + 3, R12));
|
||||
tester.emit(IGen::movq_xmm64_gpr64(tester.generator(), XMM0 + 13, R12));
|
||||
tester.emit(IGen::movq_f64_gpr64(tester.generator(), XMM0 + 3, RSP));
|
||||
tester.emit(IGen::movq_f64_gpr64(tester.generator(), XMM0 + 13, RSP));
|
||||
tester.emit(IGen::movq_f64_gpr64(tester.generator(), XMM0 + 3, R12));
|
||||
tester.emit(IGen::movq_f64_gpr64(tester.generator(), XMM0 + 13, R12));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(true), "66480F6EDC664C0F6EEC66490F6EDC664D0F6EEC");
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user