diff --git a/doc/goal_doc.md b/doc/goal_doc.md index 008be0968a..ee9df6e599 100644 --- a/doc/goal_doc.md +++ b/doc/goal_doc.md @@ -1308,6 +1308,13 @@ Inserts a `FNOP` assembly instruction, which is fundamentally the same as a `NOP Inserts a single-byte `nop`. +## `.wait.vf` +```lisp +(.wait.vf) +``` + +Inserts a `FWAIT` assembly instruction. x86 does not require as much synchronization as the PS2's VU registers did, but it has a purpose in rare cases. It is a 2-byte instruction. + ## `.lvf` ```lisp (.lvf dst-reg src-loc [:color #t|#f]) @@ -1330,7 +1337,7 @@ Store a vector float. Works similarly to the `lvf` form, but there is no optimiz ## Three operand vector float operations. ```lisp -(.<op-name>[<broadcast-element>].vf dst src0 src1 [:color #t|#f] [:mask #b<0-15>]) +(.<op-name>[.<broadcast-element>].vf dst src0 src1 [:color #t|#f] [:mask #b<0-15>]) ``` All the three operand forms work similarly. You can do something like `(.add.vf vf1 vf2 vf3)`. All operations use the similarly named `v<op-name>ps` instruction, xmm128 VEX encoding. We support the following `op-name`s: - `xor` @@ -1342,7 +1349,7 @@ All the three operand forms work similarly. You can do something like `(.add.vf An optional `:mask` value can be provided as a binary number between 0-15 (inclusive). This determines _which_ of the resulting elements will be committed to the destination vector. For example, `:mask #b1011` means that the `w`, `y` and `x` results will be committed. Note that the components are defined left-to-right which may be a little counter-intuitive -- `w` is the left-most, `x` is the right-most. This aligns with the PS2's VU implementation. -Additionally, all of these operations support defining a single `broadcast-element`. This can be one of the 4 vector components `x|y|z|w`. Take the following for an example: `(vaddx.xyzw vf10, vf20, vf30)`, translates into: +Additionally, all of these operations support defining a single `broadcast-element`. This can be one of the 4 vector components `x|y|z|w`. 
Take the following as an example: `(.add.x.vf vf10 vf20 vf30)`, which translates into: ```cpp vf10[x] = vf20[x] + vf30[x] @@ -1351,6 +1358,18 @@ vf10[z] = vf20[z] + vf30[x] vf10[w] = vf20[w] + vf30[x] ``` +## Three operand vector float operations with the accumulator +```lisp +(.<op-name>[.<broadcast-element>].vf dst src0 src1 acc [:color #t|#f] [:mask #b<0-15>]) +``` +There are a few functions that will perform multiple operations involving the accumulator. We support the following `op-name`s: +- `add.mul` - Calculate the product of `src0` and `src1` and add it to the value of `acc` => `acc + (src0 * src1)` +- `sub.mul` - Calculate the product of `src0` and `src1` and subtract it from the value of `acc` => `acc - (src0 * src1)` + +An optional `:mask` value can be provided as a binary number between 0-15 (inclusive). This determines _which_ of the resulting elements will be committed to the destination vector. For example, `:mask #b1011` means that the `w`, `y` and `x` results will be committed. Note that the components are defined left-to-right which may be a little counter-intuitive -- `w` is the left-most, `x` is the right-most. This aligns with the PS2's VU implementation. + +Additionally, all of these operations support defining a single `broadcast-element`. This can be one of the 4 vector components `x|y|z|w`. + ## `.abs.vf` ```lisp (.abs.vf dst src [:color #t|#f] [:mask #b<0-15>]) @@ -1358,11 +1377,47 @@ vf10[w] = vf20[w] + vf30[x] Calculates the absolute value of the `src` vector, and stores in the `dst` vector. 
+## `.div.vf` and `.sqrt.vf` +```lisp +(.div.vf dst src1 src2 :ftf #b<0-3> :fsf #b<0-3> [:color #t|#f]) +``` + +Divides _one_ of `src1`'s components (specified by `fsf`) by _one_ of `src2`'s components (specified by `ftf`) and stores the quotient in every component of `dst`. + +```lisp +(.sqrt.vf dst src :ftf #b<0-3> [:color #t|#f]) +``` + +Calculates the square-root of _one_ of `src`'s components (specified by `ftf`) and stores it in every component of `dst`. + + +These instructions are interesting as they behave differently than the other math operations. In the original VU, results were stored in a separate `Q` register, which was _NOT_ 128-bit. Instead it was a 32-bit register, meaning you have to pick which component from `src` you want to use. `:fsf` and `:ftf` are used to accomplish this; as usual, this is done through bit flags -- `00` will select `x` and `11` will select `w`. + +As `dst` is just yet another vector / xmm register in x86, things are kept simple and the quotient is copied to _all_ packed single-float positions. This allows: +- Selecting any of the resulting vector slots will be equal to the quotient. +- Since the low-floating-point (X) is defined, the xmm register should function as expected for normal math operations + +## `.outer.product.vf` +```lisp +(.outer.product.vf dst src1 src2 [:color #t|#f]) +``` + +Calculates the outer-product of `src1` and `src2` and stores the result in `dst`. _ONLY_ the x,y,z components are considered, and `dst`'s `w` component will be untouched. 
The following example illustrates what the outer-product is: + +Given 2 vectors `V1 = <1,2,3,4>` and `V2 = <5,6,7,8>` and assume `VDEST = <0, 0, 0, 999>` +The outer product is computed like so (only x,y,z components are operated on): +`x = (V1y * V2z) - (V2y * V1z) => (2 * 7) - (6 * 3) => -4` +`y = (V1z * V2x) - (V2z * V1x) => (3 * 5) - (7 * 1) => 8` +`z = (V1x * V2y) - (V2x * V1y) => (1 * 6) - (5 * 2) => -4` +`w = N/A, left alone => 999` + +`VDEST = <-4, 8, -4, 999>` + ## `.blend.vf` ```lisp (.blend.vf dst src0 src1 mask [:color #t|#f]) ``` -Wrapper around `vblendps` (VEX xmm128 version) instruction. The `mask` must evaluate to a constant integer at compile time. The integer must be in the range of 0-15. +Wrapper around `vblendps` (VEX xmm128 version) instruction. The `mask` must evaluate to a constant integer at compile time. The integer must be in the range of 0-15. # Compiler Forms - Unsorted diff --git a/goalc/compiler/Compiler.h b/goalc/compiler/Compiler.h index 3941071541..288de91caf 100644 --- a/goalc/compiler/Compiler.h +++ b/goalc/compiler/Compiler.h @@ -72,6 +72,13 @@ class Compiler { emitter::Register::VF_ELEMENT broadcastElement, Env* env); + Val* compile_asm_vf_math4_two_operation(const goos::Object& form, + const goos::Object& rest, + IR_VFMath3Asm::Kind first_op_kind, + IR_VFMath3Asm::Kind second_op_kind, + emitter::Register::VF_ELEMENT broadcastElement, + Env* env); + Val* get_field_of_structure(const StructureType* type, Val* object, const std::string& field_name, @@ -241,6 +248,11 @@ class Compiler { int offset, Env* env); void compile_constant_product(RegVal* dest, RegVal* src, int stride, Env* env); + void check_vector_float_regs(const goos::Object& form, + Env* env, + std::vector> args); + u8 ftf_fsf_to_blend_mask(u8 val); + emitter::Register::VF_ELEMENT ftf_fsf_to_vector_element(u8 val); template void throw_compiler_error(const goos::Object& code, const std::string& str, Args&&... 
args) { @@ -291,44 +303,63 @@ class Compiler { Val* compile_asm_jr(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_mov(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_nop_vf(const goos::Object& form, const goos::Object& rest, Env* env); + // Vector Float Operations Val* compile_asm_lvf(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_svf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_blend_vf(const goos::Object& form, const goos::Object& rest, Env* env); + + Val* compile_asm_wait_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_nop_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_xor_vf(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_max_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_maxx_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_maxy_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_maxz_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_maxw_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_max_x_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_max_y_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_max_z_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_max_w_vf(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_min_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_minx_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_miny_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_minz_vf(const goos::Object& form, const 
goos::Object& rest, Env* env); - Val* compile_asm_minw_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_min_x_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_min_y_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_min_z_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_min_w_vf(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_sub_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_subx_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_suby_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_subz_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_subw_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_sub_x_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_sub_y_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_sub_z_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_sub_w_vf(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_add_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_addx_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_addy_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_addz_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_addw_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_add_x_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_add_y_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_add_z_vf(const goos::Object& form, const goos::Object& 
rest, Env* env); + Val* compile_asm_add_w_vf(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_mul_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_mulx_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_muly_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_mulz_vf(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_mulw_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_x_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_y_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_z_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_w_vf(const goos::Object& form, const goos::Object& rest, Env* env); + + Val* compile_asm_mul_add_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_add_x_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_add_y_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_add_z_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_add_w_vf(const goos::Object& form, const goos::Object& rest, Env* env); + + Val* compile_asm_mul_sub_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_sub_x_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_sub_y_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_sub_z_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_mul_sub_w_vf(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_abs_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_outer_product_vf(const 
goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_blend_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_div_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_sqrt_vf(const goos::Object& form, const goos::Object& rest, Env* env); // Atoms diff --git a/goalc/compiler/IR.cpp b/goalc/compiler/IR.cpp index 4a9f991a69..cecba5c15a 100644 --- a/goalc/compiler/IR.cpp +++ b/goalc/compiler/IR.cpp @@ -1099,6 +1099,27 @@ void IR_AsmFNop::do_codegen(emitter::ObjectGenerator* gen, gen->add_instr(IGen::nop_vf(), irec); } +/////////////////////// +// AsmFWait +/////////////////////// + +IR_AsmFWait::IR_AsmFWait() : IR_Asm(false) {} + +std::string IR_AsmFWait::print() { + return ".wait.vf"; +} + +RegAllocInstr IR_AsmFWait::to_rai() { + return {}; +} + +void IR_AsmFWait::do_codegen(emitter::ObjectGenerator* gen, + const AllocationResult& allocs, + emitter::IR_Record irec) { + (void)allocs; + gen->add_instr(IGen::wait_vf(), irec); +} + /////////////////////// // AsmPush /////////////////////// @@ -1343,6 +1364,9 @@ std::string IR_VFMath3Asm::print() { case Kind::MIN: function = ".min.vf"; break; + case Kind::DIV: + function = ".div.vf"; + break; default: assert(false); } @@ -1386,11 +1410,16 @@ void IR_VFMath3Asm::do_codegen(emitter::ObjectGenerator* gen, case Kind::MIN: gen->add_instr(IGen::min_vf(dst, src1, src2), irec); break; + case Kind::DIV: + gen->add_instr(IGen::div_vf(dst, src1, src2), irec); + break; default: assert(false); } } +// ---- Blend VF + IR_BlendVF::IR_BlendVF(bool use_color, const RegVal* dst, const RegVal* src1, @@ -1422,6 +1451,8 @@ void IR_BlendVF::do_codegen(emitter::ObjectGenerator* gen, gen->add_instr(IGen::blend_vf(dst, src1, src2, m_mask), irec); } +// ----- Splat VF + IR_SplatVF::IR_SplatVF(bool use_color, const RegVal* dst, const RegVal* src, @@ -1449,3 +1480,60 @@ void IR_SplatVF::do_codegen(emitter::ObjectGenerator* gen, auto src = get_reg_asm(m_src, 
allocs, irec, m_use_coloring); gen->add_instr(IGen::splat_vf(dst, src, m_element), irec); } + +// ---- Swizzle VF + +IR_SwizzleVF::IR_SwizzleVF(bool use_color, + const RegVal* dst, + const RegVal* src, + const u8 controlBytes) + : IR_Asm(use_color), m_dst(dst), m_src(src), m_controlBytes(controlBytes) {} + +std::string IR_SwizzleVF::print() { + return fmt::format(".swizzle.vf{} {}, {}, {}", get_color_suffix_string(), m_dst->print(), + m_src->print(), m_controlBytes); +} + +RegAllocInstr IR_SwizzleVF::to_rai() { + RegAllocInstr rai; + if (m_use_coloring) { + rai.write.push_back(m_dst->ireg()); + rai.read.push_back(m_src->ireg()); + } + return rai; +} + +void IR_SwizzleVF::do_codegen(emitter::ObjectGenerator* gen, + const AllocationResult& allocs, + emitter::IR_Record irec) { + auto dst = get_reg_asm(m_dst, allocs, irec, m_use_coloring); + auto src = get_reg_asm(m_src, allocs, irec, m_use_coloring); + gen->add_instr(IGen::swizzle_vf(dst, src, m_controlBytes), irec); +} + +// ---- Square Root VF + +IR_SqrtVF::IR_SqrtVF(bool use_color, const RegVal* dst, const RegVal* src) + : IR_Asm(use_color), m_dst(dst), m_src(src) {} + +std::string IR_SqrtVF::print() { + return fmt::format(".sqrt.vf{} {}, {}", get_color_suffix_string(), m_dst->print(), + m_src->print()); +} + +RegAllocInstr IR_SqrtVF::to_rai() { + RegAllocInstr rai; + if (m_use_coloring) { + rai.write.push_back(m_dst->ireg()); + rai.read.push_back(m_src->ireg()); + } + return rai; +} + +void IR_SqrtVF::do_codegen(emitter::ObjectGenerator* gen, + const AllocationResult& allocs, + emitter::IR_Record irec) { + auto dst = get_reg_asm(m_dst, allocs, irec, m_use_coloring); + auto src = get_reg_asm(m_src, allocs, irec, m_use_coloring); + gen->add_instr(IGen::sqrt_vf(dst, src), irec); +} diff --git a/goalc/compiler/IR.h b/goalc/compiler/IR.h index 35d8c3f2e4..a155ebc3cf 100644 --- a/goalc/compiler/IR.h +++ b/goalc/compiler/IR.h @@ -473,6 +473,16 @@ class IR_AsmFNop : public IR_Asm { emitter::IR_Record irec) override; }; 
+class IR_AsmFWait : public IR_Asm { + public: + IR_AsmFWait(); + std::string print() override; + RegAllocInstr to_rai() override; + void do_codegen(emitter::ObjectGenerator* gen, + const AllocationResult& allocs, + emitter::IR_Record irec) override; +}; + class IR_GetSymbolValueAsm : public IR_Asm { public: IR_GetSymbolValueAsm(bool use_coloring, const RegVal* dest, std::string sym_name, bool sext); @@ -517,7 +527,7 @@ class IR_RegSetAsm : public IR_Asm { class IR_VFMath3Asm : public IR_Asm { public: - enum class Kind { XOR, SUB, ADD, MUL, MAX, MIN }; + enum class Kind { XOR, SUB, ADD, MUL, MAX, MIN, DIV }; IR_VFMath3Asm(bool use_color, const RegVal* dst, const RegVal* src1, @@ -556,7 +566,7 @@ class IR_SplatVF : public IR_Asm { public: IR_SplatVF(bool use_color, const RegVal* dst, - const RegVal* src1, + const RegVal* src, const emitter::Register::VF_ELEMENT element); std::string print() override; RegAllocInstr to_rai() override; @@ -569,4 +579,33 @@ class IR_SplatVF : public IR_Asm { const RegVal* m_src = nullptr; const emitter::Register::VF_ELEMENT m_element = emitter::Register::VF_ELEMENT::NONE; }; + +class IR_SwizzleVF : public IR_Asm { + public: + IR_SwizzleVF(bool use_color, const RegVal* dst, const RegVal* src, const u8 m_controlBytes); + std::string print() override; + RegAllocInstr to_rai() override; + void do_codegen(emitter::ObjectGenerator* gen, + const AllocationResult& allocs, + emitter::IR_Record irec) override; + + protected: + const RegVal* m_dst = nullptr; + const RegVal* m_src = nullptr; + const u8 m_controlBytes = 0; +}; + +class IR_SqrtVF : public IR_Asm { + public: + IR_SqrtVF(bool use_color, const RegVal* dst, const RegVal* src); + std::string print() override; + RegAllocInstr to_rai() override; + void do_codegen(emitter::ObjectGenerator* gen, + const AllocationResult& allocs, + emitter::IR_Record irec) override; + + protected: + const RegVal* m_dst = nullptr; + const RegVal* m_src = nullptr; +}; #endif // JAK_IR_H diff --git 
a/goalc/compiler/compilation/Asm.cpp b/goalc/compiler/compilation/Asm.cpp index 2a424790a6..2e98d46539 100644 --- a/goalc/compiler/compilation/Asm.cpp +++ b/goalc/compiler/compilation/Asm.cpp @@ -250,6 +250,14 @@ Val* Compiler::compile_asm_nop_vf(const goos::Object& form, const goos::Object& return get_none(); } +Val* Compiler::compile_asm_wait_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + auto args = get_va(form, rest); + va_check(form, args, {}, {}); + + env->emit_ir(); + return get_none(); +} + /*! * Load a vector float from memory. Does an aligned load. */ @@ -319,192 +327,15 @@ Val* Compiler::compile_asm_svf(const goos::Object& form, const goos::Object& res return get_none(); } -Val* Compiler::compile_asm_xor_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::XOR, - emitter::Register::VF_ELEMENT::NONE, env); -} - -Val* Compiler::compile_asm_max_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, - emitter::Register::VF_ELEMENT::NONE, env); -} - -Val* Compiler::compile_asm_maxx_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, - emitter::Register::VF_ELEMENT::X, env); -} - -Val* Compiler::compile_asm_maxy_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, - emitter::Register::VF_ELEMENT::Y, env); -} - -Val* Compiler::compile_asm_maxz_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, - emitter::Register::VF_ELEMENT::Z, env); -} - -Val* Compiler::compile_asm_maxw_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, - emitter::Register::VF_ELEMENT::W, 
env); -} - -Val* Compiler::compile_asm_min_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, - emitter::Register::VF_ELEMENT::NONE, env); -} - -Val* Compiler::compile_asm_minx_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, - emitter::Register::VF_ELEMENT::X, env); -} - -Val* Compiler::compile_asm_miny_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, - emitter::Register::VF_ELEMENT::Y, env); -} - -Val* Compiler::compile_asm_minz_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, - emitter::Register::VF_ELEMENT::Z, env); -} - -Val* Compiler::compile_asm_minw_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, - emitter::Register::VF_ELEMENT::W, env); -} - -Val* Compiler::compile_asm_sub_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, - emitter::Register::VF_ELEMENT::NONE, env); -} - -Val* Compiler::compile_asm_subx_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, - emitter::Register::VF_ELEMENT::X, env); -} - -Val* Compiler::compile_asm_suby_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, - emitter::Register::VF_ELEMENT::Y, env); -} - -Val* Compiler::compile_asm_subz_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, - emitter::Register::VF_ELEMENT::Z, env); -} - -Val* Compiler::compile_asm_subw_vf(const 
goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, - emitter::Register::VF_ELEMENT::W, env); -} - -Val* Compiler::compile_asm_add_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, - emitter::Register::VF_ELEMENT::NONE, env); -} - -Val* Compiler::compile_asm_addx_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, - emitter::Register::VF_ELEMENT::X, env); -} - -Val* Compiler::compile_asm_addy_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, - emitter::Register::VF_ELEMENT::Y, env); -} - -Val* Compiler::compile_asm_addz_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, - emitter::Register::VF_ELEMENT::Z, env); -} - -Val* Compiler::compile_asm_addw_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, - emitter::Register::VF_ELEMENT::W, env); -} - -Val* Compiler::compile_asm_mul_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, - emitter::Register::VF_ELEMENT::NONE, env); -} - -Val* Compiler::compile_asm_mulx_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, - emitter::Register::VF_ELEMENT::X, env); -} - -Val* Compiler::compile_asm_muly_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, - emitter::Register::VF_ELEMENT::Y, env); -} - -Val* Compiler::compile_asm_mulz_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - 
return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, - emitter::Register::VF_ELEMENT::Z, env); -} - -Val* Compiler::compile_asm_mulw_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, - emitter::Register::VF_ELEMENT::W, env); -} - -Val* Compiler::compile_asm_abs_vf(const goos::Object& form, const goos::Object& rest, Env* env) { - auto args = get_va(form, rest); - va_check( - form, args, {{}, {}}, - {{"color", {false, goos::ObjectType::SYMBOL}}, {"mask", {false, goos::ObjectType::INTEGER}}}); - bool color = true; - if (args.has_named("color")) { - color = get_true_or_false(form, args.named.at("color")); - } - - auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env); - if (!dest->settable() || dest->ireg().reg_class != RegClass::VECTOR_FLOAT) { - throw_compiler_error( - form, "Invalid destination register for a vector float 3-arg math form. Got a {}.", - dest->print()); - } - - auto src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env); - if (src->ireg().reg_class != RegClass::VECTOR_FLOAT) { - throw_compiler_error( - form, "Invalid first source register for a vector float 3-arg math form. Got a {}.", - src->print()); - } - - u8 mask = 0b1111; - if (args.has_named("mask")) { - mask = args.named.at("mask").as_int(); - if (mask > 15) { - throw_compiler_error( - form, "The value {} is out of range for a destination mask (0-15 inclusive).", mask); +void Compiler::check_vector_float_regs(const goos::Object& form, + Env* env, + std::vector> args) { + for (std::pair arg : args) { + if (!arg.second->settable() || arg.second->ireg().reg_class != RegClass::VECTOR_FLOAT) { + throw_compiler_error(form, "Invalid {} register for a vector float operation form. Got a {}.", + arg.first, arg.second->print()); } } - - // There is no single instruction ABS on AVX, so there are a number of ways to do it manually, - // this is one of them. 
For example, assume the original vec = <1, -2, -3, 4> - - // First we clear a temporary register, XOR'ing itself - auto temp_reg = env->make_vfr(dest->type()); - env->emit_ir(color, temp_reg, temp_reg, temp_reg, IR_VFMath3Asm::Kind::XOR); - - // Next, find the difference between our source operand and 0, use the same temp register, no need - // to use another <0, 0, 0, 0> - <1, -2, -3, 4> = <-1, 2, 3, 4> - env->emit_ir(color, temp_reg, temp_reg, src, IR_VFMath3Asm::Kind::SUB); - - // Finally, find the maximum between our difference, and the original value - // MAX_OF(<-1, 2, 3, 4>, <1, -2, -3, 4>) = <1, 2, 3, 4> - if (mask == 0b1111) { // If the entire destination is to be copied, we can optimize out the blend - env->emit_ir(color, dest, src, temp_reg, IR_VFMath3Asm::Kind::MAX); - } else { - env->emit_ir(color, temp_reg, src, temp_reg, IR_VFMath3Asm::Kind::MAX); - - // Blend the result back into the destination register using the mask - env->emit_ir(color, dest, dest, temp_reg, mask); - } - - return get_none(); } Val* Compiler::compile_asm_blend_vf(const goos::Object& form, const goos::Object& rest, Env* env) { @@ -518,25 +349,10 @@ Val* Compiler::compile_asm_blend_vf(const goos::Object& form, const goos::Object } auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env); - if (!dest->settable() || dest->ireg().reg_class != RegClass::VECTOR_FLOAT) { - throw_compiler_error( - form, "Invalid destination register for a vector float 3-arg math form. Got a {}.", - dest->print()); - } - auto src1 = compile_error_guard(args.unnamed.at(1), env)->to_reg(env); - if (src1->ireg().reg_class != RegClass::VECTOR_FLOAT) { - throw_compiler_error( - form, "Invalid first source register for a vector float 3-arg math form. 
Got a {}.", - src1->print()); - } - auto src2 = compile_error_guard(args.unnamed.at(2), env)->to_reg(env); - if (src2->ireg().reg_class != RegClass::VECTOR_FLOAT) { - throw_compiler_error( - form, "Invalid second source register for a vector float 3-arg math form. Got a {}.", - src2->print()); - } + check_vector_float_regs(form, env, + {{"destination", dest}, {"first source", src1}, {"second source", src2}}); u8 mask = 0b1111; if (args.has_named("mask")) { @@ -566,25 +382,10 @@ Val* Compiler::compile_asm_vf_math3(const goos::Object& form, } auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env); - if (!dest->settable() || dest->ireg().reg_class != RegClass::VECTOR_FLOAT) { - throw_compiler_error( - form, "Invalid destination register for a vector float 3-arg math form. Got a {}.", - dest->print()); - } - auto src1 = compile_error_guard(args.unnamed.at(1), env)->to_reg(env); - if (src1->ireg().reg_class != RegClass::VECTOR_FLOAT) { - throw_compiler_error( - form, "Invalid first source register for a vector float 3-arg math form. Got a {}.", - src1->print()); - } - auto src2 = compile_error_guard(args.unnamed.at(2), env)->to_reg(env); - if (src2->ireg().reg_class != RegClass::VECTOR_FLOAT) { - throw_compiler_error( - form, "Invalid second source register for a vector float 3-arg math form. 
Got a {}.", - src2->print()); - } + check_vector_float_regs(form, env, + {{"destination", dest}, {"first source", src1}, {"second source", src2}}); u8 mask = 0b1111; if (args.has_named("mask")) { @@ -629,3 +430,516 @@ Val* Compiler::compile_asm_vf_math3(const goos::Object& form, return get_none(); } + +Val* Compiler::compile_asm_xor_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::XOR, + emitter::Register::VF_ELEMENT::NONE, env); +} + +Val* Compiler::compile_asm_max_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, + emitter::Register::VF_ELEMENT::NONE, env); +} + +Val* Compiler::compile_asm_max_x_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, + emitter::Register::VF_ELEMENT::X, env); +} + +Val* Compiler::compile_asm_max_y_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, + emitter::Register::VF_ELEMENT::Y, env); +} + +Val* Compiler::compile_asm_max_z_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, + emitter::Register::VF_ELEMENT::Z, env); +} + +Val* Compiler::compile_asm_max_w_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, + emitter::Register::VF_ELEMENT::W, env); +} + +Val* Compiler::compile_asm_min_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, + emitter::Register::VF_ELEMENT::NONE, env); +} + +Val* Compiler::compile_asm_min_x_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, + 
emitter::Register::VF_ELEMENT::X, env); +} + +Val* Compiler::compile_asm_min_y_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, + emitter::Register::VF_ELEMENT::Y, env); +} + +Val* Compiler::compile_asm_min_z_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, + emitter::Register::VF_ELEMENT::Z, env); +} + +Val* Compiler::compile_asm_min_w_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, + emitter::Register::VF_ELEMENT::W, env); +} + +Val* Compiler::compile_asm_sub_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::NONE, env); +} + +Val* Compiler::compile_asm_sub_x_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::X, env); +} + +Val* Compiler::compile_asm_sub_y_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::Y, env); +} + +Val* Compiler::compile_asm_sub_z_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::Z, env); +} + +Val* Compiler::compile_asm_sub_w_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::W, env); +} + +Val* Compiler::compile_asm_add_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, + emitter::Register::VF_ELEMENT::NONE, env); +} + +Val* 
Compiler::compile_asm_add_x_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, + emitter::Register::VF_ELEMENT::X, env); +} + +Val* Compiler::compile_asm_add_y_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, + emitter::Register::VF_ELEMENT::Y, env); +} + +Val* Compiler::compile_asm_add_z_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, + emitter::Register::VF_ELEMENT::Z, env); +} + +Val* Compiler::compile_asm_add_w_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, + emitter::Register::VF_ELEMENT::W, env); +} + +Val* Compiler::compile_asm_mul_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, + emitter::Register::VF_ELEMENT::NONE, env); +} + +Val* Compiler::compile_asm_mul_x_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, + emitter::Register::VF_ELEMENT::X, env); +} + +Val* Compiler::compile_asm_mul_y_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, + emitter::Register::VF_ELEMENT::Y, env); +} + +Val* Compiler::compile_asm_mul_z_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, + emitter::Register::VF_ELEMENT::Z, env); +} + +Val* Compiler::compile_asm_mul_w_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, + emitter::Register::VF_ELEMENT::W, env); +} + +Val* Compiler::compile_asm_vf_math4_two_operation(const 
goos::Object& form,
+                                                  const goos::Object& rest,
+                                                  IR_VFMath3Asm::Kind first_op_kind,
+                                                  IR_VFMath3Asm::Kind second_op_kind,
+                                                  emitter::Register::VF_ELEMENT broadcastElement,
+                                                  Env* env) {
+  // Compiles a four-operand form `(op dest src1 src2 acc [:color] [:mask])` as two chained
+  // three-operand operations:
+  //   temp = src1 <first_op_kind> src2    (src2 optionally replaced by one broadcast element)
+  //   dest = acc  <second_op_kind> temp   (optionally blended into dest through :mask)
+  auto args = get_va(form, rest);
+  va_check(
+      form, args, {{}, {}, {}, {}},
+      {{"color", {false, goos::ObjectType::SYMBOL}}, {"mask", {false, goos::ObjectType::INTEGER}}});
+  bool color = true;
+  if (args.has_named("color")) {
+    color = get_true_or_false(form, args.named.at("color"));
+  }
+
+  auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
+  auto src1 = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);
+  auto src2 = compile_error_guard(args.unnamed.at(2), env)->to_reg(env);
+  // This third register is intended for the ACC/Q/ETC, and is used to temporarily store the value
+  // that eventually goes into the destination
+  //
+  // For example VMADDA:
+  // > ACC += src1 * src2
+  // > DEST = ACC
+  auto src3 = compile_error_guard(args.unnamed.at(3), env)->to_reg(env);
+  check_vector_float_regs(form, env,
+                          {{"destination", dest},
+                           {"first source", src1},
+                           {"second source", src2},
+                           {"third source", src3}});
+
+  u8 mask = 0b1111;
+  if (args.has_named("mask")) {
+    // Range-check the integer at its full width *before* narrowing to u8; otherwise a value such
+    // as 256 would wrap to 0 and silently pass the check.
+    const auto mask_arg = args.named.at("mask").as_int();
+    if (mask_arg < 0 || mask_arg > 15) {
+      throw_compiler_error(form, "The value {} is out of range for a blend mask (0-15 inclusive).",
+                           mask_arg);
+    }
+    mask = static_cast<u8>(mask_arg);
+  }
+
+  // Scratch register for the intermediate result. It is always written before it is read below,
+  // so it does not need to be cleared first.
+  auto temp_reg = env->make_vfr(dest->type());
+
+  // If there is a broadcast element, splat that float across the entire temporary register and use
+  // it in place of src2. For example vaddx.xyzw vf10, vf20, vf30:
+  // vf10[x] = vf20[x] + vf30[x]
+  // vf10[y] = vf20[y] + vf30[x]
+  // vf10[z] = vf20[z] + vf30[x]
+  // vf10[w] = vf20[w] + vf30[x]
+  if (broadcastElement != emitter::Register::VF_ELEMENT::NONE) {
+    env->emit_ir(color, temp_reg, src2, broadcastElement);
+
+    // Perform the first operation
+    env->emit_ir(color, temp_reg, src1, temp_reg, first_op_kind);
+  } else {
+    // Perform the first operation
+    env->emit_ir(color, temp_reg, src1, src2, first_op_kind);
+  }
+
+  // The second operation is identical for the broadcast and non-broadcast cases.
+  // If the entire destination is to be copied, we can optimize out the blend
+  if (mask == 0b1111) {
+    env->emit_ir(color, dest, src3, temp_reg, second_op_kind);
+  } else {
+    // Perform the second operation on the two vectors into the temporary register
+    env->emit_ir(color, temp_reg, src3, temp_reg, second_op_kind);
+    // Blend the result back into the destination register using the mask
+    env->emit_ir(color, dest, dest, temp_reg, mask);
+  }
+
+  return get_none();
+}
+
+Val* Compiler::compile_asm_mul_add_vf(const goos::Object& form,
+                                      const goos::Object& rest,
+                                      Env* env) {
+  return compile_asm_vf_math4_two_operation(form, rest, IR_VFMath3Asm::Kind::MUL,
+                                            IR_VFMath3Asm::Kind::ADD,
+                                            emitter::Register::VF_ELEMENT::NONE, env);
+}
+
+Val* Compiler::compile_asm_mul_add_x_vf(const goos::Object& form,
+                                        const goos::Object& rest,
+                                        Env* env) {
+  return compile_asm_vf_math4_two_operation(form, rest, IR_VFMath3Asm::Kind::MUL,
+                                            IR_VFMath3Asm::Kind::ADD,
+                                            emitter::Register::VF_ELEMENT::X, env);
+}
+
+Val* Compiler::compile_asm_mul_add_y_vf(const goos::Object& form,
+                                        const goos::Object& rest,
+                                        Env* env) {
+  return compile_asm_vf_math4_two_operation(form, rest, IR_VFMath3Asm::Kind::MUL,
+                                            IR_VFMath3Asm::Kind::ADD,
+                                            emitter::Register::VF_ELEMENT::Y, env);
+}
+
+Val* Compiler::compile_asm_mul_add_z_vf(const goos::Object& form,
+                                        const goos::Object& rest,
+                                        Env* env) {
+  return compile_asm_vf_math4_two_operation(form, rest,
IR_VFMath3Asm::Kind::MUL, + IR_VFMath3Asm::Kind::ADD, + emitter::Register::VF_ELEMENT::Z, env); +} + +Val* Compiler::compile_asm_mul_add_w_vf(const goos::Object& form, + const goos::Object& rest, + Env* env) { + return compile_asm_vf_math4_two_operation(form, rest, IR_VFMath3Asm::Kind::MUL, + IR_VFMath3Asm::Kind::ADD, + emitter::Register::VF_ELEMENT::W, env); +} + +Val* Compiler::compile_asm_mul_sub_vf(const goos::Object& form, + const goos::Object& rest, + Env* env) { + return compile_asm_vf_math4_two_operation(form, rest, IR_VFMath3Asm::Kind::MUL, + IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::NONE, env); +} + +Val* Compiler::compile_asm_mul_sub_x_vf(const goos::Object& form, + const goos::Object& rest, + Env* env) { + return compile_asm_vf_math4_two_operation(form, rest, IR_VFMath3Asm::Kind::MUL, + IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::X, env); +} + +Val* Compiler::compile_asm_mul_sub_y_vf(const goos::Object& form, + const goos::Object& rest, + Env* env) { + return compile_asm_vf_math4_two_operation(form, rest, IR_VFMath3Asm::Kind::MUL, + IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::Y, env); +} + +Val* Compiler::compile_asm_mul_sub_z_vf(const goos::Object& form, + const goos::Object& rest, + Env* env) { + return compile_asm_vf_math4_two_operation(form, rest, IR_VFMath3Asm::Kind::MUL, + IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::Z, env); +} + +Val* Compiler::compile_asm_mul_sub_w_vf(const goos::Object& form, + const goos::Object& rest, + Env* env) { + return compile_asm_vf_math4_two_operation(form, rest, IR_VFMath3Asm::Kind::MUL, + IR_VFMath3Asm::Kind::SUB, + emitter::Register::VF_ELEMENT::W, env); +} + +Val* Compiler::compile_asm_abs_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + auto args = get_va(form, rest); + va_check( + form, args, {{}, {}}, + {{"color", {false, goos::ObjectType::SYMBOL}}, {"mask", {false, goos::ObjectType::INTEGER}}}); + bool color = true; + if 
(args.has_named("color")) {
+    color = get_true_or_false(form, args.named.at("color"));
+  }
+
+  auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
+  auto src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);
+  check_vector_float_regs(form, env, {{"destination", dest}, {"source", src}});
+
+  u8 mask = 0b1111;
+  if (args.has_named("mask")) {
+    // Range-check at full integer width before narrowing to u8 (256 would otherwise wrap to 0).
+    const auto mask_arg = args.named.at("mask").as_int();
+    if (mask_arg < 0 || mask_arg > 15) {
+      throw_compiler_error(
+          form, "The value {} is out of range for a destination mask (0-15 inclusive).", mask_arg);
+    }
+    mask = static_cast<u8>(mask_arg);
+  }
+
+  // There is no single instruction ABS on AVX, so there are a number of ways to do it manually,
+  // this is one of them. For example, assume the original vec = <1, -2, -3, 4>
+
+  // First we clear a temporary register, XOR'ing itself
+  auto temp_reg = env->make_vfr(dest->type());
+  env->emit_ir(color, temp_reg, temp_reg, temp_reg, IR_VFMath3Asm::Kind::XOR);
+
+  // Next, find the difference between 0 and our source operand, use the same temp register, no
+  // need to use another: <0, 0, 0, 0> - <1, -2, -3, 4> = <-1, 2, 3, -4>
+  env->emit_ir(color, temp_reg, temp_reg, src, IR_VFMath3Asm::Kind::SUB);
+
+  // Finally, find the maximum between our difference, and the original value
+  // MAX_OF(<-1, 2, 3, -4>, <1, -2, -3, 4>) = <1, 2, 3, 4>
+  if (mask == 0b1111) {  // If the entire destination is to be copied, we can optimize out the blend
+    env->emit_ir(color, dest, src, temp_reg, IR_VFMath3Asm::Kind::MAX);
+  } else {
+    env->emit_ir(color, temp_reg, src, temp_reg, IR_VFMath3Asm::Kind::MAX);
+
+    // Blend the result back into the destination register using the mask
+    env->emit_ir(color, dest, dest, temp_reg, mask);
+  }
+
+  return get_none();
+}
+
+u8 Compiler::ftf_fsf_to_blend_mask(u8 val) {
+  // Converts a 2-bit ftf/fsf element selector into a one-hot blend mask:
+  // 00 -> x
+  // ...
+  // 11 -> w
+  return 0b0001 << val;
+}
+
+emitter::Register::VF_ELEMENT Compiler::ftf_fsf_to_vector_element(u8 val) {
+  // Converts a 2-bit ftf/fsf element selector into the matching vector element:
+  // 00 -> x
+  // ...
+  // 11 -> w
+  switch (val) {
+    case 0b00:
+      return emitter::Register::VF_ELEMENT::X;
+    case 0b01:
+      return emitter::Register::VF_ELEMENT::Y;
+    case 0b10:
+      return emitter::Register::VF_ELEMENT::Z;
+    case 0b11:
+      return emitter::Register::VF_ELEMENT::W;
+  }
+  // Not reached for a valid 2-bit selector (the callers in this file range-check to 0-3 first).
+  // Returning explicitly avoids flowing off the end of a non-void function, which is undefined
+  // behavior.
+  return emitter::Register::VF_ELEMENT::NONE;
+}
+
+Val* Compiler::compile_asm_div_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
+  // Compiles `(.div.vf dest src1 src2 :fsf <0-3> :ftf <0-3> [:color #t|#f])`:
+  // every element of dest receives src1[fsf] / src2[ftf].
+  auto args = get_va(form, rest);
+  va_check(form, args, {{}, {}, {}},
+           {
+               {"color", {false, goos::ObjectType::SYMBOL}},
+               {"fsf", {true, goos::ObjectType::INTEGER}},
+               {"ftf", {true, goos::ObjectType::INTEGER}},
+           });
+  bool color = true;
+  if (args.has_named("color")) {
+    color = get_true_or_false(form, args.named.at("color"));
+  }
+
+  auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
+  auto src1 = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);
+  auto src2 = compile_error_guard(args.unnamed.at(2), env)->to_reg(env);
+  check_vector_float_regs(form, env,
+                          {{"destination", dest}, {"first source", src1}, {"second source", src2}});
+
+  // Range-check the selectors at full integer width before narrowing to u8, otherwise an
+  // out-of-range value such as 256 would wrap to 0 and be accepted.
+  const auto fsf_arg = args.named.at("fsf").as_int();
+  if (fsf_arg < 0 || fsf_arg > 3) {
+    throw_compiler_error(form, "The value {} is out of range for fsf (0-3 inclusive).", fsf_arg);
+  }
+  const u8 fsf = static_cast<u8>(fsf_arg);
+  const auto ftf_arg = args.named.at("ftf").as_int();
+  if (ftf_arg < 0 || ftf_arg > 3) {
+    throw_compiler_error(form, "The value {} is out of range for ftf (0-3 inclusive).", ftf_arg);
+  }
+  const u8 ftf = static_cast<u8>(ftf_arg);
+
+  // VDIV in the VU stores its result in a single 32bit `Q` Register, it does not compute the packed
+  // division result
+  //
+  // Furthermore, you can mix and match the vector elements (ex. src1's X component divided by
+  // src2's Y). Because of this, we splat the two selected components across whole registers,
+  // perform the divide, and leave the result in every element of the dest. register.
+  //
+  // Why do we even bother using VDIVPS instead of FDIV? Because otherwise in x86, you have to use
+  // the FPU stack. Registers are nicer.
+
+  // Save one temp reg, use the destination as one
+  auto temp_reg = env->make_vfr(dest->type());
+
+  // Splat src2's selected value into the temp reg FIRST: if `dest` and `src2` are the same
+  // register, writing `dest` before reading `src2` would clobber the divisor.
+  env->emit_ir(color, temp_reg, src2, ftf_fsf_to_vector_element(ftf));
+  // Splat src1's value into the dest reg, keep it simple, this way no matter which vector component
+  // is accessed from the final result will be the correct answer
+  env->emit_ir(color, dest, src1, ftf_fsf_to_vector_element(fsf));
+
+  // Perform the Division
+  env->emit_ir(color, dest, dest, temp_reg, IR_VFMath3Asm::Kind::DIV);
+  return get_none();
+}
+
+Val* Compiler::compile_asm_sqrt_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
+  // Compiles `(.sqrt.vf dest src :ftf <0-3> [:color #t|#f])`.
+  auto args = get_va(form, rest);
+  va_check(
+      form, args, {{}, {}},
+      {{"color", {false, goos::ObjectType::SYMBOL}}, {"ftf", {true, goos::ObjectType::INTEGER}}});
+  bool color = true;
+  if (args.has_named("color")) {
+    color = get_true_or_false(form, args.named.at("color"));
+  }
+
+  auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
+  auto src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);
+  check_vector_float_regs(form, env, {{"destination", dest}, {"source", src}});
+
+  // Range-check at full integer width before narrowing to u8 (256 would otherwise wrap to 0).
+  const auto ftf_arg = args.named.at("ftf").as_int();
+  if (ftf_arg < 0 || ftf_arg > 3) {
+    throw_compiler_error(form, "The value {} is out of range for ftf (0-3 inclusive).", ftf_arg);
+  }
+  const u8 ftf = static_cast<u8>(ftf_arg);
+
+  // VSQRT in the VU stores its result in a single 32bit `Q` Register, it does not compute the
+  // packed square-root result
+  //
+  // Because of this, we splat the relevant component across the register and then
+  // perform the SQRT
+  //
+  // Why do we even bother using VSQRTPS instead of FSQRT? Because otherwise in x86, you have to use
+  // the FPU stack. Registers are nicer.
+
+  // Splat src's value into the dest reg, keep it simple, this way no matter which vector component
+  // is accessed from the final result will be the correct answer
+  env->emit_ir(color, dest, src, ftf_fsf_to_vector_element(ftf));
+
+  env->emit_ir(color, dest, dest);
+  return get_none();
+}
+
+Val* Compiler::compile_asm_outer_product_vf(const goos::Object& form,
+                                            const goos::Object& rest,
+                                            Env* env) {
+  // Compiles `(.outer.product.vf dest src1 src2 [:color #t|#f])`.
+  auto args = get_va(form, rest);
+  va_check(form, args, {{}, {}, {}}, {{"color", {false, goos::ObjectType::SYMBOL}}});
+  bool color = true;
+  if (args.has_named("color")) {
+    color = get_true_or_false(form, args.named.at("color"));
+  }
+
+  auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
+  auto src1 = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);
+  auto src2 = compile_error_guard(args.unnamed.at(2), env)->to_reg(env);
+  check_vector_float_regs(form, env,
+                          {{"destination", dest}, {"first source", src1}, {"second source", src2}});
+
+  // Given 2 vectors V1 = <1,2,3,4> and V2 = <5,6,7,8> and assume VDEST = <0, 0, 0, 999>
+  // The outer product (the formula below is the 3D cross product) is computed like so
+  // (only x,y,z components are operated on):
+  // x = (V1y * V2z) - (V2y * V1z) => (2 * 7) - (6 * 3) => -4
+  // y = (V1z * V2x) - (V2z * V1x) => (3 * 5) - (7 * 1) => 8
+  // z = (V1x * V2y) - (V2x * V1y) => (1 * 6) - (5 * 2) => -4
+  // w = N/A, left alone => 999
+  //
+  // There is probably a more optimized alg for this, but we can just do this in two stages:
+  // first compute the left-hand products, then the right-hand products, then subtract.
+  //
+  // Everything is computed in temporary registers and `dest` is only written by the final blend.
+  // This keeps dest's `w` and the source registers untouched, and stays correct when `dest` is
+  // the same register as `src1` or `src2`.
+
+  // Init the temp registers
+  auto temp1 = env->make_vfr(dest->type());
+  auto temp2 = env->make_vfr(dest->type());
+  auto temp3 = env->make_vfr(dest->type());
+
+  // First Portion
+  // - Swizzle src1 appropriately => <V1y, V1z, V1x, ...>
+  env->emit_ir(color, temp1, src1, 0b00001001);
+  // - Swizzle src2 appropriately => <V2z, V2x, V2y, ...>
+  env->emit_ir(color, temp2, src2, 0b00010010);
+  // - Multiply - Result in `temp1`
+  env->emit_ir(color, temp1, temp1, temp2, IR_VFMath3Asm::Kind::MUL);
+
+  // Second Portion
+  // - Swizzle src2 appropriately => <V2y, V2z, V2x, ...>
+  env->emit_ir(color, temp2, src2, 0b00001001);
+  // - Swizzle src1 appropriately => <V1z, V1x, V1y, ...>
+  env->emit_ir(color, temp3, src1, 0b00010010);
+  // - Multiply - Result in `temp2`
+  env->emit_ir(color, temp2, temp2, temp3, IR_VFMath3Asm::Kind::MUL);
+
+  // Finalize
+  // - Subtract
+  env->emit_ir(color, temp1, temp1, temp2, IR_VFMath3Asm::Kind::SUB);
+  // - Blend only x, y and z into dest, so that dest's `w` component is left unmodified
+  env->emit_ir(color, dest, dest, temp1, 0b0111);
+  return get_none();
+}
diff --git a/goalc/compiler/compilation/Atoms.cpp b/goalc/compiler/compilation/Atoms.cpp
index 232e2d61ee..3342259dd1 100644
--- a/goalc/compiler/compilation/Atoms.cpp
+++ b/goalc/compiler/compilation/Atoms.cpp
@@ -14,6 +14,7 @@ static const std::unordered_map<
     Val* (Compiler::*)(const goos::Object& form, const goos::Object& rest, Env* env)>
     goal_forms = {
         // INLINE ASM
+        {".nop", &Compiler::compile_nop},
         {".ret", &Compiler::compile_asm_ret},
         {".push", &Compiler::compile_asm_push},
         {".pop", &Compiler::compile_asm_pop},
@@ -25,44 +26,67 @@ static const std::unordered_map<
         {".mov", &Compiler::compile_asm_mov},
         // INLINE ASM - VECTOR FLOAT OPERATIONS
-        {".nop.vf", &Compiler::compile_asm_nop_vf},
-        {".nop",
&Compiler::compile_nop}, {".lvf", &Compiler::compile_asm_lvf}, {".svf", &Compiler::compile_asm_svf}, + {".blend.vf", &Compiler::compile_asm_blend_vf}, + + {".nop.vf", &Compiler::compile_asm_nop_vf}, + {".wait.vf", &Compiler::compile_asm_wait_vf}, + {".xor.vf", &Compiler::compile_asm_xor_vf}, {".max.vf", &Compiler::compile_asm_max_vf}, - {".maxx.vf", &Compiler::compile_asm_maxx_vf}, - {".maxy.vf", &Compiler::compile_asm_maxy_vf}, - {".maxz.vf", &Compiler::compile_asm_maxz_vf}, - {".maxw.vf", &Compiler::compile_asm_maxw_vf}, + {".max.x.vf", &Compiler::compile_asm_max_x_vf}, + {".max.y.vf", &Compiler::compile_asm_max_y_vf}, + {".max.z.vf", &Compiler::compile_asm_max_z_vf}, + {".max.w.vf", &Compiler::compile_asm_max_w_vf}, {".min.vf", &Compiler::compile_asm_min_vf}, - {".minx.vf", &Compiler::compile_asm_minx_vf}, - {".miny.vf", &Compiler::compile_asm_miny_vf}, - {".minz.vf", &Compiler::compile_asm_minz_vf}, - {".minw.vf", &Compiler::compile_asm_minw_vf}, - - {".sub.vf", &Compiler::compile_asm_sub_vf}, - {".subx.vf", &Compiler::compile_asm_subx_vf}, - {".suby.vf", &Compiler::compile_asm_suby_vf}, - {".subz.vf", &Compiler::compile_asm_subz_vf}, - {".subw.vf", &Compiler::compile_asm_subw_vf}, + {".min.x.vf", &Compiler::compile_asm_min_x_vf}, + {".min.y.vf", &Compiler::compile_asm_min_y_vf}, + {".min.z.vf", &Compiler::compile_asm_min_z_vf}, + {".min.w.vf", &Compiler::compile_asm_min_w_vf}, {".add.vf", &Compiler::compile_asm_add_vf}, - {".addx.vf", &Compiler::compile_asm_addx_vf}, - {".addy.vf", &Compiler::compile_asm_addy_vf}, - {".addz.vf", &Compiler::compile_asm_addz_vf}, - {".addw.vf", &Compiler::compile_asm_addw_vf}, + {".add.x.vf", &Compiler::compile_asm_add_x_vf}, + {".add.y.vf", &Compiler::compile_asm_add_y_vf}, + {".add.z.vf", &Compiler::compile_asm_add_z_vf}, + {".add.w.vf", &Compiler::compile_asm_add_w_vf}, + + {".sub.vf", &Compiler::compile_asm_sub_vf}, + {".sub.x.vf", &Compiler::compile_asm_sub_x_vf}, + {".sub.y.vf", &Compiler::compile_asm_sub_y_vf}, + 
{".sub.z.vf", &Compiler::compile_asm_sub_z_vf}, + {".sub.w.vf", &Compiler::compile_asm_sub_w_vf}, {".mul.vf", &Compiler::compile_asm_mul_vf}, - {".mulx.vf", &Compiler::compile_asm_mulx_vf}, - {".muly.vf", &Compiler::compile_asm_muly_vf}, - {".mulz.vf", &Compiler::compile_asm_mulz_vf}, - {".mulw.vf", &Compiler::compile_asm_mulw_vf}, + {".mul.x.vf", &Compiler::compile_asm_mul_x_vf}, + {".mul.y.vf", &Compiler::compile_asm_mul_y_vf}, + {".mul.z.vf", &Compiler::compile_asm_mul_z_vf}, + {".mul.w.vf", &Compiler::compile_asm_mul_w_vf}, + + {".add.mul.vf", &Compiler::compile_asm_mul_add_vf}, + {".add.mul.x.vf", &Compiler::compile_asm_mul_add_x_vf}, + {".add.mul.y.vf", &Compiler::compile_asm_mul_add_y_vf}, + {".add.mul.z.vf", &Compiler::compile_asm_mul_add_z_vf}, + {".add.mul.w.vf", &Compiler::compile_asm_mul_add_w_vf}, + + {".sub.mul.vf", &Compiler::compile_asm_mul_sub_vf}, + {".sub.mul.x.vf", &Compiler::compile_asm_mul_sub_x_vf}, + {".sub.mul.y.vf", &Compiler::compile_asm_mul_sub_y_vf}, + {".sub.mul.z.vf", &Compiler::compile_asm_mul_sub_z_vf}, + {".sub.mul.w.vf", &Compiler::compile_asm_mul_sub_w_vf}, {".abs.vf", &Compiler::compile_asm_abs_vf}, - {".blend.vf", &Compiler::compile_asm_blend_vf}, + // NOTE - to compute the Outer Product with the VU, two back to back instructions were used + // involving the ACC + // However, we can be better than that and just provide a single instruction + // BUT - if things used side effects of the modified ACC or benefited from only doing 1/2 + // operations, we'll need to implement them separately. 
+ {".outer.product.vf", &Compiler::compile_asm_outer_product_vf}, + + {".div.vf", &Compiler::compile_asm_div_vf}, + {".sqrt.vf", &Compiler::compile_asm_sqrt_vf}, // BLOCK FORMS {"top-level", &Compiler::compile_top_level}, @@ -422,4 +446,4 @@ Val* Compiler::compile_pointer_add(const goos::Object& form, const goos::Object& } return result; -} \ No newline at end of file +} diff --git a/goalc/emitter/IGen.h b/goalc/emitter/IGen.h index 51a557f4f2..1ecd9cad3b 100644 --- a/goalc/emitter/IGen.h +++ b/goalc/emitter/IGen.h @@ -2015,17 +2015,7 @@ class IGen { return instr; } - static Instruction nop_vf() { - // FNOP - Instruction instr(0xd9); - instr.set_op2(0xd0); - return instr; - } - - // eventually... - // sqrt - // rsqrt - // abs + // TODO - rsqrt / abs / sqrt //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; // UTILITIES @@ -2045,6 +2035,18 @@ class IGen { ///////////////////////////// // AVX (VF - Vector Float) // ///////////////////////////// + + static Instruction nop_vf() { + Instruction instr(0xd9); // FNOP + instr.set_op2(0xd0); + return instr; + } + + static Instruction wait_vf() { + Instruction instr(0x9B); // FWAIT / WAIT + return instr; + } + static Instruction mov_vf_vf(Register dst, Register src) { assert(dst.is_xmm()); assert(src.is_xmm()); @@ -2168,6 +2170,18 @@ class IGen { // TODO - rip relative loads and stores. 
+ static Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { + assert(!(mask & 0b11110000)); + assert(dst.is_xmm()); + assert(src1.is_xmm()); + assert(src2.is_xmm()); + Instruction instr(0x0c); // VBLENDPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A, + src1.hw_id(), false, VexPrefix::P_66); + instr.set(Imm(1, mask)); + return instr; + } + static Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { assert(dst.is_xmm()); assert(src.is_xmm()); @@ -2190,18 +2204,19 @@ class IGen { Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved. Here's a brief run-down: - 8-bits / 4 groups of 2 bits - - Each group is used to determine which element in `src` gets copied to `dst`'s respective - element. - - Right to Left, the first 2-bit group controls which `dst` element, gets copied to `src`'s - most-significant byte (left-most) and so on. GROUP OPTIONS - - 00b - Copy the least-significant element + - Right-to-left, each group is used to determine which element in `src` gets copied into + `dst`'s element (W->X). 
+ - GROUP OPTIONS + - 00b - Copy the least-significant element (X) - 01b - Copy the second element (from the right) - 10b - Copy the third element (from the right) - - 11b - Copy the most significant element + - 11b - Copy the most significant element (W) Examples ; xmm1 = (1.5, 2.5, 3.5, 4.5) SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions - (1.5, 1.5, 1.5, 1.5) SHUFPS xmm1, xmm1, 0x39 ; Rotate right (4.5, 1.5, 2.5, 3.5) + > (1.5, 1.5, 1.5, 1.5) + SHUFPS xmm1, xmm1, 0x39 ; Rotate right + > (4.5, 1.5, 2.5, 3.5) */ static Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { assert(dst.is_xmm()); @@ -2297,15 +2312,20 @@ class IGen { return instr; } - static Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { - assert(!(mask & 0b11110000)); + static Instruction div_vf(Register dst, Register src1, Register src2) { assert(dst.is_xmm()); assert(src1.is_xmm()); assert(src2.is_xmm()); - Instruction instr(0x0c); // VBLENDPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A, - src1.hw_id(), false, VexPrefix::P_66); - instr.set(Imm(1, mask)); + Instruction instr(0x5E); // VDIVPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; + } + + static Instruction sqrt_vf(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + Instruction instr(0x51); // VSQRTPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0b0); return instr; } }; diff --git a/test/goalc/source_templates/with_game/test-vector-math-1-operand.template.gc b/test/goalc/source_templates/with_game/test-vector-math-1-operand.template.gc new file mode 100644 index 0000000000..bf6431506a --- /dev/null +++ b/test/goalc/source_templates/with_game/test-vector-math-1-operand.template.gc @@ -0,0 +1,21 @@ +(defun test-vector-math () + (let ((vector-in-1 (new 'stack 'vector)) + (vector-out (new 'stack 
'vector))) + + (set-vector! vector-in-1 {{ v1x }} {{ v1y }} {{ v1z }} {{ v1w }}) + (set-vector! vector-out {{ destx }} {{ desty }} {{ destz }} {{ destw }}) + + (rlet ((vf1 :class vf :reset-here #t) + (vf2 :class vf :reset-here #t)) + + (.lvf vf1 vector-in-1) + (.lvf vf2 vector-out) + + ({{ operation }} vf2 vf1{% if destinationMask %} :mask #b{{ destinationMask }}{% endif %}) + (.wait.vf) + + (.svf vector-out vf2)) + + (format #t "(~f, ~f, ~f, ~f)~%" (-> vector-out x) (-> vector-out y) (-> vector-out z) (-> vector-out w)))) + +(test-vector-math) diff --git a/test/goalc/source_templates/with_game/test-vector-math-2-operand-acc.template.gc b/test/goalc/source_templates/with_game/test-vector-math-2-operand-acc.template.gc new file mode 100644 index 0000000000..cfd640faf2 --- /dev/null +++ b/test/goalc/source_templates/with_game/test-vector-math-2-operand-acc.template.gc @@ -0,0 +1,29 @@ +(defun test-vector-outer-product () + (let ((vector-in-1 (new 'stack 'vector)) + (vector-in-2 (new 'stack 'vector)) + (vector-acc (new 'stack 'vector)) + (vector-out (new 'stack 'vector))) + + (set-vector! vector-in-1 {{ v1x }} {{ v1y }} {{ v1z }} {{ v1w }}) + (set-vector! vector-in-2 {{ v2x }} {{ v2y }} {{ v2z }} {{ v2w }}) + (set-vector! vector-acc {{ accx }} {{ accy }} {{ accz }} {{ accw }}) + (set-vector! 
vector-out {{ destx }} {{ desty }} {{ destz }} {{ destw }}) + + (rlet ((vf1 :class vf :reset-here #t) + (vf2 :class vf :reset-here #t) + (vfd :class vf :reset-here #t) + (acc :class vf :reset-here #t)) + + (.lvf vfd vector-out) + (.lvf vf1 vector-in-1) + (.lvf vf2 vector-in-2) + (.lvf acc vector-acc) + + ({{ operation }} vfd vf1 vf2 acc{% if destinationMask %} :mask #b{{ destinationMask }}{% endif %}) + (.wait.vf) + + (.svf vector-out vfd)) + + (format #t "(~f, ~f, ~f, ~f)~%" (-> vector-out x) (-> vector-out y) (-> vector-out z) (-> vector-out w)))) + +(test-vector-outer-product) diff --git a/test/goalc/source_templates/with_game/test-vector-math.template.gc b/test/goalc/source_templates/with_game/test-vector-math-2-operand.template.gc similarity index 51% rename from test/goalc/source_templates/with_game/test-vector-math.template.gc rename to test/goalc/source_templates/with_game/test-vector-math-2-operand.template.gc index e20c1b61e7..4697959336 100644 --- a/test/goalc/source_templates/with_game/test-vector-math.template.gc +++ b/test/goalc/source_templates/with_game/test-vector-math-2-operand.template.gc @@ -1,23 +1,24 @@ (defun test-vector-math () (let ((vector-in-1 (new 'stack 'vector)) - {% if twoOperands %}(vector-in-2 (new 'stack 'vector)){% endif %} + (vector-in-2 (new 'stack 'vector)) (vector-out (new 'stack 'vector))) (set-vector! vector-in-1 {{ v1x }} {{ v1y }} {{ v1z }} {{ v1w }}) - {% if twoOperands %}(set-vector! vector-in-2 {{ v2x }} {{ v2y }} {{ v2z }} {{ v2w }}){% endif %} + (set-vector! vector-in-2 {{ v2x }} {{ v2y }} {{ v2z }} {{ v2w }}) (set-vector! 
vector-out {{ destx }} {{ desty }} {{ destz }} {{ destw }}) (rlet ((vf1 :class vf :reset-here #t) - {% if twoOperands %}(vf2 :class vf :reset-here #t){% endif %} + (vf2 :class vf :reset-here #t) (vf3 :class vf :reset-here #t)) (.lvf vf1 vector-in-1) - {% if twoOperands %}(.lvf vf2 vector-in-2){% endif %} + (.lvf vf2 vector-in-2) (.lvf vf3 vector-out) - {% if twoOperands %}({{ operation }} vf3 vf1 vf2{% if destinationMask %} :mask #b{{ destinationMask }}{% endif %}){% else %}({{ operation }} vf3 vf1{% if destinationMask %} :mask #b{{ destinationMask }}{% endif %}){% endif %} - + ({{ operation }} vf3 vf1 vf2{% if destinationMask %} :mask #b{{ destinationMask }}{% endif %}) + (.wait.vf) + (.svf vector-out vf3)) (format #t "(~f, ~f, ~f, ~f)~%" (-> vector-out x) (-> vector-out y) (-> vector-out z) (-> vector-out w)))) diff --git a/test/goalc/source_templates/with_game/test-vector-math-division.template.gc b/test/goalc/source_templates/with_game/test-vector-math-division.template.gc new file mode 100644 index 0000000000..62bf07f34e --- /dev/null +++ b/test/goalc/source_templates/with_game/test-vector-math-division.template.gc @@ -0,0 +1,25 @@ +(defun test-vector-division () + (let ((vector-in-1 (new 'stack 'vector)) + (vector-in-2 (new 'stack 'vector)) + (vector-out (new 'stack 'vector))) + + (set-vector! vector-in-1 {{ v1x }} {{ v1y }} {{ v1z }} {{ v1w }}) + (set-vector! vector-in-2 {{ v2x }} {{ v2y }} {{ v2z }} {{ v2w }}) + (set-vector! 
vector-out {{ destx }} {{ desty }} {{ destz }} {{ destw }}) + + (rlet ((vf1 :class vf :reset-here #t) + (vf2 :class vf :reset-here #t) + (vf3 :class vf :reset-here #t)) + + (.lvf vf1 vector-in-1) + (.lvf vf2 vector-in-2) + (.lvf vf3 vector-out) + + ({{ operation }} vf3 vf1 vf2 :fsf #b{{ fsf }} :ftf #b{{ ftf }}) + (.wait.vf) + + (.svf vector-out vf3)) + + (format #t "~f~%" (-> vector-out x)))) + +(test-vector-division) diff --git a/test/goalc/source_templates/with_game/test-vector-math-sqrt.template.gc b/test/goalc/source_templates/with_game/test-vector-math-sqrt.template.gc new file mode 100644 index 0000000000..4a6b46dee2 --- /dev/null +++ b/test/goalc/source_templates/with_game/test-vector-math-sqrt.template.gc @@ -0,0 +1,21 @@ +(defun test-vector-sqrt () + (let ((vector-in-1 (new 'stack 'vector)) + (vector-out (new 'stack 'vector))) + + (set-vector! vector-in-1 {{ v1x }} {{ v1y }} {{ v1z }} {{ v1w }}) + (set-vector! vector-out {{ destx }} {{ desty }} {{ destz }} {{ destw }}) + + (rlet ((vf1 :class vf :reset-here #t) + (vf2 :class vf :reset-here #t)) + + (.lvf vf1 vector-in-1) + (.lvf vf2 vector-out) + + ({{ operation }} vf2 vf1 :ftf #b{{ ftf }}) + (.wait.vf) + + (.svf vector-out vf2)) + + (format #t "~f~%" (-> vector-out x)))) + +(test-vector-sqrt) diff --git a/test/goalc/source_templates/with_game/test-vector-outer-product.gc b/test/goalc/source_templates/with_game/test-vector-outer-product.gc new file mode 100644 index 0000000000..6492db65ad --- /dev/null +++ b/test/goalc/source_templates/with_game/test-vector-outer-product.gc @@ -0,0 +1,26 @@ +(defun test-vector-outer-product () + (let ((vector-in-1 (new 'stack 'vector)) + (vector-in-2 (new 'stack 'vector)) + (vector-out (new 'stack 'vector))) + + (set-vector! vector-in-1 1.0 2.0 3.0 4.0) + (set-vector! vector-in-2 5.0 6.0 7.0 8.0) + (set-vector! 
vector-out 0.0 0.0 0.0 999.0) + + (rlet ((vf1 :class vf :reset-here #t) + (vf2 :class vf :reset-here #t) + (vf3 :class vf :reset-here #t)) + + (.lvf vf1 vector-in-1) + (.lvf vf2 vector-in-2) + (.lvf vf3 vector-out) + + + (.outer.product.vf vf3 vf1 vf2) + (.wait.vf) + + (.svf vector-out vf3)) + + (format #t "(~f, ~f, ~f, ~f)~%" (-> vector-out x) (-> vector-out y) (-> vector-out z) (-> vector-out w)))) + +(test-vector-outer-product) diff --git a/test/goalc/test_with_game.cpp b/test/goalc/test_with_game.cpp index 50edfbadf1..e8be466604 100644 --- a/test/goalc/test_with_game.cpp +++ b/test/goalc/test_with_game.cpp @@ -344,222 +344,13 @@ TEST_F(WithGameTests, StaticBoxedArray) { // VECTOR FLOAT TESTS -struct VectorFloatRegister { - float x = 0; - float y = 0; - float z = 0; - float w = 0; +// ---- One off Tests - void setJson(nlohmann::json& data, std::string vectorKey) { - data[fmt::format("{}x", vectorKey)] = x; - data[fmt::format("{}y", vectorKey)] = y; - data[fmt::format("{}z", vectorKey)] = z; - data[fmt::format("{}w", vectorKey)] = w; - } - - float getBroadcastElement(emitter::Register::VF_ELEMENT bc, float defValue) { - switch (bc) { - case emitter::Register::VF_ELEMENT::X: - return x; - case emitter::Register::VF_ELEMENT::Y: - return y; - case emitter::Register::VF_ELEMENT::Z: - return z; - case emitter::Register::VF_ELEMENT::W: - return w; - default: - return defValue; - } - } - - std::string toGOALFormat() { - std::string answer = fmt::format("({:.4f}, {:.4f}, {:.4f}, {:.4f})", x, y, z, w); - // {fmt} formats negative 0 as "-0.000", just going to flip any negative zeros to positives as I - // don't think is an OpenGOAL issue - return std::regex_replace(answer, std::regex("-0.0000"), "0.0000"); - } -}; - -struct VectorFloatTestCase { - VectorFloatRegister input1 = {1.5, -1.5, 0.0, 100.5}; - VectorFloatRegister input2 = {-5.5, -0.0, 10.0, 7.5}; - VectorFloatRegister dest = {11, 22, 33, 44}; - - int destinationMask = -1; - emitter::Register::VF_ELEMENT bc = 
emitter::Register::VF_ELEMENT::NONE; - std::function operation; - - VectorFloatRegister getExpectedResult() { - VectorFloatRegister expectedResult; - expectedResult.x = destinationMask & 0b0001 - ? operation(input1.x, input2.getBroadcastElement(bc, input2.x)) - : dest.x; - expectedResult.y = destinationMask & 0b0010 - ? operation(input1.y, input2.getBroadcastElement(bc, input2.y)) - : dest.y; - expectedResult.z = destinationMask & 0b0100 - ? operation(input1.z, input2.getBroadcastElement(bc, input2.z)) - : dest.z; - expectedResult.w = destinationMask & 0b1000 - ? operation(input1.w, input2.getBroadcastElement(bc, input2.w)) - : dest.w; - return expectedResult; - } - - std::string getOperationBroadcast() { - switch (bc) { - case emitter::Register::VF_ELEMENT::X: - return "x"; - case emitter::Register::VF_ELEMENT::Y: - return "y"; - case emitter::Register::VF_ELEMENT::Z: - return "z"; - case emitter::Register::VF_ELEMENT::W: - return "w"; - default: - return ""; - } - } - - void setJson(nlohmann::json& data, std::string func, bool twoOperands = true) { - input1.setJson(data, "v1"); - data["twoOperands"] = twoOperands; - if (twoOperands) { - input2.setJson(data, "v2"); - } - dest.setJson(data, "dest"); - data["operation"] = fmt::format(func); - if (destinationMask == -1) { - data["destinationMask"] = false; - } else { - data["destinationMask"] = fmt::format("{:b}", destinationMask); - } - } -}; - -std::vector vectorMathTestCaseGen() { - std::string test = fmt::format("{:.4f}", -0.0); - - std::vector cases = {}; - for (int i = 0; i <= 15; i++) { - VectorFloatTestCase testCase = VectorFloatTestCase(); - testCase.destinationMask = i; - cases.push_back(testCase); - // Re-add each case with each broadcast varient - for (int j = 0; j < 4; j++) { - VectorFloatTestCase testCaseBC = VectorFloatTestCase(); - testCaseBC.destinationMask = i; - testCaseBC.bc = static_cast(j); - cases.push_back(testCaseBC); - } - } - return cases; +TEST_F(WithGameTests, VFOuterProduct) { + 
runner.run_static_test(env, testCategory, "test-vector-outer-product.gc", + {"(-4.0000, 8.0000, -4.0000, 999.0000)\n0\n"}); } -class VectorFloatParameterizedTestFixtureWithRunner - : public WithGameTests, - public ::testing::WithParamInterface { - protected: - std::string templateFile = "test-vector-math.template.gc"; -}; - -// NOTE - an excellent article - -// https://www.sandordargo.com/blog/2019/04/24/parameterized-testing-with-gtest - -TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_ADD_XYZW_DEST) { - VectorFloatTestCase testCase = GetParam(); - testCase.operation = [](float x, float y) { return x + y; }; - - nlohmann::json data; - testCase.setJson(data, fmt::format(".add{}.vf", testCase.getOperationBroadcast())); - - std::string outFile = runner.test_file_name( - fmt::format("vector-math-add{}-{{}}.generated.gc", testCase.getOperationBroadcast())); - env.write(templateFile, data, outFile); - runner.run_test(testCategory, outFile, - {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); -} - -TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_SUB_XYZW_DEST) { - VectorFloatTestCase testCase = GetParam(); - testCase.operation = [](float x, float y) { return x - y; }; - - nlohmann::json data; - testCase.setJson(data, fmt::format(".sub{}.vf", testCase.getOperationBroadcast())); - - std::string outFile = runner.test_file_name( - fmt::format("vector-math-sub{}-{{}}.generated.gc", testCase.getOperationBroadcast())); - env.write(templateFile, data, outFile); - runner.run_test(testCategory, outFile, - {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); -} - -TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_MUL_XYZW_DEST) { - VectorFloatTestCase testCase = GetParam(); - testCase.operation = [](float x, float y) { return x * y; }; - - nlohmann::json data; - testCase.setJson(data, fmt::format(".mul{}.vf", testCase.getOperationBroadcast())); - - std::string outFile = runner.test_file_name( - 
fmt::format("vector-math-mul{}-{{}}.generated.gc", testCase.getOperationBroadcast())); - env.write(templateFile, data, outFile); - runner.run_test(testCategory, outFile, - {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); -} - -TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_MIN_XYZW_DEST) { - VectorFloatTestCase testCase = GetParam(); - testCase.operation = [](float x, float y) { return fmin(x, y); }; - - nlohmann::json data; - testCase.setJson(data, fmt::format(".min{}.vf", testCase.getOperationBroadcast())); - - std::string outFile = runner.test_file_name( - fmt::format("vector-math-min{}-{{}}.generated.gc", testCase.getOperationBroadcast())); - env.write(templateFile, data, outFile); - runner.run_test(testCategory, outFile, - {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); -} - -TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_MAX_XYZW_DEST) { - VectorFloatTestCase testCase = GetParam(); - testCase.operation = [](float x, float y) { return fmax(x, y); }; - - nlohmann::json data; - testCase.setJson(data, fmt::format(".max{}.vf", testCase.getOperationBroadcast())); - - std::string outFile = runner.test_file_name( - fmt::format("vector-math-max{}-{{}}.generated.gc", testCase.getOperationBroadcast())); - env.write(templateFile, data, outFile); - runner.run_test(testCategory, outFile, - {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); -} - -// TODO - This test runs more often than the rest, should probably be split into it's own fixture -// (broadcasting ignored!) 
-TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_ABS_DEST) { - VectorFloatTestCase testCase = GetParam(); - testCase.operation = [](float x, float y) { - // Avoid compiler warnings for unused variable, making a varient that accepts a lambda with only - // 1 float is just unnecessary complexity - (void)y; - return fabs(x); - }; - - nlohmann::json data; - testCase.setJson(data, ".abs.vf", false); - - std::string outFile = runner.test_file_name("vector-math-abs-{}.generated.gc"); - env.write(templateFile, data, outFile); - runner.run_test(testCategory, outFile, - {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); -} - -INSTANTIATE_TEST_SUITE_P(WithGameTests_VectorFloatTests, - VectorFloatParameterizedTestFixtureWithRunner, - ::testing::ValuesIn(vectorMathTestCaseGen())); - TEST_F(WithGameTests, VFLoadAndStore) { runner.run_static_test(env, testCategory, "test-vf-load-and-store.gc", {"2.0000\n0\n"}); } @@ -596,3 +387,516 @@ TEST(TypeConsistency, TypeConsistency) { compiler.run_test_no_load("test/goalc/source_templates/with_game/test-build-game.gc"); compiler.run_test_no_load("decompiler/config/all-types.gc"); } + +struct VectorFloatRegister { + float x = 0; + float y = 0; + float z = 0; + float w = 0; + + void setJson(nlohmann::json& data, std::string vectorKey) { + data[fmt::format("{}x", vectorKey)] = x; + data[fmt::format("{}y", vectorKey)] = y; + data[fmt::format("{}z", vectorKey)] = z; + data[fmt::format("{}w", vectorKey)] = w; + } + + float getBroadcastElement(emitter::Register::VF_ELEMENT bc, float defValue) { + switch (bc) { + case emitter::Register::VF_ELEMENT::X: + return x; + case emitter::Register::VF_ELEMENT::Y: + return y; + case emitter::Register::VF_ELEMENT::Z: + return z; + case emitter::Register::VF_ELEMENT::W: + return w; + default: + return defValue; + } + } + + std::string toGOALFormat() { + std::string answer = fmt::format("({:.4f}, {:.4f}, {:.4f}, {:.4f})", x, y, z, w); + // {fmt} formats negative 0 as "-0.000", just 
going to flip any negative zeros to positives as I + // don't think is an OpenGOAL issue + // Additionally, GOAL doesn't have -/+ Inf it seems, so replace with NaN. -nan is also just NaN + return std::regex_replace(std::regex_replace(answer, std::regex("-0.0000"), "0.0000"), + std::regex("nan|inf|-nan|-inf"), "NaN"); + } + + std::string toGOALFormat(float val) { + std::string answer = fmt::format("{:.4f}", val); + // Formats only `val` (not the whole register). {fmt} prints negative zero with a leading '-', + // so flip it to positive, as I don't think this is an OpenGOAL issue + // Additionally, GOAL doesn't have -/+ Inf it seems, so replace nan/inf (either sign) with NaN + return std::regex_replace(std::regex_replace(answer, std::regex("-0.0000"), "0.0000"), + std::regex("nan|inf|-nan|-inf"), "NaN"); + } +}; + +struct VectorFloatTestCase { + VectorFloatRegister dest = {11, 22, 33, 44}; + int destinationMask = -1; + emitter::Register::VF_ELEMENT bc = emitter::Register::VF_ELEMENT::NONE; + + std::string getOperationBroadcast() { + switch (bc) { + case emitter::Register::VF_ELEMENT::X: + return ".x"; + case emitter::Register::VF_ELEMENT::Y: + return ".y"; + case emitter::Register::VF_ELEMENT::Z: + return ".z"; + case emitter::Register::VF_ELEMENT::W: + return ".w"; + default: + return ""; + } + } + + virtual VectorFloatRegister getExpectedResult() = 0; + virtual void setJson(nlohmann::json& data, std::string func) = 0; +}; + +struct VectorFloatTestCase_TwoOperand : VectorFloatTestCase { + VectorFloatRegister input1 = {1.5, -1.5, 0.0, 100.5}; + VectorFloatRegister input2 = {-5.5, -0.0, 10.0, 7.5}; + + std::function operation; + + VectorFloatRegister getExpectedResult() { + VectorFloatRegister expectedResult; + expectedResult.x = destinationMask & 0b0001 + ? operation(input1.x, input2.getBroadcastElement(bc, input2.x)) + : dest.x; + expectedResult.y = destinationMask & 0b0010 + ? operation(input1.y, input2.getBroadcastElement(bc, input2.y)) + : dest.y; + expectedResult.z = destinationMask & 0b0100 + ? 
operation(input1.z, input2.getBroadcastElement(bc, input2.z)) + : dest.z; + expectedResult.w = destinationMask & 0b1000 + ? operation(input1.w, input2.getBroadcastElement(bc, input2.w)) + : dest.w; + return expectedResult; + } + + void setJson(nlohmann::json& data, std::string func) { + input1.setJson(data, "v1"); + input2.setJson(data, "v2"); + dest.setJson(data, "dest"); + data["operation"] = fmt::format(func); + if (destinationMask == -1) { + data["destinationMask"] = false; + } else { + data["destinationMask"] = fmt::format("{:b}", destinationMask); + } + } +}; + +std::vector vectorMathCaseGen_TwoOperand() { + std::vector cases = {}; + for (int i = 0; i <= 15; i++) { + VectorFloatTestCase_TwoOperand testCase = VectorFloatTestCase_TwoOperand(); + testCase.destinationMask = i; + cases.push_back(testCase); + // Re-add each case with each broadcast variant + for (int j = 0; j < 4; j++) { + VectorFloatTestCase_TwoOperand testCaseBC = VectorFloatTestCase_TwoOperand(); + testCaseBC.destinationMask = i; + testCaseBC.bc = static_cast(j); + cases.push_back(testCaseBC); + } + } + return cases; +} + +class VectorFloatParameterizedTestFixtureWithRunner_TwoOperand + : public WithGameTests, + public ::testing::WithParamInterface { + protected: + std::string templateFile = "test-vector-math-2-operand.template.gc"; +}; + +// NOTE - an excellent article - +// https://www.sandordargo.com/blog/2019/04/24/parameterized-testing-with-gtest + +// --- 2 Operand VF Operations + +TEST_P(VectorFloatParameterizedTestFixtureWithRunner_TwoOperand, VF_ADD_XYZW_DEST) { + VectorFloatTestCase_TwoOperand testCase = GetParam(); + testCase.operation = [](float x, float y) { return x + y; }; + + nlohmann::json data; + testCase.setJson(data, fmt::format(".add{}.vf", testCase.getOperationBroadcast())); + + std::string outFile = runner.test_file_name( + fmt::format("vector-math-add{}-{{}}.generated.gc", testCase.getOperationBroadcast())); + env.write(templateFile, data, outFile); + 
runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); +} + +TEST_P(VectorFloatParameterizedTestFixtureWithRunner_TwoOperand, VF_SUB_XYZW_DEST) { + VectorFloatTestCase_TwoOperand testCase = GetParam(); + testCase.operation = [](float x, float y) { return x - y; }; + + nlohmann::json data; + testCase.setJson(data, fmt::format(".sub{}.vf", testCase.getOperationBroadcast())); + + std::string outFile = runner.test_file_name( + fmt::format("vector-math-sub{}-{{}}.generated.gc", testCase.getOperationBroadcast())); + env.write(templateFile, data, outFile); + runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); +} + +TEST_P(VectorFloatParameterizedTestFixtureWithRunner_TwoOperand, VF_MUL_XYZW_DEST) { + VectorFloatTestCase_TwoOperand testCase = GetParam(); + testCase.operation = [](float x, float y) { return x * y; }; + + nlohmann::json data; + testCase.setJson(data, fmt::format(".mul{}.vf", testCase.getOperationBroadcast())); + + std::string outFile = runner.test_file_name( + fmt::format("vector-math-mul{}-{{}}.generated.gc", testCase.getOperationBroadcast())); + env.write(templateFile, data, outFile); + runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); +} + +TEST_P(VectorFloatParameterizedTestFixtureWithRunner_TwoOperand, VF_MIN_XYZW_DEST) { + VectorFloatTestCase_TwoOperand testCase = GetParam(); + testCase.operation = [](float x, float y) { return fmin(x, y); }; + + nlohmann::json data; + testCase.setJson(data, fmt::format(".min{}.vf", testCase.getOperationBroadcast())); + + std::string outFile = runner.test_file_name( + fmt::format("vector-math-min{}-{{}}.generated.gc", testCase.getOperationBroadcast())); + env.write(templateFile, data, outFile); + runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); +} + 
+TEST_P(VectorFloatParameterizedTestFixtureWithRunner_TwoOperand, VF_MAX_XYZW_DEST) { + VectorFloatTestCase_TwoOperand testCase = GetParam(); + testCase.operation = [](float x, float y) { return fmax(x, y); }; + + nlohmann::json data; + testCase.setJson(data, fmt::format(".max{}.vf", testCase.getOperationBroadcast())); + + std::string outFile = runner.test_file_name( + fmt::format("vector-math-max{}-{{}}.generated.gc", testCase.getOperationBroadcast())); + env.write(templateFile, data, outFile); + runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); +} + +INSTANTIATE_TEST_SUITE_P(WithGameTests_VectorFloatTests, + VectorFloatParameterizedTestFixtureWithRunner_TwoOperand, + ::testing::ValuesIn(vectorMathCaseGen_TwoOperand())); + +// --- 1 Operand VF Operations + +struct VectorFloatTestCase_SingleOperand : VectorFloatTestCase { + VectorFloatRegister input1 = {1.5, -1.5, 0.0, 100.5}; + + std::function operation; + + VectorFloatRegister getExpectedResult() { + VectorFloatRegister expectedResult; + expectedResult.x = + destinationMask & 0b0001 ? operation(input1.getBroadcastElement(bc, input1.x)) : dest.x; + expectedResult.y = + destinationMask & 0b0010 ? operation(input1.getBroadcastElement(bc, input1.y)) : dest.y; + expectedResult.z = + destinationMask & 0b0100 ? operation(input1.getBroadcastElement(bc, input1.z)) : dest.z; + expectedResult.w = + destinationMask & 0b1000 ? 
operation(input1.getBroadcastElement(bc, input1.w)) : dest.w; + return expectedResult; + } + + void setJson(nlohmann::json& data, std::string func) { + input1.setJson(data, "v1"); + dest.setJson(data, "dest"); + data["operation"] = fmt::format(func); + if (destinationMask == -1) { + data["destinationMask"] = false; + } else { + data["destinationMask"] = fmt::format("{:b}", destinationMask); + } + } +}; + +std::vector vectorMathCaseGen_SingleOperand_NoBroadcast() { + std::vector cases = {}; + for (int i = 0; i <= 15; i++) { + VectorFloatTestCase_SingleOperand testCase = VectorFloatTestCase_SingleOperand(); + testCase.destinationMask = i; + cases.push_back(testCase); + } + return cases; +} + +class VectorFloatParameterizedTestFixtureWithRunner_SingleOperand + : public WithGameTests, + public ::testing::WithParamInterface { + protected: + std::string templateFile = "test-vector-math-1-operand.template.gc"; +}; + +TEST_P(VectorFloatParameterizedTestFixtureWithRunner_SingleOperand, VF_ABS_DEST) { + VectorFloatTestCase_SingleOperand testCase = GetParam(); + testCase.operation = [](float x) { return fabs(x); }; + + nlohmann::json data; + testCase.setJson(data, ".abs.vf"); + + std::string outFile = runner.test_file_name("vector-math-abs-{}.generated.gc"); + env.write(templateFile, data, outFile); + runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); +} + +INSTANTIATE_TEST_SUITE_P(WithGameTests_VectorFloatTests, + VectorFloatParameterizedTestFixtureWithRunner_SingleOperand, + ::testing::ValuesIn(vectorMathCaseGen_SingleOperand_NoBroadcast())); + +// --- 2 Operand With ACC VF Operations +// TODO - these pollute tests, it would be nicer long-term to move these into the framework +// namespace + +struct VectorFloatTestCase_TwoOperandACC : VectorFloatTestCase { + VectorFloatRegister input1 = {1.5, -1.5, 0.0, 100.5}; + VectorFloatRegister input2 = {-5.5, -0.0, 10.0, 7.5}; + VectorFloatRegister acc = {-15.5, -0.0, 20.0, 
70.5}; + + std::function operation; + + VectorFloatRegister getExpectedResult() { + VectorFloatRegister expectedResult; + expectedResult.x = destinationMask & 0b0001 + ? operation(input1.x, input2.getBroadcastElement(bc, input2.x), acc.x) + : dest.x; + expectedResult.y = destinationMask & 0b0010 + ? operation(input1.y, input2.getBroadcastElement(bc, input2.y), acc.y) + : dest.y; + expectedResult.z = destinationMask & 0b0100 + ? operation(input1.z, input2.getBroadcastElement(bc, input2.z), acc.z) + : dest.z; + expectedResult.w = destinationMask & 0b1000 + ? operation(input1.w, input2.getBroadcastElement(bc, input2.w), acc.w) + : dest.w; + return expectedResult; + } + + void setJson(nlohmann::json& data, std::string func) { + input1.setJson(data, "v1"); + input2.setJson(data, "v2"); + acc.setJson(data, "acc"); + dest.setJson(data, "dest"); + data["operation"] = fmt::format(func); + if (destinationMask == -1) { + data["destinationMask"] = false; + } else { + data["destinationMask"] = fmt::format("{:b}", destinationMask); + } + } +}; + +// TODO - unnecessary duplication for these generation methods, use some templates (only the type +// changes) +std::vector vectorMathCaseGen_TwoOperandACC() { + std::vector cases = {}; + for (int i = 0; i <= 15; i++) { + VectorFloatTestCase_TwoOperandACC testCase = VectorFloatTestCase_TwoOperandACC(); + testCase.destinationMask = i; + cases.push_back(testCase); + // Re-add each case with each broadcast variant + for (int j = 0; j < 4; j++) { + VectorFloatTestCase_TwoOperandACC testCaseBC = VectorFloatTestCase_TwoOperandACC(); + testCaseBC.destinationMask = i; + testCaseBC.bc = static_cast(j); + cases.push_back(testCaseBC); + } + } + return cases; +} + +class VectorFloatParameterizedTestFixtureWithRunner_TwoOperandACC + : public WithGameTests, + public ::testing::WithParamInterface { + protected: + std::string templateFile = "test-vector-math-2-operand-acc.template.gc"; +}; + 
+TEST_P(VectorFloatParameterizedTestFixtureWithRunner_TwoOperandACC, VF_MUL_ADD_XYZW_DEST) { + VectorFloatTestCase_TwoOperandACC testCase = GetParam(); + testCase.operation = [](float x, float y, float acc) { return (x * y) + acc; }; + + nlohmann::json data; + testCase.setJson(data, fmt::format(".add.mul{}.vf", testCase.getOperationBroadcast())); + + std::string outFile = runner.test_file_name( + fmt::format("vector-math-add-mul{}-{{}}.generated.gc", testCase.getOperationBroadcast())); + env.write(templateFile, data, outFile); + runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); +} + +TEST_P(VectorFloatParameterizedTestFixtureWithRunner_TwoOperandACC, VF_MUL_SUB_XYZW_DEST) { + VectorFloatTestCase_TwoOperandACC testCase = GetParam(); + testCase.operation = [](float x, float y, float acc) { return acc - (x * y); }; + + nlohmann::json data; + testCase.setJson(data, fmt::format(".sub.mul{}.vf", testCase.getOperationBroadcast())); + + std::string outFile = runner.test_file_name( + fmt::format("vector-math-sub-mul{}-{{}}.generated.gc", testCase.getOperationBroadcast())); + env.write(templateFile, data, outFile); + runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())}); +} + +INSTANTIATE_TEST_SUITE_P(WithGameTests_VectorFloatTests, + VectorFloatParameterizedTestFixtureWithRunner_TwoOperandACC, + ::testing::ValuesIn(vectorMathCaseGen_TwoOperandACC())); + +// ---- Two Operand Quotient Register Operations + +struct VectorFloatTestCase_TwoOperandQuotient : VectorFloatTestCase { + VectorFloatRegister input1 = {1.5, -1.5, 0.0, 100.5}; + VectorFloatRegister input2 = {-5.5, -0.0, 10.0, 10.0}; + + int fsf = 0; + int ftf = 0; + + std::function operation; + + VectorFloatRegister getExpectedResult() { + float operand1 = + input1.getBroadcastElement(static_cast(fsf), input1.x); + float operand2 = + input2.getBroadcastElement(static_cast(ftf), input2.x); + float result 
= operation(operand1, operand2); + VectorFloatRegister expectedResult; + expectedResult.x = result; + expectedResult.y = result; + expectedResult.z = result; + expectedResult.w = result; + return expectedResult; + } + + void setJson(nlohmann::json& data, std::string func) { + input1.setJson(data, "v1"); + input2.setJson(data, "v2"); + dest.setJson(data, "dest"); + data["operation"] = fmt::format(func); + data["ftf"] = fmt::format("{:b}", ftf); + data["fsf"] = fmt::format("{:b}", fsf); + } +}; + +std::vector vectorMathCaseGen_TwoOperandQuotient() { + std::vector cases = {}; + for (int i = 0; i <= 3; i++) { + VectorFloatTestCase_TwoOperandQuotient testCase = VectorFloatTestCase_TwoOperandQuotient(); + testCase.fsf = i; + for (int j = 0; j <= 3; j++) { + testCase.ftf = j; + cases.push_back(testCase); + } + } + return cases; +} + +class VectorFloatParameterizedTestFixtureWithRunner_TwoOperandQuotient + : public WithGameTests, + public ::testing::WithParamInterface { + protected: + std::string templateFile = "test-vector-math-division.template.gc"; +}; + +TEST_P(VectorFloatParameterizedTestFixtureWithRunner_TwoOperandQuotient, VF_DIV_FTF_FSF) { + VectorFloatTestCase_TwoOperandQuotient testCase = GetParam(); + testCase.operation = [](float x, float y) { return x / y; }; + + nlohmann::json data; + testCase.setJson(data, ".div.vf"); + + std::string outFile = runner.test_file_name("vector-math-div-{}.generated.gc"); + env.write(templateFile, data, outFile); + runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat( + testCase.getExpectedResult().x))}); +} + +INSTANTIATE_TEST_SUITE_P(WithGameTests_VectorFloatTests, + VectorFloatParameterizedTestFixtureWithRunner_TwoOperandQuotient, + ::testing::ValuesIn(vectorMathCaseGen_TwoOperandQuotient())); + +// ---- Single Operand Quotient Register Operations + +struct VectorFloatTestCase_OneOperandQuotient : VectorFloatTestCase { + VectorFloatRegister input1 = {2, -2, 0.0, 100}; + + 
int ftf = 0; + + std::function operation; + + VectorFloatRegister getExpectedResult() { + float operand1 = + input1.getBroadcastElement(static_cast(ftf), input1.x); + float result = operation(operand1); + VectorFloatRegister expectedResult; + expectedResult.x = result; + expectedResult.y = result; + expectedResult.z = result; + expectedResult.w = result; + return expectedResult; + } + + void setJson(nlohmann::json& data, std::string func) { + input1.setJson(data, "v1"); + dest.setJson(data, "dest"); + data["operation"] = fmt::format(func); + data["ftf"] = fmt::format("{:b}", ftf); + } +}; + +std::vector vectorMathCaseGen_OneOperandQuotient() { + std::vector cases = {}; + for (int i = 0; i <= 3; i++) { + VectorFloatTestCase_OneOperandQuotient testCase = VectorFloatTestCase_OneOperandQuotient(); + testCase.ftf = i; + cases.push_back(testCase); + } + return cases; +} + +class VectorFloatParameterizedTestFixtureWithRunner_OneOperandQuotient + : public WithGameTests, + public ::testing::WithParamInterface { + protected: + std::string templateFile = "test-vector-math-sqrt.template.gc"; +}; + +TEST_P(VectorFloatParameterizedTestFixtureWithRunner_OneOperandQuotient, VF_SQRT_FTF) { + VectorFloatTestCase_OneOperandQuotient testCase = GetParam(); + testCase.operation = [](float x) { return sqrt(x); }; + + nlohmann::json data; + testCase.setJson(data, ".sqrt.vf"); + + std::string outFile = runner.test_file_name("vector-math-sqrt-{}.generated.gc"); + env.write(templateFile, data, outFile); + runner.run_test(testCategory, outFile, + {fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat( + testCase.getExpectedResult().x))}); +} + +INSTANTIATE_TEST_SUITE_P(WithGameTests_VectorFloatTests, + VectorFloatParameterizedTestFixtureWithRunner_OneOperandQuotient, + ::testing::ValuesIn(vectorMathCaseGen_OneOperandQuotient())); diff --git a/test/test_emitter_avx.cpp b/test/test_emitter_avx.cpp index d4868fd1f6..96ca85595c 100644 --- a/test/test_emitter_avx.cpp +++ 
b/test/test_emitter_avx.cpp @@ -11,6 +11,13 @@ TEST(EmitterAVX, VF_NOP) { EXPECT_EQ(tester.dump_to_hex_string(true), "D9D0"); } +TEST(EmitterAVX, WAIT_VF) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::wait_vf()); + EXPECT_EQ(tester.dump_to_hex_string(true), "9B"); +} + TEST(EmitterAVX, MOV_VF) { CodeTester tester; tester.init_code_buffer(10000); @@ -281,6 +288,33 @@ TEST(EmitterAVX, BlendVF) { "43110CED03"); } +TEST(EmitterAVX, DivVF) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::div_vf(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::div_vf(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::div_vf(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::div_vf(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::div_vf(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::div_vf(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::div_vf(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::div_vf(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E05EDBC4C1605EDDC5905EDBC4C1105EDDC5605EEBC441605EEDC5105EEBC441105EED"); +} + +TEST(EmitterAVX, SqrtVF) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::sqrt_vf(XMM0 + 3, XMM0 + 4)); + tester.emit(IGen::sqrt_vf(XMM0 + 3, XMM0 + 14)); + tester.emit(IGen::sqrt_vf(XMM0 + 13, XMM0 + 4)); + tester.emit(IGen::sqrt_vf(XMM0 + 13, XMM0 + 14)); + + EXPECT_EQ(tester.dump_to_hex_string(true), "C5F851DCC4C17851DEC57851ECC4417851EE"); +} + TEST(EmitterAVX, RIP) { CodeTester tester; tester.init_code_buffer(1024);