mirror of https://github.com/PCSX2/pcsx2
Common: Switch pack/unpack instructions to auto SSE/AVX
This commit is contained in:
parent
0c8c798051
commit
94d87a35be
|
|
@ -59,22 +59,22 @@ namespace x86Emitter
|
|||
struct SimdImpl_PUnpack
|
||||
{
|
||||
// Unpack and interleave low-order bytes from src and dest into dest.
|
||||
const xImplSimd_DestRegEither LBW;
|
||||
const xImplSimd_3Arg LBW;
|
||||
// Unpack and interleave low-order words from src and dest into dest.
|
||||
const xImplSimd_DestRegEither LWD;
|
||||
const xImplSimd_3Arg LWD;
|
||||
// Unpack and interleave low-order doublewords from src and dest into dest.
|
||||
const xImplSimd_DestRegEither LDQ;
|
||||
const xImplSimd_3Arg LDQ;
|
||||
// Unpack and interleave low-order quadwords from src and dest into dest.
|
||||
const xImplSimd_DestRegSSE LQDQ;
|
||||
const xImplSimd_3Arg LQDQ;
|
||||
|
||||
// Unpack and interleave high-order bytes from src and dest into dest.
|
||||
const xImplSimd_DestRegEither HBW;
|
||||
const xImplSimd_3Arg HBW;
|
||||
// Unpack and interleave high-order words from src and dest into dest.
|
||||
const xImplSimd_DestRegEither HWD;
|
||||
const xImplSimd_3Arg HWD;
|
||||
// Unpack and interleave high-order doublewords from src and dest into dest.
|
||||
const xImplSimd_DestRegEither HDQ;
|
||||
const xImplSimd_3Arg HDQ;
|
||||
// Unpack and interleave high-order quadwords from src and dest into dest.
|
||||
const xImplSimd_DestRegSSE HQDQ;
|
||||
const xImplSimd_3Arg HQDQ;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
|
@ -86,19 +86,19 @@ namespace x86Emitter
|
|||
{
|
||||
// Converts packed signed word integers from src and dest into packed signed
|
||||
// byte integers in dest, using signed saturation.
|
||||
const xImplSimd_DestRegEither SSWB;
|
||||
const xImplSimd_3Arg SSWB;
|
||||
|
||||
// Converts packed signed dword integers from src and dest into packed signed
|
||||
// word integers in dest, using signed saturation.
|
||||
const xImplSimd_DestRegEither SSDW;
|
||||
const xImplSimd_3Arg SSDW;
|
||||
|
||||
// Converts packed signed word integers from src and dest into packed unsigned
|
||||
// byte integers in dest, using unsigned saturation.
|
||||
const xImplSimd_DestRegEither USWB;
|
||||
const xImplSimd_3Arg USWB;
|
||||
|
||||
// [SSE-4.1] Converts packed signed dword integers from src and dest into packed
|
||||
// unsigned word integers in dest, using unsigned saturation.
|
||||
const xImplSimd_DestRegSSE USDW;
|
||||
const xImplSimd_3Arg USDW;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
|
@ -113,14 +113,14 @@ namespace x86Emitter
|
|||
// dest[2] <- dest[3]
|
||||
// dest[3] <- src[3]
|
||||
//
|
||||
const xImplSimd_DestRegSSE HPS;
|
||||
const xImplSimd_3Arg HPS;
|
||||
|
||||
// Unpacks the high quadword [double-precision] values from src and dest into
|
||||
// dest, such that the result of dest looks like this:
|
||||
// dest.lo <- dest.hi
|
||||
// dest.hi <- src.hi
|
||||
//
|
||||
const xImplSimd_DestRegSSE HPD;
|
||||
const xImplSimd_3Arg HPD;
|
||||
|
||||
// Unpacks the low doubleword [single-precision] values from src and dest into
|
||||
// dest, such that the result of dest looks like this:
|
||||
|
|
@ -129,7 +129,7 @@ namespace x86Emitter
|
|||
// dest[1] <- src[0]
|
||||
// dest[0] <- dest[0]
|
||||
//
|
||||
const xImplSimd_DestRegSSE LPS;
|
||||
const xImplSimd_3Arg LPS;
|
||||
|
||||
// Unpacks the low quadword [double-precision] values from src and dest into
|
||||
// dest, effectively moving the low portion of src into the upper portion of dest.
|
||||
|
|
@ -137,7 +137,7 @@ namespace x86Emitter
|
|||
// dest.hi <- src.lo
|
||||
// dest.lo <- dest.lo [remains unchanged!]
|
||||
//
|
||||
const xImplSimd_DestRegSSE LPD;
|
||||
const xImplSimd_3Arg LPD;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -597,32 +597,32 @@ namespace x86Emitter
|
|||
};
|
||||
|
||||
const SimdImpl_PUnpack xPUNPCK =
|
||||
{
|
||||
{0x66, 0x60}, // LBW
|
||||
{0x66, 0x61}, // LWD
|
||||
{0x66, 0x62}, // LDQ
|
||||
{0x66, 0x6c}, // LQDQ
|
||||
{
|
||||
{SIMDInstructionInfo(0x60).i().p66()}, // LBW
|
||||
{SIMDInstructionInfo(0x61).i().p66()}, // LWD
|
||||
{SIMDInstructionInfo(0x62).i().p66()}, // LDQ
|
||||
{SIMDInstructionInfo(0x6c).i().p66()}, // LQDQ
|
||||
|
||||
{0x66, 0x68}, // HBW
|
||||
{0x66, 0x69}, // HWD
|
||||
{0x66, 0x6a}, // HDQ
|
||||
{0x66, 0x6d}, // HQDQ
|
||||
{SIMDInstructionInfo(0x68).i().p66()}, // HBW
|
||||
{SIMDInstructionInfo(0x69).i().p66()}, // HWD
|
||||
{SIMDInstructionInfo(0x6a).i().p66()}, // HDQ
|
||||
{SIMDInstructionInfo(0x6d).i().p66()}, // HQDQ
|
||||
};
|
||||
|
||||
const SimdImpl_Pack xPACK =
|
||||
{
|
||||
{0x66, 0x63}, // SSWB
|
||||
{0x66, 0x6b}, // SSDW
|
||||
{0x66, 0x67}, // USWB
|
||||
{0x66, 0x2b38}, // USDW
|
||||
{
|
||||
{SIMDInstructionInfo(0x63).i().p66()}, // SSWB
|
||||
{SIMDInstructionInfo(0x6b).i().p66()}, // SSDW
|
||||
{SIMDInstructionInfo(0x67).i().p66()}, // USWB
|
||||
{SIMDInstructionInfo(0x2b).i().p66().m0f38()}, // USDW
|
||||
};
|
||||
|
||||
const xImplSimd_Unpack xUNPCK =
|
||||
{
|
||||
{0x00, 0x15}, // HPS
|
||||
{0x66, 0x15}, // HPD
|
||||
{0x00, 0x14}, // LPS
|
||||
{0x66, 0x14}, // LPD
|
||||
{
|
||||
{SIMDInstructionInfo(0x15).f()}, // HPS
|
||||
{SIMDInstructionInfo(0x15).d().p66()}, // HPD
|
||||
{SIMDInstructionInfo(0x14).f()}, // LPS
|
||||
{SIMDInstructionInfo(0x14).d().p66()}, // LPD
|
||||
};
|
||||
|
||||
const xImplSimd_PInsert xPINSR;
|
||||
|
|
|
|||
|
|
@ -313,6 +313,23 @@ TEST(CodegenTests, SSETest)
|
|||
CODEGEN_TEST(xPEXTR.D(ptr32[rax], xmm9, 1), "66 44 0f 3a 16 08 01");
|
||||
CODEGEN_TEST(xPEXTR.Q(ptr64[rax], xmm9, 1), "66 4c 0f 3a 16 08 01");
|
||||
|
||||
CODEGEN_TEST(xPUNPCK.LBW(xmm1, xmm2), "66 0f 60 ca");
|
||||
CODEGEN_TEST(xPUNPCK.LWD(xmm1, ptr[r8]), "66 41 0f 61 08");
|
||||
CODEGEN_TEST(xPUNPCK.LDQ(xmm1, xmm8), "66 41 0f 62 c8");
|
||||
CODEGEN_TEST(xPUNPCK.LQDQ(xmm8, xmm2), "66 44 0f 6c c2");
|
||||
CODEGEN_TEST(xPUNPCK.HBW(xmm1, xmm2), "66 0f 68 ca");
|
||||
CODEGEN_TEST(xPUNPCK.HWD(xmm1, ptr[r8]), "66 41 0f 69 08");
|
||||
CODEGEN_TEST(xPUNPCK.HDQ(xmm1, xmm8), "66 41 0f 6a c8");
|
||||
CODEGEN_TEST(xPUNPCK.HQDQ(xmm8, xmm2), "66 44 0f 6d c2");
|
||||
CODEGEN_TEST(xPACK.SSWB(xmm1, xmm2), "66 0f 63 ca");
|
||||
CODEGEN_TEST(xPACK.SSDW(xmm1, ptr[rax]), "66 0f 6b 08");
|
||||
CODEGEN_TEST(xPACK.USWB(xmm1, xmm8), "66 41 0f 67 c8");
|
||||
CODEGEN_TEST(xPACK.USDW(xmm8, xmm2), "66 44 0f 38 2b c2");
|
||||
CODEGEN_TEST(xUNPCK.LPS(xmm1, xmm2), "0f 14 ca");
|
||||
CODEGEN_TEST(xUNPCK.LPD(xmm1, ptr[r8]), "66 41 0f 14 08");
|
||||
CODEGEN_TEST(xUNPCK.HPS(xmm1, xmm8), "41 0f 15 c8");
|
||||
CODEGEN_TEST(xUNPCK.HPD(xmm8, xmm2), "66 44 0f 15 c2");
|
||||
|
||||
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
|
||||
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
|
||||
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
|
||||
|
|
@ -497,6 +514,23 @@ TEST(CodegenTests, AVXTest)
|
|||
CODEGEN_TEST(xPEXTR.D(ptr32[rax], xmm9, 1), "c4 63 79 16 08 01");
|
||||
CODEGEN_TEST(xPEXTR.Q(ptr64[rax], xmm9, 1), "c4 63 f9 16 08 01");
|
||||
|
||||
CODEGEN_TEST(xPUNPCK.LBW(xmm1, xmm2), "c5 f1 60 ca");
|
||||
CODEGEN_TEST(xPUNPCK.LWD(xmm1, ptr[r8]), "c4 c1 71 61 08");
|
||||
CODEGEN_TEST(xPUNPCK.LDQ(xmm1, xmm8), "c4 c1 71 62 c8");
|
||||
CODEGEN_TEST(xPUNPCK.LQDQ(xmm8, xmm2), "c5 39 6c c2");
|
||||
CODEGEN_TEST(xPUNPCK.HBW(xmm1, xmm2), "c5 f1 68 ca");
|
||||
CODEGEN_TEST(xPUNPCK.HWD(xmm1, ptr[r8]), "c4 c1 71 69 08");
|
||||
CODEGEN_TEST(xPUNPCK.HDQ(xmm1, xmm8), "c4 c1 71 6a c8");
|
||||
CODEGEN_TEST(xPUNPCK.HQDQ(xmm8, xmm2), "c5 39 6d c2");
|
||||
CODEGEN_TEST(xPACK.SSWB(xmm1, xmm2), "c5 f1 63 ca");
|
||||
CODEGEN_TEST(xPACK.SSDW(xmm1, ptr[rax]), "c5 f1 6b 08");
|
||||
CODEGEN_TEST(xPACK.USWB(xmm1, xmm8), "c4 c1 71 67 c8");
|
||||
CODEGEN_TEST(xPACK.USDW(xmm8, xmm2), "c4 62 39 2b c2");
|
||||
CODEGEN_TEST(xUNPCK.LPS(xmm1, xmm2), "c5 f0 14 ca");
|
||||
CODEGEN_TEST(xUNPCK.LPD(xmm1, ptr[r8]), "c4 c1 71 14 08");
|
||||
CODEGEN_TEST(xUNPCK.HPS(xmm1, xmm8), "c4 c1 70 15 c8");
|
||||
CODEGEN_TEST(xUNPCK.HPD(xmm8, xmm2), "c5 39 15 c2");
|
||||
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
|
||||
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");
|
||||
|
|
|
|||
Loading…
Reference in New Issue