From: Julian Seward
Date: Sun, 15 Jun 2014 08:17:35 +0000 (+0000)
Subject: Remove temporary front end scaffolding for Cat{Even,Odd}Lanes
X-Git-Tag: svn/VALGRIND_3_10_1^2~95
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f829fff03b691057d7cbb879124019d12d04f72d;p=thirdparty%2Fvalgrind.git

Remove temporary front end scaffolding for Cat{Even,Odd}Lanes
and Interleave{LO,HI} operations, and instead generate real
UZP1/UZP2/ZIP1/ZIP2 instructions in the back end.

git-svn-id: svn://svn.valgrind.org/vex/trunk@2876
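For reference, the lane semantics behind these instruction names, as a minimal C
sketch for the 32x4 case (not from the commit; the struct and helper names are
illustrative only, and lane 0 is taken to be the least significant lane). UZP1/UZP2
concatenate the even/odd-numbered lanes of the two sources; ZIP1/ZIP2 interleave
their low/high halves. The IR Cat*/Interleave* ops produce the same shuffles but
with the operands in the opposite order, which is presumably why the instruction
selector below passes them swapped (sw = True).

   #include <stdint.h>

   /* Sketch of UZP1/ZIP1 on 4 x 32-bit lanes; lane 0 = least significant. */
   typedef struct { uint32_t lane[4]; } V128x32;

   /* UZP1 Vd, Vn, Vm: even lanes of Vn in the low half, even lanes of Vm above. */
   static V128x32 uzp1_32x4 ( V128x32 n, V128x32 m )
   {
      V128x32 r;
      r.lane[0] = n.lane[0];  r.lane[1] = n.lane[2];
      r.lane[2] = m.lane[0];  r.lane[3] = m.lane[2];
      return r;
   }

   /* ZIP1 Vd, Vn, Vm: interleave the low halves, with the Vn element in the
      lower lane of each pair. */
   static V128x32 zip1_32x4 ( V128x32 n, V128x32 m )
   {
      V128x32 r;
      r.lane[0] = n.lane[0];  r.lane[1] = m.lane[0];
      r.lane[2] = n.lane[1];  r.lane[3] = m.lane[1];
      return r;
   }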
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index cfeaadd86a..71e20aa977 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -4949,25 +4949,85 @@ Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
 /*--- SIMD and FP instructions                            ---*/
 /*------------------------------------------------------------*/
 
-/* begin FIXME -- rm temp scaffolding */
-static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
-static IRExpr* mk_CatOddLanes64x2 ( IRTemp, IRTemp );
-
-static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
-static IRExpr* mk_CatOddLanes32x4 ( IRTemp, IRTemp );
-static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
-static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );
-
-static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
-static IRExpr* mk_CatOddLanes16x8 ( IRTemp, IRTemp );
-static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
-static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );
-
-static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
-static IRExpr* mk_CatOddLanes8x16 ( IRTemp, IRTemp );
-static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
-static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
-/* end FIXME -- rm temp scaffolding */
+/* Some constructors for interleave/deinterleave expressions. */
+
+static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
+   // returns a0 b0
+   return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
+}
+
+static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
+   // returns a1 b1
+   return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
+}
+
+static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
+   // returns a2 a0 b2 b0
+   return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
+}
+
+static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
+   // returns a3 a1 b3 b1
+   return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
+}
+
+static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
+   // returns a1 b1 a0 b0
+   return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
+}
+
+static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
+   // returns a3 b3 a2 b2
+   return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
+}
+
+static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
+   // returns a6 a4 a2 a0 b6 b4 b2 b0
+   return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
+}
+
+static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
+   // returns a7 a5 a3 a1 b7 b5 b3 b1
+   return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
+}
+
+static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
+   // returns a3 b3 a2 b2 a1 b1 a0 b0
+   return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
+}
+
+static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
+   // returns a7 b7 a6 b6 a5 b5 a4 b4
+   return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
+}
+
+static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
+                                     IRTemp bFEDCBA9876543210 ) {
+   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
+   return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
+                                      mkexpr(bFEDCBA9876543210));
+}
+
+static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
+                                    IRTemp bFEDCBA9876543210 ) {
+   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
+   return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
+                                     mkexpr(bFEDCBA9876543210));
+}
+
+static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
+                                     IRTemp bFEDCBA9876543210 ) {
+   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
+   return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
+                                      mkexpr(bFEDCBA9876543210));
+}
+
+static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
+                                     IRTemp bFEDCBA9876543210 ) {
+   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
+   return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
+                                      mkexpr(bFEDCBA9876543210));
+}
 
 /* Generate N copies of |bit| in the bottom of a ULong. */
 static ULong Replicate ( ULong bit, Int N )
@@ -7999,441 +8059,6 @@ DisResult disInstr_ARM64 ( IRSB* irsb_IN,
    return dres;
 }
 
-////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////
-
-/* Spare code for doing reference implementations of various 128-bit
-   SIMD interleaves/deinterleaves/concatenation ops.  For 64-bit
-   equivalents see the end of guest_arm_toIR.c. */
-
-////////////////////////////////////////////////////////////////
-// 64x2 operations
-//
-static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
-{
-   // returns a0 b0
-   return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
-                                unop(Iop_V128to64, mkexpr(b10)));
-}
-
-static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
-{
-   // returns a1 b1
-   return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
-                                unop(Iop_V128HIto64, mkexpr(b10)));
-}
-
-
-////////////////////////////////////////////////////////////////
-// 32x4 operations
-//
-
-// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
-// the top halves guaranteed to be zero.
-static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
-                             IRTemp* out0, IRTemp v128 )
-{
-   if (out3) *out3 = newTemp(Ity_I64);
-   if (out2) *out2 = newTemp(Ity_I64);
-   if (out1) *out1 = newTemp(Ity_I64);
-   if (out0) *out0 = newTemp(Ity_I64);
-   IRTemp hi64 = newTemp(Ity_I64);
-   IRTemp lo64 = newTemp(Ity_I64);
-   assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
-   assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
-   if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
-   if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
-   if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
-   if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
-}
-
-// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
-// IRTemp.
-static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
-{
-   IRTemp hi64 = newTemp(Ity_I64);
-   IRTemp lo64 = newTemp(Ity_I64);
-   assign(hi64,
-          binop(Iop_Or64,
-                binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
-                binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
-   assign(lo64,
-          binop(Iop_Or64,
-                binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
-                binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
-   IRTemp res = newTemp(Ity_V128);
-   assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
-   return res;
-}
-
-static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
-{
-   // returns a2 a0 b2 b0
-   IRTemp a2, a0, b2, b0;
-   breakV128to32s(NULL, &a2, NULL, &a0, a3210);
-   breakV128to32s(NULL, &b2, NULL, &b0, b3210);
-   return mkexpr(mkV128from32s(a2, a0, b2, b0));
-}
-
-static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
-{
-   // returns a3 a1 b3 b1
-   IRTemp a3, a1, b3, b1;
-   breakV128to32s(&a3, NULL, &a1, NULL, a3210);
-   breakV128to32s(&b3, NULL, &b1, NULL, b3210);
-   return mkexpr(mkV128from32s(a3, a1, b3, b1));
-}
-
-static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 )
-{
-   // returns a1 b1 a0 b0
-   IRTemp a1, a0, b1, b0;
-   breakV128to32s(NULL, NULL, &a1, &a0, a3210);
-   breakV128to32s(NULL, NULL, &b1, &b0, b3210);
-   return mkexpr(mkV128from32s(a1, b1, a0, b0));
-}
-
-static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 )
-{
-   // returns a3 b3 a2 b2
-   IRTemp a3, a2, b3, b2;
-   breakV128to32s(&a3, &a2, NULL, NULL, a3210);
-   breakV128to32s(&b3, &b2, NULL, NULL, b3210);
-   return mkexpr(mkV128from32s(a3, b3, a2, b2));
-}
-
-////////////////////////////////////////////////////////////////
-// 16x8 operations
-//
-
-static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
-                             IRTemp* out4, IRTemp* out3, IRTemp* out2,
-                             IRTemp* out1,IRTemp* out0, IRTemp v128 )
-{
-   if (out7) *out7 = newTemp(Ity_I64);
-   if (out6) *out6 = newTemp(Ity_I64);
-   if (out5) *out5 = newTemp(Ity_I64);
-   if (out4) *out4 = newTemp(Ity_I64);
-   if (out3) *out3 = newTemp(Ity_I64);
-   if (out2) *out2 = newTemp(Ity_I64);
-   if (out1) *out1 = newTemp(Ity_I64);
-   if (out0) *out0 = newTemp(Ity_I64);
-   IRTemp hi64 = newTemp(Ity_I64);
-   IRTemp lo64 = newTemp(Ity_I64);
-   assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
-   assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
-   if (out7)
-      assign(*out7, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
-                          mkU64(0xFFFF)));
-   if (out6)
-      assign(*out6, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
-                          mkU64(0xFFFF)));
-   if (out5)
-      assign(*out5, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
-                          mkU64(0xFFFF)));
-   if (out4)
-      assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
-   if (out3)
-      assign(*out3, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
-                          mkU64(0xFFFF)));
-   if (out2)
-      assign(*out2, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
-                          mkU64(0xFFFF)));
-   if (out1)
-      assign(*out1, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
-                          mkU64(0xFFFF)));
-   if (out0)
-      assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
-}
-
-static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
-                              IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
-{
-   IRTemp hi64 = newTemp(Ity_I64);
-   IRTemp lo64 = newTemp(Ity_I64);
-   assign(hi64,
-          binop(Iop_Or64,
-                binop(Iop_Or64,
-                      binop(Iop_Shl64,
-                            binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
-                            mkU8(48)),
-                      binop(Iop_Shl64,
-                            binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
-                            mkU8(32))),
-                binop(Iop_Or64,
-                      binop(Iop_Shl64,
-                            binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
-                            mkU8(16)),
-                      binop(Iop_And64,
-                            mkexpr(in4), mkU64(0xFFFF)))));
-   assign(lo64,
-          binop(Iop_Or64,
-                binop(Iop_Or64,
-                      binop(Iop_Shl64,
-                            binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
-                            mkU8(48)),
-                      binop(Iop_Shl64,
-                            binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
-                            mkU8(32))),
-                binop(Iop_Or64,
-                      binop(Iop_Shl64,
-                            binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
-                            mkU8(16)),
-                      binop(Iop_And64,
-                            mkexpr(in0), mkU64(0xFFFF)))));
-   IRTemp res = newTemp(Ity_V128);
-   assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
-   return res;
-}
-
-static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
-{
-   // returns a6 a4 a2 a0 b6 b4 b2 b0
-   IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
-   breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
-   breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
-   return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
-}
-
-static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
-{
-   // returns a7 a5 a3 a1 b7 b5 b3 b1
-   IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
-   breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
-   breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
-   return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1));
-}
-
-static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 )
-{
-   // returns a3 b3 a2 b2 a1 b1 a0 b0
-   IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
-   breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
-   breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
-   return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0));
-}
-
-static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 )
-{
-   // returns a7 b7 a6 b6 a5 b5 a4 b4
-   IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
-   breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
-   breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
-   return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4));
-}
-
-////////////////////////////////////////////////////////////////
-// 8x16 operations
-//
-
-static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD,
-                            IRTemp* outC, IRTemp* outB, IRTemp* outA,
-                            IRTemp* out9, IRTemp* out8,
-                            IRTemp* out7, IRTemp* out6, IRTemp* out5,
-                            IRTemp* out4, IRTemp* out3, IRTemp* out2,
-                            IRTemp* out1,IRTemp* out0, IRTemp v128 )
-{
-   if (outF) *outF = newTemp(Ity_I64);
-   if (outE) *outE = newTemp(Ity_I64);
-   if (outD) *outD = newTemp(Ity_I64);
-   if (outC) *outC = newTemp(Ity_I64);
-   if (outB) *outB = newTemp(Ity_I64);
-   if (outA) *outA = newTemp(Ity_I64);
-   if (out9) *out9 = newTemp(Ity_I64);
-   if (out8) *out8 = newTemp(Ity_I64);
-   if (out7) *out7 = newTemp(Ity_I64);
-   if (out6) *out6 = newTemp(Ity_I64);
-   if (out5) *out5 = newTemp(Ity_I64);
-   if (out4) *out4 = newTemp(Ity_I64);
-   if (out3) *out3 = newTemp(Ity_I64);
-   if (out2) *out2 = newTemp(Ity_I64);
-   if (out1) *out1 = newTemp(Ity_I64);
-   if (out0) *out0 = newTemp(Ity_I64);
-   IRTemp hi64 = newTemp(Ity_I64);
-   IRTemp lo64 = newTemp(Ity_I64);
-   assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
-   assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
-   if (outF)
-      assign(*outF, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(56)),
-                          mkU64(0xFF)));
-   if (outE)
-      assign(*outE, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
-                          mkU64(0xFF)));
-   if (outD)
-      assign(*outD, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(40)),
-                          mkU64(0xFF)));
-   if (outC)
-      assign(*outC, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
-                          mkU64(0xFF)));
-   if (outB)
-      assign(*outB, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(24)),
-                          mkU64(0xFF)));
-   if (outA)
-      assign(*outA, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
-                          mkU64(0xFF)));
-   if (out9)
-      assign(*out9, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(8)),
-                          mkU64(0xFF)));
-   if (out8)
-      assign(*out8, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(hi64), mkU8(0)),
-                          mkU64(0xFF)));
-   if (out7)
-      assign(*out7, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(56)),
-                          mkU64(0xFF)));
-   if (out6)
-      assign(*out6, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
-                          mkU64(0xFF)));
-   if (out5)
-      assign(*out5, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(40)),
-                          mkU64(0xFF)));
-   if (out4)
-      assign(*out4, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
-                          mkU64(0xFF)));
-   if (out3)
-      assign(*out3, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(24)),
-                          mkU64(0xFF)));
-   if (out2)
-      assign(*out2, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
-                          mkU64(0xFF)));
-   if (out1)
-      assign(*out1, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(8)),
-                          mkU64(0xFF)));
-   if (out0)
-      assign(*out0, binop(Iop_And64,
-                          binop(Iop_Shr64, mkexpr(lo64), mkU8(0)),
-                          mkU64(0xFF)));
-}
-
-static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC,
-                             IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8,
-                             IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
-                             IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
-{
-   IRTemp vFE = newTemp(Ity_I64);
-   IRTemp vDC = newTemp(Ity_I64);
-   IRTemp vBA = newTemp(Ity_I64);
-   IRTemp v98 = newTemp(Ity_I64);
-   IRTemp v76 = newTemp(Ity_I64);
-   IRTemp v54 = newTemp(Ity_I64);
-   IRTemp v32 = newTemp(Ity_I64);
-   IRTemp v10 = newTemp(Ity_I64);
-   assign(vFE, binop(Iop_Or64,
-                     binop(Iop_Shl64,
-                           binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)),
-                     binop(Iop_And64, mkexpr(inE), mkU64(0xFF))));
-   assign(vDC, binop(Iop_Or64,
-                     binop(Iop_Shl64,
-                           binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)),
-                     binop(Iop_And64, mkexpr(inC), mkU64(0xFF))));
-   assign(vBA, binop(Iop_Or64,
-                     binop(Iop_Shl64,
-                           binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)),
-                     binop(Iop_And64, mkexpr(inA), mkU64(0xFF))));
-   assign(v98, binop(Iop_Or64,
-                     binop(Iop_Shl64,
-                           binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)),
-                     binop(Iop_And64, mkexpr(in8), mkU64(0xFF))));
-   assign(v76, binop(Iop_Or64,
-                     binop(Iop_Shl64,
-                           binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)),
-                     binop(Iop_And64, mkexpr(in6), mkU64(0xFF))));
-   assign(v54, binop(Iop_Or64,
-                     binop(Iop_Shl64,
-                           binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)),
-                     binop(Iop_And64, mkexpr(in4), mkU64(0xFF))));
-   assign(v32, binop(Iop_Or64,
-                     binop(Iop_Shl64,
-                           binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)),
-                     binop(Iop_And64, mkexpr(in2), mkU64(0xFF))));
-   assign(v10, binop(Iop_Or64,
-                     binop(Iop_Shl64,
-                           binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)),
-                     binop(Iop_And64, mkexpr(in0), mkU64(0xFF))));
-   return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10);
-}
-
-static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
-                                     IRTemp bFEDCBA9876543210 )
-{
-   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
-   IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0;
-   breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8,
-                 NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0,
-                 aFEDCBA9876543210);
-   breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8,
-                 NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0,
-                 bFEDCBA9876543210);
-   return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0,
-                              bE, bC, bA, b8, b6, b4, b2, b0));
-}
-
-static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
-                                    IRTemp bFEDCBA9876543210 )
-{
-   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
-   IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1;
-   breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL,
-                 &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL,
-                 aFEDCBA9876543210);
-
-   breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL,
-                 &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL,
-                 aFEDCBA9876543210);
-
-   return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1,
-                              bF, bD, bB, b9, b7, b5, b3, b1));
-}
-
-static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
-                                     IRTemp bFEDCBA9876543210 )
-{
-   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
-   IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0;
-   breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                 &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0,
-                 aFEDCBA9876543210);
-   breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                 &b7, &b6, &b5, &b4, &b3, &b2, &b1, &b0,
-                 bFEDCBA9876543210);
-   return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4,
-                              a3, b3, a2, b2, a1, b1, a0, b0));
-}
-
-static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
-                                     IRTemp bFEDCBA9876543210 )
-{
-   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
-   IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8;
-   breakV128to8s(&aF, &aE, &aD, &aC, &aB, &aA, &a9, &a8,
-                 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                 aFEDCBA9876543210);
-   breakV128to8s(&bF, &bE, &bD, &bC, &bB, &bA, &b9, &b8,
-                 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                 bFEDCBA9876543210);
-   return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC,
-                              aB, bB, aA, bA, a9, b9, a8, b8));
-}
 
 /*--------------------------------------------------------------------*/
 /*--- end                                       guest_arm64_toIR.c ---*/
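For reference, a minimal C sketch (assumed, not part of the commit) of how the
3/8/5/6/5/5-bit fields listed in the encoding comments added to the emitter below
are packed into a 32-bit instruction word, using UZP1 Vd.4s, Vn.4s, Vm.4s
("010 01110 10 0 m 000110 n d") as the example. The packing is assumed to mirror
what the X_3_8_5_6_5_5 helper used in emit_ARM64Instr does; the function names
here are illustrative only.

   #include <stdint.h>
   #include <assert.h>

   /* Pack fields of widths 3,8,5,6,5,5 (high to low) into one 32-bit word. */
   static uint32_t pack_3_8_5_6_5_5 ( uint32_t f1, uint32_t f2, uint32_t f3,
                                      uint32_t f4, uint32_t f5, uint32_t f6 )
   {
      return (f1 << 29) | (f2 << 21) | (f3 << 16) | (f4 << 10) | (f5 << 5) | f6;
   }

   /* UZP1 Vd.4s, Vn.4s, Vm.4s  =  010 01110100 | m | 000110 | n | d */
   static uint32_t encode_uzp1_4s ( uint32_t d, uint32_t n, uint32_t m )
   {
      assert(d < 32 && n < 32 && m < 32);
      return pack_3_8_5_6_5_5(0x2 /*010*/, 0x74 /*01110100*/, m,
                              0x06 /*000110*/, n, d);
   }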
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index aa88f7bfef..29f78db64c 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -896,13 +896,27 @@ static void showARM64VecBinOp(/*OUT*/const HChar** nm,
       case ARM64vecb_CMGT32x4: *nm = "cmgt"; *ar = "4s"; return;
       case ARM64vecb_CMGT16x8: *nm = "cmgt"; *ar = "8h"; return;
       case ARM64vecb_CMGT8x16: *nm = "cmgt"; *ar = "16b"; return;
-      case ARM64vecb_FCMEQ64x2: *nm = "fcmeq"; *ar = "2d"; return;
-      case ARM64vecb_FCMEQ32x4: *nm = "fcmeq"; *ar = "4s"; return;
-      case ARM64vecb_FCMGE64x2: *nm = "fcmge"; *ar = "2d"; return;
-      case ARM64vecb_FCMGE32x4: *nm = "fcmge"; *ar = "4s"; return;
-      case ARM64vecb_FCMGT64x2: *nm = "fcmgt"; *ar = "2d"; return;
-      case ARM64vecb_FCMGT32x4: *nm = "fcmgt"; *ar = "4s"; return;
+      case ARM64vecb_FCMEQ64x2: *nm = "fcmeq"; *ar = "2d"; return;
+      case ARM64vecb_FCMEQ32x4: *nm = "fcmeq"; *ar = "4s"; return;
+      case ARM64vecb_FCMGE64x2: *nm = "fcmge"; *ar = "2d"; return;
+      case ARM64vecb_FCMGE32x4: *nm = "fcmge"; *ar = "4s"; return;
+      case ARM64vecb_FCMGT64x2: *nm = "fcmgt"; *ar = "2d"; return;
+      case ARM64vecb_FCMGT32x4: *nm = "fcmgt"; *ar = "4s"; return;
       case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return;
+      case ARM64vecb_UZP164x2: *nm = "uzp1"; *ar = "2d"; return;
+      case ARM64vecb_UZP132x4: *nm = "uzp1"; *ar = "4s"; return;
+      case ARM64vecb_UZP116x8: *nm = "uzp1"; *ar = "8h"; return;
+      case ARM64vecb_UZP18x16: *nm = "uzp1"; *ar = "16b"; return;
+      case ARM64vecb_UZP264x2: *nm = "uzp2"; *ar = "2d"; return;
+      case ARM64vecb_UZP232x4: *nm = "uzp2"; *ar = "4s"; return;
+      case ARM64vecb_UZP216x8: *nm = "uzp2"; *ar = "8h"; return;
+      case ARM64vecb_UZP28x16: *nm = "uzp2"; *ar = "16b"; return;
+      case ARM64vecb_ZIP132x4: *nm = "zip1"; *ar = "4s"; return;
+      case ARM64vecb_ZIP116x8: *nm = "zip1"; *ar = "8h"; return;
+      case ARM64vecb_ZIP18x16: *nm = "zip1"; *ar = "16b"; return;
+      case ARM64vecb_ZIP232x4: *nm = "zip2"; *ar = "4s"; return;
+      case ARM64vecb_ZIP216x8: *nm = "zip2"; *ar = "8h"; return;
+      case ARM64vecb_ZIP28x16: *nm = "zip2"; *ar = "16b"; return;
       default: vpanic("showARM64VecBinOp");
    }
 }
@@ -3385,11 +3399,13 @@ static inline UChar qregNo ( HReg r )
 #define X000000 BITS8(0,0, 0,0,0,0,0,0)
 #define X000001 BITS8(0,0, 0,0,0,0,0,1)
 #define X000100 BITS8(0,0, 0,0,0,1,0,0)
+#define X000110 BITS8(0,0, 0,0,0,1,1,0)
 #define X000111 BITS8(0,0, 0,0,0,1,1,1)
 #define X001000 BITS8(0,0, 0,0,1,0,0,0)
 #define X001001 BITS8(0,0, 0,0,1,0,0,1)
 #define X001010 BITS8(0,0, 0,0,1,0,1,0)
 #define X001101 BITS8(0,0, 0,0,1,1,0,1)
+#define X001110 BITS8(0,0, 0,0,1,1,1,0)
 #define X001111 BITS8(0,0, 0,0,1,1,1,1)
 #define X010000 BITS8(0,0, 0,1,0,0,0,0)
 #define X010001 BITS8(0,0, 0,1,0,0,0,1)
@@ -3435,8 +3451,11 @@ static inline UChar qregNo ( HReg r )
 #define X01100011 BITS8(0,1,1,0,0,0,1,1)
 #define X01110000 BITS8(0,1,1,1,0,0,0,0)
 #define X01110001 BITS8(0,1,1,1,0,0,0,1)
+#define X01110010 BITS8(0,1,1,1,0,0,1,0)
 #define X01110011 BITS8(0,1,1,1,0,0,1,1)
+#define X01110100 BITS8(0,1,1,1,0,1,0,0)
 #define X01110101 BITS8(0,1,1,1,0,1,0,1)
+#define X01110110 BITS8(0,1,1,1,0,1,1,0)
 #define X01110111 BITS8(0,1,1,1,0,1,1,1)
 #define X11000001 BITS8(1,1,0,0,0,0,0,1)
 #define X11000011 BITS8(1,1,0,0,0,0,1,1)
@@ -5038,6 +5057,23 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          010 01110 00 0 m 000000 n d   TBL Vd.16b, {Vn.16b}, Vm.16b
 
+         010 01110 11 0 m 000110 n d   UZP1 Vd.2d, Vn.2d, Vm.2d
+         010 01110 10 0 m 000110 n d   UZP1 Vd.4s, Vn.4s, Vm.4s
+         010 01110 01 0 m 000110 n d   UZP1 Vd.8h, Vn.8h, Vm.8h
+         010 01110 00 0 m 000110 n d   UZP1 Vd.16b, Vn.16b, Vm.16b
+
+         010 01110 11 0 m 010110 n d   UZP2 Vd.2d, Vn.2d, Vm.2d
+         010 01110 10 0 m 010110 n d   UZP2 Vd.4s, Vn.4s, Vm.4s
+         010 01110 01 0 m 010110 n d   UZP2 Vd.8h, Vn.8h, Vm.8h
+         010 01110 00 0 m 010110 n d   UZP2 Vd.16b, Vn.16b, Vm.16b
+
+         010 01110 10 0 m 001110 n d   ZIP1 Vd.4s, Vn.4s, Vm.4s
+         010 01110 01 0 m 001110 n d   ZIP1 Vd.8h, Vn.8h, Vm.8h
+         010 01110 10 0 m 001110 n d   ZIP1 Vd.16b, Vn.16b, Vm.16b
+
+         010 01110 10 0 m 011110 n d   ZIP2 Vd.4s, Vn.4s, Vm.4s
+         010 01110 01 0 m 011110 n d   ZIP2 Vd.8h, Vn.8h, Vm.8h
+         010 01110 10 0 m 011110 n d   ZIP2 Vd.16b, Vn.16b, Vm.16b
       */
       UInt vD = qregNo(i->ARM64in.VBinV.dst);
       UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -5212,7 +5248,53 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
             break;
          case ARM64vecb_TBL1:
-            *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
+            *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
+            break;
+
+         case ARM64vecb_UZP164x2:
+            *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
+            break;
+         case ARM64vecb_UZP132x4:
+            *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
+            break;
+         case ARM64vecb_UZP116x8:
+            *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
+            break;
+         case ARM64vecb_UZP18x16:
+            *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
+            break;
+
+         case ARM64vecb_UZP264x2:
+            *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
+            break;
+         case ARM64vecb_UZP232x4:
+            *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
+            break;
+         case ARM64vecb_UZP216x8:
+            *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
+            break;
+         case ARM64vecb_UZP28x16:
+            *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
+            break;
+
+         case ARM64vecb_ZIP132x4:
+            *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
+            break;
+         case ARM64vecb_ZIP116x8:
+            *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
+            break;
+         case ARM64vecb_ZIP18x16:
+            *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
+            break;
+
+         case ARM64vecb_ZIP232x4:
+            *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
+            break;
+         case ARM64vecb_ZIP216x8:
+            *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
+            break;
+         case ARM64vecb_ZIP28x16:
+            *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
             break;
 
          default:
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 437834e5f1..baec464cf8 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -360,6 +360,20 @@ typedef
       ARM64vecb_FCMGT64x2,
       ARM64vecb_FCMGT32x4,
       ARM64vecb_TBL1,
+      ARM64vecb_UZP164x2,
+      ARM64vecb_UZP132x4,
+      ARM64vecb_UZP116x8,
+      ARM64vecb_UZP18x16,
+      ARM64vecb_UZP264x2,
+      ARM64vecb_UZP232x4,
+      ARM64vecb_UZP216x8,
+      ARM64vecb_UZP28x16,
+      ARM64vecb_ZIP132x4,
+      ARM64vecb_ZIP116x8,
+      ARM64vecb_ZIP18x16,
+      ARM64vecb_ZIP232x4,
+      ARM64vecb_ZIP216x8,
+      ARM64vecb_ZIP28x16,
       ARM64vecb_INVALID
    }
    ARM64VecBinOp;
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 792d0747bc..17d76e479f 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -4957,6 +4957,20 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          case Iop_CmpLT64Fx2:
          case Iop_CmpLT32Fx4:
          case Iop_Perm8x16:
+         case Iop_InterleaveLO64x2:
+         case Iop_CatEvenLanes32x4:
+         case Iop_CatEvenLanes16x8:
+         case Iop_CatEvenLanes8x16:
+         case Iop_InterleaveHI64x2:
+         case Iop_CatOddLanes32x4:
+         case Iop_CatOddLanes16x8:
+         case Iop_CatOddLanes8x16:
+         case Iop_InterleaveHI32x4:
+         case Iop_InterleaveHI16x8:
+         case Iop_InterleaveHI8x16:
+         case Iop_InterleaveLO32x4:
+         case Iop_InterleaveLO16x8:
+         case Iop_InterleaveLO8x16:
          {
             HReg res = newVRegV(env);
            HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
@@ -5009,6 +5023,34 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
            case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
            case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
            case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
+           case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
+                                      break;
+           case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
+                                      break;
+           case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
+                                      break;
+           case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
+                                      break;
+           case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
+                                      break;
+           case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True;
+                                     break;
+           case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True;
+                                     break;
+           case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True;
+                                     break;
+           case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
+                                      break;
+           case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
+                                      break;
+           case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
+                                      break;
+           case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
+                                      break;
+           case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
+                                      break;
+           case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
+                                      break;
            default: vassert(0);
         }
         if (sw) {