From: Julian Seward Date: Mon, 17 Feb 2014 11:00:53 +0000 (+0000) Subject: Implement more aarch64 vector insns: X-Git-Tag: svn/VALGRIND_3_10_1^2~150 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c33e63a44e8e403e4001d7f8a7ec3dc5281d116c;p=thirdparty%2Fvalgrind.git Implement more aarch64 vector insns: CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) {EOR,BSL,BIT,BIF} (vector) {USHR,SSHR} (vector, immediate) {U,S}SHLL{,2} INS (general) FABD Vd,Vn,Vm git-svn-id: svn://svn.valgrind.org/vex/trunk@2820 --- diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c index 070c7dfb87..e386ca1179 100644 --- a/VEX/priv/guest_arm64_toIR.c +++ b/VEX/priv/guest_arm64_toIR.c @@ -4372,12 +4372,21 @@ Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn) /* begin FIXME -- rm temp scaffolding */ static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp ); static IRExpr* mk_CatOddLanes64x2 ( IRTemp, IRTemp ); + static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp ); static IRExpr* mk_CatOddLanes32x4 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp ); + static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp ); static IRExpr* mk_CatOddLanes16x8 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp ); + static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp ); static IRExpr* mk_CatOddLanes8x16 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp ); +static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp ); /* end FIXME -- rm temp scaffolding */ /* Generate N copies of |bit| in the bottom of a ULong. */ @@ -4583,6 +4592,38 @@ static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper, } +/* Helper for decoding laneage for shift-style vector operations + that involve an immediate shift amount. */ +static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2, + UInt immh, UInt immb ) +{ + vassert(immh < (1<<4)); + vassert(immb < (1<<3)); + UInt immhb = (immh << 3) | immb; + if (immh & 8) { + if (shift) *shift = 128 - immhb; + if (szBlg2) *szBlg2 = 3; + return True; + } + if (immh & 4) { + if (shift) *shift = 64 - immhb; + if (szBlg2) *szBlg2 = 2; + return True; + } + if (immh & 2) { + if (shift) *shift = 32 - immhb; + if (szBlg2) *szBlg2 = 1; + return True; + } + if (immh & 1) { + if (shift) *shift = 16 - immhb; + if (szBlg2) *szBlg2 = 0; + return True; + } + return False; +} + + /* Generate IR to fold all lanes of the V128 value in 'src' as characterised by the operator 'op', and return the result in the bottom bits of a V128, with all other bits set to zero. 
 */
@@ -5447,6 +5488,7 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
       0q1 01110 0 sz 1  m  111111 n d  FDIV Vd,Vn,Vm   4
       0q0 01110 0 sz 1  m  110011 n d  FMLA Vd,Vn,Vm   5
       0q0 01110 1 sz 1  m  110011 n d  FMLS Vd,Vn,Vm   6
+      0q1 01110 1 sz 1  m  110101 n d  FABD Vd,Vn,Vm   7
    */
    if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
        && INSN(21,21) == 1) {
@@ -5465,6 +5507,7 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
       else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4;
       else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5;
       else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6;
+      else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7;
       IRType laneTy = Ity_INVALID;
       Bool   zeroHI = False;
       const HChar* arr = "??";
@@ -5502,11 +5545,28 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
                          mkexpr(rm), getQReg128(nn), getQReg128(mm)));
          assign(t2, triop(ix == 5 ? opADD : opSUB,
                           mkexpr(rm), getQReg128(dd), mkexpr(t1)));
-         putQReg128(dd, mkexpr(t2));
+         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
+                               : mkexpr(t2));
          DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls",
              nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
          return True;
       }
+      if (ok && ix == 7) {
+         IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
+         IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
+         IRTemp rm = mk_get_IR_rounding_mode();
+         IRTemp t1 = newTemp(Ity_V128);
+         IRTemp t2 = newTemp(Ity_V128);
+         // FIXME: use Abd primop instead?
+         assign(t1, triop(opSUB,
+                          mkexpr(rm), getQReg128(nn), getQReg128(mm)));
+         assign(t2, unop(opABS, mkexpr(t1)));
+         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
+                               : mkexpr(t2));
+         DIP("fabd %s.%s, %s.%s, %s.%s\n",
+             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+         return True;
+      }
    }
 
    /* ---------------- ADD/SUB (vector) ---------------- */
@@ -5762,6 +5822,274 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */
+   /* 31  28    23   21 15     9 4                               ix
+      0q1 01110 size 1  m 100011 n d  CMEQ  Vd.T, Vn.T, Vm.T  (1) ==
+      0q0 01110 size 1  m 100011 n d  CMTST Vd.T, Vn.T, Vm.T  (2) &, == 0
+
+      0q1 01110 size 1  m 001101 n d  CMHI Vd.T, Vn.T, Vm.T   (3) >u
+      0q0 01110 size 1  m 001101 n d  CMGT Vd.T, Vn.T, Vm.T   (4) >s
+
+      0q1 01110 size 1  m 001111 n d  CMHS Vd.T, Vn.T, Vm.T   (5) >=u
+      0q0 01110 size 1  m 001111 n d  CMGE Vd.T, Vn.T, Vm.T   (6) >=s
+
+      0q1 01110 size 100000 100010 n d  CMGE Vd.T, Vn.T, #0   (7) >=s 0
+      0q0 01110 size 100000 100010 n d  CMGT Vd.T, Vn.T, #0   (8) >s 0
+
+      0q1 01110 size 100000 100110 n d  CMLE Vd.T, Vn.T, #0   (9) <=s 0
+      0q0 01110 size 100000 100110 n d  CMEQ Vd.T, Vn.T, #0   (10) == 0
+
+      0q0 01110 size 100000 101010 n d  CMLT Vd.T, Vn.T, #0   (11) <s 0
+   */
+         /* x >  y   can be expressed directly
+            x <  y   ==   y > x
+            x <= y   ==   not (x > y)
+            x >= y   ==   not (y > x)
+         */
+         switch (ix) {
+            case 1: res = binop(opsEQ[szBlg2], argL, argR); break;
+            case 2: res = unop(Iop_NotV128,
+                               binop(opsEQ[szBlg2],
+                                     binop(Iop_AndV128, argL, argR),
+                                     mkV128(0x0000)));
+                    break;
+            case 3: res = binop(opsGTU[szBlg2], argL, argR); break;
+            case 4: res = binop(opsGTS[szBlg2], argL, argR); break;
+            case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL));
+                    break;
+            case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
+                    break;
+            case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
+                    break;
+            case 8: res = binop(opsGTS[szBlg2], argL, argR); break;
+            case 9: res = unop(Iop_NotV128,
+                               binop(opsGTS[szBlg2], argL, argR));
+                    break;
+            case 10: res = binop(opsEQ[szBlg2], argL, argR); break;
+            case 11: res = binop(opsGTS[szBlg2], argR, argL); break;
+            default: vassert(0);
+         }
+         vassert(res);
+         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, res) : res);
+         const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge",
+                                  "ge", "gt", "le", "eq", "lt" };
+         if (ix <= 6) {
+            DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1],
+                nameQReg128(dd), arrSpec,
+                nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
+         } else {
+            DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1],
+                nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
+         }
+         return True;
+      }
+      /* else fall through */
+   }
+
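
Aside (illustrative only, not part of the patch): the CM* switch above builds every comparison from just GT/GTU/EQ plus NOT, using the identities quoted in the comment. A minimal standalone C check of those identities on a single signed 8-bit lane -- the helper names below are invented for this sketch -- could look like:

#include <assert.h>
#include <stdint.h>

/* All-ones/all-zeros lane results, as the CM* instructions produce. */
static uint8_t lane_GTS ( int8_t x, int8_t y ) { return x > y  ? 0xFF : 0x00; }
static uint8_t lane_EQ  ( int8_t x, int8_t y ) { return x == y ? 0xFF : 0x00; }

static void check_identities ( int8_t x, int8_t y )
{
   /* x >= y  ==  not (y > x)            -- cases 6 and 7 */
   assert( (uint8_t)(x >= y ? 0xFF : 0) == (uint8_t)~lane_GTS(y, x) );
   /* x <= y  ==  not (x > y)            -- case 9 */
   assert( (uint8_t)(x <= y ? 0xFF : 0) == (uint8_t)~lane_GTS(x, y) );
   /* x <  y  ==  y > x                  -- case 11 */
   assert( (uint8_t)(x <  y ? 0xFF : 0) == lane_GTS(y, x) );
   /* CMTST:  (x & y) != 0  ==  not ((x & y) == 0)   -- case 2 */
   assert( (uint8_t)((x & y) ? 0xFF : 0) == (uint8_t)~lane_EQ((int8_t)(x & y), 0) );
}

int main ( void )
{
   for (int x = -128; x < 128; x++)
      for (int y = -128; y < 128; y++)
         check_identities((int8_t)x, (int8_t)y);
   return 0;
}
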
+   /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */
+   /* 31  28    23 20 15     9 4
+      0q1 01110 00 1  m 000111 n d  EOR Vd.T, Vm.T, Vn.T
+      0q1 01110 01 1  m 000111 n d  BSL Vd.T, Vm.T, Vn.T
+      0q1 01110 10 1  m 000111 n d  BIT Vd.T, Vm.T, Vn.T
+      0q1 01110 11 1  m 000111 n d  BIF Vd.T, Vm.T, Vn.T
+   */
+   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
+       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
+      Bool isQ = INSN(30,30) == 1;
+      UInt op  = INSN(23,22);
+      UInt mm  = INSN(20,16);
+      UInt nn  = INSN(9,5);
+      UInt dd  = INSN(4,0);
+      IRTemp argD = newTemp(Ity_V128);
+      IRTemp argN = newTemp(Ity_V128);
+      IRTemp argM = newTemp(Ity_V128);
+      assign(argD, getQReg128(dd));
+      assign(argN, getQReg128(nn));
+      assign(argM, getQReg128(mm));
+      const IROp opXOR = Iop_XorV128;
+      const IROp opAND = Iop_AndV128;
+      const IROp opNOT = Iop_NotV128;
+      IRExpr* res = NULL;
+      switch (op) {
+         case BITS2(0,0):
+            res = binop(opXOR, mkexpr(argM), mkexpr(argN));
+            break;
+         case BITS2(0,1):
+            res = binop(opXOR, mkexpr(argM),
+                        binop(opAND,
+                              binop(opXOR, mkexpr(argM), mkexpr(argN)),
+                              mkexpr(argD)));
+            break;
+         case BITS2(1,0):
+            res = binop(opXOR, mkexpr(argD),
+                        binop(opAND,
+                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
+                              mkexpr(argM)));
+            break;
+         case BITS2(1,1):
+            res = binop(opXOR, mkexpr(argD),
+                        binop(opAND,
+                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
+                              unop(opNOT, mkexpr(argM))));
+            break;
+         default:
+            vassert(0);
+      }
+      vassert(res);
+      putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
+      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
+      const HChar* arr = isQ ? "16b" : "8b";
+      vassert(op < 4);
+      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op],
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
+   /* ------------ {USHR,SSHR} (vector, immediate) ------------ */
+   /* 31  28     22   18   15     9 4
+      0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #shift
+      0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #shift
+      laneTy, shift = case immh:immb of
+                         0001:xxx -> B, 8-xxx
+                         001x:xxx -> H, 16-xxxx
+                         01xx:xxx -> S, 32-xxxxx
+                         1xxx:xxx -> D, 64-xxxxxx
+                         other    -> invalid
+      As usual the case laneTy==D && q==0 is not allowed.
+   */
+   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
+       && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
+      Bool isQ  = INSN(30,30) == 1;
+      Bool isU  = INSN(29,29) == 1;
+      UInt immh = INSN(22,19);
+      UInt immb = INSN(18,16);
+      UInt nn   = INSN(9,5);
+      UInt dd   = INSN(4,0);
+      const IROp opsSHRN[4]
+         = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
+      const IROp opsSARN[4]
+         = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
+      UInt szBlg2 = 0;
+      UInt shift  = 0;
+      Bool ok     = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
+      if (ok && szBlg2 < 4 && shift > 0 && shift < (8 << szBlg2)
+          && !(szBlg2 == 3/*64bit*/ && !isQ)) {
+         IROp    op  = isU ? opsSHRN[szBlg2] : opsSARN[szBlg2];
+         IRExpr* src = getQReg128(nn);
+         IRExpr* res = binop(op, src, mkU8(shift));
+         putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
+         HChar laneCh = "bhsd"[szBlg2];
+         UInt  nLanes = (isQ ? 128 : 64) / (8 << szBlg2);
+         DIP("%s %s.%u%c, %s.%u%c, #%u\n", isU ? "ushr" : "sshr",
+             nameQReg128(dd), nLanes, laneCh,
+             nameQReg128(nn), nLanes, laneCh, shift);
+         return True;
+      }
+      /* else fall through */
+   }
+
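
Aside (illustrative only, not part of the patch): the shift decoder above leans on getLaneInfo_IMMH_IMMB, which recovers both the lane width and the shift count from the 7-bit immh:immb field: the highest set bit of immh selects the lane size, and the right-shift amount is 2*lanesize - immh:immb. A standalone sketch of that decoding, with invented names and purely for illustration:

#include <stdio.h>

/* Returns the lane size in bits (8/16/32/64) and writes the right-shift
   amount to *shift; returns 0 for the invalid encoding immh == 0. */
static unsigned decode_immh_immb ( unsigned immh, unsigned immb,
                                   unsigned* shift )
{
   unsigned immhb = (immh << 3) | immb;        /* 7-bit immh:immb field */
   if (immh & 8) { *shift = 128 - immhb; return 64; }
   if (immh & 4) { *shift =  64 - immhb; return 32; }
   if (immh & 2) { *shift =  32 - immhb; return 16; }
   if (immh & 1) { *shift =  16 - immhb; return  8; }
   return 0;
}

int main ( void )
{
   /* e.g. immh:immb = 1111:101 encodes a 64-bit lane shifted right by 3,
      since 128 - 0x7D == 3. */
   unsigned sh = 0;
   unsigned lanebits = decode_immh_immb(0xF, 0x5, &sh);
   printf("lane = %u bits, shift = #%u\n", lanebits, sh);  /* prints 64, 3 */
   return 0;
}
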
+   /* -------------------- {U,S}SHLL{,2} -------------------- */
+   /* 31  28     22   18   15     9 4
+      0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
+      0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
+      where Ta,Tb,sh
+        = case immh of 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8   (0..7)
+                       001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16  (0..15)
+                       01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32  (0..31)
+   */
+   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
+       && INSN(15,10) == BITS6(1,0,1,0,0,1)) {
+      Bool isQ   = INSN(30,30) == 1;
+      Bool isU   = INSN(29,29) == 1;
+      UInt immh  = INSN(22,19);
+      UInt immb  = INSN(18,16);
+      UInt nn    = INSN(9,5);
+      UInt dd    = INSN(4,0);
+      UInt immhb = (immh << 3) | immb;
+      IRTemp  src  = newTemp(Ity_V128);
+      IRTemp  zero = newTemp(Ity_V128);
+      IRExpr* res  = NULL;
+      UInt    sh   = 0;
+      const HChar* ta = "??";
+      const HChar* tb = "??";
+      assign(src, getQReg128(nn));
+      assign(zero, mkV128(0x0000));
+      if (immh & 1) {
+         sh = immhb - 8;
+         vassert(sh < 8); /* so 8-sh is 1..8 */
+         ta = "8h";
+         tb = isQ ? "16b" : "8b";
+         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
+                           : mk_InterleaveLO8x16(src, zero);
+         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
+      }
+      else if (immh & 2) {
+         sh = immhb - 16;
+         vassert(sh < 16); /* so 16-sh is 1..16 */
+         ta = "4s";
+         tb = isQ ? "8h" : "4h";
+         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
+                           : mk_InterleaveLO16x8(src, zero);
+         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
+      }
+      else if (immh & 4) {
+         sh = immhb - 32;
+         vassert(sh < 32); /* so 32-sh is 1..32 */
+         ta = "2d";
+         tb = isQ ? "4s" : "2s";
+         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
+                           : mk_InterleaveLO32x4(src, zero);
+         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
+      }
+      /* */
+      if (res) {
+         putQReg128(dd, res);
+         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
+             isU ? 'u' : 's', isQ ?
"2" : "", + nameQReg128(dd), ta, nameQReg128(nn), tb, sh); + return True; + } + /* else fall through */ + } + /* -------------------- XTN{,2} -------------------- */ /* 31 28 23 21 15 9 4 XTN{,2} Vd.Tb, Vn.Ta 0q0 01110 size 100001 001010 n d @@ -6010,6 +6338,53 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn) /* else fall through */ } + /* -------------------- INS (general) -------------------- */ + /* 31 28 20 15 9 4 + 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn + where Ts,ix = case imm5 of xxxx1 -> B, xxxx + xxx10 -> H, xxx + xx100 -> S, xx + x1000 -> D, x + */ + if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0) + && INSN(15,10) == BITS6(0,0,0,1,1,1)) { + UInt imm5 = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + HChar ts = '?'; + UInt laneNo = 16; + IRExpr* src = NULL; + if (imm5 & 1) { + src = unop(Iop_64to8, getIReg64orZR(nn)); + laneNo = (imm5 >> 1) & 15; + ts = 'b'; + } + else if (imm5 & 2) { + src = unop(Iop_64to16, getIReg64orZR(nn)); + laneNo = (imm5 >> 2) & 7; + ts = 'h'; + } + else if (imm5 & 4) { + src = unop(Iop_64to32, getIReg64orZR(nn)); + laneNo = (imm5 >> 3) & 3; + ts = 's'; + } + else if (imm5 & 8) { + src = getIReg64orZR(nn); + laneNo = (imm5 >> 4) & 1; + ts = 'd'; + } + /* */ + if (src) { + vassert(laneNo < 16); + putQRegLane(dd, laneNo, src); + DIP("ins %s.%c[%u], %s\n", + nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn)); + return True; + } + /* else invalid; fall through */ + } + /* FIXME Temporary hacks to get through ld.so FIXME */ /* ------------------ movi vD.4s, #0x0 ------------------ */ @@ -6400,6 +6775,23 @@ static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) return mkexpr(mkV128from32s(a3, a1, b3, b1)); } +static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) +{ + // returns a1 b1 a0 b0 + IRTemp a1, a0, b1, b0; + breakV128to32s(NULL, NULL, &a1, &a0, a3210); + breakV128to32s(NULL, NULL, &b1, &b0, b3210); + return mkexpr(mkV128from32s(a1, b1, a0, b0)); +} + +static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) +{ + // returns a3 b3 a2 b2 + IRTemp a3, a2, b3, b2; + breakV128to32s(&a3, &a2, NULL, NULL, a3210); + breakV128to32s(&b3, &b2, NULL, NULL, b3210); + return mkexpr(mkV128from32s(a3, b3, a2, b2)); +} //////////////////////////////////////////////////////////////// // 16x8 operations @@ -6509,6 +6901,24 @@ static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1)); } +static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) +{ + // returns a3 b3 a2 b2 a1 b1 a0 b0 + IRTemp a3, b3, a2, b2, a1, a0, b1, b0; + breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210); + breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210); + return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0)); +} + +static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) +{ + // returns a7 b7 a6 b6 a5 b5 a4 b4 + IRTemp a7, b7, a6, b6, a5, b5, a4, b4; + breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210); + breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210); + return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4)); +} + //////////////////////////////////////////////////////////////// // 8x16 operations // @@ -6686,6 +7096,35 @@ static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210, bF, bD, bB, b9, b7, b5, b3, b1)); } +static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210, + IRTemp bFEDCBA9876543210 ) +{ + // returns a7 b7 
a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0 + IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0; + breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0, + aFEDCBA9876543210); + breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + &b7, &b6, &b5, &b4, &b3, &b2, &b1, &b0, + bFEDCBA9876543210); + return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4, + a3, b3, a2, b2, a1, b1, a0, b0)); +} + +static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210, + IRTemp bFEDCBA9876543210 ) +{ + // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8 + IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8; + breakV128to8s(&aF, &aE, &aD, &aC, &aB, &aA, &a9, &a8, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + aFEDCBA9876543210); + breakV128to8s(&bF, &bE, &bD, &bC, &bB, &bA, &b9, &b8, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + bFEDCBA9876543210); + return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC, + aB, bB, aA, bA, a9, b9, a8, b8)); +} /*--------------------------------------------------------------------*/ /*--- end guest_arm64_toIR.c ---*/ diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index accc74ce6d..21e12813a6 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -200,7 +200,6 @@ void getAllocableRegs_ARM64 ( Int* nregs, HReg** arr ) } - /* --------- Condition codes, ARM64 encoding. --------- */ static const HChar* showARM64CondCode ( ARM64CondCode cond ) { @@ -881,6 +880,8 @@ static void showARM64VecBinOp(/*OUT*/const HChar** nm, case ARM64vecb_SMIN8x16: *nm = "smin"; *ar = "16b"; return; case ARM64vecb_AND: *nm = "and "; *ar = "all"; return; case ARM64vecb_ORR: *nm = "orr "; *ar = "all"; return; + case ARM64vecb_XOR: *nm = "eor "; *ar = "all"; return; + case ARM64vecb_CMEQ64x2: *nm = "cmeq"; *ar = "2d"; return; default: vpanic("showARM64VecBinOp"); } } @@ -889,11 +890,23 @@ static void showARM64VecUnaryOp(/*OUT*/const HChar** nm, /*OUT*/const HChar** ar, ARM64VecUnaryOp op ) { switch (op) { - case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return; + case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return; case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return; case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return; case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return; - default: vpanic("showARM64VecBinOp"); + case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return; + default: vpanic("showARM64VecUnaryOp"); + } +} + +static void showARM64VecShiftOp(/*OUT*/const HChar** nm, + /*OUT*/const HChar** ar, + ARM64VecShiftOp op ) +{ + switch (op) { + case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d"; return; + case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d"; return; + default: vpanic("showARM64VecShiftImmOp"); } } @@ -1588,6 +1601,25 @@ ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src ) { vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2); return i; } +ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op, + HReg dst, HReg src, UInt amt ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_VShiftImmV; + i->ARM64in.VShiftImmV.op = op; + i->ARM64in.VShiftImmV.dst = dst; + i->ARM64in.VShiftImmV.src = src; + i->ARM64in.VShiftImmV.amt = amt; + UInt maxSh = 0; + switch (op) { + case ARM64vecsh_USHR64x2: case ARM64vecsh_SSHR64x2: + maxSh = 63; break; + default: + vassert(0); + } + vassert(maxSh > 0); + vassert(amt > 0 && amt <= maxSh); + return i; +} //ZZ ARMInstr* ARMInstr_VAluS 
( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) { //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); //ZZ i->tag = ARMin_VAluS; @@ -2215,6 +2247,17 @@ void ppARM64Instr ( ARM64Instr* i ) { vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??"); return; } + case ARM64in_VShiftImmV: { + const HChar* nm = "??"; + const HChar* ar = "??"; + showARM64VecShiftOp(&nm, &ar, i->ARM64in.VShiftImmV.op); + vex_printf("%s ", nm); + ppHRegARM64(i->ARM64in.VShiftImmV.dst); + vex_printf(".%s, ", ar); + ppHRegARM64(i->ARM64in.VShiftImmV.src); + vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt); + return; + } //ZZ case ARMin_VAluS: //ZZ vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op)); //ZZ ppHRegARM(i->ARMin.VAluS.dst); @@ -2691,6 +2734,10 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 ) addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst); addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src); return; + case ARM64in_VShiftImmV: + addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst); + addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src); + return; //ZZ case ARMin_VAluS: //ZZ addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst); //ZZ addHRegUse(u, HRmRead, i->ARMin.VAluS.argL); @@ -2979,6 +3026,12 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 ) i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst); i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src); return; + case ARM64in_VShiftImmV: + i->ARM64in.VShiftImmV.dst + = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst); + i->ARM64in.VShiftImmV.src + = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src); + return; //ZZ case ARMin_VAluS: //ZZ i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst); //ZZ i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL); @@ -3272,6 +3325,7 @@ static inline UChar qregNo ( HReg r ) #define X11111 BITS8(0,0,0, 1,1,1,1,1) #define X000000 BITS8(0,0, 0,0,0,0,0,0) +#define X000001 BITS8(0,0, 0,0,0,0,0,1) #define X000100 BITS8(0,0, 0,0,0,1,0,0) #define X000111 BITS8(0,0, 0,0,0,1,1,1) #define X001000 BITS8(0,0, 0,0,1,0,0,0) @@ -3280,11 +3334,14 @@ static inline UChar qregNo ( HReg r ) #define X001111 BITS8(0,0, 0,0,1,1,1,1) #define X010000 BITS8(0,0, 0,1,0,0,0,0) #define X010001 BITS8(0,0, 0,1,0,0,0,1) +#define X010110 BITS8(0,0, 0,1,0,1,1,0) #define X011001 BITS8(0,0, 0,1,1,0,0,1) #define X011010 BITS8(0,0, 0,1,1,0,1,0) #define X011011 BITS8(0,0, 0,1,1,0,1,1) +#define X011110 BITS8(0,0, 0,1,1,1,1,0) #define X011111 BITS8(0,0, 0,1,1,1,1,1) #define X100001 BITS8(0,0, 1,0,0,0,0,1) +#define X100011 BITS8(0,0, 1,0,0,0,1,1) #define X100100 BITS8(0,0, 1,0,0,1,0,0) #define X100101 BITS8(0,0, 1,0,0,1,0,1) #define X100110 BITS8(0,0, 1,0,0,1,1,0) @@ -3299,6 +3356,8 @@ static inline UChar qregNo ( HReg r ) #define X111110 BITS8(0,0, 1,1,1,1,1,0) #define X111111 BITS8(0,0, 1,1,1,1,1,1) +#define X1000000 BITS8(0, 1,0,0,0,0,0,0) + #define X00100000 BITS8(0,0,1,0,0,0,0,0) #define X00100001 BITS8(0,0,1,0,0,0,0,1) #define X00100010 BITS8(0,0,1,0,0,0,1,0) @@ -3421,6 +3480,25 @@ static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3, return w; } +static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3, + UInt f4, UInt f5, UInt f6 ) { + vassert(3+6+7+6+5+5 == 32); + vassert(f1 < (1<<3)); + vassert(f2 < (1<<6)); + vassert(f3 < (1<<7)); + vassert(f4 < (1<<6)); + vassert(f5 < (1<<5)); + vassert(f6 < (1<<5)); + UInt w = 0; + w = (w << 3) | f1; + w = (w << 6) | f2; + w = (w << 7) | f3; + w = (w << 6) | f4; + w = (w << 5) | f5; + w = (w << 5) | f6; + return w; +} + /* --- 7 
fields --- */ static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3, @@ -4843,6 +4921,11 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm + 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm + + 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d + 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d >u, ATC + 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d >s, ATC */ UInt vD = qregNo(i->ARM64in.VBinV.dst); UInt vN = qregNo(i->ARM64in.VBinV.argL); @@ -4931,12 +5014,20 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD); break; + case ARM64vecb_AND: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD); + break; case ARM64vecb_ORR: *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD); break; - case ARM64vecb_AND: - *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD); + case ARM64vecb_XOR: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD); + break; + + case ARM64vecb_CMEQ64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD); break; + default: goto bad; } @@ -4944,17 +5035,24 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, } case ARM64in_VUnaryV: { /* 31 23 20 15 9 4 - 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d - 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s - 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d - 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s + 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d + 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s + 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d + 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s + 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b */ UInt vD = qregNo(i->ARM64in.VUnaryV.dst); UInt vN = qregNo(i->ARM64in.VUnaryV.arg); switch (i->ARM64in.VUnaryV.op) { + case ARM64vecu_FABS64x2: + *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD); + break; case ARM64vecu_FNEG64x2: *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD); break; + case ARM64vecu_NOT: + *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD); + break; default: goto bad; } @@ -4974,6 +5072,37 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, X00001, X001010, vN, vD); goto done; } + case ARM64in_VShiftImmV: { + /* + 0q1 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh + 0q0 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh + where immh:immb + = case T of + 2d | sh in 1..63 -> let xxxxxx = 64-sh in 1xxx:xxx + 4s | sh in 1..31 -> let xxxxx = 32-sh in 01xx:xxx + 8h | sh in 1..15 -> let xxxx = 16-sh in 001x:xxx + 16b | sh in 1..7 -> let xxx = 8-sh in 0001:xxx + */ + UInt vD = qregNo(i->ARM64in.VShiftImmV.dst); + UInt vN = qregNo(i->ARM64in.VShiftImmV.src); + UInt sh = i->ARM64in.VShiftImmV.amt; + ARM64VecShiftOp op = i->ARM64in.VShiftImmV.op; + Bool syned = False; + switch (op) { + case ARM64vecsh_SSHR64x2: syned = True; + case ARM64vecsh_USHR64x2: /* fallthrough */ + if (sh >= 1 && sh <= 63) { + UInt xxxxxx = 64-sh; + *p++ = X_3_6_7_6_5_5(syned ? 
X010 : X011, X011110, + X1000000 | xxxxxx, X000001, vN, vD); + goto done; + } + break; + default: + break; + } + goto bad; + } //ZZ case ARMin_VAluS: { //ZZ UInt dN = fregNo(i->ARMin.VAluS.argL); //ZZ UInt dD = fregNo(i->ARMin.VAluS.dst); diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index 489b1b05c9..b6e4da8185 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -337,6 +337,8 @@ typedef ARM64vecb_SMIN8x16, ARM64vecb_AND, ARM64vecb_ORR, + ARM64vecb_XOR, + ARM64vecb_CMEQ64x2, ARM64vecb_INVALID } ARM64VecBinOp; @@ -347,10 +349,19 @@ typedef ARM64vecu_FNEG32x4, ARM64vecu_FABS64x2, ARM64vecu_FABS32x4, + ARM64vecu_NOT, ARM64vecu_INVALID } ARM64VecUnaryOp; +typedef + enum { + ARM64vecsh_USHR64x2=350, + ARM64vecsh_SSHR64x2, + ARM64vecsh_INVALID + } + ARM64VecShiftOp; + //ZZ extern const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ); //ZZ //ZZ typedef @@ -534,6 +545,7 @@ typedef ARM64in_VBinV, ARM64in_VUnaryV, ARM64in_VNarrowV, + ARM64in_VShiftImmV, //ZZ ARMin_VAluS, //ZZ ARMin_VCMovD, //ZZ ARMin_VCMovS, @@ -819,6 +831,15 @@ typedef HReg dst; // Q reg HReg src; // Q reg } VNarrowV; + /* Vector shift by immediate. |amt| needs to be > 0 and < + implied lane size of |op|. Zero shifts and out of range + shifts are not allowed. */ + struct { + ARM64VecShiftOp op; + HReg dst; + HReg src; + UInt amt; + } VShiftImmV; //ZZ /* 32-bit FP binary arithmetic */ //ZZ struct { //ZZ ARMVfpOp op; @@ -1022,6 +1043,8 @@ extern ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ); extern ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op, HReg, HReg, HReg ); extern ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg, HReg ); extern ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src ); +extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op, + HReg dst, HReg src, UInt amt ); //ZZ extern ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg, HReg, HReg ); //ZZ extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src ); //ZZ extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src ); diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index 3d81c0b7b1..0c8d2315b5 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -4361,11 +4361,15 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) /* Other cases */ switch (e->Iex.Unop.op) { + case Iop_NotV128: + case Iop_Abs64Fx2: case Iop_Neg64Fx2: { HReg res = newVRegV(env); HReg arg = iselV128Expr(env, e->Iex.Unop.arg); ARM64VecUnaryOp op = ARM64vecu_INVALID; switch (e->Iex.Unop.op) { + case Iop_NotV128: op = ARM64vecu_NOT; break; + case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break; case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break; default: vassert(0); } @@ -4852,6 +4856,7 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) //ZZ case Iop_Add32x4: case Iop_AndV128: case Iop_OrV128: + case Iop_XorV128: case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16: @@ -4869,32 +4874,35 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) case Iop_Sub32x4: case Iop_Sub16x8: case Iop_Mul32x4: - case Iop_Mul16x8: { + case Iop_Mul16x8: + case Iop_CmpEQ64x2: { HReg res = newVRegV(env); HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); ARM64VecBinOp op = ARM64vecb_INVALID; switch (e->Iex.Binop.op) { - case Iop_AndV128: op = ARM64vecb_AND; break; - case Iop_OrV128: op = ARM64vecb_ORR; break; - case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break; - case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break; - 
case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break; - case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break; - case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break; - case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break; - case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break; - case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break; - case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break; - case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break; - case Iop_Add64x2: op = ARM64vecb_ADD64x2; break; - case Iop_Add32x4: op = ARM64vecb_ADD32x4; break; - case Iop_Add16x8: op = ARM64vecb_ADD16x8; break; - case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break; - case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break; - case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break; - case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break; - case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break; + case Iop_AndV128: op = ARM64vecb_AND; break; + case Iop_OrV128: op = ARM64vecb_ORR; break; + case Iop_XorV128: op = ARM64vecb_XOR; break; + case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break; + case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break; + case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break; + case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break; + case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break; + case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break; + case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break; + case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break; + case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break; + case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break; + case Iop_Add64x2: op = ARM64vecb_ADD64x2; break; + case Iop_Add32x4: op = ARM64vecb_ADD32x4; break; + case Iop_Add16x8: op = ARM64vecb_ADD16x8; break; + case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break; + case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break; + case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break; + case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break; + case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break; + case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break; default: vassert(0); } addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); @@ -5393,27 +5401,32 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) //ZZ case Iop_ShrN8x16: //ZZ case Iop_ShrN16x8: //ZZ case Iop_ShrN32x4: -//ZZ case Iop_ShrN64x2: { -//ZZ HReg res = newVRegV(env); -//ZZ HReg tmp = newVRegV(env); -//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); -//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); -//ZZ HReg argR2 = newVRegI(env); -//ZZ UInt size; -//ZZ switch (e->Iex.Binop.op) { -//ZZ case Iop_ShrN8x16: size = 0; break; -//ZZ case Iop_ShrN16x8: size = 1; break; -//ZZ case Iop_ShrN32x4: size = 2; break; -//ZZ case Iop_ShrN64x2: size = 3; break; -//ZZ default: vassert(0); -//ZZ } -//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); -//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, -//ZZ tmp, argR2, 0, True)); -//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, -//ZZ res, argL, tmp, size, True)); -//ZZ return res; -//ZZ } + case Iop_ShrN64x2: + case Iop_SarN64x2: { + IRExpr* argL = e->Iex.Binop.arg1; + IRExpr* argR = e->Iex.Binop.arg2; + if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { + UInt amt = argR->Iex.Const.con->Ico.U8; + UInt limit = 0; + ARM64VecShiftOp op = ARM64vecsh_INVALID; + switch (e->Iex.Binop.op) { + case Iop_ShrN64x2: + op = ARM64vecsh_USHR64x2; limit = 63; break; + case Iop_SarN64x2: + op = ARM64vecsh_SSHR64x2; limit = 63; break; + default: + vassert(0); + } + if (op != ARM64vecsh_INVALID && amt > 0 && amt <= limit) { + HReg src = iselV128Expr(env, argL); + HReg dst = newVRegV(env); + 
addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
+                  return dst;
+               }
+            }
+            /* else fall out; this is unhandled */
+            break;
+         }
 //ZZ       case Iop_ShlN8x16:
 //ZZ       case Iop_ShlN16x8:
 //ZZ       case Iop_ShlN32x4: