return ops[sizeNarrow];
}
+static IROp mkVecQSHLNSATU2U ( UInt size ) {
+ const IROp ops[4]
+ = { Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2 };
+ vassert(size < 4);
+ return ops[size];
+}
+
+static IROp mkVecQSHLNSATS2S ( UInt size ) {
+ const IROp ops[4]
+ = { Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2 };
+ vassert(size < 4);
+ return ops[size];
+}
+
+static IROp mkVecQSHLNSATS2U ( UInt size ) {
+ const IROp ops[4]
+ = { Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2 };
+ vassert(size < 4);
+ return ops[size];
+}
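+
+/* In the helpers above, the U2U/S2S/S2U suffixes name the saturation
+   flavour: unsigned-to-unsigned for UQSHL, signed-to-signed for SQSHL
+   and signed-to-unsigned for SQSHLU. */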
+
+
/* Generate IR to create 'arg rotated right by imm', for sane values
of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
}
+/* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
+ a new temp in *res, and the Q-flag difference pair in new temps
+ *qDiff1 and *qDiff2 respectively. |nm| selects which of the three
+ operations to generate. */
+static
+void math_QSHL_IMM ( /*OUT*/IRTemp* res,
+ /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
+ IRTemp src, UInt size, UInt shift, const HChar* nm )
+{
+ vassert(size <= 3);
+ UInt laneBits = 8 << size;
+ vassert(shift < laneBits);
+ newTempsV128_3(res, qDiff1, qDiff2);
+ IRTemp z128 = newTempV128();
+ assign(z128, mkV128(0x0000));
+
+ /* UQSHL */
+ if (vex_streq(nm, "uqshl")) {
+ IROp qop = mkVecQSHLNSATU2U(size);
+ assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
+ if (shift == 0) {
+ /* No shift means no saturation. */
+ assign(*qDiff1, mkexpr(z128));
+ assign(*qDiff2, mkexpr(z128));
+ } else {
+ /* Saturation has occurred if any of the shifted-out bits are
+ nonzero. We get the shifted-out bits by right-shifting the
+ original value. */
+ UInt rshift = laneBits - shift;
+ vassert(rshift >= 1 && rshift < laneBits);
+ assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
+ assign(*qDiff2, mkexpr(z128));
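+ /* Example: with 8-bit lanes and shift == 3, rshift == 5; a source
+    lane of 0x2A gives 0x2A >>u 5 == 1 (nonzero), and indeed
+    0x2A << 3 == 0x150 does not fit in 8 bits, so that lane
+    saturates. */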
+ }
+ return;
+ }
+
+ /* SQSHL */
+ if (vex_streq(nm, "sqshl")) {
+ IROp qop = mkVecQSHLNSATS2S(size);
+ assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
+ if (shift == 0) {
+ /* No shift means no saturation. */
+ assign(*qDiff1, mkexpr(z128));
+ assign(*qDiff2, mkexpr(z128));
+ } else {
+ /* Saturation has occurred if any of the shifted-out bits are
+ different from the top bit of the original value. */
+ UInt rshift = laneBits - 1 - shift;
+ vassert(rshift >= 0 && rshift < laneBits-1);
+ /* qDiff1 is the shifted out bits, and the top bit of the original
+ value, preceded by zeroes. */
+ assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
+ /* qDiff2 is the top bit of the original value, cloned the
+ correct number of times. */
+ assign(*qDiff2, binop(mkVecSHRN(size),
+ binop(mkVecSARN(size), mkexpr(src),
+ mkU8(laneBits-1)),
+ mkU8(rshift)));
+ /* This also compares the top bit of the original value against
+ itself, which is redundant but harmless. */
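+ /* Example: with 8-bit lanes and shift == 2, rshift == 5. A source
+    lane of 0xE5 gives qDiff1 == 0xE5 >>u 5 == 0x07 and
+    qDiff2 == (0xE5 >>s 7) >>u 5 == 0x07; they are equal, so no
+    saturation (0xE5 << 2 == 0x94 is still representable). A source
+    lane of 0x45 gives 0x02 and 0x00 respectively, so that lane
+    saturates (0x45 << 2 == 0x114 overflows a signed 8-bit lane). */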
+ }
+ return;
+ }
+
+ /* SQSHLU */
+ if (vex_streq(nm, "sqshlu")) {
+ IROp qop = mkVecQSHLNSATS2U(size);
+ assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
+ /* This is different from the other two cases, in that
+ saturation can occur even if there is no shift. */
+ /* Saturation has occurred if any of the shifted-out bits, or
+ the top bit of the original value, are nonzero. */
+ UInt rshift = laneBits - 1 - shift;
+ vassert(rshift >= 0 && rshift < laneBits);
+ /* qDiff1 is the shifted out bits, and the top bit of the original
+ value, preceded by zeroes. */
+ assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
+ assign(*qDiff2, mkexpr(z128));
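+ /* Example: with 8-bit lanes and shift == 0, rshift == 7. A negative
+    source lane such as 0x80 gives qDiff1 == 0x80 >>u 7 == 1, which
+    differs from qDiff2 == 0, so the lane saturates (to zero) even
+    though no bits are shifted out. */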
+ return;
+ }
+
+ vassert(0);
+}
+
+
/* QCFLAG tracks the SIMD sticky saturation status. Update the status
thusly: if, after application of |opZHI| to both |qres| and |nres|,
they have the same value, leave QCFLAG unchanged. Otherwise, set it
return True;
}
+ if (opcode == BITS5(0,1,1,1,0)
+ || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
+ /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
+ /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
+ /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
+ UInt size = 0;
+ UInt shift = 0;
+ Bool isQ = bitQ == 1;
+ Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
+ if (!ok || (bitQ == 0 && size == X11)) return False;
+ vassert(size >= 0 && size <= 3);
+ /* The shift encoding has opposite sign for the leftwards case.
+ Adjust shift to compensate. */
+ UInt lanebits = 8 << size;
+ shift = lanebits - shift;
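+ /* For example, with 4S lanes and an intended left shift of 3,
+    getLaneInfo_IMMH_IMMB yields 32 - 3 == 29, and 32 - 29 recovers
+    the left shift amount of 3. */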
+ vassert(shift >= 0 && shift < lanebits);
+ const HChar* nm = NULL;
+ /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
+ else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
+ else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
+ else vassert(0);
+ IRTemp qDiff1 = IRTemp_INVALID;
+ IRTemp qDiff2 = IRTemp_INVALID;
+ IRTemp res = IRTemp_INVALID;
+ IRTemp src = newTempV128();
+ assign(src, getQReg128(nn));
+ math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
+ isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s, #%u\n", nm,
+ nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
+ return True;
+ }
+
if (bitU == 0
&& (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
/* -------- 0,10000 SHRN{,2} #imm -------- */
case ARM64vecsh_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
case ARM64vecsh_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
case ARM64vecsh_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
+ case ARM64vecsh_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
+ case ARM64vecsh_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
+ case ARM64vecsh_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
+ case ARM64vecsh_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
+ case ARM64vecsh_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
+ case ARM64vecsh_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
+ case ARM64vecsh_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
+ case ARM64vecsh_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
+ case ARM64vecsh_SQSHLU64x2: *nm = "sqshlu"; *ar = "2d"; return;
+ case ARM64vecsh_SQSHLU32x4: *nm = "sqshlu"; *ar = "4s"; return;
+ case ARM64vecsh_SQSHLU16x8: *nm = "sqshlu"; *ar = "8h"; return;
+ case ARM64vecsh_SQSHLU8x16: *nm = "sqshlu"; *ar = "16b"; return;
default: vpanic("showARM64VecShiftOp");
}
}
i->ARM64in.VShiftImmV.dst = dst;
i->ARM64in.VShiftImmV.src = src;
i->ARM64in.VShiftImmV.amt = amt;
+ UInt minSh = 0;
UInt maxSh = 0;
switch (op) {
- /* NB: the comments below are wrong. Correct is: for right shifts,
- the allowed shift amounts are 1 .. lane_size. For left shifts,
- the allowed shift amoutns are 0 .. lane_size-1. */
- /* For these ordinary, non-saturating non-magical shifts,
- the min shift value is actually zero, but we reject such cases
- and instead only accept 1 as the minimum shift value. */
+ /* For right shifts, the allowed shift amounts are 1 .. lane_size.
+ For left shifts, the allowed shift amounts are 0 .. lane_size-1.
+ */
case ARM64vecsh_USHR64x2: case ARM64vecsh_SSHR64x2:
- case ARM64vecsh_SHL64x2:
- maxSh = 63; break;
- case ARM64vecsh_USHR32x4: case ARM64vecsh_SSHR32x4:
- case ARM64vecsh_SHL32x4:
- maxSh = 31; break;
- case ARM64vecsh_USHR16x8: case ARM64vecsh_SSHR16x8:
- case ARM64vecsh_SHL16x8:
- maxSh = 15; break;
- case ARM64vecsh_USHR8x16: case ARM64vecsh_SSHR8x16:
- case ARM64vecsh_SHL8x16:
- maxSh = 7; break;
- /* Whereas for these shift right and narrow set, the min shift
- value really is 1. */
- case ARM64vecsh_UQSHRN2SD: case ARM64vecsh_SQSHRN2SD:
+ case ARM64vecsh_UQSHRN2SD: case ARM64vecsh_SQSHRN2SD:
case ARM64vecsh_SQSHRUN2SD:
case ARM64vecsh_UQRSHRN2SD: case ARM64vecsh_SQRSHRN2SD:
case ARM64vecsh_SQRSHRUN2SD:
- maxSh = 64; break;
- case ARM64vecsh_UQSHRN4HS: case ARM64vecsh_SQSHRN4HS:
+ minSh = 1; maxSh = 64; break;
+ case ARM64vecsh_SHL64x2:
+ case ARM64vecsh_UQSHL64x2: case ARM64vecsh_SQSHL64x2:
+ case ARM64vecsh_SQSHLU64x2:
+ minSh = 0; maxSh = 63; break;
+ case ARM64vecsh_USHR32x4: case ARM64vecsh_SSHR32x4:
+ case ARM64vecsh_UQSHRN4HS: case ARM64vecsh_SQSHRN4HS:
case ARM64vecsh_SQSHRUN4HS:
case ARM64vecsh_UQRSHRN4HS: case ARM64vecsh_SQRSHRN4HS:
case ARM64vecsh_SQRSHRUN4HS:
- maxSh = 32; break;
- case ARM64vecsh_UQSHRN8BH: case ARM64vecsh_SQSHRN8BH:
+ minSh = 1; maxSh = 32; break;
+ case ARM64vecsh_SHL32x4:
+ case ARM64vecsh_UQSHL32x4: case ARM64vecsh_SQSHL32x4:
+ case ARM64vecsh_SQSHLU32x4:
+ minSh = 0; maxSh = 31; break;
+ case ARM64vecsh_USHR16x8: case ARM64vecsh_SSHR16x8:
+ case ARM64vecsh_UQSHRN8BH: case ARM64vecsh_SQSHRN8BH:
case ARM64vecsh_SQSHRUN8BH:
case ARM64vecsh_UQRSHRN8BH: case ARM64vecsh_SQRSHRN8BH:
case ARM64vecsh_SQRSHRUN8BH:
- maxSh = 16; break;
+ minSh = 1; maxSh = 16; break;
+ case ARM64vecsh_SHL16x8:
+ case ARM64vecsh_UQSHL16x8: case ARM64vecsh_SQSHL16x8:
+ case ARM64vecsh_SQSHLU16x8:
+ minSh = 0; maxSh = 15; break;
+ case ARM64vecsh_USHR8x16: case ARM64vecsh_SSHR8x16:
+ minSh = 1; maxSh = 8; break;
+ case ARM64vecsh_SHL8x16:
+ case ARM64vecsh_UQSHL8x16: case ARM64vecsh_SQSHL8x16:
+ case ARM64vecsh_SQSHLU8x16:
+ minSh = 0; maxSh = 7; break;
default:
vassert(0);
}
vassert(maxSh > 0);
- vassert(amt > 0 && amt <= maxSh);
+ vassert(amt >= minSh && amt <= maxSh);
return i;
}
ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
#define X011001 BITS8(0,0, 0,1,1,0,0,1)
#define X011010 BITS8(0,0, 0,1,1,0,1,0)
#define X011011 BITS8(0,0, 0,1,1,0,1,1)
+#define X011101 BITS8(0,0, 0,1,1,1,0,1)
#define X011110 BITS8(0,0, 0,1,1,1,1,0)
#define X011111 BITS8(0,0, 0,1,1,1,1,1)
#define X100001 BITS8(0,0, 1,0,0,0,0,1)
001 011110 immh immb 100111 n d UQRSHRN ,,#sh
000 011110 immh immb 100111 n d SQRSHRN ,,#sh
001 011110 immh immb 100011 n d SQRSHRUN ,,#sh
+
where immh:immb
= case T of
 2d | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
 4s | sh in 1..32 -> let xxxxx = 32-sh in 01xx:xxx
 8h | sh in 1..16 -> let xxxx = 16-sh in 001x:xxx
 16b | sh in 1..8 -> let xxx = 8-sh in 0001:xxx
- 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
+ 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
+
+ 011 011110 immh immb 011101 n d UQSHL Vd.T, Vn.T, #sh
+ 010 011110 immh immb 011101 n d SQSHL Vd.T, Vn.T, #sh
+ 011 011110 immh immb 011001 n d SQSHLU Vd.T, Vn.T, #sh
+
where immh:immb
= case T of
2d | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
const UInt tmpl_SHL
= X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
+ const UInt tmpl_UQSHL
+ = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
+ const UInt tmpl_SQSHL
+ = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
+ const UInt tmpl_SQSHLU
+ = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
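+ /* These three templates follow the UQSHL/SQSHL/SQSHLU encodings
+    documented in the comment block above: U = 1/0/1 with opcode
+    fields 011101, 011101 and 011001 respectively. */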
+
switch (i->ARM64in.VShiftImmV.op) {
case ARM64vecsh_SSHR64x2: tmpl = tmpl_SSHR; goto right64x2;
case ARM64vecsh_USHR64x2: tmpl = tmpl_USHR; goto right64x2;
case ARM64vecsh_SHL64x2: tmpl = tmpl_SHL; goto left64x2;
-
+ case ARM64vecsh_UQSHL64x2: tmpl = tmpl_UQSHL; goto left64x2;
+ case ARM64vecsh_SQSHL64x2: tmpl = tmpl_SQSHL; goto left64x2;
+ case ARM64vecsh_SQSHLU64x2: tmpl = tmpl_SQSHLU; goto left64x2;
case ARM64vecsh_SSHR32x4: tmpl = tmpl_SSHR; goto right32x4;
case ARM64vecsh_USHR32x4: tmpl = tmpl_USHR; goto right32x4;
case ARM64vecsh_UQSHRN2SD: tmpl = tmpl_UQSHRN; goto right32x4;
case ARM64vecsh_SQRSHRN2SD: tmpl = tmpl_SQRSHRN; goto right32x4;
case ARM64vecsh_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
case ARM64vecsh_SHL32x4: tmpl = tmpl_SHL; goto left32x4;
-
+ case ARM64vecsh_UQSHL32x4: tmpl = tmpl_UQSHL; goto left32x4;
+ case ARM64vecsh_SQSHL32x4: tmpl = tmpl_SQSHL; goto left32x4;
+ case ARM64vecsh_SQSHLU32x4: tmpl = tmpl_SQSHLU; goto left32x4;
case ARM64vecsh_SSHR16x8: tmpl = tmpl_SSHR; goto right16x8;
case ARM64vecsh_USHR16x8: tmpl = tmpl_USHR; goto right16x8;
case ARM64vecsh_UQSHRN4HS: tmpl = tmpl_UQSHRN; goto right16x8;
case ARM64vecsh_SQRSHRN4HS: tmpl = tmpl_SQRSHRN; goto right16x8;
case ARM64vecsh_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
case ARM64vecsh_SHL16x8: tmpl = tmpl_SHL; goto left16x8;
-
+ case ARM64vecsh_UQSHL16x8: tmpl = tmpl_UQSHL; goto left16x8;
+ case ARM64vecsh_SQSHL16x8: tmpl = tmpl_SQSHL; goto left16x8;
+ case ARM64vecsh_SQSHLU16x8: tmpl = tmpl_SQSHLU; goto left16x8;
case ARM64vecsh_SSHR8x16: tmpl = tmpl_SSHR; goto right8x16;
case ARM64vecsh_USHR8x16: tmpl = tmpl_USHR; goto right8x16;
case ARM64vecsh_UQSHRN8BH: tmpl = tmpl_UQSHRN; goto right8x16;
case ARM64vecsh_SQRSHRN8BH: tmpl = tmpl_SQRSHRN; goto right8x16;
case ARM64vecsh_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
case ARM64vecsh_SHL8x16: tmpl = tmpl_SHL; goto left8x16;
+ case ARM64vecsh_UQSHL8x16: tmpl = tmpl_UQSHL; goto left8x16;
+ case ARM64vecsh_SQSHL8x16: tmpl = tmpl_SQSHL; goto left8x16;
+ case ARM64vecsh_SQSHLU8x16: tmpl = tmpl_SQSHLU; goto left8x16;
default: break;
break;
left64x2:
- if (sh >= 1 && sh <= 63) {
+ if (sh >= 0 && sh <= 63) {
*p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
goto done;
}
break;
left32x4:
- if (sh >= 1 && sh <= 31) {
+ if (sh >= 0 && sh <= 31) {
*p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
goto done;
}
break;
left16x8:
- if (sh >= 1 && sh <= 15) {
+ if (sh >= 0 && sh <= 15) {
*p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
goto done;
}
break;
left8x16:
- if (sh >= 1 && sh <= 7) {
+ if (sh >= 0 && sh <= 7) {
*p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
goto done;
}
HReg rD = i->ARM64in.VMov.dst;
HReg rN = i->ARM64in.VMov.src;
switch (i->ARM64in.VMov.szB) {
+ case 16: {
+ UInt dd = qregNo(rD);
+ UInt nn = qregNo(rN);
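+ /* Whole-vector move: orr vD.16b, vN.16b, vN.16b */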
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
+ goto done;
+ }
case 8: {
UInt dd = dregNo(rD);
UInt nn = dregNo(rN);
ARM64vecsh_SQRSHRN2SD, ARM64vecsh_SQRSHRN4HS, ARM64vecsh_SQRSHRN8BH,
ARM64vecsh_UQRSHRN2SD, ARM64vecsh_UQRSHRN4HS, ARM64vecsh_UQRSHRN8BH,
ARM64vecsh_SQRSHRUN2SD, ARM64vecsh_SQRSHRUN4HS, ARM64vecsh_SQRSHRUN8BH,
+ /* Saturating left shifts, of various flavours. */
+ ARM64vecsh_UQSHL64x2, ARM64vecsh_UQSHL32x4,
+ ARM64vecsh_UQSHL16x8, ARM64vecsh_UQSHL8x16,
+ ARM64vecsh_SQSHL64x2, ARM64vecsh_SQSHL32x4,
+ ARM64vecsh_SQSHL16x8, ARM64vecsh_SQSHL8x16,
+ ARM64vecsh_SQSHLU64x2, ARM64vecsh_SQSHLU32x4,
+ ARM64vecsh_SQSHLU16x8, ARM64vecsh_SQSHLU8x16,
ARM64vecsh_INVALID
}
ARM64VecShiftOp;
HReg dst; // Q reg
HReg src; // Q reg
} VNarrowV;
- /* Vector shift by immediate. |amt| needs to be > 0 and <
- implied lane size of |op|. Zero shifts and out of range
- shifts are not allowed. */
+ /* Vector shift by immediate. For left shifts, |amt| must be
+ >= 0 and < implied lane size of |op|. For right shifts,
+ |amt| must be > 0 and <= implied lane size of |op|. Shifts
+ beyond these ranges are not allowed. */
struct {
ARM64VecShiftOp op;
HReg dst;
//ZZ res, argL, size, True));
//ZZ return res;
//ZZ }
- case Iop_ShrN64x2:
- case Iop_ShrN32x4:
- case Iop_ShrN16x8:
- case Iop_ShrN8x16:
- case Iop_SarN64x2:
- case Iop_SarN32x4:
- case Iop_SarN16x8:
- case Iop_SarN8x16:
- case Iop_ShlN64x2:
- case Iop_ShlN32x4:
- case Iop_ShlN16x8:
- case Iop_ShlN8x16:
+ case Iop_ShrN64x2: case Iop_ShrN32x4:
+ case Iop_ShrN16x8: case Iop_ShrN8x16:
+ case Iop_SarN64x2: case Iop_SarN32x4:
+ case Iop_SarN16x8: case Iop_SarN8x16:
+ case Iop_ShlN64x2: case Iop_ShlN32x4:
+ case Iop_ShlN16x8: case Iop_ShlN8x16:
+ case Iop_QShlN64x2: case Iop_QShlN32x4:
+ case Iop_QShlN16x8: case Iop_QShlN8x16:
+ case Iop_QSalN64x2: case Iop_QSalN32x4:
+ case Iop_QSalN16x8: case Iop_QSalN8x16:
+ case Iop_QShlN64Sx2: case Iop_QShlN32Sx4:
+ case Iop_QShlN16Sx8: case Iop_QShlN8Sx16:
{
IRExpr* argL = e->Iex.Binop.arg1;
IRExpr* argR = e->Iex.Binop.arg2;
if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
UInt amt = argR->Iex.Const.con->Ico.U8;
- UInt limit = 0;
+ UInt limLo = 0;
+ UInt limHi = 0;
ARM64VecShiftOp op = ARM64vecsh_INVALID;
+ /* Establish the instruction to use. */
switch (e->Iex.Binop.op) {
- case Iop_ShrN64x2:
- op = ARM64vecsh_USHR64x2; limit = 63; break;
- case Iop_ShrN32x4:
- op = ARM64vecsh_USHR32x4; limit = 31; break;
- case Iop_ShrN16x8:
- op = ARM64vecsh_USHR16x8; limit = 15; break;
- case Iop_ShrN8x16:
- op = ARM64vecsh_USHR8x16; limit = 7; break;
- case Iop_SarN64x2:
- op = ARM64vecsh_SSHR64x2; limit = 63; break;
- case Iop_SarN32x4:
- op = ARM64vecsh_SSHR32x4; limit = 31; break;
- case Iop_SarN16x8:
- op = ARM64vecsh_SSHR16x8; limit = 15; break;
- case Iop_SarN8x16:
- op = ARM64vecsh_SSHR8x16; limit = 7; break;
- case Iop_ShlN64x2:
- op = ARM64vecsh_SHL64x2; limit = 63; break;
- case Iop_ShlN32x4:
- op = ARM64vecsh_SHL32x4; limit = 31; break;
- case Iop_ShlN16x8:
- op = ARM64vecsh_SHL16x8; limit = 15; break;
- case Iop_ShlN8x16:
- op = ARM64vecsh_SHL8x16; limit = 7; break;
- default:
- vassert(0);
+ case Iop_ShrN64x2: op = ARM64vecsh_USHR64x2; break;
+ case Iop_ShrN32x4: op = ARM64vecsh_USHR32x4; break;
+ case Iop_ShrN16x8: op = ARM64vecsh_USHR16x8; break;
+ case Iop_ShrN8x16: op = ARM64vecsh_USHR8x16; break;
+ case Iop_SarN64x2: op = ARM64vecsh_SSHR64x2; break;
+ case Iop_SarN32x4: op = ARM64vecsh_SSHR32x4; break;
+ case Iop_SarN16x8: op = ARM64vecsh_SSHR16x8; break;
+ case Iop_SarN8x16: op = ARM64vecsh_SSHR8x16; break;
+ case Iop_ShlN64x2: op = ARM64vecsh_SHL64x2; break;
+ case Iop_ShlN32x4: op = ARM64vecsh_SHL32x4; break;
+ case Iop_ShlN16x8: op = ARM64vecsh_SHL16x8; break;
+ case Iop_ShlN8x16: op = ARM64vecsh_SHL8x16; break;
+ case Iop_QShlN64x2: op = ARM64vecsh_UQSHL64x2; break;
+ case Iop_QShlN32x4: op = ARM64vecsh_UQSHL32x4; break;
+ case Iop_QShlN16x8: op = ARM64vecsh_UQSHL16x8; break;
+ case Iop_QShlN8x16: op = ARM64vecsh_UQSHL8x16; break;
+ case Iop_QSalN64x2: op = ARM64vecsh_SQSHL64x2; break;
+ case Iop_QSalN32x4: op = ARM64vecsh_SQSHL32x4; break;
+ case Iop_QSalN16x8: op = ARM64vecsh_SQSHL16x8; break;
+ case Iop_QSalN8x16: op = ARM64vecsh_SQSHL8x16; break;
+ case Iop_QShlN64Sx2: op = ARM64vecsh_SQSHLU64x2; break;
+ case Iop_QShlN32Sx4: op = ARM64vecsh_SQSHLU32x4; break;
+ case Iop_QShlN16Sx8: op = ARM64vecsh_SQSHLU16x8; break;
+ case Iop_QShlN8Sx16: op = ARM64vecsh_SQSHLU8x16; break;
+ default: vassert(0);
}
- if (op != ARM64vecsh_INVALID && amt >= 0 && amt <= limit) {
+ /* Establish the shift limits, for sanity check purposes only. */
+ switch (e->Iex.Binop.op) {
+ case Iop_ShrN64x2: limLo = 1; limHi = 64; break;
+ case Iop_ShrN32x4: limLo = 1; limHi = 32; break;
+ case Iop_ShrN16x8: limLo = 1; limHi = 16; break;
+ case Iop_ShrN8x16: limLo = 1; limHi = 8; break;
+ case Iop_SarN64x2: limLo = 1; limHi = 64; break;
+ case Iop_SarN32x4: limLo = 1; limHi = 32; break;
+ case Iop_SarN16x8: limLo = 1; limHi = 16; break;
+ case Iop_SarN8x16: limLo = 1; limHi = 8; break;
+ case Iop_ShlN64x2: limLo = 0; limHi = 63; break;
+ case Iop_ShlN32x4: limLo = 0; limHi = 31; break;
+ case Iop_ShlN16x8: limLo = 0; limHi = 15; break;
+ case Iop_ShlN8x16: limLo = 0; limHi = 7; break;
+ case Iop_QShlN64x2: limLo = 0; limHi = 63; break;
+ case Iop_QShlN32x4: limLo = 0; limHi = 31; break;
+ case Iop_QShlN16x8: limLo = 0; limHi = 15; break;
+ case Iop_QShlN8x16: limLo = 0; limHi = 7; break;
+ case Iop_QSalN64x2: limLo = 0; limHi = 63; break;
+ case Iop_QSalN32x4: limLo = 0; limHi = 31; break;
+ case Iop_QSalN16x8: limLo = 0; limHi = 15; break;
+ case Iop_QSalN8x16: limLo = 0; limHi = 7; break;
+ case Iop_QShlN64Sx2: limLo = 0; limHi = 63; break;
+ case Iop_QShlN32Sx4: limLo = 0; limHi = 31; break;
+ case Iop_QShlN16Sx8: limLo = 0; limHi = 15; break;
+ case Iop_QShlN8Sx16: limLo = 0; limHi = 7; break;
+ default: vassert(0);
+ }
+ /* For left shifts, the allowable amt values are
+ 0 .. lane_bits-1. For right shifts the allowable
+ values are 1 .. lane_bits. */
+ if (op != ARM64vecsh_INVALID && amt >= limLo && amt <= limHi) {
HReg src = iselV128Expr(env, argL);
HReg dst = newVRegV(env);
- if (amt > 0) {
- /* For left shifts, the allowable amt values are
- 0 .. lane_bits-1. For right shifts the allowable
- values are 1 .. lane_bits. By restricting it to
- 1 .. lane_bits-1, we are guaranteed to create a
- valid instruction. */
- addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
- } else {
- dst = src;
- }
+ addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
return dst;
}
+ /* Special case some no-op shifts that the arm64 front end
+ throws at us. We can't generate any instructions for these,
+ but we don't need to either. */
+ switch (e->Iex.Binop.op) {
+ case Iop_ShrN64x2: case Iop_ShrN32x4:
+ case Iop_ShrN16x8: case Iop_ShrN8x16:
+ if (amt == 0) {
+ return iselV128Expr(env, argL);
+ }
+ break;
+ default:
+ break;
+ }
+ /* otherwise unhandled */
}
/* else fall out; this is unhandled */
break;