return ops[size];
}
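+/* Lane-size-indexed (0=8b .. 3=64b) lookups for the new bidirectional
+   (Sh) and rounding bidirectional (Rsh) vector shift IROps. */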
+static IROp mkVecSHU ( UInt size ) {
+ const IROp ops[4]
+ = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
+ vassert(size < 4);
+ return ops[size];
+}
+
+static IROp mkVecSHS ( UInt size ) {
+ const IROp ops[4]
+ = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
+ vassert(size < 4);
+ return ops[size];
+}
+
+static IROp mkVecRSHU ( UInt size ) {
+ const IROp ops[4]
+ = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
+ vassert(size < 4);
+ return ops[size];
+}
+
+static IROp mkVecRSHS ( UInt size ) {
+ const IROp ops[4]
+ = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
+ vassert(size < 4);
+ return ops[size];
+}
+
static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
const IROp ops[4]
= { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
UInt dd = INSN(4,0);
UInt immhb = (immh << 3) | immb;
- if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,0,0,0,0)) {
- /* -------- 1,1xxx,00000 SHR d_d_#imm -------- */
- UInt sh = 128 - immhb;
+ if ((immh & 8) == 8
+ && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
+ /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
+ /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
+ /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
+ /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
+ Bool isU = bitU == 1;
+ Bool isAcc = opcode == BITS5(0,0,0,1,0);
+ UInt sh = 128 - immhb;
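+ /* immh bit 3 is set, so immhb lies in 64..127 and sh in 1..64. */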
vassert(sh >= 1 && sh <= 64);
- /* Don't generate an out of range IR shift */
- putQReg128(dd, sh == 64
- ? mkV128(0x0000)
- : unop(Iop_ZeroHI64ofV128,
- binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
- DIP("shr d%u, d%u, #%u\n", dd, nn, sh);
+ IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
+ IRExpr* src = getQReg128(nn);
+ IRTemp shf = newTempV128();
+ IRTemp res = newTempV128();
+ if (sh == 64 && isU) {
+ assign(shf, mkV128(0x0000));
+ } else {
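+ /* The IR shift amount must be less than 64.  For the signed case,
+    shifting by 63 gives the same result as shifting by 64 would:
+    every result bit is a copy of the sign bit. */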
+ UInt nudge = 0;
+ if (sh == 64) {
+ vassert(!isU);
+ nudge = 1;
+ }
+ assign(shf, binop(op, src, mkU8(sh - nudge)));
+ }
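+ /* The accumulating forms (SSRA/USRA) add the shifted value to the
+    existing Dd. */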
+ assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
+ : mkexpr(shf));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
+ : (isU ? "ushr" : "sshr");
+ DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
+ return True;
+ }
+
+ if ((immh & 8) == 8
+ && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
+ /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
+ /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
+ /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
+ /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
+ Bool isU = bitU == 1;
+ Bool isAcc = opcode == BITS5(0,0,1,1,0);
+ UInt sh = 128 - immhb;
+ vassert(sh >= 1 && sh <= 64);
+ IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
+ IRExpr* src = getQReg128(nn);
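+ /* A rounding right shift by sh is done with the bidirectional Rsh
+    op: broadcast -sh as a signed byte so each lane is shifted right,
+    with rounding, by sh. */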
+ IRTemp imm8 = newTemp(Ity_I8);
+ assign(imm8, mkU8((UChar)(-sh)));
+ IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
+ IRTemp shf = newTempV128();
+ IRTemp res = newTempV128();
+ assign(shf, binop(op, src, amt));
+ assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
+ : mkexpr(shf));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
+ : (isU ? "urshr" : "srshr");
+ DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
return True;
}
return True;
}
+ if (size == X11 && (opcode == BITS5(0,1,0,0,0)
+ || opcode == BITS5(0,1,0,1,0))) {
+ /* -------- 0,xx,01000 SSHL d_d_d -------- */
+ /* -------- 0,xx,01010 SRSHL d_d_d -------- */
+ /* -------- 1,xx,01000 USHL d_d_d -------- */
+ /* -------- 1,xx,01010 URSHL d_d_d -------- */
+ Bool isU = bitU == 1;
+ Bool isR = opcode == BITS5(0,1,0,1,0);
+ IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
+ : (isU ? mkVecSHU(size) : mkVecSHS(size));
+ IRTemp res = newTempV128();
+ assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
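+ /* Scalar form: only the low 64 bits of the result are significant,
+    so clear the upper half. */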
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ const HChar* nm = isR ? (isU ? "urshl" : "srshl")
+ : (isU ? "ushl" : "sshl");
+ DIP("%s %s, %s, %s\n", nm,
+ nameQRegLO(dd, Ity_I64),
+ nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
+ return True;
+ }
+
if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
/* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
/* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
UInt nn = INSN(9,5);
UInt dd = INSN(4,0);
- if (opcode == BITS5(0,0,0,0,0)) {
+ if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
/* -------- 0,00000 SSHR std7_std7_#imm -------- */
/* -------- 1,00000 USHR std7_std7_#imm -------- */
+ /* -------- 0,00010 SSRA std7_std7_#imm -------- */
+ /* -------- 1,00010 USRA std7_std7_#imm -------- */
/* laneTy, shift = case immh:immb of
0001:xxx -> B, SHR:8-xxx
001x:xxx -> H, SHR:16-xxxx
UInt shift = 0;
Bool isQ = bitQ == 1;
Bool isU = bitU == 1;
+ Bool isAcc = opcode == BITS5(0,0,0,1,0);
Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
if (!ok || (bitQ == 0 && size == X11)) return False;
vassert(size >= 0 && size <= 3);
vassert(shift >= 1 && shift <= lanebits);
IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
IRExpr* src = getQReg128(nn);
+ IRTemp shf = newTempV128();
IRTemp res = newTempV128();
if (shift == lanebits && isU) {
- assign(res, mkV128(0x0000));
+ assign(shf, mkV128(0x0000));
} else {
UInt nudge = 0;
if (shift == lanebits) {
vassert(!isU);
nudge = 1;
}
- assign(res, binop(op, src, mkU8(shift - nudge)));
+ assign(shf, binop(op, src, mkU8(shift - nudge)));
}
+ assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
+ : mkexpr(shf));
putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
HChar laneCh = "bhsd"[size];
UInt nLanes = (isQ ? 128 : 64) / lanebits;
- const HChar* nm = isU ? "ushr" : "sshr";
+ const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
+ : (isU ? "ushr" : "sshr");
+ DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
+ nameQReg128(dd), nLanes, laneCh,
+ nameQReg128(nn), nLanes, laneCh, shift);
+ return True;
+ }
+
+ if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
+ /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
+ /* -------- 1,00100 URSHR std7_std7_#imm -------- */
+ /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
+ /* -------- 1,00110 URSRA std7_std7_#imm -------- */
+ /* laneTy, shift = case immh:immb of
+ 0001:xxx -> B, SHR:8-xxx
+ 001x:xxx -> H, SHR:16-xxxx
+ 01xx:xxx -> S, SHR:32-xxxxx
+ 1xxx:xxx -> D, SHR:64-xxxxxx
+ other -> invalid
+ */
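+ /* E.g. immh:immb = 0101:011 selects S lanes with shift 32 - 0b01011 = 21. */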
+ UInt size = 0;
+ UInt shift = 0;
+ Bool isQ = bitQ == 1;
+ Bool isU = bitU == 1;
+ Bool isAcc = opcode == BITS5(0,0,1,1,0);
+ Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
+ if (!ok || (bitQ == 0 && size == X11)) return False;
+ vassert(size >= 0 && size <= 3);
+ UInt lanebits = 8 << size;
+ vassert(shift >= 1 && shift <= lanebits);
+ IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
+ IRExpr* src = getQReg128(nn);
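+ /* As in the scalar case, the rounding right shift is done by
+    broadcasting -shift as a signed byte and using the bidirectional
+    Rsh op. */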
+ IRTemp imm8 = newTemp(Ity_I8);
+ assign(imm8, mkU8((UChar)(-shift)));
+ IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
+ IRTemp shf = newTempV128();
+ IRTemp res = newTempV128();
+ assign(shf, binop(op, src, amt));
+ assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
+ : mkexpr(shf));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ HChar laneCh = "bhsd"[size];
+ UInt nLanes = (isQ ? 128 : 64) / lanebits;
+ const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
+ : (isU ? "urshr" : "srshr");
DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
nameQReg128(dd), nLanes, laneCh,
nameQReg128(nn), nLanes, laneCh, shift);
return True;
}
+ if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
+ /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
+ /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
+ /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
+ /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
+ if (bitQ == 0 && size == X11) return False; // implied 1d case
+ Bool isU = bitU == 1;
+ Bool isR = opcode == BITS5(0,1,0,1,0);
+ IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
+ : (isU ? mkVecSHU(size) : mkVecSHS(size));
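+ /* Each lane of Vn is shifted by the signed amount in the low byte
+    of the corresponding lane of Vm; the R forms round. */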
+ IRTemp res = newTempV128();
+ assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ const HChar* nm = isR ? (isU ? "urshl" : "srshl")
+ : (isU ? "ushl" : "sshl");
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+ return True;
+ }
+
if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
/* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
/* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
case ARM64vecb_UQRSHL32x4: *nm = "uqrshl"; *ar = "4s"; return;
case ARM64vecb_UQRSHL16x8: *nm = "uqrshl"; *ar = "8h"; return;
case ARM64vecb_UQRSHL8x16: *nm = "uqrshl"; *ar = "16b"; return;
+ case ARM64vecb_SSHL64x2: *nm = "sshl"; *ar = "2d"; return;
+ case ARM64vecb_SSHL32x4: *nm = "sshl"; *ar = "4s"; return;
+ case ARM64vecb_SSHL16x8: *nm = "sshl"; *ar = "8h"; return;
+ case ARM64vecb_SSHL8x16: *nm = "sshl"; *ar = "16b"; return;
+ case ARM64vecb_USHL64x2: *nm = "ushl"; *ar = "2d"; return;
+ case ARM64vecb_USHL32x4: *nm = "ushl"; *ar = "4s"; return;
+ case ARM64vecb_USHL16x8: *nm = "ushl"; *ar = "8h"; return;
+ case ARM64vecb_USHL8x16: *nm = "ushl"; *ar = "16b"; return;
+ case ARM64vecb_SRSHL64x2: *nm = "srshl"; *ar = "2d"; return;
+ case ARM64vecb_SRSHL32x4: *nm = "srshl"; *ar = "4s"; return;
+ case ARM64vecb_SRSHL16x8: *nm = "srshl"; *ar = "8h"; return;
+ case ARM64vecb_SRSHL8x16: *nm = "srshl"; *ar = "16b"; return;
+ case ARM64vecb_URSHL64x2: *nm = "urshl"; *ar = "2d"; return;
+ case ARM64vecb_URSHL32x4: *nm = "urshl"; *ar = "4s"; return;
+ case ARM64vecb_URSHL16x8: *nm = "urshl"; *ar = "8h"; return;
+ case ARM64vecb_URSHL8x16: *nm = "urshl"; *ar = "16b"; return;
default: vpanic("showARM64VecBinOp");
}
}
}
}
-static void showARM64VecShiftOp(/*OUT*/const HChar** nm,
- /*OUT*/const HChar** ar,
- ARM64VecShiftOp op )
+static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
+ /*OUT*/const HChar** ar,
+ ARM64VecShiftImmOp op )
{
switch (op) {
- case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
- case ARM64vecsh_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
- case ARM64vecsh_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
- case ARM64vecsh_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
- case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
- case ARM64vecsh_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
- case ARM64vecsh_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
- case ARM64vecsh_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
- case ARM64vecsh_SHL64x2: *nm = "shl "; *ar = "2d"; return;
- case ARM64vecsh_SHL32x4: *nm = "shl "; *ar = "4s"; return;
- case ARM64vecsh_SHL16x8: *nm = "shl "; *ar = "8h"; return;
- case ARM64vecsh_SHL8x16: *nm = "shl "; *ar = "16b"; return;
- case ARM64vecsh_SQSHRN2SD: *nm = "sqshrn"; *ar = "2sd"; return;
- case ARM64vecsh_SQSHRN4HS: *nm = "sqshrn"; *ar = "4hs"; return;
- case ARM64vecsh_SQSHRN8BH: *nm = "sqshrn"; *ar = "8bh"; return;
- case ARM64vecsh_UQSHRN2SD: *nm = "uqshrn"; *ar = "2sd"; return;
- case ARM64vecsh_UQSHRN4HS: *nm = "uqshrn"; *ar = "4hs"; return;
- case ARM64vecsh_UQSHRN8BH: *nm = "uqshrn"; *ar = "8bh"; return;
- case ARM64vecsh_SQSHRUN2SD: *nm = "sqshrun"; *ar = "2sd"; return;
- case ARM64vecsh_SQSHRUN4HS: *nm = "sqshrun"; *ar = "4hs"; return;
- case ARM64vecsh_SQSHRUN8BH: *nm = "sqshrun"; *ar = "8bh"; return;
- case ARM64vecsh_SQRSHRN2SD: *nm = "sqrshrn"; *ar = "2sd"; return;
- case ARM64vecsh_SQRSHRN4HS: *nm = "sqrshrn"; *ar = "4hs"; return;
- case ARM64vecsh_SQRSHRN8BH: *nm = "sqrshrn"; *ar = "8bh"; return;
- case ARM64vecsh_UQRSHRN2SD: *nm = "uqrshrn"; *ar = "2sd"; return;
- case ARM64vecsh_UQRSHRN4HS: *nm = "uqrshrn"; *ar = "4hs"; return;
- case ARM64vecsh_UQRSHRN8BH: *nm = "uqrshrn"; *ar = "8bh"; return;
- case ARM64vecsh_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
- case ARM64vecsh_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
- case ARM64vecsh_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
- case ARM64vecsh_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
- case ARM64vecsh_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
- case ARM64vecsh_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
- case ARM64vecsh_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
- case ARM64vecsh_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
- case ARM64vecsh_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
- case ARM64vecsh_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
- case ARM64vecsh_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
- case ARM64vecsh_SQSHLU64x2: *nm = "sqshlu"; *ar = "2d"; return;
- case ARM64vecsh_SQSHLU32x4: *nm = "sqshlu"; *ar = "4s"; return;
- case ARM64vecsh_SQSHLU16x8: *nm = "sqshlu"; *ar = "8h"; return;
- case ARM64vecsh_SQSHLU8x16: *nm = "sqshlu"; *ar = "16b"; return;
- default: vpanic("showARM64VecShiftOp");
+ case ARM64vecshi_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
+ case ARM64vecshi_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
+ case ARM64vecshi_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
+ case ARM64vecshi_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
+ case ARM64vecshi_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
+ case ARM64vecshi_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
+ case ARM64vecshi_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
+ case ARM64vecshi_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
+ case ARM64vecshi_SHL64x2: *nm = "shl "; *ar = "2d"; return;
+ case ARM64vecshi_SHL32x4: *nm = "shl "; *ar = "4s"; return;
+ case ARM64vecshi_SHL16x8: *nm = "shl "; *ar = "8h"; return;
+ case ARM64vecshi_SHL8x16: *nm = "shl "; *ar = "16b"; return;
+ case ARM64vecshi_SQSHRN2SD: *nm = "sqshrn"; *ar = "2sd"; return;
+ case ARM64vecshi_SQSHRN4HS: *nm = "sqshrn"; *ar = "4hs"; return;
+ case ARM64vecshi_SQSHRN8BH: *nm = "sqshrn"; *ar = "8bh"; return;
+ case ARM64vecshi_UQSHRN2SD: *nm = "uqshrn"; *ar = "2sd"; return;
+ case ARM64vecshi_UQSHRN4HS: *nm = "uqshrn"; *ar = "4hs"; return;
+ case ARM64vecshi_UQSHRN8BH: *nm = "uqshrn"; *ar = "8bh"; return;
+ case ARM64vecshi_SQSHRUN2SD: *nm = "sqshrun"; *ar = "2sd"; return;
+ case ARM64vecshi_SQSHRUN4HS: *nm = "sqshrun"; *ar = "4hs"; return;
+ case ARM64vecshi_SQSHRUN8BH: *nm = "sqshrun"; *ar = "8bh"; return;
+ case ARM64vecshi_SQRSHRN2SD: *nm = "sqrshrn"; *ar = "2sd"; return;
+ case ARM64vecshi_SQRSHRN4HS: *nm = "sqrshrn"; *ar = "4hs"; return;
+ case ARM64vecshi_SQRSHRN8BH: *nm = "sqrshrn"; *ar = "8bh"; return;
+ case ARM64vecshi_UQRSHRN2SD: *nm = "uqrshrn"; *ar = "2sd"; return;
+ case ARM64vecshi_UQRSHRN4HS: *nm = "uqrshrn"; *ar = "4hs"; return;
+ case ARM64vecshi_UQRSHRN8BH: *nm = "uqrshrn"; *ar = "8bh"; return;
+ case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
+ case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
+ case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
+ case ARM64vecshi_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
+ case ARM64vecshi_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
+ case ARM64vecshi_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
+ case ARM64vecshi_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
+ case ARM64vecshi_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
+ case ARM64vecshi_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
+ case ARM64vecshi_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
+ case ARM64vecshi_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
+ case ARM64vecshi_SQSHLU64x2: *nm = "sqshlu"; *ar = "2d"; return;
+ case ARM64vecshi_SQSHLU32x4: *nm = "sqshlu"; *ar = "4s"; return;
+ case ARM64vecshi_SQSHLU16x8: *nm = "sqshlu"; *ar = "8h"; return;
+ case ARM64vecshi_SQSHLU8x16: *nm = "sqshlu"; *ar = "16b"; return;
+ default: vpanic("showARM64VecShiftImmOp");
}
}
vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
return i;
}
-ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
+ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
HReg dst, HReg src, UInt amt ) {
ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
i->tag = ARM64in_VShiftImmV;
/* For right shifts, the allowed shift amounts are 1 .. lane_size.
For left shifts, the allowed shift amounts are 0 .. lane_size-1.
*/
- case ARM64vecsh_USHR64x2: case ARM64vecsh_SSHR64x2:
- case ARM64vecsh_UQSHRN2SD: case ARM64vecsh_SQSHRN2SD:
- case ARM64vecsh_SQSHRUN2SD:
- case ARM64vecsh_UQRSHRN2SD: case ARM64vecsh_SQRSHRN2SD:
- case ARM64vecsh_SQRSHRUN2SD:
+ case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
+ case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
+ case ARM64vecshi_SQSHRUN2SD:
+ case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
+ case ARM64vecshi_SQRSHRUN2SD:
minSh = 1; maxSh = 64; break;
- case ARM64vecsh_SHL64x2:
- case ARM64vecsh_UQSHL64x2: case ARM64vecsh_SQSHL64x2:
- case ARM64vecsh_SQSHLU64x2:
+ case ARM64vecshi_SHL64x2:
+ case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
+ case ARM64vecshi_SQSHLU64x2:
minSh = 0; maxSh = 63; break;
- case ARM64vecsh_USHR32x4: case ARM64vecsh_SSHR32x4:
- case ARM64vecsh_UQSHRN4HS: case ARM64vecsh_SQSHRN4HS:
- case ARM64vecsh_SQSHRUN4HS:
- case ARM64vecsh_UQRSHRN4HS: case ARM64vecsh_SQRSHRN4HS:
- case ARM64vecsh_SQRSHRUN4HS:
+ case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
+ case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
+ case ARM64vecshi_SQSHRUN4HS:
+ case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
+ case ARM64vecshi_SQRSHRUN4HS:
minSh = 1; maxSh = 32; break;
- case ARM64vecsh_SHL32x4:
- case ARM64vecsh_UQSHL32x4: case ARM64vecsh_SQSHL32x4:
- case ARM64vecsh_SQSHLU32x4:
+ case ARM64vecshi_SHL32x4:
+ case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
+ case ARM64vecshi_SQSHLU32x4:
minSh = 0; maxSh = 31; break;
- case ARM64vecsh_USHR16x8: case ARM64vecsh_SSHR16x8:
- case ARM64vecsh_UQSHRN8BH: case ARM64vecsh_SQSHRN8BH:
- case ARM64vecsh_SQSHRUN8BH:
- case ARM64vecsh_UQRSHRN8BH: case ARM64vecsh_SQRSHRN8BH:
- case ARM64vecsh_SQRSHRUN8BH:
+ case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
+ case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
+ case ARM64vecshi_SQSHRUN8BH:
+ case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
+ case ARM64vecshi_SQRSHRUN8BH:
minSh = 1; maxSh = 16; break;
- case ARM64vecsh_SHL16x8:
- case ARM64vecsh_UQSHL16x8: case ARM64vecsh_SQSHL16x8:
- case ARM64vecsh_SQSHLU16x8:
+ case ARM64vecshi_SHL16x8:
+ case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
+ case ARM64vecshi_SQSHLU16x8:
minSh = 0; maxSh = 15; break;
- case ARM64vecsh_USHR8x16: case ARM64vecsh_SSHR8x16:
+ case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
minSh = 1; maxSh = 8; break;
- case ARM64vecsh_SHL8x16:
- case ARM64vecsh_UQSHL8x16: case ARM64vecsh_SQSHL8x16:
- case ARM64vecsh_SQSHLU8x16:
+ case ARM64vecshi_SHL8x16:
+ case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
+ case ARM64vecshi_SQSHLU8x16:
minSh = 0; maxSh = 7; break;
default:
vassert(0);
case ARM64in_VShiftImmV: {
const HChar* nm = "??";
const HChar* ar = "??";
- showARM64VecShiftOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
+ showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
vex_printf("%s ", nm);
ppHRegARM64(i->ARM64in.VShiftImmV.dst);
vex_printf(".%s, ", ar);
010 01110 sz 1 m 010111 n d SQRSHL@sz Vd, Vn, Vm
011 01110 sz 1 m 010011 n d UQSHL@sz Vd, Vn, Vm
011 01110 sz 1 m 010111 n d UQRSHL@sz Vd, Vn, Vm
+
+ 010 01110 sz 1 m 010001 n d SSHL@sz Vd, Vn, Vm
+ 010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
+ 011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
+ 011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
*/
UInt vD = qregNo(i->ARM64in.VBinV.dst);
UInt vN = qregNo(i->ARM64in.VBinV.argL);
*p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
break;
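+ /* For the cases below: the leading 3-bit field is 010 for the
+    signed forms and 011 for the unsigned forms, and the 6-bit
+    field is 010001 for SSHL/USHL and 010101 for SRSHL/URSHL,
+    per the encoding summary above. */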
+ case ARM64vecb_SSHL64x2:
+ *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
+ break;
+ case ARM64vecb_SSHL32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
+ break;
+ case ARM64vecb_SSHL16x8:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
+ break;
+ case ARM64vecb_SSHL8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
+ break;
+
+ case ARM64vecb_SRSHL64x2:
+ *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
+ break;
+ case ARM64vecb_SRSHL32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
+ break;
+ case ARM64vecb_SRSHL16x8:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
+ break;
+ case ARM64vecb_SRSHL8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
+ break;
+
+ case ARM64vecb_USHL64x2:
+ *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
+ break;
+ case ARM64vecb_USHL32x4:
+ *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
+ break;
+ case ARM64vecb_USHL16x8:
+ *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
+ break;
+ case ARM64vecb_USHL8x16:
+ *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
+ break;
+
+ case ARM64vecb_URSHL64x2:
+ *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
+ break;
+ case ARM64vecb_URSHL32x4:
+ *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
+ break;
+ case ARM64vecb_URSHL16x8:
+ *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
+ break;
+ case ARM64vecb_URSHL8x16:
+ *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
+ break;
+
default:
goto bad;
}
= X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
switch (i->ARM64in.VShiftImmV.op) {
- case ARM64vecsh_SSHR64x2: tmpl = tmpl_SSHR; goto right64x2;
- case ARM64vecsh_USHR64x2: tmpl = tmpl_USHR; goto right64x2;
- case ARM64vecsh_SHL64x2: tmpl = tmpl_SHL; goto left64x2;
- case ARM64vecsh_UQSHL64x2: tmpl = tmpl_UQSHL; goto left64x2;
- case ARM64vecsh_SQSHL64x2: tmpl = tmpl_SQSHL; goto left64x2;
- case ARM64vecsh_SQSHLU64x2: tmpl = tmpl_SQSHLU; goto left64x2;
- case ARM64vecsh_SSHR32x4: tmpl = tmpl_SSHR; goto right32x4;
- case ARM64vecsh_USHR32x4: tmpl = tmpl_USHR; goto right32x4;
- case ARM64vecsh_UQSHRN2SD: tmpl = tmpl_UQSHRN; goto right32x4;
- case ARM64vecsh_SQSHRN2SD: tmpl = tmpl_SQSHRN; goto right32x4;
- case ARM64vecsh_SQSHRUN2SD: tmpl = tmpl_SQSHRUN; goto right32x4;
- case ARM64vecsh_UQRSHRN2SD: tmpl = tmpl_UQRSHRN; goto right32x4;
- case ARM64vecsh_SQRSHRN2SD: tmpl = tmpl_SQRSHRN; goto right32x4;
- case ARM64vecsh_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
- case ARM64vecsh_SHL32x4: tmpl = tmpl_SHL; goto left32x4;
- case ARM64vecsh_UQSHL32x4: tmpl = tmpl_UQSHL; goto left32x4;
- case ARM64vecsh_SQSHL32x4: tmpl = tmpl_SQSHL; goto left32x4;
- case ARM64vecsh_SQSHLU32x4: tmpl = tmpl_SQSHLU; goto left32x4;
- case ARM64vecsh_SSHR16x8: tmpl = tmpl_SSHR; goto right16x8;
- case ARM64vecsh_USHR16x8: tmpl = tmpl_USHR; goto right16x8;
- case ARM64vecsh_UQSHRN4HS: tmpl = tmpl_UQSHRN; goto right16x8;
- case ARM64vecsh_SQSHRN4HS: tmpl = tmpl_SQSHRN; goto right16x8;
- case ARM64vecsh_SQSHRUN4HS: tmpl = tmpl_SQSHRUN; goto right16x8;
- case ARM64vecsh_UQRSHRN4HS: tmpl = tmpl_UQRSHRN; goto right16x8;
- case ARM64vecsh_SQRSHRN4HS: tmpl = tmpl_SQRSHRN; goto right16x8;
- case ARM64vecsh_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
- case ARM64vecsh_SHL16x8: tmpl = tmpl_SHL; goto left16x8;
- case ARM64vecsh_UQSHL16x8: tmpl = tmpl_UQSHL; goto left16x8;
- case ARM64vecsh_SQSHL16x8: tmpl = tmpl_SQSHL; goto left16x8;
- case ARM64vecsh_SQSHLU16x8: tmpl = tmpl_SQSHLU; goto left16x8;
- case ARM64vecsh_SSHR8x16: tmpl = tmpl_SSHR; goto right8x16;
- case ARM64vecsh_USHR8x16: tmpl = tmpl_USHR; goto right8x16;
- case ARM64vecsh_UQSHRN8BH: tmpl = tmpl_UQSHRN; goto right8x16;
- case ARM64vecsh_SQSHRN8BH: tmpl = tmpl_SQSHRN; goto right8x16;
- case ARM64vecsh_SQSHRUN8BH: tmpl = tmpl_SQSHRUN; goto right8x16;
- case ARM64vecsh_UQRSHRN8BH: tmpl = tmpl_UQRSHRN; goto right8x16;
- case ARM64vecsh_SQRSHRN8BH: tmpl = tmpl_SQRSHRN; goto right8x16;
- case ARM64vecsh_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
- case ARM64vecsh_SHL8x16: tmpl = tmpl_SHL; goto left8x16;
- case ARM64vecsh_UQSHL8x16: tmpl = tmpl_UQSHL; goto left8x16;
- case ARM64vecsh_SQSHL8x16: tmpl = tmpl_SQSHL; goto left8x16;
- case ARM64vecsh_SQSHLU8x16: tmpl = tmpl_SQSHLU; goto left8x16;
+ case ARM64vecshi_SSHR64x2: tmpl = tmpl_SSHR; goto right64x2;
+ case ARM64vecshi_USHR64x2: tmpl = tmpl_USHR; goto right64x2;
+ case ARM64vecshi_SHL64x2: tmpl = tmpl_SHL; goto left64x2;
+ case ARM64vecshi_UQSHL64x2: tmpl = tmpl_UQSHL; goto left64x2;
+ case ARM64vecshi_SQSHL64x2: tmpl = tmpl_SQSHL; goto left64x2;
+ case ARM64vecshi_SQSHLU64x2: tmpl = tmpl_SQSHLU; goto left64x2;
+ case ARM64vecshi_SSHR32x4: tmpl = tmpl_SSHR; goto right32x4;
+ case ARM64vecshi_USHR32x4: tmpl = tmpl_USHR; goto right32x4;
+ case ARM64vecshi_UQSHRN2SD: tmpl = tmpl_UQSHRN; goto right32x4;
+ case ARM64vecshi_SQSHRN2SD: tmpl = tmpl_SQSHRN; goto right32x4;
+ case ARM64vecshi_SQSHRUN2SD: tmpl = tmpl_SQSHRUN; goto right32x4;
+ case ARM64vecshi_UQRSHRN2SD: tmpl = tmpl_UQRSHRN; goto right32x4;
+ case ARM64vecshi_SQRSHRN2SD: tmpl = tmpl_SQRSHRN; goto right32x4;
+ case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
+ case ARM64vecshi_SHL32x4: tmpl = tmpl_SHL; goto left32x4;
+ case ARM64vecshi_UQSHL32x4: tmpl = tmpl_UQSHL; goto left32x4;
+ case ARM64vecshi_SQSHL32x4: tmpl = tmpl_SQSHL; goto left32x4;
+ case ARM64vecshi_SQSHLU32x4: tmpl = tmpl_SQSHLU; goto left32x4;
+ case ARM64vecshi_SSHR16x8: tmpl = tmpl_SSHR; goto right16x8;
+ case ARM64vecshi_USHR16x8: tmpl = tmpl_USHR; goto right16x8;
+ case ARM64vecshi_UQSHRN4HS: tmpl = tmpl_UQSHRN; goto right16x8;
+ case ARM64vecshi_SQSHRN4HS: tmpl = tmpl_SQSHRN; goto right16x8;
+ case ARM64vecshi_SQSHRUN4HS: tmpl = tmpl_SQSHRUN; goto right16x8;
+ case ARM64vecshi_UQRSHRN4HS: tmpl = tmpl_UQRSHRN; goto right16x8;
+ case ARM64vecshi_SQRSHRN4HS: tmpl = tmpl_SQRSHRN; goto right16x8;
+ case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
+ case ARM64vecshi_SHL16x8: tmpl = tmpl_SHL; goto left16x8;
+ case ARM64vecshi_UQSHL16x8: tmpl = tmpl_UQSHL; goto left16x8;
+ case ARM64vecshi_SQSHL16x8: tmpl = tmpl_SQSHL; goto left16x8;
+ case ARM64vecshi_SQSHLU16x8: tmpl = tmpl_SQSHLU; goto left16x8;
+ case ARM64vecshi_SSHR8x16: tmpl = tmpl_SSHR; goto right8x16;
+ case ARM64vecshi_USHR8x16: tmpl = tmpl_USHR; goto right8x16;
+ case ARM64vecshi_UQSHRN8BH: tmpl = tmpl_UQSHRN; goto right8x16;
+ case ARM64vecshi_SQSHRN8BH: tmpl = tmpl_SQSHRN; goto right8x16;
+ case ARM64vecshi_SQSHRUN8BH: tmpl = tmpl_SQSHRUN; goto right8x16;
+ case ARM64vecshi_UQRSHRN8BH: tmpl = tmpl_UQRSHRN; goto right8x16;
+ case ARM64vecshi_SQRSHRN8BH: tmpl = tmpl_SQRSHRN; goto right8x16;
+ case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
+ case ARM64vecshi_SHL8x16: tmpl = tmpl_SHL; goto left8x16;
+ case ARM64vecshi_UQSHL8x16: tmpl = tmpl_UQSHL; goto left8x16;
+ case ARM64vecshi_SQSHL8x16: tmpl = tmpl_SQSHL; goto left8x16;
+ case ARM64vecshi_SQSHLU8x16: tmpl = tmpl_SQSHLU; goto left8x16;
default: break;
ARM64vecb_SQRSHL16x8, ARM64vecb_SQRSHL8x16,
ARM64vecb_UQRSHL64x2, ARM64vecb_UQRSHL32x4,
ARM64vecb_UQRSHL16x8, ARM64vecb_UQRSHL8x16,
+ ARM64vecb_SSHL64x2, ARM64vecb_SSHL32x4,
+ ARM64vecb_SSHL16x8, ARM64vecb_SSHL8x16,
+ ARM64vecb_USHL64x2, ARM64vecb_USHL32x4,
+ ARM64vecb_USHL16x8, ARM64vecb_USHL8x16,
+ ARM64vecb_SRSHL64x2, ARM64vecb_SRSHL32x4,
+ ARM64vecb_SRSHL16x8, ARM64vecb_SRSHL8x16,
+ ARM64vecb_URSHL64x2, ARM64vecb_URSHL32x4,
+ ARM64vecb_URSHL16x8, ARM64vecb_URSHL8x16,
ARM64vecb_INVALID
}
ARM64VecBinOp;
typedef
enum {
- ARM64vecsh_USHR64x2=350, ARM64vecsh_USHR32x4,
- ARM64vecsh_USHR16x8, ARM64vecsh_USHR8x16,
- ARM64vecsh_SSHR64x2, ARM64vecsh_SSHR32x4,
- ARM64vecsh_SSHR16x8, ARM64vecsh_SSHR8x16,
- ARM64vecsh_SHL64x2, ARM64vecsh_SHL32x4,
- ARM64vecsh_SHL16x8, ARM64vecsh_SHL8x16,
+ ARM64vecshi_USHR64x2=350, ARM64vecshi_USHR32x4,
+ ARM64vecshi_USHR16x8, ARM64vecshi_USHR8x16,
+ ARM64vecshi_SSHR64x2, ARM64vecshi_SSHR32x4,
+ ARM64vecshi_SSHR16x8, ARM64vecshi_SSHR8x16,
+ ARM64vecshi_SHL64x2, ARM64vecshi_SHL32x4,
+ ARM64vecshi_SHL16x8, ARM64vecshi_SHL8x16,
/* These narrowing shifts zero out the top half of the destination
register. */
- ARM64vecsh_SQSHRN2SD, ARM64vecsh_SQSHRN4HS, ARM64vecsh_SQSHRN8BH,
- ARM64vecsh_UQSHRN2SD, ARM64vecsh_UQSHRN4HS, ARM64vecsh_UQSHRN8BH,
- ARM64vecsh_SQSHRUN2SD, ARM64vecsh_SQSHRUN4HS, ARM64vecsh_SQSHRUN8BH,
- ARM64vecsh_SQRSHRN2SD, ARM64vecsh_SQRSHRN4HS, ARM64vecsh_SQRSHRN8BH,
- ARM64vecsh_UQRSHRN2SD, ARM64vecsh_UQRSHRN4HS, ARM64vecsh_UQRSHRN8BH,
- ARM64vecsh_SQRSHRUN2SD, ARM64vecsh_SQRSHRUN4HS, ARM64vecsh_SQRSHRUN8BH,
+ ARM64vecshi_SQSHRN2SD, ARM64vecshi_SQSHRN4HS, ARM64vecshi_SQSHRN8BH,
+ ARM64vecshi_UQSHRN2SD, ARM64vecshi_UQSHRN4HS, ARM64vecshi_UQSHRN8BH,
+ ARM64vecshi_SQSHRUN2SD, ARM64vecshi_SQSHRUN4HS, ARM64vecshi_SQSHRUN8BH,
+ ARM64vecshi_SQRSHRN2SD, ARM64vecshi_SQRSHRN4HS, ARM64vecshi_SQRSHRN8BH,
+ ARM64vecshi_UQRSHRN2SD, ARM64vecshi_UQRSHRN4HS, ARM64vecshi_UQRSHRN8BH,
+ ARM64vecshi_SQRSHRUN2SD, ARM64vecshi_SQRSHRUN4HS, ARM64vecshi_SQRSHRUN8BH,
/* Saturating left shifts, of various flavours. */
- ARM64vecsh_UQSHL64x2, ARM64vecsh_UQSHL32x4,
- ARM64vecsh_UQSHL16x8, ARM64vecsh_UQSHL8x16,
- ARM64vecsh_SQSHL64x2, ARM64vecsh_SQSHL32x4,
- ARM64vecsh_SQSHL16x8, ARM64vecsh_SQSHL8x16,
- ARM64vecsh_SQSHLU64x2, ARM64vecsh_SQSHLU32x4,
- ARM64vecsh_SQSHLU16x8, ARM64vecsh_SQSHLU8x16,
- ARM64vecsh_INVALID
+ ARM64vecshi_UQSHL64x2, ARM64vecshi_UQSHL32x4,
+ ARM64vecshi_UQSHL16x8, ARM64vecshi_UQSHL8x16,
+ ARM64vecshi_SQSHL64x2, ARM64vecshi_SQSHL32x4,
+ ARM64vecshi_SQSHL16x8, ARM64vecshi_SQSHL8x16,
+ ARM64vecshi_SQSHLU64x2, ARM64vecshi_SQSHLU32x4,
+ ARM64vecshi_SQSHLU16x8, ARM64vecshi_SQSHLU8x16,
+ ARM64vecshi_INVALID
}
- ARM64VecShiftOp;
+ ARM64VecShiftImmOp;
typedef
enum {
|amt| must be in 1 .. lane_size for right shifts, and in
0 .. lane_size-1 for left shifts; shifts outside these
ranges are not allowed. */
struct {
- ARM64VecShiftOp op;
- HReg dst;
- HReg src;
- UInt amt;
+ ARM64VecShiftImmOp op;
+ HReg dst;
+ HReg src;
+ UInt amt;
} VShiftImmV;
struct {
HReg dst;
extern ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg, HReg );
extern ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op, UInt dszBlg2,
HReg dst, HReg src );
-extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
+extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
HReg dst, HReg src, UInt amt );
extern ARM64Instr* ARM64Instr_VExtV ( HReg dst,
HReg srcLo, HReg srcHi, UInt amtB );
case Iop_QSub16Sx8: case Iop_QSub8Sx16:
case Iop_QSub64Ux2: case Iop_QSub32Ux4:
case Iop_QSub16Ux8: case Iop_QSub8Ux16:
- case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8:
- case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
+ case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8:
+ case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
+ case Iop_Sh8Sx16: case Iop_Sh16Sx8:
+ case Iop_Sh32Sx4: case Iop_Sh64Sx2:
+ case Iop_Sh8Ux16: case Iop_Sh16Ux8:
+ case Iop_Sh32Ux4: case Iop_Sh64Ux2:
+ case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
+ case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
+ case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
+ case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
{
HReg res = newVRegV(env);
HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break;
case Iop_QRDMulHi32Sx4: op = ARM64vecb_SQRDMULH32x4; break;
case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break;
+ case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break;
+ case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break;
+ case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break;
+ case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break;
+ case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break;
+ case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break;
+ case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break;
+ case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break;
+ case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break;
+ case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break;
+ case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break;
+ case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break;
+ case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break;
+ case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break;
+ case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break;
+ case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break;
default: vassert(0);
}
if (sw) {
UInt amt = argR->Iex.Const.con->Ico.U8;
UInt limLo = 0;
UInt limHi = 0;
- ARM64VecShiftOp op = ARM64vecsh_INVALID;
+ ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
/* Establish the instruction to use. */
switch (e->Iex.Binop.op) {
- case Iop_ShrN64x2: op = ARM64vecsh_USHR64x2; break;
- case Iop_ShrN32x4: op = ARM64vecsh_USHR32x4; break;
- case Iop_ShrN16x8: op = ARM64vecsh_USHR16x8; break;
- case Iop_ShrN8x16: op = ARM64vecsh_USHR8x16; break;
- case Iop_SarN64x2: op = ARM64vecsh_SSHR64x2; break;
- case Iop_SarN32x4: op = ARM64vecsh_SSHR32x4; break;
- case Iop_SarN16x8: op = ARM64vecsh_SSHR16x8; break;
- case Iop_SarN8x16: op = ARM64vecsh_SSHR8x16; break;
- case Iop_ShlN64x2: op = ARM64vecsh_SHL64x2; break;
- case Iop_ShlN32x4: op = ARM64vecsh_SHL32x4; break;
- case Iop_ShlN16x8: op = ARM64vecsh_SHL16x8; break;
- case Iop_ShlN8x16: op = ARM64vecsh_SHL8x16; break;
- case Iop_QShlNsatUU64x2: op = ARM64vecsh_UQSHL64x2; break;
- case Iop_QShlNsatUU32x4: op = ARM64vecsh_UQSHL32x4; break;
- case Iop_QShlNsatUU16x8: op = ARM64vecsh_UQSHL16x8; break;
- case Iop_QShlNsatUU8x16: op = ARM64vecsh_UQSHL8x16; break;
- case Iop_QShlNsatSS64x2: op = ARM64vecsh_SQSHL64x2; break;
- case Iop_QShlNsatSS32x4: op = ARM64vecsh_SQSHL32x4; break;
- case Iop_QShlNsatSS16x8: op = ARM64vecsh_SQSHL16x8; break;
- case Iop_QShlNsatSS8x16: op = ARM64vecsh_SQSHL8x16; break;
- case Iop_QShlNsatSU64x2: op = ARM64vecsh_SQSHLU64x2; break;
- case Iop_QShlNsatSU32x4: op = ARM64vecsh_SQSHLU32x4; break;
- case Iop_QShlNsatSU16x8: op = ARM64vecsh_SQSHLU16x8; break;
- case Iop_QShlNsatSU8x16: op = ARM64vecsh_SQSHLU8x16; break;
+ case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break;
+ case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break;
+ case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break;
+ case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break;
+ case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break;
+ case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break;
+ case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break;
+ case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break;
+ case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break;
+ case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break;
+ case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break;
+ case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break;
+ case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break;
+ case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break;
+ case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break;
+ case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break;
+ case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break;
+ case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break;
+ case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break;
+ case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break;
+ case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
+ case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
+ case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
+ case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
default: vassert(0);
}
/* Establish the shift limits, for sanity check purposes only. */
/* For left shifts, the allowable amt values are
0 .. lane_bits-1. For right shifts the allowable
values are 1 .. lane_bits. */
- if (op != ARM64vecsh_INVALID && amt >= limLo && amt <= limHi) {
+ if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
HReg src = iselV128Expr(env, argL);
HReg dst = newVRegV(env);
addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
UInt amt = argR->Iex.Const.con->Ico.U8;
UInt limit = 0;
- ARM64VecShiftOp op = ARM64vecsh_INVALID;
+ ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
switch (e->Iex.Binop.op) {
/* uu */
case Iop_QandQShrNnarrow64Uto32Ux2:
- op = ARM64vecsh_UQSHRN2SD; limit = 64; break;
+ op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
case Iop_QandQShrNnarrow32Uto16Ux4:
- op = ARM64vecsh_UQSHRN4HS; limit = 32; break;
+ op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
case Iop_QandQShrNnarrow16Uto8Ux8:
- op = ARM64vecsh_UQSHRN8BH; limit = 16; break;
+ op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
/* ss */
case Iop_QandQSarNnarrow64Sto32Sx2:
- op = ARM64vecsh_SQSHRN2SD; limit = 64; break;
+ op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
case Iop_QandQSarNnarrow32Sto16Sx4:
- op = ARM64vecsh_SQSHRN4HS; limit = 32; break;
+ op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
case Iop_QandQSarNnarrow16Sto8Sx8:
- op = ARM64vecsh_SQSHRN8BH; limit = 16; break;
+ op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
/* su */
case Iop_QandQSarNnarrow64Sto32Ux2:
- op = ARM64vecsh_SQSHRUN2SD; limit = 64; break;
+ op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
case Iop_QandQSarNnarrow32Sto16Ux4:
- op = ARM64vecsh_SQSHRUN4HS; limit = 32; break;
+ op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
case Iop_QandQSarNnarrow16Sto8Ux8:
- op = ARM64vecsh_SQSHRUN8BH; limit = 16; break;
+ op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
/* ruu */
case Iop_QandQRShrNnarrow64Uto32Ux2:
- op = ARM64vecsh_UQRSHRN2SD; limit = 64; break;
+ op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
case Iop_QandQRShrNnarrow32Uto16Ux4:
- op = ARM64vecsh_UQRSHRN4HS; limit = 32; break;
+ op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
case Iop_QandQRShrNnarrow16Uto8Ux8:
- op = ARM64vecsh_UQRSHRN8BH; limit = 16; break;
+ op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
/* rss */
case Iop_QandQRSarNnarrow64Sto32Sx2:
- op = ARM64vecsh_SQRSHRN2SD; limit = 64; break;
+ op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
case Iop_QandQRSarNnarrow32Sto16Sx4:
- op = ARM64vecsh_SQRSHRN4HS; limit = 32; break;
+ op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
case Iop_QandQRSarNnarrow16Sto8Sx8:
- op = ARM64vecsh_SQRSHRN8BH; limit = 16; break;
+ op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
/* rsu */
case Iop_QandQRSarNnarrow64Sto32Ux2:
- op = ARM64vecsh_SQRSHRUN2SD; limit = 64; break;
+ op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
case Iop_QandQRSarNnarrow32Sto16Ux4:
- op = ARM64vecsh_SQRSHRUN4HS; limit = 32; break;
+ op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
case Iop_QandQRSarNnarrow16Sto8Ux8:
- op = ARM64vecsh_SQRSHRUN8BH; limit = 16; break;
+ op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
/**/
default:
vassert(0);
}
- if (op != ARM64vecsh_INVALID && amt >= 1 && amt <= limit) {
+ if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
HReg src = iselV128Expr(env, argL);
HReg dst = newVRegV(env);
HReg fpsr = newVRegI(env);
case Iop_QandSQRsh32x4: vex_printf("QandSQRsh32x4"); return;
case Iop_QandSQRsh64x2: vex_printf("QandSQRsh64x2"); return;
+ case Iop_Sh8Sx16: vex_printf("Sh8Sx16"); return;
+ case Iop_Sh16Sx8: vex_printf("Sh16Sx8"); return;
+ case Iop_Sh32Sx4: vex_printf("Sh32Sx4"); return;
+ case Iop_Sh64Sx2: vex_printf("Sh64Sx2"); return;
+ case Iop_Sh8Ux16: vex_printf("Sh8Ux16"); return;
+ case Iop_Sh16Ux8: vex_printf("Sh16Ux8"); return;
+ case Iop_Sh32Ux4: vex_printf("Sh32Ux4"); return;
+ case Iop_Sh64Ux2: vex_printf("Sh64Ux2"); return;
+ case Iop_Rsh8Sx16: vex_printf("Rsh8Sx16"); return;
+ case Iop_Rsh16Sx8: vex_printf("Rsh16Sx8"); return;
+ case Iop_Rsh32Sx4: vex_printf("Rsh32Sx4"); return;
+ case Iop_Rsh64Sx2: vex_printf("Rsh64Sx2"); return;
+ case Iop_Rsh8Ux16: vex_printf("Rsh8Ux16"); return;
+ case Iop_Rsh16Ux8: vex_printf("Rsh16Ux8"); return;
+ case Iop_Rsh32Ux4: vex_printf("Rsh32Ux4"); return;
+ case Iop_Rsh64Ux2: vex_printf("Rsh64Ux2"); return;
+
case Iop_QandQShrNnarrow16Uto8Ux8:
vex_printf("QandQShrNnarrow16Uto8Ux8"); return;
case Iop_QandQShrNnarrow32Uto16Ux4:
case Iop_CipherLV128:
case Iop_NCipherV128:
case Iop_NCipherLV128:
+ case Iop_Sh8Sx16: case Iop_Sh16Sx8:
+ case Iop_Sh32Sx4: case Iop_Sh64Sx2:
+ case Iop_Sh8Ux16: case Iop_Sh16Ux8:
+ case Iop_Sh32Ux4: case Iop_Sh64Ux2:
+ case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
+ case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
+ case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
+ case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
BINARY(Ity_V128,Ity_V128, Ity_V128);
case Iop_PolynomialMull8x8:
Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2,
/* VECTOR x VECTOR BIDIRECTIONAL SATURATING (& MAYBE ROUNDING) SHIFT */
+ /* All of type (V128, V128) -> V256. */
/* The least significant 8 bits of each lane of the second
operand are used as the shift amount, and interpreted signedly.
Positive values mean a shift left, negative a shift right. The
Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
Iop_QandSQRsh32x4, Iop_QandSQRsh64x2,
+ /* VECTOR x VECTOR BIDIRECTIONAL (& MAYBE ROUNDING) SHIFT */
+ /* All of type (V128, V128) -> V128 */
+ /* The least significant 8 bits of each lane of the second
+ operand are used as the shift amount, and interpreted signedly.
+ Positive values mean a shift left, negative a shift right.
+ There are also rounding variants, which add 2^(shift_amount-1)
+ to the value before shifting, but only in the shift-right case.
+
+ For left shifts, the vacated places are filled with zeroes.
+ For right shifts, the vacated places are filled with zeroes
+ for the U variants and sign bits for the S variants. */
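+   /* E.g. for Iop_Rsh32Sx4, a lane whose shift amount (the low byte
+      of the corresponding lane of the second arg) is -3 has 2^2 added
+      to it and is then arithmetically shifted right by 3, whereas an
+      amount of 3 shifts it left by 3, filling with zeroes. */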
+ // Signed and unsigned, non-rounding
+ Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2,
+ Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2,
+
+ // Signed and unsigned, rounding
+ Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2,
+ Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2,
+
/* VECTOR x SCALAR SATURATING (& MAYBE ROUNDING) NARROWING SHIFT RIGHT */
/* All of type (V128, I8) -> V128 */
/* The first argument is shifted right, then narrowed to half the width