static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
+ /* 31    29     23   21 20 15 14     11 9 4
+    0  q  001110 size 0  m  0  opcode 10 n d
+    Decode fields: opcode
+ */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (INSN(31,31) != 0
+ || INSN(29,24) != BITS6(0,0,1,1,1,0)
+ || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
+ return False;
+ }
+ UInt bitQ = INSN(30,30);
+ UInt size = INSN(23,22);
+ UInt mm = INSN(20,16);
+ UInt opcode = INSN(14,12);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+
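+ /* opcode<14:12>: 001=UZP1 010=TRN1 011=ZIP1, 101=UZP2 110=TRN2 111=ZIP2;
+    bit 14 selects the "2" (odd-lane / upper-half) form of each pairing. */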
+ if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
+ /* -------- 001 UZP1 std7_std7_std7 -------- */
+ /* -------- 101 UZP2 std7_std7_std7 -------- */
+ if (bitQ == 0 && size == X11) return False; // implied 1d case
+ Bool isUZP1 = opcode == BITS3(0,0,1);
+ IROp op = isUZP1 ? mkVecCATEVENLANES(size)
+ : mkVecCATODDLANES(size);
+ IRTemp preL = newTempV128();
+ IRTemp preR = newTempV128();
+ IRTemp res = newTempV128();
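+ /* For the 64-bit variants, first pack m's low 64 bits above n's into one
+    128-bit value, so a single 128-bit concat-even/odd-lanes op yields the
+    whole result in the low 64 bits. */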
+ if (bitQ == 0) {
+ assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
+ getQReg128(nn)));
+ assign(preR, mkexpr(preL));
+ } else {
+ assign(preL, getQReg128(mm));
+ assign(preR, getQReg128(nn));
+ }
+ assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+ return True;
+ }
+
+ if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
+ /* -------- 010 TRN1 std7_std7_std7 -------- */
+ /* -------- 110 TRN2 std7_std7_std7 -------- */
+ if (bitQ == 0 && size == X11) return False; // implied 1d case
+ Bool isTRN1 = opcode == BITS3(0,1,0);
+ IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
+ : mkVecCATODDLANES(size);
+ IROp op2 = mkVecINTERLEAVEHI(size);
+ IRTemp srcM = newTempV128();
+ IRTemp srcN = newTempV128();
+ IRTemp res = newTempV128();
+ assign(srcM, getQReg128(mm));
+ assign(srcN, getQReg128(nn));
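+ /* TRNx is built as op2(op1(m,m), op1(n,n)): cat-ing each operand with
+    itself gathers its even (TRN1) or odd (TRN2) lanes, and the
+    interleave-high then alternates the gathered n and m lanes, which is
+    exactly the transpose result. */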
+ assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
+ binop(op1, mkexpr(srcN), mkexpr(srcN))));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ const HChar* nm = isTRN1 ? "trn1" : "trn2";
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+ return True;
+ }
+
+ if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
+ /* -------- 011 ZIP1 std7_std7_std7 -------- */
+ /* -------- 111 ZIP2 std7_std7_std7 -------- */
+ if (bitQ == 0 && size == X11) return False; // implied 1d case
+ Bool isZIP1 = opcode == BITS3(0,1,1);
+ IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
+ : mkVecINTERLEAVEHI(size);
+ IRTemp preL = newTempV128();
+ IRTemp preR = newTempV128();
+ IRTemp res = newTempV128();
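+ /* 64-bit ZIP2 reads the upper 32 bits of each 64-bit source, so shift
+    the operands up by 32 bits first; the low 64 bits of the
+    interleave-high result then hold ZIP2 of the two sources, and the
+    junk in the upper half is cleared by math_MAYBE_ZERO_HI64 below. */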
+ if (bitQ == 0 && !isZIP1) {
+ assign(preL, binop(Iop_ShlV128, getQReg128(mm), mkU8(32)));
+ assign(preR, binop(Iop_ShlV128, getQReg128(nn), mkU8(32)));
+ } else {
+ assign(preL, getQReg128(mm));
+ assign(preR, getQReg128(nn));
+ }
+ assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ const HChar* nm = isZIP1 ? "zip1" : "zip2";
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+ return True;
+ }
+
return False;
# undef INSN
}
return True;
}
+ if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
+ /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
+ /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
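+ /* These exist only for 32-bit lanes; the 2s form also uses the 4-lane
+    estimate op and relies on math_MAYBE_ZERO_HI64 to clear the unused
+    upper half. */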
+ Bool isREC = bitU == 0;
+ IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
+ IRTemp res = newTempV128();
+ assign(res, unop(op, getQReg128(nn)));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ const HChar* nm = isREC ? "urecpe" : "ursqrte";
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s\n", nm,
+ nameQReg128(dd), arr, nameQReg128(nn), arr);
+ return True;
+ }
+
if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
/* -------- 0,0x,11101: SCVTF -------- */
/* -------- 1,0x,11101: UCVTF -------- */
/*OUT*/const HChar** ar, ARM64VecUnaryOp op )
{
switch (op) {
- case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return;
- case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
- case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
- case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
- case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
- case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
- case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
- case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
- case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
- case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
- case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
- case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
- case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
- case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
- case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
- case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
- case ARM64vecu_RBIT: *nm = "rbit "; *ar = "16b"; return;
- case ARM64vecu_REV1616B: *nm = "rev16"; *ar = "16b"; return;
- case ARM64vecu_REV3216B: *nm = "rev32"; *ar = "16b"; return;
- case ARM64vecu_REV328H: *nm = "rev32"; *ar = "8h"; return;
- case ARM64vecu_REV6416B: *nm = "rev64"; *ar = "16b"; return;
- case ARM64vecu_REV648H: *nm = "rev64"; *ar = "8h"; return;
- case ARM64vecu_REV644S: *nm = "rev64"; *ar = "4s"; return;
+ case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return;
+ case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
+ case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
+ case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
+ case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
+ case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
+ case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
+ case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
+ case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
+ case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
+ case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
+ case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
+ case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
+ case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
+ case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
+ case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
+ case ARM64vecu_RBIT: *nm = "rbit "; *ar = "16b"; return;
+ case ARM64vecu_REV1616B: *nm = "rev16"; *ar = "16b"; return;
+ case ARM64vecu_REV3216B: *nm = "rev32"; *ar = "16b"; return;
+ case ARM64vecu_REV328H: *nm = "rev32"; *ar = "8h"; return;
+ case ARM64vecu_REV6416B: *nm = "rev64"; *ar = "16b"; return;
+ case ARM64vecu_REV648H: *nm = "rev64"; *ar = "8h"; return;
+ case ARM64vecu_REV644S: *nm = "rev64"; *ar = "4s"; return;
+ case ARM64vecu_URECPE32x4: *nm = "urecpe"; *ar = "4s"; return;
+ case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s"; return;
default: vpanic("showARM64VecUnaryOp");
}
}
#define X101110 BITS8(0,0, 1,0,1,1,1,0)
#define X110000 BITS8(0,0, 1,1,0,0,0,0)
#define X110001 BITS8(0,0, 1,1,0,0,0,1)
+#define X110010 BITS8(0,0, 1,1,0,0,1,0)
#define X110100 BITS8(0,0, 1,1,0,1,0,0)
#define X110101 BITS8(0,0, 1,1,0,1,0,1)
#define X110111 BITS8(0,0, 1,1,0,1,1,1)
010 01110 00 1 00000 000010 n d REV64 Vd.16b, Vn.16b
010 01110 01 1 00000 000010 n d REV64 Vd.8h, Vn.8h
010 01110 10 1 00000 000010 n d REV64 Vd.4s, Vn.4s
+
+ 010 01110 10 1 00001 110010 n d URECPE Vd.4s, Vn.4s
+ 011 01110 10 1 00001 110010 n d URSQRTE Vd.4s, Vn.4s
*/
UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
case ARM64vecu_REV644S:
*p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
break;
+ case ARM64vecu_URECPE32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
+ break;
+ case ARM64vecu_URSQRTE32x4:
+ *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
+ break;
default:
goto bad;
}
typedef
enum {
- ARM64vecmo_SUQADD64x2=335, ARM64vecmo_SUQADD32x4,
+ ARM64vecmo_SUQADD64x2=300, ARM64vecmo_SUQADD32x4,
ARM64vecmo_SUQADD16x8, ARM64vecmo_SUQADD8x16,
ARM64vecmo_USQADD64x2, ARM64vecmo_USQADD32x4,
ARM64vecmo_USQADD16x8, ARM64vecmo_USQADD8x16,
typedef
enum {
- ARM64vecu_FNEG64x2=300, ARM64vecu_FNEG32x4,
+ ARM64vecu_FNEG64x2=350, ARM64vecu_FNEG32x4,
ARM64vecu_FABS64x2, ARM64vecu_FABS32x4,
ARM64vecu_NOT,
ARM64vecu_ABS64x2, ARM64vecu_ABS32x4,
ARM64vecu_REV1616B,
ARM64vecu_REV3216B, ARM64vecu_REV328H,
ARM64vecu_REV6416B, ARM64vecu_REV648H, ARM64vecu_REV644S,
+ ARM64vecu_URECPE32x4,
+ ARM64vecu_URSQRTE32x4,
ARM64vecu_INVALID
}
ARM64VecUnaryOp;
typedef
enum {
- ARM64vecshi_USHR64x2=350, ARM64vecshi_USHR32x4,
+ ARM64vecshi_USHR64x2=400, ARM64vecshi_USHR32x4,
ARM64vecshi_USHR16x8, ARM64vecshi_USHR8x16,
ARM64vecshi_SSHR64x2, ARM64vecshi_SSHR32x4,
ARM64vecshi_SSHR16x8, ARM64vecshi_SSHR8x16,
typedef
enum {
- ARM64vecna_XTN=400,
+ ARM64vecna_XTN=450,
ARM64vecna_SQXTN,
ARM64vecna_UQXTN,
ARM64vecna_SQXTUN,
case Iop_Reverse8sIn16_x8:
case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
- case Iop_Reverse32sIn64_x2:
+ case Iop_Reverse32sIn64_x2:
+ case Iop_RecipEst32Ux4:
+ case Iop_RSqrtEst32Ux4:
{
HReg res = newVRegV(env);
HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
ARM64VecUnaryOp op = ARM64vecu_INVALID;
switch (e->Iex.Unop.op) {
- case Iop_NotV128: op = ARM64vecu_NOT; break;
- case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
- case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
- case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
- case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
- case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
- case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
- case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
- case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
- case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break;
- case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break;
- case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break;
- case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break;
- case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break;
- case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break;
- case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
- case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break;
- case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break;
- case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break;
- case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break;
- case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break;
- case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break;
- case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
+ case Iop_NotV128: op = ARM64vecu_NOT; break;
+ case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
+ case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
+ case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
+ case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
+ case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
+ case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
+ case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
+ case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
+ case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break;
+ case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break;
+ case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break;
+ case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break;
+ case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break;
+ case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break;
+ case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
+ case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break;
+ case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break;
+ case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break;
+ case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break;
+ case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break;
+ case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break;
+ case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
+ case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break;
+ case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break;
default: vassert(0);
}
addInstr(env, ARM64Instr_VUnaryV(op, res, arg));