static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
+ /* 31 29 23 21 20 15 14 10 9 4
+ 0 q 101110 op2 0 m 0 imm4 0 n d
+ Decode fields: op2
+ */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (INSN(31,31) != 0
+ || INSN(29,24) != BITS6(1,0,1,1,1,0)
+ || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
+ return False;
+ }
+ UInt bitQ = INSN(30,30);
+ UInt op2 = INSN(23,22);
+ UInt mm = INSN(20,16);
+ UInt imm4 = INSN(14,11);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+
+ if (op2 == BITS2(0,0)) {
+ /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
+ IRTemp sHi = newTemp(Ity_V128);
+ IRTemp sLo = newTemp(Ity_V128);
+ IRTemp res = newTemp(Ity_V128);
+ assign(sHi, getQReg128(mm));
+ assign(sLo, getQReg128(nn));
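+ /* EXT returns the 16 (Q=1) or 8 (Q=0) consecutive bytes starting at
+    byte imm4 of the concatenation Vm:Vn, with Vn supplying the low
+    bytes.  For the 128-bit case that is
+    (Vn >>u 8*imm4) | (Vm << 8*(16-imm4)). */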
+ if (bitQ == 1) {
+ if (imm4 == 0) {
+ assign(res, mkexpr(sLo));
+ } else {
+ vassert(imm4 >= 1 && imm4 <= 15);
+ assign(res,
+ binop(Iop_OrV128,
+ binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))),
+ binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4))));
+ }
+ putQReg128(dd, mkexpr(res));
+ DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
+ } else {
+ if (imm4 >= 8) return False;
+ if (imm4 == 0) {
+ assign(res, mkexpr(sLo));
+ } else {
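+ /* InterleaveLO64x2(sHi, sLo) puts Vn's low 64 bits in the low half
+    and Vm's low 64 bits in the high half, so shifting that right by
+    8*imm4 bits leaves the 8-byte EXT result in the low 64 bits. */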
+ assign(res,
+ binop(Iop_ShrV128,
+ binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)),
+ mkU8(8 * imm4)));
+ }
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
+ }
+ return True;
+ }
+
return False;
# undef INSN
}
static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
+ /* 31 28 20 15 14 10 9 4
+ 01 op 11110000 imm5 0 imm4 1 n d
+ Decode fields: op,imm4
+ */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (INSN(31,30) != BITS2(0,1)
+ || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
+ || INSN(15,15) != 0 || INSN(10,10) != 1) {
+ return False;
+ }
+ UInt bitOP = INSN(29,29);
+ UInt imm5 = INSN(20,16);
+ UInt imm4 = INSN(14,11);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+
+ if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
+ /* -------- 0,0000 DUP (element, scalar) -------- */
+ IRTemp w0 = newTemp(Ity_I64);
+ const HChar* arTs = "??";
+ IRType laneTy = Ity_INVALID;
+ UInt laneNo = 16; /* invalid */
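+ /* The position of the lowest set bit of imm5 selects the element
+    size (b/h/s/d); the bits above it give the lane number. */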
+ if (imm5 & 1) {
+ arTs = "b";
+ laneNo = (imm5 >> 1) & 15;
+ laneTy = Ity_I8;
+ assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
+ }
+ else if (imm5 & 2) {
+ arTs = "h";
+ laneNo = (imm5 >> 2) & 7;
+ laneTy = Ity_I16;
+ assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
+ }
+ else if (imm5 & 4) {
+ arTs = "s";
+ laneNo = (imm5 >> 3) & 3;
+ laneTy = Ity_I32;
+ assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
+ }
+ else if (imm5 & 8) {
+ arTs = "d";
+ laneNo = (imm5 >> 4) & 1;
+ laneTy = Ity_I64;
+ assign(w0, getQRegLane(nn, laneNo, laneTy));
+ }
+ else {
+ /* invalid; leave laneTy unchanged. */
+ }
+ if (laneTy != Ity_INVALID) {
+ vassert(laneNo < 16);
+ putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
+ DIP("dup %s, %s.%s[%u]\n",
+ nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
+ return True;
+ }
+ /* else fall through */
+ }
+
return False;
# undef INSN
}
vassert(amt > 0 && amt <= maxSh);
return i;
}
+ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
+ ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
+ i->tag = ARM64in_VExtV;
+ i->ARM64in.VExtV.dst = dst;
+ i->ARM64in.VExtV.srcLo = srcLo;
+ i->ARM64in.VExtV.srcHi = srcHi;
+ i->ARM64in.VExtV.amtB = amtB;
+ vassert(amtB >= 1 && amtB <= 15);
+ return i;
+}
//ZZ ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
//ZZ i->tag = ARMin_VAluS;
vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
return;
}
+ case ARM64in_VExtV: {
+ vex_printf("ext ");
+ ppHRegARM64(i->ARM64in.VExtV.dst);
+ vex_printf(".16b, ");
+ ppHRegARM64(i->ARM64in.VExtV.srcLo);
+ vex_printf(".16b, ");
+ ppHRegARM64(i->ARM64in.VExtV.srcHi);
+ vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
+ return;
+ }
//ZZ case ARMin_VAluS:
//ZZ vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
//ZZ ppHRegARM(i->ARMin.VAluS.dst);
addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
return;
+ case ARM64in_VExtV:
+ addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
+ addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
+ addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
+ return;
//ZZ case ARMin_VAluS:
//ZZ addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
//ZZ addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
i->ARM64in.VShiftImmV.src
= lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
return;
+ case ARM64in_VExtV:
+ i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
+ i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
+ i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
+ return;
+
//ZZ case ARMin_VAluS:
//ZZ i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
//ZZ i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
}
case ARM64in_VShiftImmV: {
/*
- 0q1 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
- 0q0 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
+ 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
+ 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
where immh:immb
= case T of
2d | sh in 1..63 -> let xxxxxx = 64-sh in 1xxx:xxx
8h | sh in 1..15 -> let xxxx = 16-sh in 001x:xxx
16b | sh in 1..7 -> let xxx = 8-sh in 0001:xxx
- 0q0 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
+ 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
where immh:immb
= case T of
2d | sh in 1..63 -> let xxxxxx = sh in 1xxx:xxx
goto done;
}
break;
-
-
/* 8x16 cases */
case ARM64vecsh_SSHR8x16: syned = True;
case ARM64vecsh_USHR8x16: /* fallthrough */
goto done;
}
break;
-
default:
break;
}
goto bad;
}
+ case ARM64in_VExtV: {
+ /*
+ 011 01110 000 m 0 imm4 0 n d EXT Vd.16b, Vn.16b, Vm.16b, #imm4
+ where imm4 = the shift amount, in bytes,
+ Vn is low operand, Vm is high operand
+ */
+ UInt vD = qregNo(i->ARM64in.VExtV.dst);
+ UInt vN = qregNo(i->ARM64in.VExtV.srcLo);
+ UInt vM = qregNo(i->ARM64in.VExtV.srcHi);
+ UInt imm4 = i->ARM64in.VExtV.amtB;
+ vassert(imm4 >= 1 && imm4 <= 15);
+ *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
+ X000000 | (imm4 << 1), vN, vD);
+ goto done;
+ }
//ZZ case ARMin_VAluS: {
//ZZ UInt dN = fregNo(i->ARMin.VAluS.argL);
//ZZ UInt dD = fregNo(i->ARMin.VAluS.dst);
ARM64in_VUnaryV,
ARM64in_VNarrowV,
ARM64in_VShiftImmV,
+ ARM64in_VExtV,
//ZZ ARMin_VAluS,
//ZZ ARMin_VCMovD,
//ZZ ARMin_VCMovS,
HReg src;
UInt amt;
} VShiftImmV;
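+ /* EXT: dst = the 16 consecutive bytes of srcHi:srcLo (srcLo
+    supplying the low bytes) starting at byte amtB of the
+    concatenation; amtB must be in 1 .. 15. */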
+ struct {
+ HReg dst;
+ HReg srcLo;
+ HReg srcHi;
+ UInt amtB;
+ } VExtV;
//ZZ /* 32-bit FP binary arithmetic */
//ZZ struct {
//ZZ ARMVfpOp op;
extern ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
HReg dst, HReg src, UInt amt );
+extern ARM64Instr* ARM64Instr_VExtV ( HReg dst,
+ HReg srcLo, HReg srcHi, UInt amtB );
//ZZ extern ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg, HReg, HReg );
//ZZ extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src );
//ZZ extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src );
/* else fall out; this is unhandled */
break;
}
+
+ case Iop_ShlV128:
+ case Iop_ShrV128: {
+ Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
+ /* This is tricky. Generate an EXT instruction with zeroes in
+ the high operand (shift right) or low operand (shift left).
+ Note that we can only slice in the EXT instruction at a byte
+ level of granularity, so the shift amount needs careful
+ checking. */
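+ /* E.g. a right shift by 0x28 bits (5 bytes) becomes
+    EXT dst, src, zeroes, #5, and a left shift by the same amount
+    becomes EXT dst, zeroes, src, #11 (= 16-5). */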
+ IRExpr* argL = e->Iex.Binop.arg1;
+ IRExpr* argR = e->Iex.Binop.arg2;
+ if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
+ UInt amt = argR->Iex.Const.con->Ico.U8;
+ Bool amtOK = False;
+ switch (amt) {
+ case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
+ case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
+ case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
+ amtOK = True; break;
+ }
+ /* We could also deal with amt==0 by copying the source to
+ the destination, but there's no need for that so far. */
+ if (amtOK) {
+ HReg src = iselV128Expr(env, argL);
+ HReg srcZ = newVRegV(env);
+ addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
+ UInt immB = amt / 8;
+ vassert(immB >= 1 && immB <= 15);
+ HReg dst = newVRegV(env);
+ if (isSHR) {
+ addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
+ immB));
+ } else {
+ addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
+ 16 - immB));
+ }
+ return dst;
+ }
+ }
+ /* else fall out; this is unhandled */
+ break;
+ }
+
//ZZ case Iop_CmpGT8Ux16:
//ZZ case Iop_CmpGT16Ux8:
//ZZ case Iop_CmpGT32Ux4: {