return ops[size];
}
-static IROp mkVecQANDUQSH ( UInt size )
-{
+static IROp mkVecQANDUQSH ( UInt size ) {
const IROp ops[4]
= { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
return ops[size];
}
-static IROp mkVecQANDSQSH ( UInt size )
-{
+static IROp mkVecQANDSQSH ( UInt size ) {
const IROp ops[4]
= { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
return ops[size];
}
-static IROp mkVecQANDUQRSH ( UInt size )
-{
+static IROp mkVecQANDUQRSH ( UInt size ) {
const IROp ops[4]
= { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
return ops[size];
}
-static IROp mkVecQANDSQRSH ( UInt size )
-{
+static IROp mkVecQANDSQRSH ( UInt size ) {
const IROp ops[4]
= { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
return ops[size];
}
+static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
+ Iop_NarrowUn64to32x2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
+static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
+ Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
+static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
+ Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
+static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
+ Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
+static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
+ Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
+static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
+ Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
+static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
+ Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
+static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
+ Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
+static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
+ Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
+static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
+ const IROp ops[4]
+ = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
+ Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
+ vassert(sizeNarrow < 4);
+ return ops[sizeNarrow];
+}
+
/* Generate IR to create 'arg rotated right by imm', for sane values
of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
}
+/* Generate IR that takes an I64 and sign- or zero-widens each
+   lane, giving a V128 value. */
+static
+IRTemp math_WIDEN_LANES ( Bool zWiden, UInt sizeNarrow, IRExpr* srcE )
+{
+ IRTemp src = newTemp(Ity_I64);
+ assign(src, srcE);
+ return math_WIDEN_LO_OR_HI_LANES(
+ zWiden,
+ False/*!fromUpperHalf*/,
+ sizeNarrow,
+ binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src))
+ );
+}
+
+
/* Return a temp which holds the vector dup of the lane of width
(1 << size) obtained from src[laneNo]. */
static
}
+/* Zero all except the least significant lane of |srcE|, where |size|
+ indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
vassert(size < 4);
return True;
}
+ if (opcode == BITS5(1,0,1,0,0)
+ || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
+ /* -------- 0,xx,10100: SQXTN -------- */
+ /* -------- 1,xx,10100: UQXTN -------- */
+ /* -------- 1,xx,10010: SQXTUN -------- */
+ if (size == X11) return False;
+ vassert(size < 3);
+ IROp opN = Iop_INVALID;
+ Bool zWiden = True;
+ const HChar* nm = "??";
+ /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
+ opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
+ }
+ else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
+ opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
+ }
+ else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
+ opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
+ }
+ else vassert(0);
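+      /* This is the scalar form, so only the lowest source lane
+         participates.  Zeroing the other lanes lets the vector-style
+         Q flag computation below be reused unchanged. */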
+ IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
+ size+1, getQReg128(nn));
+ IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
+ size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
+ putQReg128(dd, mkexpr(resN));
+      /* The zeroed-out lanes of |resN| widen to zero and are compared
+         against the zeroed-out lanes of |src|, so the non-participating
+         lanes make no contribution to the Q flag state. */
+ IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
+ size, mkexpr(resN));
+ updateQCFLAGwithDifference(src, resW);
+ const HChar arrNarrow = "bhsd"[size];
+ const HChar arrWide = "bhsd"[size+1];
+ DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
+ return True;
+ }
+
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
return False;
# undef INSN
return True;
}
+ if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
+ || (bitU == 1
+ && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
+ /* -------- 0,10010 SQSHRN{,2} #imm -------- */
+ /* -------- 1,10010 UQSHRN{,2} #imm -------- */
+ /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
+ /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
+ /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
+ /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
+ UInt size = 0;
+ UInt shift = 0;
+ Bool is2 = bitQ == 1;
+ Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
+ if (!ok || size == X11) return False;
+ vassert(shift >= 1 && shift <= (8 << size));
+ const HChar* nm = "??";
+ IROp op = Iop_INVALID;
+ /* Decide on the name and the operation. */
+ /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
+ nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
+ }
+ else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
+ nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
+ }
+ else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
+ nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
+ }
+ else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
+ nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
+ }
+ else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
+ nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
+ }
+ else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
+ nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
+ }
+ else vassert(0);
+ /* Compute the result (Q, shifted value) pair. */
+ IRTemp src128 = newTempV128();
+ assign(src128, getQReg128(nn));
+ IRTemp pair = newTempV128();
+ assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
+ /* Update the result reg */
+ IRTemp res64in128 = newTempV128();
+ assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
+ putLO64andZUorPutHI64(is2, dd, res64in128);
+ /* Update the Q flag. */
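+      /* The wide op leaves the Q bit in the lsb of the upper half of
+         |pair|.  Duplicating that upper half into both halves and
+         comparing the result against zero folds Q into QCFLAG exactly
+         when the operation saturated. */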
+ IRTemp q64q64 = newTempV128();
+ assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
+ IRTemp z128 = newTempV128();
+ assign(z128, mkV128(0x0000));
+ updateQCFLAGwithDifference(q64q64, z128);
+ /* */
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
+ const HChar* arrWide = nameArr_Q_SZ(1, size+1);
+ DIP("%s %s.%s, %s.%s, #%u\n", nm,
+ nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
+ return True;
+ }
+
if (opcode == BITS5(1,0,1,0,0)) {
/* -------- 0,10100 SSHLL{,2} #imm -------- */
/* -------- 1,10100 USHLL{,2} #imm -------- */
if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
/* -------- 0,xx,10010: XTN{,2} -------- */
- /* 31 28 23 21 15 9 4 XTN{,2} Vd.Tb, Vn.Ta
- 0q0 01110 size 100001 001010 n d
- */
- Bool isQ = bitQ == 1;
- IROp op = Iop_INVALID;
- const HChar* tb = NULL;
- const HChar* ta = NULL;
- switch ((size << 1) | (isQ ? 1 : 0)) {
- case 0: tb = "8b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break;
- case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break;
- case 2: tb = "4h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break;
- case 3: tb = "8h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break;
- case 4: tb = "2s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break;
- case 5: tb = "4s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break;
- case 6: break;
- case 7: break;
- default: vassert(0);
+ if (size == X11) return False;
+ vassert(size < 3);
+ Bool is2 = bitQ == 1;
+ IROp opN = mkVecNARROWUN(size);
+ IRTemp resN = newTempV128();
+ assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
+ putLO64andZUorPutHI64(is2, dd, resN);
+ const HChar* nm = "xtn";
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
+ const HChar* arrWide = nameArr_Q_SZ(1, size+1);
+ DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
+ nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
+ return True;
+ }
+
+ if (opcode == BITS5(1,0,1,0,0)
+ || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
+ /* -------- 0,xx,10100: SQXTN{,2} -------- */
+ /* -------- 1,xx,10100: UQXTN{,2} -------- */
+ /* -------- 1,xx,10010: SQXTUN{,2} -------- */
+ if (size == X11) return False;
+ vassert(size < 3);
+ Bool is2 = bitQ == 1;
+ IROp opN = Iop_INVALID;
+ Bool zWiden = True;
+ const HChar* nm = "??";
+ /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
+ opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
}
- if (op != Iop_INVALID) {
- if (!isQ) {
- putQRegLane(dd, 1, mkU64(0));
- }
- putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn)));
- DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "",
- nameQReg128(dd), tb, nameQReg128(nn), ta);
- return True;
+ else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
+ opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
}
- return False;
+ else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
+ opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
+ }
+ else vassert(0);
+ IRTemp src = newTempV128();
+ assign(src, getQReg128(nn));
+ IRTemp resN = newTempV128();
+ assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
+ putLO64andZUorPutHI64(is2, dd, resN);
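+      /* Re-widen the narrowed result and compare it against the original
+         source: any lane that fails to round-trip must have saturated,
+         and that difference is folded into QCFLAG. */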
+ IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
+ size, mkexpr(resN));
+ updateQCFLAGwithDifference(src, resW);
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
+ const HChar* arrWide = nameArr_Q_SZ(1, size+1);
+ DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
+ nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
+ return True;
}
if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
ARM64VecShiftOp op )
{
switch (op) {
- case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
- case ARM64vecsh_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
- case ARM64vecsh_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
- case ARM64vecsh_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
- case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
- case ARM64vecsh_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
- case ARM64vecsh_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
- case ARM64vecsh_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
- case ARM64vecsh_SHL64x2: *nm = "shl "; *ar = "2d"; return;
- case ARM64vecsh_SHL32x4: *nm = "shl "; *ar = "4s"; return;
- case ARM64vecsh_SHL16x8: *nm = "shl "; *ar = "8h"; return;
- case ARM64vecsh_SHL8x16: *nm = "shl "; *ar = "16b"; return;
- default: vpanic("showARM64VecShiftImmOp");
+ case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
+ case ARM64vecsh_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
+ case ARM64vecsh_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
+ case ARM64vecsh_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
+ case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
+ case ARM64vecsh_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
+ case ARM64vecsh_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
+ case ARM64vecsh_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
+ case ARM64vecsh_SHL64x2: *nm = "shl "; *ar = "2d"; return;
+ case ARM64vecsh_SHL32x4: *nm = "shl "; *ar = "4s"; return;
+ case ARM64vecsh_SHL16x8: *nm = "shl "; *ar = "8h"; return;
+ case ARM64vecsh_SHL8x16: *nm = "shl "; *ar = "16b"; return;
+ case ARM64vecsh_SQSHRN2SD: *nm = "sqshrn"; *ar = "2sd"; return;
+ case ARM64vecsh_SQSHRN4HS: *nm = "sqshrn"; *ar = "4hs"; return;
+ case ARM64vecsh_SQSHRN8BH: *nm = "sqshrn"; *ar = "8bh"; return;
+ case ARM64vecsh_UQSHRN2SD: *nm = "uqshrn"; *ar = "2sd"; return;
+ case ARM64vecsh_UQSHRN4HS: *nm = "uqshrn"; *ar = "4hs"; return;
+ case ARM64vecsh_UQSHRN8BH: *nm = "uqshrn"; *ar = "8bh"; return;
+ case ARM64vecsh_SQSHRUN2SD: *nm = "sqshrun"; *ar = "2sd"; return;
+ case ARM64vecsh_SQSHRUN4HS: *nm = "sqshrun"; *ar = "4hs"; return;
+ case ARM64vecsh_SQSHRUN8BH: *nm = "sqshrun"; *ar = "8bh"; return;
+ case ARM64vecsh_SQRSHRN2SD: *nm = "sqrshrn"; *ar = "2sd"; return;
+ case ARM64vecsh_SQRSHRN4HS: *nm = "sqrshrn"; *ar = "4hs"; return;
+ case ARM64vecsh_SQRSHRN8BH: *nm = "sqrshrn"; *ar = "8bh"; return;
+ case ARM64vecsh_UQRSHRN2SD: *nm = "uqrshrn"; *ar = "2sd"; return;
+ case ARM64vecsh_UQRSHRN4HS: *nm = "uqrshrn"; *ar = "4hs"; return;
+ case ARM64vecsh_UQRSHRN8BH: *nm = "uqrshrn"; *ar = "8bh"; return;
+ case ARM64vecsh_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
+ case ARM64vecsh_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
+ case ARM64vecsh_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
+ default: vpanic("showARM64VecShiftOp");
+ }
+}
+
+static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
+ switch (op) {
+ case ARM64vecna_XTN: return "xtn ";
+ case ARM64vecna_SQXTN: return "sqxtn ";
+ case ARM64vecna_UQXTN: return "uqxtn ";
+ case ARM64vecna_SQXTUN: return "sqxtun";
+ default: vpanic("showARM64VecNarrowOp");
}
}
i->ARM64in.VUnaryV.arg = arg;
return i;
}
-ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src ) {
+ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
+ UInt dszBlg2, HReg dst, HReg src ) {
ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
i->tag = ARM64in_VNarrowV;
+ i->ARM64in.VNarrowV.op = op;
i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
i->ARM64in.VNarrowV.dst = dst;
i->ARM64in.VNarrowV.src = src;
i->ARM64in.VShiftImmV.amt = amt;
UInt maxSh = 0;
switch (op) {
+      /* Architecturally, the allowed shift amounts are 1 .. lane_size for
+         right shifts and 0 .. lane_size-1 for left shifts.  For these
+         ordinary, non-saturating, non-narrowing shifts we reject zero
+         shifts (valid only for left shifts anyway) and accept 1 as the
+         minimum shift value. */
case ARM64vecsh_USHR64x2: case ARM64vecsh_SSHR64x2:
case ARM64vecsh_SHL64x2:
maxSh = 63; break;
case ARM64vecsh_USHR8x16: case ARM64vecsh_SSHR8x16:
case ARM64vecsh_SHL8x16:
maxSh = 7; break;
+      /* Whereas for this shift-right-and-narrow set, the minimum shift
+         value really is 1. */
+ case ARM64vecsh_UQSHRN2SD: case ARM64vecsh_SQSHRN2SD:
+ case ARM64vecsh_SQSHRUN2SD:
+ case ARM64vecsh_UQRSHRN2SD: case ARM64vecsh_SQRSHRN2SD:
+ case ARM64vecsh_SQRSHRUN2SD:
+ maxSh = 64; break;
+ case ARM64vecsh_UQSHRN4HS: case ARM64vecsh_SQSHRN4HS:
+ case ARM64vecsh_SQSHRUN4HS:
+ case ARM64vecsh_UQRSHRN4HS: case ARM64vecsh_SQRSHRN4HS:
+ case ARM64vecsh_SQRSHRUN4HS:
+ maxSh = 32; break;
+ case ARM64vecsh_UQSHRN8BH: case ARM64vecsh_SQSHRN8BH:
+ case ARM64vecsh_SQSHRUN8BH:
+ case ARM64vecsh_UQRSHRN8BH: case ARM64vecsh_SQRSHRN8BH:
+ case ARM64vecsh_SQRSHRUN8BH:
+ maxSh = 16; break;
default:
vassert(0);
}
UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
const HChar* darr[3] = { "8b", "4h", "2s" };
const HChar* sarr[3] = { "8h", "4s", "2d" };
- vex_printf("xtn ");
+ const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
+ vex_printf("%s ", nm);
ppHRegARM64(i->ARM64in.VNarrowV.dst);
vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
ppHRegARM64(i->ARM64in.VNarrowV.src);
000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h
000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s
000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d
+
+ 001 01110 00 1,00001 001010 n d SQXTUN Vd.8b, Vn.8h
+ 001 01110 01 1,00001 001010 n d SQXTUN Vd.4h, Vn.4s
+ 001 01110 10 1,00001 001010 n d SQXTUN Vd.2s, Vn.2d
+
+ 000 01110 00 1,00001 010010 n d SQXTN Vd.8b, Vn.8h
+ 000 01110 01 1,00001 010010 n d SQXTN Vd.4h, Vn.4s
+ 000 01110 10 1,00001 010010 n d SQXTN Vd.2s, Vn.2d
+
+ 001 01110 00 1,00001 010010 n d UQXTN Vd.8b, Vn.8h
+ 001 01110 01 1,00001 010010 n d UQXTN Vd.4h, Vn.4s
+ 001 01110 10 1,00001 010010 n d UQXTN Vd.2s, Vn.2d
*/
UInt vD = qregNo(i->ARM64in.VNarrowV.dst);
UInt vN = qregNo(i->ARM64in.VNarrowV.src);
UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
- *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
- X00001, X001010, vN, vD);
- goto done;
+ switch (i->ARM64in.VNarrowV.op) {
+ case ARM64vecna_XTN:
+ *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
+ X00001, X001010, vN, vD);
+ goto done;
+ case ARM64vecna_SQXTUN:
+ *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
+ X00001, X001010, vN, vD);
+ goto done;
+ case ARM64vecna_SQXTN:
+ *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
+ X00001, X010010, vN, vD);
+ goto done;
+ case ARM64vecna_UQXTN:
+ *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
+ X00001, X010010, vN, vD);
+ goto done;
+ default:
+ break;
+ }
+ goto bad;
}
case ARM64in_VShiftImmV: {
/*
- 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
- 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
+ 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
+ 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
+
+ 001 011110 immh immb 100101 n d UQSHRN ,,#sh
+ 000 011110 immh immb 100101 n d SQSHRN ,,#sh
+ 001 011110 immh immb 100001 n d SQSHRUN ,,#sh
+
+ 001 011110 immh immb 100111 n d UQRSHRN ,,#sh
+ 000 011110 immh immb 100111 n d SQRSHRN ,,#sh
+ 001 011110 immh immb 100011 n d SQRSHRUN ,,#sh
where immh:immb
= case T of
- 2d | sh in 1..63 -> let xxxxxx = 64-sh in 1xxx:xxx
- 4s | sh in 1..31 -> let xxxxx = 32-sh in 01xx:xxx
- 8h | sh in 1..15 -> let xxxx = 16-sh in 001x:xxx
- 16b | sh in 1..7 -> let xxx = 8-sh in 0001:xxx
+ 2d | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
+ 4s | sh in 1..32 -> let xxxxx = 32-sh in 01xx:xxx
+ 8h | sh in 1..16 -> let xxxx = 16-sh in 001x:xxx
+ 16b | sh in 1..8 -> let xxx = 8-sh in 0001:xxx
010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
where immh:immb
= case T of
- 2d | sh in 1..63 -> let xxxxxx = sh in 1xxx:xxx
- 4s | sh in 1..31 -> let xxxxx = sh in 01xx:xxx
- 8h | sh in 1..15 -> let xxxx = sh in 001x:xxx
- 16b | sh in 1..7 -> let xxx = sh in 0001:xxx
+ 2d | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
+ 4s | sh in 0..31 -> let xxxxx = sh in 01xx:xxx
+ 8h | sh in 0..15 -> let xxxx = sh in 001x:xxx
+ 16b | sh in 0..7 -> let xxx = sh in 0001:xxx
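+
+         For example (illustrative): USHR Vd.4s, Vn.4s, #7 has
+         immh:immb = 0100000 | (32-7) = 0111001, and
+         SHL Vd.4s, Vn.4s, #7 has
+         immh:immb = 0100000 | 7 = 0100111.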
*/
- UInt vD = qregNo(i->ARM64in.VShiftImmV.dst);
- UInt vN = qregNo(i->ARM64in.VShiftImmV.src);
- UInt sh = i->ARM64in.VShiftImmV.amt;
- ARM64VecShiftOp op = i->ARM64in.VShiftImmV.op;
- Bool syned = False;
- switch (op) {
- /* 64x2 cases */
- case ARM64vecsh_SSHR64x2: syned = True;
- case ARM64vecsh_USHR64x2: /* fallthrough */
+ UInt vD = qregNo(i->ARM64in.VShiftImmV.dst);
+ UInt vN = qregNo(i->ARM64in.VShiftImmV.src);
+ UInt sh = i->ARM64in.VShiftImmV.amt;
+ UInt tmpl = 0; /* invalid */
+
+ const UInt tmpl_USHR
+ = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
+ const UInt tmpl_SSHR
+ = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);
+
+ const UInt tmpl_UQSHRN
+ = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
+ const UInt tmpl_SQSHRN
+ = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
+ const UInt tmpl_SQSHRUN
+ = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);
+
+ const UInt tmpl_UQRSHRN
+ = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
+ const UInt tmpl_SQRSHRN
+ = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
+ const UInt tmpl_SQRSHRUN
+ = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);
+
+ const UInt tmpl_SHL
+ = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
+
+ switch (i->ARM64in.VShiftImmV.op) {
+ case ARM64vecsh_SSHR64x2: tmpl = tmpl_SSHR; goto right64x2;
+ case ARM64vecsh_USHR64x2: tmpl = tmpl_USHR; goto right64x2;
+ case ARM64vecsh_SHL64x2: tmpl = tmpl_SHL; goto left64x2;
+
+ case ARM64vecsh_SSHR32x4: tmpl = tmpl_SSHR; goto right32x4;
+ case ARM64vecsh_USHR32x4: tmpl = tmpl_USHR; goto right32x4;
+ case ARM64vecsh_UQSHRN2SD: tmpl = tmpl_UQSHRN; goto right32x4;
+ case ARM64vecsh_SQSHRN2SD: tmpl = tmpl_SQSHRN; goto right32x4;
+ case ARM64vecsh_SQSHRUN2SD: tmpl = tmpl_SQSHRUN; goto right32x4;
+ case ARM64vecsh_UQRSHRN2SD: tmpl = tmpl_UQRSHRN; goto right32x4;
+ case ARM64vecsh_SQRSHRN2SD: tmpl = tmpl_SQRSHRN; goto right32x4;
+ case ARM64vecsh_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
+ case ARM64vecsh_SHL32x4: tmpl = tmpl_SHL; goto left32x4;
+
+ case ARM64vecsh_SSHR16x8: tmpl = tmpl_SSHR; goto right16x8;
+ case ARM64vecsh_USHR16x8: tmpl = tmpl_USHR; goto right16x8;
+ case ARM64vecsh_UQSHRN4HS: tmpl = tmpl_UQSHRN; goto right16x8;
+ case ARM64vecsh_SQSHRN4HS: tmpl = tmpl_SQSHRN; goto right16x8;
+ case ARM64vecsh_SQSHRUN4HS: tmpl = tmpl_SQSHRUN; goto right16x8;
+ case ARM64vecsh_UQRSHRN4HS: tmpl = tmpl_UQRSHRN; goto right16x8;
+ case ARM64vecsh_SQRSHRN4HS: tmpl = tmpl_SQRSHRN; goto right16x8;
+ case ARM64vecsh_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
+ case ARM64vecsh_SHL16x8: tmpl = tmpl_SHL; goto left16x8;
+
+ case ARM64vecsh_SSHR8x16: tmpl = tmpl_SSHR; goto right8x16;
+ case ARM64vecsh_USHR8x16: tmpl = tmpl_USHR; goto right8x16;
+ case ARM64vecsh_UQSHRN8BH: tmpl = tmpl_UQSHRN; goto right8x16;
+ case ARM64vecsh_SQSHRN8BH: tmpl = tmpl_SQSHRN; goto right8x16;
+ case ARM64vecsh_SQSHRUN8BH: tmpl = tmpl_SQSHRUN; goto right8x16;
+ case ARM64vecsh_UQRSHRN8BH: tmpl = tmpl_UQRSHRN; goto right8x16;
+ case ARM64vecsh_SQRSHRN8BH: tmpl = tmpl_SQRSHRN; goto right8x16;
+ case ARM64vecsh_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
+ case ARM64vecsh_SHL8x16: tmpl = tmpl_SHL; goto left8x16;
+
+ default: break;
+
+ right64x2:
if (sh >= 1 && sh <= 63) {
- UInt xxxxxx = 64-sh;
- *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
- X1000000 | xxxxxx, X000001, vN, vD);
+ *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
goto done;
}
break;
- case ARM64vecsh_SHL64x2:
- if (sh >= 1 && sh <= 63) {
- UInt xxxxxx = sh;
- *p++ = X_3_6_7_6_5_5(X010, X011110,
- X1000000 | xxxxxx, X010101, vN, vD);
+ right32x4:
+ if (sh >= 1 && sh <= 32) {
+ *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
goto done;
}
break;
- /* 32x4 cases */
- case ARM64vecsh_SSHR32x4: syned = True;
- case ARM64vecsh_USHR32x4: /* fallthrough */
- if (sh >= 1 && sh <= 31) {
- UInt xxxxx = 32-sh;
- *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
- X0100000 | xxxxx, X000001, vN, vD);
+ right16x8:
+ if (sh >= 1 && sh <= 16) {
+ *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
goto done;
}
break;
- case ARM64vecsh_SHL32x4:
- if (sh >= 1 && sh <= 31) {
- UInt xxxxx = sh;
- *p++ = X_3_6_7_6_5_5(X010, X011110,
- X0100000 | xxxxx, X010101, vN, vD);
+ right8x16:
+ if (sh >= 1 && sh <= 8) {
+ *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
goto done;
}
break;
- /* 16x8 cases */
- case ARM64vecsh_SSHR16x8: syned = True;
- case ARM64vecsh_USHR16x8: /* fallthrough */
- if (sh >= 1 && sh <= 15) {
- UInt xxxx = 16-sh;
- *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
- X0010000 | xxxx, X000001, vN, vD);
+
+ left64x2:
+ if (sh >= 1 && sh <= 63) {
+ *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
goto done;
}
break;
- case ARM64vecsh_SHL16x8:
- if (sh >= 1 && sh <= 15) {
- UInt xxxx = sh;
- *p++ = X_3_6_7_6_5_5(X010, X011110,
- X0010000 | xxxx, X010101, vN, vD);
+ left32x4:
+ if (sh >= 1 && sh <= 31) {
+ *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
goto done;
}
break;
- /* 8x16 cases */
- case ARM64vecsh_SSHR8x16: syned = True;
- case ARM64vecsh_USHR8x16: /* fallthrough */
- if (sh >= 1 && sh <= 7) {
- UInt xxx = 8-sh;
- *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
- X0001000 | xxx, X000001, vN, vD);
+ left16x8:
+ if (sh >= 1 && sh <= 15) {
+ *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
goto done;
}
break;
- case ARM64vecsh_SHL8x16:
+ left8x16:
if (sh >= 1 && sh <= 7) {
- UInt xxx = sh;
- *p++ = X_3_6_7_6_5_5(X010, X011110,
- X0001000 | xxx, X010101, vN, vD);
+ *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
goto done;
}
break;
- default:
- break;
}
goto bad;
}
ARM64vecsh_SSHR16x8, ARM64vecsh_SSHR8x16,
ARM64vecsh_SHL64x2, ARM64vecsh_SHL32x4,
ARM64vecsh_SHL16x8, ARM64vecsh_SHL8x16,
+ /* These narrowing shifts zero out the top half of the destination
+ register. */
+ ARM64vecsh_SQSHRN2SD, ARM64vecsh_SQSHRN4HS, ARM64vecsh_SQSHRN8BH,
+ ARM64vecsh_UQSHRN2SD, ARM64vecsh_UQSHRN4HS, ARM64vecsh_UQSHRN8BH,
+ ARM64vecsh_SQSHRUN2SD, ARM64vecsh_SQSHRUN4HS, ARM64vecsh_SQSHRUN8BH,
+ ARM64vecsh_SQRSHRN2SD, ARM64vecsh_SQRSHRN4HS, ARM64vecsh_SQRSHRN8BH,
+ ARM64vecsh_UQRSHRN2SD, ARM64vecsh_UQRSHRN4HS, ARM64vecsh_UQRSHRN8BH,
+ ARM64vecsh_SQRSHRUN2SD, ARM64vecsh_SQRSHRUN4HS, ARM64vecsh_SQRSHRUN8BH,
ARM64vecsh_INVALID
}
ARM64VecShiftOp;
+typedef
+ enum {
+ ARM64vecna_XTN=400,
+ ARM64vecna_SQXTN,
+ ARM64vecna_UQXTN,
+ ARM64vecna_SQXTUN,
+ ARM64vecna_INVALID
+ }
+ ARM64VecNarrowOp;
+
typedef
enum {
/* baseline */
HReg arg;
} VUnaryV;
/* vector narrowing, Q -> Q. Result goes in the bottom half
- of dst and the top half is zeroed out. Iow is XTN. */
+ of dst and the top half is zeroed out. Iow one of the
+ XTN family. */
struct {
- UInt dszBlg2; // 0: 16to8_x8 1: 32to16_x4 2: 64to32_x2
- HReg dst; // Q reg
- HReg src; // Q reg
+ ARM64VecNarrowOp op;
+ UInt dszBlg2; // 0: 16to8_x8 1: 32to16_x4 2: 64to32_x2
+ HReg dst; // Q reg
+ HReg src; // Q reg
} VNarrowV;
/* Vector shift by immediate. |amt| needs to be > 0 and <
implied lane size of |op|. Zero shifts and out of range
extern ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg );
extern ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op, HReg, HReg, HReg );
extern ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg, HReg );
-extern ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src );
+extern ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op, UInt dszBlg2,
+ HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
HReg dst, HReg src, UInt amt );
extern ARM64Instr* ARM64Instr_VExtV ( HReg dst,
}
case Iop_NarrowUn16to8x8:
case Iop_NarrowUn32to16x4:
- case Iop_NarrowUn64to32x2: {
+ case Iop_NarrowUn64to32x2:
+ case Iop_QNarrowUn16Sto8Sx8:
+ case Iop_QNarrowUn32Sto16Sx4:
+ case Iop_QNarrowUn64Sto32Sx2:
+ case Iop_QNarrowUn16Uto8Ux8:
+ case Iop_QNarrowUn32Uto16Ux4:
+ case Iop_QNarrowUn64Uto32Ux2:
+ case Iop_QNarrowUn16Sto8Ux8:
+ case Iop_QNarrowUn32Sto16Ux4:
+ case Iop_QNarrowUn64Sto32Ux2:
+ {
HReg src = iselV128Expr(env, e->Iex.Unop.arg);
HReg tmp = newVRegV(env);
HReg dst = newVRegI(env);
UInt dszBlg2 = 3; /* illegal */
+ ARM64VecNarrowOp op = ARM64vecna_INVALID;
switch (e->Iex.Unop.op) {
- case Iop_NarrowUn16to8x8: dszBlg2 = 0; break; // 16to8_x8
- case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4
- case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2
- default: vassert(0);
+ case Iop_NarrowUn16to8x8:
+ dszBlg2 = 0; op = ARM64vecna_XTN; break;
+ case Iop_NarrowUn32to16x4:
+ dszBlg2 = 1; op = ARM64vecna_XTN; break;
+ case Iop_NarrowUn64to32x2:
+ dszBlg2 = 2; op = ARM64vecna_XTN; break;
+ case Iop_QNarrowUn16Sto8Sx8:
+ dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
+ case Iop_QNarrowUn32Sto16Sx4:
+ dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
+ case Iop_QNarrowUn64Sto32Sx2:
+ dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
+ case Iop_QNarrowUn16Uto8Ux8:
+ dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
+ case Iop_QNarrowUn32Uto16Ux4:
+ dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
+ case Iop_QNarrowUn64Uto32Ux2:
+ dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
+ case Iop_QNarrowUn16Sto8Ux8:
+ dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
+ case Iop_QNarrowUn32Sto16Ux4:
+ dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
+ case Iop_QNarrowUn64Sto32Ux2:
+ dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
+ default:
+ vassert(0);
}
- addInstr(env, ARM64Instr_VNarrowV(dszBlg2, tmp, src));
+ addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
return dst;
}
iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
}
+ case Iop_64UtoV128: {
+ HReg res = newVRegV(env);
+ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, ARM64Instr_VQfromX(res, arg));
+ return res;
+ }
//ZZ case Iop_NotV128: {
//ZZ DECLARE_PATTERN(p_veqz_8x16);
HReg src = iselV128Expr(env, argL);
HReg dst = newVRegV(env);
if (amt > 0) {
+ /* For left shifts, the allowable amt values are
+ 0 .. lane_bits-1. For right shifts the allowable
+ values are 1 .. lane_bits. By restricting it to
+ 1 .. lane_bits-1, we are guaranteed to create a
+ valid instruction. */
addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
} else {
dst = src;
break;
}
+ /* uu */
+ case Iop_QandQShrNnarrow16Uto8Ux8:
+ case Iop_QandQShrNnarrow32Uto16Ux4:
+ case Iop_QandQShrNnarrow64Uto32Ux2:
+ /* ss */
+ case Iop_QandQSarNnarrow16Sto8Sx8:
+ case Iop_QandQSarNnarrow32Sto16Sx4:
+ case Iop_QandQSarNnarrow64Sto32Sx2:
+ /* su */
+ case Iop_QandQSarNnarrow16Sto8Ux8:
+ case Iop_QandQSarNnarrow32Sto16Ux4:
+ case Iop_QandQSarNnarrow64Sto32Ux2:
+ /* ruu */
+ case Iop_QandQRShrNnarrow16Uto8Ux8:
+ case Iop_QandQRShrNnarrow32Uto16Ux4:
+ case Iop_QandQRShrNnarrow64Uto32Ux2:
+ /* rss */
+ case Iop_QandQRSarNnarrow16Sto8Sx8:
+ case Iop_QandQRSarNnarrow32Sto16Sx4:
+ case Iop_QandQRSarNnarrow64Sto32Sx2:
+ /* rsu */
+ case Iop_QandQRSarNnarrow16Sto8Ux8:
+ case Iop_QandQRSarNnarrow32Sto16Ux4:
+ case Iop_QandQRSarNnarrow64Sto32Ux2:
+ {
+ IRExpr* argL = e->Iex.Binop.arg1;
+ IRExpr* argR = e->Iex.Binop.arg2;
+ if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
+ UInt amt = argR->Iex.Const.con->Ico.U8;
+ UInt limit = 0;
+ ARM64VecShiftOp op = ARM64vecsh_INVALID;
+ switch (e->Iex.Binop.op) {
+ /* uu */
+ case Iop_QandQShrNnarrow64Uto32Ux2:
+ op = ARM64vecsh_UQSHRN2SD; limit = 64; break;
+ case Iop_QandQShrNnarrow32Uto16Ux4:
+ op = ARM64vecsh_UQSHRN4HS; limit = 32; break;
+ case Iop_QandQShrNnarrow16Uto8Ux8:
+ op = ARM64vecsh_UQSHRN8BH; limit = 16; break;
+ /* ss */
+ case Iop_QandQSarNnarrow64Sto32Sx2:
+ op = ARM64vecsh_SQSHRN2SD; limit = 64; break;
+ case Iop_QandQSarNnarrow32Sto16Sx4:
+ op = ARM64vecsh_SQSHRN4HS; limit = 32; break;
+ case Iop_QandQSarNnarrow16Sto8Sx8:
+ op = ARM64vecsh_SQSHRN8BH; limit = 16; break;
+ /* su */
+ case Iop_QandQSarNnarrow64Sto32Ux2:
+ op = ARM64vecsh_SQSHRUN2SD; limit = 64; break;
+ case Iop_QandQSarNnarrow32Sto16Ux4:
+ op = ARM64vecsh_SQSHRUN4HS; limit = 32; break;
+ case Iop_QandQSarNnarrow16Sto8Ux8:
+ op = ARM64vecsh_SQSHRUN8BH; limit = 16; break;
+ /* ruu */
+ case Iop_QandQRShrNnarrow64Uto32Ux2:
+ op = ARM64vecsh_UQRSHRN2SD; limit = 64; break;
+ case Iop_QandQRShrNnarrow32Uto16Ux4:
+ op = ARM64vecsh_UQRSHRN4HS; limit = 32; break;
+ case Iop_QandQRShrNnarrow16Uto8Ux8:
+ op = ARM64vecsh_UQRSHRN8BH; limit = 16; break;
+ /* rss */
+ case Iop_QandQRSarNnarrow64Sto32Sx2:
+ op = ARM64vecsh_SQRSHRN2SD; limit = 64; break;
+ case Iop_QandQRSarNnarrow32Sto16Sx4:
+ op = ARM64vecsh_SQRSHRN4HS; limit = 32; break;
+ case Iop_QandQRSarNnarrow16Sto8Sx8:
+ op = ARM64vecsh_SQRSHRN8BH; limit = 16; break;
+ /* rsu */
+ case Iop_QandQRSarNnarrow64Sto32Ux2:
+ op = ARM64vecsh_SQRSHRUN2SD; limit = 64; break;
+ case Iop_QandQRSarNnarrow32Sto16Ux4:
+ op = ARM64vecsh_SQRSHRUN4HS; limit = 32; break;
+ case Iop_QandQRSarNnarrow16Sto8Ux8:
+ op = ARM64vecsh_SQRSHRUN8BH; limit = 16; break;
+ /**/
+ default:
+ vassert(0);
+ }
+ if (op != ARM64vecsh_INVALID && amt >= 1 && amt <= limit) {
+ HReg src = iselV128Expr(env, argL);
+ HReg dst = newVRegV(env);
+ HReg fpsr = newVRegI(env);
+ /* Clear FPSR.Q, do the operation, and return both its
+ result and the new value of FPSR.Q. We can simply
+ zero out FPSR since all the other bits have no relevance
+ in VEX generated code. */
+ addInstr(env, ARM64Instr_Imm64(fpsr, 0));
+ addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
+ addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
+ addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
+ addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
+ ARM64sh_SHR));
+ ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
+ vassert(ril_one);
+ addInstr(env, ARM64Instr_Logic(fpsr,
+ fpsr, ril_one, ARM64lo_AND));
+            /* Now we have: the main (shift) result in the bottom half
+               of |dst|, and the Q bit at the bottom of |fpsr|.
+               Combining them with an "InterleaveLO64x2" style operation
+               produces a 128 bit value with dst[63:0] (the shift result)
+               in the lower half and fpsr[63:0] (the Q bit) in the upper
+               half, which is what we want. */
+ HReg scratch = newVRegV(env);
+ addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
+ addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
+ dst, dst, scratch));
+ return dst;
+ }
+ }
+ /* else fall out; this is unhandled */
+ break;
+ }
+
case Iop_ShlV128:
case Iop_ShrV128: {
Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
default: vassert(0);
}
/* Clear FPSR.Q, do the operation, and return both its result
- and the new value of FPSR.Q. We can simply zero the whole
- thing out since FPSR is essentially a scratch status register
- on the host. */
+ and the new value of FPSR.Q. We can simply zero out FPSR
+ since all the other bits have no relevance in VEX generated
+ code. */
addInstr(env, ARM64Instr_Imm64(fpsr, 0));
addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
case Iop_QandSQRsh32x4: vex_printf("QandSQRsh32x4"); return;
case Iop_QandSQRsh64x2: vex_printf("QandSQRsh64x2"); return;
+ case Iop_QandQShrNnarrow16Uto8Ux8:
+ vex_printf("QandQShrNnarrow16Uto8Ux8"); return;
+ case Iop_QandQShrNnarrow32Uto16Ux4:
+ vex_printf("QandQShrNnarrow32Uto16Ux4"); return;
+ case Iop_QandQShrNnarrow64Uto32Ux2:
+ vex_printf("QandQShrNnarrow64Uto32Ux2"); return;
+ case Iop_QandQSarNnarrow16Sto8Sx8:
+ vex_printf("QandQSarNnarrow16Sto8Sx8"); return;
+ case Iop_QandQSarNnarrow32Sto16Sx4:
+ vex_printf("QandQSarNnarrow32Sto16Sx4"); return;
+ case Iop_QandQSarNnarrow64Sto32Sx2:
+ vex_printf("QandQSarNnarrow64Sto32Sx2"); return;
+ case Iop_QandQSarNnarrow16Sto8Ux8:
+ vex_printf("QandQSarNnarrow16Sto8Ux8"); return;
+ case Iop_QandQSarNnarrow32Sto16Ux4:
+ vex_printf("QandQSarNnarrow32Sto16Ux4"); return;
+ case Iop_QandQSarNnarrow64Sto32Ux2:
+ vex_printf("QandQSarNnarrow64Sto32Ux2"); return;
+ case Iop_QandQRShrNnarrow16Uto8Ux8:
+ vex_printf("QandQRShrNnarrow16Uto8Ux8"); return;
+ case Iop_QandQRShrNnarrow32Uto16Ux4:
+ vex_printf("QandQRShrNnarrow32Uto16Ux4"); return;
+ case Iop_QandQRShrNnarrow64Uto32Ux2:
+ vex_printf("QandQRShrNnarrow64Uto32Ux2"); return;
+ case Iop_QandQRSarNnarrow16Sto8Sx8:
+ vex_printf("QandQRSarNnarrow16Sto8Sx8"); return;
+ case Iop_QandQRSarNnarrow32Sto16Sx4:
+ vex_printf("QandQRSarNnarrow32Sto16Sx4"); return;
+ case Iop_QandQRSarNnarrow64Sto32Sx2:
+ vex_printf("QandQRSarNnarrow64Sto32Sx2"); return;
+ case Iop_QandQRSarNnarrow16Sto8Ux8:
+ vex_printf("QandQRSarNnarrow16Sto8Ux8"); return;
+ case Iop_QandQRSarNnarrow32Sto16Ux4:
+ vex_printf("QandQRSarNnarrow32Sto16Ux4"); return;
+ case Iop_QandQRSarNnarrow64Sto32Ux2:
+ vex_printf("QandQRSarNnarrow64Sto32Ux2"); return;
+
case Iop_NarrowBin16to8x16: vex_printf("NarrowBin16to8x16"); return;
case Iop_NarrowBin32to16x8: vex_printf("NarrowBin32to16x8"); return;
case Iop_QNarrowBin16Uto8Ux16: vex_printf("QNarrowBin16Uto8Ux16"); return;
case Iop_QSalN8x16: case Iop_QSalN16x8:
case Iop_QSalN32x4: case Iop_QSalN64x2:
case Iop_SHA256: case Iop_SHA512:
+ case Iop_QandQShrNnarrow16Uto8Ux8:
+ case Iop_QandQShrNnarrow32Uto16Ux4:
+ case Iop_QandQShrNnarrow64Uto32Ux2:
+ case Iop_QandQSarNnarrow16Sto8Sx8:
+ case Iop_QandQSarNnarrow32Sto16Sx4:
+ case Iop_QandQSarNnarrow64Sto32Sx2:
+ case Iop_QandQSarNnarrow16Sto8Ux8:
+ case Iop_QandQSarNnarrow32Sto16Ux4:
+ case Iop_QandQSarNnarrow64Sto32Ux2:
+ case Iop_QandQRShrNnarrow16Uto8Ux8:
+ case Iop_QandQRShrNnarrow32Uto16Ux4:
+ case Iop_QandQRShrNnarrow64Uto32Ux2:
+ case Iop_QandQRSarNnarrow16Sto8Sx8:
+ case Iop_QandQRSarNnarrow32Sto16Sx4:
+ case Iop_QandQRSarNnarrow64Sto32Sx2:
+ case Iop_QandQRSarNnarrow16Sto8Ux8:
+ case Iop_QandQRSarNnarrow32Sto16Ux4:
+ case Iop_QandQRSarNnarrow64Sto32Ux2:
BINARY(Ity_V128,Ity_I8, Ity_V128);
case Iop_F32ToFixed32Ux4_RZ:
Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
Iop_QandSQRsh32x4, Iop_QandSQRsh64x2,
+ /* VECTOR x SCALAR SATURATING (& MAYBE ROUNDING) NARROWING SHIFT RIGHT */
+ /* All of type (V128, I8) -> V128 */
+ /* The first argument is shifted right, then narrowed to half the width
+ by saturating it. The second argument is a scalar shift amount that
+ applies to all lanes, and must be a value in the range 1 to lane_width.
+ The shift may be done signedly (Sar variants) or unsignedly (Shr
+ variants). The saturation is done according to the two signedness
+ indicators at the end of the name. For example 64Sto32U means a
+ signed 64 bit value is saturated into an unsigned 32 bit value.
+      Additionally, the rounding variants (QandQRShr / QandQRSar) add the
+      value (1 << (shift_amount-1)) to each source lane before shifting.
+
+ These operations return 65 bits: one bit ("Q") indicating whether
+ saturation occurred, and the shift result. The result type is V128,
+ of which the lower half is the shift result, and Q occupies the
+ least significant bit of the upper half. All other bits of the
+ upper half are zero. */
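+
+      /* For example, Iop_QandQSarNnarrow32Sto16Sx4(v, 3) signedly shifts
+         each 32 bit lane of v right by 3, saturates each result to a
+         signed 16 bit value, places the four narrowed results in the
+         lower 64 bits of the result, sets bit 64 iff any lane saturated,
+         and zeroes bits 65..127. */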
+ // No rounding, sat U->U
+ Iop_QandQShrNnarrow16Uto8Ux8,
+ Iop_QandQShrNnarrow32Uto16Ux4, Iop_QandQShrNnarrow64Uto32Ux2,
+ // No rounding, sat S->S
+ Iop_QandQSarNnarrow16Sto8Sx8,
+ Iop_QandQSarNnarrow32Sto16Sx4, Iop_QandQSarNnarrow64Sto32Sx2,
+ // No rounding, sat S->U
+ Iop_QandQSarNnarrow16Sto8Ux8,
+ Iop_QandQSarNnarrow32Sto16Ux4, Iop_QandQSarNnarrow64Sto32Ux2,
+
+ // Rounding, sat U->U
+ Iop_QandQRShrNnarrow16Uto8Ux8,
+ Iop_QandQRShrNnarrow32Uto16Ux4, Iop_QandQRShrNnarrow64Uto32Ux2,
+ // Rounding, sat S->S
+ Iop_QandQRSarNnarrow16Sto8Sx8,
+ Iop_QandQRSarNnarrow32Sto16Sx4, Iop_QandQRSarNnarrow64Sto32Sx2,
+ // Rounding, sat S->U
+ Iop_QandQRSarNnarrow16Sto8Ux8,
+ Iop_QandQRSarNnarrow32Sto16Ux4, Iop_QandQRSarNnarrow64Sto32Ux2,
+
/* NARROWING (binary)
-- narrow 2xV128 into 1xV128, hi half from left arg */
/* See comments above w.r.t. U vs S issues in saturated narrowing. */