// A bit of ATCery: bounce all cases we haven't seen an example of.
if (/* F32toI32S */
(op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
+ || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
+ || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
/* F32toI32U */
+ || (op == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
+ || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
/* F32toI64S */
+ || (op == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
/* F32toI64U */
|| (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
/* F64toI32S */
|| (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
|| (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
/* F64toI32U */
- || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
|| (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
+ || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
+ || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
/* F64toI64S */
|| (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
+ || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
+ || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
/* F64toI64U */
|| (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
+ || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
) {
/* validated */
} else {
return False;
}
+ /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
+ /* 30 23 20 18 15 9 4
+ 1 00 11110 0x 1 00 100 000000 n d FCVTAS Xd, Fn
+ 0 00 11110 0x 1 00 100 000000 n d FCVTAS Wd, Fn
+ Fn is Dn when x==1, Sn when x==0
+ */
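+ /* Worked example, assuming the layout above: 0x9E640020 has sf=1,
+    x=1, n=1, d=0, so it should decode as FCVTAS X0, D1. */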
+ if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
+ && INSN(21,16) == BITS6(1,0,0,1,0,0)
+ && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
+ Bool isI64 = INSN(31,31) == 1;
+ Bool isF64 = INSN(22,22) == 1;
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ /* Decide on the IR rounding mode to use. */
+ /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */
+ IRRoundingMode irrm = Irrm_NEAREST;
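+ /* Irrm_NEAREST is round-to-nearest, ties-to-even, whereas FCVTAS wants
+    ties-away-from-zero; the two differ only on exact halfway values
+    (e.g. 2.5 -> 2 under ties-to-even, but 3 under ties-away). */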
+ /* Decide on the conversion primop. */
+ IROp op = isI64 ? (isF64 ? Iop_F64toI64S : Iop_F32toI64S)
+ : (isF64 ? Iop_F64toI32S : Iop_F32toI32S);
+ IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
+ IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
+ IRTemp src = newTemp(srcTy);
+ IRTemp dst = newTemp(dstTy);
+ assign(src, getQRegLO(nn, srcTy));
+ assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
+ putIRegOrZR(isI64, dd, mkexpr(dst));
+ DIP("fcvtas %s, %s (KLUDGED)\n",
+ nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
+ return True;
+ }
+
/* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
/* 31 23 21 17 14 9 4
000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
010 -inf (FRINTM)
011 zero (FRINTZ)
000 tieeven
- 100 tieaway
+ 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
110 per FPCR + "exact = TRUE"
101 unallocated
*/
case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
+ // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
+ case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
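+ // Like the FCVTAS kludge above, this only misrounds exact .5 ties.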
default: break;
}
if (irrmE) {
000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
--------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
--------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
- --------- 00 ----- 01 --------- FCVT Dd, Sn (unimp)
+ --------- 00 ----- 01 --------- FCVT Dd, Sn
--------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
- --------- 01 ----- 00 --------- FCVT Sd, Dn (unimp)
+ --------- 01 ----- 00 --------- FCVT Sd, Dn
Rounding, when dst is smaller than src, is per the FPCR.
*/
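+ /* For example, with the fields above, 0x1E22C041 (bits 23:22 = 00,
+    bits 16:15 = 01, n=2, d=1) should be FCVT D1, S2. */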
if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
{
switch (op) {
case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
+ case ARM64vecsh_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
case ARM64vecsh_SHL32x4: *nm = "shl "; *ar = "4s"; return;
default: vpanic("showARM64VecShiftImmOp");
maxSh = 63; break;
case ARM64vecsh_SHL32x4:
maxSh = 31; break;
+ case ARM64vecsh_USHR16x8:
+ maxSh = 15; break;
default:
vassert(0);
}
#define X111110 BITS8(0,0, 1,1,1,1,1,0)
#define X111111 BITS8(0,0, 1,1,1,1,1,1)
+#define X0010000 BITS8(0, 0,0,1,0,0,0,0)
#define X0100000 BITS8(0, 0,1,0,0,0,0,0)
#define X1000000 BITS8(0, 1,0,0,0,0,0,0)
case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
*p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
break;
- /* UCVTF Sd, Wn ATC */
+ case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
+ break;
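+ /* E.g. with rN=1, rD=0 this should emit 0x1E230020, i.e. ucvtf s0, w1. */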
case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
*p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
break;
*p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
X000000, rN, rD);
break;
- /* */
case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
*p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
X000000, rN, rD);
break;
- /* */
+ case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
+ X000000, rN, rD);
+ break;
+ case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
+ X000000, rN, rD);
+ break;
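+ /* In these FCVTx cases, armRM is assumed to carry the architectural
+    2-bit rmode (00=N, 01=P, 10=M, 11=Z); the <<3 places it at bits
+    20:19 of the instruction. */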
case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
*p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
X000000, rN, rD);
goto done;
}
break;
+ //case ARM64vecsh_SSHR16x8: syned = True; ATC
+ case ARM64vecsh_USHR16x8: /* fallthrough */
+ if (sh >= 1 && sh <= 15) {
+ UInt xxxx = 16-sh;
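+ /* For 16-bit lanes, immh:immb encodes 32 - shift with immh = 001x,
+    so the 7-bit field is 0b0010000 | (16 - sh). */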
+ *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
+ X0010000 | xxxx, X000001, vN, vD);
+ goto done;
+ }
+ break;
default:
break;
}
cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
case Iop_F32toI32S:
cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
+ case Iop_F32toI32U:
+ cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
+ case Iop_F32toI64S:
+ cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
case Iop_F32toI64U:
cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
default:
ARM64sh_SAR));
return dst;
}
+ case Iop_NarrowUn16to8x8:
case Iop_NarrowUn32to16x4:
case Iop_NarrowUn64to32x2: {
HReg src = iselV128Expr(env, e->Iex.Unop.arg);
HReg dst = newVRegI(env);
UInt dszBlg2 = 3; /* illegal */
switch (e->Iex.Unop.op) {
+ case Iop_NarrowUn16to8x8: dszBlg2 = 0; break; // 16to8_x8
case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4
case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2
default: vassert(0);
//ZZ case Iop_ShrN16x8:
//ZZ case Iop_ShrN32x4:
case Iop_ShrN64x2:
+ case Iop_ShrN16x8:
case Iop_SarN64x2:
- case Iop_ShlN32x4: {
+ case Iop_ShlN32x4:
+ {
IRExpr* argL = e->Iex.Binop.arg1;
IRExpr* argR = e->Iex.Binop.arg2;
if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
switch (e->Iex.Binop.op) {
case Iop_ShrN64x2:
op = ARM64vecsh_USHR64x2; limit = 63; break;
+ case Iop_ShrN16x8:
+ op = ARM64vecsh_USHR16x8; limit = 15; break;
case Iop_SarN64x2:
op = ARM64vecsh_SSHR64x2; limit = 63; break;
case Iop_ShlN32x4:
addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
return dstS;
}
+ case Iop_I32UtoF32:
case Iop_I32StoF32:
case Iop_I64UtoF32:
case Iop_I64StoF32: {
ARM64CvtOp cvt_op = ARM64cvt_INVALID;
switch (e->Iex.Binop.op) {
+ case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;