UInt laneSzB = 0;
switch (laneTy) {
case Ity_I8: laneSzB = 1; break;
- case Ity_I16: laneSzB = 2; break;
+ case Ity_F16: case Ity_I16: laneSzB = 2; break;
case Ity_F32: case Ity_I32: laneSzB = 4; break;
case Ity_F64: case Ity_I64: laneSzB = 8; break;
case Ity_V128: laneSzB = 16; break;
Int off = offsetQRegLane(qregNo, ty, 0);
switch (ty) {
case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
- case Ity_F32: case Ity_F64: case Ity_V128:
+ case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
break;
default:
vassert(0); // Other cases are probably invalid
Int off = offsetQRegLane(qregNo, ty, 0);
switch (ty) {
case Ity_I8:
- case Ity_I16:
+ case Ity_F16: case Ity_I16:
case Ity_I32: case Ity_I64:
case Ity_F32: case Ity_F64: case Ity_V128:
break;
switch (laneTy) {
case Ity_F64: case Ity_I64:
case Ity_I32: case Ity_F32:
- case Ity_I16:
+ case Ity_I16: case Ity_F16:
case Ity_I8:
break;
default:
Int off = offsetQRegLane(qregNo, laneTy, laneNo);
switch (laneTy) {
case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
- case Ity_F64: case Ity_F32:
+ case Ity_F64: case Ity_F32: case Ity_F16:
break;
default:
vassert(0); // Other cases are ATC
return True;
}
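+ /* Decode the opcode/size combination into ix, which selects one of
+    the five FCVT{N,M,A,P,Z} rounding variants handled below. */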
+ ix = 0; /*INVALID*/
+ switch (opcode) {
+ case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
+ case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
+ case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
+ default: break;
+ }
+ if (ix > 0) {
+ /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
+ /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
+ /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
+ /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
+ /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
+ /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
+ /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
+ /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
+ /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
+ /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
+ Bool is64 = (size & 1) == 1;
+ IRType tyF = is64 ? Ity_F64 : Ity_F32;
+ IRType tyI = is64 ? Ity_I64 : Ity_I32;
+ IRRoundingMode irrm = 8; /*impossible value, overwritten below*/
+ HChar ch = '?';
+ switch (ix) {
+ case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
+ case 2: ch = 'm'; irrm = Irrm_NegINF; break;
+ /* FCVTA* rounds to nearest with ties away from zero; VEX IR has
+    no such rounding mode, so Irrm_NEAREST (ties to even) is the
+    closest available approximation. */
+ case 3: ch = 'a'; irrm = Irrm_NEAREST; break;
+ case 4: ch = 'p'; irrm = Irrm_PosINF; break;
+ case 5: ch = 'z'; irrm = Irrm_ZERO; break;
+ default: vassert(0);
+ }
+ IROp cvt = Iop_INVALID;
+ if (bitU == 1) {
+ cvt = is64 ? Iop_F64toI64U : Iop_F32toI32U;
+ } else {
+ cvt = is64 ? Iop_F64toI64S : Iop_F32toI32S;
+ }
+ IRTemp src = newTemp(tyF);
+ IRTemp res = newTemp(tyI);
+ assign(src, getQRegLane(nn, 0, tyF));
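+ /* These conversions use a rounding mode fixed by the instruction
+    encoding, not the one in FPCR, so irrm is supplied as an IR
+    constant. */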
+ assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
+ putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
+ if (!is64) {
+ putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
+ }
+ putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
+ HChar sOrD = is64 ? 'd' : 's';
+ DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
+ sOrD, dd, sOrD, nn);
+ return True;
+ }
+
if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
/* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
/* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
return True;
}
- if (bitU == 0 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
- /* -------- 0,01,10110: FCVTN 2s/4s_2d -------- */
- IRTemp rm = mk_get_IR_rounding_mode();
- IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
- IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
- putQRegLane(dd, 2 * bitQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
- putQRegLane(dd, 2 * bitQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
+ if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
+ /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
+ UInt nLanes = size == X00 ? 4 : 2;
+ IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
+ IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
+ IRTemp rm = mk_get_IR_rounding_mode();
+ IRTemp src[nLanes];
+ for (UInt i = 0; i < nLanes; i++) {
+ src[i] = newTemp(srcTy);
+ assign(src[i], getQRegLane(nn, i, srcTy));
+ }
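+ /* FCVTN writes the low half of Dd; FCVTN2 (bitQ == 1) writes the
+    high half, leaving the low half untouched. */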
+ for (UInt i = 0; i < nLanes; i++) {
+ putQRegLane(dd, nLanes * bitQ + i,
+ binop(opCvt, mkexpr(rm), mkexpr(src[i])));
+ }
if (bitQ == 0) {
putQRegLane(dd, 1, mkU64(0));
}
- DIP("fcvtn%s %s.%s, %s.2d\n", bitQ ? "2" : "",
- nameQReg128(dd), bitQ ? "4s" : "2s", nameQReg128(nn));
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
+ const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
+ DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
+ nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
+ return True;
+ }
+
+ if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
+ /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
+ UInt nLanes = size == X00 ? 4 : 2;
+ IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
+ IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
+ IRTemp src[nLanes];
+ for (UInt i = 0; i < nLanes; i++) {
+ src[i] = newTemp(srcTy);
+ assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
+ }
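+ /* Widening F16->F32 / F32->F64 is exact, so no rounding mode is
+    needed and the conversion is a unop. */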
+ for (UInt i = 0; i < nLanes; i++) {
+ putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
+ }
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
+ const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
+ DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
+ nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
return True;
}
/* -------- 01,000111: FCVT h_d -------- */
/* -------- 01,000100: FCVT s_d -------- */
/* 31 23 21 16 14 9 4
- 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
- --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
- --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
+ 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
+ --------- 11 ----- 01 --------- FCVT Dd, Hn
+ --------- 00 ----- 11 --------- FCVT Hd, Sn
--------- 00 ----- 01 --------- FCVT Dd, Sn
- --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
+ --------- 01 ----- 11 --------- FCVT Hd, Dn
--------- 01 ----- 00 --------- FCVT Sd, Dn
Rounding, when dst is smaller than src, is per the FPCR.
*/
UInt b2322 = ty;
UInt b1615 = opcode & BITS2(1,1);
- if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
- /* Convert S to D */
- IRTemp res = newTemp(Ity_F64);
- assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
- putQReg128(dd, mkV128(0x0000));
- putQRegLO(dd, mkexpr(res));
- DIP("fcvt %s, %s\n",
- nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
- return True;
- }
- if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
- /* Convert D to S */
- IRTemp res = newTemp(Ity_F32);
- assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
- getQRegLO(nn, Ity_F64)));
- putQReg128(dd, mkV128(0x0000));
- putQRegLO(dd, mkexpr(res));
- DIP("fcvt %s, %s\n",
- nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
- return True;
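+ /* Pack ty (bits 23:22) and opcode bits 16:15 into a four-bit key
+    identifying the source/destination precision pair. */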
+ switch ((b2322 << 2) | b1615) {
+ case BITS4(0,0,0,1): // S -> D
+ case BITS4(1,1,0,1): { // H -> D
+ Bool srcIsH = b2322 == BITS2(1,1);
+ IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
+ IRTemp res = newTemp(Ity_F64);
+ assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
+ getQRegLO(nn, srcTy)));
+ putQReg128(dd, mkV128(0x0000));
+ putQRegLO(dd, mkexpr(res));
+ DIP("fcvt %s, %s\n",
+ nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
+ return True;
+ }
+ case BITS4(0,1,0,0): // D -> S
+ case BITS4(0,1,1,1): { // D -> H
+ Bool dstIsH = b1615 == BITS2(1,1);
+ IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
+ IRTemp res = newTemp(dstTy);
+ assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
+ mkexpr(mk_get_IR_rounding_mode()),
+ getQRegLO(nn, Ity_F64)));
+ putQReg128(dd, mkV128(0x0000));
+ putQRegLO(dd, mkexpr(res));
+ DIP("fcvt %s, %s\n",
+ nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
+ return True;
+ }
+ case BITS4(0,0,1,1): // S -> H
+ case BITS4(1,1,0,0): { // H -> S
+ Bool toH = b1615 == BITS2(1,1);
+ IRType srcTy = toH ? Ity_F32 : Ity_F16;
+ IRType dstTy = toH ? Ity_F16 : Ity_F32;
+ IRTemp res = newTemp(dstTy);
+ if (toH) {
+ assign(res, binop(Iop_F32toF16,
+ mkexpr(mk_get_IR_rounding_mode()),
+ getQRegLO(nn, srcTy)));
+ } else {
+ assign(res, unop(Iop_F16toF32,
+ getQRegLO(nn, srcTy)));
+ }
+ putQReg128(dd, mkV128(0x0000));
+ putQRegLO(dd, mkexpr(res));
+ DIP("fcvt %s, %s\n",
+ nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
+ return True;
+ }
+ default:
+ break;
}
/* else unhandled */
return False;
---------------- 01 -------------- FCVTP-------- (round to +inf)
---------------- 10 -------------- FCVTM-------- (round to -inf)
---------------- 11 -------------- FCVTZ-------- (round to zero)
-
---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
vex_printf("(S-reg)");
}
+static void ppHRegARM64asHreg ( HReg reg ) {
+ ppHRegARM64(reg);
+ vex_printf("(H-reg)");
+}
+
/* --------- Condition codes, ARM64 encoding. --------- */
i->tag = ARM64in_MFence;
return i;
}
+ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg hD, HReg rN, UInt uimm12 ) {
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+ i->tag = ARM64in_VLdStH;
+ i->ARM64in.VLdStH.isLoad = isLoad;
+ i->ARM64in.VLdStH.hD = hD;
+ i->ARM64in.VLdStH.rN = rN;
+ i->ARM64in.VLdStH.uimm12 = uimm12;
+ vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
+ return i;
+}
ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
- i->tag = ARM64in_VLdStS;
+ i->tag = ARM64in_VLdStS;
i->ARM64in.VLdStS.isLoad = isLoad;
i->ARM64in.VLdStS.sD = sD;
i->ARM64in.VLdStS.rN = rN;
}
ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
- i->tag = ARM64in_VLdStD;
+ i->tag = ARM64in_VLdStD;
i->ARM64in.VLdStD.isLoad = isLoad;
i->ARM64in.VLdStD.dD = dD;
i->ARM64in.VLdStD.rN = rN;
}
ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
- i->tag = ARM64in_VCvtSD;
+ i->tag = ARM64in_VCvtSD;
i->ARM64in.VCvtSD.sToD = sToD;
i->ARM64in.VCvtSD.dst = dst;
i->ARM64in.VCvtSD.src = src;
return i;
}
+ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+ i->tag = ARM64in_VCvtHS;
+ i->ARM64in.VCvtHS.hToS = hToS;
+ i->ARM64in.VCvtHS.dst = dst;
+ i->ARM64in.VCvtHS.src = src;
+ return i;
+}
+ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+ i->tag = ARM64in_VCvtHD;
+ i->ARM64in.VCvtHD.hToD = hToD;
+ i->ARM64in.VCvtHD.dst = dst;
+ i->ARM64in.VCvtHD.src = src;
+ return i;
+}
ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
i->tag = ARM64in_VUnaryD;
case ARM64in_MFence:
vex_printf("(mfence) dsb sy; dmb sy; isb");
return;
+ case ARM64in_VLdStH:
+ if (i->ARM64in.VLdStH.isLoad) {
+ vex_printf("ldr ");
+ ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
+ vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
+ ppHRegARM64(i->ARM64in.VLdStH.rN);
+ vex_printf(")");
+ } else {
+ vex_printf("str ");
+ vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
+ ppHRegARM64(i->ARM64in.VLdStH.rN);
+ vex_printf("), ");
+ ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
+ }
+ return;
case ARM64in_VLdStS:
if (i->ARM64in.VLdStS.isLoad) {
vex_printf("ldr ");
ppHRegARM64(i->ARM64in.VCvtSD.src);
}
return;
+ case ARM64in_VCvtHS:
+ vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
+ if (i->ARM64in.VCvtHS.hToS) {
+ ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
+ vex_printf(", ");
+ ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
+ } else {
+ ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
+ vex_printf(", ");
+ ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
+ }
+ return;
+ case ARM64in_VCvtHD:
+ vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
+ if (i->ARM64in.VCvtHD.hToD) {
+ ppHRegARM64(i->ARM64in.VCvtHD.dst);
+ vex_printf(", ");
+ ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
+ } else {
+ ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
+ vex_printf(", ");
+ ppHRegARM64(i->ARM64in.VCvtHD.src);
+ }
+ return;
case ARM64in_VUnaryD:
vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
ppHRegARM64(i->ARM64in.VUnaryD.dst);
return;
case ARM64in_MFence:
return;
+ case ARM64in_VLdStH:
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
+ if (i->ARM64in.VLdStH.isLoad) {
+ addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
+ } else {
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
+ }
+ return;
case ARM64in_VLdStS:
addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
if (i->ARM64in.VLdStS.isLoad) {
addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
return;
+ case ARM64in_VCvtHS:
+ addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
+ addHRegUse(u, HRmRead, i->ARM64in.VCvtHS.src);
+ return;
+ case ARM64in_VCvtHD:
+ addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
+ addHRegUse(u, HRmRead, i->ARM64in.VCvtHD.src);
+ return;
case ARM64in_VUnaryD:
addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
return;
case ARM64in_MFence:
return;
+ case ARM64in_VLdStH:
+ i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
+ i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
+ return;
case ARM64in_VLdStS:
i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
return;
+ case ARM64in_VCvtHS:
+ i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
+ i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
+ return;
+ case ARM64in_VCvtHD:
+ i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
+ i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
+ return;
case ARM64in_VUnaryD:
i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
#define X11011000 BITS8(1,1,0,1,1,0,0,0)
#define X11011010 BITS8(1,1,0,1,1,0,1,0)
#define X11011110 BITS8(1,1,0,1,1,1,1,0)
+#define X11100010 BITS8(1,1,1,0,0,0,1,0)
#define X11110001 BITS8(1,1,1,1,0,0,0,1)
#define X11110011 BITS8(1,1,1,1,0,0,1,1)
#define X11110101 BITS8(1,1,1,1,0,1,0,1)
// *p++ = 0xD5033F5F; /* clrex */
// goto done;
//}
+ case ARM64in_VLdStH: {
+ /* 01 111101 01 imm12 n t LDR Ht, [Xn|SP, #imm12 * 2]
+ 01 111101 00 imm12 n t STR Ht, [Xn|SP, #imm12 * 2]
+ */
+ UInt hD = dregEnc(i->ARM64in.VLdStH.hD);
+ UInt rN = iregEnc(i->ARM64in.VLdStH.rN);
+ UInt uimm12 = i->ARM64in.VLdStH.uimm12;
+ Bool isLD = i->ARM64in.VLdStH.isLoad;
+ vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
+ uimm12 >>= 1;
+ vassert(uimm12 < (1<<12));
+ vassert(hD < 32);
+ vassert(rN < 31);
+ *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
+ uimm12, rN, hD);
+ goto done;
+ }
case ARM64in_VLdStS: {
/* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
goto done;
}
case ARM64in_VCvtSD: {
- /* 31 23 21 16 14 9 4
+ /* 31 23 21 16 14 9 4
000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
Rounding, when dst is smaller than src, is per the FPCR.
}
goto done;
}
+ case ARM64in_VCvtHS: {
+ /* 31 23 21 16 14 9 4
+ 000,11110, 11 10001 0,0 10000 n d FCVT Sd, Hn (H->S)
+ ---------- 00 ----- 1,1 --------- FCVT Hd, Sn (S->H)
+ Rounding, when dst is smaller than src, is per the FPCR.
+ */
+ UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
+ UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
+ if (i->ARM64in.VCvtHS.hToS) {
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
+ } else {
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
+ }
+ goto done;
+ }
+ case ARM64in_VCvtHD: {
+ /* 31 23 21 16 14 9 4
+ 000,11110, 11 10001 0,1 10000 n d FCVT Dd, Hn (H->D)
+ ---------- 01 ----- 1,1 --------- FCVT Hd, Dn (D->H)
+ Rounding, when dst is smaller than src, is per the FPCR.
+ */
+ UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
+ UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
+ if (i->ARM64in.VCvtHD.hToD) {
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
+ } else {
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
+ }
+ goto done;
+ }
case ARM64in_VUnaryD: {
/* 31 23 21 16 14 9 4
000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
ARM64in_StrEX,
ARM64in_MFence,
/* ARM64in_V*: scalar ops involving vector registers */
- ARM64in_VLdStS, /* 32-bit FP load/store, with imm offset */
- ARM64in_VLdStD, /* 64-bit FP load/store, with imm offset */
- ARM64in_VLdStQ,
+ ARM64in_VLdStH, /* ld/st to/from low 16 bits of vec reg, imm offset */
+ ARM64in_VLdStS, /* ld/st to/from low 32 bits of vec reg, imm offset */
+ ARM64in_VLdStD, /* ld/st to/from low 64 bits of vec reg, imm offset */
+ ARM64in_VLdStQ, /* ld/st to/from all 128 bits of vec reg, no offset */
ARM64in_VCvtI2F,
ARM64in_VCvtF2I,
- ARM64in_VCvtSD,
+ ARM64in_VCvtSD, /* scalar 32 bit FP <--> 64 bit FP */
+ ARM64in_VCvtHS, /* scalar 16 bit FP <--> 32 bit FP */
+ ARM64in_VCvtHD, /* scalar 16 bit FP <--> 64 bit FP */
ARM64in_VUnaryD,
ARM64in_VUnaryS,
ARM64in_VBinD,
struct {
} MFence;
/* --- INSTRUCTIONS INVOLVING VECTOR REGISTERS --- */
- /* 32-bit Fp load/store */
+ /* ld/st to/from low 16 bits of vec reg, imm offset */
+ struct {
+ Bool isLoad;
+ HReg hD;
+ HReg rN;
+ UInt uimm12; /* 0 .. 8190 inclusive, 0 % 2 */
+ } VLdStH;
+ /* ld/st to/from low 32 bits of vec reg, imm offset */
struct {
Bool isLoad;
HReg sD;
HReg rN;
UInt uimm12; /* 0 .. 16380 inclusive, 0 % 4 */
} VLdStS;
- /* 64-bit Fp load/store */
+ /* ld/st to/from low 64 bits of vec reg, imm offset */
struct {
Bool isLoad;
HReg dD;
HReg rN;
UInt uimm12; /* 0 .. 32760 inclusive, 0 % 8 */
} VLdStD;
- /* 128-bit Vector load/store. */
+ /* ld/st to/from all 128 bits of vec reg, no offset */
struct {
Bool isLoad;
HReg rQ; // data
UChar armRM; // ARM encoded RM:
// 00=nearest, 01=+inf, 10=-inf, 11=zero
} VCvtF2I;
- /* Convert between 32-bit and 64-bit FP values (both
- ways). (FCVT) */
+ /* Convert between 32-bit and 64-bit FP values (both ways). (FCVT) */
struct {
Bool sToD; /* True: F32->F64. False: F64->F32 */
HReg dst;
HReg src;
} VCvtSD;
+ /* Convert between 16-bit and 32-bit FP values (both ways). (FCVT) */
+ struct {
+ Bool hToS; /* True: F16->F32. False: F32->F16 */
+ HReg dst;
+ HReg src;
+ } VCvtHS;
+ /* Convert between 16-bit and 64-bit FP values (both ways). (FCVT) */
+ struct {
+ Bool hToD; /* True: F16->F64. False: F64->F16 */
+ HReg dst;
+ HReg src;
+ } VCvtHD;
/* 64-bit FP unary */
struct {
ARM64FpUnaryOp op;
extern ARM64Instr* ARM64Instr_LdrEX ( Int szB );
extern ARM64Instr* ARM64Instr_StrEX ( Int szB );
extern ARM64Instr* ARM64Instr_MFence ( void );
+extern ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg hD, HReg rN,
+ UInt uimm12 /* 0 .. 8190, 0 % 2 */ );
extern ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN,
UInt uimm12 /* 0 .. 16380, 0 % 4 */ );
extern ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN,
extern ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
UChar armRM );
extern ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src );
+extern ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src );
+extern ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op, HReg, HReg, HReg );
#include "host_arm64_defs.h"
-//ZZ /*---------------------------------------------------------*/
-//ZZ /*--- ARMvfp control word stuff ---*/
-//ZZ /*---------------------------------------------------------*/
-//ZZ
-//ZZ /* Vex-generated code expects to run with the FPU set as follows: all
-//ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV
-//ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough,
-//ZZ this corresponds to a FPSCR value of zero.
-//ZZ
-//ZZ fpscr should therefore be zero on entry to Vex-generated code, and
-//ZZ should be unchanged at exit. (Or at least the bottom 28 bits
-//ZZ should be zero).
-//ZZ */
-//ZZ
-//ZZ #define DEFAULT_FPSCR 0
-
-
/*---------------------------------------------------------*/
/*--- ISelEnv ---*/
/*---------------------------------------------------------*/
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
+static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselF16Expr ( ISelEnv* env, IRExpr* e );
+
static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
return ARM64cc_NE;
}
+ /* --- patterns rooted at: CmpNEZ16 --- */
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ16) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
+ ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
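+ /* Test only the low 16 bits; bits above 15 of r1 are not
+    guaranteed to be zero. */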
+ addInstr(env, ARM64Instr_Test(r1, xFFFF));
+ return ARM64cc_NE;
+ }
+
/* --- patterns rooted at: CmpNEZ64 --- */
if (e->tag == Iex_Unop
addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
return dst;
}
+ case Iop_1Sto16:
case Iop_1Sto32:
case Iop_1Sto64: {
/* As with the iselStmt case for 'tmp:I1 = expr', we could
addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
return dst;
}
+ case Iop_F16toF64: {
+ HReg src = iselF16Expr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
+ return dst;
+ }
case Iop_I32UtoF64:
case Iop_I32StoF64: {
/* Rounding mode is not involved here, since the
addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
return dst;
}
+ case Iop_F16toF32: {
+ HReg src = iselF16Expr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
+ return dst;
+ }
default:
break;
}
HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
HReg dstS = newVRegD(env);
- addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
+ addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
return dstS;
}
case Iop_I32UtoF32:
}
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (16 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 16-bit floating point value into a register, the identity
+ of which is returned. As with iselIntExpr_R, the reg may be either
+ real or virtual; in any case it must not be changed by subsequent
+ code emitted by the caller. Values are generated into HRcFlt64
+ registers despite the values themselves being Ity_F16s. */
+
+static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselF16Expr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt64);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_F16);
+
+ if (e->tag == Iex_Get) {
+ Int offs = e->Iex.Get.offset;
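+ /* The offset limits mirror VLdStH's uimm12 field: byte offsets
+    0 .. 8190, 2-aligned. */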
+ if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
+ HReg rD = newVRegD(env);
+ HReg rN = get_baseblock_register();
+ addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
+ return rD;
+ }
+ }
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ case Iop_F32toF16: {
+ HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
+ set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+ HReg dstH = newVRegD(env);
+ addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
+ return dstH;
+ }
+ case Iop_F64toF16: {
+ HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
+ set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+ HReg dstH = newVRegD(env);
+ addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
+ return dstH;
+ }
+ default:
+ break;
+ }
+ }
+
+ ppIRExpr(e);
+ vpanic("iselF16Expr_wrk");
+}
+
+
/*---------------------------------------------------------*/
/*--- ISEL: Vector expressions (256 bit) ---*/
/*---------------------------------------------------------*/
return;
}
if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
- HReg dD = iselFltExpr(env, stmt->Ist.Put.data);
+ HReg sD = iselFltExpr(env, stmt->Ist.Put.data);
+ HReg bbp = get_baseblock_register();
+ addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
+ return;
+ }
+ if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
+ HReg hD = iselF16Expr(env, stmt->Ist.Put.data);
HReg bbp = get_baseblock_register();
- addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
+ addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
return;
}
hreg = mkHReg(True, HRcInt64, 0, j++);
hregHI = mkHReg(True, HRcInt64, 0, j++);
break;
+ case Ity_F16: // we'll use HRcFlt64 regs for F16 too
case Ity_F32: // we'll use HRcFlt64 regs for F32 too
case Ity_F64:
hreg = mkHReg(True, HRcFlt64, 0, j++);