From: Julian Seward
Date: Mon, 30 Mar 2015 09:01:51 +0000 (+0000)
Subject: arm64: add support for
X-Git-Tag: svn/VALGRIND_3_11_0^2~71
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f337256b202c176aee065aa5f8a03d722206c580;p=thirdparty%2Fvalgrind.git

arm64: add support for

   FCVT{N,M,A,P,Z}{S,U} d_d, s_s
   FCVTN 4h/8h_4s, 2s/4s_2d
   FCVTL 4s_4h/8h, 2d_2s/4s
   FCVT Sd, Hn
   FCVT Dd, Hn
   FCVT Hd, Sn
   FCVT Hd, Dn

git-svn-id: svn://svn.valgrind.org/vex/trunk@3111
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index fbd891e83a..293d65e9cc 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -1416,7 +1416,7 @@ static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
    UInt laneSzB = 0;
    switch (laneTy) {
       case Ity_I8:                 laneSzB = 1;  break;
-      case Ity_I16:                laneSzB = 2;  break;
+      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
       case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
       case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
       case Ity_V128:               laneSzB = 16; break;
@@ -1436,7 +1436,7 @@ static void putQRegLO ( UInt qregNo, IRExpr* e )
    Int    off = offsetQRegLane(qregNo, ty, 0);
    switch (ty) {
       case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
-      case Ity_F32: case Ity_F64: case Ity_V128:
+      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
          break;
       default:
          vassert(0); // Other cases are probably invalid
@@ -1450,7 +1450,7 @@ static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
    Int off = offsetQRegLane(qregNo, ty, 0);
    switch (ty) {
       case Ity_I8:
-      case Ity_I16:
+      case Ity_F16: case Ity_I16:
       case Ity_I32: case Ity_I64:
       case Ity_F32: case Ity_F64: case Ity_V128:
          break;
@@ -1537,7 +1537,7 @@ static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
    switch (laneTy) {
       case Ity_F64: case Ity_I64:
       case Ity_I32: case Ity_F32:
-      case Ity_I16:
+      case Ity_I16: case Ity_F16:
       case Ity_I8:
          break;
       default:
@@ -1552,7 +1552,7 @@ static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
    Int off = offsetQRegLane(qregNo, laneTy, laneNo);
    switch (laneTy) {
       case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
-      case Ity_F64: case Ity_F32:
+      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
       default:
         vassert(0); // Other cases are ATC
@@ -9917,6 +9917,58 @@ Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }

+   ix = 0; /*INVALID*/
+   switch (opcode) {
+      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
+      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
+      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
+      default: break;
+   }
+   if (ix > 0) {
+      /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
+      /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
+      /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
+      /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
+      /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
+      /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
+      /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
+      /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
+      /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
+      /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
+      Bool           is64 = (size & 1) == 1;
+      IRType         tyF  = is64 ? Ity_F64 : Ity_F32;
+      IRType         tyI  = is64 ? Ity_I64 : Ity_I32;
+      IRRoundingMode irrm = 8; /*impossible*/
+      HChar          ch   = '?';
+      switch (ix) {
+         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
+         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
+         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
+         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
+         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
+         default: vassert(0);
+      }
+      IROp cvt = Iop_INVALID;
+      if (bitU == 1) {
+         cvt = is64 ? Iop_F64toI64U : Iop_F32toI32U;
+      } else {
+         cvt = is64 ? Iop_F64toI64S : Iop_F32toI32S;
+      }
+      IRTemp src = newTemp(tyF);
+      IRTemp res = newTemp(tyI);
+      assign(src, getQRegLane(nn, 0, tyF));
+      assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
+      putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
+      if (!is64) {
+         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
+      }
+      putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
+      HChar sOrD = is64 ? 'd' : 's';
+      DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
+          sOrD, dd, sOrD, nn);
+      return True;
+   }
+
    if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
       /* -------- 0,1x,11101: FRECPE  d_d, s_s -------- */
       /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
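The decode above maps each variant onto a VEX IR rounding mode: 'n' to Irrm_NEAREST, 'm' to Irrm_NegINF, 'p' to Irrm_PosINF and 'z' to Irrm_ZERO; FCVTA* (round to nearest, ties away from zero) has no exact IR counterpart, hence the Irrm_NEAREST "kludge?". The intended semantics can be sanity-checked with a minimal standalone sketch, assuming an AArch64 host and GCC/Clang extended asm (not part of this patch):

   #include <stdio.h>
   int main(void)
   {
      float f = -1.5f;
      long  m, p;
      __asm__("fcvtms %0, %s1" : "=r"(m) : "w"(f));  /* round toward -inf */
      __asm__("fcvtps %0, %s1" : "=r"(p) : "w"(f));  /* round toward +inf */
      printf("fcvtms(-1.5f)=%ld  fcvtps(-1.5f)=%ld\n", m, p);  /* -2, -1 */
      return 0;
   }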
@@ -11906,18 +11958,48 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }

-   if (bitU == 0 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
-      /* -------- 0,01,10110: FCVTN 2s/4s_2d -------- */
-      IRTemp  rm    = mk_get_IR_rounding_mode();
-      IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
-      IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
-      putQRegLane(dd, 2 * bitQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
-      putQRegLane(dd, 2 * bitQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
+   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
+      /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
+      UInt   nLanes = size == X00 ? 4 : 2;
+      IRType srcTy  = size == X00 ? Ity_F32 : Ity_F64;
+      IROp   opCvt  = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
+      IRTemp rm     = mk_get_IR_rounding_mode();
+      IRTemp src[nLanes];
+      for (UInt i = 0; i < nLanes; i++) {
+         src[i] = newTemp(srcTy);
+         assign(src[i], getQRegLane(nn, i, srcTy));
+      }
+      for (UInt i = 0; i < nLanes; i++) {
+         putQRegLane(dd, nLanes * bitQ + i,
+                         binop(opCvt, mkexpr(rm), mkexpr(src[i])));
+      }
       if (bitQ == 0) {
          putQRegLane(dd, 1, mkU64(0));
       }
-      DIP("fcvtn%s %s.%s, %s.2d\n", bitQ ? "2" : "",
-          nameQReg128(dd), bitQ ? "4s" : "2s", nameQReg128(nn));
+      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
+      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
+      DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
+          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
+      return True;
+   }
+
+   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
+      /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
+      UInt   nLanes = size == X00 ? 4 : 2;
+      IRType srcTy  = size == X00 ? Ity_F16 : Ity_F32;
+      IROp   opCvt  = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
+      IRTemp src[nLanes];
+      for (UInt i = 0; i < nLanes; i++) {
+         src[i] = newTemp(srcTy);
+         assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
+      }
+      for (UInt i = 0; i < nLanes; i++) {
+         putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
+      }
+      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
+      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
+      DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
+          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
       return True;
    }

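Note the asymmetry between the two blocks above: FCVTN (narrow) is a binop carrying the rounding mode, while FCVTL (widen) is a unop, because every F16 value is exactly representable as F32 (and every F32 as F64), so widening cannot round. A reference sketch of the size==X00 FCVTL case in C, assuming a compiler with __fp16 support:

   /* Reference semantics for FCVTL 4s_4h (illustrative, not VEX code). */
   void fcvtl_ref ( const __fp16 src[4], float dst[4] )
   {
      for (int i = 0; i < 4; i++)
         dst[i] = (float)src[i];   /* exact; no rounding mode involved */
   }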
"2" : "", + nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow); return True; } @@ -12628,36 +12710,67 @@ Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn) /* -------- 01,000111: FCVT h_d -------- */ /* -------- 01,000100: FCVT s_d -------- */ /* 31 23 21 16 14 9 4 - 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp) - --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp) - --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp) + 000 11110 11 10001 00 10000 n d FCVT Sd, Hn + --------- 11 ----- 01 --------- FCVT Dd, Hn + --------- 00 ----- 11 --------- FCVT Hd, Sn --------- 00 ----- 01 --------- FCVT Dd, Sn - --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp) + --------- 01 ----- 11 --------- FCVT Hd, Dn --------- 01 ----- 00 --------- FCVT Sd, Dn Rounding, when dst is smaller than src, is per the FPCR. */ UInt b2322 = ty; UInt b1615 = opcode & BITS2(1,1); - if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) { - /* Convert S to D */ - IRTemp res = newTemp(Ity_F64); - assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32))); - putQReg128(dd, mkV128(0x0000)); - putQRegLO(dd, mkexpr(res)); - DIP("fcvt %s, %s\n", - nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32)); - return True; - } - if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) { - /* Convert D to S */ - IRTemp res = newTemp(Ity_F32); - assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()), - getQRegLO(nn, Ity_F64))); - putQReg128(dd, mkV128(0x0000)); - putQRegLO(dd, mkexpr(res)); - DIP("fcvt %s, %s\n", - nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64)); - return True; + switch ((b2322 << 2) | b1615) { + case BITS4(0,0,0,1): // S -> D + case BITS4(1,1,0,1): { // H -> D + Bool srcIsH = b2322 == BITS2(1,1); + IRType srcTy = srcIsH ? Ity_F16 : Ity_F32; + IRTemp res = newTemp(Ity_F64); + assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64, + getQRegLO(nn, srcTy))); + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); + DIP("fcvt %s, %s\n", + nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy)); + return True; + } + case BITS4(0,1,0,0): // D -> S + case BITS4(0,1,1,1): { // D -> H + Bool dstIsH = b1615 == BITS2(1,1); + IRType dstTy = dstIsH ? Ity_F16 : Ity_F32; + IRTemp res = newTemp(dstTy); + assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32, + mkexpr(mk_get_IR_rounding_mode()), + getQRegLO(nn, Ity_F64))); + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); + DIP("fcvt %s, %s\n", + nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64)); + return True; + } + case BITS4(0,0,1,1): // S -> H + case BITS4(1,1,0,0): { // H -> S + Bool toH = b1615 == BITS2(1,1); + IRType srcTy = toH ? Ity_F32 : Ity_F16; + IRType dstTy = toH ? 
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index f937c4ddd2..fc0984a9d5 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -176,6 +176,11 @@ static void ppHRegARM64asSreg ( HReg reg ) {
    vex_printf("(S-reg)");
 }

+static void ppHRegARM64asHreg ( HReg reg ) {
+   ppHRegARM64(reg);
+   vex_printf("(H-reg)");
+}
+

 /* --------- Condition codes, ARM64 encoding. --------- */

@@ -1003,9 +1008,19 @@ ARM64Instr* ARM64Instr_MFence ( void ) {
    i->tag = ARM64in_MFence;
    return i;
 }
+ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
+   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+   i->tag                    = ARM64in_VLdStH;
+   i->ARM64in.VLdStH.isLoad  = isLoad;
+   i->ARM64in.VLdStH.hD      = sD;
+   i->ARM64in.VLdStH.rN      = rN;
+   i->ARM64in.VLdStH.uimm12  = uimm12;
+   vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
+   return i;
+}
 ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
-   i->tag = ARM64in_VLdStS;
+   i->tag                    = ARM64in_VLdStS;
    i->ARM64in.VLdStS.isLoad  = isLoad;
    i->ARM64in.VLdStS.sD      = sD;
    i->ARM64in.VLdStS.rN      = rN;
@@ -1015,7 +1030,7 @@ ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
 }
 ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
-   i->tag = ARM64in_VLdStD;
+   i->tag                    = ARM64in_VLdStD;
    i->ARM64in.VLdStD.isLoad  = isLoad;
    i->ARM64in.VLdStD.dD      = dD;
    i->ARM64in.VLdStD.rN      = rN;
@@ -1052,12 +1067,28 @@ ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
 }
 ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
-   i->tag = ARM64in_VCvtSD;
+   i->tag                 = ARM64in_VCvtSD;
    i->ARM64in.VCvtSD.sToD = sToD;
    i->ARM64in.VCvtSD.dst  = dst;
    i->ARM64in.VCvtSD.src  = src;
    return i;
 }
+ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
+   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+   i->tag                 = ARM64in_VCvtHS;
+   i->ARM64in.VCvtHS.hToS = hToS;
+   i->ARM64in.VCvtHS.dst  = dst;
+   i->ARM64in.VCvtHS.src  = src;
+   return i;
+}
+ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
+   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+   i->tag                 = ARM64in_VCvtHD;
+   i->ARM64in.VCvtHD.hToD = hToD;
+   i->ARM64in.VCvtHD.dst  = dst;
+   i->ARM64in.VCvtHD.src  = src;
+   return i;
+}
 ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    i->tag                 = ARM64in_VUnaryD;
@@ -1534,6 +1565,21 @@ void ppARM64Instr ( const ARM64Instr* i ) {
      case ARM64in_MFence:
         vex_printf("(mfence) dsb sy; dmb sy; isb");
         return;
+      case ARM64in_VLdStH:
+         if (i->ARM64in.VLdStH.isLoad) {
+            vex_printf("ldr    ");
+            ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
+            vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
+            ppHRegARM64(i->ARM64in.VLdStH.rN);
+            vex_printf(")");
+         } else {
+            vex_printf("str    ");
+            vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
+            ppHRegARM64(i->ARM64in.VLdStH.rN);
+            vex_printf("), ");
+            ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
+         }
+         return;
      case ARM64in_VLdStS:
         if (i->ARM64in.VLdStS.isLoad) {
            vex_printf("ldr    ");
@@ -1613,6 +1659,30 @@ void ppARM64Instr ( const ARM64Instr* i ) {
            ppHRegARM64(i->ARM64in.VCvtSD.src);
         }
         return;
+      case ARM64in_VCvtHS:
+         vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
+         if (i->ARM64in.VCvtHS.hToS) {
+            ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
+            vex_printf(", ");
+            ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
+         } else {
+            ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
+            vex_printf(", ");
+            ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
+         }
+         return;
+      case ARM64in_VCvtHD:
+         vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
+         if (i->ARM64in.VCvtHD.hToD) {
+            ppHRegARM64(i->ARM64in.VCvtHD.dst);
+            vex_printf(", ");
+            ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
+         } else {
+            ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
+            vex_printf(", ");
+            ppHRegARM64(i->ARM64in.VCvtHD.src);
+         }
+         return;
      case ARM64in_VUnaryD:
         vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
         ppHRegARM64(i->ARM64in.VUnaryD.dst);
@@ -1986,6 +2056,14 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
         return;
      case ARM64in_MFence:
         return;
+      case ARM64in_VLdStH:
+         addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
+         if (i->ARM64in.VLdStH.isLoad) {
+            addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
+         } else {
+            addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
+         }
+         return;
      case ARM64in_VLdStS:
         addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
         if (i->ARM64in.VLdStS.isLoad) {
@@ -2021,6 +2099,14 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
         addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
         addHRegUse(u, HRmRead,  i->ARM64in.VCvtSD.src);
         return;
+      case ARM64in_VCvtHS:
+         addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
+         addHRegUse(u, HRmRead,  i->ARM64in.VCvtHS.src);
+         return;
+      case ARM64in_VCvtHD:
+         addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
+         addHRegUse(u, HRmRead,  i->ARM64in.VCvtHD.src);
+         return;
      case ARM64in_VUnaryD:
         addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
         addHRegUse(u, HRmRead,  i->ARM64in.VUnaryD.src);
@@ -2230,6 +2316,10 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
         return;
      case ARM64in_MFence:
         return;
+      case ARM64in_VLdStH:
+         i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
+         i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
+         return;
      case ARM64in_VLdStS:
         i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
         i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
@@ -2254,6 +2344,14 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
         i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
         i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
         return;
+      case ARM64in_VCvtHS:
+         i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
+         i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
+         return;
+      case ARM64in_VCvtHD:
+         i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
+         i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
+         return;
      case ARM64in_VUnaryD:
         i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
         i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
@@ -2633,6 +2731,7 @@ static inline UInt qregEnc ( HReg r )
 #define X11011000  BITS8(1,1,0,1,1,0,0,0)
 #define X11011010  BITS8(1,1,0,1,1,0,1,0)
 #define X11011110  BITS8(1,1,0,1,1,1,1,0)
+#define X11100010  BITS8(1,1,1,0,0,0,1,0)
 #define X11110001  BITS8(1,1,1,1,0,0,0,1)
 #define X11110011  BITS8(1,1,1,1,0,0,1,1)
 #define X11110101  BITS8(1,1,1,1,0,1,0,1)
@@ -3702,6 +3801,23 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
      //   *p++ = 0xD5033F5F; /* clrex */
      //   goto done;
      //}
+      case ARM64in_VLdStH: {
+         /* 01 111101 01 imm12 n t   LDR Ht, [Xn|SP, #imm12 * 2]
+            01 111101 00 imm12 n t   STR Ht, [Xn|SP, #imm12 * 2]
+         */
+         UInt hD     = dregEnc(i->ARM64in.VLdStH.hD);
+         UInt rN     = iregEnc(i->ARM64in.VLdStH.rN);
+         UInt uimm12 = i->ARM64in.VLdStH.uimm12;
+         Bool isLD   = i->ARM64in.VLdStH.isLoad;
+         vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
+         uimm12 >>= 1;
+         vassert(uimm12 < (1<<12));
+         vassert(hD < 32);
+         vassert(rN < 31);
+         *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
+                               uimm12, rN, hD);
+         goto done;
+      }
      case ARM64in_VLdStS: {
         /* 10 111101 01 imm12 n t   LDR St, [Xn|SP, #imm12 * 4]
            10 111101 00 imm12 n t   STR St, [Xn|SP, #imm12 * 4]
@@ -3852,7 +3968,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
         goto done;
      }
      case ARM64in_VCvtSD: {
-         /* 31 23 21 16 14 9 4
+         /* 31        23 21     16  14    9 4
            000,11110, 00 10001 0,1 10000 n d   FCVT Dd, Sn (S->D)
            ---------- 01 ----- 0,0 ---------   FCVT Sd, Dn (D->S)
            Rounding, when dst is smaller than src, is per the FPCR.
@@ -3866,6 +3982,36 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
         }
         goto done;
      }
+      case ARM64in_VCvtHS: {
+         /* 31        23 21     16  14    9 4
+            000,11110, 11 10001 0,0 10000 n d   FCVT Sd, Hn (H->S)
+            ---------- 00 ----- 1,1 ---------   FCVT Hd, Sn (S->H)
+            Rounding, when dst is smaller than src, is per the FPCR.
+         */
+         UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
+         UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
+         if (i->ARM64in.VCvtHS.hToS) {
+            *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
+         } else {
+            *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
+         }
+         goto done;
+      }
+      case ARM64in_VCvtHD: {
+         /* 31        23 21     16  14    9 4
+            000,11110, 11 10001 0,1 10000 n d   FCVT Dd, Hn (H->D)
+            ---------- 01 ----- 1,1 ---------   FCVT Hd, Dn (D->H)
+            Rounding, when dst is smaller than src, is per the FPCR.
+         */
+         UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
+         UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
+         if (i->ARM64in.VCvtHD.hToD) {
+            *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
+         } else {
+            *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
+         }
+         goto done;
+      }
      case ARM64in_VUnaryD: {
         /* 31        23 21     16 14    9 4
           000,11110 01 1,0000 0,0 10000 n d  FMOV Dd, Dn (not handled)
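For cross-checking the ARM64in_VLdStH case above: the byte offset is halved into the 12-bit immediate of the load/store (unsigned immediate, H variant) form. The same word can be built by hand (illustrative helper, not VEX code):

   #include <stdint.h>
   #include <assert.h>
   /* LDR Ht, [Xn, #imm]:  01 111101 01 imm12 n t, imm scaled by 2. */
   static uint32_t ldr_h ( uint32_t ht, uint32_t xn, uint32_t imm )
   {
      assert(imm < 8192 && (imm & 1) == 0);
      return (0x1u << 30) | (0x3Du << 24) | (0x1u << 22)
             | ((imm >> 1) << 10) | (xn << 5) | ht;
   }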
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index ab3d91754f..fce9f83422 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -482,12 +482,15 @@ typedef
       ARM64in_StrEX,
       ARM64in_MFence,
       /* ARM64in_V*: scalar ops involving vector registers */
-      ARM64in_VLdStS,   /* 32-bit FP load/store, with imm offset */
-      ARM64in_VLdStD,   /* 64-bit FP load/store, with imm offset */
-      ARM64in_VLdStQ,
+      ARM64in_VLdStH,   /* ld/st to/from low 16 bits of vec reg, imm offset */
+      ARM64in_VLdStS,   /* ld/st to/from low 32 bits of vec reg, imm offset */
+      ARM64in_VLdStD,   /* ld/st to/from low 64 bits of vec reg, imm offset */
+      ARM64in_VLdStQ,   /* ld/st to/from all 128 bits of vec reg, no offset */
       ARM64in_VCvtI2F,
       ARM64in_VCvtF2I,
-      ARM64in_VCvtSD,
+      ARM64in_VCvtSD,   /* scalar 32 bit FP <--> 64 bit FP */
+      ARM64in_VCvtHS,   /* scalar 16 bit FP <--> 32 bit FP */
+      ARM64in_VCvtHD,   /* scalar 16 bit FP <--> 64 bit FP */
       ARM64in_VUnaryD,
       ARM64in_VUnaryS,
       ARM64in_VBinD,
@@ -670,21 +673,28 @@ typedef
         struct {
         } MFence;
         /* --- INSTRUCTIONS INVOLVING VECTOR REGISTERS --- */
-        /* 32-bit Fp load/store */
+        /* ld/st to/from low 16 bits of vec reg, imm offset */
+        struct {
+           Bool isLoad;
+           HReg hD;
+           HReg rN;
+           UInt uimm12;  /* 0 .. 8190 inclusive, 0 % 2 */
+        } VLdStH;
+        /* ld/st to/from low 32 bits of vec reg, imm offset */
         struct {
            Bool isLoad;
            HReg sD;
            HReg rN;
            UInt uimm12;  /* 0 .. 16380 inclusive, 0 % 4 */
         } VLdStS;
-        /* 64-bit Fp load/store */
+        /* ld/st to/from low 64 bits of vec reg, imm offset */
         struct {
            Bool isLoad;
            HReg dD;
            HReg rN;
            UInt uimm12;  /* 0 .. 32760 inclusive, 0 % 8 */
         } VLdStD;
-        /* 128-bit Vector load/store. */
+        /* ld/st to/from all 128 bits of vec reg, no offset */
         struct {
            Bool isLoad;
            HReg rQ; // data
@@ -704,13 +714,24 @@ typedef
            UChar armRM; // ARM encoded RM:
                         // 00=nearest, 01=+inf, 10=-inf, 11=zero
         } VCvtF2I;
-        /* Convert between 32-bit and 64-bit FP values (both
-           ways). (FCVT) */
+        /* Convert between 32-bit and 64-bit FP values (both ways). (FCVT) */
         struct {
            Bool sToD; /* True: F32->F64.  False: F64->F32 */
            HReg dst;
            HReg src;
         } VCvtSD;
+        /* Convert between 16-bit and 32-bit FP values (both ways). (FCVT) */
+        struct {
+           Bool hToS; /* True: F16->F32.  False: F32->F16 */
+           HReg dst;
+           HReg src;
+        } VCvtHS;
+        /* Convert between 16-bit and 64-bit FP values (both ways). (FCVT) */
+        struct {
+           Bool hToD; /* True: F16->F64.  False: F64->F16 */
+           HReg dst;
+           HReg src;
+        } VCvtHD;
         /* 64-bit FP unary */
         struct {
            ARM64FpUnaryOp op;
@@ -887,6 +908,8 @@ extern ARM64Instr* ARM64Instr_Mul    ( HReg dst, HReg argL, HReg argR,
 extern ARM64Instr* ARM64Instr_LdrEX  ( Int szB );
 extern ARM64Instr* ARM64Instr_StrEX  ( Int szB );
 extern ARM64Instr* ARM64Instr_MFence ( void );
+extern ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN,
+                                       UInt uimm12 /* 0 .. 8190, 0 % 2 */ );
 extern ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN,
                                        UInt uimm12 /* 0 .. 16380, 0 % 4 */ );
 extern ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN,
@@ -896,6 +919,8 @@ extern ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS );
 extern ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
                                         UChar armRM );
 extern ARM64Instr* ARM64Instr_VCvtSD  ( Bool sToD, HReg dst, HReg src );
+extern ARM64Instr* ARM64Instr_VCvtHS  ( Bool hToS, HReg dst, HReg src );
+extern ARM64Instr* ARM64Instr_VCvtHD  ( Bool hToD, HReg dst, HReg src );
 extern ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src );
 extern ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src );
 extern ARM64Instr* ARM64Instr_VBinD   ( ARM64FpBinOp op, HReg, HReg, HReg );
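The three uimm12 ranges documented above (8190, 16380, 32760) all fall out of the same 12-bit immediate field scaled by the access size; a standalone check (not VEX code):

   #include <stdio.h>
   int main(void)
   {
      for (int szB = 2; szB <= 8; szB *= 2)   /* H, S, D accesses */
         printf("%d-byte ld/st: max offset = %d\n", szB, 4095 * szB);
      return 0;   /* prints 8190, 16380, 32760 */
   }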
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 829c39a3d5..a6e507d050 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -40,23 +40,6 @@
 #include "host_arm64_defs.h"


-//ZZ /*---------------------------------------------------------*/
-//ZZ /*--- ARMvfp control word stuff                          ---*/
-//ZZ /*---------------------------------------------------------*/
-//ZZ
-//ZZ /* Vex-generated code expects to run with the FPU set as follows: all
-//ZZ    exceptions masked, round-to-nearest, non-vector mode, with the NZCV
-//ZZ    flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
-//ZZ    this corresponds to a FPSCR value of zero.
-//ZZ
-//ZZ    fpscr should therefore be zero on entry to Vex-generated code, and
-//ZZ    should be unchanged at exit.  (Or at least the bottom 28 bits
-//ZZ    should be zero).
-//ZZ */
-//ZZ
-//ZZ #define DEFAULT_FPSCR 0
-
-
 /*---------------------------------------------------------*/
 /*--- ISelEnv                                            ---*/
 /*---------------------------------------------------------*/
@@ -223,6 +206,9 @@ static HReg        iselDblExpr     ( ISelEnv* env, IRExpr* e );
 static HReg        iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
 static HReg        iselFltExpr     ( ISelEnv* env, IRExpr* e );

+static HReg        iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg        iselF16Expr     ( ISelEnv* env, IRExpr* e );
+
 static HReg        iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
 static HReg        iselV128Expr     ( ISelEnv* env, IRExpr* e );

@@ -1360,6 +1346,16 @@ static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
       return ARM64cc_NE;
    }

+   /* --- patterns rooted at: CmpNEZ16 --- */
+
+   if (e->tag == Iex_Unop
+       && e->Iex.Unop.op == Iop_CmpNEZ16) {
+      HReg      r1    = iselIntExpr_R(env, e->Iex.Unop.arg);
+      ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
+      addInstr(env, ARM64Instr_Test(r1, xFFFF));
+      return ARM64cc_NE;
+   }
+
    /* --- patterns rooted at: CmpNEZ64 --- */

    if (e->tag == Iex_Unop
@@ -1854,6 +1850,7 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
            addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
            return dst;
         }
+         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* As with the iselStmt case for 'tmp:I1 = expr', we could
@@ -3051,6 +3048,12 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
+         case Iop_F16toF64: {
+            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
+            return dst;
+         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Rounding mode is not involved here, since the
@@ -3226,6 +3229,12 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
+         case Iop_F16toF32: {
+            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
+            return dst;
+         }
         default:
            break;
      }
@@ -3253,7 +3262,7 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
-            addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
+            addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
            return dstS;
         }
         case Iop_I32UtoF32:
@@ -3315,6 +3324,70 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
 }


+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (16 bit)          ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 16-bit floating point value into a register, the identity
+   of which is returned.  As with iselIntExpr_R, the reg may be either
+   real or virtual; in any case it must not be changed by subsequent
+   code emitted by the caller.  Values are generated into HRcFlt64
+   registers despite the values themselves being Ity_F16s. */
+
+static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
+{
+   HReg r = iselF16Expr_wrk( env, e );
+#  if 0
+   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+#  endif
+   vassert(hregClass(r) == HRcFlt64);
+   vassert(hregIsVirtual(r));
+   return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
+{
+   IRType ty = typeOfIRExpr(env->type_env,e);
+   vassert(e);
+   vassert(ty == Ity_F16);
+
+   if (e->tag == Iex_Get) {
+      Int offs = e->Iex.Get.offset;
+      if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
+         HReg rD = newVRegD(env);
+         HReg rN = get_baseblock_register();
+         addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
+         return rD;
+      }
+   }
+
+   if (e->tag == Iex_Binop) {
+      switch (e->Iex.Binop.op) {
+         case Iop_F32toF16: {
+            HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
+            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+            HReg dstH = newVRegD(env);
+            addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
+            return dstH;
+         }
+         case Iop_F64toF16: {
+            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
+            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+            HReg dstH = newVRegD(env);
+            addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
+            return dstH;
+         }
+         default:
+            break;
+      }
+   }
+
+   ppIRExpr(e);
+   vpanic("iselF16Expr_wrk");
+}
+
+
 /*---------------------------------------------------------*/
 /*--- ISEL: Vector expressions (256 bit)                 ---*/
 /*---------------------------------------------------------*/
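Taken together with the Ity_F16 Put case added to iselStmt below, a narrowing store should select into a conversion plus a halfword store. A sketch of the expected sequence (virtual-register names invented; x21 assumed to be the baseblock pointer in this backend):

   /* IR:  PUT(offs) = F32toF16(rm, t1)
      =>   <set FPCR rounding field from rm>   -- set_FPCR_rounding_mode
           fcvt h<v>, s<t1>                    -- ARM64in_VCvtHS, hToS=False
           str  h<v>, [x21, #offs]             -- ARM64in_VLdStH, store     */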
@@ -3534,9 +3607,15 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
         return;
      }
      if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
-         HReg sD  = iselFltExpr(env, stmt->Ist.Put.data);
+         HReg sD  = iselFltExpr(env, stmt->Ist.Put.data);
+         HReg bbp = get_baseblock_register();
+         addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
+         return;
+      }
+      if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
+         HReg hD  = iselF16Expr(env, stmt->Ist.Put.data);
         HReg bbp = get_baseblock_register();
-         addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
+         addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
         return;
      }

@@ -3965,6 +4044,7 @@ HInstrArray* iselSB_ARM64 ( const IRSB* bb,
            hreg   = mkHReg(True, HRcInt64, 0, j++);
            hregHI = mkHReg(True, HRcInt64, 0, j++);
            break;
+         case Ity_F16: // we'll use HRcFlt64 regs for F16 too
         case Ity_F32: // we'll use HRcFlt64 regs for F32 too
         case Ity_F64:
            hreg = mkHReg(True, HRcFlt64, 0, j++);
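Finally, a small program whose translation exercises both the new front-end decodes and the new back-end paths when run under Valgrind on AArch64 (a sketch; assumes __fp16 support in the compiler):

   #include <stdio.h>
   int main(void)
   {
      volatile float  s = 1.0f / 3.0f;
      volatile __fp16 h = (__fp16)s;    /* fcvt Hd, Sn : Iop_F32toF16 */
      volatile double d = (double)h;    /* fcvt Dd, Hn : Iop_F16toF64 */
      printf("%.6f %.6f\n", (double)(float)h, d);
      return 0;
   }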