From: Julian Seward
Date: Tue, 24 Feb 2015 12:21:01 +0000 (+0000)
Subject: arm64: implement:
X-Git-Tag: svn/VALGRIND_3_11_0^2~90
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a73a71bc5597d67cd8b01ebaff8424bd42839738;p=thirdparty%2Fvalgrind.git

arm64: implement:
FRECPS d_d_d, s_s_s
FRSQRTS d_d_d, s_s_s
FRECPE d_d, s_s
FRSQRTE d_d, s_s
FRECPX d_d, s_s
FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s
FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s
FRECPE 2d_2d, 4s_4s, 2s_2s
FRSQRTE 2d_2d, 4s_4s, 2s_2s

git-svn-id: svn://svn.valgrind.org/vex/trunk@3092
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 884318223e..c40d44b3bd 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -9700,6 +9700,23 @@ Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
+      /* -------- 0,0x,11111: FRECPS  d_d_d, s_s_s -------- */
+      /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
+      Bool isSQRT = (size & 2) == 2;
+      Bool isD    = (size & 1) == 1;
+      IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
+                       : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
+      IRTemp res = newTempV128();
+      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
+      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+                                                             mkexpr(res))));
+      HChar c = isD ? 'd' : 's';
+      DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
+          c, dd, c, nn, c, mm);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
@@ -9900,7 +9917,37 @@ Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
-#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
+      /* -------- 0,1x,11101: FRECPE  d_d, s_s -------- */
+      /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
+      Bool isSQRT = bitU == 1;
+      Bool isD    = (size & 1) == 1;
+      IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
+                       : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
+      IRTemp resV = newTempV128();
+      assign(resV, unop(op, getQReg128(nn)));
+      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+                                                             mkexpr(resV))));
+      HChar c = isD ? 'd' : 's';
+      DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
+      return True;
+   }
+
+   if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
+      /* -------- 0,1x,11111: FRECPX  d_d, s_s -------- */
+      Bool isD = (size & 1) == 1;
+      IRType ty = isD ? Ity_F64 : Ity_F32;
+      IROp   op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
+      IRTemp res = newTemp(ty);
+      IRTemp rm  = mk_get_IR_rounding_mode();
+      assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
+      putQReg128(dd, mkV128(0x0000));
+      putQRegLane(dd, 0, mkexpr(res));
+      HChar c = isD ? 'd' : 's';
+      DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
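
In all of the scalar cases above, the result is written to lane 0 only and
the remaining bits of the destination Q register are cleared; that is what
wrapping the result in math_ZERO_ALL_EXCEPT_LOWEST_LANE(X10/X11, ...)
expresses.  A standalone model of that behaviour (an illustrative sketch,
not the VEX helper itself; the szLog2 argument plays the role of the
X10 = 32-bit-lane / X11 = 64-bit-lane constants used above):

   #include <stdint.h>
   #include <string.h>

   typedef struct { uint8_t b[16]; } V128;

   /* Keep lane 0, of width 2^szLog2 bytes, and zero everything above it:
      szLog2 == 2 models an S-register write, szLog2 == 3 a D-register
      write. */
   static V128 zero_all_except_lowest_lane ( unsigned szLog2, V128 v )
   {
      V128 r;
      memset(&r, 0, sizeof r);
      memcpy(r.b, v.b, (size_t)1 << szLog2);
      return r;
   }
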
@@ -11449,6 +11496,23 @@ Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
+      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      Bool isSQRT = (size & 2) == 2;
+      Bool isD    = (size & 1) == 1;
+      if (bitQ == 0 && isD) return False; // implied 1d case
+      IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
+                       : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
+      IRTemp res = newTempV128();
+      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
@@ -11857,7 +11921,6 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
-
    ix = 0;
    if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
       ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
@@ -11928,8 +11991,6 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
-
-
    if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
       /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
       /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
@@ -11983,6 +12044,23 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       /* else fall through */
    }
 
+   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
+      /* -------- 0,1x,11101: FRECPE  2d_2d, 4s_4s, 2s_2s -------- */
+      /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
+      Bool isSQRT = bitU == 1;
+      Bool isD    = (size & 1) == 1;
+      IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
+                       : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
+      if (bitQ == 0 && isD) return False; // implied 1d case
+      IRTemp resV = newTempV128();
+      assign(resV, unop(op, getQReg128(nn)));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
+      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
+      DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
+          nameQReg128(dd), arr, nameQReg128(nn), arr);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
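
For context on why these instructions come as estimate/step pairs: FRECPE
and FRSQRTE produce a low-precision seed, and FRECPS/FRSQRTS perform the
inner multiply of one Newton-Raphson refinement, computing 2.0 - a*b and
(3.0 - a*b)/2.0 respectively (the former is stated at the Iop_RecipStep
declarations in libvex_ir.h further down this patch; the latter is the
ARMv8-documented counterpart).  A minimal scalar sketch of the usage
pattern, not VEX code:

   #include <stdio.h>

   static double frecps(double a, double b)  { return 2.0 - a * b; }
   static double frsqrts(double a, double b) { return (3.0 - a * b) / 2.0; }

   int main(void)
   {
      double d = 3.0;
      double x = 0.3;               /* stand-in for a FRECPE estimate */
      for (int i = 0; i < 4; i++)
         x = x * frecps(d, x);      /* x converges to 1/d */
      printf("1/3 ~= %.17g\n", x);

      double y = 0.6;               /* stand-in for a FRSQRTE estimate */
      for (int i = 0; i < 4; i++)
         y = y * frsqrts(d * y, y); /* y converges to 1/sqrt(d) */
      printf("1/sqrt(3) ~= %.17g\n", y);
      return 0;
   }

Each step roughly doubles the number of correct bits, so a handful of
iterations recovers full single or double precision from the rough
hardware estimate.
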
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 8b5114e073..7cc0910a12 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -559,10 +559,11 @@ static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
 
 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
    switch (op) {
-      case ARM64fpu_NEG:  return "neg  ";
-      case ARM64fpu_ABS:  return "abs  ";
-      case ARM64fpu_SQRT: return "sqrt ";
-      case ARM64fpu_RINT: return "rinti";
+      case ARM64fpu_NEG:   return "neg  ";
+      case ARM64fpu_ABS:   return "abs  ";
+      case ARM64fpu_SQRT:  return "sqrt ";
+      case ARM64fpu_RINT:  return "rinti";
+      case ARM64fpu_RECPX: return "recpx";
       default: vpanic("showARM64FpUnaryOp");
    }
 }
@@ -687,22 +688,26 @@ static void showARM64VecBinOp(/*OUT*/const HChar** nm,
       case ARM64vecb_UQRSHL32x4:   *nm = "uqrshl";  *ar = "4s";  return;
       case ARM64vecb_UQRSHL16x8:   *nm = "uqrshl";  *ar = "8h";  return;
       case ARM64vecb_UQRSHL8x16:   *nm = "uqrshl";  *ar = "16b"; return;
-      case ARM64vecb_SSHL64x2:     *nm = "sshl";    *ar = "2d";  return;
-      case ARM64vecb_SSHL32x4:     *nm = "sshl";    *ar = "4s";  return;
-      case ARM64vecb_SSHL16x8:     *nm = "sshl";    *ar = "8h";  return;
-      case ARM64vecb_SSHL8x16:     *nm = "sshl";    *ar = "16b"; return;
-      case ARM64vecb_USHL64x2:     *nm = "ushl";    *ar = "2d";  return;
-      case ARM64vecb_USHL32x4:     *nm = "ushl";    *ar = "4s";  return;
-      case ARM64vecb_USHL16x8:     *nm = "ushl";    *ar = "8h";  return;
-      case ARM64vecb_USHL8x16:     *nm = "ushl";    *ar = "16b"; return;
-      case ARM64vecb_SRSHL64x2:    *nm = "srshl";   *ar = "2d";  return;
-      case ARM64vecb_SRSHL32x4:    *nm = "srshl";   *ar = "4s";  return;
-      case ARM64vecb_SRSHL16x8:    *nm = "srshl";   *ar = "8h";  return;
-      case ARM64vecb_SRSHL8x16:    *nm = "srshl";   *ar = "16b"; return;
-      case ARM64vecb_URSHL64x2:    *nm = "urshl";   *ar = "2d";  return;
-      case ARM64vecb_URSHL32x4:    *nm = "urshl";   *ar = "4s";  return;
-      case ARM64vecb_URSHL16x8:    *nm = "urshl";   *ar = "8h";  return;
-      case ARM64vecb_URSHL8x16:    *nm = "urshl";   *ar = "16b"; return;
+      case ARM64vecb_SSHL64x2:     *nm = "sshl  ";  *ar = "2d";  return;
+      case ARM64vecb_SSHL32x4:     *nm = "sshl  ";  *ar = "4s";  return;
+      case ARM64vecb_SSHL16x8:     *nm = "sshl  ";  *ar = "8h";  return;
+      case ARM64vecb_SSHL8x16:     *nm = "sshl  ";  *ar = "16b"; return;
+      case ARM64vecb_USHL64x2:     *nm = "ushl  ";  *ar = "2d";  return;
+      case ARM64vecb_USHL32x4:     *nm = "ushl  ";  *ar = "4s";  return;
+      case ARM64vecb_USHL16x8:     *nm = "ushl  ";  *ar = "8h";  return;
+      case ARM64vecb_USHL8x16:     *nm = "ushl  ";  *ar = "16b"; return;
+      case ARM64vecb_SRSHL64x2:    *nm = "srshl ";  *ar = "2d";  return;
+      case ARM64vecb_SRSHL32x4:    *nm = "srshl ";  *ar = "4s";  return;
+      case ARM64vecb_SRSHL16x8:    *nm = "srshl ";  *ar = "8h";  return;
+      case ARM64vecb_SRSHL8x16:    *nm = "srshl ";  *ar = "16b"; return;
+      case ARM64vecb_URSHL64x2:    *nm = "urshl ";  *ar = "2d";  return;
+      case ARM64vecb_URSHL32x4:    *nm = "urshl ";  *ar = "4s";  return;
+      case ARM64vecb_URSHL16x8:    *nm = "urshl ";  *ar = "8h";  return;
+      case ARM64vecb_URSHL8x16:    *nm = "urshl ";  *ar = "16b"; return;
+      case ARM64vecb_FRECPS64x2:   *nm = "frecps";  *ar = "2d";  return;
+      case ARM64vecb_FRECPS32x4:   *nm = "frecps";  *ar = "4s";  return;
+      case ARM64vecb_FRSQRTS64x2:  *nm = "frsqrts"; *ar = "2d";  return;
+      case ARM64vecb_FRSQRTS32x4:  *nm = "frsqrts"; *ar = "4s";  return;
       default: vpanic("showARM64VecBinOp");
    }
 }
@@ -752,6 +757,10 @@ static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
       case ARM64vecu_REV644S:      *nm = "rev64";   *ar = "4s";  return;
       case ARM64vecu_URECPE32x4:   *nm = "urecpe";  *ar = "4s";  return;
       case ARM64vecu_URSQRTE32x4:  *nm = "ursqrte"; *ar = "4s";  return;
+      case ARM64vecu_FRECPE64x2:   *nm = "frecpe";  *ar = "2d";  return;
+      case ARM64vecu_FRECPE32x4:   *nm = "frecpe";  *ar = "4s";  return;
+      case ARM64vecu_FRSQRTE64x2:  *nm = "frsqrte"; *ar = "2d";  return;
+      case ARM64vecu_FRSQRTE32x4:  *nm = "frsqrte"; *ar = "4s";  return;
       default: vpanic("showARM64VecUnaryOp");
    }
 }
@@ -2601,6 +2610,7 @@ static inline UChar qregNo ( HReg r )
 #define X110010  BITS8(0,0, 1,1,0,0,1,0)
 #define X110100  BITS8(0,0, 1,1,0,1,0,0)
 #define X110101  BITS8(0,0, 1,1,0,1,0,1)
+#define X110110  BITS8(0,0, 1,1,0,1,1,0)
 #define X110111  BITS8(0,0, 1,1,0,1,1,1)
 #define X111000  BITS8(0,0, 1,1,1,0,0,0)
 #define X111001  BITS8(0,0, 1,1,1,0,0,1)
@@ -2642,6 +2652,8 @@ static inline UChar qregNo ( HReg r )
 #define X11011110  BITS8(1,1,0,1,1,1,1,0)
 #define X11110001  BITS8(1,1,1,1,0,0,0,1)
 #define X11110011  BITS8(1,1,1,1,0,0,1,1)
+#define X11110101  BITS8(1,1,1,1,0,1,0,1)
+#define X11110111  BITS8(1,1,1,1,0,1,1,1)
 
 
 /* --- 4 fields --- */
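
The emitter hunks below build instruction words with X_3_8_5_6_5_5, whose
name gives the field widths: 3+8+5+6+5+5 = 32 bits, most significant field
first.  A hypothetical standalone equivalent, assuming exactly that layout
(an illustration, not the VEX definition):

   #include <stdint.h>

   static uint32_t pack_3_8_5_6_5_5 ( uint32_t f1, uint32_t f2, uint32_t f3,
                                      uint32_t f4, uint32_t f5, uint32_t f6 )
   {
      /* f1 -> bits 31:29, f2 -> 28:21, f3 -> 20:16,
         f4 -> bits 15:10, f5 -> 9:5,   f6 -> 4:0 */
      return (f1 << 29) | (f2 << 21) | (f3 << 16)
             | (f4 << 10) | (f5 << 5) | f6;
   }

   /* e.g. FRECPS Vd.2d, Vn.2d, Vm.2d from the encoding table below:
      pack_3_8_5_6_5_5(2 /+010+/, 0x73 /+01110011+/, vM,
                       0x3F /+111111+/, vN, vD)                        */
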
@@ -3878,7 +3890,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          000,11110 01 1,0000 0,0 10000 n d  FMOV Dd, Dn (not handled)
          ------------------- 0,1 ---------  FABS ------
          ------------------- 1,0 ---------  FNEG ------
-         ------------------- 1,1 ---------  FQSRT -----
+         ------------------- 1,1 ---------  FSQRT -----
       */
       UInt dD = dregNo(i->ARM64in.VUnaryD.dst);
       UInt dN = dregNo(i->ARM64in.VUnaryD.src);
@@ -3902,6 +3914,13 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
          goto done;
       }
+      /*
+         010, 11110 11 1,0000 1,1111 10 n d  FRECPX Dd, Dn
+      */
+      if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
+         *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
+         goto done;
+      }
       goto bad;
    }
    case ARM64in_VUnaryS: {
@@ -3909,7 +3928,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          000,11110 00 1,0000 0,0 10000 n d  FMOV Sd, Sn (not handled)
          ------------------- 0,1 ---------  FABS ------
          ------------------- 1,0 ---------  FNEG ------
-         ------------------- 1,1 ---------  FQSRT -----
+         ------------------- 1,1 ---------  FSQRT -----
       */
       UInt sD = dregNo(i->ARM64in.VUnaryS.dst);
       UInt sN = dregNo(i->ARM64in.VUnaryS.src);
@@ -3933,6 +3952,13 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
          goto done;
       }
+      /*
+         010, 11110 10 1,0000 1,1111 10 n d  FRECPX Sd, Sn
+      */
+      if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
+         *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
+         goto done;
+      }
       goto bad;
    }
    case ARM64in_VBinD: {
@@ -4176,6 +4202,11 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          010 01110 sz 1 m  010101 n d  SRSHL@sz  Vd, Vn, Vm
          011 01110 sz 1 m  010001 n d  USHL@sz   Vd, Vn, Vm
          011 01110 sz 1 m  010101 n d  URSHL@sz  Vd, Vn, Vm
+
+         010 01110 01 1 m  111111 n d  FRECPS  Vd.2d, Vn.2d, Vm.2d
+         010 01110 00 1 m  111111 n d  FRECPS  Vd.4s, Vn.4s, Vm.4s
+         010 01110 11 1 m  111111 n d  FRSQRTS Vd.2d, Vn.2d, Vm.2d
+         010 01110 10 1 m  111111 n d  FRSQRTS Vd.4s, Vn.4s, Vm.4s
       */
       UInt vD = qregNo(i->ARM64in.VBinV.dst);
       UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -4616,6 +4647,19 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
             *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
            break;
 
+         case ARM64vecb_FRECPS64x2:
+            *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
+            break;
+         case ARM64vecb_FRECPS32x4:
+            *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
+            break;
+         case ARM64vecb_FRSQRTS64x2:
+            *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
+            break;
+         case ARM64vecb_FRSQRTS32x4:
+            *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
+            break;
+
          default:
             goto bad;
       }
@@ -4692,6 +4736,12 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          010 01110 10 1 00001 110010 n d  URECPE  Vd.4s, Vn.4s
          011 01110 10 1 00001 110010 n d  URSQRTE Vd.4s, Vn.4s
+
+         010 01110 11 1 00001 110110 n d  FRECPE  Vd.2d, Vn.2d
+         010 01110 10 1 00001 110110 n d  FRECPE  Vd.4s, Vn.4s
+
+         011 01110 11 1 00001 110110 n d  FRSQRTE Vd.2d, Vn.2d
+         011 01110 10 1 00001 110110 n d  FRSQRTE Vd.4s, Vn.4s
       */
       UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
       UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
@@ -4771,6 +4821,18 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          case ARM64vecu_URSQRTE32x4:
             *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
             break;
+         case ARM64vecu_FRECPE64x2:
+            *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
+            break;
+         case ARM64vecu_FRECPE32x4:
+            *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
+            break;
+         case ARM64vecu_FRSQRTE64x2:
+            *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
+            break;
+         case ARM64vecu_FRSQRTE32x4:
+            *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
+            break;
          default:
            goto bad;
       }
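
On FRECPX semantics: for normal inputs the ARMv8 pseudocode keeps the
sign bit, bitwise-inverts the exponent field, and zeroes the fraction,
which yields a cheap scaling value used by reciprocal algorithms.  A rough
model under that assumption (NaN and other special-case handling omitted;
this is a sketch, not the architectural definition):

   #include <stdint.h>
   #include <string.h>

   static double frecpx_model ( double x )
   {
      uint64_t bits;
      memcpy(&bits, &x, 8);
      uint64_t sign = bits & 0x8000000000000000ULL;
      uint64_t exp  = (bits >> 52) & 0x7FF;
      /* sign preserved, exponent inverted, fraction cleared */
      bits = sign | ((~exp & 0x7FF) << 52);
      memcpy(&x, &bits, 8);
      return x;
   }
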
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index b25cf20a2b..281cb0e86e 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -301,6 +301,7 @@ typedef
       ARM64fpu_ABS,
       ARM64fpu_SQRT,
       ARM64fpu_RINT,
+      ARM64fpu_RECPX,
       ARM64fpu_INVALID
    }
    ARM64FpUnaryOp;
@@ -383,6 +384,8 @@ typedef
       ARM64vecb_SRSHL16x8,   ARM64vecb_SRSHL8x16,
       ARM64vecb_URSHL64x2,   ARM64vecb_URSHL32x4,
       ARM64vecb_URSHL16x8,   ARM64vecb_URSHL8x16,
+      ARM64vecb_FRECPS64x2,  ARM64vecb_FRECPS32x4,
+      ARM64vecb_FRSQRTS64x2, ARM64vecb_FRSQRTS32x4,
       ARM64vecb_INVALID
    }
    ARM64VecBinOp;
@@ -413,6 +416,8 @@ typedef
       ARM64vecu_REV6416B,    ARM64vecu_REV648H,
       ARM64vecu_REV644S,
       ARM64vecu_URECPE32x4,  ARM64vecu_URSQRTE32x4,
+      ARM64vecu_FRECPE64x2,  ARM64vecu_FRECPE32x4,
+      ARM64vecu_FRSQRTE64x2, ARM64vecu_FRSQRTE32x4,
       ARM64vecu_INVALID
    }
    ARM64VecUnaryOp;
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 9baed6b8c5..afde38f610 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -366,7 +366,7 @@ static Bool isZeroU64 ( IRExpr* e ) {
 /* Set the FP rounding mode: 'mode' is an I32-typed expression
    denoting a value in the range 0 .. 3, indicating a round mode
    encoded as per type IRRoundingMode -- the first four values only
-   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the PPC
+   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
    FSCR to have the same rounding.
 
    For speed & simplicity, we're setting the *entire* FPCR here.
@@ -2244,9 +2244,12 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
       case Iop_Reverse32sIn64_x2:
       case Iop_RecipEst32Ux4:
       case Iop_RSqrtEst32Ux4:
+      case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
+      case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
       {
-         HReg res = newVRegV(env);
-         HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
+         HReg res   = newVRegV(env);
+         HReg arg   = iselV128Expr(env, e->Iex.Unop.arg);
+         Bool setRM = False;
          ARM64VecUnaryOp op = ARM64vecu_INVALID;
          switch (e->Iex.Unop.op) {
             case Iop_NotV128: op = ARM64vecu_NOT; break;
@@ -2274,8 +2277,23 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
            case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;     break;
            case Iop_RecipEst32Ux4:     op = ARM64vecu_URECPE32x4;  break;
            case Iop_RSqrtEst32Ux4:     op = ARM64vecu_URSQRTE32x4; break;
+           case Iop_RecipEst64Fx2:     setRM = True;
+                                       op = ARM64vecu_FRECPE64x2;  break;
+           case Iop_RecipEst32Fx4:     setRM = True;
+                                       op = ARM64vecu_FRECPE32x4;  break;
+           case Iop_RSqrtEst64Fx2:     setRM = True;
+                                       op = ARM64vecu_FRSQRTE64x2; break;
+           case Iop_RSqrtEst32Fx4:     setRM = True;
+                                       op = ARM64vecu_FRSQRTE32x4; break;
            default: vassert(0);
          }
+         if (setRM) {
+            // This is a bit of a kludge.  We should do rm properly for
+            // these recip-est insns, but that would require changing the
+            // primop's type to take an rmode.
+            set_FPCR_rounding_mode(env, IRExpr_Const(
+                                           IRConst_U32(Irrm_NEAREST)));
+         }
          addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
          return res;
       }
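
The set_FPCR_rounding_mode calls force Irrm_NEAREST because these estimate
primops carry no rounding-mode argument.  As background on the translation
that helper performs: IRRoundingMode numbers the modes 0..3 as nearest,
-inf, +inf, zero (see the comment at the top of this file), while the
hardware FPCR.RMode field in bits [23:22] uses 00=nearest, 01=+inf,
10=-inf, 11=zero, so the middle two encodings swap.  A hypothetical
standalone mapping, not the VEX routine:

   #include <stdint.h>

   static uint32_t fpcr_rmode_from_irrm ( uint32_t irrm /* 0..3 */ )
   {
      /* 0->0, 1->2, 2->1, 3->3 : swap the -inf/+inf encodings */
      static const uint32_t map[4] = { 0, 2, 1, 3 };
      return map[irrm & 3] << 22;   /* position at FPCR bits 23:22 */
   }
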
@@ -2407,11 +2425,14 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
       case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
       case Iop_Max64Fx2: case Iop_Max32Fx4:
       case Iop_Min64Fx2: case Iop_Min32Fx4:
+      case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
+      case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
       {
-         HReg res  = newVRegV(env);
-         HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
-         HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
-         Bool sw   = False;
+         HReg res   = newVRegV(env);
+         HReg argL  = iselV128Expr(env, e->Iex.Binop.arg1);
+         HReg argR  = iselV128Expr(env, e->Iex.Binop.arg2);
+         Bool sw    = False;
+         Bool setRM = False;
          ARM64VecBinOp op = ARM64vecb_INVALID;
          switch (e->Iex.Binop.op) {
            case Iop_AndV128: op = ARM64vecb_AND; break;
@@ -2528,8 +2549,23 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
            case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break;
            case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break;
            case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break;
+           case Iop_RecipStep64Fx2: setRM = True;
+                                    op = ARM64vecb_FRECPS64x2;  break;
+           case Iop_RecipStep32Fx4: setRM = True;
+                                    op = ARM64vecb_FRECPS32x4;  break;
+           case Iop_RSqrtStep64Fx2: setRM = True;
+                                    op = ARM64vecb_FRSQRTS64x2; break;
+           case Iop_RSqrtStep32Fx4: setRM = True;
+                                    op = ARM64vecb_FRSQRTS32x4; break;
            default: vassert(0);
          }
+         if (setRM) {
+            // This is a bit of a kludge.  We should do rm properly for
+            // these recip-step insns, but that would require changing the
+            // primop's type to take an rmode.
+            set_FPCR_rounding_mode(env, IRExpr_Const(
+                                           IRConst_U32(Irrm_NEAREST)));
+         }
          if (sw) {
            addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
         } else {
@@ -3034,18 +3070,20 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
 
    if (e->tag == Iex_Binop) {
       switch (e->Iex.Binop.op) {
-         case Iop_RoundF64toInt: {
-            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
-            HReg dst = newVRegD(env);
-            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
-            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
-            return dst;
-         }
-         case Iop_SqrtF64: {
+         case Iop_RoundF64toInt:
+         case Iop_SqrtF64:
+         case Iop_RecpExpF64: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
-            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
+            ARM64FpUnaryOp op = ARM64fpu_INVALID;
+            switch (e->Iex.Binop.op) {
+               case Iop_RoundF64toInt: op = ARM64fpu_RINT;  break;
+               case Iop_SqrtF64:       op = ARM64fpu_SQRT;  break;
+               case Iop_RecpExpF64:    op = ARM64fpu_RECPX; break;
+               default: vassert(0);
+            }
+            addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
            return dst;
          }
         case Iop_I64StoF64:
@@ -3195,18 +3233,20 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
 
    if (e->tag == Iex_Binop) {
       switch (e->Iex.Binop.op) {
-         case Iop_RoundF32toInt: {
-            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
-            HReg dst = newVRegD(env);
-            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
-            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
-            return dst;
-         }
-         case Iop_SqrtF32: {
+         case Iop_RoundF32toInt:
+         case Iop_SqrtF32:
+         case Iop_RecpExpF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
-            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
+            ARM64FpUnaryOp op = ARM64fpu_INVALID;
+            switch (e->Iex.Binop.op) {
+               case Iop_RoundF32toInt: op = ARM64fpu_RINT;  break;
+               case Iop_SqrtF32:       op = ARM64fpu_SQRT;  break;
+               case Iop_RecpExpF32:    op = ARM64fpu_RECPX; break;
+               default: vassert(0);
+            }
+            addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
            return dst;
          }
         case Iop_F64toF32: {
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 64e487d1b3..c56095cb58 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -337,6 +337,9 @@ void ppIROp ( IROp op )
       case Iop_TruncF64asF32: vex_printf("TruncF64asF32"); return;
 
+      case Iop_RecpExpF64: vex_printf("RecpExpF64"); return;
+      case Iop_RecpExpF32: vex_printf("RecpExpF32"); return;
+
       case Iop_QAdd32S: vex_printf("QAdd32S"); return;
       case Iop_QSub32S: vex_printf("QSub32S"); return;
 
       case Iop_Add16x2:   vex_printf("Add16x2"); return;
@@ -637,10 +640,15 @@ void ppIROp ( IROp op )
       case Iop_RecipEst32F0x4: vex_printf("RecipEst32F0x4"); return;
       case Iop_RecipStep32Fx2: vex_printf("RecipStep32Fx2"); return;
       case Iop_RecipStep32Fx4: vex_printf("RecipStep32Fx4"); return;
+      case Iop_RecipEst64Fx2:  vex_printf("RecipEst64Fx2"); return;
+      case Iop_RecipStep64Fx2: vex_printf("RecipStep64Fx2"); return;
+
       case Iop_Abs32Fx4:  vex_printf("Abs32Fx4"); return;
       case Iop_Abs64Fx2:  vex_printf("Abs64Fx2"); return;
 
       case Iop_RSqrtStep32Fx4: vex_printf("RSqrtStep32Fx4"); return;
+      case Iop_RSqrtStep64Fx2: vex_printf("RSqrtStep64Fx2"); return;
       case Iop_RSqrtStep32Fx2: vex_printf("RSqrtStep32Fx2"); return;
+      case Iop_RSqrtEst64Fx2:  vex_printf("RSqrtEst64Fx2"); return;
       case Iop_RSqrtEst32F0x4: vex_printf("RSqrtEst32F0x4"); return;
       case Iop_RSqrtEst32Fx8:  vex_printf("RSqrtEst32Fx8"); return;
@@ -2753,10 +2761,12 @@ void typeOfPrimop ( IROp op,
          UNARY(Ity_F32, Ity_F32);
 
       case Iop_SqrtF64:
+      case Iop_RecpExpF64:
         BINARY(ity_RMode,Ity_F64, Ity_F64);
 
       case Iop_SqrtF32:
       case Iop_RoundF32toInt:
+      case Iop_RecpExpF32:
         BINARY(ity_RMode,Ity_F32, Ity_F32);
 
       case Iop_CmpF32:
@@ -2971,8 +2981,8 @@ void typeOfPrimop ( IROp op,
       case Iop_InterleaveOddLanes16x8: case Iop_InterleaveEvenLanes16x8:
       case Iop_InterleaveOddLanes32x4: case Iop_InterleaveEvenLanes32x4:
       case Iop_Perm8x16: case Iop_Perm32x4:
-      case Iop_RecipStep32Fx4:
-      case Iop_RSqrtStep32Fx4:
+      case Iop_RecipStep32Fx4: case Iop_RecipStep64Fx2:
+      case Iop_RSqrtStep32Fx4: case Iop_RSqrtStep64Fx2:
       case Iop_CipherV128:
       case Iop_CipherLV128:
       case Iop_NCipherV128:
@@ -2995,6 +3005,7 @@ void typeOfPrimop ( IROp op,
       case Iop_NotV128:
       case Iop_RecipEst32Fx4:
       case Iop_RecipEst32F0x4:
+      case Iop_RecipEst64Fx2: case Iop_RSqrtEst64Fx2:
       case Iop_RecipEst32Ux4:
       case Iop_RSqrtEst32F0x4:
       case Iop_Sqrt32Fx4:
       case Iop_Sqrt32F0x4:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index bd97f87102..3d2c2b2be7 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -754,6 +754,11 @@ typedef
          /* NB: pretty much the same as Iop_F64toF32, except no change
            of type. */
 
+      /* --- guest arm64 specifics, not mandated by 754. --- */
+
+      Iop_RecpExpF64,  /* FRECPX d :: IRRoundingMode(I32) x F64 -> F64 */
+      Iop_RecpExpF32,  /* FRECPX s :: IRRoundingMode(I32) x F32 -> F32 */
+
       /* ------------------ 32-bit SIMD Integer ------------------ */
 
       /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
@@ -1284,8 +1289,8 @@ typedef
       Iop_Neg32Fx4,
 
       /* Vector Reciprocal Estimate finds an approximate reciprocal of each
-         element in the operand vector, and places the results in the destination
-         vector. */
+         element in the operand vector, and places the results in the
+         destination vector. */
       Iop_RecipEst32Fx4,
 
       /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
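
The Fx4/Fx2 suffixes on these IROps denote four F32 lanes and two F64
lanes respectively, with the operation applied independently per lane.
A per-lane model of Iop_RecipStep32Fx4 (illustrative only; it ignores the
rounding-mode and exception detail the real instruction carries):

   typedef struct { float f[4]; } F32x4;

   /* Each lane gets the reciprocal-step value 2.0 - a*b. */
   static F32x4 recip_step_32fx4 ( F32x4 a, F32x4 b )
   {
      F32x4 r;
      for (int i = 0; i < 4; i++)
         r.f[i] = 2.0f - a.f[i] * b.f[i];
      return r;
   }
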
@@ -1348,6 +1353,12 @@ typedef
       Iop_Sqrt64Fx2,
       Iop_Neg64Fx2,
 
+      /* see 32Fx4 variants for description */
+      Iop_RecipEst64Fx2,    // unary
+      Iop_RecipStep64Fx2,   // binary
+      Iop_RSqrtEst64Fx2,    // unary
+      Iop_RSqrtStep64Fx2,   // binary
+
       /* --- 64x2 lowest-lane-only scalar FP --- */
 
       /* In binary cases, upper half is copied from first operand.  In