From: Julian Seward Date: Mon, 6 Apr 2015 19:34:03 +0000 (+0000) Subject: arm64: implement FSQRT 2d_2d, 4s_4s, 2s_2s X-Git-Tag: svn/VALGRIND_3_11_0^2~64 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=467b00ceb67e6a6fcef821bcc9d86ef059a33352;p=thirdparty%2Fvalgrind.git arm64: implement FSQRT 2d_2d, 4s_4s, 2s_2s AFAICS this completes the AArch64 SIMD implementation, except for the crypto instructions. This changes the type of Iop_Sqrt64Fx2 and Iop_Sqrt32Fx4 so as to take a rounding mode argument. This will (temporarily, of course) break all of the other targets that implement vector fsqrt. git-svn-id: svn://svn.valgrind.org/vex/trunk@3120 --- diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c index a5b7d2137c..14c27f1abf 100644 --- a/VEX/priv/guest_arm64_toIR.c +++ b/VEX/priv/guest_arm64_toIR.c @@ -12437,6 +12437,21 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn) return True; } + if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) { + /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */ + Bool isD = (size & 1) == 1; + IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4; + if (bitQ == 0 && isD) return False; // implied 1d case + IRTemp resV = newTempV128(); + assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()), + getQReg128(nn))); + putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV)); + const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? 
"2d" : "4s"); + DIP("%s %s.%s, %s.%s\n", "fsqrt", + nameQReg128(dd), arr, nameQReg128(nn), arr); + return True; + } + return False; # undef INSN } diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index fc0984a9d5..b886e81cc2 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -749,6 +749,8 @@ static void showARM64VecUnaryOp(/*OUT*/const HChar** nm, case ARM64vecu_FRECPE32x4: *nm = "frecpe"; *ar = "4s"; return; case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d"; return; case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s"; return; + case ARM64vecu_FSQRT64x2: *nm = "fsqrt"; *ar = "2d"; return; + case ARM64vecu_FSQRT32x4: *nm = "fsqrt"; *ar = "4s"; return; default: vpanic("showARM64VecUnaryOp"); } } @@ -4869,6 +4871,9 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, 011 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d 011 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s + + 011 01110 11 1 00001 111110 n d FSQRT Vd.2d, Vn.2d + 011 01110 10 1 00001 111110 n d FSQRT Vd.4s, Vn.4s */ UInt vD = qregEnc(i->ARM64in.VUnaryV.dst); UInt vN = qregEnc(i->ARM64in.VUnaryV.arg); @@ -4960,6 +4965,12 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, case ARM64vecu_FRSQRTE32x4: *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD); break; + case ARM64vecu_FSQRT64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X111110, vN, vD); + break; + case ARM64vecu_FSQRT32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X111110, vN, vD); + break; default: goto bad; } diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index fce9f83422..039fce1682 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -414,6 +414,7 @@ typedef ARM64vecu_URSQRTE32x4, ARM64vecu_FRECPE64x2, ARM64vecu_FRECPE32x4, ARM64vecu_FRSQRTE64x2, ARM64vecu_FRSQRTE32x4, + ARM64vecu_FSQRT64x2, ARM64vecu_FSQRT32x4, ARM64vecu_INVALID } ARM64VecUnaryOp; diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index 
a6e507d050..b6e42f1a79 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -2363,6 +2363,17 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) if (e->tag == Iex_Binop) { switch (e->Iex.Binop.op) { + case Iop_Sqrt32Fx4: + case Iop_Sqrt64Fx2: { + HReg arg = iselV128Expr(env, e->Iex.Binop.arg2); + HReg res = newVRegV(env); + set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); + ARM64VecUnaryOp op + = e->Iex.Binop.op == Iop_Sqrt32Fx4 + ? ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2; + addInstr(env, ARM64Instr_VUnaryV(op, res, arg)); + return res; + } case Iop_64HLtoV128: { HReg res = newVRegV(env); HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c index 0b70f39fc2..596379e4b0 100644 --- a/VEX/priv/ir_defs.c +++ b/VEX/priv/ir_defs.c @@ -2859,6 +2859,10 @@ void typeOfPrimop ( IROp op, case Iop_RSqrtEst32Ux4: UNARY(Ity_V128, Ity_V128); + case Iop_Sqrt64Fx2: + case Iop_Sqrt32Fx4: + BINARY(ity_RMode,Ity_V128, Ity_V128); + case Iop_64HLtoV128: BINARY(Ity_I64,Ity_I64, Ity_V128); @@ -3019,8 +3023,8 @@ void typeOfPrimop ( IROp op, case Iop_RecipEst64Fx2: case Iop_RSqrtEst64Fx2: case Iop_RecipEst32Ux4: case Iop_RSqrtEst32F0x4: - case Iop_Sqrt32Fx4: case Iop_Sqrt32F0x4: - case Iop_Sqrt64Fx2: case Iop_Sqrt64F0x2: + case Iop_Sqrt32F0x4: + case Iop_Sqrt64F0x2: case Iop_CmpNEZ8x16: case Iop_CmpNEZ16x8: case Iop_CmpNEZ32x4: case Iop_CmpNEZ64x2: case Iop_Cnt8x16: diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 7178c7e2e1..145caa481c 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -1294,9 +1294,11 @@ typedef /* unary */ Iop_Abs32Fx4, - Iop_Sqrt32Fx4, Iop_Neg32Fx4, + /* binary :: IRRoundingMode(I32) x V128 -> V128 */ + Iop_Sqrt32Fx4, + /* Vector Reciprocal Estimate finds an approximate reciprocal of each element in the operand vector, and places the results in the destination vector. 
*/ @@ -1359,9 +1361,11 @@ typedef /* unary */ Iop_Abs64Fx2, - Iop_Sqrt64Fx2, Iop_Neg64Fx2, + /* binary :: IRRoundingMode(I32) x V128 -> V128 */ + Iop_Sqrt64Fx2, + /* see 32Fx4 variants for description */ Iop_RecipEst64Fx2, // unary Iop_RecipStep64Fx2, // binary