From 55d7ed3e773dae4800764d0c0d811689b5afd920 Mon Sep 17 00:00:00 2001
From: Julian Seward
Date: Fri, 7 Mar 2014 22:52:19 +0000
Subject: [PATCH] Support extra instruction bits and pieces, enough to get
 Firefox started:

* more scalar int <-> FP conversions
* more vector integer narrowing
* a few more vector shift by imm cases
* FCVTAS (kludged)

git-svn-id: svn://svn.valgrind.org/vex/trunk@2833
---
 VEX/priv/guest_arm64_toIR.c | 50 ++++++++++++++++++++++++++++++++++---
 VEX/priv/host_arm64_defs.c  | 27 +++++++++++++++++---
 VEX/priv/host_arm64_defs.h  |  1 +
 VEX/priv/host_arm64_isel.c  | 14 ++++++++++-
 4 files changed, 84 insertions(+), 8 deletions(-)

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 73c7f8bb16..3369492f70 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -5401,8 +5401,13 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
       // A bit of ATCery: bounce all cases we haven't seen an example of.
       if (/* F32toI32S */
              (op == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
+          || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
+          || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
           /* F32toI32U */
+          || (op == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
+          || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
           /* F32toI64S */
+          || (op == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
           /* F32toI64U */
           || (op == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
           /* F64toI32S */
@@ -5410,12 +5415,16 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
           || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
           || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
           /* F64toI32U */
-          || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
           || (op == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
+          || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
+          || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
           /* F64toI64S */
           || (op == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
+          || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
+          || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
           /* F64toI64U */
           || (op == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
+          || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          ) {
         /* validated */
       } else {
@@ -5433,6 +5442,37 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }

+   /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
+   /*   30       23   20 18  15     9 4
+      1 00 11110 0x 1 00 100 000000 n d  FCVTAS Xd, Fn
+      0 00 11110 0x 1 00 100 000000 n d  FCVTAS Wd, Fn
+      Fn is Dn when x==1, Sn when x==0
+   */
+   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
+       && INSN(21,16) == BITS6(1,0,0,1,0,0)
+       && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
+      Bool isI64 = INSN(31,31) == 1;
+      Bool isF64 = INSN(22,22) == 1;
+      UInt nn    = INSN(9,5);
+      UInt dd    = INSN(4,0);
+      /* Decide on the IR rounding mode to use. */
+      /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */
+      IRRoundingMode irrm = Irrm_NEAREST;
+      /* Decide on the conversion primop. */
+      IROp   op    = isI64 ? (isF64 ? Iop_F64toI64S : Iop_F32toI64S)
+                           : (isF64 ? Iop_F64toI32S : Iop_F32toI32S);
+      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
+      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
+      IRTemp src = newTemp(srcTy);
+      IRTemp dst = newTemp(dstTy);
+      assign(src, getQRegLO(nn, srcTy));
+      assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
+      putIRegOrZR(isI64, dd, mkexpr(dst));
+      DIP("fcvtas %s, %s (KLUDGED)\n",
+          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
+      return True;
+   }
+
    /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
    /* 31        23 21   17  14    9 4
       000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
@@ -5444,7 +5484,7 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
       010 -inf  (FRINTM)
       011 zero  (FRINTZ)
       000 tieeven
-      100 tieaway
+      100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
       110 per FPCR + "exact = TRUE"
       101 unallocated
    */
@@ -5461,6 +5501,8 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
          case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
          case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
          case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
+         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
+         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
          default: break;
       }
       if (irrmE) {
@@ -5483,9 +5525,9 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
       000 11110 11 10001 00 10000 n d   FCVT Sd, Hn (unimp)
       --------- 11 ----- 01 ---------   FCVT Dd, Hn (unimp)
       --------- 00 ----- 11 ---------   FCVT Hd, Sn (unimp)
-      --------- 00 ----- 01 ---------   FCVT Dd, Sn (unimp)
+      --------- 00 ----- 01 ---------   FCVT Dd, Sn
       --------- 01 ----- 11 ---------   FCVT Hd, Dn (unimp)
-      --------- 01 ----- 00 ---------   FCVT Sd, Dn (unimp)
+      --------- 01 ----- 00 ---------   FCVT Sd, Dn
       Rounding, when dst is smaller than src, is per the FPCR.
    */
    if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
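(Reviewer sketch, not part of the patch.) The FCVTAS decoder above keys only on
INSN(30,23), INSN(21,16) and INSN(15,10). The same field test can be replayed
outside VEX with plain shift-and-mask; the reference word 0x1E240020 for
"fcvtas w0, s1" is taken from the A64 encoding tables, not from this patch, so
treat it as an assumption:

   /* Standalone re-check of the FCVTAS decode condition.  bits() stands
      in for VEX's INSN()/BITS8()/BITS6() machinery; 0x1E240020 is assumed
      to be "fcvtas w0, s1" per the ARMv8 A64 encoding tables. */
   #include <stdio.h>

   static unsigned bits ( unsigned insn, int hi, int lo )
   {
      return (insn >> lo) & ((1u << (hi - lo + 1)) - 1);
   }

   int main ( void )
   {
      unsigned insn = 0x1E240020;           /* fcvtas w0, s1 (assumed) */
      int hit = bits(insn,30,23) == 0x3C    /* BITS8(0,0,1,1,1,1,0,0) */
             && bits(insn,21,16) == 0x24    /* BITS6(1,0,0,1,0,0) */
             && bits(insn,15,10) == 0x00;   /* BITS6(0,0,0,0,0,0) */
      /* Mirrors the decoder: isI64 from bit 31, isF64 from bit 22. */
      printf("match=%d isI64=%u isF64=%u nn=%u dd=%u\n",
             hit, bits(insn,31,31), bits(insn,22,22),
             bits(insn,9,5), bits(insn,4,0));
      return 0;
   }

For the word above this prints match=1 with isI64=0, isF64=0, nn=1, dd=0,
i.e. the Wd, Sn form, which is what the decoder's type selection expects.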
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index f4eceb5faa..049b459e91 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -905,6 +905,7 @@ static void showARM64VecShiftOp(/*OUT*/const HChar** nm,
 {
    switch (op) {
       case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
+      case ARM64vecsh_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
       case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
       case ARM64vecsh_SHL32x4:  *nm = "shl  "; *ar = "4s"; return;
       default: vpanic("showARM64VecShiftImmOp");
@@ -1620,6 +1621,8 @@ ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
         maxSh = 63; break;
      case ARM64vecsh_SHL32x4:
         maxSh = 31; break;
+     case ARM64vecsh_USHR16x8:
+        maxSh = 15; break;
      default:
         vassert(0);
   }
@@ -3351,6 +3354,7 @@ static inline UChar qregNo ( HReg r )
 #define X111110  BITS8(0,0, 1,1,1,1,1,0)
 #define X111111  BITS8(0,0, 1,1,1,1,1,1)

+#define X0010000 BITS8(0, 0,0,1,0,0,0,0)
 #define X0100000 BITS8(0, 0,1,0,0,0,0,0)
 #define X1000000 BITS8(0, 1,0,0,0,0,0,0)

@@ -4679,7 +4683,9 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
             *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
             break;
-         /* UCVTF Sd, Wn ATC */
+         case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
+            *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
+            break;
          case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
             *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
             break;
@@ -4728,12 +4734,18 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
             *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
                                  X000000, rN, rD);
             break;
-         /* */
          case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
             *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
                                  X000000, rN, rD);
             break;
-         /* */
+         case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
+            *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
+                                 X000000, rN, rD);
+            break;
+         case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
+            *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
+                                 X000000, rN, rD);
+            break;
          case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
             *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
                                  X000000, rN, rD);
@@ -5130,6 +5142,15 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                goto done;
             }
             break;
+         //case ARM64vecsh_SSHR16x8: syned = True; ATC
+         case ARM64vecsh_USHR16x8: /* fallthrough */
+            if (sh >= 1 && sh <= 15) {
+               UInt xxxx = 16-sh;
+               *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
+                                    X0010000 | xxxx, X000001, vN, vD);
+               goto done;
+            }
+            break;
          default:
             break;
       }
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index b8c7dc166d..a3531341fc 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -357,6 +357,7 @@ typedef
    typedef
       enum {
         ARM64vecsh_USHR64x2=350,
+        ARM64vecsh_USHR16x8,
         ARM64vecsh_SSHR64x2,
         ARM64vecsh_SHL32x4,
         ARM64vecsh_INVALID
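(Reviewer sketch, not part of the patch.) For the new ARM64vecsh_USHR16x8 case
the emitter packs the shift amount into immh:immb as X0010000 | xxxx with
xxxx = 16-sh, i.e. a 7-bit value 16+(16-sh), which matches the A64 rule
shift = 2*esize - UInt(immh:immb) for 16-bit lanes. Below, x_3_6_7_6_5_5() is
rebuilt from its field widths (3+6+7+6+5+5 = 32 bits), and the expected word
0x6F1D0420 for "ushr v0.8h, v1.8h, #3" is hand-derived from the encoding
tables, so it is also an assumption:

   /* Re-derivation of the USHR16x8 shift-by-immediate encoding added
      above.  x_3_6_7_6_5_5() mimics VEX's X_3_6_7_6_5_5 field packer. */
   #include <assert.h>
   #include <stdio.h>

   static unsigned x_3_6_7_6_5_5 ( unsigned f1, unsigned f2, unsigned f3,
                                   unsigned f4, unsigned f5, unsigned f6 )
   {
      return (f1 << 29) | (f2 << 23) | (f3 << 16)
             | (f4 << 10) | (f5 << 5) | f6;
   }

   int main ( void )
   {
      unsigned sh = 3, vN = 1, vD = 0;
      unsigned xxxx = 16 - sh;                     /* as in the emitter */
      unsigned w = x_3_6_7_6_5_5( 0x03 /* X011 */, 0x1E /* X011110 */,
                                  0x10 /* X0010000 */ | xxxx,
                                  0x01 /* X000001 */, vN, vD );
      assert(w == 0x6F1D0420);                     /* assumed reference */
      printf("ushr v%u.8h, v%u.8h, #%u -> 0x%08X\n", vD, vN, sh, w);
      return 0;
   }

The unsigned/signed split (X011 vs X010 in the top field) is bit 29, the U
bit, which is why the commented-out SSHR16x8 case can share this emitter path.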
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index e3b21af0b8..4d68804792 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -1812,6 +1812,10 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
               cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
            case Iop_F32toI32S:
               cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
+           case Iop_F32toI32U:
+              cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
+           case Iop_F32toI64S:
+              cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
            case Iop_F32toI64U:
               cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
            default:
@@ -2118,6 +2122,7 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
                                         ARM64sh_SAR));
         return dst;
      }
+     case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2: {
         HReg src = iselV128Expr(env, e->Iex.Unop.arg);
@@ -2125,6 +2130,7 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
         HReg dst = newVRegI(env);
         UInt dszBlg2 = 3; /* illegal */
         switch (e->Iex.Unop.op) {
+           case Iop_NarrowUn16to8x8:  dszBlg2 = 0; break; // 16to8_x8
            case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4
            case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2
            default: vassert(0);
@@ -5425,8 +5431,10 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
 //ZZ       case Iop_ShrN16x8:
 //ZZ       case Iop_ShrN32x4:
       case Iop_ShrN64x2:
+      case Iop_ShrN16x8:
       case Iop_SarN64x2:
-      case Iop_ShlN32x4: {
+      case Iop_ShlN32x4:
+      {
         IRExpr* argL = e->Iex.Binop.arg1;
         IRExpr* argR = e->Iex.Binop.arg2;
         if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
@@ -5436,6 +5444,8 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
            switch (e->Iex.Binop.op) {
               case Iop_ShrN64x2:
                  op = ARM64vecsh_USHR64x2; limit = 63; break;
+              case Iop_ShrN16x8:
+                 op = ARM64vecsh_USHR16x8; limit = 15; break;
               case Iop_SarN64x2:
                  op = ARM64vecsh_SSHR64x2; limit = 63; break;
               case Iop_ShlN32x4:
@@ -6167,11 +6177,13 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
         addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
         return dstS;
      }
+     case Iop_I32UtoF32:
      case Iop_I32StoF32:
      case Iop_I64UtoF32:
      case Iop_I64StoF32: {
         ARM64CvtOp cvt_op = ARM64cvt_INVALID;
         switch (e->Iex.Binop.op) {
+           case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
            case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
            case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
            case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
-- 
2.47.2
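(Reviewer sketch, not part of the patch.) A throwaway guest program of the
kind the patch now translates instead of bouncing through the ATC whitelist:
scalar float <-> unsigned conversions plus a 16-bit lane shift feeding a
16->8 narrow. Compiled at -O2 on aarch64 the scalar casts should become
FCVTZU/FCVTZS/UCVTF; whether the loop actually reaches USHR .8h and a narrow
depends on the compiler vectorising it, so that half is only a hopeful guess:

   /* Build natively on aarch64 (e.g. gcc -O2 smoke.c) and run under
      Valgrind.  Comments name the instructions these casts usually
      lower to; check the disassembly to confirm on your toolchain. */
   #include <stdint.h>
   #include <stdio.h>

   int main ( void )
   {
      volatile float f = 123.75f;           /* volatile: defeat folding */
      uint32_t u32 = (uint32_t)f;           /* FCVTZU Wd, Sn */
      int64_t  s64 = (int64_t)f;            /* FCVTZS Xd, Sn */
      float    g   = (float)u32;            /* UCVTF  Sd, Wn */

      uint16_t in[64];
      uint8_t  out[64];
      for (int i = 0; i < 64; i++)
         in[i] = (uint16_t)(i * 1031);
      for (int i = 0; i < 64; i++)
         out[i] = (uint8_t)(in[i] >> 3);    /* USHR .8h + narrow, if
                                               the loop vectorises */

      printf("%u %lld %.2f %u\n",
             u32, (long long)s64, g, (unsigned)out[63]);
      return 0;
   }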