From: Julian Seward
Date: Thu, 12 Jun 2014 13:16:01 +0000 (+0000)
Subject: Implement FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[].
X-Git-Tag: svn/VALGRIND_3_10_1^2~97
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8ec6971bfd348e93019069b9c9f6e565d622dfbb;p=thirdparty%2Fvalgrind.git

Implement FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[].

git-svn-id: svn://svn.valgrind.org/vex/trunk@2874
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index cd83232bec..91cd6dc3fc 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -1112,7 +1112,7 @@ static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
    Int off = offsetQRegLane(qregNo, laneTy, laneNo);
    switch (laneTy) {
       case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
-      case Ity_F64:
+      case Ity_F64: case Ity_F32:
          break;
       default:
          vassert(0); // Other cases are ATC
@@ -1688,6 +1688,30 @@ static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
 }
 
 
+/* Duplicates the src element exactly so as to fill a V128 value. Only
+   handles src types of F64 and F32. */
+static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
+{
+   IRTemp res = newTemp(Ity_V128);
+   if (srcTy == Ity_F64) {
+      IRTemp i64 = newTemp(Ity_I64);
+      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
+      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
+      return res;
+   }
+   if (srcTy == Ity_F32) {
+      IRTemp i64a = newTemp(Ity_I64);
+      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
+      IRTemp i64b = newTemp(Ity_I64);
+      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
+                                   mkexpr(i64a)));
+      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
+      return res;
+   }
+   vassert(0);
+}
+
+
 /*------------------------------------------------------------*/
 /*--- FP comparison helpers                                ---*/
 /*------------------------------------------------------------*/
@@ -6842,7 +6866,53 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
 static
 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
 {
+   /* 31    28    23   21 20 19 15     11   9 4
+      0 Q U 01111 size L  M  m  opcode H 0 n d
+      Decode fields are: u,size,opcode
+   */
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,31) != 0
+       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
+      return False;
+   }
+   UInt bitQ   = INSN(30,30);
+   UInt bitU   = INSN(29,29);
+   UInt size   = INSN(23,22);
+   UInt bitL   = INSN(21,21);
+   UInt bitM   = INSN(20,20);
+   UInt mmLO4  = INSN(19,16);
+   UInt opcode = INSN(15,12);
+   UInt bitH   = INSN(11,11);
+   UInt nn     = INSN(9,5);
+   UInt dd     = INSN(4,0);
+   UInt mm     = (bitM << 4) | mmLO4;
+   vassert(size < 4);
+
+   if (bitU == 0 && size >= X10 && opcode == BITS4(1,0,0,1)) {
+      /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
+      if (bitQ == 0 && size == X11) return False; // implied 1d case
+      Bool isD = (size & 1) == 1;
+      UInt index;
+      if      (!isD)             index = (bitH << 1) | bitL;
+      else if (isD && bitL == 0) index = bitH;
+      else return False; // sz:L == x11 => unallocated encoding
+      vassert(index < (isD ? 2 : 4));
+      IRType ity  = isD ? Ity_F64 : Ity_F32;
+      IRTemp elem = newTemp(ity);
+      assign(elem, getQRegLane(mm, index, ity));
+      IRTemp dupd = math_DUP_TO_V128(elem, ity);
+      IRTemp res  = newTemp(Ity_V128);
+      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
+                        mkexpr(mk_get_IR_rounding_mode()),
+                        getQReg128(nn), mkexpr(dupd)));
+      putQReg128(dd, bitQ == 0 ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
+                               : mkexpr(res));
+      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+      DIP("fmul %s.%s, %s.%s, %s.%c[%u]\n", nameQReg128(dd), arr,
+          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 3bc52172f8..aa88f7bfef 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -1809,6 +1809,14 @@ ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
    vassert(laneNo <= 1);
    return i;
 }
+ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
+   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
+   i->tag                      = ARM64in_VXfromDorS;
+   i->ARM64in.VXfromDorS.rX    = rX;
+   i->ARM64in.VXfromDorS.rDorS = rDorS;
+   i->ARM64in.VXfromDorS.fromD = fromD;
+   return i;
+}
 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
    i->tag = ARM64in_VMov;
@@ -2472,12 +2480,19 @@ void ppARM64Instr ( ARM64Instr* i ) {
          ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
          return;
       case ARM64in_VXfromQ:
-         vex_printf("mov ");
+         vex_printf("fmov ");
          ppHRegARM64(i->ARM64in.VXfromQ.rX);
          vex_printf(", ");
          ppHRegARM64(i->ARM64in.VXfromQ.rQ);
          vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
          return;
+      case ARM64in_VXfromDorS:
+         vex_printf("fmov ");
+         ppHRegARM64(i->ARM64in.VXfromDorS.rX);
+         vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
+         ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
+         vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
+         return;
       case ARM64in_VMov: {
          UChar aux = '?';
          switch (i->ARM64in.VMov.szB) {
@@ -2865,6 +2880,10 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 )
          addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
          addHRegUse(u, HRmRead,  i->ARM64in.VXfromQ.rQ);
          return;
+      case ARM64in_VXfromDorS:
+         addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
+         addHRegUse(u, HRmRead,  i->ARM64in.VXfromDorS.rDorS);
+         return;
       case ARM64in_VMov:
          addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
          addHRegUse(u, HRmRead,  i->ARM64in.VMov.src);
@@ -3144,6 +3163,12 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
          i->ARM64in.VXfromQ.rQ
            = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
          return;
+      case ARM64in_VXfromDorS:
+         i->ARM64in.VXfromDorS.rX
+           = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
+         i->ARM64in.VXfromDorS.rDorS
+           = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
+         return;
       case ARM64in_VMov:
          i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
         i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
@@ -3349,6 +3374,7 @@ static inline UChar qregNo ( HReg r )
 
 #define X00000   BITS8(0,0,0, 0,0,0,0,0)
 #define X00001   BITS8(0,0,0, 0,0,0,0,1)
+#define X00110   BITS8(0,0,0, 0,0,1,1,0)
 #define X00111   BITS8(0,0,0, 0,0,1,1,1)
 #define X01000   BITS8(0,0,0, 0,1,0,0,0)
 #define X10000   BITS8(0,0,0, 1,0,0,0,0)
@@ -6294,6 +6320,20 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          goto done;
       }
 
+      case ARM64in_VXfromDorS: {
+         /* 000 11110001 00110 000000 n d     FMOV Wd, Sn
+            100 11110011 00110 000000 n d     FMOV Xd, Dn
+         */
+         UInt dd    = iregNo(i->ARM64in.VXfromDorS.rX);
+         UInt nn    = dregNo(i->ARM64in.VXfromDorS.rDorS);
+         Bool fromD = i->ARM64in.VXfromDorS.fromD;
+         vassert(dd < 31);
+         *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
+                              fromD ? X11110011 : X11110001,
+                              X00110, X000000, nn, dd);
+         goto done;
+      }
+
       case ARM64in_VMov: {
          /* 000 11110 00 10000 00 10000 n d   FMOV Sd, Sn
             000 11110 01 10000 00 10000 n d   FMOV Dd, Dn
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 99e9b063d9..437834e5f1 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -598,6 +598,7 @@ typedef
       ARM64in_VDfromX,     /* Move an Xreg to a Dreg */
       ARM64in_VQfromXX,    /* Move 2 Xregs to a Qreg */
       ARM64in_VXfromQ,     /* Move half a Qreg to an Xreg */
+      ARM64in_VXfromDorS,  /* Move Dreg or Sreg(ZX) to an Xreg */
       ARM64in_VMov,        /* vector reg-reg move, 16, 8 or 4 bytes */
       /* infrastructure */
       ARM64in_EvCheck,     /* Event check */
@@ -990,6 +991,11 @@ typedef
             HReg rQ;
             UInt laneNo; /* either 0 or 1 */
          } VXfromQ;
+         struct {
+            HReg rX;
+            HReg rDorS;
+            Bool fromD;
+         } VXfromDorS;
          /* MOV dst, src -- reg-reg move for vector registers */
          struct {
            UInt szB; // 16=mov qD,qS; 8=mov dD,dS; 4=mov sD,sS
@@ -1084,6 +1090,7 @@ extern ARM64Instr* ARM64Instr_VImmQ ( HReg, UShort );
 extern ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX );
 extern ARM64Instr* ARM64Instr_VQfromXX( HReg rQ, HReg rXhi, HReg rXlo );
 extern ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo );
+extern ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD );
 extern ARM64Instr* ARM64Instr_VMov    ( UInt szB, HReg dst, HReg src );
 
 extern ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 470df6bd93..792d0747bc 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -2109,6 +2109,18 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
             return dst;
          }
+         case Iop_ReinterpF64asI64: {
+            HReg dst = newVRegI(env);
+            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+            addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
+            return dst;
+         }
+         case Iop_ReinterpF32asI32: {
+            HReg dst = newVRegI(env);
+            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+            addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
+            return dst;
+         }
          case Iop_1Sto32: case Iop_1Sto64: {
             /* As with the iselStmt case for 'tmp:I1 = expr', we could
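
Note (not part of the commit): the sketch below is a minimal plain-C model of the semantics the new decoder front-ends, i.e. FMUL (vector, by element), where one lane of the second source is broadcast across all lanes (the job of math_DUP_TO_V128 in the patch) before a lane-wise multiply (Iop_Mul32Fx4 / Iop_Mul64Fx2). It shows only the 4s case, ignores rounding-mode handling, and the names fmul_4s_by_elem, vn, vm, index are hypothetical and do not appear in the patch. The lane-index computation follows the patch: for S lanes, index = H:L.

   /* Illustrative sketch only: models FMUL Vd.4S, Vn.4S, Vm.S[index]. */
   #include <stdio.h>

   /* Broadcast lane 'index' of vm across all four lanes, then multiply
      lane-wise -- mirrors getQRegLane + math_DUP_TO_V128 + Iop_Mul32Fx4. */
   static void fmul_4s_by_elem(float dst[4], const float vn[4],
                               const float vm[4], unsigned index)
   {
      float elem = vm[index];
      for (unsigned i = 0; i < 4; i++)
         dst[i] = vn[i] * elem;
   }

   int main(void)
   {
      unsigned bitH = 1, bitL = 0;           /* as decoded from the insn   */
      unsigned index = (bitH << 1) | bitL;   /* S-lane case: index = H:L   */
      float vn[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
      float vm[4] = { 10.0f, 20.0f, 30.0f, 40.0f };
      float vd[4];
      fmul_4s_by_elem(vd, vn, vm, index);
      printf("%.1f %.1f %.1f %.1f\n", vd[0], vd[1], vd[2], vd[3]);
      return 0;                              /* prints 30.0 60.0 90.0 120.0 */
   }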