From: Julian Seward Date: Wed, 8 Feb 2006 19:30:46 +0000 (+0000) Subject: Redo the way FP multiply-accumulate insns are done on ppc32/64. X-Git-Tag: svn/VALGRIND_3_2_3^2~88 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=685cef79a246cd6f2bebe6015f25f0257ee202bd;p=thirdparty%2Fvalgrind.git Redo the way FP multiply-accumulate insns are done on ppc32/64. Instead of splitting them up into a multiply and an add/sub, add 4 new primops which keep the operation as a single unit. Then, in the back end, re-emit them as a single instruction. Reason for this is that so-called fused-multiply-accumulate -- which is what ppc does -- generates a double-double length intermediate result (of the multiply, 112 mantissa bits) before doing the add, and so it is impossible to do a bit-accurate simulation of it using AddF64 and MulF64. Unfortunately the new primops unavoidably take 4 args (a rounding mode + 3 FP args) and so there is a new IRExpr expression type, IRExpr_Qop and associated supporting junk. 
git-svn-id: svn://svn.valgrind.org/vex/trunk@1573 --- diff --git a/VEX/priv/guest-ppc/toIR.c b/VEX/priv/guest-ppc/toIR.c index 1d5b8bc47e..103a490625 100644 --- a/VEX/priv/guest-ppc/toIR.c +++ b/VEX/priv/guest-ppc/toIR.c @@ -64,8 +64,6 @@ - Floating Point: - All exceptions disabled in FPSCR - condition codes not set in FPSCR - - some error in accuracy - - flt->int conversions are dubious in overflow cases - Altivec floating point: - vmaddfp, vnmsubfp @@ -483,17 +481,17 @@ static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) return IRExpr_Triop(op, a1, a2, a3); } +static IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2, + IRExpr* a3, IRExpr* a4 ) +{ + return IRExpr_Qop(op, a1, a2, a3, a4); +} + static IRExpr* mkexpr ( IRTemp tmp ) { return IRExpr_Tmp(tmp); } -//uu static IRExpr* mkU1 ( UInt i ) -//uu { -//uu vassert(i < 2); -//uu return IRExpr_Const(IRConst_U1( toBool(i) )); -//uu } - static IRExpr* mkU8 ( UChar i ) { return IRExpr_Const(IRConst_U8(i)); @@ -6064,39 +6062,31 @@ static Bool dis_fp_multadd ( UInt theInstr ) case 0x1C: // fmsubs (Floating Mult-Subtr Single, PPC32 p412) DIP("fmsubs%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frA_addr, frC_addr, frB_addr); - assign( frD, triop( Iop_SubF64r32, rm, - triop( Iop_MulF64, rm, mkexpr(frA), - mkexpr(frC) ), - mkexpr(frB) )); + assign( frD, qop( Iop_MSubF64r32, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) )); break; case 0x1D: // fmadds (Floating Mult-Add Single, PPC32 p409) DIP("fmadds%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frA_addr, frC_addr, frB_addr); - assign( frD, triop( Iop_AddF64r32, rm, - triop( Iop_MulF64, rm, mkexpr(frA), - mkexpr(frC) ), - mkexpr(frB) )); + assign( frD, qop( Iop_MAddF64r32, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) )); break; case 0x1E: // fnmsubs (Float Neg Mult-Subtr Single, PPC32 p420) DIP("fnmsubs%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? 
".":"", frD_addr, frA_addr, frC_addr, frB_addr); assign( frD, unop( Iop_NegF64, - triop( Iop_SubF64r32, rm, - triop( Iop_MulF64, rm, mkexpr(frA), - mkexpr(frC) ), - mkexpr(frB) ))); + qop( Iop_MSubF64r32, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) ))); break; case 0x1F: // fnmadds (Floating Negative Multiply-Add Single, PPC32 p418) DIP("fnmadds%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frA_addr, frC_addr, frB_addr); assign( frD, unop( Iop_NegF64, - triop( Iop_AddF64r32, rm, - triop( Iop_MulF64, rm, mkexpr(frA), - mkexpr(frC) ), - mkexpr(frB) ))); + qop( Iop_MAddF64r32, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) ))); break; default: @@ -6110,39 +6100,31 @@ static Bool dis_fp_multadd ( UInt theInstr ) case 0x1C: // fmsub (Float Mult-Sub (Dbl Precision), PPC32 p411) DIP("fmsub%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frA_addr, frC_addr, frB_addr); - assign( frD, triop( Iop_SubF64, rm, - triop( Iop_MulF64, rm, mkexpr(frA), - mkexpr(frC) ), - mkexpr(frB) )); + assign( frD, qop( Iop_MSubF64, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) )); break; case 0x1D: // fmadd (Float Mult-Add (Dbl Precision), PPC32 p408) DIP("fmadd%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frA_addr, frC_addr, frB_addr); - assign( frD, triop( Iop_AddF64, rm, - triop( Iop_MulF64, rm, mkexpr(frA), - mkexpr(frC) ), - mkexpr(frB) )); + assign( frD, qop( Iop_MAddF64, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) )); break; case 0x1E: // fnmsub (Float Neg Mult-Subtr (Dbl Precision), PPC32 p419) DIP("fnmsub%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frA_addr, frC_addr, frB_addr); assign( frD, unop( Iop_NegF64, - triop( Iop_SubF64, rm, - triop( Iop_MulF64, rm, mkexpr(frA), - mkexpr(frC) ), - mkexpr(frB) ))); + qop( Iop_MSubF64, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) ))); break; case 0x1F: // fnmadd (Float Neg Mult-Add (Dbl Precision), PPC32 p417) DIP("fnmadd%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? 
".":"", frD_addr, frA_addr, frC_addr, frB_addr); assign( frD, unop( Iop_NegF64, - triop( Iop_AddF64, rm, - triop( Iop_MulF64, rm, mkexpr(frA), - mkexpr(frC) ), - mkexpr(frB) ))); + qop( Iop_MAddF64, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) ))); break; default: diff --git a/VEX/priv/host-generic/h_generic_regs.h b/VEX/priv/host-generic/h_generic_regs.h index b1ece43414..8575191731 100644 --- a/VEX/priv/host-generic/h_generic_regs.h +++ b/VEX/priv/host-generic/h_generic_regs.h @@ -188,7 +188,7 @@ extern void addHRegUse ( HRegUsage*, HRegMode, HReg ); This is precisely the behaviour that the register allocator needs to impose its decisions on the instructions it processes. */ -#define N_HREG_REMAP 4 +#define N_HREG_REMAP 5 typedef struct { diff --git a/VEX/priv/host-ppc/hdefs.c b/VEX/priv/host-ppc/hdefs.c index 8d6098ca69..801997a327 100644 --- a/VEX/priv/host-ppc/hdefs.c +++ b/VEX/priv/host-ppc/hdefs.c @@ -616,6 +616,10 @@ HChar* showPPCFpOp ( PPCFpOp op ) { case Pfp_SUBD: return "fsub"; case Pfp_MULD: return "fmul"; case Pfp_DIVD: return "fdiv"; + case Pfp_MADDD: return "fmadd"; + case Pfp_MSUBD: return "fmsub"; + case Pfp_MADDS: return "fmadds"; + case Pfp_MSUBS: return "fmsubs"; case Pfp_ADDS: return "fadds"; case Pfp_SUBS: return "fsubs"; case Pfp_MULS: return "fmuls"; @@ -905,6 +909,18 @@ PPCInstr* PPCInstr_FpBinary ( PPCFpOp op, HReg dst, i->Pin.FpBinary.srcR = srcR; return i; } +PPCInstr* PPCInstr_FpMulAcc ( PPCFpOp op, HReg dst, HReg srcML, + HReg srcMR, HReg srcAcc ) +{ + PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); + i->tag = Pin_FpMulAcc; + i->Pin.FpMulAcc.op = op; + i->Pin.FpMulAcc.dst = dst; + i->Pin.FpMulAcc.srcML = srcML; + i->Pin.FpMulAcc.srcMR = srcMR; + i->Pin.FpMulAcc.srcAcc = srcAcc; + return i; +} PPCInstr* PPCInstr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, PPCAMode* addr ) { PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); @@ -1400,6 +1416,16 @@ void ppPPCInstr ( PPCInstr* i, Bool mode64 ) vex_printf(","); ppHRegPPC(i->Pin.FpBinary.srcR); 
return; + case Pin_FpMulAcc: + vex_printf("%s ", showPPCFpOp(i->Pin.FpMulAcc.op)); + ppHRegPPC(i->Pin.FpMulAcc.dst); + vex_printf(","); + ppHRegPPC(i->Pin.FpMulAcc.srcML); + vex_printf(","); + ppHRegPPC(i->Pin.FpMulAcc.srcMR); + vex_printf(","); + ppHRegPPC(i->Pin.FpMulAcc.srcAcc); + return; case Pin_FpLdSt: { UChar sz = i->Pin.FpLdSt.sz; Bool idxd = toBool(i->Pin.FpLdSt.addr->tag == Pam_RR); @@ -1774,6 +1800,12 @@ void getRegUsage_PPCInstr ( HRegUsage* u, PPCInstr* i, Bool mode64 ) addHRegUse(u, HRmRead, i->Pin.FpBinary.srcL); addHRegUse(u, HRmRead, i->Pin.FpBinary.srcR); return; + case Pin_FpMulAcc: + addHRegUse(u, HRmWrite, i->Pin.FpMulAcc.dst); + addHRegUse(u, HRmRead, i->Pin.FpMulAcc.srcML); + addHRegUse(u, HRmRead, i->Pin.FpMulAcc.srcMR); + addHRegUse(u, HRmRead, i->Pin.FpMulAcc.srcAcc); + return; case Pin_FpLdSt: addHRegUse(u, (i->Pin.FpLdSt.isLoad ? HRmWrite : HRmRead), i->Pin.FpLdSt.reg); @@ -1973,6 +2005,12 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 ) mapReg(m, &i->Pin.FpBinary.srcL); mapReg(m, &i->Pin.FpBinary.srcR); return; + case Pin_FpMulAcc: + mapReg(m, &i->Pin.FpMulAcc.dst); + mapReg(m, &i->Pin.FpMulAcc.srcML); + mapReg(m, &i->Pin.FpMulAcc.srcMR); + mapReg(m, &i->Pin.FpMulAcc.srcAcc); + return; case Pin_FpLdSt: mapReg(m, &i->Pin.FpLdSt.reg); mapRegs_PPCAMode(m, i->Pin.FpLdSt.addr); @@ -3190,6 +3228,30 @@ Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i, goto done; } + case Pin_FpMulAcc: { + UInt fr_dst = fregNo(i->Pin.FpMulAcc.dst); + UInt fr_srcML = fregNo(i->Pin.FpMulAcc.srcML); + UInt fr_srcMR = fregNo(i->Pin.FpMulAcc.srcMR); + UInt fr_srcAcc = fregNo(i->Pin.FpMulAcc.srcAcc); + switch (i->Pin.FpMulAcc.op) { + case Pfp_MADDD: // fmadd, PPC32 p408 + p = mkFormA( p, 63, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 29, 0 ); + break; + case Pfp_MADDS: // fmadds, PPC32 p409 + p = mkFormA( p, 59, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 29, 0 ); + break; + case Pfp_MSUBD: // fmsub, PPC32 p411 + p = mkFormA( p, 63, fr_dst, fr_srcML, 
fr_srcAcc, fr_srcMR, 28, 0 ); + break; + case Pfp_MSUBS: // fmsubs, PPC32 p412 + p = mkFormA( p, 59, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 28, 0 ); + break; + default: + goto bad; + } + goto done; + } + case Pin_FpLdSt: { PPCAMode* am_addr = i->Pin.FpLdSt.addr; UInt f_reg = fregNo(i->Pin.FpLdSt.reg); diff --git a/VEX/priv/host-ppc/hdefs.h b/VEX/priv/host-ppc/hdefs.h index 7a8b1aa79b..dc2a60fd92 100644 --- a/VEX/priv/host-ppc/hdefs.h +++ b/VEX/priv/host-ppc/hdefs.h @@ -367,6 +367,11 @@ HChar* showPPCShftOp ( PPCShftOp, typedef enum { Pfp_INVALID, + + /* Ternary */ + Pfp_MADDD, Pfp_MSUBD, + Pfp_MADDS, Pfp_MSUBS, + /* Binary */ Pfp_ADDD, Pfp_SUBD, Pfp_MULD, Pfp_DIVD, Pfp_ADDS, Pfp_SUBS, Pfp_MULS, Pfp_DIVS, @@ -460,6 +465,7 @@ typedef Pin_FpUnary, /* FP unary op */ Pin_FpBinary, /* FP binary op */ + Pin_FpMulAcc, /* FP multipy-accumulate style op */ Pin_FpLdSt, /* FP load/store */ Pin_FpSTFIW, /* stfiwx */ Pin_FpRSP, /* FP round IEEE754 double to IEEE754 single */ @@ -630,6 +636,13 @@ typedef HReg srcL; HReg srcR; } FpBinary; + struct { + PPCFpOp op; + HReg dst; + HReg srcML; + HReg srcMR; + HReg srcAcc; + } FpMulAcc; struct { Bool isLoad; UChar sz; /* only 4 (IEEE single) or 8 (IEEE double) */ @@ -785,6 +798,8 @@ extern PPCInstr* PPCInstr_MFence ( void ); extern PPCInstr* PPCInstr_FpUnary ( PPCFpOp op, HReg dst, HReg src ); extern PPCInstr* PPCInstr_FpBinary ( PPCFpOp op, HReg dst, HReg srcL, HReg srcR ); +extern PPCInstr* PPCInstr_FpMulAcc ( PPCFpOp op, HReg dst, HReg srcML, + HReg srcMR, HReg srcAcc ); extern PPCInstr* PPCInstr_FpLdSt ( Bool isLoad, UChar sz, HReg, PPCAMode* ); extern PPCInstr* PPCInstr_FpSTFIW ( HReg addr, HReg data ); extern PPCInstr* PPCInstr_FpRSP ( HReg dst, HReg src ); diff --git a/VEX/priv/host-ppc/isel.c b/VEX/priv/host-ppc/isel.c index 4830f3c14b..bfbe660822 100644 --- a/VEX/priv/host-ppc/isel.c +++ b/VEX/priv/host-ppc/isel.c @@ -2932,6 +2932,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) } } + /* --------- LOAD --------- */ 
if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) { HReg r_dst = newVRegF(env); PPCAMode* am_addr; @@ -2941,6 +2942,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) return r_dst; } + /* --------- GET --------- */ if (e->tag == Iex_Get) { HReg r_dst = newVRegF(env); PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset, @@ -2949,6 +2951,28 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) return r_dst; } + /* --------- OPS --------- */ + if (e->tag == Iex_Qop) { + PPCFpOp fpop = Pfp_INVALID; + switch (e->Iex.Qop.op) { + case Iop_MAddF64: fpop = Pfp_MADDD; break; + case Iop_MAddF64r32: fpop = Pfp_MADDS; break; + case Iop_MSubF64: fpop = Pfp_MSUBD; break; + case Iop_MSubF64r32: fpop = Pfp_MSUBS; break; + default: break; + } + if (fpop != Pfp_INVALID) { + HReg r_dst = newVRegF(env); + HReg r_srcML = iselDblExpr(env, e->Iex.Qop.arg2); + HReg r_srcMR = iselDblExpr(env, e->Iex.Qop.arg3); + HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.arg4); + set_FPU_rounding_mode( env, e->Iex.Qop.arg1 ); + addInstr(env, PPCInstr_FpMulAcc(fpop, r_dst, + r_srcML, r_srcMR, r_srcAcc)); + return r_dst; + } + } + if (e->tag == Iex_Triop) { PPCFpOp fpop = Pfp_INVALID; switch (e->Iex.Triop.op) { diff --git a/VEX/priv/ir/irdefs.c b/VEX/priv/ir/irdefs.c index bb48e71d57..60606ab289 100644 --- a/VEX/priv/ir/irdefs.c +++ b/VEX/priv/ir/irdefs.c @@ -267,8 +267,14 @@ void ppIROp ( IROp op ) case Iop_TanF64: vex_printf("TanF64"); return; case Iop_2xm1F64: vex_printf("2xm1F64"); return; - case Iop_Est5FRSqrt: vex_printf("Est5FRSqrt"); return; + case Iop_MAddF64: vex_printf("MAddF64"); return; + case Iop_MSubF64: vex_printf("MSubF64"); return; + case Iop_MAddF64r32: vex_printf("MAddF64r32"); return; + case Iop_MSubF64r32: vex_printf("MSubF64r32"); return; + + case Iop_Est5FRSqrt: vex_printf("Est5FRSqrt"); return; case Iop_TruncF64asF32: vex_printf("TruncF64asF32"); return; + case Iop_CalcFPRF: vex_printf("CalcFPRF"); case Iop_CmpF64: vex_printf("CmpF64"); return; @@ -584,6 +590,18 @@ 
void ppIRExpr ( IRExpr* e ) case Iex_Tmp: ppIRTemp(e->Iex.Tmp.tmp); break; + case Iex_Qop: + ppIROp(e->Iex.Qop.op); + vex_printf( "(" ); + ppIRExpr(e->Iex.Qop.arg1); + vex_printf( "," ); + ppIRExpr(e->Iex.Qop.arg2); + vex_printf( "," ); + ppIRExpr(e->Iex.Qop.arg3); + vex_printf( "," ); + ppIRExpr(e->Iex.Qop.arg4); + vex_printf( ")" ); + break; case Iex_Triop: ppIROp(e->Iex.Triop.op); vex_printf( "(" ); @@ -935,6 +953,17 @@ IRExpr* IRExpr_Tmp ( IRTemp tmp ) { e->Iex.Tmp.tmp = tmp; return e; } +IRExpr* IRExpr_Qop ( IROp op, IRExpr* arg1, IRExpr* arg2, + IRExpr* arg3, IRExpr* arg4 ) { + IRExpr* e = LibVEX_Alloc(sizeof(IRExpr)); + e->tag = Iex_Qop; + e->Iex.Qop.op = op; + e->Iex.Qop.arg1 = arg1; + e->Iex.Qop.arg2 = arg2; + e->Iex.Qop.arg3 = arg3; + e->Iex.Qop.arg4 = arg4; + return e; +} IRExpr* IRExpr_Triop ( IROp op, IRExpr* arg1, IRExpr* arg2, IRExpr* arg3 ) { IRExpr* e = LibVEX_Alloc(sizeof(IRExpr)); @@ -1368,7 +1397,8 @@ static void typeOfPrimop ( IROp op, /*OUTs*/ IRType* t_dst, - IRType* t_arg1, IRType* t_arg2, IRType* t_arg3 ) + IRType* t_arg1, IRType* t_arg2, + IRType* t_arg3, IRType* t_arg4 ) { # define UNARY(_ta1,_td) \ *t_dst = (_td); *t_arg1 = (_ta1); break @@ -1377,6 +1407,10 @@ void typeOfPrimop ( IROp op, # define TERNARY(_ta1,_ta2,_ta3,_td) \ *t_dst = (_td); *t_arg1 = (_ta1); \ *t_arg2 = (_ta2); *t_arg3 = (_ta3); break +# define QUATERNARY(_ta1,_ta2,_ta3,_ta4,_td) \ + *t_dst = (_td); *t_arg1 = (_ta1); \ + *t_arg2 = (_ta2); *t_arg3 = (_ta3); \ + *t_arg4 = (_ta4); break # define COMPARISON(_ta) \ *t_dst = Ity_I1; *t_arg1 = *t_arg2 = (_ta); break; # define UNARY_COMPARISON(_ta) \ @@ -1390,6 +1424,7 @@ void typeOfPrimop ( IROp op, *t_arg1 = Ity_INVALID; *t_arg2 = Ity_INVALID; *t_arg3 = Ity_INVALID; + *t_arg4 = Ity_INVALID; switch (op) { case Iop_Add8: case Iop_Sub8: case Iop_Mul8: case Iop_Or8: case Iop_And8: case Iop_Xor8: @@ -1593,6 +1628,10 @@ void typeOfPrimop ( IROp op, case Iop_2xm1F64: case Iop_RoundF64toInt: BINARY(ity_RMode,Ity_F64, Ity_F64); + 
case Iop_MAddF64: case Iop_MSubF64: + case Iop_MAddF64r32: case Iop_MSubF64r32: + QUATERNARY(ity_RMode,Ity_F64,Ity_F64,Ity_F64, Ity_F64); + case Iop_Est5FRSqrt: UNARY(Ity_F64, Ity_F64); case Iop_RoundF64toF32: @@ -1794,7 +1833,7 @@ IRType typeOfIRConst ( IRConst* con ) IRType typeOfIRExpr ( IRTypeEnv* tyenv, IRExpr* e ) { - IRType t_dst, t_arg1, t_arg2, t_arg3; + IRType t_dst, t_arg1, t_arg2, t_arg3, t_arg4; start: switch (e->tag) { case Iex_Load: @@ -1807,14 +1846,21 @@ IRType typeOfIRExpr ( IRTypeEnv* tyenv, IRExpr* e ) return typeOfIRTemp(tyenv, e->Iex.Tmp.tmp); case Iex_Const: return typeOfIRConst(e->Iex.Const.con); + case Iex_Qop: + typeOfPrimop(e->Iex.Qop.op, + &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); + return t_dst; case Iex_Triop: - typeOfPrimop(e->Iex.Triop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3); + typeOfPrimop(e->Iex.Triop.op, + &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); return t_dst; case Iex_Binop: - typeOfPrimop(e->Iex.Binop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3); + typeOfPrimop(e->Iex.Binop.op, + &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); return t_dst; case Iex_Unop: - typeOfPrimop(e->Iex.Unop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3); + typeOfPrimop(e->Iex.Unop.op, + &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); return t_dst; case Iex_CCall: return e->Iex.CCall.retty; @@ -1884,6 +1930,11 @@ Bool isFlatIRStmt ( IRStmt* st ) case Iex_Get: return True; case Iex_GetI: return isIRAtom(e->Iex.GetI.ix); case Iex_Tmp: return True; + case Iex_Qop: return toBool( + isIRAtom(e->Iex.Qop.arg1) + && isIRAtom(e->Iex.Qop.arg2) + && isIRAtom(e->Iex.Qop.arg3) + && isIRAtom(e->Iex.Qop.arg4)); case Iex_Triop: return toBool( isIRAtom(e->Iex.Triop.arg1) && isIRAtom(e->Iex.Triop.arg2) @@ -2026,6 +2077,12 @@ void useBeforeDef_Expr ( IRBB* bb, IRStmt* stmt, IRExpr* expr, Int* def_counts ) case Iex_Tmp: useBeforeDef_Temp(bb,stmt,expr->Iex.Tmp.tmp,def_counts); break; + case Iex_Qop: + useBeforeDef_Expr(bb,stmt,expr->Iex.Qop.arg1,def_counts); + 
useBeforeDef_Expr(bb,stmt,expr->Iex.Qop.arg2,def_counts); + useBeforeDef_Expr(bb,stmt,expr->Iex.Qop.arg3,def_counts); + useBeforeDef_Expr(bb,stmt,expr->Iex.Qop.arg4,def_counts); + break; case Iex_Triop: useBeforeDef_Expr(bb,stmt,expr->Iex.Triop.arg1,def_counts); useBeforeDef_Expr(bb,stmt,expr->Iex.Triop.arg2,def_counts); @@ -2104,7 +2161,7 @@ static void tcExpr ( IRBB* bb, IRStmt* stmt, IRExpr* expr, IRType gWordTy ) { Int i; - IRType t_dst, t_arg1, t_arg2, t_arg3; + IRType t_dst, t_arg1, t_arg2, t_arg3, t_arg4; IRTypeEnv* tyenv = bb->tyenv; switch (expr->tag) { case Iex_Get: @@ -2117,14 +2174,66 @@ void tcExpr ( IRBB* bb, IRStmt* stmt, IRExpr* expr, IRType gWordTy ) if (!saneIRArray(expr->Iex.GetI.descr)) sanityCheckFail(bb,stmt,"IRExpr.GetI.descr: invalid descr"); break; + case Iex_Qop: { + IRType ttarg1, ttarg2, ttarg3, ttarg4; + tcExpr(bb,stmt, expr->Iex.Qop.arg1, gWordTy ); + tcExpr(bb,stmt, expr->Iex.Qop.arg2, gWordTy ); + tcExpr(bb,stmt, expr->Iex.Qop.arg3, gWordTy ); + tcExpr(bb,stmt, expr->Iex.Qop.arg4, gWordTy ); + typeOfPrimop(expr->Iex.Qop.op, + &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); + if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID + || t_arg3 == Ity_INVALID || t_arg4 == Ity_INVALID) { + vex_printf(" op name: " ); + ppIROp(expr->Iex.Qop.op); + vex_printf("\n"); + sanityCheckFail(bb,stmt, + "Iex.Qop: wrong arity op\n" + "... 
name of op precedes BB printout\n"); + } + ttarg1 = typeOfIRExpr(tyenv, expr->Iex.Qop.arg1); + ttarg2 = typeOfIRExpr(tyenv, expr->Iex.Qop.arg2); + ttarg3 = typeOfIRExpr(tyenv, expr->Iex.Qop.arg3); + ttarg4 = typeOfIRExpr(tyenv, expr->Iex.Qop.arg4); + if (t_arg1 != ttarg1 || t_arg2 != ttarg2 + || t_arg3 != ttarg3 || t_arg4 != ttarg4) { + vex_printf(" op name: "); + ppIROp(expr->Iex.Qop.op); + vex_printf("\n"); + vex_printf(" op type is ("); + ppIRType(t_arg1); + vex_printf(","); + ppIRType(t_arg2); + vex_printf(","); + ppIRType(t_arg3); + vex_printf(","); + ppIRType(t_arg4); + vex_printf(") -> "); + ppIRType (t_dst); + vex_printf("\narg tys are ("); + ppIRType(ttarg1); + vex_printf(","); + ppIRType(ttarg2); + vex_printf(","); + ppIRType(ttarg3); + vex_printf(","); + ppIRType(ttarg4); + vex_printf(")\n"); + sanityCheckFail(bb,stmt, + "Iex.Qop: arg tys don't match op tys\n" + "... additional details precede BB printout\n"); + } + break; + } case Iex_Triop: { IRType ttarg1, ttarg2, ttarg3; tcExpr(bb,stmt, expr->Iex.Triop.arg1, gWordTy ); tcExpr(bb,stmt, expr->Iex.Triop.arg2, gWordTy ); tcExpr(bb,stmt, expr->Iex.Triop.arg3, gWordTy ); - typeOfPrimop(expr->Iex.Triop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3); + typeOfPrimop(expr->Iex.Triop.op, + &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID - || t_arg3 == Ity_INVALID) { + || t_arg3 == Ity_INVALID || t_arg4 != Ity_INVALID) { vex_printf(" op name: " ); ppIROp(expr->Iex.Triop.op); vex_printf("\n"); @@ -2164,9 +2273,10 @@ void tcExpr ( IRBB* bb, IRStmt* stmt, IRExpr* expr, IRType gWordTy ) IRType ttarg1, ttarg2; tcExpr(bb,stmt, expr->Iex.Binop.arg1, gWordTy ); tcExpr(bb,stmt, expr->Iex.Binop.arg2, gWordTy ); - typeOfPrimop(expr->Iex.Binop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3); + typeOfPrimop(expr->Iex.Binop.op, + &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID - || t_arg3 != Ity_INVALID) { + || t_arg3 != Ity_INVALID || t_arg4 != 
Ity_INVALID) { vex_printf(" op name: " ); ppIROp(expr->Iex.Binop.op); vex_printf("\n"); @@ -2199,9 +2309,10 @@ void tcExpr ( IRBB* bb, IRStmt* stmt, IRExpr* expr, IRType gWordTy ) } case Iex_Unop: tcExpr(bb,stmt, expr->Iex.Unop.arg, gWordTy ); - typeOfPrimop(expr->Iex.Binop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3); + typeOfPrimop(expr->Iex.Binop.op, + &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); if (t_arg1 == Ity_INVALID || t_arg2 != Ity_INVALID - || t_arg3 != Ity_INVALID) + || t_arg3 != Ity_INVALID || t_arg4 != Ity_INVALID) sanityCheckFail(bb,stmt,"Iex.Unop: wrong arity op"); if (t_arg1 != typeOfIRExpr(tyenv, expr->Iex.Unop.arg)) sanityCheckFail(bb,stmt,"Iex.Unop: arg ty doesn't match op ty"); diff --git a/VEX/priv/ir/iropt.c b/VEX/priv/ir/iropt.c index 43dfe4b934..a240e1d78f 100644 --- a/VEX/priv/ir/iropt.c +++ b/VEX/priv/ir/iropt.c @@ -297,6 +297,16 @@ static IRExpr* flatten_Expr ( IRBB* bb, IRExpr* ex ) IRStmt_Tmp(t1, ex)); return IRExpr_Tmp(t1); + case Iex_Qop: + t1 = newIRTemp(bb->tyenv, ty); + addStmtToIRBB(bb, IRStmt_Tmp(t1, + IRExpr_Qop(ex->Iex.Qop.op, + flatten_Expr(bb, ex->Iex.Qop.arg1), + flatten_Expr(bb, ex->Iex.Qop.arg2), + flatten_Expr(bb, ex->Iex.Qop.arg3), + flatten_Expr(bb, ex->Iex.Qop.arg4)))); + return IRExpr_Tmp(t1); + case Iex_Triop: t1 = newIRTemp(bb->tyenv, ty); addStmtToIRBB(bb, IRStmt_Tmp(t1, @@ -1524,6 +1534,19 @@ static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex ) ex->Iex.GetI.bias ); + case Iex_Qop: + vassert(isIRAtom(ex->Iex.Qop.arg1)); + vassert(isIRAtom(ex->Iex.Qop.arg2)); + vassert(isIRAtom(ex->Iex.Qop.arg3)); + vassert(isIRAtom(ex->Iex.Qop.arg4)); + return IRExpr_Qop( + ex->Iex.Qop.op, + subst_Expr(env, ex->Iex.Qop.arg1), + subst_Expr(env, ex->Iex.Qop.arg2), + subst_Expr(env, ex->Iex.Qop.arg3), + subst_Expr(env, ex->Iex.Qop.arg4) + ); + case Iex_Triop: vassert(isIRAtom(ex->Iex.Triop.arg1)); vassert(isIRAtom(ex->Iex.Triop.arg2)); @@ -1814,6 +1837,12 @@ static void addUses_Expr ( Bool* set, IRExpr* e ) case Iex_Load: 
addUses_Expr(set, e->Iex.Load.addr); return; + case Iex_Qop: + addUses_Expr(set, e->Iex.Qop.arg1); + addUses_Expr(set, e->Iex.Qop.arg2); + addUses_Expr(set, e->Iex.Qop.arg3); + addUses_Expr(set, e->Iex.Qop.arg4); + return; case Iex_Triop: addUses_Expr(set, e->Iex.Triop.arg1); addUses_Expr(set, e->Iex.Triop.arg2); @@ -3432,6 +3461,12 @@ static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e ) setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0); setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX); return; + case Iex_Qop: + setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1); + setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2); + setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3); + setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4); + return; case Iex_Triop: setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1); setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2); @@ -3499,6 +3534,13 @@ static void aoccCount_Expr ( UShort* uses, IRExpr* e ) aoccCount_Expr(uses, e->Iex.Mux0X.exprX); return; + case Iex_Qop: + aoccCount_Expr(uses, e->Iex.Qop.arg1); + aoccCount_Expr(uses, e->Iex.Qop.arg2); + aoccCount_Expr(uses, e->Iex.Qop.arg3); + aoccCount_Expr(uses, e->Iex.Qop.arg4); + return; + case Iex_Triop: aoccCount_Expr(uses, e->Iex.Triop.arg1); aoccCount_Expr(uses, e->Iex.Triop.arg2); @@ -3634,6 +3676,14 @@ static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e ) atbSubst_Expr(env, e->Iex.Mux0X.expr0), atbSubst_Expr(env, e->Iex.Mux0X.exprX) ); + case Iex_Qop: + return IRExpr_Qop( + e->Iex.Qop.op, + atbSubst_Expr(env, e->Iex.Qop.arg1), + atbSubst_Expr(env, e->Iex.Qop.arg2), + atbSubst_Expr(env, e->Iex.Qop.arg3), + atbSubst_Expr(env, e->Iex.Qop.arg4) + ); case Iex_Triop: return IRExpr_Triop( e->Iex.Triop.op, diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 7634edd50e..39d6b613bc 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -217,7 +217,7 @@ extern void ppIRTemp ( IRTemp ); #define IRTemp_INVALID ((IRTemp)0xFFFFFFFF) -/* ------------------ 
Binary and unary ops ------------------ */ +/* --------------- Primops (arity 1,2,3 and 4) --------------- */ typedef enum { @@ -432,6 +432,18 @@ typedef /* --- guest ppc32/64 specifics, not mandated by 754. --- */ + /* Ternary operations, with rounding. */ + /* Fused multiply-add/sub, with 112-bit intermediate + precision */ + /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 + (computes arg2 * arg3 +/- arg4) */ + Iop_MAddF64, Iop_MSubF64, + + /* Variants of the above which produce a 64-bit result but which + round their result to a IEEE float range first. */ + /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */ + Iop_MAddF64r32, Iop_MSubF64r32, + /* :: F64 -> F64 */ Iop_Est5FRSqrt, /* reciprocal square root estimate, 5 good bits */ @@ -760,6 +772,7 @@ typedef Iex_Get, /* read guest state, fixed offset */ Iex_GetI, /* read guest state, run-time offset */ Iex_Tmp, /* value of temporary */ + Iex_Qop, /* quaternary operation */ Iex_Triop, /* ternary operation */ Iex_Binop, /* binary operation */ Iex_Unop, /* unary operation */ @@ -789,6 +802,13 @@ typedef struct { IRTemp tmp; } Tmp; + struct { + IROp op; + struct _IRExpr* arg1; + struct _IRExpr* arg2; + struct _IRExpr* arg3; + struct _IRExpr* arg4; + } Qop; struct { IROp op; struct _IRExpr* arg1; @@ -830,6 +850,8 @@ extern IRExpr* IRExpr_Binder ( Int binder ); extern IRExpr* IRExpr_Get ( Int off, IRType ty ); extern IRExpr* IRExpr_GetI ( IRArray* descr, IRExpr* ix, Int bias ); extern IRExpr* IRExpr_Tmp ( IRTemp tmp ); +extern IRExpr* IRExpr_Qop ( IROp op, IRExpr* arg1, IRExpr* arg2, + IRExpr* arg3, IRExpr* arg4 ); extern IRExpr* IRExpr_Triop ( IROp op, IRExpr* arg1, IRExpr* arg2, IRExpr* arg3 ); extern IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 );