From: Petar Jovanovic Date: Thu, 28 Sep 2017 17:29:51 +0000 (+0200) Subject: mips: optimize multiplication Iops X-Git-Tag: VALGRIND_3_14_0~244 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=eb18bd1b447061ba42e6c7c24210675f2c8557ac;p=thirdparty%2Fvalgrind.git mips: optimize multiplication Iops Optimize and refactor some of mul* Iop code in VEX/priv/host_mips_*. Patch from Aleksandar Rikalo. --- diff --git a/VEX/priv/host_mips_defs.c b/VEX/priv/host_mips_defs.c index 9a6993eda0..66c226dbee 100644 --- a/VEX/priv/host_mips_defs.c +++ b/VEX/priv/host_mips_defs.c @@ -811,21 +811,40 @@ MIPSInstr *MIPSInstr_Cmp(Bool syned, Bool sz32, HReg dst, HReg srcL, HReg srcR, return i; } -/* multiply */ -MIPSInstr *MIPSInstr_Mul(Bool syned, Bool wid, Bool sz32, HReg dst, HReg srcL, - HReg srcR) +/* mul */ +MIPSInstr *MIPSInstr_Mul(HReg dst, HReg srcL, HReg srcR) { MIPSInstr *i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); i->tag = Min_Mul; - i->Min.Mul.syned = syned; - i->Min.Mul.widening = wid; /* widen=True else False */ - i->Min.Mul.sz32 = sz32; /* True = 32 bits */ i->Min.Mul.dst = dst; i->Min.Mul.srcL = srcL; i->Min.Mul.srcR = srcR; return i; } +/* mult, multu / dmult, dmultu */ +MIPSInstr *MIPSInstr_Mult(Bool syned, HReg srcL, HReg srcR) +{ + MIPSInstr *i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Min_Mult; + i->Min.Mult.syned = syned; + i->Min.Mult.srcL = srcL; + i->Min.Mult.srcR = srcR; + return i; +} + +/* ext / dext, dextm, dextu */ +MIPSInstr *MIPSInstr_Ext(HReg dst, HReg src, UInt pos, UInt size) +{ + MIPSInstr *i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Min_Ext; + i->Min.Ext.dst = dst; + i->Min.Ext.src = src; + i->Min.Ext.pos = pos; + i->Min.Ext.size = size; + return i; +} + /* msub */ MIPSInstr *MIPSInstr_Msub(Bool syned, HReg srcL, HReg srcR) { @@ -1228,26 +1247,35 @@ void ppMIPSInstr(const MIPSInstr * i, Bool mode64) return; } case Min_Mul: { - switch (i->Min.Mul.widening) { - case False: - vex_printf("mul "); - 
ppHRegMIPS(i->Min.Mul.dst, mode64); - vex_printf(", "); - ppHRegMIPS(i->Min.Mul.srcL, mode64); - vex_printf(", "); - ppHRegMIPS(i->Min.Mul.srcR, mode64); - return; - case True: - vex_printf("%s%s ", i->Min.Mul.sz32 ? "mult" : "dmult", - i->Min.Mul.syned ? "" : "u"); - ppHRegMIPS(i->Min.Mul.dst, mode64); - vex_printf(", "); - ppHRegMIPS(i->Min.Mul.srcL, mode64); - vex_printf(", "); - ppHRegMIPS(i->Min.Mul.srcR, mode64); - return; - } - break; + vex_printf("mul "); + ppHRegMIPS(i->Min.Mul.dst, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Mul.srcL, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Mul.srcR, mode64); + return; + } + case Min_Mult: { + vex_printf("%s%s ", mode64 ? "dmult" : "mult", + i->Min.Mult.syned ? "" : "u"); + ppHRegMIPS(i->Min.Mult.srcL, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Mult.srcR, mode64); + return; + } + case Min_Ext: { + vassert(mode64); + vassert(i->Min.Ext.pos < 32); + vassert(i->Min.Ext.size > 0); + vassert(i->Min.Ext.size <= 32); + vassert(i->Min.Ext.size + i->Min.Ext.pos > 0); + vassert(i->Min.Ext.size + i->Min.Ext.pos <= 63); + vex_printf("dext "); + ppHRegMIPS(i->Min.Ext.dst, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Ext.src, mode64); + vex_printf(", %u, %u", i->Min.Ext.pos, i->Min.Ext.size); + return; } case Min_Mthi: { vex_printf("mthi "); @@ -1597,6 +1625,18 @@ void getRegUsage_MIPSInstr(HRegUsage * u, const MIPSInstr * i, Bool mode64) addHRegUse(u, HRmWrite, i->Min.Mul.dst); addHRegUse(u, HRmRead, i->Min.Mul.srcL); addHRegUse(u, HRmRead, i->Min.Mul.srcR); + addHRegUse(u, HRmWrite, hregMIPS_HI(mode64)); + addHRegUse(u, HRmWrite, hregMIPS_LO(mode64)); + return; + case Min_Mult: + addHRegUse(u, HRmRead, i->Min.Mult.srcL); + addHRegUse(u, HRmRead, i->Min.Mult.srcR); + addHRegUse(u, HRmWrite, hregMIPS_HI(mode64)); + addHRegUse(u, HRmWrite, hregMIPS_LO(mode64)); + return; + case Min_Ext: + addHRegUse(u, HRmWrite, i->Min.Ext.dst); + addHRegUse(u, HRmRead, i->Min.Ext.src); return; case Min_Mthi: case Min_Mtlo: @@ 
-1817,6 +1857,14 @@ void mapRegs_MIPSInstr(HRegRemap * m, MIPSInstr * i, Bool mode64) mapReg(m, &i->Min.Mul.srcL); mapReg(m, &i->Min.Mul.srcR); return; + case Min_Mult: + mapReg(m, &i->Min.Mult.srcL); + mapReg(m, &i->Min.Mult.srcR); + return; + case Min_Ext: + mapReg(m, &i->Min.Ext.src); + mapReg(m, &i->Min.Ext.dst); + return; case Min_Mthi: case Min_Mtlo: mapReg(m, &i->Min.MtHL.src); @@ -2808,38 +2856,52 @@ Int emit_MIPSInstr ( /*MB_MOD*/Bool* is_profInc, } case Min_Mul: { - Bool syned = i->Min.Mul.syned; - Bool widening = i->Min.Mul.widening; - Bool sz32 = i->Min.Mul.sz32; UInt r_srcL = iregNo(i->Min.Mul.srcL, mode64); UInt r_srcR = iregNo(i->Min.Mul.srcR, mode64); UInt r_dst = iregNo(i->Min.Mul.dst, mode64); - if (widening) { - if (sz32) { - if (syned) - /* mult */ - p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 24); - else - /* multu */ - p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 25); - } else { - if (syned) /* DMULT r_dst,r_srcL,r_srcR */ - p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 28); - else /* DMULTU r_dst,r_srcL,r_srcR */ - p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 29); - } + /* mul r_dst, r_srcL, r_srcR */ + p = mkFormR(p, 28, r_srcL, r_srcR, r_dst, 0, 2); + goto done; + } + + case Min_Mult: { + Bool syned = i->Min.Mult.syned; + UInt r_srcL = iregNo(i->Min.Mult.srcL, mode64); + UInt r_srcR = iregNo(i->Min.Mult.srcR, mode64); + if (mode64) { + if (syned) + /* dmult r_srcL, r_srcR */ + p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 28); + else + /* dmultu r_srcL, r_srcR */ + p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 29); } else { - if (sz32) - /* mul */ - p = mkFormR(p, 28, r_srcL, r_srcR, r_dst, 0, 2); - else if (mode64 && !sz32) - p = mkFormR(p, 28, r_srcL, r_srcR, r_dst, 0, 2); + if (syned) + /* mult r_srcL, r_srcR */ + p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 24); else - goto bad; + /* multu r_srcL, r_srcR */ + p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 25); } goto done; } + case Min_Ext: { + UInt r_src = iregNo(i->Min.Ext.src, mode64); + UInt r_dst = iregNo(i->Min.Ext.dst, 
mode64); + /* For now, only DEXT is implemented. */ + vassert(mode64); + vassert(i->Min.Ext.pos < 32); + vassert(i->Min.Ext.size > 0); + vassert(i->Min.Ext.size <= 32); + vassert(i->Min.Ext.size + i->Min.Ext.pos > 0); + vassert(i->Min.Ext.size + i->Min.Ext.pos <= 63); + /* DEXT r_dst, r_src, pos, size */ + p = mkFormR(p, 0x1F, r_src, r_dst, + i->Min.Ext.size - 1, i->Min.Ext.pos, 3); + goto done; + } + case Min_Macc: { Bool syned = i->Min.Macc.syned; UInt r_srcL = iregNo(i->Min.Macc.srcL, mode64); diff --git a/VEX/priv/host_mips_defs.h b/VEX/priv/host_mips_defs.h index a4c0e789cd..be1e3a80c7 100644 --- a/VEX/priv/host_mips_defs.h +++ b/VEX/priv/host_mips_defs.h @@ -276,10 +276,12 @@ typedef enum { Min_Alu, /* word add/sub/and/or/xor/nor/others? */ Min_Shft, /* word sll/srl/sra */ Min_Unary, /* clo, clz, nop, neg */ + Min_Ext, /* ext / dext, dextm, dextu */ Min_Cmp, /* word compare (fake insn) */ - Min_Mul, /* widening/non-widening multiply */ + Min_Mul, /* non-widening, 32-bit, signed multiply */ + Min_Mult, /* widening multiply */ Min_Div, /* div */ Min_Call, /* call to address in register */ @@ -415,6 +417,13 @@ typedef struct { HReg dst; HReg src; } Unary; + /* Bit extract */ + struct { + HReg dst; + HReg src; + UInt pos; + UInt size; + } Ext; /* Word compare. 
Fake instruction, used for basic block ending */ struct { Bool syned; @@ -433,6 +442,11 @@ typedef struct { HReg srcL; HReg srcR; } Mul; + struct { + Bool syned; /* signed/unsigned */ + HReg srcL; + HReg srcR; + } Mult; struct { Bool syned; /* signed/unsigned - meaningless if widenind = False */ Bool sz32; @@ -615,10 +629,11 @@ extern MIPSInstr *MIPSInstr_LI(HReg, ULong); extern MIPSInstr *MIPSInstr_Alu(MIPSAluOp, HReg, HReg, MIPSRH *); extern MIPSInstr *MIPSInstr_Shft(MIPSShftOp, Bool sz32, HReg, HReg, MIPSRH *); extern MIPSInstr *MIPSInstr_Unary(MIPSUnaryOp op, HReg dst, HReg src); +extern MIPSInstr *MIPSInstr_Ext(HReg, HReg, UInt, UInt); extern MIPSInstr *MIPSInstr_Cmp(Bool, Bool, HReg, HReg, HReg, MIPSCondCode); -extern MIPSInstr *MIPSInstr_Mul(Bool syned, Bool hi32, Bool sz32, HReg, - HReg, HReg); +extern MIPSInstr *MIPSInstr_Mul(HReg, HReg, HReg); +extern MIPSInstr *MIPSInstr_Mult(Bool, HReg, HReg); extern MIPSInstr *MIPSInstr_Div(Bool syned, Bool sz32, HReg, HReg); extern MIPSInstr *MIPSInstr_Madd(Bool, HReg, HReg); extern MIPSInstr *MIPSInstr_Msub(Bool, HReg, HReg); diff --git a/VEX/priv/host_mips_isel.c b/VEX/priv/host_mips_isel.c index deb33f2b4d..711af61140 100644 --- a/VEX/priv/host_mips_isel.c +++ b/VEX/priv/host_mips_isel.c @@ -55,6 +55,9 @@ static Bool mode64 = False; /* Host CPU has FPU and 32 dbl. prec. FP registers. */ static Bool fp_mode64 = False; +/* Host hwcaps */ +static UInt hwcaps_host = 0; + /* GPR register class for mips32/64 */ #define HRcGPR(_mode64) ((_mode64) ? 
HRcInt64 : HRcInt32) @@ -1058,52 +1061,46 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) return r_dst; } - if (e->Iex.Binop.op == Iop_Mul32 || e->Iex.Binop.op == Iop_Mul64) { - Bool sz32 = (e->Iex.Binop.op == Iop_Mul32); + if (e->Iex.Binop.op == Iop_Mul32) { HReg r_dst = newVRegI(env); HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); - addInstr(env, MIPSInstr_Mul(False/*Unsigned or Signed */ , - False /*widen */ , - sz32 /*32bit or 64bit */, - r_dst, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Mul(r_dst, r_srcL, r_srcR)); return r_dst; } - if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) { + if (e->Iex.Binop.op == Iop_Mul64 || + e->Iex.Binop.op == Iop_MullS32) { + vassert(mode64); HReg r_dst = newVRegI(env); - HReg tHi = newVRegI(env); - HReg tLo = newVRegI(env); - HReg tLo_1 = newVRegI(env); - HReg tHi_1 = newVRegI(env); - HReg mask = newVRegI(env); - - Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32); - Bool size = toBool(e->Iex.Binop.op == Iop_MullS32) - || toBool(e->Iex.Binop.op == Iop_MullU32); HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); - addInstr(env, MIPSInstr_Mul(syned /*Unsigned or Signed */ , - True /*widen */ , - size /*32bit or 64bit mul */ , - r_dst, r_srcL, r_srcR)); - - addInstr(env, MIPSInstr_Mfhi(tHi)); - addInstr(env, MIPSInstr_Mflo(tLo)); - - addInstr(env, MIPSInstr_Shft(Mshft_SLL, False, tHi_1, - tHi, MIPSRH_Imm(False, 32))); - - addInstr(env, MIPSInstr_LI(mask, 0xffffffff)); - addInstr(env, MIPSInstr_Alu(Malu_AND, tLo_1, tLo, - MIPSRH_Reg(mask))); - - addInstr(env, MIPSInstr_Alu(Malu_OR, r_dst, tHi_1, - MIPSRH_Reg(tLo_1))); - + addInstr(env, MIPSInstr_Mult(True, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Mflo(r_dst)); return r_dst; } + if (e->Iex.Binop.op == Iop_MullU32) { + vassert(mode64); + HReg r_tmpL = newVRegI(env); + HReg r_tmpR = newVRegI(env); + HReg r_srcL = 
iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + if (VEX_MIPS_CPU_HAS_MIPS64R2(hwcaps_host)) { + addInstr(env, MIPSInstr_Ext(r_tmpL, r_srcL, 0, 32)); + addInstr(env, MIPSInstr_Ext(r_tmpR, r_srcR, 0, 32)); + } else { + addInstr(env, MIPSInstr_LI(r_tmpL, 0xFFFFFFFF)); + addInstr(env, MIPSInstr_Alu(Malu_AND, r_tmpR, r_srcR, + MIPSRH_Reg(r_tmpL))); + addInstr(env, MIPSInstr_Alu(Malu_AND, r_tmpL, r_srcL, + MIPSRH_Reg(r_tmpL))); + } + addInstr(env, MIPSInstr_Mult(False, r_tmpL, r_tmpR)); + addInstr(env, MIPSInstr_Mflo(r_tmpR)); + return r_tmpR; + } + if (e->Iex.Binop.op == Iop_CmpF64) { HReg r_srcL, r_srcR; if (mode64) { @@ -2198,11 +2195,9 @@ static void iselInt128Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, HReg tLo = newVRegI(env); HReg tHi = newVRegI(env); Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); - HReg r_dst = newVRegI(env); HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); - addInstr(env, MIPSInstr_Mul(syned, True, False /*64bit mul */ , - r_dst, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR)); addInstr(env, MIPSInstr_Mfhi(tHi)); addInstr(env, MIPSInstr_Mflo(tLo)); *rHi = tHi; @@ -2411,14 +2406,10 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e) case Iop_MullS32: { HReg tLo = newVRegI(env); HReg tHi = newVRegI(env); - HReg r_dst = newVRegI(env); Bool syned = toBool(op_binop == Iop_MullS32); HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); - - addInstr(env, MIPSInstr_Mul(syned /*Unsigned or Signed */, - True /*widen */ , True, - r_dst, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR)); addInstr(env, MIPSInstr_Mfhi(tHi)); addInstr(env, MIPSInstr_Mflo(tLo)); *rHi = tHi; @@ -4155,9 +4146,10 @@ HInstrArray *iselSB_MIPS ( const IRSB* bb, Int i, j; HReg hreg, hregHI; ISelEnv* env; - UInt hwcaps_host = 
archinfo_host->hwcaps; MIPSAMode *amCounter, *amFailAddr; + hwcaps_host = archinfo_host->hwcaps; + /* sanity ... */ vassert(arch_host == VexArchMIPS32 || arch_host == VexArchMIPS64); vassert(VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(hwcaps_host) diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index 6f55ec93b9..8ae3e3648b 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -246,6 +246,9 @@ typedef /* Check if the processor supports MIPS32R2. */ #define VEX_MIPS_CPU_HAS_MIPS32R2(x) (VEX_MIPS_EX_INFO(x) & \ VEX_MIPS_CPU_ISA_M32R2) +/* Check if the processor supports MIPS64R2. */ +#define VEX_MIPS_CPU_HAS_MIPS64R2(x) (VEX_MIPS_EX_INFO(x) & \ + VEX_MIPS_CPU_ISA_M64R2) /* Check if the processor supports DSP ASE Rev 2. */ #define VEX_MIPS_PROC_DSP2(x) ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \ (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_74K))