From: Julian Seward
Date: Sat, 25 Aug 2007 21:29:03 +0000 (+0000)
Subject: Merge, from CGTUNE branch:
X-Git-Tag: svn/VALGRIND_3_3_1^2~34
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d4426233f4238043694bf9913a7e78fe9cbd637c;p=thirdparty%2Fvalgrind.git

Merge, from CGTUNE branch:

r1768: Cosmetic (non-functional) changes associated with r1767.

r1767: Add a second spill-code-avoidance optimisation, which could be
called 'directReload' for lack of a better name.  If an instruction
reads exactly one vreg which is currently in a spill slot, and this
is the last use of that vreg, see if the instruction can be converted
into one that reads directly from the spill slot.  This is clearly
only possible for x86 and amd64 targets, since ppc is a load-store
architecture.

So, for example,

   orl %vreg, %dst

where %vreg is in a spill slot, and this is its last use, would
previously be converted to

   movl $spill-offset(%ebp), %tmp
   orl %tmp, %dst

whereas now it becomes

   orl $spill-offset(%ebp), %dst

This not only avoids an instruction, it eliminates the need for a
reload temporary (%tmp in this example) and so potentially further
reduces spilling.

The implementation is in two parts: an architecture-independent part,
in reg_alloc2.c, which finds candidate instructions, and a
host-dependent function (directReload_ARCH) for each arch supporting
the optimisation.  The directReload_ function does the instruction
form conversion, when possible.  Currently only x86 hosts are
supported.

As a side effect, change the form of the X86_Test32 instruction from
reg-only to reg/mem so it can participate in such transformations.

This gives a code size reduction of 0.6% for perf/bz2 on x86
memcheck, but tends to be more effective for long blocks of x86 FP
code.

git-svn-id: svn://svn.valgrind.org/vex/trunk@1779
---
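The shape of the hook is easiest to see in isolation.  The following
toy sketch (all names invented for illustration; the real types are
VEX's HInstr*, HReg and Short) shows the contract between the
allocator and the per-host rewriter:

   typedef struct ToyInsn ToyInsn;   /* stand-in for VEX's HInstr */

   /* The hook's contract: insn 'i' reads virtual register 'vreg'
      exactly once, this is the last use of 'vreg', and 'vreg'
      currently lives at frame offset 'spill_off'.  The hook returns
      a variant of 'i' that reads spill_off(%ebp) directly, or NULL
      if no memory-operand form of 'i' exists. */
   typedef ToyInsn* (*DirectReloadFn)( ToyInsn* i, int vreg,
                                       short spill_off );

   /* Allocator side, per candidate instruction: try the rewrite
      first; on NULL, fall back to the usual reload-into-a-temp
      path. */
   static ToyInsn* tryDirectReload ( DirectReloadFn directReload,
                                     ToyInsn* i, int vreg,
                                     short spill_off )
   {
      if (directReload) {
         ToyInsn* folded = directReload(i, vreg, spill_off);
         if (folded)
            return folded;   /* one insn saved, no temp reg needed */
      }
      return NULL;           /* caller emits genReload as before */
   }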
diff --git a/VEX/priv/host-generic/h_generic_regs.h b/VEX/priv/host-generic/h_generic_regs.h
index 82fc9ad40e..7e9923b68d 100644
--- a/VEX/priv/host-generic/h_generic_regs.h
+++ b/VEX/priv/host-generic/h_generic_regs.h
@@ -266,9 +266,10 @@ HInstrArray* doRegisterAllocation (
    void    (*mapRegs) (HRegRemap*, HInstr*, Bool),
 
    /* Return an insn to spill/restore a real reg to a spill slot
-      offset. */
+      offset.  And optionally a function to do direct reloads. */
    HInstr* (*genSpill) ( HReg, Int, Bool ),
    HInstr* (*genReload) ( HReg, Int, Bool ),
+   HInstr* (*directReload) ( HInstr*, HReg, Short ),
    Int guest_sizeB,
 
    /* For debug printing only. */
diff --git a/VEX/priv/host-generic/reg_alloc2.c b/VEX/priv/host-generic/reg_alloc2.c
index 6a58c3b04c..e826ab4d3b 100644
--- a/VEX/priv/host-generic/reg_alloc2.c
+++ b/VEX/priv/host-generic/reg_alloc2.c
@@ -323,10 +323,14 @@ HInstrArray* doRegisterAllocation (
    /* Apply a reg-reg mapping to an insn. */
    void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
 
-   /* Return an insn to spill/restore a real reg to a spill slot
-      byte offset. */
+   /* Return an insn to spill/restore a real reg to a spill slot byte
+      offset.  Also (optionally) a 'directReload' function, which
+      attempts to replace a given instruction by one which reads
+      directly from a specified spill slot.  May be NULL, in which
+      case the optimisation is not attempted. */
    HInstr* (*genSpill) ( HReg, Int, Bool ),
    HInstr* (*genReload) ( HReg, Int, Bool ),
+   HInstr* (*directReload) ( HInstr*, HReg, Short ),
    Int guest_sizeB,
 
    /* For debug printing only.
    */
@@ -1162,6 +1166,76 @@ HInstrArray* doRegisterAllocation (
 
       initHRegRemap(&remap);
 
+      /* ------------ BEGIN directReload optimisation ----------- */
+
+      /* If the instruction reads exactly one vreg which is currently
+         in a spill slot, and this is the last use of that vreg, see if
+         we can convert the instruction into one which reads directly
+         from the spill slot.  This is clearly only possible for x86
+         and amd64 targets, since ppc is a load-store architecture.  If
+         successful, replace instrs_in->arr[ii] with this new
+         instruction, and recompute its reg usage, so that the change
+         is invisible to the standard-case handling that follows. */
+
+      if (directReload && reg_usage.n_used <= 2) {
+         Bool  debug_direct_reload = True && False;
+         HReg  cand     = INVALID_HREG;
+         Int   nreads   = 0;
+         Short spilloff = 0;
+
+         for (j = 0; j < reg_usage.n_used; j++) {
+
+            vreg = reg_usage.hreg[j];
+
+            if (!hregIsVirtual(vreg))
+               continue;
+
+            if (reg_usage.mode[j] == HRmRead) {
+               nreads++;
+               m = hregNumber(vreg);
+               vassert(IS_VALID_VREGNO(m));
+               k = vreg_state[m];
+               if (!IS_VALID_RREGNO(k)) {
+                  /* ok, it is spilled.  Now, is this its last use? */
+                  vassert(vreg_lrs[m].dead_before >= ii+1);
+                  if (vreg_lrs[m].dead_before == ii+1
+                      && cand == INVALID_HREG) {
+                     spilloff = vreg_lrs[m].spill_offset;
+                     cand = vreg;
+                  }
+               }
+            }
+         }
+
+         if (nreads == 1 && cand != INVALID_HREG) {
+            HInstr* reloaded;
+            if (reg_usage.n_used == 2)
+               vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);
+
+            reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
+            if (debug_direct_reload) {
+               vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
+               ppInstr(instrs_in->arr[ii], mode64);
+            }
+            if (reloaded) {
+               /* Update info about the insn, so it looks as if it had
+                  been in this form all along. */
+               instrs_in->arr[ii] = reloaded;
+               (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
+               if (debug_direct_reload) {
+                  vex_printf("  -->  ");
+                  ppInstr(reloaded, mode64);
+               }
+            }
+
+            if (debug_direct_reload)
+               vex_printf("\n");
+         }
+
+      }
+
+      /* ------------ END directReload optimisation ------------ */
+
       /* for each reg mentioned in the insn ...
       */
       for (j = 0; j < reg_usage.n_used; j++) {
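To make the mechanism concrete, consider a hypothetical trace (vreg
and offset numbers invented).  Suppose insn ii reads v5 and writes
v7, v5 sits in the spill slot at offset 28, and
vreg_lrs[v5].dead_before == ii+1.  The scan above yields nreads == 1,
cand == v5 and spilloff == 28, and directReload rewrites:

   before:  orl %v5, %v7        (v5 spilled; would otherwise become
                                 movl 28(%ebp),%tmp / orl %tmp,%v7)
   after:   orl 28(%ebp), %v7   (returned by directReload)

Because reg_usage is recomputed for the rewritten insn, the per-reg
loop below never sees v5 at all, and so never has to find a real
register for it.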
diff --git a/VEX/priv/host-x86/hdefs.c b/VEX/priv/host-x86/hdefs.c
index 914794faec..0cd32bb32a 100644
--- a/VEX/priv/host-x86/hdefs.c
+++ b/VEX/priv/host-x86/hdefs.c
@@ -598,7 +598,7 @@ X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
    i->Xin.Sh32.dst = dst;
    return i;
 }
-X86Instr* X86Instr_Test32 ( UInt imm32, HReg dst ) {
+X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
    X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
    i->tag              = Xin_Test32;
    i->Xin.Test32.imm32 = imm32;
@@ -908,7 +908,7 @@ void ppX86Instr ( X86Instr* i, Bool mode64 ) {
       return;
    case Xin_Test32:
       vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
-      ppHRegX86(i->Xin.Test32.dst);
+      ppX86RM(i->Xin.Test32.dst);
       return;
    case Xin_Unary32:
       vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
@@ -1173,7 +1173,7 @@ void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
       addHRegUse(u, HRmRead, hregX86_ECX());
       return;
    case Xin_Test32:
-      addHRegUse(u, HRmRead, i->Xin.Test32.dst);
+      addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
       return;
    case Xin_Unary32:
       addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
@@ -1402,7 +1402,7 @@ void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
       mapReg(m, &i->Xin.Sh32.dst);
       return;
    case Xin_Test32:
-      mapReg(m, &i->Xin.Test32.dst);
+      mapRegs_X86RM(m, i->Xin.Test32.dst);
       return;
    case Xin_Unary32:
       mapReg(m, &i->Xin.Unary32.dst);
@@ -1610,6 +1610,82 @@ X86Instr* genReload_X86 ( HReg rreg, Int offsetB, Bool mode64 )
    }
 }
 
+/* The given instruction reads the specified vreg exactly once, and
+   that vreg is currently located at the given spill offset.  If
+   possible, return a variant of the instruction which instead
+   references the spill slot directly. */
+
+X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
+{
+   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
+
+   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
+      Convert to: src=RMI_Mem, dst=Reg
+   */
+   if (i->tag == Xin_Alu32R
+       && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
+           || i->Xin.Alu32R.op == Xalu_XOR)
+       && i->Xin.Alu32R.src->tag == Xrmi_Reg
+       && i->Xin.Alu32R.src->Xrmi.Reg.reg == vreg) {
+      vassert(i->Xin.Alu32R.dst != vreg);
+      return X86Instr_Alu32R(
+                i->Xin.Alu32R.op,
+                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
+                i->Xin.Alu32R.dst
+             );
+   }
+
+   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
+      Convert to: src=RI_Imm, dst=Mem
+   */
+   if (i->tag == Xin_Alu32R
+       && i->Xin.Alu32R.op == Xalu_CMP
+       && i->Xin.Alu32R.src->tag == Xrmi_Imm
+       && i->Xin.Alu32R.dst == vreg) {
+      return X86Instr_Alu32M(
+                i->Xin.Alu32R.op,
+                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
+                X86AMode_IR( spill_off, hregX86_EBP())
+             );
+   }
+
+   /* Deal with form: Push(RMI_Reg)
+      Convert to: Push(RMI_Mem)
+   */
+   if (i->tag == Xin_Push
+       && i->Xin.Push.src->tag == Xrmi_Reg
+       && i->Xin.Push.src->Xrmi.Reg.reg == vreg) {
+      return X86Instr_Push(
+                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
+             );
+   }
+
+   /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
+      Convert to CMov32(RM_Mem, dst) */
+   if (i->tag == Xin_CMov32
+       && i->Xin.CMov32.src->tag == Xrm_Reg
+       && i->Xin.CMov32.src->Xrm.Reg.reg == vreg) {
+      vassert(i->Xin.CMov32.dst != vreg);
+      return X86Instr_CMov32(
+                i->Xin.CMov32.cond,
+                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
+                i->Xin.CMov32.dst
+             );
+   }
+
+   /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
+   if (i->tag == Xin_Test32
+       && i->Xin.Test32.dst->tag == Xrm_Reg
+       && i->Xin.Test32.dst->Xrm.Reg.reg == vreg) {
+      return X86Instr_Test32(
+                i->Xin.Test32.imm32,
+                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
+             );
+   }
+
+   return NULL;
+}
+
 
 /* --------- The x86 assembler (bleh.) --------- */
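Spelled out as assembly, the five rewrites above are (using
0x1C(%ebp) as a stand-in for an arbitrary spill-slot address):

   movl   %vreg, %dst    ->  movl   0x1C(%ebp), %dst   (also orl, xorl)
   cmpl   $imm, %vreg    ->  cmpl   $imm, 0x1C(%ebp)
   pushl  %vreg          ->  pushl  0x1C(%ebp)
   cmovCC %vreg, %dst    ->  cmovCC 0x1C(%ebp), %dst
   testl  $imm, %vreg    ->  testl  $imm, 0x1C(%ebp)

Note that the cmpl rewrite turns an Alu32R into an Alu32M, a
combination (Xalu_CMP with a memory destination) the emitter
previously had no case for; that is what the new 'case Xalu_CMP:
opc = 0x39; subopc_imm = 7;' line in emit_X86Instr below supplies
(0x39 encodes the register-source form, and /7 is CMP's sub-opcode in
the 0x81 immediate group).  The testl rewrite is the reason Test32's
operand is widened from HReg to X86RM* throughout this file.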
@@ -2010,6 +2086,7 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
       switch (i->Xin.Alu32M.op) {
          case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
          case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
+         case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
          default: goto bad;
       }
       switch (i->Xin.Alu32M.src->tag) {
@@ -2054,11 +2131,19 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
       goto done;
 
    case Xin_Test32:
-      /* testl $imm32, %reg */
-      *p++ = 0xF7;
-      p = doAMode_R(p, fake(0), i->Xin.Test32.dst);
-      p = emit32(p, i->Xin.Test32.imm32);
-      goto done;
+      if (i->Xin.Test32.dst->tag == Xrm_Reg) {
+         /* testl $imm32, %reg */
+         *p++ = 0xF7;
+         p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
+         p = emit32(p, i->Xin.Test32.imm32);
+         goto done;
+      } else {
+         /* testl $imm32, amode */
+         *p++ = 0xF7;
+         p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
+         p = emit32(p, i->Xin.Test32.imm32);
+         goto done;
+      }
 
    case Xin_Unary32:
       if (i->Xin.Unary32.op == Xun_NOT) {
diff --git a/VEX/priv/host-x86/hdefs.h b/VEX/priv/host-x86/hdefs.h
index 03f4fd451c..be80251728 100644
--- a/VEX/priv/host-x86/hdefs.h
+++ b/VEX/priv/host-x86/hdefs.h
@@ -351,7 +351,7 @@ typedef
       Xin_Alu32R,    /* 32-bit mov/arith/logical, dst=REG */
       Xin_Alu32M,    /* 32-bit mov/arith/logical, dst=MEM */
       Xin_Sh32,      /* 32-bit shift/rotate, dst=REG */
-      Xin_Test32,    /* 32-bit test of REG against imm32 (AND, set
+      Xin_Test32,    /* 32-bit test of REG or MEM against imm32 (AND, set
                         flags, discard result) */
       Xin_Unary32,   /* 32-bit not and neg */
       Xin_Lea32,     /* 32-bit compute EA into a reg */
@@ -413,8 +413,8 @@ typedef
             HReg    dst;
          } Sh32;
          struct {
-            UInt   imm32;
-            HReg   dst;     /* not written, only read */
+            UInt   imm32;
+            X86RM* dst;     /* not written, only read */
          } Test32;
          /* Not and Neg */
          struct {
@@ -624,7 +624,7 @@ extern X86Instr* X86Instr_Unary32   ( X86UnaryOp op, HReg dst );
 extern X86Instr* X86Instr_Lea32     ( X86AMode* am, HReg dst );
 extern X86Instr* X86Instr_Sh32      ( X86ShiftOp, UInt, HReg );
-extern X86Instr* X86Instr_Test32    ( UInt imm32, HReg dst );
+extern X86Instr* X86Instr_Test32    ( UInt imm32, X86RM* dst );
 extern X86Instr* X86Instr_MulL      ( Bool syned, X86RM* );
 extern X86Instr* X86Instr_Div       ( Bool syned, X86RM* );
 extern X86Instr* X86Instr_Sh3232    ( X86ShiftOp, UInt amt, HReg src, HReg dst );
@@ -672,6 +672,8 @@ extern Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr*,
                            Bool, void* dispatch );
 extern X86Instr*   genSpill_X86  ( HReg rreg, Int offset, Bool );
 extern X86Instr*   genReload_X86 ( HReg rreg, Int offset, Bool );
+extern X86Instr*   directReload_X86 ( X86Instr* i,
+                                      HReg vreg, Short spill_off );
 extern void        getAllocableRegs_X86 ( Int*, HReg** );
 extern HInstrArray* iselSB_X86   ( IRSB*, VexArch, VexArchInfo*,
diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c
index cb38262cdb..4a6522f951 100644
--- a/VEX/priv/host-x86/isel.c
+++ b/VEX/priv/host-x86/isel.c
@@ -113,6 +113,12 @@ static IRExpr* bind ( Int binder )
    return IRExpr_Binder(binder);
 }
 
+static Bool isZeroU8 ( IRExpr* e )
+{
+   return e->tag == Iex_Const
+          && e->Iex.Const.con->tag == Ico_U8
+          && e->Iex.Const.con->Ico.U8 == 0;
+}
 
 /*---------------------------------------------------------*/
@@ -1248,12 +1254,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
       case Iex_Mux0X: {
         if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) &&
             typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
-           HReg r8;
-           HReg rX   = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
-           X86RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
-           HReg dst  = newVRegI(env);
+           X86RM* r8;
+           HReg rX   = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
+           X86RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
+           HReg dst  = newVRegI(env);
            addInstr(env, mk_iMOVsd_RR(rX,dst));
-           r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+           r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
            addInstr(env, X86Instr_Test32(0xFF, r8));
            addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst));
            return dst;
@@ -1552,7 +1558,7 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
    if (e->tag == Iex_RdTmp) {
       HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
       /* Test32 doesn't modify r32; so this is OK. */
-      addInstr(env, X86Instr_Test32(1,r32));
+      addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
       return Xcc_NZ;
    }
 
@@ -1597,8 +1603,8 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
                                     unop(Iop_32to1,bind(0)) );
    if (matchIRExpr(&mi,p_32to1,e)) {
-      HReg r = iselIntExpr_R(env, mi.bindee[0]);
-      addInstr(env, X86Instr_Test32(1,r));
+      X86RM* rm = iselIntExpr_RM(env, mi.bindee[0]);
+      addInstr(env, X86Instr_Test32(1,rm));
       return Xcc_NZ;
    }
 
@@ -1607,8 +1613,8 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
 
    /* CmpNEZ8(x) */
    if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
-      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
-      addInstr(env, X86Instr_Test32(0xFF,r));
+      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
+      addInstr(env, X86Instr_Test32(0xFF,rm));
       return Xcc_NZ;
    }
 
@@ -1617,8 +1623,8 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
 
    /* CmpNEZ16(x) */
    if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ16) {
-      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
-      addInstr(env, X86Instr_Test32(0xFFFF,r));
+      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
+      addInstr(env, X86Instr_Test32(0xFFFF,rm));
       return Xcc_NZ;
    }
 
@@ -1721,16 +1727,26 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
    if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8)) {
-      HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
-      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
-      HReg    r    = newVRegI(env);
-      addInstr(env, mk_iMOVsd_RR(r1,r));
-      addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
-      addInstr(env, X86Instr_Test32(0xFF,r));
-      switch (e->Iex.Binop.op) {
-         case Iop_CmpEQ8: return Xcc_Z;
-         case Iop_CmpNE8: return Xcc_NZ;
-         default: vpanic("iselCondCode(x86): CmpXX8");
+      if (isZeroU8(e->Iex.Binop.arg2)) {
+         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
+         switch (e->Iex.Binop.op) {
+            case Iop_CmpEQ8: return Xcc_Z;
+            case Iop_CmpNE8: return Xcc_NZ;
+            default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
+         }
+      } else {
+         HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+         HReg    r    = newVRegI(env);
+         addInstr(env, mk_iMOVsd_RR(r1,r));
+         addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
+         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
+         switch (e->Iex.Binop.op) {
+            case Iop_CmpEQ8: return Xcc_Z;
+            case Iop_CmpNE8: return Xcc_NZ;
+            default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
+         }
       }
    }
 
@@ -1743,7 +1759,7 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
       HReg    r    = newVRegI(env);
       addInstr(env, mk_iMOVsd_RR(r1,r));
       addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
-      addInstr(env, X86Instr_Test32(0xFFFF,r));
+      addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
       switch (e->Iex.Binop.op) {
          case Iop_CmpEQ16: return Xcc_Z;
          case Iop_CmpNE16: return Xcc_NZ;
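The isZeroU8 special case is the one place this patch changes which
instructions are selected, rather than just their operand forms.  For
CmpEQ8(x,0:I8) / CmpNE8(x,0:I8), a common pattern in generated
condition tests, the lowering shrinks from three instructions and a
fresh temporary to a single testl (register names illustrative):

   previously:   movl  %r1, %r
                 xorl  $0, %r
                 testl $0xFF, %r
   now:          testl $0xFF, %r1

The shorter form also reads just one register, so if %r1 holds a
spilled vreg at its last use, directReload_X86 can fold it further
into 'testl $0xFF, off(%ebp)'.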
@@ -1901,15 +1917,16 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
 
    /* 64-bit Mux0X */
    if (e->tag == Iex_Mux0X) {
-      HReg e0Lo, e0Hi, eXLo, eXHi, r8;
-      HReg tLo = newVRegI(env);
-      HReg tHi = newVRegI(env);
+      X86RM* rm8;
+      HReg e0Lo, e0Hi, eXLo, eXHi;
+      HReg tLo = newVRegI(env);
+      HReg tHi = newVRegI(env);
       iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
       iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
       addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
       addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
-      r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
-      addInstr(env, X86Instr_Test32(0xFF, r8));
+      rm8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
+      addInstr(env, X86Instr_Test32(0xFF, rm8));
       /* This assumes the first cmov32 doesn't trash the condition
          codes, so they are still available for the second cmov32 */
       addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
@@ -2047,7 +2064,7 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
            and those regs are legitimately modifiable. */
         addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
         addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
-        addInstr(env, X86Instr_Test32(32, hregX86_ECX()));
+        addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
         addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
         addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
@@ -2089,7 +2106,7 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
            and those regs are legitimately modifiable. */
         addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
         addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
-        addInstr(env, X86Instr_Test32(32, hregX86_ECX()));
+        addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
         addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
         addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
@@ -2812,12 +2829,12 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
    if (e->tag == Iex_Mux0X) {
      if (ty == Ity_F64
          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
-        HReg r8  = iselIntExpr_R(env, e->Iex.Mux0X.cond);
-        HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
-        HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
-        HReg dst = newVRegF(env);
+        X86RM* rm8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
+        HReg   rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
+        HReg   r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
+        HReg   dst = newVRegF(env);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst));
-        addInstr(env, X86Instr_Test32(0xFF, r8));
+        addInstr(env, X86Instr_Test32(0xFF, rm8));
         addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst));
         return dst;
      }
@@ -3333,12 +3350,12 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
    } /* if (e->tag == Iex_Binop) */
 
    if (e->tag == Iex_Mux0X) {
-      HReg r8  = iselIntExpr_R(env, e->Iex.Mux0X.cond);
-      HReg rX  = iselVecExpr(env, e->Iex.Mux0X.exprX);
-      HReg r0  = iselVecExpr(env, e->Iex.Mux0X.expr0);
-      HReg dst = newVRegV(env);
+      X86RM* rm8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
+      HReg   rX  = iselVecExpr(env, e->Iex.Mux0X.exprX);
+      HReg   r0  = iselVecExpr(env, e->Iex.Mux0X.expr0);
+      HReg   dst = newVRegV(env);
       addInstr(env, mk_vMOVsd_RR(rX,dst));
-      addInstr(env, X86Instr_Test32(0xFF, r8));
+      addInstr(env, X86Instr_Test32(0xFF, rm8));
       addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
       return dst;
    }
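All four Mux0X lowerings (the I8/I16/I32, I64, F64 and V128 cases)
now fetch the condition with iselIntExpr_RM rather than
iselIntExpr_R, so the testl that examines it can take either a
register or a memory operand.  The selected sequence keeps the same
shape; for the scalar-integer case it is roughly (invented regs):

   movl   %rX, %dst
   testl  $0xFF, <cond>    /* <cond>: a register, or now an amode */
   cmovz  <r0>, %dst

Letting Test32 accept an X86RM* both simplifies these sites and makes
them candidates for the direct-reload rewrite when the condition's
vreg has been spilled.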
diff --git a/VEX/priv/main/vex_main.c b/VEX/priv/main/vex_main.c
index 108720c21d..9fabf70107 100644
--- a/VEX/priv/main/vex_main.c
+++ b/VEX/priv/main/vex_main.c
@@ -186,17 +186,18 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
       from the target instruction set. */
    HReg* available_real_regs;
    Int   n_available_real_regs;
-   Bool         (*isMove)      ( HInstr*, HReg*, HReg* );
-   void         (*getRegUsage) ( HRegUsage*, HInstr*, Bool );
-   void         (*mapRegs)     ( HRegRemap*, HInstr*, Bool );
-   HInstr*      (*genSpill)    ( HReg, Int, Bool );
-   HInstr*      (*genReload)   ( HReg, Int, Bool );
-   void         (*ppInstr)     ( HInstr*, Bool );
-   void         (*ppReg)       ( HReg );
-   HInstrArray* (*iselSB)      ( IRSB*, VexArch, VexArchInfo*,
-                                 VexAbiInfo* );
-   Int          (*emit)        ( UChar*, Int, HInstr*, Bool, void* );
-   IRExpr*      (*specHelper)  ( HChar*, IRExpr** );
+   Bool         (*isMove)       ( HInstr*, HReg*, HReg* );
+   void         (*getRegUsage)  ( HRegUsage*, HInstr*, Bool );
+   void         (*mapRegs)      ( HRegRemap*, HInstr*, Bool );
+   HInstr*      (*genSpill)     ( HReg, Int, Bool );
+   HInstr*      (*genReload)    ( HReg, Int, Bool );
+   HInstr*      (*directReload) ( HInstr*, HReg, Short );
+   void         (*ppInstr)      ( HInstr*, Bool );
+   void         (*ppReg)        ( HReg );
+   HInstrArray* (*iselSB)       ( IRSB*, VexArch, VexArchInfo*,
+                                  VexAbiInfo* );
+   Int          (*emit)         ( UChar*, Int, HInstr*, Bool, void* );
+   IRExpr*      (*specHelper)   ( HChar*, IRExpr** );
    Bool (*preciseMemExnsFn) ( Int, Int );
 
    DisOneInstrFn disInstrFn;
@@ -221,6 +222,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
    mapRegs      = NULL;
    genSpill     = NULL;
    genReload    = NULL;
+   directReload = NULL;
    ppInstr      = NULL;
    ppReg        = NULL;
    iselSB       = NULL;
@@ -246,18 +248,19 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
    switch (vta->arch_host) {
 
       case VexArchX86:
-         mode64      = False;
+         mode64       = False;
         getAllocableRegs_X86 ( &n_available_real_regs,
                                &available_real_regs );
-         isMove      = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_X86Instr;
-         getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_X86Instr;
-         mapRegs     = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_X86Instr;
-         genSpill    = (HInstr*(*)(HReg,Int, Bool)) genSpill_X86;
-         genReload   = (HInstr*(*)(HReg,Int, Bool)) genReload_X86;
-         ppInstr     = (void(*)(HInstr*, Bool)) ppX86Instr;
-         ppReg       = (void(*)(HReg)) ppHRegX86;
-         iselSB      = iselSB_X86;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_X86Instr;
+         isMove       = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_X86Instr;
+         getRegUsage  = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_X86Instr;
+         mapRegs      = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_X86Instr;
+         genSpill     = (HInstr*(*)(HReg,Int, Bool)) genSpill_X86;
+         genReload    = (HInstr*(*)(HReg,Int, Bool)) genReload_X86;
+         directReload = (HInstr*(*)(HInstr*,HReg,Short)) directReload_X86;
+         ppInstr      = (void(*)(HInstr*, Bool)) ppX86Instr;
+         ppReg        = (void(*)(HReg)) ppHRegX86;
+         iselSB       = iselSB_X86;
+         emit         = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_X86Instr;
         host_is_bigendian = False;
         host_word_type    = Ity_I32;
         vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps));
@@ -581,7 +584,8 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
       rcode = doRegisterAllocation ( vcode, available_real_regs,
                                      n_available_real_regs,
                                      isMove, getRegUsage, mapRegs,
-                                     genSpill, genReload, guest_sizeB,
+                                     genSpill, genReload, directReload,
+                                     guest_sizeB,
                                      ppInstr, ppReg, mode64 );
 
       vexAllocSanityCheck();
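Finally, note how deliberately optional the hook is: every backend's
directReload pointer starts out NULL, and only the x86 case fills it
in, so ppc (and, at this point, amd64) silently keep the old
reload-into-a-temp behaviour.  A port wanting the optimisation only
has to supply the one conversion function and wire it up; an amd64
version might look like this (hypothetical sketch only; no
directReload_AMD64 exists in this commit):

      case VexArchAMD64:
         /* hypothetical wiring, for illustration; this commit
            supports x86 hosts only: */
         directReload = (HInstr*(*)(HInstr*,HReg,Short)) directReload_AMD64;
         break;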