From: Julian Seward Date: Sat, 31 Mar 2007 14:30:12 +0000 (+0000) Subject: Teach the x86 back end how to generate 'lea' instructions, and generate X-Git-Tag: svn/VALGRIND_3_3_1^2~45 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a226803fa2a4dfb436bd0906eec536b766731888;p=thirdparty%2Fvalgrind.git Teach the x86 back end how to generate 'lea' instructions, and generate them in a couple of places which are important. This reduces the amount of generated code for memcheck and none by about 1%, and (in very unscientific tests on perf/bz2) speeds memcheck up by about 1%. git-svn-id: svn://svn.valgrind.org/vex/trunk@1745 --- diff --git a/VEX/priv/host-x86/hdefs.c b/VEX/priv/host-x86/hdefs.c index 127285cf64..914794faec 100644 --- a/VEX/priv/host-x86/hdefs.c +++ b/VEX/priv/host-x86/hdefs.c @@ -612,6 +612,13 @@ X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) { i->Xin.Unary32.dst = dst; return i; } +X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_Lea32; + i->Xin.Lea32.am = am; + i->Xin.Lea32.dst = dst; + return i; +} X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) { X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); i->tag = Xin_MulL; @@ -907,6 +914,12 @@ void ppX86Instr ( X86Instr* i, Bool mode64 ) { vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op)); ppHRegX86(i->Xin.Unary32.dst); return; + case Xin_Lea32: + vex_printf("leal "); + ppX86AMode(i->Xin.Lea32.am); + vex_printf(","); + ppHRegX86(i->Xin.Lea32.dst); + return; case Xin_MulL: vex_printf("%cmull ", i->Xin.MulL.syned ? 
's' : 'u'); ppX86RM(i->Xin.MulL.src); @@ -1165,6 +1178,10 @@ void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64) case Xin_Unary32: addHRegUse(u, HRmModify, i->Xin.Unary32.dst); return; + case Xin_Lea32: + addRegUsage_X86AMode(u, i->Xin.Lea32.am); + addHRegUse(u, HRmWrite, i->Xin.Lea32.dst); + return; case Xin_MulL: addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead); addHRegUse(u, HRmModify, hregX86_EAX()); @@ -1390,6 +1407,10 @@ void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 ) case Xin_Unary32: mapReg(m, &i->Xin.Unary32.dst); return; + case Xin_Lea32: + mapRegs_X86AMode(m, i->Xin.Lea32.am); + mapReg(m, &i->Xin.Lea32.dst); + return; case Xin_MulL: mapRegs_X86RM(m, i->Xin.MulL.src); return; @@ -2052,6 +2073,11 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, } break; + case Xin_Lea32: + *p++ = 0x8D; + p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am); + goto done; + case Xin_MulL: subopc = i->Xin.MulL.syned ? 5 : 4; *p++ = 0xF7; diff --git a/VEX/priv/host-x86/hdefs.h b/VEX/priv/host-x86/hdefs.h index 1615cae96a..03f4fd451c 100644 --- a/VEX/priv/host-x86/hdefs.h +++ b/VEX/priv/host-x86/hdefs.h @@ -354,6 +354,7 @@ typedef Xin_Test32, /* 32-bit test of REG against imm32 (AND, set flags, discard result) */ Xin_Unary32, /* 32-bit not and neg */ + Xin_Lea32, /* 32-bit compute EA into a reg */ Xin_MulL, /* 32 x 32 -> 64 multiply */ Xin_Div, /* 64/32 -> (32,32) div and mod */ Xin_Sh3232, /* shldl or shrdl */ @@ -420,6 +421,11 @@ typedef X86UnaryOp op; HReg dst; } Unary32; + /* 32-bit compute EA into a reg */ + struct { + X86AMode* am; + HReg dst; + } Lea32; /* EDX:EAX = EAX *s/u r/m32 */ struct { Bool syned; @@ -615,6 +621,8 @@ typedef extern X86Instr* X86Instr_Alu32R ( X86AluOp, X86RMI*, HReg ); extern X86Instr* X86Instr_Alu32M ( X86AluOp, X86RI*, X86AMode* ); extern X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ); +extern X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ); + extern X86Instr* X86Instr_Sh32 ( X86ShiftOp, UInt, 
HReg ); extern X86Instr* X86Instr_Test32 ( UInt imm32, HReg dst ); extern X86Instr* X86Instr_MulL ( Bool syned, X86RM* ); diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c index 94a1f5d559..cb38262cdb 100644 --- a/VEX/priv/host-x86/isel.c +++ b/VEX/priv/host-x86/isel.c @@ -1315,6 +1315,30 @@ static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ) IRType ty = typeOfIRExpr(env->type_env,e); vassert(ty == Ity_I32); + /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */ + if (e->tag == Iex_Binop + && e->Iex.Binop.op == Iop_Add32 + && e->Iex.Binop.arg2->tag == Iex_Const + && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32 + && e->Iex.Binop.arg1->tag == Iex_Binop + && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32 + && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop + && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32 + && e->Iex.Binop.arg1 + ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const + && e->Iex.Binop.arg1 + ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { + UInt shift = e->Iex.Binop.arg1 + ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; + UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; + if (shift == 1 || shift == 2 || shift == 3) { + HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1); + HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1 + ->Iex.Binop.arg2->Iex.Binop.arg1 ); + return X86AMode_IRRS(imm32, r1, r2, shift); + } + } + /* Add32(expr1, Shl32(expr2, imm)) */ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_Add32 @@ -3489,6 +3513,30 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) case Ist_WrTmp: { IRTemp tmp = stmt->Ist.WrTmp.tmp; IRType ty = typeOfIRTemp(env->type_env, tmp); + + /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..), + compute it into an AMode and then use LEA. 
This usually + produces fewer instructions, often because (for memcheck + created IR) we get t = address-expression, (t is later used + twice) and so doing this naturally turns address-expression + back into an X86 amode. */ + if (ty == Ity_I32 + && stmt->Ist.WrTmp.data->tag == Iex_Binop + && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) { + X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data); + HReg dst = lookupIRTemp(env, tmp); + if (am->tag == Xam_IR && am->Xam.IR.imm == 0) { + /* Hmm, iselIntExpr_AMode wimped out and just computed the + value into a register. Just emit a normal reg-reg move + so reg-alloc can coalesce it away in the usual way. */ + HReg src = am->Xam.IR.reg; + addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst)); + } else { + addInstr(env, X86Instr_Lea32(am,dst)); + } + return; + } + if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data); HReg dst = lookupIRTemp(env, tmp);