From: Julian Seward
Date: Tue, 28 Mar 2017 14:57:17 +0000 (+0000)
Subject: Implement the most important cases for amd64 direct-reload optimisation:
X-Git-Tag: svn/VALGRIND_3_13_0^2~41
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4a6236aaa1acfa44f2f17d331443ccdbf835ac35;p=thirdparty%2Fvalgrind.git

Implement the most important cases for amd64 direct-reload optimisation:

   cmpq $imm32, %vreg -> cmpq $imm32, (stack-slot-of-vreg)
   orq %vreg, %reg -> orq (stack-slot-of-vreg), %reg

This is in support of
"Bug 375839 - Temporary storage exhausted, when long sequence of vfmadd231ps instructions to be executed",
and reduces code size by around 3% in that case.

git-svn-id: svn://svn.valgrind.org/vex/trunk@3335
---

diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index 9dec78c109..b0c9db8568 100644
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -1995,6 +1995,43 @@ void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
    }
 }
 
+AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
+{  /* Returns a new instr that uses the spill slot of vreg directly, or NULL. */
+   vassert(spill_off >= 0 && spill_off < 10000); /* sanity bound, loosely chosen */
+
+   /* Deal with form: MOV/OR/XOR with src=RMI_Reg, dst=Reg where src == vreg
+      Convert to: src=RMI_Mem (amode built from spill_off and RBP), same dst
+   */
+   if (i->tag == Ain_Alu64R
+       && (i->Ain.Alu64R.op == Aalu_MOV || i->Ain.Alu64R.op == Aalu_OR
+           || i->Ain.Alu64R.op == Aalu_XOR)
+       && i->Ain.Alu64R.src->tag == Armi_Reg
+       && sameHReg(i->Ain.Alu64R.src->Armi.Reg.reg, vreg)) {
+      vassert(! sameHReg(i->Ain.Alu64R.dst, vreg));
+      return AMD64Instr_Alu64R(
+                i->Ain.Alu64R.op,
+                AMD64RMI_Mem( AMD64AMode_IR( spill_off, hregAMD64_RBP())),
+                i->Ain.Alu64R.dst
+             );
+   }
+
+   /* Deal with form: CMP with src=RMI_Imm, dst=Reg where dst == vreg
+      Convert to: Alu64M, src=RI_Imm, dst=Mem (spill_off/RBP amode)
+   */
+   if (i->tag == Ain_Alu64R
+       && (i->Ain.Alu64R.op == Aalu_CMP)
+       && i->Ain.Alu64R.src->tag == Armi_Imm
+       && sameHReg(i->Ain.Alu64R.dst, vreg)) {
+      return AMD64Instr_Alu64M(
+                i->Ain.Alu64R.op,
+                AMD64RI_Imm( i->Ain.Alu64R.src->Armi.Imm.imm32 ),
+                AMD64AMode_IR( spill_off, hregAMD64_RBP())
+             );
+   }
+
+   return NULL; /* neither form matched: no direct reload for this instr */
+}
+
 
 /* --------- The amd64 assembler (bleh.) --------- */
 
@@ -2607,6 +2644,39 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
                goto bad;
          }
       }
+      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not allowed here.  Only CMP is
+         handled so far; any other op goes to bad.  (Derived from the x86 version.) */
+      opc = subopc_imm = opc_imma = 0;
+      switch (i->Ain.Alu64M.op) {
+         case Aalu_CMP: opc = 0x39; subopc_imm = 7; break;
+         default: goto bad;
+      }
+      switch (i->Ain.Alu64M.src->tag) {
+         /* NOTE(review): disabled register-source case; appears to still use x86 names.
+         case Xri_Reg:
+            *p++ = toUChar(opc);
+            p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
+                             i->Xin.Alu32M.dst);
+            goto done;
+         */
+         case Ari_Imm:
+            if (fits8bits(i->Ain.Alu64M.src->Ari.Imm.imm32)) {
+               *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
+               *p++ = 0x83; /* short form: a single immediate byte follows */
+               p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
+               *p++ = toUChar(0xFF & i->Ain.Alu64M.src->Ari.Imm.imm32);
+               goto done;
+            } else {
+               *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
+               *p++ = 0x81; /* long form: a 32-bit immediate follows */
+               p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
+               p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
+               goto done;
+            }
+         default:
+            goto bad;
+      }
+
       break;
 
    case Ain_Sh64:
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
index fe999f2e9c..7796062bb3 100644
--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h
@@ -802,6 +802,9 @@ extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
 extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                               HReg rreg, Int offset, Bool );
 
+extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i,
+                                        HReg vreg, Short spill_off );
+
 extern const RRegUniverse* getRRegUniverse_AMD64 ( void );
 
 extern HInstrArray* iselSB_AMD64 ( const IRSB*,
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
index e263754c6f..f5afc87850 100644
--- a/VEX/priv/main_main.c
+++ b/VEX/priv/main_main.c
@@ -433,6 +433,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          mapRegs      = (__typeof__(mapRegs)) AMD64FN(mapRegs_AMD64Instr);
          genSpill     = (__typeof__(genSpill)) AMD64FN(genSpill_AMD64);
          genReload    = (__typeof__(genReload)) AMD64FN(genReload_AMD64);
+         directReload = (__typeof__(directReload)) AMD64FN(directReload_AMD64);
          ppInstr      = (__typeof__(ppInstr)) AMD64FN(ppAMD64Instr);
          ppReg        = (__typeof__(ppReg)) AMD64FN(ppHRegAMD64);
          iselSB       = AMD64FN(iselSB_AMD64);