Implement the most important cases for amd64 direct-reload optimisation:
author     Julian Seward <jseward@acm.org>
           Tue, 28 Mar 2017 14:57:17 +0000 (14:57 +0000)
committer  Julian Seward <jseward@acm.org>
           Tue, 28 Mar 2017 14:57:17 +0000 (14:57 +0000)
   cmpq $imm32, %vreg  ->  cmpq $imm32, (stack-slot-of-vreg)
   orq %vreg, %reg     ->  orq (stack-slot-of-vreg), %reg

This is in support of "Bug 375839 - Temporary storage exhausted, when long
sequence of vfmadd231ps instructions to be executed", and reduces code size by
around 3% in that case.

git-svn-id: svn://svn.valgrind.org/vex/trunk@3335
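
(The win per folded use is one memory-operand instruction instead of a
reload plus the use.  Below is a hypothetical standalone sketch of how a
register allocator consults such a hook; none of these names are the
actual VEX allocator API, and the hook body simply pretends a fold
matched.)

   #include <stdio.h>

   typedef struct { const char* text; } Instr;

   /* Hypothetical backend hook: returns the folded instruction, or
      NULL when no fold applies. */
   static Instr* direct_reload_hook(Instr* use, int vreg, short spill_off);

   /* At each use of a spilled vreg: try the fold, else reload explicitly. */
   static void handle_use(Instr* use, int vreg, short spill_off)
   {
      Instr* folded = direct_reload_hook(use, vreg, spill_off);
      if (folded) {
         printf("%s\n", folded->text);    /* one instruction, no reload */
      } else {
         printf("movq %d(%%rbp), %%r11\n", (int)spill_off); /* reload... */
         printf("%s\n", use->text);       /* ...then the original use   */
      }
   }

   static Instr g_folded = { "cmpq $5, 48(%rbp)" };
   static Instr* direct_reload_hook(Instr* use, int vreg, short spill_off)
   {
      (void)use; (void)vreg; (void)spill_off;
      return &g_folded;                   /* pretend the pattern matched */
   }

   int main(void)
   {
      Instr use = { "cmpq $5, %vreg7" };
      handle_use(&use, 7, 48);
      return 0;
   }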

VEX/priv/host_amd64_defs.c
VEX/priv/host_amd64_defs.h
VEX/priv/main_main.c

diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index 9dec78c10945f14285d9ac90894f286ab2e05d9f..b0c9db8568b900108f5e0ce4aab84cb9b387e5ff 100644
@@ -1995,6 +1995,43 @@ void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
    }
 }
 
+AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
+{
+   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
+
+   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg 
+      Convert to: src=RMI_Mem, dst=Reg 
+   */
+   if (i->tag == Ain_Alu64R
+       && (i->Ain.Alu64R.op == Aalu_MOV || i->Ain.Alu64R.op == Aalu_OR
+           || i->Ain.Alu64R.op == Aalu_XOR)
+       && i->Ain.Alu64R.src->tag == Armi_Reg
+       && sameHReg(i->Ain.Alu64R.src->Armi.Reg.reg, vreg)) {
+      vassert(! sameHReg(i->Ain.Alu64R.dst, vreg));
+      return AMD64Instr_Alu64R( 
+                i->Ain.Alu64R.op, 
+                AMD64RMI_Mem( AMD64AMode_IR( spill_off, hregAMD64_RBP())),
+                i->Ain.Alu64R.dst
+             );
+   }
+
+   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg 
+      Convert to: src=RI_Imm, dst=Mem
+   */
+   if (i->tag == Ain_Alu64R
+       && (i->Ain.Alu64R.op == Aalu_CMP)
+       && i->Ain.Alu64R.src->tag == Armi_Imm
+       && sameHReg(i->Ain.Alu64R.dst, vreg)) {
+      return AMD64Instr_Alu64M( 
+                i->Ain.Alu64R.op,
+                AMD64RI_Imm( i->Ain.Alu64R.src->Armi.Imm.imm32 ),
+                AMD64AMode_IR( spill_off, hregAMD64_RBP())
+             );
+   }
+
+   return NULL;
+}
+
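
(For illustration: a standalone toy model of the two cases above.  The
types are invented stand-ins for AMD64Instr/AMD64RMI, not the real VEX
structures.  The source-operand fold is safe for MOV/OR/XOR because the
source is only read; the CMP fold is safe even though vreg is the
destination, because CMP writes nothing but flags.)

   #include <stdbool.h>
   #include <stdio.h>

   typedef enum { OP_MOV, OP_OR, OP_XOR, OP_CMP } Op;
   typedef enum { O_REG, O_MEM, O_IMM } Kind;
   typedef struct { Kind kind; int reg; int off; unsigned imm; } Operand;
   typedef struct { Op op; Operand src; Operand dst; } Alu;

   /* Mirrors the two directReload_AMD64 cases; false = no fold applies. */
   static bool direct_reload(Alu* i, int vreg, int spill_off)
   {
      /* Case 1: src is the spilled vreg, and is only read. */
      if ((i->op == OP_MOV || i->op == OP_OR || i->op == OP_XOR)
          && i->src.kind == O_REG && i->src.reg == vreg) {
         i->src = (Operand){ O_MEM, 0, spill_off, 0 };
         return true;
      }
      /* Case 2: cmp $imm, vreg.  dst is the spilled vreg, but CMP only
         sets flags, so comparing against the slot itself is equivalent. */
      if (i->op == OP_CMP && i->src.kind == O_IMM
          && i->dst.kind == O_REG && i->dst.reg == vreg) {
         i->dst = (Operand){ O_MEM, 0, spill_off, 0 };
         return true;
      }
      return false;  /* anything else takes the normal reload path */
   }

   int main(void)
   {
      Alu i = { OP_CMP, { O_IMM, 0, 0, 5 }, { O_REG, 7, 0, 0 } };
      if (direct_reload(&i, 7, 48))
         printf("cmpq $5, %%vreg7 -> cmpq $5, %d(%%rbp)\n", i.dst.off);
      return 0;
   }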
 
 /* --------- The amd64 assembler (bleh.) --------- */
 
@@ -2607,6 +2644,39 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
                goto bad;
          }
       }
+      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not allowed here.
+         Currently only CMP is handled.  (This is derived from the
+         x86 version of same). */
+      opc = subopc_imm = opc_imma = 0;
+      switch (i->Ain.Alu64M.op) {
+         case Aalu_CMP: opc = 0x39; subopc_imm = 7; break;
+         default: goto bad;
+      }
+      switch (i->Ain.Alu64M.src->tag) {
+         /*
+         case Xri_Reg:
+            *p++ = toUChar(opc);
+            p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
+                             i->Xin.Alu32M.dst);
+            goto done;
+         */
+         case Ari_Imm:
+            if (fits8bits(i->Ain.Alu64M.src->Ari.Imm.imm32)) {
+               *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
+               *p++ = 0x83;
+               p    = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
+               *p++ = toUChar(0xFF & i->Ain.Alu64M.src->Ari.Imm.imm32);
+               goto done;
+            } else {
+               *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
+               *p++ = 0x81;
+               p    = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
+               p    = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
+               goto done;
+            }
+         default: 
+            goto bad;
+      }
+
       break;
 
    case Ain_Sh64:
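
(The imm8-versus-imm32 choice above follows the standard x86-64 encodings
CMP r/m64,imm8 = REX.W 83 /7 ib and CMP r/m64,imm32 = REX.W 81 /7 id.
Here is a standalone sketch of that selection for an %rbp-relative
destination with a disp8 offset; the real doAMode_M_enc handles all
addressing modes, so this is a simplification, not the VEX emitter.)

   #include <stdint.h>
   #include <stdio.h>

   /* Same test the emitter uses: does imm32 survive a round trip
      through a sign-extended 8-bit immediate? */
   static int fits8bits(uint32_t imm32)
   {
      return (int32_t)(int8_t)imm32 == (int32_t)imm32;
   }

   /* Encode cmpq $imm32, off(%rbp), assuming -128 <= off <= 127. */
   static int emit_cmpq_imm_rbp(uint8_t* p, int8_t off, uint32_t imm32)
   {
      int n = 0;
      p[n++] = 0x48;                           /* REX.W                   */
      p[n++] = fits8bits(imm32) ? 0x83 : 0x81; /* imm8 vs imm32 opcode    */
      p[n++] = 0x7D;                           /* ModRM: mod=01,/7,rm=rbp */
      p[n++] = (uint8_t)off;                   /* disp8                   */
      if (fits8bits(imm32)) {
         p[n++] = (uint8_t)imm32;              /* ib */
      } else {
         for (int i = 0; i < 4; i++)           /* id, little-endian */
            p[n++] = (uint8_t)(imm32 >> (8*i));
      }
      return n;
   }

   int main(void)
   {
      uint8_t buf[16];
      int n = emit_cmpq_imm_rbp(buf, 48, 5);   /* cmpq $5, 48(%rbp) */
      for (int i = 0; i < n; i++) printf("%02X ", buf[i]);
      printf("\n");                            /* prints: 48 83 7D 30 05 */
      return 0;
   }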
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
index fe999f2e9ccb33290f594072e47da2961dc6e4e3..7796062bb38336173569490c4630af879ef5ed3c 100644
@@ -802,6 +802,9 @@ extern void genSpill_AMD64  ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
 extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                               HReg rreg, Int offset, Bool );
 
+extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i,
+                                        HReg vreg, Short spill_off );
+
 extern const RRegUniverse* getRRegUniverse_AMD64 ( void );
 
 extern HInstrArray* iselSB_AMD64           ( const IRSB*, 
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
index e263754c6f690b4a687197ee23d8c05e4c0e9a9d..f5afc87850acc0f814a0fe28010609d2eb2d57cb 100644
@@ -433,6 +433,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          mapRegs      = (__typeof__(mapRegs)) AMD64FN(mapRegs_AMD64Instr);
          genSpill     = (__typeof__(genSpill)) AMD64FN(genSpill_AMD64);
          genReload    = (__typeof__(genReload)) AMD64FN(genReload_AMD64);
+         directReload = (__typeof__(directReload)) AMD64FN(directReload_AMD64);
          ppInstr      = (__typeof__(ppInstr)) AMD64FN(ppAMD64Instr);
          ppReg        = (__typeof__(ppReg)) AMD64FN(ppHRegAMD64);
          iselSB       = AMD64FN(iselSB_AMD64);
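
(directReload is an optional entry in this per-architecture callback
table: only backends that can fold reloads provide it, and the register
allocator must check for NULL before calling.  A minimal standalone
sketch of that pattern follows; Backend and everything in it is invented
for illustration, not the VEX types.)

   #include <stddef.h>
   #include <stdio.h>

   typedef struct Instr { const char* text; } Instr;

   /* Per-architecture callbacks; directReload may legitimately be NULL. */
   typedef struct {
      void   (*genReload)   (Instr** i1, Instr** i2, int rreg, int off);
      Instr* (*directReload)(Instr* i, int vreg, short spill_off);
   } Backend;

   static void amd64_genReload(Instr** i1, Instr** i2, int rreg, int off)
   { (void)i1; (void)i2; (void)rreg; (void)off; puts("explicit reload"); }

   static Instr g_folded = { "orq 48(%rbp), %rax" };
   static Instr* amd64_directReload(Instr* i, int vreg, short off)
   { (void)i; (void)vreg; (void)off; return &g_folded; }

   int main(void)
   {
      Backend backends[2] = {
         { amd64_genReload, amd64_directReload },  /* has the hook       */
         { amd64_genReload, NULL }                 /* no folding support */
      };
      for (int k = 0; k < 2; k++) {
         Backend* b = &backends[k];
         /* Allocator side: consult the hook only if the backend has one. */
         Instr* folded =
            b->directReload ? b->directReload(NULL, 7, 48) : NULL;
         if (folded) printf("%s\n", folded->text);
         else        b->genReload(NULL, NULL, 11, 48);
      }
      return 0;
   }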