x86 back end: use 80-bit loads/stores for floating point spills rather

author Julian Seward <jseward@acm.org>

Sun, 25 Mar 2007 04:14:58 +0000 (04:14 +0000)

committer Julian Seward <jseward@acm.org>

Sun, 25 Mar 2007 04:14:58 +0000 (04:14 +0000)
author Julian Seward <jseward@acm.org>
Sun, 25 Mar 2007 04:14:58 +0000 (04:14 +0000)
committer Julian Seward <jseward@acm.org>
Sun, 25 Mar 2007 04:14:58 +0000 (04:14 +0000)
diff --git a/VEX/priv/host-generic/h_generic_regs.h b/VEX/priv/host-generic/h_generic_regs.h

index 8c5a006c26fcecc6b1ec3b8a81be0b69c969f48f..82fc9ad40e4e346f407b0648a88bd35b0993e162 100644 (file)
--- a/VEX/priv/host-generic/h_generic_regs.h
+++ b/VEX/priv/host-generic/h_generic_regs.h
@@ -87,10 +87,17 @@ typedef UInt HReg;
     available on any specific host.  For example on x86, the available
     classes are: Int32, Flt64, Vec128 only.
  
-   IMPORTANT NOTE: Vec128 is the only >= 128-bit-sized class, and
-   reg_alloc2.c handles it specially when assigning spill slots.  If
-   you add another 128-bit or larger regclass, you must remember to
-   update reg_alloc2.c accordingly.
+   IMPORTANT NOTE: reg_alloc2.c must know how much space is needed to spill
+   each class of register.  It has the following knowledge hardwired in:
+
+      HRcInt32     32 bits
+      HRcInt64     64 bits
+      HRcFlt64     80 bits (on x86 these are spilled by fstpt/fldt)
+      HRcVec64     64 bits
+      HRcVec128    128 bits
+
+   If you add another regclass, you must remember to update
+   reg_alloc2.c accordingly.
  */
  typedef
     enum { 
diff --git a/VEX/priv/host-generic/reg_alloc2.c b/VEX/priv/host-generic/reg_alloc2.c

index 9a6695e0a31495ab357f14a4afada60d0bafed78..638570fe4e6196f8acee851d061df0f613f6e545 100644 (file)
--- a/VEX/priv/host-generic/reg_alloc2.c
+++ b/VEX/priv/host-generic/reg_alloc2.c
@@ -778,8 +778,9 @@ HInstrArray* doRegisterAllocation (
  
     /* --------- Stage 3: allocate spill slots. --------- */
  
-   /* Each spill slot is 8 bytes long.  For 128-bit vregs
-      we have to allocate two spill slots.
+   /* Each spill slot is 8 bytes long.  For vregs which take more than
+      64 bits to spill (classes Flt64 and Vec128), we have to allocate
+      two spill slots.
  
        Do a rank-based allocation of vregs to spill slot numbers.  We
        put as few values as possible in spill slows, but nevertheless
@@ -799,43 +800,44 @@ HInstrArray* doRegisterAllocation (
           continue;
        }
  
-      /* The spill slots are 64 bits in size.  That means, to spill a
-         Vec128-class vreg, we'll need to find two adjacent spill
-         slots to use.  Note, this special-casing needs to happen for
-         all 128-bit sized register classes.  Currently though
-         HRcVector is the only such class. */
+      /* The spill slots are 64 bits in size.  As per the comment on
+         definition of HRegClass in h_generic_regs.h, that means, to
+         spill a vreg of class Flt64 or Vec128, we'll need to find two
+         adjacent spill slots to use.  Note, this logic needs to be kept
+         in sync with the size info on the definition of HRegClass. */
  
-      if (vreg_lrs[j].reg_class != HRcVec128) {
+      if (vreg_lrs[j].reg_class == HRcVec128
+          || vreg_lrs[j].reg_class == HRcFlt64) {
  
-         /* The ordinary case -- just find a single spill slot. */
+         /* Find two adjacent free slots which between them provide
+            up to 128 bits in which to spill the vreg. */
  
-         /* Find the lowest-numbered spill slot which is available at
-            the start point of this interval, and assign the interval
-            to it. */
-         for (k = 0; k < N_SPILL64S; k++)
-            if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
+         for (k = 0; k < N_SPILL64S-1; k++)
+            if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
+                && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
                 break;
-         if (k == N_SPILL64S) {
+         if (k == N_SPILL64S-1) {
              vpanic("LibVEX_N_SPILL_BYTES is too low.  " 
                     "Increase and recompile.");
           }
-         ss_busy_until_before[k] = vreg_lrs[j].dead_before;
+         ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
+         ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
  
        } else {
  
-       /* Find two adjacent free slots in which to spill a 128-bit
-           vreg. */
+         /* The ordinary case -- just find a single spill slot. */
  
-         for (k = 0; k < N_SPILL64S-1; k++)
-            if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
-                && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
+         /* Find the lowest-numbered spill slot which is available at
+            the start point of this interval, and assign the interval
+            to it. */
+         for (k = 0; k < N_SPILL64S; k++)
+            if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
                 break;
-         if (k == N_SPILL64S-1) {
+         if (k == N_SPILL64S) {
              vpanic("LibVEX_N_SPILL_BYTES is too low.  " 
                     "Increase and recompile.");
           }
-         ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
-         ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
+         ss_busy_until_before[k] = vreg_lrs[j].dead_before;
  
        }
  
diff --git a/VEX/priv/host-x86/hdefs.c b/VEX/priv/host-x86/hdefs.c

index 9f6157f1a9996efcaa0c9bcff89047b8f5224e85..127285cf64a8913fd8252a3218952652ceb752b0 100644 (file)
--- a/VEX/priv/host-x86/hdefs.c
+++ b/VEX/priv/host-x86/hdefs.c
@@ -737,7 +737,7 @@ X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
     i->Xin.FpLdSt.sz     = sz;
     i->Xin.FpLdSt.reg    = reg;
     i->Xin.FpLdSt.addr   = addr;
-   vassert(sz == 4 || sz == 8);
+   vassert(sz == 4 || sz == 8 || sz == 10);
     return i;
  }
  X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,  
@@ -1005,12 +1005,14 @@ void ppX86Instr ( X86Instr* i, Bool mode64 ) {
           break;
        case Xin_FpLdSt:
           if (i->Xin.FpLdSt.isLoad) {
-            vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
+            vex_printf("gld%c " ,  i->Xin.FpLdSt.sz==10 ? 'T'
+                                   : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
              ppX86AMode(i->Xin.FpLdSt.addr);
              vex_printf(", ");
              ppHRegX86(i->Xin.FpLdSt.reg);
           } else {
-            vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
+            vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
+                                  : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
              ppHRegX86(i->Xin.FpLdSt.reg);
              vex_printf(", ");
              ppX86AMode(i->Xin.FpLdSt.addr);
@@ -1558,7 +1560,7 @@ X86Instr* genSpill_X86 ( HReg rreg, Int offsetB, Bool mode64 )
        case HRcInt32:
           return X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
        case HRcFlt64:
-         return X86Instr_FpLdSt ( False/*store*/, 8, rreg, am );
+         return X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
        case HRcVec128:
           return X86Instr_SseLdSt ( False/*store*/, rreg, am );
        default: 
@@ -1578,7 +1580,7 @@ X86Instr* genReload_X86 ( HReg rreg, Int offsetB, Bool mode64 )
        case HRcInt32:
           return X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
        case HRcFlt64:
-         return X86Instr_FpLdSt ( True/*load*/, 8, rreg, am );
+         return X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
        case HRcVec128:
           return X86Instr_SseLdSt ( True/*load*/, rreg, am );
        default: 
@@ -2497,14 +2499,27 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
        goto done;
  
     case Xin_FpLdSt:
-      vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8);
        if (i->Xin.FpLdSt.isLoad) {
           /* Load from memory into %fakeN.  
-            --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1) 
+            --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1) 
           */
           p = do_ffree_st7(p);
-         *p++ = toUChar(i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD);
-        p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
+         switch (i->Xin.FpLdSt.sz) {
+            case 4:
+               *p++ = 0xD9;
+               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
+               break;
+            case 8:
+               *p++ = 0xDD;
+               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
+               break;
+            case 10:
+               *p++ = 0xDB;
+               p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
+               break;
+            default:
+               vpanic("emitX86Instr(FpLdSt,load)");
+         }
           p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
           goto done;
        } else {
@@ -2513,8 +2528,22 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
          */
           p = do_ffree_st7(p);
           p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
-         *p++ = toUChar(i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD);
-         p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
+         switch (i->Xin.FpLdSt.sz) {
+            case 4:
+               *p++ = 0xD9;
+               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
+               break;
+            case 8:
+               *p++ = 0xDD;
+               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
+               break;
+            case 10:
+               *p++ = 0xDB;
+               p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
+               break;
+            default:
+               vpanic("emitX86Instr(FpLdSt,store)");
+         }
           goto done;
        }
        break;
author	Julian Seward <jseward@acm.org>
	Sun, 25 Mar 2007 04:14:58 +0000 (04:14 +0000)
committer	Julian Seward <jseward@acm.org>
	Sun, 25 Mar 2007 04:14:58 +0000 (04:14 +0000)
VEX/priv/host-generic/h_generic_regs.h		patch \| blob \| blame \| history
VEX/priv/host-generic/reg_alloc2.c		patch \| blob \| blame \| history
VEX/priv/host-x86/hdefs.c		patch \| blob \| blame \| history