From: Julian Seward
Date: Sun, 23 Nov 2014 12:14:41 +0000 (+0000)
Subject: Merge, from trunk, 2962, 2966, 2967, 2973
X-Git-Tag: svn/VALGRIND_3_10_1^2~19
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=53ed52bc50dbb52d8bfba38b1e3cd7c73fe1b10d;p=thirdparty%2Fvalgrind.git

Merge, from trunk, 2962, 2966, 2967, 2973

339433  ppc64 lxvw4x instruction uses four 32-byte loads

git-svn-id: svn://svn.valgrind.org/vex/branches/VEX_3_10_BRANCH@3003
---

diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 8c1e70a3f9..6ee9a61b9d 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -15319,26 +15319,27 @@ dis_vx_load ( UInt theInstr )
    }
    case 0x30C:
    {
-      IRExpr * t3, *t2, *t1, *t0;
-      UInt ea_off = 0;
-      IRExpr* irx_addr;
+      IRExpr *t0;
 
       DIP("lxvw4x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      t3 = load( Ity_I32, mkexpr( EA ) );
-      ea_off += 4;
-      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t2 = load( Ity_I32, irx_addr );
-      ea_off += 4;
-      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t1 = load( Ity_I32, irx_addr );
-      ea_off += 4;
-      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t0 = load( Ity_I32, irx_addr );
-      putVSReg( XT, binop( Iop_64HLtoV128, binop( Iop_32HLto64, t3, t2 ),
-                           binop( Iop_32HLto64, t1, t0 ) ) );
+
+      /* The load will result in the data being in BE order. */
+      if (host_endness == VexEndnessLE) {
+         IRExpr *t0_BE;
+         IRTemp perm_LE = newTemp(Ity_V128);
+
+         t0_BE = load( Ity_V128, mkexpr( EA ) );
+
+         /* Permute the data to LE format */
+         assign( perm_LE, binop( Iop_64HLtoV128, mkU64(0x0c0d0e0f08090a0b),
+                                 mkU64(0x0405060700010203)));
+
+         t0 = binop( Iop_Perm8x16, t0_BE, mkexpr(perm_LE) );
+      } else {
+         t0 = load( Ity_V128, mkexpr( EA ) );
+      }
+
+      putVSReg( XT, t0 );
       break;
    }
    default:
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
index 70b65fc609..cff6aa5ba1 100644
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -1426,6 +1426,14 @@ PPCInstr* PPCInstr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR ) {
    i->Pin.AvSel.srcR = srcR;
    return i;
 }
+PPCInstr* PPCInstr_AvSh ( Bool shLeft, HReg dst, PPCAMode* addr ) {
+   PPCInstr* i        = LibVEX_Alloc(sizeof(PPCInstr));
+   i->tag             = Pin_AvSh;
+   i->Pin.AvSh.shLeft = shLeft;
+   i->Pin.AvSh.dst    = dst;
+   i->Pin.AvSh.addr   = addr;
+   return i;
+}
 PPCInstr* PPCInstr_AvShlDbl ( UChar shift, HReg dst,
                               HReg srcL, HReg srcR ) {
    PPCInstr* i      = LibVEX_Alloc(sizeof(PPCInstr));
@@ -2008,6 +2016,30 @@ void ppPPCInstr ( PPCInstr* i, Bool mode64 )
       ppHRegPPC(i->Pin.AvSel.ctl);
       return;
 
+   case Pin_AvSh:
+      /* This only generates the following instructions with RA
+       * register number set to 0.
+       */
+      if (i->Pin.AvSh.addr->tag == Pam_IR) {
+         ppLoadImm(hregPPC_GPR30(mode64),
+                   i->Pin.AvSh.addr->Pam.IR.index, mode64);
+         vex_printf(" ; ");
+      }
+
+      if (i->Pin.AvSh.shLeft)
+         vex_printf("lvsl ");
+      else
+         vex_printf("lvsr ");
+
+      ppHRegPPC(i->Pin.AvSh.dst);
+      if (i->Pin.AvSh.addr->tag == Pam_IR)
+         vex_printf("%%r30");
+      else
+         ppHRegPPC(i->Pin.AvSh.addr->Pam.RR.index);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvSh.addr->Pam.RR.base);
+      return;
+
    case Pin_AvShlDbl:
       vex_printf("vsldoi ");
       ppHRegPPC(i->Pin.AvShlDbl.dst);
@@ -2517,6 +2549,12 @@ void getRegUsage_PPCInstr ( HRegUsage* u, PPCInstr* i, Bool mode64 )
       addHRegUse(u, HRmRead,  i->Pin.AvSel.srcL);
       addHRegUse(u, HRmRead,  i->Pin.AvSel.srcR);
       return;
+   case Pin_AvSh:
+      addHRegUse(u, HRmWrite, i->Pin.AvSh.dst);
+      if (i->Pin.AvSh.addr->tag == Pam_IR)
+         addHRegUse(u, HRmWrite, hregPPC_GPR30(mode64));
+      addRegUsage_PPCAMode(u, i->Pin.AvSh.addr);
+      return;
    case Pin_AvShlDbl:
       addHRegUse(u, HRmWrite, i->Pin.AvShlDbl.dst);
       addHRegUse(u, HRmRead,  i->Pin.AvShlDbl.srcL);
@@ -2846,6 +2884,10 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
      mapReg(m, &i->Pin.AvSel.srcR);
      mapReg(m, &i->Pin.AvSel.ctl);
      return;
+   case Pin_AvSh:
+      mapReg(m, &i->Pin.AvSh.dst);
+      mapRegs_PPCAMode(m, i->Pin.AvSh.addr);
+      return;
    case Pin_AvShlDbl:
       mapReg(m, &i->Pin.AvShlDbl.dst);
       mapReg(m, &i->Pin.AvShlDbl.srcL);
@@ -3709,6 +3751,19 @@ static UChar* mkFormVX ( UChar* p, UInt opc1, UInt r1, UInt r2,
    return emit32(p, theInstr, endness_host);
 }
 
+static UChar* mkFormVXI ( UChar* p, UInt opc1, UInt r1, UInt r2,
+                          UInt r3, UInt opc2, VexEndness endness_host )
+{
+   UInt theInstr;
+   vassert(opc1 < 0x40);
+   vassert(r1   < 0x20);
+   vassert(r2   < 0x20);
+   vassert(r3   < 0x20);
+   vassert(opc2 < 0x27);
+   theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) | opc2<<1);
+   return emit32(p, theInstr, endness_host);
+}
+
 static UChar* mkFormVXR ( UChar* p, UInt opc1, UInt r1, UInt r2,
                           UInt r3, UInt Rc, UInt opc2,
                           VexEndness endness_host )
@@ -5214,6 +5269,30 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
       goto done;
    }
 
+   case Pin_AvSh: {  // lvsl or lvsr
+      UInt v_dst = vregNo(i->Pin.AvSh.dst);
+      Bool idxd  = toBool(i->Pin.AvSh.addr->tag == Pam_RR);
+      UInt r_idx, r_base;
+
+      r_base = iregNo(i->Pin.AvSh.addr->Pam.RR.base, mode64);
+
+      if (!idxd) {
+         r_idx = 30; // XXX: Using r30 as temp
+         p = mkLoadImm(p, r_idx,
+                       i->Pin.AvSh.addr->Pam.IR.index, mode64, endness_host);
+      } else {
+         r_idx = iregNo(i->Pin.AvSh.addr->Pam.RR.index, mode64);
+      }
+
+      if (i->Pin.AvSh.shLeft)
+         // lvsl VRT,RA,RB
+         p = mkFormVXI( p, 31, v_dst, r_idx, r_base, 6, endness_host );
+      else
+         // lvsr VRT,RA,RB
+         p = mkFormVXI( p, 31, v_dst, r_idx, r_base, 38, endness_host );
+      goto done;
+   }
+
    case Pin_AvShlDbl: {  // vsldoi
       UInt shift  = i->Pin.AvShlDbl.shift;
       UInt v_dst  = vregNo(i->Pin.AvShlDbl.dst);
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
index 7f3043f9f9..d7bf1a3214 100644
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -522,6 +522,7 @@ typedef
       Pin_AvPerm,     /* AV permute (shuffle) */
       Pin_AvSel,      /* AV select */
+      Pin_AvSh,       /* AV shift left or right */
       Pin_AvShlDbl,   /* AV shift-left double by imm */
       Pin_AvSplat,    /* One elem repeated throughout dst */
       Pin_AvLdVSCR,   /* mtvscr */
@@ -854,6 +855,11 @@ typedef
             HReg srcR;
             HReg ctl;
          } AvSel;
+         struct {
+            Bool      shLeft;
+            HReg      dst;
+            PPCAMode* addr;
+         } AvSh;
          struct {
             UChar shift;
             HReg  dst;
@@ -1077,6 +1083,7 @@ extern PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvFpOp op, HReg dst, HReg srcL, HReg s
 extern PPCInstr* PPCInstr_AvUn32Fx4  ( PPCAvFpOp op, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_AvPerm     ( HReg dst, HReg srcL, HReg srcR, HReg ctl );
 extern PPCInstr* PPCInstr_AvSel      ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvSh       ( Bool shLeft, HReg dst, PPCAMode* am_addr );
 extern PPCInstr* PPCInstr_AvShlDbl   ( UChar shift, HReg dst, HReg srcL, HReg srcR );
 extern PPCInstr* PPCInstr_AvSplat    ( UChar sz, HReg dst, PPCVI5s* src );
 extern PPCInstr* PPCInstr_AvCMov     ( PPCCondCode, HReg dst, HReg src );
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index fcbb53e845..00f7145844 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -4871,12 +4871,57 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
    }
 
    if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
-      PPCAMode* am_addr;
+      /* Need to be able to do V128 unaligned loads. The BE unaligned load
+       * can be accomplished using the following code sequence from the ISA.
+       * It uses two lvx instructions to do two aligned loads and then
+       * permutes the data so the result is the same as if it had been an
+       * unaligned load.
+       *
+       *   lvx   Vhi,0,Rb        # load MSQ, using the unaligned address in Rb
+       *   lvsl  Vp,0,Rb         # set permute control vector
+       *   addi  Rb,Rb,15        # address of LSQ
+       *   lvx   Vlo,0,Rb        # load LSQ
+       *   vperm Vt,Vhi,Vlo,Vp   # align the data as requested
+       */
+      HReg Vhi   = newVRegV(env);
+      HReg Vlo   = newVRegV(env);
+      HReg Vp    = newVRegV(env);
       HReg v_dst = newVRegV(env);
+      HReg rB;
+      HReg rB_plus_15 = newVRegI(env);
+
       vassert(e->Iex.Load.ty == Ity_V128);
-      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_V128/*xfer*/,
-                                   IEndianess);
-      addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, v_dst, am_addr));
+      rB = iselWordExpr_R( env, e->Iex.Load.addr, IEndianess );
+
+      // lvx Vhi, 0, Rb
+      addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vhi,
+                                     PPCAMode_IR(0, rB)) );
+
+      if (IEndianess == Iend_LE)
+         // lvsr Vp, 0, Rb
+         addInstr(env, PPCInstr_AvSh( False/*right shift*/, Vp,
+                                      PPCAMode_IR(0, rB)) );
+      else
+         // lvsl Vp, 0, Rb
+         addInstr(env, PPCInstr_AvSh( True/*left shift*/, Vp,
+                                      PPCAMode_IR(0, rB)) );
+
+      // addi Rb_plus_15, Rb, 15
+      addInstr(env, PPCInstr_Alu( Palu_ADD, rB_plus_15,
+                                  rB, PPCRH_Imm(True, toUShort(15))) );
+
+      // lvx Vlo, 0, Rb_plus_15
+      addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vlo,
+                                     PPCAMode_IR(0, rB_plus_15)) );
+
+      if (IEndianess == Iend_LE)
+         // vperm Vt, Vlo, Vhi, Vp
+         addInstr(env, PPCInstr_AvPerm( v_dst, Vlo, Vhi, Vp ));
+      else
+         // vperm Vt, Vhi, Vlo, Vp
+         addInstr(env, PPCInstr_AvPerm( v_dst, Vhi, Vlo, Vp ));
+
       return v_dst;
    }
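
Note on the lxvw4x change: read as sixteen byte indices, the Iop_Perm8x16
control vector built above (0x0c0d0e0f08090a0b : 0x0405060700010203)
reverses the order of the four 32-bit words while leaving the bytes inside
each word untouched, turning the BE-ordered result of the plain V128 load
into the layout lxvw4x must produce on an LE host. A minimal C sketch of
that selection step, not part of the patch, assuming the simplified rule
that result byte i is taken from source byte ctrl[i] (VEX lane-numbering
details glossed over):

   #include <stdio.h>
   #include <stdint.h>

   /* Simplified model of Iop_Perm8x16: result byte i = src[ctrl[i] & 0xF]. */
   static void perm8x16(uint8_t dst[16], const uint8_t src[16],
                        const uint8_t ctrl[16])
   {
      for (int i = 0; i < 16; i++)
         dst[i] = src[ctrl[i] & 0xF];
   }

   int main(void)
   {
      /* The byte indices used by the patch, written out individually. */
      const uint8_t ctrl[16] = { 0x0c,0x0d,0x0e,0x0f, 0x08,0x09,0x0a,0x0b,
                                 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03 };
      uint8_t src[16], dst[16];
      for (int i = 0; i < 16; i++) src[i] = (uint8_t)i;
      perm8x16(dst, src, ctrl);
      /* Prints "0c0d0e0f 08090a0b 04050607 00010203": word order reversed,
         byte order within each word preserved. */
      for (int i = 0; i < 16; i++)
         printf("%02x%s", dst[i], (i % 4 == 3) ? " " : "");
      printf("\n");
      return 0;
   }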
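
Note on mkFormVXI: it shifts the primary opcode into the top six bits of
the instruction word, packs three 5-bit register fields below that, and
places the extended opcode in the ten bits above the lowest bit. This is
the layout the Pin_AvSh emitter relies on when it passes primary opcode 31
with extended opcodes 6 (lvsl) and 38 (lvsr). A standalone sketch of the
same packing, not part of the patch; the example operands and the expected
hex words are my own arithmetic:

   #include <stdio.h>
   #include <stdint.h>

   /* Same field packing as mkFormVXI: opc1 | RT | RA | RB | opc2<<1. */
   static uint32_t form_vxi(uint32_t opc1, uint32_t r1, uint32_t r2,
                            uint32_t r3, uint32_t opc2)
   {
      return (opc1 << 26) | (r1 << 21) | (r2 << 16) | (r3 << 11) | (opc2 << 1);
   }

   int main(void)
   {
      /* lvsl v5,r4,r6 and lvsr v5,r4,r6, using the 31/6 and 31/38
         opcode pairs that emit_PPCInstr passes for Pin_AvSh. */
      printf("lvsl v5,r4,r6 -> 0x%08x\n", form_vxi(31, 5, 4, 6, 6));
      printf("lvsr v5,r4,r6 -> 0x%08x\n", form_vxi(31, 5, 4, 6, 38));
      /* Expected output: 0x7ca4300c and 0x7ca4304c. */
      return 0;
   }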
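
Note on the isel change: the lvx/lvsl/vperm idiom works because lvx ignores
the low four address bits (it loads the aligned quadword containing the
address) and lvsl materializes the control bytes sh..sh+15, where
sh = addr & 15, so the vperm picks the sixteen wanted bytes out of the two
adjacent aligned quadwords. A byte-level C model of the BE path, not part
of the patch, which mirrors the hardware's aligned accesses rather than
reading the sixteen bytes directly:

   #include <stdint.h>

   /* Model of: lvx Vhi,0,Rb ; lvsl Vp,0,Rb ; addi Rb,Rb,15 ;
    *           lvx Vlo,0,Rb ; vperm Vt,Vhi,Vlo,Vp                  */
   static void unaligned_v128_load(uint8_t dst[16], const uint8_t *ea)
   {
      uintptr_t a = (uintptr_t)ea;
      const uint8_t *hi = (const uint8_t *)(a & ~(uintptr_t)15);        /* lvx Vhi */
      const uint8_t *lo = (const uint8_t *)((a + 15) & ~(uintptr_t)15); /* lvx Vlo */
      unsigned sh = (unsigned)(a & 15);  /* the shift lvsl encodes in Vp */

      for (unsigned i = 0; i < 16; i++) /* vperm from concat(Vhi,Vlo) */
         dst[i] = (sh + i < 16) ? hi[sh + i] : lo[sh + i - 16];
   }

When the address is already 16-byte aligned, hi and lo name the same
quadword and the permute degenerates to a straight copy. The LE path in the
patch uses lvsr and swaps the two vperm sources instead.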