From: Carl Love Date: Mon, 12 Aug 2013 18:01:40 +0000 (+0000) Subject: Initial ISA 2.07 support for POWER8-tuned libc X-Git-Tag: svn/VALGRIND_3_9_0^2~55 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5e62d7f87a92f8b0cc3858e6fdb592e49c732b74;p=thirdparty%2Fvalgrind.git Initial ISA 2.07 support for POWER8-tuned libc The IBM Power ISA 2.07 has been published on power.org, and IBM's new POWER8 processor is under development to implement that ISA. This patch provides initial VEX support for running Valgrind on POWER8 systems running a soon-to-be released Linux distribution. This Linux distro will include a POWER8-tuned libc that uses a subset of the new instructions from ISA 2.07. Since virtually all applications link with libc, it would be impossible to run an application under Valgrind on this distro without adding support for these new instructions to Valgrind, so that's the intent of this patch. Note that applications built on this distro will *not* employ new POWER8 instructions by default. There are roughly 150 new instructions in the Power ISA 2.07, including hardware transaction management (HTM). Support for these new instructions (modulo the subset included in this bug) will be added to Valgrind in a phased approach, similar to what we did for Power ISA 2.06. 
Bugzilla 322294 git-svn-id: svn://svn.valgrind.org/vex/trunk@2740 --- diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index e8301dbee0..a4d69450f9 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -4933,11 +4933,17 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi ) case 0x3E: switch ((b1<<1) | b0) { case 0x0: // std (Store DWord, PPC64 p580) + if (!mode64) + return False; + DIP("std r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); storeBE( mkexpr(EA), mkexpr(rS) ); break; case 0x1: // stdu (Store DWord, Update, PPC64 p583) + if (!mode64) + return False; + DIP("stdu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); putIReg( rA_addr, mkexpr(EA) ); storeBE( mkexpr(EA), mkexpr(rS) ); @@ -6463,7 +6469,7 @@ static Bool dis_proc_ctl ( VexAbiInfo* vbi, UInt theInstr ) } /* not decodable */ return False; - + /* XFX-Form */ case 0x153: // mfspr (Move from Special-Purpose Register, PPC32 p470) @@ -6631,7 +6637,79 @@ static Bool dis_proc_ctl ( VexAbiInfo* vbi, UInt theInstr ) return False; } break; - + + case 0x33: // mfvsrd + { + UChar XS = ifieldRegXS( theInstr ); + UChar rA_addr = ifieldRegA(theInstr); + IRExpr * high64; + IRTemp vS = newTemp( Ity_V128 ); + DIP("mfvsrd r%u,vsr%d\n", rA_addr, (UInt)XS); + + /* XS = SX || S + * For SX=0, mfvsrd is treated as a Floating-Point + * instruction in terms of resource availability. + * For SX=1, mfvsrd is treated as a Vector instruction in + * terms of resource availability. + *NEED TO FIGURE OUT HOW TO IMPLEMENT THE RESOURCE AVAILABILITY PART + */ + assign( vS, getVSReg( XS ) ); + high64 = unop( Iop_V128HIto64, mkexpr( vS ) ); + putIReg( rA_addr, (mode64) ? 
high64 : + unop( Iop_64to32, high64 ) ); + break; + } + + case 0xB3: // mtvsrd + { + UChar XT = ifieldRegXT( theInstr ); + UChar rA_addr = ifieldRegA(theInstr); + IRTemp rA = newTemp(ty); + DIP("mtvsrd vsr%d,r%u\n", (UInt)XT, rA_addr); + /* XT = TX || T + * For TX=0, mtvsrd is treated as a Floating-Point + * instruction in terms of resource availability. + * For TX=1, mtvsrd is treated as a Vector instruction in + * terms of resource availability. + *NEED TO FIGURE OUT HOW TO IMPLEMENT THE RESOURCE AVAILABILITY PART + */ + assign( rA, getIReg(rA_addr) ); + + if (mode64) + putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( rA ), mkU64( 0 ) ) ); + else + putVSReg( XT, binop( Iop_64HLtoV128, + binop( Iop_32HLto64, + mkU32( 0 ), + mkexpr( rA ) ), + mkU64( 0 ) ) ); + break; + } + + case 0xD3: // mtvsrwa + { + UChar XT = ifieldRegXT( theInstr ); + UChar rA_addr = ifieldRegA(theInstr); + IRTemp rA = newTemp( Ity_I32 ); + DIP("mtvsrwa vsr%d,r%u\n", (UInt)XT, rA_addr); + /* XT = TX || T + * For TX=0, mtvsrwa is treated as a Floating-Point + * instruction in terms of resource availability. + * For TX=1, mtvsrwa is treated as a Vector instruction in + * terms of resource availability. + *NEED TO FIGURE OUT HOW TO IMPLEMENT THE RESOURCE AVAILABILITY PART + */ + if (mode64) + assign( rA, unop( Iop_64to32, getIReg( rA_addr ) ) ); + else + assign( rA, getIReg(rA_addr) ); + + putVSReg( XT, binop( Iop_64HLtoV128, + unop( Iop_32Sto64, mkexpr( rA ) ), + mkU64( 0 ) ) ); + break; + } + default: vex_printf("dis_proc_ctl(ppc)(opc2)\n"); return False; @@ -11692,7 +11770,7 @@ dis_vx_conv ( UInt theInstr, UInt opc2 ) /* Create and assign temps only as needed for the given instruction. 
*/ switch (opc2) { // scalar double-precision floating point argument - case 0x2B0: case 0x0b0: case 0x290: case 0x212: case 0x090: + case 0x2B0: case 0x0b0: case 0x290: case 0x212: case 0x216: case 0x090: xB = newTemp(Ity_F64); assign( xB, unop( Iop_ReinterpI64asF64, @@ -11734,6 +11812,11 @@ dis_vx_conv ( UInt theInstr, UInt opc2 ) assign( xB, unop( Iop_64HIto32, unop( Iop_V128HIto64, getVSReg( XB ) ) ) ); break; + case 0x296: // xscvspdpn (non-signalling version of xscvspdp) + xB = newTemp(Ity_I32); + assign( xB, + unop( Iop_64HIto32, unop( Iop_V128HIto64, getVSReg( XB ) ) ) ); + break; /* Certain instructions have their complete implementation in the main switch statement * that follows this one; thus we have a "do nothing" case for those instructions here. @@ -11881,6 +11964,18 @@ dis_vx_conv ( UInt theInstr, UInt opc2 ) mkU32( 0 ) ), mkU64( 0ULL ) ) ); break; + case 0x216: /* xscvdpspn (VSX Scalar round Double-Precision to + Single-Precision format Non-signalling) */ + DIP("xscvdpspn v%u,v%u\n", (UInt)XT, (UInt)XB); + putVSReg( XT, + binop( Iop_64HLtoV128, + binop( Iop_32HLto64, + unop( Iop_ReinterpF32asI32, + unop( Iop_TruncF64asF32, + mkexpr( xB ) ) ), + mkU32( 0 ) ), + mkU64( 0ULL ) ) ); + break; case 0x090: // xscvdpuxws (VSX Scalar truncate Double-Precision to integer // and Convert to Unsigned Integer Word format with Saturate) DIP("xscvdpuxws v%u,v%u\n", (UInt)XT, (UInt)XB); @@ -11902,6 +11997,15 @@ dis_vx_conv ( UInt theInstr, UInt opc2 ) unop( Iop_ReinterpI32asF32, mkexpr( xB ) ) ) ), mkU64( 0ULL ) ) ); break; + case 0x296: // xscvspdpn (VSX Scalar Convert Single-Precision to Double-Precision format Non signaling) + DIP("xscvspdpn v%u,v%u\n", (UInt)XT, (UInt)XB); + putVSReg( XT, + binop( Iop_64HLtoV128, + unop( Iop_ReinterpF64asI64, + unop( Iop_F32toF64, + unop( Iop_ReinterpI32asF32, mkexpr( xB ) ) ) ), + mkU64( 0ULL ) ) ); + break; case 0x312: // xvcvdpsp (VSX Vector round Double-Precision to single-precision // and Convert to 
Single-Precision format) DIP("xvcvdpsp v%u,v%u\n", (UInt)XT, (UInt)XB); @@ -14627,6 +14731,11 @@ static Bool dis_av_arith ( UInt theInstr ) putVReg( vD_addr, binop(Iop_Add32x4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x0C0: // vaddudm (Add Unsigned Double Word Modulo) + DIP("vaddudm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Add64x2, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x200: // vaddubs (Add Unsigned Byte Saturate, AV p142) DIP("vaddubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, binop(Iop_QAdd8Ux16, mkexpr(vA), mkexpr(vB)) ); @@ -15899,6 +16008,12 @@ static Bool dis_av_pack ( UInt theInstr ) return True; } + case 0x44E: // vpkudum (Pack Unsigned Double Word Unsigned Modulo) + DIP("vpkudum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_NarrowBin64to32x4, mkexpr(vA), mkexpr(vB)) ); + return True; + default: break; // Fall through... } @@ -16431,6 +16546,7 @@ static struct vsx_insn vsx_all[] = { { 0x1f4, "xvtdivdp" }, { 0x208, "xxland" }, { 0x212, "xscvdpsp" }, + { 0x216, "xscvdpspn" }, { 0x228, "xxlandc" }, { 0x248 , "xxlor" }, { 0x268, "xxlxor" }, @@ -16439,6 +16555,7 @@ static struct vsx_insn vsx_all[] = { { 0x288, "xxlnor" }, { 0x290, "xscvdpuxds" }, { 0x292, "xscvspdp" }, + { 0x296, "xscvspdpn" }, { 0x2a0, "xsmindp" }, { 0x2a4, "xsnmaddmdp" }, { 0x2b0, "xscvdpsxds" }, @@ -16487,7 +16604,8 @@ static struct vsx_insn vsx_all[] = { { 0x3f0, "xvcvsxddp" }, { 0x3f2, "xvnegdp" } }; -#define VSX_ALL_LEN 135 +#define VSX_ALL_LEN (sizeof vsx_all / sizeof *vsx_all) + // ATTENTION: This search function assumes vsx_all array is sorted. 
static Int findVSXextOpCode(UInt opcode) @@ -16565,6 +16683,7 @@ DisResult disInstr_PPC_WRK ( Bool allow_GX = False; Bool allow_VX = False; // Equates to "supports Power ISA 2.06 Bool allow_DFP = False; + Bool allow_isa_2_07 = False; UInt hwcaps = archinfo->hwcaps; Long delta; @@ -16576,6 +16695,7 @@ DisResult disInstr_PPC_WRK ( allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC64_GX)); allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC64_VX)); allow_DFP = (0 != (hwcaps & VEX_HWCAPS_PPC64_DFP)); + allow_isa_2_07 = (0 != (hwcaps & VEX_HWCAPS_PPC64_ISA2_07)); } else { allow_F = (0 != (hwcaps & VEX_HWCAPS_PPC32_F)); allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC32_V)); @@ -16583,6 +16703,7 @@ DisResult disInstr_PPC_WRK ( allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC32_GX)); allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC32_VX)); allow_DFP = (0 != (hwcaps & VEX_HWCAPS_PPC32_DFP)); + allow_isa_2_07 = (0 != (hwcaps & VEX_HWCAPS_PPC32_ISA2_07)); } /* The running delta */ @@ -17025,8 +17146,9 @@ DisResult disInstr_PPC_WRK ( case 0x2B0: case 0x2F0: // xscvdpsxds, xscvsxddp case 0x1b0: case 0x130: // xvcvdpsxws, xvcvspsxws case 0x0b0: case 0x290: // xscvdpsxws, xscvdpuxds - case 0x212: // xscvdpsp - case 0x292: case 0x312: // xscvspdp, xvcvdpsp + case 0x212: case 0x216: // xscvdpsp, xscvdpspn + case 0x292: case 0x296: // xscvspdp, xscvspdpn + case 0x312: // xvcvdpsp case 0x390: case 0x190: // xvcvdpuxds, xvcvdpuxws case 0x3B0: case 0x310: // xvcvdpsxds, xvcvspuxds case 0x392: case 0x330: // xvcvspdp, xvcvspsxds @@ -17070,7 +17192,6 @@ DisResult disInstr_PPC_WRK ( /* 64bit Integer Stores */ case 0x3E: // std, stdu - if (!mode64) goto decode_failure; if (dis_int_store( theInstr, abiinfo )) goto decode_success; goto decode_failure; @@ -17105,7 +17226,7 @@ DisResult disInstr_PPC_WRK ( if (!allow_GX) goto decode_noGX; if (dis_fp_arith(theInstr)) goto decode_success; goto decode_failure; - + default: break; // Fall through } @@ -17455,6 +17576,8 @@ DisResult disInstr_PPC_WRK ( goto decode_failure; /* Processor 
Control Instructions */ + case 0x33: // mfvsrd + case 0xB3: case 0xD3: // mtvsrd, mtvsrwa case 0x200: case 0x013: case 0x153: // mcrxr, mfcr, mfspr case 0x173: case 0x090: case 0x1D3: // mftb, mtcrf, mtspr if (dis_proc_ctl( abiinfo, theInstr )) goto decode_success; @@ -17662,6 +17785,11 @@ DisResult disInstr_PPC_WRK ( if (dis_av_arith( theInstr )) goto decode_success; goto decode_failure; + case 0x0C0: // vaddudm + if (!allow_isa_2_07) goto decode_noP8; + if (dis_av_arith( theInstr )) goto decode_success; + goto decode_failure; + /* AV Rotate, Shift */ case 0x004: case 0x044: case 0x084: // vrlb, vrlh, vrlw case 0x104: case 0x144: case 0x184: // vslb, vslh, vslw @@ -17725,6 +17853,11 @@ DisResult disInstr_PPC_WRK ( if (dis_av_pack( theInstr )) goto decode_success; goto decode_failure; + case 0x44E: // vpkudum + if (!allow_isa_2_07) goto decode_noP8; + if (dis_av_pack( theInstr )) goto decode_success; + goto decode_failure; + default: break; // Fall through... } @@ -17782,6 +17915,11 @@ DisResult disInstr_PPC_WRK ( vex_printf("disInstr(ppc): " "declined to decode a Decimal Floating Point insn.\n"); goto decode_failure; + decode_noP8: + vassert(!allow_isa_2_07); + vex_printf("disInstr(ppc): " + "declined to decode a Power 8 insn.\n"); + goto decode_failure; decode_failure: @@ -17870,10 +18008,11 @@ DisResult disInstr_PPC ( IRSB* irsb_IN, /* do some sanity checks */ mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX - | VEX_HWCAPS_PPC32_DFP; + | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07; mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX - | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP; + | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP + | VEX_HWCAPS_PPC64_ISA2_07; if (mode64) { vassert((hwcaps_guest & mask32) == 0); diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c index 8d3c007e36..71e1335b0e 100644 --- a/VEX/priv/host_ppc_defs.c +++ 
b/VEX/priv/host_ppc_defs.c @@ -674,7 +674,7 @@ const HChar* showPPCAvOp ( PPCAvOp op ) { case Pav_UNPCKLPIX: return "vupklpx"; /* Integer binary */ - case Pav_ADDU: return "vaddu_m"; // b,h,w + case Pav_ADDU: return "vaddu_m"; // b,h,w,dw case Pav_QADDU: return "vaddu_s"; // b,h,w case Pav_QADDS: return "vadds_s"; // b,h,w @@ -708,7 +708,7 @@ const HChar* showPPCAvOp ( PPCAvOp op ) { case Pav_ROTL: return "vrl"; // b,h,w /* Pack */ - case Pav_PACKUU: return "vpku_um"; // h,w + case Pav_PACKUU: return "vpku_um"; // h,w,dw case Pav_QPACKUU: return "vpku_us"; // h,w case Pav_QPACKSU: return "vpks_us"; // h,w case Pav_QPACKSS: return "vpks_ss"; // h,w @@ -1348,6 +1348,17 @@ PPCInstr* PPCInstr_AvBin32x4 ( PPCAvOp op, HReg dst, i->Pin.AvBin32x4.srcR = srcR; return i; } +PPCInstr* PPCInstr_AvBin64x2 ( PPCAvOp op, HReg dst, + HReg srcL, HReg srcR ) { + PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); + i->tag = Pin_AvBin64x2; + i->Pin.AvBin64x2.op = op; + i->Pin.AvBin64x2.dst = dst; + i->Pin.AvBin64x2.srcL = srcL; + i->Pin.AvBin64x2.srcR = srcR; + return i; +} + PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvFpOp op, HReg dst, HReg srcL, HReg srcR ) { PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); @@ -1883,6 +1894,14 @@ void ppPPCInstr ( PPCInstr* i, Bool mode64 ) vex_printf(","); ppHRegPPC(i->Pin.AvBin32x4.srcR); return; + case Pin_AvBin64x2: + vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvBin64x2.op)); + ppHRegPPC(i->Pin.AvBin64x2.dst); + vex_printf(","); + ppHRegPPC(i->Pin.AvBin64x2.srcL); + vex_printf(","); + ppHRegPPC(i->Pin.AvBin64x2.srcR); + return; case Pin_AvBin32Fx4: vex_printf("%s ", showPPCAvFpOp(i->Pin.AvBin32Fx4.op)); ppHRegPPC(i->Pin.AvBin32Fx4.dst); @@ -2364,6 +2383,11 @@ void getRegUsage_PPCInstr ( HRegUsage* u, PPCInstr* i, Bool mode64 ) addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcL); addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcR); return; + case Pin_AvBin64x2: + addHRegUse(u, HRmWrite, i->Pin.AvBin64x2.dst); + addHRegUse(u, HRmRead, i->Pin.AvBin64x2.srcL); + addHRegUse(u, 
HRmRead, i->Pin.AvBin64x2.srcR); + return; case Pin_AvBin32Fx4: addHRegUse(u, HRmWrite, i->Pin.AvBin32Fx4.dst); addHRegUse(u, HRmRead, i->Pin.AvBin32Fx4.srcL); @@ -2670,6 +2694,11 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 ) mapReg(m, &i->Pin.AvBin32x4.srcL); mapReg(m, &i->Pin.AvBin32x4.srcR); return; + case Pin_AvBin64x2: + mapReg(m, &i->Pin.AvBin64x2.dst); + mapReg(m, &i->Pin.AvBin64x2.srcL); + mapReg(m, &i->Pin.AvBin64x2.srcR); + return; case Pin_AvBin32Fx4: mapReg(m, &i->Pin.AvBin32Fx4.dst); mapReg(m, &i->Pin.AvBin32Fx4.srcL); @@ -4785,6 +4814,24 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc, goto done; } + case Pin_AvBin64x2: { + UInt v_dst = vregNo(i->Pin.AvBin64x2.dst); + UInt v_srcL = vregNo(i->Pin.AvBin64x2.srcL); + UInt v_srcR = vregNo(i->Pin.AvBin64x2.srcR); + UInt opc2; + switch (i->Pin.AvBin64x2.op) { + case Pav_ADDU: opc2 = 192; break; // vaddudm vector double add + case Pav_PACKUU: opc2 = 1102; break; // vpkudum + // FIXME: We currently don't have a vector compare equal double word, so it's a hack + // to use vcmpequw, but it works. 
+ case Pav_CMPEQU: opc2 = 134; break; // vcmpequw + default: + goto bad; + } + p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 ); + goto done; + } + case Pin_AvBin32Fx4: { UInt v_dst = vregNo(i->Pin.AvBin32Fx4.dst); UInt v_srcL = vregNo(i->Pin.AvBin32Fx4.srcL); diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h index 9c2d05de1e..ce170a1d2a 100644 --- a/VEX/priv/host_ppc_defs.h +++ b/VEX/priv/host_ppc_defs.h @@ -492,6 +492,7 @@ typedef Pin_AvBin8x16, /* AV binary, 8x4 */ Pin_AvBin16x8, /* AV binary, 16x4 */ Pin_AvBin32x4, /* AV binary, 32x4 */ + Pin_AvBin64x2, /* AV binary, 64x2 */ Pin_AvBin32Fx4, /* AV FP binary, 32Fx4 */ Pin_AvUn32Fx4, /* AV FP unary, 32Fx4 */ @@ -795,6 +796,13 @@ typedef HReg srcL; HReg srcR; } AvBin32x4; + /* Can only be generated for CPUs capable of ISA 2.07 or above */ + struct { + PPCAvOp op; + HReg dst; + HReg srcL; + HReg srcR; + } AvBin64x2; struct { PPCAvFpOp op; HReg dst; @@ -1013,6 +1021,7 @@ extern PPCInstr* PPCInstr_AvBinary ( PPCAvOp op, HReg dst, HReg srcL, HReg src extern PPCInstr* PPCInstr_AvBin8x16 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR ); extern PPCInstr* PPCInstr_AvBin16x8 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR ); extern PPCInstr* PPCInstr_AvBin32x4 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR ); +extern PPCInstr* PPCInstr_AvBin64x2 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR ); extern PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvFpOp op, HReg dst, HReg srcL, HReg srcR ); extern PPCInstr* PPCInstr_AvUn32Fx4 ( PPCAvFpOp op, HReg dst, HReg src ); extern PPCInstr* PPCInstr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl ); diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c index 54048efdf5..0969944d36 100644 --- a/VEX/priv/host_ppc_isel.c +++ b/VEX/priv/host_ppc_isel.c @@ -4781,6 +4781,16 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } + case Iop_CmpNEZ64x2: { + HReg arg = iselVecExpr(env, e->Iex.Unop.arg); + HReg zero = newVRegV(env); + HReg dst = newVRegV(env); + 
addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero)); + addInstr(env, PPCInstr_AvBin64x2(Pav_CMPEQU, dst, arg, zero)); + addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst)); + return dst; + } + case Iop_Recip32Fx4: fpop = Pavfp_RCPF; goto do_32Fx4_unary; case Iop_RSqrt32Fx4: fpop = Pavfp_RSQRTF; goto do_32Fx4_unary; case Iop_I32UtoFx4: fpop = Pavfp_CVTU2F; goto do_32Fx4_unary; @@ -5045,6 +5055,16 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } + case Iop_NarrowBin64to32x4: op = Pav_PACKUU; goto do_AvBin64x2; + case Iop_Add64x2: op = Pav_ADDU; goto do_AvBin64x2; + do_AvBin64x2: { + HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); + HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); + HReg dst = newVRegV(env); + addInstr(env, PPCInstr_AvBin64x2(op, dst, arg1, arg2)); + return dst; + } + case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16; case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16; do_AvShift8x16: { @@ -5779,10 +5799,12 @@ HInstrArray* iselSB_PPC ( IRSB* bb, /* do some sanity checks */ mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX - | VEX_HWCAPS_PPC32_DFP; + | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07; + mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX - | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP; + | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP + | VEX_HWCAPS_PPC64_ISA2_07; if (mode64) { vassert((hwcaps_host & mask32) == 0); diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c index 288870cc51..f6b57ae083 100644 --- a/VEX/priv/ir_defs.c +++ b/VEX/priv/ir_defs.c @@ -528,6 +528,7 @@ void ppIROp ( IROp op ) case Iop_QNarrowBin32Sto16Sx4: vex_printf("QNarrowBin32Sto16Sx4"); return; case Iop_NarrowBin16to8x8: vex_printf("NarrowBin16to8x8"); return; case Iop_NarrowBin32to16x4: vex_printf("NarrowBin32to16x4"); return; + case Iop_NarrowBin64to32x4: vex_printf("NarrowBin64to32x4"); return; case Iop_InterleaveHI8x8: 
vex_printf("InterleaveHI8x8"); return; case Iop_InterleaveHI16x4: vex_printf("InterleaveHI16x4"); return; case Iop_InterleaveHI32x2: vex_printf("InterleaveHI32x2"); return; @@ -2806,6 +2807,7 @@ void typeOfPrimop ( IROp op, case Iop_QNarrowBin16Sto8Sx16: case Iop_QNarrowBin32Sto16Sx8: case Iop_QNarrowBin16Uto8Ux16: case Iop_QNarrowBin32Uto16Ux8: case Iop_NarrowBin16to8x16: case Iop_NarrowBin32to16x8: + case Iop_NarrowBin64to32x4: case Iop_InterleaveHI8x16: case Iop_InterleaveHI16x8: case Iop_InterleaveHI32x4: case Iop_InterleaveHI64x2: case Iop_InterleaveLO8x16: case Iop_InterleaveLO16x8: diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 287e7c8b06..e42595080f 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -1294,6 +1294,7 @@ static const HChar* show_hwcaps_ppc32 ( UInt hwcaps ) const UInt GX = VEX_HWCAPS_PPC32_GX; const UInt VX = VEX_HWCAPS_PPC32_VX; const UInt DFP = VEX_HWCAPS_PPC32_DFP; + const UInt ISA2_07 = VEX_HWCAPS_PPC32_ISA2_07; UInt c = hwcaps; if (c == 0) return "ppc32-int"; if (c == F) return "ppc32-int-flt"; @@ -1306,6 +1307,9 @@ static const HChar* show_hwcaps_ppc32 ( UInt hwcaps ) if (c == (F|V|FX|GX)) return "ppc32-int-flt-vmx-FX-GX"; if (c == (F|V|FX|GX|DFP)) return "ppc32-int-flt-vmx-FX-GX-DFP"; if (c == (F|V|FX|GX|VX|DFP)) return "ppc32-int-flt-vmx-FX-GX-VX-DFP"; + if (c == (F|V|FX|GX|VX|DFP|ISA2_07)) + return "ppc32-int-flt-vmx-FX-GX-VX-DFP-ISA2_07"; + return NULL; } @@ -1318,6 +1322,7 @@ static const HChar* show_hwcaps_ppc64 ( UInt hwcaps ) const UInt GX = VEX_HWCAPS_PPC64_GX; const UInt VX = VEX_HWCAPS_PPC64_VX; const UInt DFP = VEX_HWCAPS_PPC64_DFP; + const UInt ISA2_07 = VEX_HWCAPS_PPC64_ISA2_07; UInt c = hwcaps; if (c == 0) return "ppc64-int-flt"; if (c == FX) return "ppc64-int-flt-FX"; @@ -1329,6 +1334,8 @@ static const HChar* show_hwcaps_ppc64 ( UInt hwcaps ) if (c == (V|FX|GX)) return "ppc64-int-flt-vmx-FX-GX"; if (c == (V|FX|GX|DFP)) return "ppc64-int-flt-vmx-FX-GX-DFP"; if (c == (V|FX|GX|VX|DFP)) return 
"ppc64-int-flt-vmx-FX-GX-VX-DFP"; + if (c == (V|FX|GX|VX|DFP|ISA2_07)) + return "ppc64-int-flt-vmx-FX-GX-VX-DFP-ISA2_07"; return NULL; } diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index 3830b710bd..4b36727b71 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -95,6 +95,7 @@ typedef (fres,frsqrte,fsel,stfiwx) */ #define VEX_HWCAPS_PPC32_VX (1<<12) /* Vector-scalar floating-point (VSX); implies ISA 2.06 or higher */ #define VEX_HWCAPS_PPC32_DFP (1<<17) /* Decimal Floating Point (DFP) -- e.g., dadd */ +#define VEX_HWCAPS_PPC32_ISA2_07 (1<<19) /* ISA 2.07 -- e.g., mtvsrd */ /* ppc64: baseline capability is integer and basic FP insns */ #define VEX_HWCAPS_PPC64_V (1<<13) /* Altivec (VMX) */ @@ -103,6 +104,7 @@ typedef (fres,frsqrte,fsel,stfiwx) */ #define VEX_HWCAPS_PPC64_VX (1<<16) /* Vector-scalar floating-point (VSX); implies ISA 2.06 or higher */ #define VEX_HWCAPS_PPC64_DFP (1<<18) /* Decimal Floating Point (DFP) -- e.g., dadd */ +#define VEX_HWCAPS_PPC64_ISA2_07 (1<<20) /* ISA 2.07 -- e.g., mtvsrd */ /* s390x: Hardware capability encoding diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index ca109963dd..43d6e6345a 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -1467,6 +1467,7 @@ typedef Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8, Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8, Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8, + Iop_NarrowBin64to32x4, /* NARROWING (unary) -- narrow V128 into I64 */ Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,