case 0x3E:
switch ((b1<<1) | b0) {
case 0x0: // std (Store DWord, PPC64 p580)
+ if (!mode64)
+ return False;
+
DIP("std r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
storeBE( mkexpr(EA), mkexpr(rS) );
break;
case 0x1: // stdu (Store DWord, Update, PPC64 p583)
+ if (!mode64)
+ return False;
+
DIP("stdu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
putIReg( rA_addr, mkexpr(EA) );
storeBE( mkexpr(EA), mkexpr(rS) );
}
/* not decodable */
return False;
-
+
/* XFX-Form */
case 0x153: // mfspr (Move from Special-Purpose Register, PPC32 p470)
return False;
}
break;
-
+
+ case 0x33: // mfvsrd
+ {
+ UChar XS = ifieldRegXS( theInstr );
+ UChar rA_addr = ifieldRegA(theInstr);
+ IRExpr * high64;
+ IRTemp vS = newTemp( Ity_V128 );
+ DIP("mfvsrd r%u,vsr%d\n", rA_addr, (UInt)XS);
+
+ /* XS = SX || S
+ * For SX=0, mfvsrd is treated as a Floating-Point
+ * instruction in terms of resource availability.
+ * For SX=1, mfvsrd is treated as a Vector instruction in
+ * terms of resource availability.
+ *NEED TO FIGURE OUT HOW TO IMPLEMENT THE RESOURCE AVAILABILITY PART
+ */
+ assign( vS, getVSReg( XS ) );
+ high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
+ putIReg( rA_addr, (mode64) ? high64 :
+ unop( Iop_64to32, high64 ) );
+ break;
+ }
+
+ case 0xB3: // mtvsrd
+ {
+ UChar XT = ifieldRegXT( theInstr );
+ UChar rA_addr = ifieldRegA(theInstr);
+ IRTemp rA = newTemp(ty);
+ DIP("mtvsrd vsr%d,r%u\n", (UInt)XT, rA_addr);
+ /* XT = TX || T
+ * For TX=0, mtvsrd is treated as a Floating-Point
+ * instruction in terms of resource availability.
+ * For TX=1, mtvsrd is treated as a Vector instruction in
+ * terms of resource availability.
+ * TODO: figure out how to implement the resource availability part.
+ */
+ assign( rA, getIReg(rA_addr) );
+
+ if (mode64)
+ putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( rA ), mkU64( 0 ) ) );
+ else
+ putVSReg( XT, binop( Iop_64HLtoV128,
+ binop( Iop_32HLto64,
+ mkU32( 0 ),
+ mkexpr( rA ) ),
+ mkU64( 0 ) ) );
+ break;
+ }
+
+ case 0xD3: // mtvsrwa
+ {
+ UChar XT = ifieldRegXT( theInstr );
+ UChar rA_addr = ifieldRegA(theInstr);
+ IRTemp rA = newTemp( Ity_I32 );
+ DIP("mtvsrwa vsr%d,r%u\n", (UInt)XT, rA_addr);
+ /* XT = TX || T
+ * For TX=0, mtvsrwa is treated as a Floating-Point
+ * instruction in terms of resource availability.
+ * For TX=1, mtvsrwa is treated as a Vector instruction in
+ * terms of resource availability.
+ * TODO: figure out how to implement the resource availability part.
+ */
+ if (mode64)
+ assign( rA, unop( Iop_64to32, getIReg( rA_addr ) ) );
+ else
+ assign( rA, getIReg(rA_addr) );
+
+ putVSReg( XT, binop( Iop_64HLtoV128,
+ unop( Iop_32Sto64, mkexpr( rA ) ),
+ mkU64( 0 ) ) );
+ break;
+ }
+
default:
vex_printf("dis_proc_ctl(ppc)(opc2)\n");
return False;
/* Create and assign temps only as needed for the given instruction. */
switch (opc2) {
// scalar double-precision floating point argument
- case 0x2B0: case 0x0b0: case 0x290: case 0x212: case 0x090:
+ case 0x2B0: case 0x0b0: case 0x290: case 0x212: case 0x216: case 0x090:
xB = newTemp(Ity_F64);
assign( xB,
unop( Iop_ReinterpI64asF64,
assign( xB,
unop( Iop_64HIto32, unop( Iop_V128HIto64, getVSReg( XB ) ) ) );
break;
+ case 0x296: // xscvspdpn (non-signalling version of xscvspdp)
+ xB = newTemp(Ity_I32);
+ assign( xB,
+ unop( Iop_64HIto32, unop( Iop_V128HIto64, getVSReg( XB ) ) ) );
+ break;
/* Certain instructions have their complete implementation in the main switch statement
* that follows this one; thus we have a "do nothing" case for those instructions here.
mkU32( 0 ) ),
mkU64( 0ULL ) ) );
break;
+ case 0x216: /* xscvdpspn (VSX Scalar Convert Double-Precision to
+ Single-Precision format, Non-signalling) */
+ DIP("xscvdpspn v%u,v%u\n", (UInt)XT, (UInt)XB);
+ putVSReg( XT,
+ binop( Iop_64HLtoV128,
+ binop( Iop_32HLto64,
+ unop( Iop_ReinterpF32asI32,
+ unop( Iop_TruncF64asF32,
+ mkexpr( xB ) ) ),
+ mkU32( 0 ) ),
+ mkU64( 0ULL ) ) );
+ break;
case 0x090: // xscvdpuxws (VSX Scalar truncate Double-Precision to integer
// and Convert to Unsigned Integer Word format with Saturate)
DIP("xscvdpuxws v%u,v%u\n", (UInt)XT, (UInt)XB);
unop( Iop_ReinterpI32asF32, mkexpr( xB ) ) ) ),
mkU64( 0ULL ) ) );
break;
+ case 0x296: // xscvspdpn (VSX Scalar Convert Single-Precision to Double-Precision format Non signaling)
+ DIP("xscvspdpn v%u,v%u\n", (UInt)XT, (UInt)XB);
+ putVSReg( XT,
+ binop( Iop_64HLtoV128,
+ unop( Iop_ReinterpF64asI64,
+ unop( Iop_F32toF64,
+ unop( Iop_ReinterpI32asF32, mkexpr( xB ) ) ) ),
+ mkU64( 0ULL ) ) );
+ break;
case 0x312: // xvcvdpsp (VSX Vector round Double-Precision to single-precision
// and Convert to Single-Precision format)
DIP("xvcvdpsp v%u,v%u\n", (UInt)XT, (UInt)XB);
putVReg( vD_addr, binop(Iop_Add32x4, mkexpr(vA), mkexpr(vB)) );
break;
+ case 0x0C0: // vaddudm (Add Unsigned Double Word Modulo)
+ DIP("vaddudm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Add64x2, mkexpr(vA), mkexpr(vB)) );
+ break;
+
case 0x200: // vaddubs (Add Unsigned Byte Saturate, AV p142)
DIP("vaddubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
putVReg( vD_addr, binop(Iop_QAdd8Ux16, mkexpr(vA), mkexpr(vB)) );
return True;
}
+ case 0x44E: // vpkudum (Pack Unsigned Double Word Unsigned Modulo)
+ DIP("vpkudum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_NarrowBin64to32x4, mkexpr(vA), mkexpr(vB)) );
+ return True;
+
default:
break; // Fall through...
}
{ 0x1f4, "xvtdivdp" },
{ 0x208, "xxland" },
{ 0x212, "xscvdpsp" },
+ { 0x216, "xscvdpspn" },
{ 0x228, "xxlandc" },
{ 0x248 , "xxlor" },
{ 0x268, "xxlxor" },
{ 0x288, "xxlnor" },
{ 0x290, "xscvdpuxds" },
{ 0x292, "xscvspdp" },
+ { 0x296, "xscvspdpn" },
{ 0x2a0, "xsmindp" },
{ 0x2a4, "xsnmaddmdp" },
{ 0x2b0, "xscvdpsxds" },
{ 0x3f0, "xvcvsxddp" },
{ 0x3f2, "xvnegdp" }
};
-#define VSX_ALL_LEN 135
+#define VSX_ALL_LEN (sizeof vsx_all / sizeof *vsx_all)
+
// ATTENTION: This search function assumes vsx_all array is sorted.
static Int findVSXextOpCode(UInt opcode)
Bool allow_GX = False;
Bool allow_VX = False; // Equates to "supports Power ISA 2.06
Bool allow_DFP = False;
+ Bool allow_isa_2_07 = False;
UInt hwcaps = archinfo->hwcaps;
Long delta;
allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC64_GX));
allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC64_VX));
allow_DFP = (0 != (hwcaps & VEX_HWCAPS_PPC64_DFP));
+ allow_isa_2_07 = (0 != (hwcaps & VEX_HWCAPS_PPC64_ISA2_07));
} else {
allow_F = (0 != (hwcaps & VEX_HWCAPS_PPC32_F));
allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC32_V));
allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC32_GX));
allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC32_VX));
allow_DFP = (0 != (hwcaps & VEX_HWCAPS_PPC32_DFP));
+ allow_isa_2_07 = (0 != (hwcaps & VEX_HWCAPS_PPC32_ISA2_07));
}
/* The running delta */
case 0x2B0: case 0x2F0: // xscvdpsxds, xscvsxddp
case 0x1b0: case 0x130: // xvcvdpsxws, xvcvspsxws
case 0x0b0: case 0x290: // xscvdpsxws, xscvdpuxds
- case 0x212: // xscvdpsp
- case 0x292: case 0x312: // xscvspdp, xvcvdpsp
+ case 0x212: case 0x216: // xscvdpsp, xscvdpspn
+ case 0x292: case 0x296: // xscvspdp, xscvspdpn
+ case 0x312: // xvcvdpsp
case 0x390: case 0x190: // xvcvdpuxds, xvcvdpuxws
case 0x3B0: case 0x310: // xvcvdpsxds, xvcvspuxds
case 0x392: case 0x330: // xvcvspdp, xvcvspsxds
/* 64bit Integer Stores */
case 0x3E: // std, stdu
- if (!mode64) goto decode_failure;
if (dis_int_store( theInstr, abiinfo )) goto decode_success;
goto decode_failure;
if (!allow_GX) goto decode_noGX;
if (dis_fp_arith(theInstr)) goto decode_success;
goto decode_failure;
-
+
default:
break; // Fall through
}
goto decode_failure;
/* Processor Control Instructions */
+ case 0x33: // mfvsrd
+ case 0xB3: case 0xD3: // mtvsrd, mtvsrwa
case 0x200: case 0x013: case 0x153: // mcrxr, mfcr, mfspr
case 0x173: case 0x090: case 0x1D3: // mftb, mtcrf, mtspr
if (dis_proc_ctl( abiinfo, theInstr )) goto decode_success;
if (dis_av_arith( theInstr )) goto decode_success;
goto decode_failure;
+ case 0x0C0: // vaddudm
+ if (!allow_isa_2_07) goto decode_noP8;
+ if (dis_av_arith( theInstr )) goto decode_success;
+ goto decode_failure;
+
/* AV Rotate, Shift */
case 0x004: case 0x044: case 0x084: // vrlb, vrlh, vrlw
case 0x104: case 0x144: case 0x184: // vslb, vslh, vslw
if (dis_av_pack( theInstr )) goto decode_success;
goto decode_failure;
+ case 0x44E: // vpkudum
+ if (!allow_isa_2_07) goto decode_noP8;
+ if (dis_av_pack( theInstr )) goto decode_success;
+ goto decode_failure;
+
default:
break; // Fall through...
}
vex_printf("disInstr(ppc): "
"declined to decode a Decimal Floating Point insn.\n");
goto decode_failure;
+ decode_noP8:
+ vassert(!allow_isa_2_07);
+ vex_printf("disInstr(ppc): "
+ "declined to decode a Power 8 insn.\n");
+ goto decode_failure;
decode_failure:
/* do some sanity checks */
mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
| VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX
- | VEX_HWCAPS_PPC32_DFP;
+ | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07;
mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
- | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP;
+ | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP
+ | VEX_HWCAPS_PPC64_ISA2_07;
if (mode64) {
vassert((hwcaps_guest & mask32) == 0);
case Pav_UNPCKLPIX: return "vupklpx";
/* Integer binary */
- case Pav_ADDU: return "vaddu_m"; // b,h,w
+ case Pav_ADDU: return "vaddu_m"; // b,h,w,dw
case Pav_QADDU: return "vaddu_s"; // b,h,w
case Pav_QADDS: return "vadds_s"; // b,h,w
case Pav_ROTL: return "vrl"; // b,h,w
/* Pack */
- case Pav_PACKUU: return "vpku_um"; // h,w
+ case Pav_PACKUU: return "vpku_um"; // h,w,dw
case Pav_QPACKUU: return "vpku_us"; // h,w
case Pav_QPACKSU: return "vpks_us"; // h,w
case Pav_QPACKSS: return "vpks_ss"; // h,w
i->Pin.AvBin32x4.srcR = srcR;
return i;
}
+/* Construct a Pin_AvBin64x2: an AltiVec binary operation on two
+   64-bit lanes, dst = srcL `op` srcR.  Carries the Power ISA 2.07
+   doubleword vector ops (vaddudm, vpkudum, ...) to the emitter. */
+PPCInstr* PPCInstr_AvBin64x2 ( PPCAvOp op, HReg dst,
+ HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvBin64x2;
+ i->Pin.AvBin64x2.op = op;
+ i->Pin.AvBin64x2.dst = dst;
+ i->Pin.AvBin64x2.srcL = srcL;
+ i->Pin.AvBin64x2.srcR = srcR;
+ return i;
+}
+
PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvFpOp op, HReg dst,
HReg srcL, HReg srcR ) {
PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
vex_printf(",");
ppHRegPPC(i->Pin.AvBin32x4.srcR);
return;
+ case Pin_AvBin64x2:
+ vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvBin64x2.op));
+ ppHRegPPC(i->Pin.AvBin64x2.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin64x2.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin64x2.srcR);
+ return;
case Pin_AvBin32Fx4:
vex_printf("%s ", showPPCAvFpOp(i->Pin.AvBin32Fx4.op));
ppHRegPPC(i->Pin.AvBin32Fx4.dst);
addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcL);
addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcR);
return;
+ case Pin_AvBin64x2:
+ addHRegUse(u, HRmWrite, i->Pin.AvBin64x2.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvBin64x2.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvBin64x2.srcR);
+ return;
case Pin_AvBin32Fx4:
addHRegUse(u, HRmWrite, i->Pin.AvBin32Fx4.dst);
addHRegUse(u, HRmRead, i->Pin.AvBin32Fx4.srcL);
mapReg(m, &i->Pin.AvBin32x4.srcL);
mapReg(m, &i->Pin.AvBin32x4.srcR);
return;
+ case Pin_AvBin64x2:
+ mapReg(m, &i->Pin.AvBin64x2.dst);
+ mapReg(m, &i->Pin.AvBin64x2.srcL);
+ mapReg(m, &i->Pin.AvBin64x2.srcR);
+ return;
case Pin_AvBin32Fx4:
mapReg(m, &i->Pin.AvBin32Fx4.dst);
mapReg(m, &i->Pin.AvBin32Fx4.srcL);
goto done;
}
+ case Pin_AvBin64x2: {
+ UInt v_dst = vregNo(i->Pin.AvBin64x2.dst);
+ UInt v_srcL = vregNo(i->Pin.AvBin64x2.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvBin64x2.srcR);
+ UInt opc2;
+ switch (i->Pin.AvBin64x2.op) {
+ case Pav_ADDU: opc2 = 192; break; // vaddudm vector double add
+ case Pav_PACKUU: opc2 = 1102; break; // vpkudum
+ // FIXME: We currently don't have a vector compare equal double word, so it's a hack
+ // to use vcmpequw, but it works.
+ case Pav_CMPEQU: opc2 = 134; break; // vcmpequw
+ default:
+ goto bad;
+ }
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+ goto done;
+ }
+
case Pin_AvBin32Fx4: {
UInt v_dst = vregNo(i->Pin.AvBin32Fx4.dst);
UInt v_srcL = vregNo(i->Pin.AvBin32Fx4.srcL);