Power 8 support, phase 5

author Carl Love <cel@us.ibm.com>

Tue, 15 Oct 2013 18:11:20 +0000 (18:11 +0000)

committer Carl Love <cel@us.ibm.com>

Tue, 15 Oct 2013 18:11:20 +0000 (18:11 +0000)
author Carl Love <cel@us.ibm.com>
Tue, 15 Oct 2013 18:11:20 +0000 (18:11 +0000)
committer Carl Love <cel@us.ibm.com>
Tue, 15 Oct 2013 18:11:20 +0000 (18:11 +0000)
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c

index 98c90bffa1b7bde8d798b6149008a6a0d1af5fa6..2a01726e5459e388007bc912d5ea5a909694edc7 100644 (file)
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -791,6 +791,16 @@ static void breakV128to4x32( IRExpr* t128,
     assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
  }
  
+static IRExpr* mkV128from32( IRTemp t3, IRTemp t2,
+                               IRTemp t1, IRTemp t0 )
+{
+   return
+      binop( Iop_64HLtoV128,
+             binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
+             binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
+   );
+}
+
  
  /* Signed saturating narrow 64S to 32 */
  static IRExpr* mkQNarrow64Sto32 ( IRExpr* t64 )
@@ -1364,12 +1374,29 @@ static Int guestCR0offset ( UInt cr )
     }
  }
  
+typedef enum {
+   _placeholder0,
+   _placeholder1,
+   _placeholder2,
+   BYTE,
+   HWORD,
+   WORD,
+   DWORD
+} _popcount_data_type;
+
  /* Generate an IR sequence to do a popcount operation on the supplied
     IRTemp, and return a new IRTemp holding the result.  'ty' may be
     Ity_I32 or Ity_I64 only. */
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
  {
-   Int i, shift[6], max;
+  /* Do count across 2^data_type bits,
+     byte:        data_type = 3
+     half word:   data_type = 4
+     word:        data_type = 5
+     double word: data_type = 6  (not supported for 32-bit type)
+    */
+   Int shift[6];
+   _popcount_data_type idx, i;
     IRTemp mask[6];
     IRTemp old = IRTemp_INVALID;
     IRTemp nyu = IRTemp_INVALID;
@@ -1377,17 +1404,10 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
     vassert(ty == Ity_I64 || ty == Ity_I32);
  
     if (ty == Ity_I32) {
-      if (byte_count)
-         /* Return the population count across each byte not across the entire
-          * 32-bit value.  Stop after third iteration.
-          */
-         max = 3;
-      else
-         max = 5;
  
-      for (i = 0; i < 5; i++) {
-         mask[i]  = newTemp(ty);
-         shift[i] = 1 << i;
+      for (idx = 0; idx < WORD; idx++) {
+         mask[idx]  = newTemp(ty);
+         shift[idx] = 1 << idx;
        }
        assign(mask[0], mkU32(0x55555555));
        assign(mask[1], mkU32(0x33333333));
@@ -1395,7 +1415,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
        assign(mask[3], mkU32(0x00FF00FF));
        assign(mask[4], mkU32(0x0000FFFF));
        old = src;
-      for (i = 0; i < max; i++) {
+      for (i = 0; i < data_type; i++) {
           nyu = newTemp(ty);
           assign(nyu,
                  binop(Iop_Add32,
@@ -1409,16 +1429,11 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
        }
        return nyu;
     }
+
  // else, ty == Ity_I64
-   if (byte_count)
-      /* Return the population count across each byte not across the entire
-       * 64-bit value.  Stop after third iteration.
-       */
-      max = 3;
-   else
-      max = 6;
+   vassert(mode64);
  
-   for (i = 0; i < 6; i++) {
+   for (i = 0; i < DWORD; i++) {
        mask[i] = newTemp( Ity_I64 );
        shift[i] = 1 << i;
     }
@@ -1429,7 +1444,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
     assign( mask[4], mkU64( 0x0000FFFF0000FFFFULL ) );
     assign( mask[5], mkU64( 0x00000000FFFFFFFFULL ) );
     old = src;
-   for (i = 0; i < max; i++) {
+   for (i = 0; i < data_type; i++) {
        nyu = newTemp( Ity_I64 );
        assign( nyu,
                binop( Iop_Add64,
@@ -1442,6 +1457,60 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
     return nyu;
  }
  
+/* Special purpose population count function for
+ * vpopcntd in 32-bit mode.
+ */
+static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
+{
+   Int i, shift[6];
+   IRTemp mask[6];
+   IRTemp old = IRTemp_INVALID;
+   IRTemp nyu1 = IRTemp_INVALID;
+   IRTemp nyu2 = IRTemp_INVALID;
+   IRTemp retval = newTemp(Ity_I64);
+
+   vassert(!mode64);
+
+   for (i = 0; i < WORD; i++) {
+      mask[i]  = newTemp(Ity_I32);
+      shift[i] = 1 << i;
+   }
+   assign(mask[0], mkU32(0x55555555));
+   assign(mask[1], mkU32(0x33333333));
+   assign(mask[2], mkU32(0x0F0F0F0F));
+   assign(mask[3], mkU32(0x00FF00FF));
+   assign(mask[4], mkU32(0x0000FFFF));
+   old = src1;
+   for (i = 0; i < WORD; i++) {
+      nyu1 = newTemp(Ity_I32);
+      assign(nyu1,
+             binop(Iop_Add32,
+                   binop(Iop_And32,
+                         mkexpr(old),
+                         mkexpr(mask[i])),
+                   binop(Iop_And32,
+                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
+                         mkexpr(mask[i]))));
+      old = nyu1;
+   }
+
+   old = src2;
+   for (i = 0; i < WORD; i++) {
+      nyu2 = newTemp(Ity_I32);
+      assign(nyu2,
+             binop(Iop_Add32,
+                   binop(Iop_And32,
+                         mkexpr(old),
+                         mkexpr(mask[i])),
+                   binop(Iop_And32,
+                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
+                         mkexpr(mask[i]))));
+      old = nyu2;
+   }
+   assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
+   return retval;
+}
+
  
  // ROTL(src32/64, rot_amt5/6)
  static IRExpr* /* :: Ity_I32/64 */ ROTL ( IRExpr* src,
@@ -4143,7 +4212,7 @@ static Bool dis_int_logic ( UInt theInstr )
        case 0x1FA: // popcntd (population count doubleword
        {
           DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
-         IRTemp result = gen_POPCOUNT(ty, rS, False);
+         IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
           putIReg( rA_addr, mkexpr(result) );
           return True;
        }
@@ -4156,11 +4225,11 @@ static Bool dis_int_logic ( UInt theInstr )
              IRTemp argHi = newTemp(Ity_I32);
              assign(argLo, unop(Iop_64to32, mkexpr(rS)));
              assign(argHi, unop(Iop_64HIto32, mkexpr(rS)));
-            resultLo = gen_POPCOUNT(Ity_I32, argLo, False);
-            resultHi = gen_POPCOUNT(Ity_I32, argHi, False);
+            resultLo = gen_POPCOUNT(Ity_I32, argLo, WORD);
+            resultHi = gen_POPCOUNT(Ity_I32, argHi, WORD);
              putIReg( rA_addr, binop(Iop_32HLto64, mkexpr(resultHi), mkexpr(resultLo)));
           } else {
-            IRTemp result = gen_POPCOUNT(ty, rS, False);
+            IRTemp result = gen_POPCOUNT(ty, rS, WORD);
              putIReg( rA_addr, mkexpr(result) );
           }
           return True;
@@ -4175,12 +4244,12 @@ static Bool dis_int_logic ( UInt theInstr )
              IRTemp argHi = newTemp(Ity_I32);
              assign(argLo, unop(Iop_64to32, mkexpr(rS)));
              assign(argHi, unop(Iop_64HIto32, mkexpr(rS)));
-            resultLo = gen_POPCOUNT(Ity_I32, argLo, True);
-            resultHi = gen_POPCOUNT(Ity_I32, argHi, True);
+            resultLo = gen_POPCOUNT(Ity_I32, argLo, BYTE);
+            resultHi = gen_POPCOUNT(Ity_I32, argHi, BYTE);
              putIReg( rA_addr, binop(Iop_32HLto64, mkexpr(resultHi),
                                      mkexpr(resultLo)));
           } else {
-            IRTemp result = gen_POPCOUNT(ty, rS, True);
+            IRTemp result = gen_POPCOUNT(ty, rS, BYTE);
              putIReg( rA_addr, mkexpr(result) );
           }
           return True;
@@ -13201,6 +13270,167 @@ dis_vxv_sp_arith ( UInt theInstr, UInt opc2 )
     return True;
  }
  
+/*
+ * Vector Count Leading Zeros and Population Count
+ */
+static Bool
+dis_vxv_population_count ( UInt theInstr, UInt opc2 )
+{
+   UChar vRB_addr = ifieldRegB(theInstr);
+   UChar vRT_addr = ifieldRegDS(theInstr);
+   UChar opc1 = ifieldOPC( theInstr );
+   IRTemp vB = newTemp(Ity_V128);
+   assign( vB, getVReg(vRB_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf( "dis_vxv_population_count(ppc)(instr)\n" );
+      return False;
+   }
+
+   switch (opc2) {
+      case 0x702:    // vclzb
+         DIP("vclzb v%d,v%d\n", vRT_addr, vRB_addr);
+         putVReg( vRT_addr, unop(Iop_Clz8Sx16, mkexpr( vB ) ) );
+         break;
+
+      case 0x742:    // vclzh
+         DIP("vclzh v%d,v%d\n", vRT_addr, vRB_addr);
+         putVReg( vRT_addr, unop(Iop_Clz16Sx8, mkexpr( vB ) ) );
+         break;
+
+      case 0x782:    // vclzw
+         DIP("vclzw v%d,v%d\n", vRT_addr, vRB_addr);
+         putVReg( vRT_addr, unop(Iop_Clz32Sx4, mkexpr( vB ) ) );
+         break;
+
+      case 0x7C2:    // vclzd
+         DIP("vclzd v%d,v%d\n", vRT_addr, vRB_addr);
+         putVReg( vRT_addr, unop(Iop_Clz64x2, mkexpr( vB ) ) );
+         break;
+
+      case 0x703:    // vpopcntb
+      {
+         /* Break vector into 32-bit words and do the population count
+          * on each byte in the words
+          */
+         IRType ty = Ity_I32;
+         IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
+         bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
+         IRTemp cnt_bits0_31, cnt_bits32_63, cnt_bits64_95, cnt_bits96_127;
+         cnt_bits0_31 = cnt_bits32_63 = cnt_bits64_95 = cnt_bits96_127 = IRTemp_INVALID;
+
+         DIP("vpopcntb v%d,v%d\n", vRT_addr, vRB_addr);
+         breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+         cnt_bits0_31   = gen_POPCOUNT(ty, bits0_31,   BYTE);
+         cnt_bits32_63  = gen_POPCOUNT(ty, bits32_63,  BYTE);
+         cnt_bits64_95  = gen_POPCOUNT(ty, bits64_95,  BYTE);
+         cnt_bits96_127 = gen_POPCOUNT(ty, bits96_127, BYTE);
+
+         putVReg( vRT_addr, mkV128from32(cnt_bits96_127, cnt_bits64_95,
+                                         cnt_bits32_63, cnt_bits0_31) );
+         break;
+      }
+
+      case 0x743:    // vpopcnth
+      {
+         /* Break vector into 32-bit words and do the population count
+          * for each half word
+          */
+         IRType ty = Ity_I32;
+         IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
+         bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
+         IRTemp cnt_bits0_31, cnt_bits32_63, cnt_bits64_95, cnt_bits96_127;
+         cnt_bits0_31 = cnt_bits32_63 = cnt_bits64_95 = cnt_bits96_127 = IRTemp_INVALID;
+
+         DIP("vpopcnth v%d,v%d\n", vRT_addr, vRB_addr);
+         breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+
+         cnt_bits0_31   = gen_POPCOUNT(ty, bits0_31,   HWORD);
+         cnt_bits32_63  = gen_POPCOUNT(ty, bits32_63,  HWORD);
+         cnt_bits64_95  = gen_POPCOUNT(ty, bits64_95,  HWORD);
+         cnt_bits96_127 = gen_POPCOUNT(ty, bits96_127, HWORD);
+
+         putVReg( vRT_addr, mkV128from32(cnt_bits96_127, cnt_bits64_95,
+                                         cnt_bits32_63, cnt_bits0_31) );
+         break;
+      }
+
+      case 0x783:    // vpopcntw
+      {
+         /* Break vector into 32-bit words and do the population count
+          * on each word.
+          */
+         IRType ty = Ity_I32;
+         IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
+         bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
+         IRTemp cnt_bits0_31, cnt_bits32_63, cnt_bits64_95, cnt_bits96_127;
+         cnt_bits0_31 = cnt_bits32_63 = cnt_bits64_95 = cnt_bits96_127 = IRTemp_INVALID;
+
+         DIP("vpopcntw v%d,v%d\n", vRT_addr, vRB_addr);
+         breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+
+         cnt_bits0_31   = gen_POPCOUNT(ty, bits0_31,   WORD);
+         cnt_bits32_63  = gen_POPCOUNT(ty, bits32_63,  WORD);
+         cnt_bits64_95  = gen_POPCOUNT(ty, bits64_95,  WORD);
+         cnt_bits96_127 = gen_POPCOUNT(ty, bits96_127, WORD);
+
+         putVReg( vRT_addr, mkV128from32(cnt_bits96_127, cnt_bits64_95,
+                                         cnt_bits32_63, cnt_bits0_31) );
+         break;
+      }
+
+      case 0x7C3:    // vpopcntd
+      {
+         if (mode64) {
+            /* Break vector into 64-bit double words and do the population count
+             * on each double word.
+             */
+            IRType ty = Ity_I64;
+            IRTemp bits0_63   = newTemp(Ity_I64);
+            IRTemp bits64_127 = newTemp(Ity_I64);
+            IRTemp cnt_bits0_63   = newTemp(Ity_I64);
+            IRTemp cnt_bits64_127 = newTemp(Ity_I64);
+
+            DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
+
+            assign(bits0_63,   unop( Iop_V128to64,   mkexpr( vB ) ) );
+            assign(bits64_127, unop( Iop_V128HIto64, mkexpr( vB ) ) );
+            cnt_bits0_63   = gen_POPCOUNT(ty, bits0_63,   DWORD);
+            cnt_bits64_127 = gen_POPCOUNT(ty, bits64_127, DWORD);
+
+            putVReg( vRT_addr, binop( Iop_64HLtoV128,
+                                      mkexpr( cnt_bits64_127 ),
+                                      mkexpr( cnt_bits0_63 ) ) );
+         } else {
+            /* Break vector into 32-bit words and do the population count
+             * on each doubleword (sum of the counts of its two words).
+             */
+            IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
+            bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
+            IRTemp cnt_bits0_63   = newTemp(Ity_I64);
+            IRTemp cnt_bits64_127  = newTemp(Ity_I64);
+
+            DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
+            breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+
+            cnt_bits0_63   = gen_vpopcntd_mode32(bits0_31, bits32_63);
+            cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
+
+            putVReg( vRT_addr, binop( Iop_64HLtoV128,
+                                      mkexpr( cnt_bits64_127 ),
+                                      mkexpr( cnt_bits0_63 ) ) );
+         }
+         break;
+      }
+
+      default:
+         vex_printf("dis_vxv_population_count(ppc)(opc2)\n");
+         return False;
+      break;
+   }
+   return True;
+}
+
  typedef enum {
     PPC_CMP_EQ = 2,
     PPC_CMP_GT = 4,
@@ -15925,6 +16155,27 @@ static Bool dis_av_logic ( UInt theInstr )
           unop(Iop_NotV128, binop(Iop_OrV128, mkexpr(vA), mkexpr(vB))) );
        break;
  
+   case 0x544: // vorc (vA Or'd with complement of vB)
+      DIP("vorc v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+      putVReg( vD_addr, binop( Iop_OrV128,
+                               mkexpr( vA ),
+                               unop( Iop_NotV128, mkexpr( vB ) ) ) );
+      break;
+
+   case 0x584: // vnand (Nand)
+      DIP("vnand v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+      putVReg( vD_addr, unop( Iop_NotV128,
+                              binop(Iop_AndV128, mkexpr( vA ),
+                              mkexpr( vB ) ) ) );
+      break;
+
+   case 0x684: // veqv (complemented XOr)
+      DIP("veqv v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+      putVReg( vD_addr, unop( Iop_NotV128,
+                              binop( Iop_XorV128, mkexpr( vA ),
+                              mkexpr( vB ) ) ) );
+      break;
+
     default:
        vex_printf("dis_av_logic(ppc)(opc2=0x%x)\n", opc2);
        return False;
@@ -16308,6 +16559,60 @@ static Bool dis_av_multarith ( UInt theInstr )
     return True;
  }
  
+/*
+  AltiVec Polynomial Multiply-Sum Instructions
+*/
+static Bool dis_av_polymultarith ( UInt theInstr )
+{
+   /* VX-Form (the vC field is read below but not used by any case) */
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar vD_addr  = ifieldRegDS(theInstr);
+   UChar vA_addr  = ifieldRegA(theInstr);
+   UChar vB_addr  = ifieldRegB(theInstr);
+   UChar vC_addr  = ifieldRegC(theInstr);
+   UInt  opc2     = IFIELD(theInstr, 0, 11);
+   IRTemp vA    = newTemp(Ity_V128);
+   IRTemp vB    = newTemp(Ity_V128);
+   IRTemp vC    = newTemp(Ity_V128);
+
+   assign( vA, getVReg(vA_addr));
+   assign( vB, getVReg(vB_addr));
+   assign( vC, getVReg(vC_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf("dis_av_polymultarith(ppc)(instr)\n");
+      return False;
+   }
+
+   switch (opc2) {
+      /* Polynomial Multiply-Add */
+      case 0x408:  // vpmsumb   Vector Polynomial Multiply-sum Byte
+         DIP("vpmsumb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr, binop(Iop_PolynomialMulAdd8x16,
+                                 mkexpr(vA), mkexpr(vB)) );
+         break;
+      case 0x448:  // vpmsumd   Vector Polynomial Multiply-sum Double Word
+         DIP("vpmsumd v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr, binop(Iop_PolynomialMulAdd64x2,
+                                 mkexpr(vA), mkexpr(vB)) );
+         break;
+      case 0x488:  // vpmsumw   Vector Polynomial Multiply-sum Word
+         DIP("vpmsumw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr, binop(Iop_PolynomialMulAdd32x4,
+                                 mkexpr(vA), mkexpr(vB)) );
+         break;
+      case 0x4C8:  // vpmsumh   Vector Polynomial Multiply-sum Half Word
+         DIP("vpmsumh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr, binop(Iop_PolynomialMulAdd16x8,
+                                 mkexpr(vA), mkexpr(vB)) );
+         break;
+      default:
+         vex_printf("dis_av_polymultarith(ppc)(opc2=0x%x)\n", opc2);
+         return False;
+   }
+   return True;
+}
+
  /*
    AltiVec Shift/Rotate Instructions
  */
@@ -16550,7 +16855,25 @@ static Bool dis_av_permute ( UInt theInstr )
                    binop(Iop_ShlV128, mkexpr(vA), mkU8(SHB_uimm4*8)),
                    binop(Iop_ShrV128, mkexpr(vB), mkU8((16-SHB_uimm4)*8))) );
        return True;
-
+   case 0x2D: {  // vpermxor (Vector Permute and Exclusive-OR)
+      IRTemp a_perm  = newTemp(Ity_V128);
+      IRTemp b_perm  = newTemp(Ity_V128);
+      IRTemp vrc_a   = newTemp(Ity_V128);
+      IRTemp vrc_b   = newTemp(Ity_V128);
+
+      /* IBM index  is 0:7, Change index value to index 7:0 */
+      assign( vrc_b, binop( Iop_AndV128, mkexpr( vC ),
+                            unop( Iop_Dup8x16, mkU8( 0xF ) ) ) );
+      assign( vrc_a, binop( Iop_ShrV128,
+                            binop( Iop_AndV128, mkexpr( vC ),
+                                   unop( Iop_Dup8x16, mkU8( 0xF0 ) ) ),
+                            mkU8 ( 4 ) ) );
+      assign( a_perm, binop( Iop_Perm8x16, mkexpr( vA ), mkexpr( vrc_a ) ) );
+      assign( b_perm, binop( Iop_Perm8x16, mkexpr( vB ), mkexpr( vrc_b ) ) );
+      putVReg( vD_addr, binop( Iop_XorV128,
+                               mkexpr( a_perm ), mkexpr( b_perm) ) );
+      return True;
+   }
     default:
       break; // Fall through...
     }
@@ -16989,6 +17312,158 @@ static Bool dis_av_pack ( UInt theInstr )
     return True;
  }
  
+/*
+  AltiVec Cipher Instructions
+*/
+static Bool dis_av_cipher ( UInt theInstr )
+{
+   /* VX-Form */
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar vD_addr  = ifieldRegDS(theInstr);
+   UChar vA_addr  = ifieldRegA(theInstr);
+   UChar vB_addr  = ifieldRegB(theInstr);
+   UInt  opc2     = IFIELD( theInstr, 0, 11 );
+
+   IRTemp vA    = newTemp(Ity_V128);
+   IRTemp vB    = newTemp(Ity_V128);
+   assign( vA, getVReg(vA_addr));
+   assign( vB, getVReg(vB_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf("dis_av_cipher(ppc)(instr)\n");
+      return False;
+   }
+   switch (opc2) {
+      case 0x508: // vcipher (Vector Cipher)
+         DIP("vcipher v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr,
+                  binop(Iop_CipherV128, mkexpr(vA), mkexpr(vB)) );
+         return True;
+
+      case 0x509: // vcipherlast (Vector Cipher Last)
+         DIP("vcipherlast v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr,
+                  binop(Iop_CipherLV128, mkexpr(vA), mkexpr(vB)) );
+         return True;
+
+      case 0x548: // vncipher (Vector Inverse Cipher)
+         DIP("vncipher v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr,
+                  binop(Iop_NCipherV128, mkexpr(vA), mkexpr(vB)) );
+         return True;
+
+      case 0x549: // vncipherlast (Vector Inverse Cipher Last)
+         DIP("vncipherlast v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr,
+                  binop(Iop_NCipherLV128, mkexpr(vA), mkexpr(vB)) );
+         return True;
+
+      case 0x5C8: /* vsbox (Vector SubBytes, this does the cipher
+       * subBytes transform)
+       */
+         DIP("vsbox v%d,v%d\n", vD_addr, vA_addr);
+         putVReg( vD_addr,
+                  unop(Iop_CipherSV128, mkexpr(vA) ) );
+         return True;
+
+      default:
+         vex_printf("dis_av_cipher(ppc)(opc2)\n");
+         return False;
+   }
+   return True;
+}
+
+/*
+  AltiVec Secure Hash Instructions
+*/
+static Bool dis_av_hash ( UInt theInstr )
+{
+   /* VX-Form */
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar vRT_addr = ifieldRegDS(theInstr);
+   UChar vRA_addr  = ifieldRegA(theInstr);
+   UChar s_field  = IFIELD( theInstr, 11, 5 );  // st and six field
+   UChar st       = IFIELD( theInstr, 15, 1 );  // st
+   UChar six      = IFIELD( theInstr, 11, 4 );  // six field
+   UInt  opc2     = IFIELD( theInstr, 0, 11 );
+
+   IRTemp vA    = newTemp(Ity_V128);
+   IRTemp dst    = newTemp(Ity_V128);
+   assign( vA, getVReg(vRA_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf("dis_av_hash(ppc)(instr)\n");
+      return False;
+   }
+
+   switch (opc2) {
+      case 0x682:  // vshasigmaw
+         DIP("vshasigmaw v%d,v%d,%u,%u\n", vRT_addr, vRA_addr, st, six);
+         assign( dst, binop( Iop_SHA256, mkexpr( vA ), mkU8( s_field) ) );
+         putVReg( vRT_addr, mkexpr(dst));
+         return True;
+
+      case 0x6C2:  // vshasigmad
+         DIP("vshasigmad v%d,v%d,%u,%u\n", vRT_addr, vRA_addr, st, six);
+         putVReg( vRT_addr, binop( Iop_SHA512, mkexpr( vA ), mkU8( s_field) ) );
+         return True;
+
+      default:
+         vex_printf("dis_av_hash(ppc)(opc2)\n");
+         return False;
+   }
+   return True;
+}
+
+/*
+  AltiVec BCD Arithmetic instructions.
+  These instructions modify CR6 for various conditions in the result,
+  including when an overflow occurs.  We could easily detect all conditions
+  except when an overflow occurs.  But since we can't be 100% accurate
+  in our emulation of CR6, it seems best to just not support it at all.
+*/
+static Bool dis_av_bcd ( UInt theInstr )
+{
+   /* VX-Form */
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar vRT_addr = ifieldRegDS(theInstr);
+   UChar vRA_addr = ifieldRegA(theInstr);
+   UChar vRB_addr = ifieldRegB(theInstr);
+   UChar ps       = IFIELD( theInstr, 9, 1 );
+   UInt  opc2     = IFIELD( theInstr, 0, 9 );
+
+   IRTemp vA    = newTemp(Ity_V128);
+   IRTemp vB    = newTemp(Ity_V128);
+   IRTemp dst    = newTemp(Ity_V128);
+   assign( vA, getVReg(vRA_addr));
+   assign( vB, getVReg(vRB_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf("dis_av_bcd(ppc)(instr)\n");
+      return False;
+   }
+
+   switch (opc2) {
+   case 0x1:  // bcdadd
+     DIP("bcdadd. v%d,v%d,v%d,%u\n", vRT_addr, vRA_addr, vRB_addr, ps);
+     assign( dst, triop( Iop_BCDAdd, mkexpr( vA ),
+                         mkexpr( vB ), mkU8( ps ) ) );
+     putVReg( vRT_addr, mkexpr(dst));
+     return True;
+
+   case 0x41:  // bcdsub
+     DIP("bcdsub. v%d,v%d,v%d,%u\n", vRT_addr, vRA_addr, vRB_addr, ps);
+     assign( dst, triop( Iop_BCDSub, mkexpr( vA ),
+                         mkexpr( vB ), mkU8( ps ) ) );
+     putVReg( vRT_addr, mkexpr(dst));
+     return True;
+
+   default:
+      vex_printf("dis_av_bcd(ppc)(opc2)\n");
+      return False;
+   }
+   return True;
+}
  
  /*
    AltiVec Floating Point Arithmetic Instructions
@@ -18822,6 +19297,11 @@ DisResult disInstr_PPC_WRK (
           if (dis_av_permute( theInstr )) goto decode_success;
           goto decode_failure;
  
+      case 0x2D:                       // vpermxor
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_permute( theInstr )) goto decode_success;
+         goto decode_failure;
+
        /* AV Floating Point Mult-Add/Sub */
        case 0x2E: case 0x2F:            // vmaddfp, vnmsubfp
           if (!allow_V) goto decode_noV;
@@ -18832,6 +19312,18 @@ DisResult disInstr_PPC_WRK (
           break;  // Fall through...
        }
  
+      opc2 = IFIELD(theInstr, 0, 9);
+      switch (opc2) {
+      /* BCD arithmetic */
+      case 0x1: case 0x41:             // bcdadd, bcdsub
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_bcd( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      default:
+         break;  // Fall through...
+      }
+
        opc2 = IFIELD(theInstr, 0, 11);
        switch (opc2) {
        /* AV Arithmetic */
@@ -18868,6 +19360,13 @@ DisResult disInstr_PPC_WRK (
           if (dis_av_arith( theInstr )) goto decode_success;
           goto decode_failure;
  
+      /* AV Polynomial Vector Multiply Add */
+      case 0x408: case 0x448:            // vpmsumb, vpmsumd
+      case 0x488: case 0x4C8:            // vpmsumw, vpmsumh
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_polymultarith( theInstr )) goto decode_success;
+         goto decode_failure;
+
        /* AV Rotate, Shift */
        case 0x004: case 0x044: case 0x084: // vrlb, vrlh, vrlw
        case 0x104: case 0x144: case 0x184: // vslb, vslh, vslw
@@ -18892,6 +19391,12 @@ DisResult disInstr_PPC_WRK (
           if (dis_av_logic( theInstr )) goto decode_success;
           goto decode_failure;
  
+      case 0x544:                         // vorc
+      case 0x584: case 0x684:             // vnand, veqv
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_logic( theInstr )) goto decode_success;
+         goto decode_failure;
+
        /* AV Processor Control */
        case 0x604: case 0x644:             // mfvscr, mtvscr
           if (!allow_V) goto decode_noV;
@@ -18948,6 +19453,30 @@ DisResult disInstr_PPC_WRK (
           if (dis_av_pack( theInstr )) goto decode_success;
           goto decode_failure;
  
+      case 0x508: case 0x509:             // vcipher, vcipherlast
+      case 0x548: case 0x549:             // vncipher, vncipherlast
+      case 0x5C8:                         // vsbox
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_cipher( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      case 0x6C2: case 0x682:             // vshasigmad, vshasigmaw
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_hash( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      case 0x702: case 0x742:             // vclzb, vclzh
+      case 0x782: case 0x7c2:             // vclzw, vclzd
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_vxv_population_count( theInstr, opc2 )) goto decode_success;
+         goto decode_failure;
+
+      case 0x703: case 0x743:             // vpopcntb, vpopcnth
+      case 0x783: case 0x7c3:             // vpopcntw, vpopcntd
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_vxv_population_count( theInstr, opc2 )) goto decode_success;
+         goto decode_failure;
+
        default:
           break;  // Fall through...
        }
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c

index e6742ca22cf705b66aa51236983e0da82cafd444..446a563dbb2aaa570e150fa6a5120449cf1cc801 100644 (file)
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -723,6 +723,27 @@ const HChar* showPPCAvOp ( PPCAvOp op ) {
     case Pav_CATODD:     return "vmrgow";    // w
     case Pav_CATEVEN:    return "vmrgew";    // w
  
+   /* SHA */
+   case Pav_SHA256:     return "vshasigmaw"; // w
+   case Pav_SHA512:     return "vshasigmaw"; // dw
+
+   /* BCD */
+   case Pav_BCDAdd:     return "bcdadd.";  // qw
+   case Pav_BCDSub:     return "bcdsub.";  // qw
+
+   /* Polynomial arith */
+   case Pav_POLYMULADD: return "vpmsum";   // b, h, w, d
+
+   /* Cipher */
+   case Pav_CIPHERV128:  case Pav_CIPHERLV128:
+   case Pav_NCIPHERV128: case Pav_NCIPHERLV128:
+   case Pav_CIPHERSUBV128: return "v_cipher_";  // qw
+
+   /* zero count */
+   case Pav_ZEROCNTBYTE: case Pav_ZEROCNTWORD:
+   case Pav_ZEROCNTHALF: case Pav_ZEROCNTDBL:
+      return "vclz_";                           // b, h, w, d
+
     default: vpanic("showPPCAvOp");
     }
  }
@@ -1434,6 +1455,45 @@ PPCInstr* PPCInstr_AvLdVSCR ( HReg src ) {
     i->Pin.AvLdVSCR.src = src;
     return i;
  }
+PPCInstr* PPCInstr_AvCipherV128Unary ( PPCAvOp op, HReg dst, HReg src ) {
+   PPCInstr* i              = LibVEX_Alloc(sizeof(PPCInstr));
+   i->tag                   = Pin_AvCipherV128Unary;
+   i->Pin.AvCipherV128Unary.op   = op;
+   i->Pin.AvCipherV128Unary.dst  = dst;
+   i->Pin.AvCipherV128Unary.src  = src;
+   return i;
+}
+PPCInstr* PPCInstr_AvCipherV128Binary ( PPCAvOp op, HReg dst,
+                                        HReg srcL, HReg srcR ) {
+   PPCInstr* i              = LibVEX_Alloc(sizeof(PPCInstr));
+   i->tag                   = Pin_AvCipherV128Binary;
+   i->Pin.AvCipherV128Binary.op   = op;
+   i->Pin.AvCipherV128Binary.dst  = dst;
+   i->Pin.AvCipherV128Binary.srcL = srcL;
+   i->Pin.AvCipherV128Binary.srcR = srcR;
+   return i;
+}
+PPCInstr* PPCInstr_AvHashV128Binary ( PPCAvOp op, HReg dst,
+                                      HReg src, PPCRI* s_field ) {
+   PPCInstr* i              = LibVEX_Alloc(sizeof(PPCInstr));
+   i->tag                   = Pin_AvHashV128Binary;
+   i->Pin.AvHashV128Binary.op  = op;
+   i->Pin.AvHashV128Binary.dst = dst;
+   i->Pin.AvHashV128Binary.src = src;
+   i->Pin.AvHashV128Binary.s_field = s_field;
+   return i;
+}
+PPCInstr* PPCInstr_AvBCDV128Trinary ( PPCAvOp op, HReg dst,
+                                      HReg src1, HReg src2, PPCRI* ps ) {
+   PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+   i->tag      = Pin_AvBCDV128Trinary;
+   i->Pin.AvBCDV128Trinary.op   = op;
+   i->Pin.AvBCDV128Trinary.dst  = dst;
+   i->Pin.AvBCDV128Trinary.src1 = src1;
+   i->Pin.AvBCDV128Trinary.src2 = src2;
+   i->Pin.AvBCDV128Trinary.ps   = ps;
+   return i;
+}
  
  
  /* Pretty Print instructions */
@@ -1991,6 +2051,42 @@ void ppPPCInstr ( PPCInstr* i, Bool mode64 )
        ppHRegPPC(i->Pin.AvLdVSCR.src);
        return;
  
+   case Pin_AvCipherV128Unary:
+      vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvCipherV128Unary.op));
+      ppHRegPPC(i->Pin.AvCipherV128Unary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvCipherV128Unary.src);
+      return;
+
+   case Pin_AvCipherV128Binary:
+      vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvCipherV128Binary.op));
+      ppHRegPPC(i->Pin.AvCipherV128Binary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvCipherV128Binary.srcL);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvCipherV128Binary.srcR);
+      return;
+
+   case Pin_AvHashV128Binary:
+      vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvHashV128Binary.op));
+      ppHRegPPC(i->Pin.AvHashV128Binary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvHashV128Binary.src);
+      vex_printf(",");
+      ppPPCRI(i->Pin.AvHashV128Binary.s_field);
+      return;
+
+   case Pin_AvBCDV128Trinary:
+      vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvBCDV128Trinary.op));
+      ppHRegPPC(i->Pin.AvBCDV128Trinary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvBCDV128Trinary.src1);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvBCDV128Trinary.src2);
+      vex_printf(",");
+      ppPPCRI(i->Pin.AvBCDV128Trinary.ps);
+      return;
+
     case Pin_Dfp64Unary:
        vex_printf("%s ", showPPCFpOp(i->Pin.Dfp64Unary.op));
        ppHRegPPC(i->Pin.Dfp64Unary.dst);
@@ -2433,6 +2529,26 @@ void getRegUsage_PPCInstr ( HRegUsage* u, PPCInstr* i, Bool mode64 )
     case Pin_AvLdVSCR:
        addHRegUse(u, HRmRead, i->Pin.AvLdVSCR.src);
        return;
+   case Pin_AvCipherV128Unary:
+      addHRegUse(u, HRmWrite, i->Pin.AvCipherV128Unary.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvCipherV128Unary.src);
+      return;
+   case Pin_AvCipherV128Binary:
+      addHRegUse(u, HRmWrite, i->Pin.AvCipherV128Binary.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvCipherV128Binary.srcL);
+      addHRegUse(u, HRmRead,  i->Pin.AvCipherV128Binary.srcR);
+      return;
+   case Pin_AvHashV128Binary:
+      addHRegUse(u, HRmWrite, i->Pin.AvHashV128Binary.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvHashV128Binary.src);
+      addRegUsage_PPCRI(u,    i->Pin.AvHashV128Binary.s_field);
+      return;
+   case Pin_AvBCDV128Trinary:
+      addHRegUse(u, HRmWrite, i->Pin.AvBCDV128Trinary.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvBCDV128Trinary.src1);
+      addHRegUse(u, HRmRead,  i->Pin.AvBCDV128Trinary.src2);
+      addRegUsage_PPCRI(u,    i->Pin.AvBCDV128Trinary.ps);
+      return;
     case Pin_Dfp64Unary:
        addHRegUse(u, HRmWrite, i->Pin.Dfp64Unary.dst);
        addHRegUse(u, HRmRead, i->Pin.Dfp64Unary.src);
@@ -2742,6 +2858,26 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
     case Pin_AvLdVSCR:
        mapReg(m, &i->Pin.AvLdVSCR.src);
        return;
+   case Pin_AvCipherV128Unary:
+      mapReg(m, &i->Pin.AvCipherV128Unary.dst);
+      mapReg(m, &i->Pin.AvCipherV128Unary.src);
+      return;
+   case Pin_AvCipherV128Binary:
+      mapReg(m, &i->Pin.AvCipherV128Binary.dst);
+      mapReg(m, &i->Pin.AvCipherV128Binary.srcL);
+      mapReg(m, &i->Pin.AvCipherV128Binary.srcR);
+      return;
+   case Pin_AvHashV128Binary:
+      mapRegs_PPCRI(m, i->Pin.AvHashV128Binary.s_field);
+      mapReg(m, &i->Pin.AvHashV128Binary.dst);
+      mapReg(m, &i->Pin.AvHashV128Binary.src);
+      return;
+   case Pin_AvBCDV128Trinary:
+      mapReg(m, &i->Pin.AvBCDV128Trinary.dst);
+      mapReg(m, &i->Pin.AvBCDV128Trinary.src1);
+      mapReg(m, &i->Pin.AvBCDV128Trinary.src2);
+      mapRegs_PPCRI(m, i->Pin.AvBCDV128Trinary.ps);
+      return;
     case Pin_Dfp64Unary:
        mapReg(m, &i->Pin.Dfp64Unary.dst);
        mapReg(m, &i->Pin.Dfp64Unary.src);
@@ -4632,6 +4768,11 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        case Pav_UNPCKL16S: opc2 =  718; break; // vupklsh
        case Pav_UNPCKHPIX: opc2 =  846; break; // vupkhpx
        case Pav_UNPCKLPIX: opc2 =  974; break; // vupklpx
+
+      case Pav_ZEROCNTBYTE: opc2 = 1794; break; // vclzb
+      case Pav_ZEROCNTHALF: opc2 = 1858; break; // vclzh
+      case Pav_ZEROCNTWORD: opc2 = 1922; break; // vclzw
+      case Pav_ZEROCNTDBL:  opc2 = 1986; break; // vclzd
        default:
           goto bad;
        }
@@ -4713,6 +4854,8 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        case Pav_MRGHI:    opc2 =   12; break; // vmrghb
        case Pav_MRGLO:    opc2 =  268; break; // vmrglb
  
+      case Pav_POLYMULADD: opc2 = 1032; break; // vpmsumb
+
        default:
           goto bad;
        }
@@ -4765,6 +4908,8 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        case Pav_MRGHI:   opc2 =   76; break; // vmrghh
        case Pav_MRGLO:   opc2 =  332; break; // vmrglh
  
+      case Pav_POLYMULADD: opc2 = 1224; break; // vpmsumh
+
        default:
           goto bad;
        }
@@ -4822,6 +4967,8 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        case Pav_CATODD:  opc2 = 1676; break; // vmrgow
        case Pav_CATEVEN: opc2 = 1932; break; // vmrgew
  
+      case Pav_POLYMULADD: opc2 = 1160; break; // vpmsumw
+
        default:
           goto bad;
        }
@@ -4853,13 +5000,71 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        case Pav_QPACKSS: opc2 = 1486; break; // vpksdsm
        case Pav_MRGHI:   opc2 = 1614; break; // vmrghw
        case Pav_MRGLO:   opc2 = 1742; break; // vmrglw
+      case Pav_POLYMULADD: opc2 = 1096; break; // vpmsumd
        default:
           goto bad;
        }
        p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
        goto done;
     }
-
+   case Pin_AvCipherV128Unary: {
+      UInt v_dst = vregNo(i->Pin.AvCipherV128Unary.dst);
+      UInt v_src = vregNo(i->Pin.AvCipherV128Unary.src);
+      UInt opc2;
+      switch (i->Pin.AvCipherV128Unary.op) {
+      case Pav_CIPHERSUBV128:   opc2 =  1480; break; // vsbox
+      default:
+         goto bad;
+      }
+      p = mkFormVX( p, 4, v_dst, v_src, 0, opc2 );
+      goto done;
+   }
+   case Pin_AvCipherV128Binary: {
+      UInt v_dst  = vregNo(i->Pin.AvCipherV128Binary.dst);
+      UInt v_srcL = vregNo(i->Pin.AvCipherV128Binary.srcL);
+      UInt v_srcR = vregNo(i->Pin.AvCipherV128Binary.srcR);
+      UInt opc2;
+      switch (i->Pin.AvCipherV128Binary.op) {
+      case Pav_CIPHERV128:     opc2 =  1288; break; // vcipher
+      case Pav_CIPHERLV128:    opc2 =  1289; break; // vcipherlast
+      case Pav_NCIPHERV128:    opc2 =  1352; break; // vncipher
+      case Pav_NCIPHERLV128:   opc2 =  1353; break; // vncipherlast
+      default:
+         goto bad;
+      }
+      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+      goto done;
+   }
+   case Pin_AvHashV128Binary: {
+      UInt v_dst = vregNo(i->Pin.AvHashV128Binary.dst);
+      UInt v_src = vregNo(i->Pin.AvHashV128Binary.src);
+      PPCRI* s_field = i->Pin.AvHashV128Binary.s_field;
+      UInt opc2;
+      switch (i->Pin.AvHashV128Binary.op) {
+      case Pav_SHA256:   opc2 =  1666; break; // vshasigmaw
+      case Pav_SHA512:   opc2 =  1730; break; // vshasigmad
+      default:
+         goto bad;
+      }
+      p = mkFormVX( p, 4, v_dst, v_src, s_field->Pri.Imm, opc2 );
+      goto done;
+   }
+   case Pin_AvBCDV128Trinary: {
+      UInt v_dst  = vregNo(i->Pin.AvBCDV128Trinary.dst);
+      UInt v_src1 = vregNo(i->Pin.AvBCDV128Trinary.src1);
+      UInt v_src2 = vregNo(i->Pin.AvBCDV128Trinary.src2);
+      PPCRI* ps   = i->Pin.AvBCDV128Trinary.ps;
+      UInt opc2;
+      switch (i->Pin.AvBCDV128Trinary.op) {
+      case Pav_BCDAdd:   opc2 =  1; break; // bcdadd
+      case Pav_BCDSub:   opc2 = 65; break; // bcdsub
+      default:
+         goto bad;
+      }
+      p = mkFormVXR( p, 4, v_dst, v_src1, v_src2,
+                     0x1, (ps->Pri.Imm << 9) | opc2 );
+      goto done;
+   }
     case Pin_AvBin32Fx4: {
        UInt v_dst  = vregNo(i->Pin.AvBin32Fx4.dst);
        UInt v_srcL = vregNo(i->Pin.AvBin32Fx4.srcL);
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h

index 6e6ec241c5213de95fa58568734fc6cfb3450687..85727b23b25edf895b1250de0c4a37b6267ce4f1 100644 (file)
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -426,6 +426,22 @@ typedef
  
        /* Concatenation */
        Pav_CATODD, Pav_CATEVEN,
+
+      /* Polynomial Multiply-Add */
+      Pav_POLYMULADD,
+
+      /* Cipher */
+      Pav_CIPHERV128, Pav_CIPHERLV128, Pav_NCIPHERV128, Pav_NCIPHERLV128,
+      Pav_CIPHERSUBV128,
+
+      /* Hash */
+      Pav_SHA256, Pav_SHA512,
+
+      /* BCD Arithmetic */
+      Pav_BCDAdd, Pav_BCDSub,
+
+      /* zero count */
+      Pav_ZEROCNTBYTE, Pav_ZEROCNTWORD, Pav_ZEROCNTHALF, Pav_ZEROCNTDBL,
     }
     PPCAvOp;
  
@@ -507,6 +523,10 @@ typedef
        Pin_AvSplat,    /* One elem repeated throughout dst */
        Pin_AvLdVSCR,   /* mtvscr */
        Pin_AvCMov,     /* AV conditional move */
+      Pin_AvCipherV128Unary,  /* AV Vector unary Cipher */
+      Pin_AvCipherV128Binary, /* AV Vector binary Cipher */
+      Pin_AvHashV128Binary, /* AV Vector binary Hash */
+      Pin_AvBCDV128Trinary, /* BCD Arithmetic */
        Pin_Dfp64Unary,   /* DFP64  unary op */
        Pin_Dfp128Unary,  /* DFP128 unary op */
        Pin_DfpShift,     /* Decimal floating point shift by immediate value */
@@ -853,6 +873,30 @@ typedef
           struct {
              HReg src;
           } AvLdVSCR;
+         struct {
+            PPCAvOp   op;
+            HReg      dst;
+            HReg      src;
+         } AvCipherV128Unary;
+         struct {
+            PPCAvOp     op;
+            HReg       dst;
+            HReg       src;
+            PPCRI* s_field;
+         } AvHashV128Binary;
+         struct {
+            PPCAvOp     op;
+            HReg       dst;
+            HReg      src1;
+            HReg      src2;
+            PPCRI*      ps;
+         } AvBCDV128Trinary;
+         struct {
+            PPCAvOp   op;
+            HReg      dst;
+            HReg      srcL;
+            HReg      srcR;
+         } AvCipherV128Binary;
           struct {
              PPCFpOp op;
              HReg dst;
@@ -1034,7 +1078,15 @@ extern PPCInstr* PPCInstr_AvShlDbl   ( UChar shift, HReg dst, HReg srcL, HReg sr
  extern PPCInstr* PPCInstr_AvSplat    ( UChar sz, HReg dst, PPCVI5s* src );
  extern PPCInstr* PPCInstr_AvCMov     ( PPCCondCode, HReg dst, HReg src );
  extern PPCInstr* PPCInstr_AvLdVSCR   ( HReg src );
-
+extern PPCInstr* PPCInstr_AvCipherV128Unary  ( PPCAvOp op, HReg dst,
+                                               HReg srcR );
+extern PPCInstr* PPCInstr_AvCipherV128Binary ( PPCAvOp op, HReg dst,
+                                               HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvHashV128Binary ( PPCAvOp op, HReg dst,
+                                             HReg src, PPCRI* s_field );
+extern PPCInstr* PPCInstr_AvBCDV128Trinary ( PPCAvOp op, HReg dst,
+                                             HReg src1, HReg src2,
+                                             PPCRI* ps );
  extern PPCInstr* PPCInstr_Dfp64Unary  ( PPCFpOp op, HReg dst, HReg src );
  extern PPCInstr* PPCInstr_Dfp64Binary ( PPCFpOp op, HReg dst, HReg srcL,
                                          HReg srcR );
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c

index e75ad43ba33a063ba2eb6549518ce4651426884d..fa5cb7a910a5f54475e89f54b571795c573b91f5 100644 (file)
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -4845,6 +4845,26 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
        case Iop_Dup32x4:
           return mk_AvDuplicateRI(env, e->Iex.Unop.arg);
  
+      case Iop_CipherSV128: op = Pav_CIPHERSUBV128; goto do_AvCipherV128Un;
+      do_AvCipherV128Un: {
+         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg dst = newVRegV(env);
+         addInstr(env, PPCInstr_AvCipherV128Unary(op, dst, arg));
+         return dst;
+      }
+
+      case Iop_Clz8Sx16: fpop = Pav_ZEROCNTBYTE;   goto do_zerocnt;
+      case Iop_Clz16Sx8: fpop = Pav_ZEROCNTHALF;   goto do_zerocnt;
+      case Iop_Clz32Sx4: fpop = Pav_ZEROCNTWORD;   goto do_zerocnt;
+      case Iop_Clz64x2:  fpop = Pav_ZEROCNTDBL;    goto do_zerocnt;
+      do_zerocnt:
+      {
+        HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+        HReg dst = newVRegV(env);
+        addInstr(env, PPCInstr_AvUnary(fpop, dst, arg));
+        return dst;
+      }
+
        default:
           break;
        } /* switch (e->Iex.Unop.op) */
@@ -4981,6 +5001,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
        case Iop_CmpEQ8x16:  op = Pav_CMPEQU; goto do_AvBin8x16;
        case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
        case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
+      case Iop_PolynomialMulAdd8x16: op = Pav_POLYMULADD; goto do_AvBin8x16;
        do_AvBin8x16: {
           HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
           HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
@@ -5015,6 +5036,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
        case Iop_CmpEQ16x8:  op = Pav_CMPEQU; goto do_AvBin16x8;
        case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
        case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
+      case Iop_PolynomialMulAdd16x8: op = Pav_POLYMULADD; goto do_AvBin16x8;
        do_AvBin16x8: {
           HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
           HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
@@ -5052,6 +5074,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
        case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
        case Iop_CatOddLanes32x4:  op = Pav_CATODD;  goto do_AvBin32x4;
        case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4;
+      case Iop_PolynomialMulAdd32x4: op = Pav_POLYMULADD; goto do_AvBin32x4;
        do_AvBin32x4: {
           HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
           HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
@@ -5078,6 +5101,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
        case Iop_CmpEQ64x2:  op = Pav_CMPEQU; goto do_AvBin64x2;
        case Iop_CmpGT64Ux2: op = Pav_CMPGTU; goto do_AvBin64x2;
        case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2;
+      case Iop_PolynomialMulAdd64x2: op = Pav_POLYMULADD; goto do_AvBin64x2;
        do_AvBin64x2: {
           HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
           HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
@@ -5148,11 +5172,52 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
           return dst;
        }
  
+      case Iop_CipherV128:  op = Pav_CIPHERV128;   goto do_AvCipherV128;
+      case Iop_CipherLV128: op = Pav_CIPHERLV128;  goto do_AvCipherV128;
+      case Iop_NCipherV128: op = Pav_NCIPHERV128;  goto do_AvCipherV128;
+      case Iop_NCipherLV128:op = Pav_NCIPHERLV128; goto do_AvCipherV128;
+      do_AvCipherV128: {
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg dst  = newVRegV(env);
+         addInstr(env, PPCInstr_AvCipherV128Binary(op, dst, arg1, arg2));
+         return dst;
+      }
+
+      case Iop_SHA256:op = Pav_SHA256; goto do_AvHashV128;
+      case Iop_SHA512:op = Pav_SHA512; goto do_AvHashV128;
+      do_AvHashV128: {
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg dst  = newVRegV(env);
+         PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2);
+         addInstr(env, PPCInstr_AvHashV128Binary(op, dst, arg1, s_field));
+         return dst;
+      }
        default:
           break;
        } /* switch (e->Iex.Binop.op) */
     } /* if (e->tag == Iex_Binop) */
  
+   if (e->tag == Iex_Triop) {
+      IRTriop *triop = e->Iex.Triop.details;
+      switch (triop->op) {
+      case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
+      case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
+      do_AvBCDV128: {
+         HReg arg1 = iselVecExpr(env, triop->arg1);
+         HReg arg2 = iselVecExpr(env, triop->arg2);
+         HReg dst  = newVRegV(env);
+         PPCRI* ps = iselWordExpr_RI(env, triop->arg3);
+         addInstr(env, PPCInstr_AvBCDV128Trinary(op, dst, arg1, arg2, ps));
+         return dst;
+      }
+
+      default:
+         break;
+      } /* switch (triop->op) */
+   } /* if (e->tag == Iex_Triop) */
+
+
     if (e->tag == Iex_Const ) {
        vassert(e->Iex.Const.con->tag == Ico_V128);
        if (e->Iex.Const.con->Ico.V128 == 0x0000) {
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c

index 9b62bcaa6b9db1fdd663df2cb056149c95eccbf9..692abfbc5745a3352f9873a00831bc60e5bd18a9 100644 (file)
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -782,6 +782,15 @@ void ppIROp ( IROp op )
        case Iop_MullEven16Sx8: vex_printf("MullEven16Sx8"); return;
        case Iop_MullEven32Sx4: vex_printf("MullEven32Sx4"); return;
  
+      case Iop_PolynomialMulAdd8x16:
+         vex_printf("PolynomialMulAdd8x16"); return;
+      case Iop_PolynomialMulAdd16x8:
+         vex_printf("PolynomialMulAdd16x8"); return;
+      case Iop_PolynomialMulAdd32x4:
+         vex_printf("PolynomialMulAdd32x4"); return;
+      case Iop_PolynomialMulAdd64x2:
+         vex_printf("PolynomialMulAdd64x2"); return;
+
        case Iop_Avg8Ux16: vex_printf("Avg8Ux16"); return;
        case Iop_Avg16Ux8: vex_printf("Avg16Ux8"); return;
        case Iop_Avg32Ux4: vex_printf("Avg32Ux4"); return;
@@ -824,6 +833,7 @@ void ppIROp ( IROp op )
        case Iop_Clz8Sx16: vex_printf("Clz8Sx16"); return;
        case Iop_Clz16Sx8: vex_printf("Clz16Sx8"); return;
        case Iop_Clz32Sx4: vex_printf("Clz32Sx4"); return;
+      case Iop_Clz64x2: vex_printf("Clz64x2"); return;
        case Iop_Cls8Sx16: vex_printf("Cls8Sx16"); return;
        case Iop_Cls16Sx8: vex_printf("Cls16Sx8"); return;
        case Iop_Cls32Sx4: vex_printf("Cls32Sx4"); return;
@@ -1127,6 +1137,17 @@ void ppIROp ( IROp op )
  
        case Iop_Perm32x8:   vex_printf("Perm32x8"); return;
  
+      case Iop_CipherV128:   vex_printf("CipherV128"); return;
+      case Iop_CipherLV128:  vex_printf("CipherLV128"); return;
+      case Iop_NCipherV128:  vex_printf("NCipherV128"); return;
+      case Iop_NCipherLV128: vex_printf("NCipherLV128"); return;
+      case Iop_CipherSV128:  vex_printf("CipherSV128"); return;
+
+      case Iop_SHA256:  vex_printf("SHA256"); return;
+      case Iop_SHA512:  vex_printf("SHA512"); return;
+      case Iop_BCDAdd:  vex_printf("BCDAdd"); return;
+      case Iop_BCDSub:  vex_printf("BCDSub"); return;
+
        default: vpanic("ppIROp(1)");
     }
  
@@ -2795,6 +2816,8 @@ void typeOfPrimop ( IROp op,
        case Iop_QSub32Sx4: case Iop_QSub64Sx2:
        case Iop_Mul8x16: case Iop_Mul16x8: case Iop_Mul32x4:
        case Iop_PolynomialMul8x16:
+      case Iop_PolynomialMulAdd8x16: case Iop_PolynomialMulAdd16x8:
+      case Iop_PolynomialMulAdd32x4: case Iop_PolynomialMulAdd64x2:
        case Iop_MulHi16Ux8: case Iop_MulHi32Ux4: 
        case Iop_MulHi16Sx8: case Iop_MulHi32Sx4: 
        case Iop_QDMulHi16Sx8: case Iop_QDMulHi32Sx4:
@@ -2845,6 +2868,10 @@ void typeOfPrimop ( IROp op,
        case Iop_Perm8x16: case Iop_Perm32x4:
        case Iop_Recps32Fx4:
        case Iop_Rsqrts32Fx4:
+      case Iop_CipherV128:
+      case Iop_CipherLV128:
+      case Iop_NCipherV128:
+      case Iop_NCipherLV128:
           BINARY(Ity_V128,Ity_V128, Ity_V128);
  
        case Iop_PolynomialMull8x8:
@@ -2864,7 +2891,7 @@ void typeOfPrimop ( IROp op,
        case Iop_CmpNEZ8x16: case Iop_CmpNEZ16x8:
        case Iop_CmpNEZ32x4: case Iop_CmpNEZ64x2:
        case Iop_Cnt8x16:
-      case Iop_Clz8Sx16: case Iop_Clz16Sx8: case Iop_Clz32Sx4:
+      case Iop_Clz8Sx16: case Iop_Clz16Sx8: case Iop_Clz32Sx4: case Iop_Clz64x2:
        case Iop_Cls8Sx16: case Iop_Cls16Sx8: case Iop_Cls32Sx4:
        case Iop_PwAddL8Ux16: case Iop_PwAddL16Ux8: case Iop_PwAddL32Ux4:
        case Iop_PwAddL8Sx16: case Iop_PwAddL16Sx8: case Iop_PwAddL32Sx4:
@@ -2873,6 +2900,7 @@ void typeOfPrimop ( IROp op,
        case Iop_Reverse16_8x16:
        case Iop_Neg32Fx4:
        case Iop_Abs8x16: case Iop_Abs16x8: case Iop_Abs32x4:
+      case Iop_CipherSV128:
           UNARY(Ity_V128, Ity_V128);
  
        case Iop_ShlV128: case Iop_ShrV128:
@@ -2888,6 +2916,7 @@ void typeOfPrimop ( IROp op,
        case Iop_QShlN32Sx4: case Iop_QShlN64Sx2:
        case Iop_QSalN8x16: case Iop_QSalN16x8:
        case Iop_QSalN32x4: case Iop_QSalN64x2:
+      case Iop_SHA256:    case Iop_SHA512:
           BINARY(Ity_V128,Ity_I8, Ity_V128);
  
        case Iop_F32ToFixed32Ux4_RZ:
@@ -2928,6 +2957,9 @@ void typeOfPrimop ( IROp op,
        case Iop_ExtractV128:
           TERNARY(Ity_V128, Ity_V128, Ity_I8, Ity_V128);
  
+      case Iop_BCDAdd:
+      case Iop_BCDSub:
+         TERNARY(Ity_V128,Ity_V128, Ity_I8, Ity_V128);
        case Iop_QDMulLong16Sx4: case Iop_QDMulLong32Sx2:
           BINARY(Ity_I64, Ity_I64, Ity_V128);
  
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h

index 50d986bbb168245c605d74074957ea0059f84757..1c888d5412b3f22045da7aa5f2b5cad2d050949a 100644 (file)
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -896,6 +896,7 @@ typedef
        Iop_Cnt8x8,
        Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
        Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
+      Iop_Clz64x2,
  
        /* VECTOR x VECTOR SHIFT / ROTATE */
        Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
@@ -1237,6 +1238,12 @@ typedef
         */
        Iop_BCDtoDPB,
  
+      /* BCD arithmetic instructions, (V128, V128, I8) -> V128
+       * The BCD format is the same as that used in the BCD<->DPB conversion
+       * routines, except using 124 bits of digits (vs 60) plus the trailing
+       * 4-bit sign code. */
+      Iop_BCDAdd, Iop_BCDSub,
+
        /* Conversion I64 -> D64 */
        Iop_ReinterpI64asD64,
  
@@ -1403,6 +1410,39 @@ typedef
        Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
        Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
  
+      /* Vector Polynomial multiplication add.   (V128, V128) -> V128
+
+       *** Below is the algorithm for the instructions. These Iops could
+           be emulated to get this functionality, but the emulation would
+           be long and messy.
+
+        Example for polynomial multiply add for vector of bytes
+        do i = 0 to 15
+            prod[i].bit[0:14] <- 0
+            srcA <- VR[argL].byte[i]
+            srcB <- VR[argR].byte[i]
+            do j = 0 to 7
+                do k = 0 to j
+                    gbit <- srcA.bit[k] & srcB.bit[j-k]
+                    prod[i].bit[j] <- prod[i].bit[j] ^ gbit
+                end
+            end
+
+            do j = 8 to 14
+                do k = j-7 to 7
+                     gbit <- (srcA.bit[k] & srcB.bit[j-k])
+                     prod[i].bit[j] <- prod[i].bit[j] ^ gbit
+                end
+            end
+        end
+
+        do i = 0 to 7
+            VR[dst].hword[i] <- 0b0 || (prod[2×i] ^ prod[2×i+1])
+        end
+      */
+      Iop_PolynomialMulAdd8x16, Iop_PolynomialMulAdd16x8,
+      Iop_PolynomialMulAdd32x4, Iop_PolynomialMulAdd64x2,
+
        /* PAIRWISE operations */
        /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
              [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
@@ -1598,6 +1638,17 @@ typedef
  
        Iop_Perm32x8,
  
+      /* (V128, V128) -> V128, except Iop_CipherSV128: V128 -> V128 */
+      Iop_CipherV128, Iop_CipherLV128, Iop_CipherSV128,
+      Iop_NCipherV128, Iop_NCipherLV128,
+
+      /* Hash instructions, Federal Information Processing Standards
+       * Publication 180-3 Secure Hash Standard. */
+      /* (V128, I8) -> V128; The I8 input arg is (ST | SIX), where ST and
+       * SIX are fields from the insn. See ISA 2.07 description of
+       * vshasigmad and vshasigmaw insns.*/
+      Iop_SHA512, Iop_SHA256,
+
        /* ------------------ 256-bit SIMD FP. ------------------ */
        Iop_Add64Fx4,
        Iop_Sub64Fx4,
author	Carl Love <cel@us.ibm.com>
	Tue, 15 Oct 2013 18:11:20 +0000 (18:11 +0000)
committer	Carl Love <cel@us.ibm.com>
	Tue, 15 Oct 2013 18:11:20 +0000 (18:11 +0000)
VEX/priv/guest_ppc_toIR.c		patch \| blob \| blame \| history
VEX/priv/host_ppc_defs.c		patch \| blob \| blame \| history
VEX/priv/host_ppc_defs.h		patch \| blob \| blame \| history
VEX/priv/host_ppc_isel.c		patch \| blob \| blame \| history
VEX/priv/ir_defs.c		patch \| blob \| blame \| history
VEX/pub/libvex_ir.h		patch \| blob \| blame \| history