From be7180d014d6881fba2b448b2519f03a4918701a Mon Sep 17 00:00:00 2001
From: Carl Love <cel@us.ibm.com>
Date: Tue, 15 Oct 2013 18:11:20 +0000
Subject: [PATCH] Power 8 support, phase 5

This commit adds support for the following instructions for doing
various arithmetic, bit transformation, cipher, count, logical,
and SHA operations.

  vpmsumb, vpmsumh, vpmsumw, vpmsumd, vpermxor, vcipher, vcipherlast,
  vncipher, vncipherlast, vsbox,
  vclzb, vclzw, vclzh, vclzd,
  vpopcntb, vpopcnth, vpopcntw, vpopcntd,
  vnand, vorc, veqv,
  vshasigmaw, vshasigmad,
  bcdadd, bcdsub

The following Iops were added to support the above instructions:
  Iop_BCDAdd, Iop_BCDSub,
  Iop_PolynomialMulAdd8x16, Iop_PolynomialMulAdd16x8,
  Iop_PolynomialMulAdd32x4, Iop_PolynomialMulAdd64x2,
  Iop_CipherV128, Iop_CipherLV128, Iop_CipherSV128,
  Iop_NCipherV128, Iop_NCipherLV128,
  Iop_SHA512, Iop_SHA256, Iop_Clz64x2

The patch is for Bugzilla 325628


git-svn-id: svn://svn.valgrind.org/vex/trunk@2789
---
 VEX/priv/guest_ppc_toIR.c | 589 ++++++++++++++++++++++++++++++++++++--
 VEX/priv/host_ppc_defs.c  | 207 +++++++++++++-
 VEX/priv/host_ppc_defs.h  |  54 +++-
 VEX/priv/host_ppc_isel.c  |  65 +++++
 VEX/priv/ir_defs.c        |  34 ++-
 VEX/pub/libvex_ir.h       |  51 ++++
 6 files changed, 967 insertions(+), 33 deletions(-)

diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 98c90bffa1..2a01726e54 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -791,6 +791,16 @@ static void breakV128to4x32( IRExpr* t128,
    assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
 }
 
+static IRExpr* mkV128from32( IRTemp t3, IRTemp t2,
+                               IRTemp t1, IRTemp t0 )
+{
+   /* Glue four 32-bit temps into one V128, t3 most significant, t0 least. */
+   IRExpr* hi64 = binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2));
+   IRExpr* lo64 = binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0));
+
+   return binop( Iop_64HLtoV128, hi64, lo64 );
+}
+
 
 /* Signed saturating narrow 64S to 32 */
 static IRExpr* mkQNarrow64Sto32 ( IRExpr* t64 )
@@ -1364,12 +1374,29 @@ static Int guestCR0offset ( UInt cr )
    }
 }
 
+typedef enum {
+   _placeholder0,   /* 0..2 are unused; they pad the enum so BYTE == 3 */
+   _placeholder1,
+   _placeholder2,
+   BYTE,            /* 3: count within each 2^3 = 8-bit field          */
+   HWORD,           /* 4: count within each 2^4 = 16-bit field         */
+   WORD,            /* 5: count within each 2^5 = 32-bit field         */
+   DWORD            /* 6: count within each 2^6 = 64-bit field         */
+} _popcount_data_type;
+
 /* Generate an IR sequence to do a popcount operation on the supplied
    IRTemp, and return a new IRTemp holding the result.  'ty' may be
    Ity_I32 or Ity_I64 only. */
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
 {
-   Int i, shift[6], max;
+  /* Do count across 2^data_type bits,
+     byte:        data_type = 3
+     half word:   data_type = 4
+     word:        data_type = 5
+     double word: data_type = 6  (not supported for 32-bit type)
+    */
+   Int shift[6];
+   _popcount_data_type idx, i;
    IRTemp mask[6];
    IRTemp old = IRTemp_INVALID;
    IRTemp nyu = IRTemp_INVALID;
@@ -1377,17 +1404,10 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
    vassert(ty == Ity_I64 || ty == Ity_I32);
 
    if (ty == Ity_I32) {
-      if (byte_count)
-         /* Return the population count across each byte not across the entire
-          * 32-bit value.  Stop after third iteration.
-          */
-         max = 3;
-      else
-         max = 5;
 
-      for (i = 0; i < 5; i++) {
-         mask[i]  = newTemp(ty);
-         shift[i] = 1 << i;
+      for (idx = 0; idx < WORD; idx++) {
+         mask[idx]  = newTemp(ty);
+         shift[idx] = 1 << idx;
       }
       assign(mask[0], mkU32(0x55555555));
       assign(mask[1], mkU32(0x33333333));
@@ -1395,7 +1415,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
       assign(mask[3], mkU32(0x00FF00FF));
       assign(mask[4], mkU32(0x0000FFFF));
       old = src;
-      for (i = 0; i < max; i++) {
+      for (i = 0; i < data_type; i++) {
          nyu = newTemp(ty);
          assign(nyu,
                 binop(Iop_Add32,
@@ -1409,16 +1429,11 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
       }
       return nyu;
    }
+
 // else, ty == Ity_I64
-   if (byte_count)
-      /* Return the population count across each byte not across the entire
-       * 64-bit value.  Stop after third iteration.
-       */
-      max = 3;
-   else
-      max = 6;
+   vassert(mode64);
 
-   for (i = 0; i < 6; i++) {
+   for (i = 0; i < DWORD; i++) {
       mask[i] = newTemp( Ity_I64 );
       shift[i] = 1 << i;
    }
@@ -1429,7 +1444,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
    assign( mask[4], mkU64( 0x0000FFFF0000FFFFULL ) );
    assign( mask[5], mkU64( 0x00000000FFFFFFFFULL ) );
    old = src;
-   for (i = 0; i < max; i++) {
+   for (i = 0; i < data_type; i++) {
       nyu = newTemp( Ity_I64 );
       assign( nyu,
               binop( Iop_Add64,
@@ -1442,6 +1457,60 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, Bool byte_count )
    return nyu;
 }
 
+/* Population count for vpopcntd in 32-bit mode.  src1 and src2 are the two
+ * 32-bit halves of one doubleword; the result is an Ity_I64 temp holding
+ * the zero-extended sum of their bit counts.  Asserts !mode64. */
+static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
+{
+   Int i, shift[6];   /* only entries 0..WORD-1 are filled in and used */
+   IRTemp mask[6];
+   IRTemp old = IRTemp_INVALID;
+   IRTemp nyu1 = IRTemp_INVALID;
+   IRTemp nyu2 = IRTemp_INVALID;
+   IRTemp retval = newTemp(Ity_I64);
+
+   vassert(!mode64);
+
+   for (i = 0; i < WORD; i++) {
+      mask[i]  = newTemp(Ity_I32);
+      shift[i] = 1 << i;   /* shift amounts 1, 2, 4, 8, 16 */
+   }
+   assign(mask[0], mkU32(0x55555555));
+   assign(mask[1], mkU32(0x33333333));
+   assign(mask[2], mkU32(0x0F0F0F0F));
+   assign(mask[3], mkU32(0x00FF00FF));
+   assign(mask[4], mkU32(0x0000FFFF));
+   old = src1;
+   for (i = 0; i < WORD; i++) {   /* after the loop nyu1 is the bit count of src1 */
+      nyu1 = newTemp(Ity_I32);
+      assign(nyu1,
+             binop(Iop_Add32,
+                   binop(Iop_And32,
+                         mkexpr(old),
+                         mkexpr(mask[i])),
+                   binop(Iop_And32,
+                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
+                         mkexpr(mask[i]))));
+      old = nyu1;
+   }
+
+   old = src2;
+   for (i = 0; i < WORD; i++) {   /* same reduction again, this time for src2 */
+      nyu2 = newTemp(Ity_I32);
+      assign(nyu2,
+             binop(Iop_Add32,
+                   binop(Iop_And32,
+                         mkexpr(old),
+                         mkexpr(mask[i])),
+                   binop(Iop_And32,
+                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
+                         mkexpr(mask[i]))));
+      old = nyu2;
+   }
+   assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
+   return retval;
+}
+
 
 // ROTL(src32/64, rot_amt5/6)
 static IRExpr* /* :: Ity_I32/64 */ ROTL ( IRExpr* src,
@@ -4143,7 +4212,7 @@ static Bool dis_int_logic ( UInt theInstr )
       case 0x1FA: // popcntd (population count doubleword
       {
     	  DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
-    	  IRTemp result = gen_POPCOUNT(ty, rS, False);
+    	  IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
     	  putIReg( rA_addr, mkexpr(result) );
     	  return True;
       }
@@ -4156,11 +4225,11 @@ static Bool dis_int_logic ( UInt theInstr )
             IRTemp argHi = newTemp(Ity_I32);
             assign(argLo, unop(Iop_64to32, mkexpr(rS)));
             assign(argHi, unop(Iop_64HIto32, mkexpr(rS)));
-            resultLo = gen_POPCOUNT(Ity_I32, argLo, False);
-            resultHi = gen_POPCOUNT(Ity_I32, argHi, False);
+            resultLo = gen_POPCOUNT(Ity_I32, argLo, WORD);
+            resultHi = gen_POPCOUNT(Ity_I32, argHi, WORD);
             putIReg( rA_addr, binop(Iop_32HLto64, mkexpr(resultHi), mkexpr(resultLo)));
          } else {
-            IRTemp result = gen_POPCOUNT(ty, rS, False);
+            IRTemp result = gen_POPCOUNT(ty, rS, WORD);
             putIReg( rA_addr, mkexpr(result) );
          }
          return True;
@@ -4175,12 +4244,12 @@ static Bool dis_int_logic ( UInt theInstr )
             IRTemp argHi = newTemp(Ity_I32);
             assign(argLo, unop(Iop_64to32, mkexpr(rS)));
             assign(argHi, unop(Iop_64HIto32, mkexpr(rS)));
-            resultLo = gen_POPCOUNT(Ity_I32, argLo, True);
-            resultHi = gen_POPCOUNT(Ity_I32, argHi, True);
+            resultLo = gen_POPCOUNT(Ity_I32, argLo, BYTE);
+            resultHi = gen_POPCOUNT(Ity_I32, argHi, BYTE);
             putIReg( rA_addr, binop(Iop_32HLto64, mkexpr(resultHi),
                                     mkexpr(resultLo)));
          } else {
-            IRTemp result = gen_POPCOUNT(ty, rS, True);
+            IRTemp result = gen_POPCOUNT(ty, rS, BYTE);
             putIReg( rA_addr, mkexpr(result) );
          }
          return True;
@@ -13201,6 +13270,167 @@ dis_vxv_sp_arith ( UInt theInstr, UInt opc2 )
    return True;
 }
 
+/* AltiVec Vector Count Leading Zeros (vclz[bhwd]) and Population Count
+ * (vpopcnt[bhwd]).  opc2 is the 11-bit extended opcode chosen by the caller;
+ * returns True once IR is emitted, False if opc1/opc2 is not handled. */
+static Bool
+dis_vxv_population_count ( UInt theInstr, UInt opc2 )
+{
+   UChar vRB_addr = ifieldRegB(theInstr);
+   UChar vRT_addr = ifieldRegDS(theInstr);
+   UChar opc1 = ifieldOPC( theInstr );
+   IRTemp vB = newTemp(Ity_V128);
+   assign( vB, getVReg(vRB_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf( "dis_vxv_population_count(ppc)(instr)\n" );
+      return False;
+   }
+
+   switch (opc2) {
+      case 0x702:    // vclzb
+         DIP("vclzb v%d,v%d\n", vRT_addr, vRB_addr);
+         putVReg( vRT_addr, unop(Iop_Clz8Sx16, mkexpr( vB ) ) );
+         break;
+
+      case 0x742:    // vclzh
+         DIP("vclzh v%d,v%d\n", vRT_addr, vRB_addr);
+         putVReg( vRT_addr, unop(Iop_Clz16Sx8, mkexpr( vB ) ) );
+         break;
+
+      case 0x782:    // vclzw
+         DIP("vclzw v%d,v%d\n", vRT_addr, vRB_addr);
+         putVReg( vRT_addr, unop(Iop_Clz32Sx4, mkexpr( vB ) ) );
+         break;
+
+      case 0x7C2:    // vclzd
+         DIP("vclzd v%d,v%d\n", vRT_addr, vRB_addr);
+         putVReg( vRT_addr, unop(Iop_Clz64x2, mkexpr( vB ) ) );
+         break;
+
+      case 0x703:    // vpopcntb
+      {
+         /* Break vector into 32-bit words and do the population count
+          * on each byte of the words
+          */
+         IRType ty = Ity_I32;
+         IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
+         bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
+         IRTemp cnt_bits0_31, cnt_bits32_63, cnt_bits64_95, cnt_bits96_127;
+         cnt_bits0_31 = cnt_bits32_63 = cnt_bits64_95 = cnt_bits96_127 = IRTemp_INVALID;
+
+         DIP("vpopcntb v%d,v%d\n", vRT_addr, vRB_addr);
+         breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+         cnt_bits0_31   = gen_POPCOUNT(ty, bits0_31,   BYTE);
+         cnt_bits32_63  = gen_POPCOUNT(ty, bits32_63,  BYTE);
+         cnt_bits64_95  = gen_POPCOUNT(ty, bits64_95,  BYTE);
+         cnt_bits96_127 = gen_POPCOUNT(ty, bits96_127, BYTE);
+
+         putVReg( vRT_addr, mkV128from32(cnt_bits96_127, cnt_bits64_95,
+                                         cnt_bits32_63, cnt_bits0_31) );
+         break;
+      }
+
+      case 0x743:    // vpopcnth
+      {
+         /* Break vector into 32-bit words and do the population count
+          * for each half word
+          */
+         IRType ty = Ity_I32;
+         IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
+         bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
+         IRTemp cnt_bits0_31, cnt_bits32_63, cnt_bits64_95, cnt_bits96_127;
+         cnt_bits0_31 = cnt_bits32_63 = cnt_bits64_95 = cnt_bits96_127 = IRTemp_INVALID;
+
+         DIP("vpopcnth v%d,v%d\n", vRT_addr, vRB_addr);
+         breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+
+         cnt_bits0_31   = gen_POPCOUNT(ty, bits0_31,   HWORD);
+         cnt_bits32_63  = gen_POPCOUNT(ty, bits32_63,  HWORD);
+         cnt_bits64_95  = gen_POPCOUNT(ty, bits64_95,  HWORD);
+         cnt_bits96_127 = gen_POPCOUNT(ty, bits96_127, HWORD);
+
+         putVReg( vRT_addr, mkV128from32(cnt_bits96_127, cnt_bits64_95,
+                                         cnt_bits32_63, cnt_bits0_31) );
+         break;
+      }
+
+      case 0x783:    // vpopcntw
+      {
+         /* Break vector into 32-bit words and do the population count
+          * on each word.
+          */
+         IRType ty = Ity_I32;
+         IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
+         bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
+         IRTemp cnt_bits0_31, cnt_bits32_63, cnt_bits64_95, cnt_bits96_127;
+         cnt_bits0_31 = cnt_bits32_63 = cnt_bits64_95 = cnt_bits96_127 = IRTemp_INVALID;
+
+         DIP("vpopcntw v%d,v%d\n", vRT_addr, vRB_addr);
+         breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+
+         cnt_bits0_31   = gen_POPCOUNT(ty, bits0_31,   WORD);
+         cnt_bits32_63  = gen_POPCOUNT(ty, bits32_63,  WORD);
+         cnt_bits64_95  = gen_POPCOUNT(ty, bits64_95,  WORD);
+         cnt_bits96_127 = gen_POPCOUNT(ty, bits96_127, WORD);
+
+         putVReg( vRT_addr, mkV128from32(cnt_bits96_127, cnt_bits64_95,
+                                         cnt_bits32_63, cnt_bits0_31) );
+         break;
+      }
+
+      case 0x7C3:    // vpopcntd
+      {
+         if (mode64) {
+            /* Break vector into 64-bit double words and do the population count
+             * on each double word.
+             */
+            IRType ty = Ity_I64;
+            IRTemp bits0_63   = newTemp(Ity_I64);
+            IRTemp bits64_127 = newTemp(Ity_I64);
+            IRTemp cnt_bits0_63   = IRTemp_INVALID;  /* set by gen_POPCOUNT below */
+            IRTemp cnt_bits64_127 = IRTemp_INVALID;  /* set by gen_POPCOUNT below */
+
+            DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
+
+            assign(bits0_63,   unop( Iop_V128to64,   mkexpr( vB ) ) );
+            assign(bits64_127, unop( Iop_V128HIto64, mkexpr( vB ) ) );
+            cnt_bits0_63   = gen_POPCOUNT(ty, bits0_63,   DWORD);
+            cnt_bits64_127 = gen_POPCOUNT(ty, bits64_127, DWORD);
+
+            putVReg( vRT_addr, binop( Iop_64HLtoV128,
+                                      mkexpr( cnt_bits64_127 ),
+                                      mkexpr( cnt_bits0_63 ) ) );
+         } else {
+            /* Break vector into 32-bit words; each doubleword's count is
+             * the sum of the counts of its two words.
+             */
+            IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
+            bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
+            IRTemp cnt_bits0_63   = IRTemp_INVALID;  /* set by gen_vpopcntd_mode32 */
+            IRTemp cnt_bits64_127  = IRTemp_INVALID; /* set by gen_vpopcntd_mode32 */
+
+            DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
+            breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+
+            cnt_bits0_63   = gen_vpopcntd_mode32(bits0_31, bits32_63);
+            cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
+
+            putVReg( vRT_addr, binop( Iop_64HLtoV128,
+                                      mkexpr( cnt_bits64_127 ),
+                                      mkexpr( cnt_bits0_63 ) ) );
+         }
+         break;
+      }
+
+      default:
+         vex_printf("dis_vxv_population_count(ppc)(opc2)\n");
+         return False;
+      break;
+   }
+   return True;
+}
+
 typedef enum {
    PPC_CMP_EQ = 2,
    PPC_CMP_GT = 4,
@@ -15925,6 +16155,27 @@ static Bool dis_av_logic ( UInt theInstr )
          unop(Iop_NotV128, binop(Iop_OrV128, mkexpr(vA), mkexpr(vB))) );
       break;
 
+   case 0x544: // vorc (vA Or'd with complement of vb)
+      DIP("vorc v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+      putVReg( vD_addr, binop( Iop_OrV128,
+                               mkexpr( vA ),
+                               unop( Iop_NotV128, mkexpr( vB ) ) ) );
+      break;
+
+   case 0x584: // vnand (Nand)
+      DIP("vnand v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+      putVReg( vD_addr, unop( Iop_NotV128,
+                              binop(Iop_AndV128, mkexpr( vA ),
+                              mkexpr( vB ) ) ) );
+      break;
+
+   case 0x684: // veqv (complemented XOr)
+      DIP("veqv v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+      putVReg( vD_addr, unop( Iop_NotV128,
+                              binop( Iop_XorV128, mkexpr( vA ),
+                              mkexpr( vB ) ) ) );
+      break;
+
    default:
       vex_printf("dis_av_logic(ppc)(opc2=0x%x)\n", opc2);
       return False;
@@ -16308,6 +16559,60 @@ static Bool dis_av_multarith ( UInt theInstr )
    return True;
 }
 
+/*
+  AltiVec Polynomial Multiply-Sum Instructions (vpmsumb/h/w/d)
+*/
+static Bool dis_av_polymultarith ( UInt theInstr )
+{
+   /* NOTE(review): was labelled VA-Form, but opc2 is an 11-bit field and vC is never used below -- confirm */
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar vD_addr  = ifieldRegDS(theInstr);
+   UChar vA_addr  = ifieldRegA(theInstr);
+   UChar vB_addr  = ifieldRegB(theInstr);
+   UChar vC_addr  = ifieldRegC(theInstr);
+   UInt  opc2     = IFIELD(theInstr, 0, 11);
+   IRTemp vA    = newTemp(Ity_V128);
+   IRTemp vB    = newTemp(Ity_V128);
+   IRTemp vC    = newTemp(Ity_V128);
+
+   assign( vA, getVReg(vA_addr));
+   assign( vB, getVReg(vB_addr));
+   assign( vC, getVReg(vC_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf("dis_av_polymultarith(ppc)(instr)\n");
+      return False;
+   }
+
+   switch (opc2) {
+      /* Polynomial Multiply-Sum.  NOTE(review): ISA 2.07 appears to give 0x448 = vpmsumh, 0x4C8 = vpmsumd -- confirm labels */
+      case 0x408:  // vpmsumb   Vector Polynomial Multiply-Sum Byte
+         DIP("vpmsumb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr, binop(Iop_PolynomialMulAdd8x16,
+                                 mkexpr(vA), mkexpr(vB)) );
+         break;
+      case 0x448:  // vpmsumd   Vector Polynomial Multiply-Sum Double Word
+         DIP("vpmsumd v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr, binop(Iop_PolynomialMulAdd64x2,
+                                 mkexpr(vA), mkexpr(vB)) );
+         break;
+      case 0x488:  // vpmsumw   Vector Polynomial Multiply-Sum Word
+         DIP("vpmsumw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr, binop(Iop_PolynomialMulAdd32x4,
+                                 mkexpr(vA), mkexpr(vB)) );
+         break;
+      case 0x4C8:  // vpmsumh   Vector Polynomial Multiply-Sum Half Word
+         DIP("vpmsumh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr, binop(Iop_PolynomialMulAdd16x8,
+                                 mkexpr(vA), mkexpr(vB)) );
+         break;
+      default:
+         vex_printf("dis_av_polymultarith(ppc)(opc2=0x%x)\n", opc2);
+         return False;
+   }
+   return True;
+}
+
 /*
   AltiVec Shift/Rotate Instructions
 */
@@ -16550,7 +16855,25 @@ static Bool dis_av_permute ( UInt theInstr )
                   binop(Iop_ShlV128, mkexpr(vA), mkU8(SHB_uimm4*8)),
                   binop(Iop_ShrV128, mkexpr(vB), mkU8((16-SHB_uimm4)*8))) );
       return True;
-
+   case 0x2D: {  // vpermxor (Vector Permute and Exclusive-OR)
+      IRTemp a_perm  = newTemp(Ity_V128);
+      IRTemp b_perm  = newTemp(Ity_V128);
+      IRTemp vrc_a   = newTemp(Ity_V128);
+      IRTemp vrc_b   = newTemp(Ity_V128);
+
+      /* IBM index  is 0:7, Change index value to index 7:0 */
+      assign( vrc_b, binop( Iop_AndV128, mkexpr( vC ),
+                            unop( Iop_Dup8x16, mkU8( 0xF ) ) ) );
+      assign( vrc_a, binop( Iop_ShrV128,
+                            binop( Iop_AndV128, mkexpr( vC ),
+                                   unop( Iop_Dup8x16, mkU8( 0xF0 ) ) ),
+                            mkU8 ( 4 ) ) );
+      assign( a_perm, binop( Iop_Perm8x16, mkexpr( vA ), mkexpr( vrc_a ) ) );
+      assign( b_perm, binop( Iop_Perm8x16, mkexpr( vB ), mkexpr( vrc_b ) ) );
+      putVReg( vD_addr, binop( Iop_XorV128,
+                               mkexpr( a_perm ), mkexpr( b_perm) ) );
+      return True;
+   }
    default:
      break; // Fall through...
    }
@@ -16989,6 +17312,158 @@ static Bool dis_av_pack ( UInt theInstr )
    return True;
 }
 
+/*
+  AltiVec Cipher Instructions (vcipher*, vncipher*, vsbox)
+*/
+static Bool dis_av_cipher ( UInt theInstr )
+{
+   /* VX-Form */
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar vD_addr  = ifieldRegDS(theInstr);
+   UChar vA_addr  = ifieldRegA(theInstr);
+   UChar vB_addr  = ifieldRegB(theInstr);
+   UInt  opc2     = IFIELD( theInstr, 0, 11 );
+
+   IRTemp vA    = newTemp(Ity_V128);
+   IRTemp vB    = newTemp(Ity_V128);
+   assign( vA, getVReg(vA_addr));
+   assign( vB, getVReg(vB_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf("dis_av_cipher(ppc)(instr)\n");
+      return False;
+   }
+   switch (opc2) {
+      case 0x508: // vcipher (Vector AES Cipher)
+         DIP("vcipher v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr,
+                  binop(Iop_CipherV128, mkexpr(vA), mkexpr(vB)) );
+         return True;
+
+      case 0x509: // vcipherlast (Vector AES Cipher Last)
+         DIP("vcipherlast v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr,
+                  binop(Iop_CipherLV128, mkexpr(vA), mkexpr(vB)) );
+         return True;
+
+      case 0x548: // vncipher (Vector AES Inverse Cipher)
+         DIP("vncipher v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr,
+                  binop(Iop_NCipherV128, mkexpr(vA), mkexpr(vB)) );
+         return True;
+
+      case 0x549: // vncipherlast (Vector AES Inverse Cipher Last)
+         DIP("vncipherlast v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+         putVReg( vD_addr,
+                  binop(Iop_NCipherLV128, mkexpr(vA), mkexpr(vB)) );
+         return True;
+
+      case 0x5C8: /* vsbox (Vector SubBytes, this does the cipher
+       * subBytes transform); unary, only vA is used
+       */
+         DIP("vsbox v%d,v%d\n", vD_addr, vA_addr);
+         putVReg( vD_addr,
+                  unop(Iop_CipherSV128, mkexpr(vA) ) );
+         return True;
+
+      default:
+         vex_printf("dis_av_cipher(ppc)(opc2)\n");
+         return False;
+   }
+   return True;   /* not reached: every case above returns */
+}
+
+/*
+  AltiVec Secure Hash Instructions (vshasigmaw, vshasigmad)
+*/
+static Bool dis_av_hash ( UInt theInstr )
+{
+   /* VX-Form */
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar vRT_addr = ifieldRegDS(theInstr);
+   UChar vRA_addr  = ifieldRegA(theInstr);
+   UChar s_field  = IFIELD( theInstr, 11, 5 );  // st and six together; passed to the Iop as a U8
+   UChar st       = IFIELD( theInstr, 15, 1 );  // st, used only in the DIP output
+   UChar six      = IFIELD( theInstr, 11, 4 );  // six field, used only in the DIP output
+   UInt  opc2     = IFIELD( theInstr, 0, 11 );
+
+   IRTemp vA    = newTemp(Ity_V128);
+   IRTemp dst    = newTemp(Ity_V128);   // only the vshasigmaw case assigns this
+   assign( vA, getVReg(vRA_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf("dis_av_hash(ppc)(instr)\n");
+      return False;
+   }
+
+   switch (opc2) {
+      case 0x682:  // vshasigmaw
+         DIP("vshasigmaw v%d,v%d,%u,%u\n", vRT_addr, vRA_addr, st, six);
+         assign( dst, binop( Iop_SHA256, mkexpr( vA ), mkU8( s_field) ) );
+         putVReg( vRT_addr, mkexpr(dst));
+         return True;
+
+      case 0x6C2:  // vshasigmad
+         DIP("vshasigmad v%d,v%d,%u,%u\n", vRT_addr, vRA_addr, st, six);
+         putVReg( vRT_addr, binop( Iop_SHA512, mkexpr( vA ), mkU8( s_field) ) );
+         return True;
+
+      default:
+         vex_printf("dis_av_hash(ppc)(opc2)\n");
+         return False;
+   }
+   return True;   /* not reached: every case above returns */
+}
+
+/*
+  AltiVec BCD Arithmetic instructions (bcdadd., bcdsub.).
+  These instructions modify CR6 for various conditions in the result,
+  including when an overflow occurs.  We could easily detect all conditions
+  except when an overflow occurs.  But since we can't be 100% accurate
+  in our emulation of CR6, it seems best to just not update CR6 at all.
+*/
+static Bool dis_av_bcd ( UInt theInstr )
+{
+   /* VX-Form */
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar vRT_addr = ifieldRegDS(theInstr);
+   UChar vRA_addr = ifieldRegA(theInstr);
+   UChar vRB_addr = ifieldRegB(theInstr);
+   UChar ps       = IFIELD( theInstr, 9, 1 );   // single bit, passed to the Iop as a U8
+   UInt  opc2     = IFIELD( theInstr, 0, 9 );   // 9 bits only: the ps bit sits just above
+
+   IRTemp vA    = newTemp(Ity_V128);
+   IRTemp vB    = newTemp(Ity_V128);
+   IRTemp dst    = newTemp(Ity_V128);
+   assign( vA, getVReg(vRA_addr));
+   assign( vB, getVReg(vRB_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf("dis_av_bcd(ppc)(instr)\n");
+      return False;
+   }
+
+   switch (opc2) {
+   case 0x1:  // bcdadd
+     DIP("bcdadd. v%d,v%d,v%d,%u\n", vRT_addr, vRA_addr, vRB_addr, ps);
+     assign( dst, triop( Iop_BCDAdd, mkexpr( vA ),
+                         mkexpr( vB ), mkU8( ps ) ) );
+     putVReg( vRT_addr, mkexpr(dst));   // only vRT is written; no CR6 update is emitted
+     return True;
+
+   case 0x41:  // bcdsub
+     DIP("bcdsub. v%d,v%d,v%d,%u\n", vRT_addr, vRA_addr, vRB_addr, ps);
+     assign( dst, triop( Iop_BCDSub, mkexpr( vA ),
+                         mkexpr( vB ), mkU8( ps ) ) );
+     putVReg( vRT_addr, mkexpr(dst));   // only vRT is written; no CR6 update is emitted
+     return True;
+
+   default:
+      vex_printf("dis_av_bcd(ppc)(opc2)\n");
+      return False;
+   }
+   return True;   /* not reached: every case above returns */
+}
 
 /*
   AltiVec Floating Point Arithmetic Instructions
@@ -18822,6 +19297,11 @@ DisResult disInstr_PPC_WRK (
          if (dis_av_permute( theInstr )) goto decode_success;
          goto decode_failure;
 
+      case 0x2D:                       // vpermxor
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_permute( theInstr )) goto decode_success;
+         goto decode_failure;
+
       /* AV Floating Point Mult-Add/Sub */
       case 0x2E: case 0x2F:            // vmaddfp, vnmsubfp
          if (!allow_V) goto decode_noV;
@@ -18832,6 +19312,18 @@ DisResult disInstr_PPC_WRK (
          break;  // Fall through...
       }
 
+      opc2 = IFIELD(theInstr, 0, 9);
+      switch (opc2) {
+      /* BCD arithmetic */
+      case 0x1: case 0x41:             // bcdadd, bcdsub
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_bcd( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      default:
+         break;  // Fall through...
+      }
+
       opc2 = IFIELD(theInstr, 0, 11);
       switch (opc2) {
       /* AV Arithmetic */
@@ -18868,6 +19360,13 @@ DisResult disInstr_PPC_WRK (
          if (dis_av_arith( theInstr )) goto decode_success;
          goto decode_failure;
 
+      /* AV Polynomial Vector Multiply Add */
+      case 0x408: case 0x448:            // vpmsumb, vpmsumd
+      case 0x488: case 0x4C8:            // vpmsumw, vpmsumh
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_polymultarith( theInstr )) goto decode_success;
+         goto decode_failure;
+
       /* AV Rotate, Shift */
       case 0x004: case 0x044: case 0x084: // vrlb, vrlh, vrlw
       case 0x104: case 0x144: case 0x184: // vslb, vslh, vslw
@@ -18892,6 +19391,12 @@ DisResult disInstr_PPC_WRK (
          if (dis_av_logic( theInstr )) goto decode_success;
          goto decode_failure;
 
+      case 0x544:                         // vorc
+      case 0x584: case 0x684:             // vnand, veqv
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_logic( theInstr )) goto decode_success;
+         goto decode_failure;
+
       /* AV Processor Control */
       case 0x604: case 0x644:             // mfvscr, mtvscr
          if (!allow_V) goto decode_noV;
@@ -18948,6 +19453,30 @@ DisResult disInstr_PPC_WRK (
          if (dis_av_pack( theInstr )) goto decode_success;
          goto decode_failure;
 
+      case 0x508: case 0x509:             // vcipher, vcipherlast
+      case 0x548: case 0x549:             // vncipher, vncipherlast
+      case 0x5C8:                         // vsbox
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_cipher( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      case 0x6C2: case 0x682:             // vshasigmad, vshasigmaw
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_av_hash( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      case 0x702: case 0x742:             // vclzb, vclzh
+      case 0x782: case 0x7c2:             // vclzw, vclzd
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_vxv_population_count( theInstr, opc2 )) goto decode_success;
+         goto decode_failure;
+
+      case 0x703: case 0x743:             // vpopcntb, vpopcnth
+      case 0x783: case 0x7c3:             // vpopcntw, vpopcntd
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_vxv_population_count( theInstr, opc2 )) goto decode_success;
+         goto decode_failure;
+
       default:
          break;  // Fall through...
       }
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
index e6742ca22c..446a563dbb 100644
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -723,6 +723,27 @@ const HChar* showPPCAvOp ( PPCAvOp op ) {
    case Pav_CATODD:     return "vmrgow";    // w
    case Pav_CATEVEN:    return "vmrgew";    // w
 
+   /* SHA sigma: word form for Pav_SHA256, doubleword form for Pav_SHA512 */
+   case Pav_SHA256:     return "vshasigmaw"; // w
+   case Pav_SHA512:     return "vshasigmad"; // dw
+
+   /* BCD */
+   case Pav_BCDAdd:     return "bcdadd.";  // qw
+   case Pav_BCDSub:     return "bcdsub.";  // qw
+
+   /* Polynomial arith */
+   case Pav_POLYMULADD: return "vpmsum";   // b, h, w, d
+
+   /* Cipher */
+   case Pav_CIPHERV128:  case Pav_CIPHERLV128:
+   case Pav_NCIPHERV128: case Pav_NCIPHERLV128:
+   case Pav_CIPHERSUBV128: return "v_cipher_";  // qw
+
+   /* zero count */
+   case Pav_ZEROCNTBYTE: case Pav_ZEROCNTWORD:
+   case Pav_ZEROCNTHALF: case Pav_ZEROCNTDBL:
+      return "vclz_";                           // b, h, w, d
+
    default: vpanic("showPPCAvOp");
    }
 }
@@ -1434,6 +1455,45 @@ PPCInstr* PPCInstr_AvLdVSCR ( HReg src ) {
    i->Pin.AvLdVSCR.src = src;
    return i;
 }
+PPCInstr* PPCInstr_AvCipherV128Unary ( PPCAvOp op, HReg dst, HReg src ) {
+   PPCInstr* insn = LibVEX_Alloc(sizeof(PPCInstr));   /* new Pin_AvCipherV128Unary node */
+   insn->tag      = Pin_AvCipherV128Unary;
+   insn->Pin.AvCipherV128Unary.src = src;
+   insn->Pin.AvCipherV128Unary.dst = dst;
+   insn->Pin.AvCipherV128Unary.op  = op;
+   return insn;
+}
+PPCInstr* PPCInstr_AvCipherV128Binary ( PPCAvOp op, HReg dst,
+                                        HReg srcL, HReg srcR ) {
+   PPCInstr* insn = LibVEX_Alloc(sizeof(PPCInstr));   /* new Pin_AvCipherV128Binary node */
+   insn->tag      = Pin_AvCipherV128Binary;
+   insn->Pin.AvCipherV128Binary.srcR = srcR;
+   insn->Pin.AvCipherV128Binary.srcL = srcL;
+   insn->Pin.AvCipherV128Binary.dst  = dst;
+   insn->Pin.AvCipherV128Binary.op   = op;
+   return insn;
+}
+PPCInstr* PPCInstr_AvHashV128Binary ( PPCAvOp op, HReg dst,
+                                      HReg src, PPCRI* s_field ) {
+   PPCInstr* insn = LibVEX_Alloc(sizeof(PPCInstr));   /* new Pin_AvHashV128Binary node */
+   insn->tag      = Pin_AvHashV128Binary;
+   insn->Pin.AvHashV128Binary.s_field = s_field;
+   insn->Pin.AvHashV128Binary.src     = src;
+   insn->Pin.AvHashV128Binary.dst     = dst;
+   insn->Pin.AvHashV128Binary.op      = op;
+   return insn;
+}
+PPCInstr* PPCInstr_AvBCDV128Trinary ( PPCAvOp op, HReg dst,
+                                      HReg src1, HReg src2, PPCRI* ps ) {
+   PPCInstr* insn = LibVEX_Alloc(sizeof(PPCInstr));   /* new Pin_AvBCDV128Trinary node */
+   insn->tag      = Pin_AvBCDV128Trinary;
+   insn->Pin.AvBCDV128Trinary.ps   = ps;
+   insn->Pin.AvBCDV128Trinary.src2 = src2;
+   insn->Pin.AvBCDV128Trinary.src1 = src1;
+   insn->Pin.AvBCDV128Trinary.dst  = dst;
+   insn->Pin.AvBCDV128Trinary.op   = op;
+   return insn;
+}
 
 
 /* Pretty Print instructions */
@@ -1991,6 +2051,42 @@ void ppPPCInstr ( PPCInstr* i, Bool mode64 )
       ppHRegPPC(i->Pin.AvLdVSCR.src);
       return;
 
+   case Pin_AvCipherV128Unary:
+      vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvCipherV128Unary.op));
+      ppHRegPPC(i->Pin.AvCipherV128Unary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvCipherV128Unary.src);
+      return;
+
+   case Pin_AvCipherV128Binary:
+      vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvCipherV128Binary.op));
+      ppHRegPPC(i->Pin.AvCipherV128Binary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvCipherV128Binary.srcL);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvCipherV128Binary.srcR);
+      return;
+
+   case Pin_AvHashV128Binary:
+      vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvHashV128Binary.op));
+      ppHRegPPC(i->Pin.AvHashV128Binary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvHashV128Binary.src);
+      vex_printf(",");
+      ppPPCRI(i->Pin.AvHashV128Binary.s_field);
+      return;
+
+   case Pin_AvBCDV128Trinary:
+      vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvBCDV128Trinary.op));
+      ppHRegPPC(i->Pin.AvBCDV128Trinary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvBCDV128Trinary.src1);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvBCDV128Trinary.src2);
+      vex_printf(",");
+      ppPPCRI(i->Pin.AvBCDV128Trinary.ps);
+      return;
+
    case Pin_Dfp64Unary:
       vex_printf("%s ", showPPCFpOp(i->Pin.Dfp64Unary.op));
       ppHRegPPC(i->Pin.Dfp64Unary.dst);
@@ -2433,6 +2529,26 @@ void getRegUsage_PPCInstr ( HRegUsage* u, PPCInstr* i, Bool mode64 )
    case Pin_AvLdVSCR:
       addHRegUse(u, HRmRead, i->Pin.AvLdVSCR.src);
       return;
+   case Pin_AvCipherV128Unary:
+      addHRegUse(u, HRmWrite, i->Pin.AvCipherV128Unary.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvCipherV128Unary.src);
+      return;
+   case Pin_AvCipherV128Binary:
+      addHRegUse(u, HRmWrite, i->Pin.AvCipherV128Binary.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvCipherV128Binary.srcL);
+      addHRegUse(u, HRmRead,  i->Pin.AvCipherV128Binary.srcR);
+      return;
+   case Pin_AvHashV128Binary:
+      addHRegUse(u, HRmWrite, i->Pin.AvHashV128Binary.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvHashV128Binary.src);
+      addRegUsage_PPCRI(u,    i->Pin.AvHashV128Binary.s_field);
+      return;
+   case Pin_AvBCDV128Trinary:
+      addHRegUse(u, HRmWrite, i->Pin.AvBCDV128Trinary.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvBCDV128Trinary.src1);
+      addHRegUse(u, HRmRead,  i->Pin.AvBCDV128Trinary.src2);
+      addRegUsage_PPCRI(u,    i->Pin.AvBCDV128Trinary.ps);
+      return;
    case Pin_Dfp64Unary:
       addHRegUse(u, HRmWrite, i->Pin.Dfp64Unary.dst);
       addHRegUse(u, HRmRead, i->Pin.Dfp64Unary.src);
@@ -2742,6 +2858,26 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
    case Pin_AvLdVSCR:
       mapReg(m, &i->Pin.AvLdVSCR.src);
       return;
+   case Pin_AvCipherV128Unary:
+      mapReg(m, &i->Pin.AvCipherV128Unary.dst);
+      mapReg(m, &i->Pin.AvCipherV128Unary.src);
+      return;
+   case Pin_AvCipherV128Binary:
+      mapReg(m, &i->Pin.AvCipherV128Binary.dst);
+      mapReg(m, &i->Pin.AvCipherV128Binary.srcL);
+      mapReg(m, &i->Pin.AvCipherV128Binary.srcR);
+      return;
+   case Pin_AvHashV128Binary:
+      mapRegs_PPCRI(m, i->Pin.AvHashV128Binary.s_field);
+      mapReg(m, &i->Pin.AvHashV128Binary.dst);
+      mapReg(m, &i->Pin.AvHashV128Binary.src);
+      return;
+   case Pin_AvBCDV128Trinary:
+      mapReg(m, &i->Pin.AvBCDV128Trinary.dst);
+      mapReg(m, &i->Pin.AvBCDV128Trinary.src1);
+      mapReg(m, &i->Pin.AvBCDV128Trinary.src2);
+      mapRegs_PPCRI(m, i->Pin.AvBCDV128Trinary.ps);
+      return;
    case Pin_Dfp64Unary:
       mapReg(m, &i->Pin.Dfp64Unary.dst);
       mapReg(m, &i->Pin.Dfp64Unary.src);
@@ -4632,6 +4768,11 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
       case Pav_UNPCKL16S: opc2 =  718; break; // vupklsh
       case Pav_UNPCKHPIX: opc2 =  846; break; // vupkhpx
       case Pav_UNPCKLPIX: opc2 =  974; break; // vupklpx
+
+      case Pav_ZEROCNTBYTE: opc2 = 1794; break; // vclzb
+      case Pav_ZEROCNTHALF: opc2 = 1858; break; // vclzh
+      case Pav_ZEROCNTWORD: opc2 = 1922; break; // vclzw
+      case Pav_ZEROCNTDBL:  opc2 = 1986; break; // vclzd
       default:
          goto bad;
       }
@@ -4713,6 +4854,8 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
       case Pav_MRGHI:    opc2 =   12; break; // vmrghb
       case Pav_MRGLO:    opc2 =  268; break; // vmrglb
 
+      case Pav_POLYMULADD: opc2 = 1032; break; // vpmsumb
+
       default:
          goto bad;
       }
@@ -4765,6 +4908,8 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
       case Pav_MRGHI:   opc2 =   76; break; // vmrghh
       case Pav_MRGLO:   opc2 =  332; break; // vmrglh
 
+      case Pav_POLYMULADD: opc2 = 1096; break; // vpmsumh
+
       default:
          goto bad;
       }
@@ -4822,6 +4967,8 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
       case Pav_CATODD:  opc2 = 1676; break; // vmrgow
       case Pav_CATEVEN: opc2 = 1932; break; // vmrgew
 
+      case Pav_POLYMULADD: opc2 = 1160; break; // vpmsumw
+
       default:
          goto bad;
       }
@@ -4853,13 +5000,71 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
       case Pav_QPACKSS: opc2 = 1486; break; // vpksdsm
       case Pav_MRGHI:   opc2 = 1614; break; // vmrghw
       case Pav_MRGLO:   opc2 = 1742; break; // vmrglw
+      case Pav_POLYMULADD: opc2 = 1224; break; // vpmsumd
       default:
          goto bad;
       }
       p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
       goto done;
    }
-
+   case Pin_AvCipherV128Unary: {
+      UInt v_dst = vregNo(i->Pin.AvCipherV128Unary.dst);
+      UInt v_src = vregNo(i->Pin.AvCipherV128Unary.src);
+      UInt opc2;
+      switch (i->Pin.AvCipherV128Unary.op) {
+      case Pav_CIPHERSUBV128:   opc2 =  1480; break; // vsbox
+      default:
+         goto bad;
+      }
+      p = mkFormVX( p, 4, v_dst, v_src, 0, opc2 );
+      goto done;
+   }
+   case Pin_AvCipherV128Binary: {
+      UInt v_dst  = vregNo(i->Pin.AvCipherV128Binary.dst);
+      UInt v_srcL = vregNo(i->Pin.AvCipherV128Binary.srcL);
+      UInt v_srcR = vregNo(i->Pin.AvCipherV128Binary.srcR);
+      UInt opc2;
+      switch (i->Pin.AvCipherV128Binary.op) {
+      case Pav_CIPHERV128:     opc2 =  1288; break; // vcipher
+      case Pav_CIPHERLV128:    opc2 =  1289; break; // vcipherlast
+      case Pav_NCIPHERV128:    opc2 =  1352; break; // vncipher
+      case Pav_NCIPHERLV128:   opc2 =  1353; break; // vncipherlast
+      default:
+         goto bad;
+      }
+      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+      goto done;
+   }
+   case Pin_AvHashV128Binary: {
+      UInt v_dst = vregNo(i->Pin.AvHashV128Binary.dst);
+      UInt v_src = vregNo(i->Pin.AvHashV128Binary.src);
+      PPCRI* s_field = i->Pin.AvHashV128Binary.s_field;
+      UInt opc2;
+      switch (i->Pin.AvHashV128Binary.op) {
+      case Pav_SHA256:   opc2 =  1666; break; // vshasigmaw
+      case Pav_SHA512:   opc2 =  1730; break; // vshasigmad
+      default:
+         goto bad;
+      }
+      p = mkFormVX( p, 4, v_dst, v_src, s_field->Pri.Imm, opc2 );
+      goto done;
+   }
+   case Pin_AvBCDV128Trinary: {
+      UInt v_dst  = vregNo(i->Pin.AvBCDV128Trinary.dst);
+      UInt v_src1 = vregNo(i->Pin.AvBCDV128Trinary.src1);
+      UInt v_src2 = vregNo(i->Pin.AvBCDV128Trinary.src2);
+      PPCRI* ps   = i->Pin.AvBCDV128Trinary.ps;
+      UInt opc2;
+      switch (i->Pin.AvBCDV128Trinary.op) {
+      case Pav_BCDAdd:   opc2 =  1; break; // bcdadd
+      case Pav_BCDSub:   opc2 = 65; break; // bcdsub
+      default:
+         goto bad;
+      }
+      p = mkFormVXR( p, 4, v_dst, v_src1, v_src2,
+                     0x1, (ps->Pri.Imm << 9) | opc2 );
+      goto done;
+   }
    case Pin_AvBin32Fx4: {
       UInt v_dst  = vregNo(i->Pin.AvBin32Fx4.dst);
       UInt v_srcL = vregNo(i->Pin.AvBin32Fx4.srcL);
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
index 6e6ec241c5..85727b23b2 100644
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -426,6 +426,22 @@ typedef
 
       /* Concatenation */
       Pav_CATODD, Pav_CATEVEN,
+
+      /* Polynomial Multiply-Add */
+      Pav_POLYMULADD,
+
+      /* Cipher */
+      Pav_CIPHERV128, Pav_CIPHERLV128, Pav_NCIPHERV128, Pav_NCIPHERLV128,
+      Pav_CIPHERSUBV128,
+
+      /* Hash */
+      Pav_SHA256, Pav_SHA512,
+
+      /* BCD Arithmetic */
+      Pav_BCDAdd, Pav_BCDSub,
+
+      /* zero count */
+      Pav_ZEROCNTBYTE, Pav_ZEROCNTWORD, Pav_ZEROCNTHALF, Pav_ZEROCNTDBL,
    }
    PPCAvOp;
 
@@ -507,6 +523,10 @@ typedef
       Pin_AvSplat,    /* One elem repeated throughout dst */
       Pin_AvLdVSCR,   /* mtvscr */
       Pin_AvCMov,     /* AV conditional move */
+      Pin_AvCipherV128Unary,  /* AV Vector unary Cipher */
+      Pin_AvCipherV128Binary, /* AV Vector binary Cipher */
+      Pin_AvHashV128Binary, /* AV Vector binary Hash */
+      Pin_AvBCDV128Trinary, /* BCD Arithmetic */
       Pin_Dfp64Unary,   /* DFP64  unary op */
       Pin_Dfp128Unary,  /* DFP128 unary op */
       Pin_DfpShift,     /* Decimal floating point shift by immediate value */
@@ -853,6 +873,30 @@ typedef
          struct {
             HReg src;
          } AvLdVSCR;
+         struct {
+            PPCAvOp   op;
+            HReg      dst;
+            HReg      src;
+         } AvCipherV128Unary;
+         struct {
+            PPCAvOp     op;
+            HReg       dst;
+            HReg       src;
+            PPCRI* s_field;
+         } AvHashV128Binary;
+         struct {
+            PPCAvOp     op;
+            HReg       dst;
+            HReg      src1;
+            HReg      src2;
+            PPCRI*      ps;
+         } AvBCDV128Trinary;
+         struct {
+            PPCAvOp   op;
+            HReg      dst;
+            HReg      srcL;
+            HReg      srcR;
+         } AvCipherV128Binary;
          struct {
             PPCFpOp op;
             HReg dst;
@@ -1034,7 +1078,15 @@ extern PPCInstr* PPCInstr_AvShlDbl   ( UChar shift, HReg dst, HReg srcL, HReg sr
 extern PPCInstr* PPCInstr_AvSplat    ( UChar sz, HReg dst, PPCVI5s* src );
 extern PPCInstr* PPCInstr_AvCMov     ( PPCCondCode, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_AvLdVSCR   ( HReg src );
-
+extern PPCInstr* PPCInstr_AvCipherV128Unary  ( PPCAvOp op, HReg dst,
+                                               HReg srcR );
+extern PPCInstr* PPCInstr_AvCipherV128Binary ( PPCAvOp op, HReg dst,
+                                               HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvHashV128Binary ( PPCAvOp op, HReg dst,
+                                             HReg src, PPCRI* s_field );
+extern PPCInstr* PPCInstr_AvBCDV128Trinary ( PPCAvOp op, HReg dst,
+                                             HReg src1, HReg src2,
+                                             PPCRI* ps );
 extern PPCInstr* PPCInstr_Dfp64Unary  ( PPCFpOp op, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_Dfp64Binary ( PPCFpOp op, HReg dst, HReg srcL,
                                         HReg srcR );
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index e75ad43ba3..fa5cb7a910 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -4845,6 +4845,26 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
       case Iop_Dup32x4:
          return mk_AvDuplicateRI(env, e->Iex.Unop.arg);
 
+      case Iop_CipherSV128: op = Pav_CIPHERSUBV128; goto do_AvCipherV128Un;
+      do_AvCipherV128Un: {
+         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg dst = newVRegV(env);
+         addInstr(env, PPCInstr_AvCipherV128Unary(op, dst, arg));
+         return dst;
+      }
+
+      case Iop_Clz8Sx16: fpop = Pav_ZEROCNTBYTE;   goto do_zerocnt;
+      case Iop_Clz16Sx8: fpop = Pav_ZEROCNTHALF;   goto do_zerocnt;
+      case Iop_Clz32Sx4: fpop = Pav_ZEROCNTWORD;   goto do_zerocnt;
+      case Iop_Clz64x2:  fpop = Pav_ZEROCNTDBL;    goto do_zerocnt;
+      do_zerocnt:
+      {
+        HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+        HReg dst = newVRegV(env);
+        addInstr(env, PPCInstr_AvUnary(fpop, dst, arg));
+        return dst;
+      }
+
       default:
          break;
       } /* switch (e->Iex.Unop.op) */
@@ -4981,6 +5001,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
       case Iop_CmpEQ8x16:  op = Pav_CMPEQU; goto do_AvBin8x16;
       case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
       case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
+      case Iop_PolynomialMulAdd8x16: op = Pav_POLYMULADD; goto do_AvBin8x16;
       do_AvBin8x16: {
          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
@@ -5015,6 +5036,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
       case Iop_CmpEQ16x8:  op = Pav_CMPEQU; goto do_AvBin16x8;
       case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
       case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
+      case Iop_PolynomialMulAdd16x8: op = Pav_POLYMULADD; goto do_AvBin16x8;
       do_AvBin16x8: {
          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
@@ -5052,6 +5074,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
       case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
       case Iop_CatOddLanes32x4:  op = Pav_CATODD;  goto do_AvBin32x4;
       case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4;
+      case Iop_PolynomialMulAdd32x4: op = Pav_POLYMULADD; goto do_AvBin32x4;
       do_AvBin32x4: {
          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
@@ -5078,6 +5101,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
       case Iop_CmpEQ64x2:  op = Pav_CMPEQU; goto do_AvBin64x2;
       case Iop_CmpGT64Ux2: op = Pav_CMPGTU; goto do_AvBin64x2;
       case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2;
+      case Iop_PolynomialMulAdd64x2: op = Pav_POLYMULADD; goto do_AvBin64x2;
       do_AvBin64x2: {
          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
@@ -5148,11 +5172,52 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
          return dst;
       }
 
+      case Iop_CipherV128:  op = Pav_CIPHERV128;   goto do_AvCipherV128;
+      case Iop_CipherLV128: op = Pav_CIPHERLV128;  goto do_AvCipherV128;
+      case Iop_NCipherV128: op = Pav_NCIPHERV128;  goto do_AvCipherV128;
+      case Iop_NCipherLV128:op = Pav_NCIPHERLV128; goto do_AvCipherV128;
+      do_AvCipherV128: {
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg dst  = newVRegV(env);
+         addInstr(env, PPCInstr_AvCipherV128Binary(op, dst, arg1, arg2));
+         return dst;
+      }
+
+      case Iop_SHA256:op = Pav_SHA256; goto do_AvHashV128;
+      case Iop_SHA512:op = Pav_SHA512; goto do_AvHashV128;
+      do_AvHashV128: {
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg dst  = newVRegV(env);
+         PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2);
+         addInstr(env, PPCInstr_AvHashV128Binary(op, dst, arg1, s_field));
+         return dst;
+      }
       default:
          break;
       } /* switch (e->Iex.Binop.op) */
    } /* if (e->tag == Iex_Binop) */
 
+   if (e->tag == Iex_Triop) {
+      IRTriop *triop = e->Iex.Triop.details;
+      switch (triop->op) {
+      case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
+      case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
+      do_AvBCDV128: {
+         HReg arg1 = iselVecExpr(env, triop->arg1);
+         HReg arg2 = iselVecExpr(env, triop->arg2);
+         HReg dst  = newVRegV(env);
+         PPCRI* ps = iselWordExpr_RI(env, triop->arg3);
+         addInstr(env, PPCInstr_AvBCDV128Trinary(op, dst, arg1, arg2, ps));
+         return dst;
+      }
+
+      default:
+         break;
+      } /* switch (triop->op) */
+   } /* if (e->tag == Iex_Triop) */
+
+
    if (e->tag == Iex_Const ) {
       vassert(e->Iex.Const.con->tag == Ico_V128);
       if (e->Iex.Const.con->Ico.V128 == 0x0000) {
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 9b62bcaa6b..692abfbc57 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -782,6 +782,15 @@ void ppIROp ( IROp op )
       case Iop_MullEven16Sx8: vex_printf("MullEven16Sx8"); return;
       case Iop_MullEven32Sx4: vex_printf("MullEven32Sx4"); return;
 
+      case Iop_PolynomialMulAdd8x16:
+         vex_printf("PolynomialMulAdd8x16"); return;
+      case Iop_PolynomialMulAdd16x8:
+         vex_printf("PolynomialMulAdd16x8"); return;
+      case Iop_PolynomialMulAdd32x4:
+         vex_printf("PolynomialMulAdd32x4"); return;
+      case Iop_PolynomialMulAdd64x2:
+         vex_printf("PolynomialMulAdd64x2"); return;
+
       case Iop_Avg8Ux16: vex_printf("Avg8Ux16"); return;
       case Iop_Avg16Ux8: vex_printf("Avg16Ux8"); return;
       case Iop_Avg32Ux4: vex_printf("Avg32Ux4"); return;
@@ -824,6 +833,7 @@ void ppIROp ( IROp op )
       case Iop_Clz8Sx16: vex_printf("Clz8Sx16"); return;
       case Iop_Clz16Sx8: vex_printf("Clz16Sx8"); return;
       case Iop_Clz32Sx4: vex_printf("Clz32Sx4"); return;
+      case Iop_Clz64x2: vex_printf("Clz64x2"); return;
       case Iop_Cls8Sx16: vex_printf("Cls8Sx16"); return;
       case Iop_Cls16Sx8: vex_printf("Cls16Sx8"); return;
       case Iop_Cls32Sx4: vex_printf("Cls32Sx4"); return;
@@ -1127,6 +1137,17 @@ void ppIROp ( IROp op )
 
       case Iop_Perm32x8:   vex_printf("Perm32x8"); return;
 
+      case Iop_CipherV128:   vex_printf("CipherV128"); return;
+      case Iop_CipherLV128:  vex_printf("CipherLV128"); return;
+      case Iop_NCipherV128:  vex_printf("NCipherV128"); return;
+      case Iop_NCipherLV128: vex_printf("NCipherLV128"); return;
+      case Iop_CipherSV128:  vex_printf("CipherSV128"); return;
+
+      case Iop_SHA256:  vex_printf("SHA256"); return;
+      case Iop_SHA512:  vex_printf("SHA512"); return;
+      case Iop_BCDAdd:  vex_printf("BCDAdd"); return;
+      case Iop_BCDSub:  vex_printf("BCDSub"); return;
+
       default: vpanic("ppIROp(1)");
    }
 
@@ -2795,6 +2816,8 @@ void typeOfPrimop ( IROp op,
       case Iop_QSub32Sx4: case Iop_QSub64Sx2:
       case Iop_Mul8x16: case Iop_Mul16x8: case Iop_Mul32x4:
       case Iop_PolynomialMul8x16:
+      case Iop_PolynomialMulAdd8x16: case Iop_PolynomialMulAdd16x8:
+      case Iop_PolynomialMulAdd32x4: case Iop_PolynomialMulAdd64x2:
       case Iop_MulHi16Ux8: case Iop_MulHi32Ux4: 
       case Iop_MulHi16Sx8: case Iop_MulHi32Sx4: 
       case Iop_QDMulHi16Sx8: case Iop_QDMulHi32Sx4:
@@ -2845,6 +2868,10 @@ void typeOfPrimop ( IROp op,
       case Iop_Perm8x16: case Iop_Perm32x4:
       case Iop_Recps32Fx4:
       case Iop_Rsqrts32Fx4:
+      case Iop_CipherV128:
+      case Iop_CipherLV128:
+      case Iop_NCipherV128:
+      case Iop_NCipherLV128:
          BINARY(Ity_V128,Ity_V128, Ity_V128);
 
       case Iop_PolynomialMull8x8:
@@ -2864,7 +2891,7 @@ void typeOfPrimop ( IROp op,
       case Iop_CmpNEZ8x16: case Iop_CmpNEZ16x8:
       case Iop_CmpNEZ32x4: case Iop_CmpNEZ64x2:
       case Iop_Cnt8x16:
-      case Iop_Clz8Sx16: case Iop_Clz16Sx8: case Iop_Clz32Sx4:
+      case Iop_Clz8Sx16: case Iop_Clz16Sx8: case Iop_Clz32Sx4: case Iop_Clz64x2:
       case Iop_Cls8Sx16: case Iop_Cls16Sx8: case Iop_Cls32Sx4:
       case Iop_PwAddL8Ux16: case Iop_PwAddL16Ux8: case Iop_PwAddL32Ux4:
       case Iop_PwAddL8Sx16: case Iop_PwAddL16Sx8: case Iop_PwAddL32Sx4:
@@ -2873,6 +2900,7 @@ void typeOfPrimop ( IROp op,
       case Iop_Reverse16_8x16:
       case Iop_Neg32Fx4:
       case Iop_Abs8x16: case Iop_Abs16x8: case Iop_Abs32x4:
+      case Iop_CipherSV128:
          UNARY(Ity_V128, Ity_V128);
 
       case Iop_ShlV128: case Iop_ShrV128:
@@ -2888,6 +2916,7 @@ void typeOfPrimop ( IROp op,
       case Iop_QShlN32Sx4: case Iop_QShlN64Sx2:
       case Iop_QSalN8x16: case Iop_QSalN16x8:
       case Iop_QSalN32x4: case Iop_QSalN64x2:
+      case Iop_SHA256:    case Iop_SHA512:
          BINARY(Ity_V128,Ity_I8, Ity_V128);
 
       case Iop_F32ToFixed32Ux4_RZ:
@@ -2928,6 +2957,9 @@ void typeOfPrimop ( IROp op,
       case Iop_ExtractV128:
          TERNARY(Ity_V128, Ity_V128, Ity_I8, Ity_V128);
 
+      case Iop_BCDAdd:
+      case Iop_BCDSub:
+         TERNARY(Ity_V128,Ity_V128, Ity_I8, Ity_V128);
       case Iop_QDMulLong16Sx4: case Iop_QDMulLong32Sx2:
          BINARY(Ity_I64, Ity_I64, Ity_V128);
 
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 50d986bbb1..1c888d5412 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -896,6 +896,7 @@ typedef
       Iop_Cnt8x8,
       Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
       Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
+      Iop_Clz64x2,
 
       /* VECTOR x VECTOR SHIFT / ROTATE */
       Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
@@ -1237,6 +1238,12 @@ typedef
        */
       Iop_BCDtoDPB,
 
+      /* BCD arithmetic instructions, (V128, V128, I8) -> V128
+       * The BCD format is the same as that used in the BCD<->DPB conversion
+       * routines, except using 124 bits of digits (vs 60) plus the trailing
+       * 4-bit sign code. */
+      Iop_BCDAdd, Iop_BCDSub,
+
       /* Conversion I64 -> D64 */
       Iop_ReinterpI64asD64,
 
@@ -1403,6 +1410,39 @@ typedef
       Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
       Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
 
+      /* Vector Polynomial multiplication add.   (V128, V128) -> V128
+
+       *** Below is the algorithm for the instructions. These Iops could
+           be emulated to get this functionality, but the emulation would
+           be long and messy.
+
+        Example for polynomial multiply add for vector of bytes
+        do i = 0 to 15
+            prod[i].bit[0:14] <- 0
+            srcA <- VR[argL].byte[i]
+            srcB <- VR[argR].byte[i]
+            do j = 0 to 7
+                do k = 0 to j
+                    gbit <- srcA.bit[k] & srcB.bit[j-k]
+                    prod[i].bit[j] <- prod[i].bit[j] ^ gbit
+                end
+            end
+
+            do j = 8 to 14
+                do k = j-7 to 7
+                     gbit <- (srcA.bit[k] & srcB.bit[j-k])
+                     prod[i].bit[j] <- prod[i].bit[j] ^ gbit
+                end
+            end
+        end
+
+        do i = 0 to 7
+            VR[dst].hword[i] <- 0b0 || (prod[2*i] ^ prod[2*i+1])
+        end
+      */
+      Iop_PolynomialMulAdd8x16, Iop_PolynomialMulAdd16x8,
+      Iop_PolynomialMulAdd32x4, Iop_PolynomialMulAdd64x2,
+
       /* PAIRWISE operations */
       /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
             [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
@@ -1598,6 +1638,17 @@ typedef
 
       Iop_Perm32x8,
 
+      /* (V128, V128) -> V128, except Iop_CipherSV128: V128 -> V128 */
+      Iop_CipherV128, Iop_CipherLV128, Iop_CipherSV128,
+      Iop_NCipherV128, Iop_NCipherLV128,
+
+      /* Hash instructions, Federal Information Processing Standards
+       * Publication 180-3 Secure Hash Standard. */
+      /* (V128, I8) -> V128; The I8 input arg is (ST | SIX), where ST and
+       * SIX are fields from the insn. See ISA 2.07 description of
+       * vshasigmad and vshasigmaw insns.*/
+      Iop_SHA512, Iop_SHA256,
+
       /* ------------------ 256-bit SIMD FP. ------------------ */
       Iop_Add64Fx4,
       Iop_Sub64Fx4,
-- 
2.47.2