Power PC Add support for ISA 3.0, part 5

author Carl Love <cel@us.ibm.com>

Mon, 15 Aug 2016 21:53:20 +0000 (21:53 +0000)

committer Carl Love <cel@us.ibm.com>

Mon, 15 Aug 2016 21:53:20 +0000 (21:53 +0000)
author Carl Love <cel@us.ibm.com>
Mon, 15 Aug 2016 21:53:20 +0000 (21:53 +0000)
committer Carl Love <cel@us.ibm.com>
Mon, 15 Aug 2016 21:53:20 +0000 (21:53 +0000)
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c

index 2fa952be92d2058cc73fdc6dfb7c113c46e5ab22..d5f92944c7d0d501c86c7ffd6a476d38c5c4d89a 100644 (file)
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -1506,6 +1506,21 @@ static IRExpr* getVReg ( UInt archreg )
     return IRExpr_Get( vectorGuestRegOffset(archreg), Ity_V128 );
  }
  
+/* Get contents of 128-bit reg guest register */
+static IRExpr* getF128Reg ( UInt archreg )
+{
+   vassert(archreg < 64);
+   return IRExpr_Get( vectorGuestRegOffset(archreg), Ity_F128 );
+}
+
+/* Ditto, but write to a reg instead. */
+static void putF128Reg ( UInt archreg, IRExpr* e )
+{
+   vassert(archreg < 64);
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F128);
+   stmt( IRStmt_Put(vectorGuestRegOffset(archreg), e) );
+}
+
  /* Ditto, but write to a reg instead. */
  static void putVReg ( UInt archreg, IRExpr* e )
  {
@@ -1910,6 +1925,36 @@ static void putCR0 ( UInt cr, IRExpr* e )
     stmt( IRStmt_Put(guestCR0offset(cr), e) );
  }
  
+static void putC ( IRExpr* e )
+{
+   /* The assumption is that the value of the Floating-Point Result
+    * Class Descriptor bit (C) is passed in the lower four bits of a
+    * 32 bit value.
+    *
+    * Note, the C and FPCC bits which are fields in the FPSCR
+    * register are stored in their own memory location of
+    * memory.  The FPCC bits are in the lower 4 bits.  The C bit needs
+    * to be shifted to bit 4 in the memory location that holds C and FPCC.
+    * Note not all of the FPSCR register bits are supported.  We are
+    * only writing C bit.
+    */
+   IRExpr* tmp;
+
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+
+   /* Get the FPCC bit field */
+   tmp = binop( Iop_And32,
+                mkU32( 0xF ),
+                unop( Iop_8Uto32, IRExpr_Get( OFFB_C_FPCC, Ity_I8 ) ) );
+
+   stmt( IRStmt_Put( OFFB_C_FPCC,
+                     unop( Iop_32to8,
+                           binop( Iop_Or32, tmp,
+                                  binop( Iop_Shl32,
+                                         binop( Iop_And32, mkU32( 0x1 ), e ),
+                                         mkU8( 4 ) ) ) ) ) );
+}
+
  static IRExpr* /* :: Ity_I8 */ getCR0 ( UInt cr )
  {
     vassert(cr < 8);
@@ -3394,266 +3439,488 @@ static IRExpr* /* ::Ity_I32 */  getFPCC ( void )
                                 mkexpr( x ), \
                                 mkU64( NONZERO_FRAC_MASK ) )
  
-// Returns exponent part of a single precision floating point as I32
-static IRExpr * fp_exp_part_sp(IRTemp src)
+#define NONZERO_FRAC_MASK32 0x007fffffULL
+#define FP_FRAC_PART32(x) binop( Iop_And32,   \
+                                 mkexpr( x ), \
+                                 mkU32( NONZERO_FRAC_MASK32 ) )
+
+// Returns exponent part of floating point src as I32
+static IRExpr * fp_exp_part( IRType size, IRTemp src )
  {
-   return binop( Iop_And32,
-                 binop( Iop_Shr32, mkexpr( src ), mkU8( 23 ) ),
-                 mkU32( 0xff ) );
+   IRExpr *shift_by, *mask, *tsrc;
+
+   vassert( ( size == Ity_I16 ) || ( size == Ity_I32 )
+            || ( size == Ity_I64 ) );
+
+   if( size == Ity_I16 ) {
+      /* The 16-bit floating point value is in the lower 16-bits
+       * of the 32-bit input value.
+       */
+      tsrc  = mkexpr( src );
+      mask  = mkU32( 0x1F );
+      shift_by = mkU8( 10 );
+
+   } else if( size == Ity_I32 ) {
+      tsrc  = mkexpr( src );
+      mask  = mkU32( 0xFF );
+      shift_by = mkU8( 23 );
+
+   } else if( size == Ity_I64 ) {
+      tsrc  = unop( Iop_64HIto32, mkexpr( src ) );
+      mask  = mkU32( 0x7FF );
+      shift_by = mkU8( 52 - 32 );
+   }
+
+   return binop( Iop_And32, binop( Iop_Shr32, tsrc, shift_by ), mask );
  }
  
-// Returns exponent part of floating point as I32
-static IRExpr * fp_exp_part(IRTemp src, Bool sp)
-{
-   IRExpr * exp;
-   if (sp)
-      return fp_exp_part_sp(src);
+/* The following functions check the floating point value to see if it
+   is zero, infinity, NaN, Normalized, Denormalized.
+*/
+/* 16-bit floating point number is stored in the lower 16-bits of 32-bit value */
+#define I16_EXP_MASK       0x7C00
+#define I16_FRACTION_MASK  0x03FF
+#define I32_EXP_MASK       0x7F800000
+#define I32_FRACTION_MASK  0x007FFFFF
+#define I64_EXP_MASK       0x7FF0000000000000ULL
+#define I64_FRACTION_MASK  0x000FFFFFFFFFFFFFULL
+#define V128_EXP_MASK      0x7FFF000000000000ULL
+#define V128_FRACTION_MASK 0x0000FFFFFFFFFFFFULL  /* upper 64-bit fractional mask */
+
+void setup_value_check_args( IRType size, IRTemp *exp_mask, IRTemp *frac_mask,
+                             IRTemp *zero );
+
+void setup_value_check_args( IRType size, IRTemp *exp_mask, IRTemp *frac_mask,
+                             IRTemp *zero ) {
+
+   vassert( ( size == Ity_I16 ) || ( size == Ity_I32 )
+            || ( size == Ity_I64 ) || ( size == Ity_V128 ) );
+
+   if( size == Ity_I16 ) {
+      /* The 16-bit floating point value is in the lower 16-bits of
+         the 32-bit input value */
+      *frac_mask = newTemp( Ity_I32 );
+      *exp_mask  = newTemp( Ity_I32 );
+      *zero  = newTemp( Ity_I32 );
+      assign( *exp_mask, mkU32( I16_EXP_MASK ) );
+      assign( *frac_mask, mkU32( I16_FRACTION_MASK ) );
+      assign( *zero, mkU32( 0 ) );
+
+   } else if( size == Ity_I32 ) {
+      *frac_mask = newTemp( Ity_I32 );
+      *exp_mask  = newTemp( Ity_I32 );
+      *zero  = newTemp( Ity_I32 );
+      assign( *exp_mask, mkU32( I32_EXP_MASK ) );
+      assign( *frac_mask, mkU32( I32_FRACTION_MASK ) );
+      assign( *zero, mkU32( 0 ) );
+
+   } else if( size == Ity_I64 ) {
+      *frac_mask = newTemp( Ity_I64 );
+      *exp_mask  = newTemp( Ity_I64 );
+      *zero  = newTemp( Ity_I64 );
+      assign( *exp_mask, mkU64( I64_EXP_MASK ) );
+      assign( *frac_mask, mkU64( I64_FRACTION_MASK ) );
+      assign( *zero, mkU64( 0 ) );
  
-   if (!mode64)
-      exp = binop( Iop_And32, binop( Iop_Shr32, unop( Iop_64HIto32,
-                                                      mkexpr( src ) ),
-                                     mkU8( 20 ) ), mkU32( 0x7ff ) );
-   else
-      exp = unop( Iop_64to32,
-                  binop( Iop_And64,
-                         binop( Iop_Shr64, mkexpr( src ), mkU8( 52 ) ),
-                         mkU64( 0x7ff ) ) );
-   return exp;
+   } else {
+      /* V128 is converted to upper and lower 64 bit values, */
+      /* uses 64-bit operators and temps */
+      *frac_mask = newTemp( Ity_I64 );
+      *exp_mask  = newTemp( Ity_I64 );
+      *zero  = newTemp( Ity_I64 );
+      assign( *exp_mask, mkU64( V128_EXP_MASK ) );
+      /* upper 64-bit fractional mask */
+      assign( *frac_mask, mkU64( V128_FRACTION_MASK ) );
+      assign( *zero, mkU64( 0 ) );
+   }
  }
  
-static IRExpr * is_Inf_sp(IRTemp src)
+/* Helper function for the various function which check the value of
+   the floating point value.
+*/
+static IRExpr * exponent_compare( IRType size, IRTemp src,
+                                  IRTemp exp_mask, IRExpr *exp_val )
  {
-   IRTemp frac_part = newTemp(Ity_I32);
-   IRExpr * Inf_exp;
+   IROp opAND, opCmpEQ;
  
-   assign( frac_part, binop( Iop_And32, mkexpr(src), mkU32(0x007fffff)) );
-   Inf_exp = binop( Iop_CmpEQ32, fp_exp_part( src, True /*single precision*/ ), mkU32( 0xff ) );
-   return mkAND1( Inf_exp, binop( Iop_CmpEQ32, mkexpr( frac_part ), mkU32( 0 ) ) );
-}
+   if( ( size == Ity_I16 ) || ( size == Ity_I32 ) )  {
+      /* The 16-bit floating point value is in the lower 16-bits of
+         the 32-bit input value */
+      opAND = Iop_And32;
+      opCmpEQ = Iop_CmpEQ32;
  
+   } else {
+      opAND = Iop_And64;
+      opCmpEQ = Iop_CmpEQ64;
+   }
  
-// Infinity: exp = 7ff and fraction is zero; s = 0/1
-static IRExpr * is_Inf(IRTemp src, Bool sp)
-{
-   IRExpr * Inf_exp, * hi32, * low32;
-   IRTemp frac_part;
+   if( size == Ity_V128 ) {
+      return binop( opCmpEQ,
+                    binop ( opAND,
+                            unop( Iop_V128HIto64, mkexpr( src ) ),
+                            mkexpr( exp_mask ) ),
+                    exp_val );
  
-   if (sp)
-      return is_Inf_sp(src);
+   } else if( ( size == Ity_I16 ) || ( size == Ity_I32 ) )  {
+      return binop( opCmpEQ,
+                    binop ( opAND, mkexpr( src ), mkexpr( exp_mask ) ),
+                    exp_val );
+   } else {
+      /* 64-bit operands */
  
-   frac_part = newTemp(Ity_I64);
-   assign( frac_part, FP_FRAC_PART(src) );
-   Inf_exp = binop( Iop_CmpEQ32, fp_exp_part( src, False /*not single precision*/  ), mkU32( 0x7ff ) );
-   hi32 = unop( Iop_64HIto32, mkexpr( frac_part ) );
-   low32 = unop( Iop_64to32, mkexpr( frac_part ) );
-   return mkAND1( Inf_exp, binop( Iop_CmpEQ32, binop( Iop_Or32, low32, hi32 ),
-                                  mkU32( 0 ) ) );
+      if (mode64) {
+         return binop( opCmpEQ,
+                       binop ( opAND, mkexpr( src ), mkexpr( exp_mask ) ),
+                       exp_val );
+      } else {
+         /* No support for 64-bit compares in 32-bit mode, need to do upper
+          * and lower parts using 32-bit compare operators.
+          */
+         return 
+            mkAND1( binop( Iop_CmpEQ32,
+                           binop ( Iop_And32,
+                                   unop(Iop_64HIto32, mkexpr( src ) ),
+                                   unop(Iop_64HIto32, mkexpr( exp_mask ) ) ),
+                           unop(Iop_64HIto32, exp_val ) ),
+                    binop( Iop_CmpEQ32,
+                           binop ( Iop_And32,
+                                   unop(Iop_64to32, mkexpr( src ) ),
+                                   unop(Iop_64to32, mkexpr( exp_mask ) ) ),
+                           unop(Iop_64to32, exp_val ) ) );
+      }
+   }
  }
  
-/*  Quad precision floating point number is infinit:
- *      exp is 32767 and fraction is zero; sign = 0/1
- */
-static IRExpr * is_Inf_V128 ( IRTemp src )
+static IRExpr *fractional_part_compare( IRType size, IRTemp src,
+                                        IRTemp frac_mask, IRExpr *zero )
  {
-   IRExpr * frac_part_hi, * frac_part_low, *Inf_exp;
+   IROp opAND, opCmpEQ;
  
-   Inf_exp = binop( Iop_CmpEQ64,
-                    binop( Iop_And64,
-                           binop( Iop_Shr64,
-                                  unop( Iop_V128HIto64,
-                                        mkexpr( src ) ),
-                                  mkU8( 48 ) ),
-                           mkU64( 0x7FFF ) ),
-                    mkU64( 0x7FFF ) );
+   if( ( size == Ity_I16 ) || ( size == Ity_I32 ) )  {
+      /*The 16-bit floating point value is in the lower 16-bits of
+        the 32-bit input value */
+      opAND = Iop_And32;
+      opCmpEQ = Iop_CmpEQ32;
  
-   frac_part_hi = binop( Iop_And64,
-                         unop( Iop_V128HIto64, mkexpr( src ) ),
-                         mkU64( 0x0000FFFFFFFFFFFF ) );
-   frac_part_low = unop( Iop_V128to64, mkexpr( src ) );
+   } else {
+      opAND = Iop_And64;
+      opCmpEQ = Iop_CmpEQ64;
+   }
  
-   return mkAND1( Inf_exp,
-                  mkAND1( binop( Iop_CmpEQ64, frac_part_hi, mkU64( 0 ) ),
-                          binop( Iop_CmpEQ64, frac_part_low, mkU64( 0 ) ) ) );
+   if( size == Ity_V128 ) {
+      /* 128-bit, note we only care if the fractional part is zero so take upper
+         52-bits of fractional part and lower 64-bits and OR them together and test
+         for zero.  This keeps the temp variables and operators all 64-bit.
+      */
+      return binop( opCmpEQ,
+                    binop( Iop_Or64,
+                           binop( opAND,
+                                  unop( Iop_V128HIto64, mkexpr( src ) ),
+                                  mkexpr( frac_mask ) ),
+                           unop( Iop_V128to64, mkexpr( src ) ) ),
+                    zero );
+
+   } else if( ( size == Ity_I16 ) || ( size == Ity_I32 ) )  {
+         return binop( opCmpEQ,
+                       binop( opAND,  mkexpr( src ), mkexpr( frac_mask ) ),
+                       zero );
+   } else {
+      if (mode64) {
+         return binop( opCmpEQ,
+                       binop( opAND,  mkexpr( src ), mkexpr( frac_mask ) ),
+                       zero );
+      } else {
+         /* No support for 64-bit compares in 32-bit mode, need to do upper
+          * and lower parts using 32-bit compare operators.
+          */
+         return 
+            mkAND1( binop( Iop_CmpEQ32,
+                           binop ( Iop_And32,
+                                   unop(Iop_64HIto32, mkexpr( src ) ),
+                                   unop(Iop_64HIto32, mkexpr( frac_mask ) ) ),
+                           mkU32 ( 0 ) ),
+                    binop( Iop_CmpEQ32,
+                           binop ( Iop_And32,
+                                   unop(Iop_64to32, mkexpr( src ) ),
+                                   unop(Iop_64to32, mkexpr( frac_mask ) ) ),
+                           mkU32 ( 0 ) ) );
+      }
+   }
  }
  
-static IRExpr * is_Zero_sp(IRTemp src)
+// Infinity: exp has all bits set, and fraction is zero; s = 0/1
+static IRExpr * is_Inf( IRType size, IRTemp src )
  {
-   IRTemp sign_less_part = newTemp(Ity_I32);
-   assign( sign_less_part, binop( Iop_And32, mkexpr( src ), mkU32( SIGN_MASK32 ) ) );
-   return binop( Iop_CmpEQ32, mkexpr( sign_less_part ), mkU32( 0 ) );
+   IRExpr *max_exp, *zero_frac;
+   IRTemp exp_mask, frac_mask, zero;
+
+   setup_value_check_args( size, &exp_mask, &frac_mask, &zero );
+
+   /* check exponent is all ones, i.e. (exp AND exp_mask) = exp_mask */
+   max_exp = exponent_compare( size, src, exp_mask, mkexpr( exp_mask ) );
+
+   /* check fractional part is all zeros */
+   zero_frac = fractional_part_compare( size, src, frac_mask, mkexpr( zero ) );
+
+   return  mkAND1( max_exp, zero_frac );
  }
  
  // Zero: exp is zero and fraction is zero; s = 0/1
-static IRExpr * is_Zero(IRTemp src, Bool sp)
+static IRExpr * is_Zero( IRType size, IRTemp src )
  {
-   IRExpr * hi32, * low32;
-   IRTemp sign_less_part;
-   if (sp)
-      return is_Zero_sp(src);
+   IRExpr *zero_exp, *zero_frac;
+   IRTemp exp_mask, frac_mask, zero;
  
-   sign_less_part = newTemp(Ity_I64);
+   setup_value_check_args( size, &exp_mask, &frac_mask, &zero );
  
-   assign( sign_less_part, binop( Iop_And64, mkexpr( src ), mkU64( SIGN_MASK ) ) );
-   hi32 = unop( Iop_64HIto32, mkexpr( sign_less_part ) );
-   low32 = unop( Iop_64to32, mkexpr( sign_less_part ) );
-   return binop( Iop_CmpEQ32, binop( Iop_Or32, low32, hi32 ),
-                              mkU32( 0 ) );
-}
+   /* check the exponent is all zeros, i.e. (exp AND exp_mask) = zero */
+   zero_exp = exponent_compare( size, src, exp_mask, mkexpr( zero ) );
  
-/*  Quad precision floating point number is Zero:
-       exp is zero and fraction is zero; sign = 0/1 */
-static IRExpr * is_Zero_V128 ( IRTemp src )
-{
-   IRExpr * hi64, * low64;
+   /* check fractional part is all zeros */
+   zero_frac = fractional_part_compare( size, src, frac_mask, mkexpr( zero ) );
  
-   hi64 = binop( Iop_And64,
-                 unop( Iop_V128HIto64, mkexpr( src ) ),
-                 mkU64( 0x7FFFFFFFFFFFFFFF ) );
-   low64 = unop( Iop_V128to64, mkexpr( src ) );
-   return binop( Iop_CmpEQ64,
-                 binop( Iop_Or64, hi64, low64 ),
-                 mkU64( 0 ) );
+   return  mkAND1( zero_exp, zero_frac );
  }
  
-/*  SNAN: s = 1/0; exp = 0x7ff; fraction is nonzero, with highest bit '1'
- *  QNAN: s = 1/0; exp = 0x7ff; fraction is nonzero, with highest bit '0'
- *  This function returns an IRExpr value of '1' for any type of NaN.
+/*  SNAN: s = 1/0; exp all 1's; fraction is nonzero, with highest bit '1'
+ *  QNAN: s = 1/0; exp all 1's; fraction is nonzero, with highest bit '0'
   */
-static IRExpr * is_NaN(IRTemp src)
+static IRExpr * is_NaN( IRType size, IRTemp src )
  {
-   IRExpr * NaN_exp, * hi32, * low32;
-   IRTemp frac_part = newTemp(Ity_I64);
+   IRExpr *max_exp, *not_zero_frac;
+   IRTemp exp_mask, frac_mask, zero;
  
-   assign( frac_part, FP_FRAC_PART(src) );
-   hi32 = unop( Iop_64HIto32, mkexpr( frac_part ) );
-   low32 = unop( Iop_64to32, mkexpr( frac_part ) );
-   NaN_exp = binop( Iop_CmpEQ32, fp_exp_part( src, False /*not single precision*/ ),
-                    mkU32( 0x7ff ) );
+   setup_value_check_args( size, &exp_mask, &frac_mask, &zero );
  
-   return mkAND1( NaN_exp, binop( Iop_CmpNE32, binop( Iop_Or32, low32, hi32 ),
-                                               mkU32( 0 ) ) );
-}
-
-/* This function returns an IRExpr value of '1' for any type of NaN.
-   The passed 'src' argument is assumed to be Ity_I32. */
-static IRExpr * is_NaN_32(IRTemp src)
-{
-#define NONZERO_FRAC_MASK32 0x007fffffULL
-#define FP_FRAC_PART32(x) binop( Iop_And32, \
-                                 mkexpr( x ), \
-                                 mkU32( NONZERO_FRAC_MASK32 ) )
+   /* check exponent is all ones, i.e. (exp AND exp_mask) = exp_mask */
+   max_exp = exponent_compare( size, src, exp_mask, mkexpr( exp_mask ) );
  
-   IRExpr * frac_part = FP_FRAC_PART32(src);
-   IRExpr * exp_part = binop( Iop_And32,
-                              binop( Iop_Shr32, mkexpr( src ), mkU8( 23 ) ),
-                              mkU32( 0x0ff ) );
-   IRExpr * NaN_exp = binop( Iop_CmpEQ32, exp_part, mkU32( 0xff ) );
+   /* check fractional part is not zero */
+   not_zero_frac = unop( Iop_Not1,
+                         fractional_part_compare( size, src, frac_mask,
+                                                  mkexpr( zero ) ) );
  
-   return mkAND1( NaN_exp, binop( Iop_CmpNE32, frac_part, mkU32( 0 ) ) );
+   return  mkAND1( max_exp, not_zero_frac );
  }
  
-/* This function returns an IRExpr value of '1' for any type of NaN.
- * The passed 'src' argument is assumed to be a quad precisiion
- * floating point number stored in an Ity_V128.
- */
-static IRExpr * is_NaN_V128 ( IRTemp src )
+/* Denormalized number has a zero exponent and non zero fraction. */
+static IRExpr * is_Denorm( IRType size, IRTemp src )
  {
+   IRExpr *zero_exp, *not_zero_frac;
+   IRTemp exp_mask, frac_mask, zero;
  
-   /* ignore top fractional bit */
-#define NONZERO_FRAC_MASK64Hi 0x0000ffffffffffffULL
-#define EXP_MASK 0x7fff
-
-   IRExpr * frac_part_hi = binop( Iop_And64,
-                                  unop( Iop_V128HIto64,
-                                        mkexpr ( src ) ),
-                                  mkU64( NONZERO_FRAC_MASK64Hi ) );
+   setup_value_check_args( size, &exp_mask, &frac_mask, &zero );
  
-   IRExpr * frac_part_lo = unop( Iop_V128to64, mkexpr ( src ) );
+   /* check exponent is all ones, i.e. (exp AND exp_mask) = exp_mask */
+   zero_exp = exponent_compare( size, src, exp_mask, mkexpr( zero ) );
  
-   IRExpr * exp_part = binop( Iop_And64,
-                              binop( Iop_Shr64,
-                                     unop( Iop_V128HIto64,
-                                           mkexpr( src ) ),
-                                      mkU8( 48 ) ),
-                              mkU64( EXP_MASK ) );
-   IRExpr * NaN_exp = binop( Iop_CmpEQ64, exp_part, mkU64( EXP_MASK ) );
+   /* check fractional part is not zero */
+   not_zero_frac = unop( Iop_Not1,
+                         fractional_part_compare( size, src, frac_mask,
+                                                  mkexpr( zero ) ) );
  
-   return mkAND1( NaN_exp,
-                  mkOR1( binop( Iop_CmpNE64, frac_part_hi, mkU64( 0 ) ),
-                         binop( Iop_CmpNE64, frac_part_lo, mkU64( 0 ) ) ) );
+   return  mkAND1( zero_exp, not_zero_frac );
  }
  
-static IRExpr * is_Denorm ( IRType size, IRTemp src )
+/* Normalized number has exponent between 1 and max_exp -1, or in other words
+   the exponent is not zero and not equal to the max exponent value. */
+static IRExpr * is_Norm( IRType size, IRTemp src )
  {
-   /* Denormalized number has a zero exponent and non zero fraction.
-      Takes a 32bit or 64-bit floating point number and returns 1-bit result.
-   */
-   IROp opAND, opSHR, opCmpEQ, opCmpNE;
-   IRExpr *shift_by, *mask1, *mask2;
-   IRTemp zero = newTemp( size );
+   IRExpr *not_zero_exp, *not_max_exp;
+   IRTemp exp_mask, zero;
  
-   vassert( ( size == Ity_I32 ) || ( size == Ity_I64 ) );
+   vassert( ( size == Ity_I16 ) || ( size == Ity_I32 )
+            || ( size == Ity_I64 ) || ( size == Ity_V128 ) );
  
-   if( size == Ity_I32 ) {
-      opAND = Iop_And32;
-      opSHR = Iop_Shr32;
-      opCmpEQ = Iop_CmpEQ32;
-      opCmpNE = Iop_CmpNE32;
-      shift_by = mkU8( 23 );
-      mask1 = mkU32( 0xFF );
-      mask2 = mkU32( 0x007fffff );
-      assign( zero , mkU32( 0 ) );
+   if( size == Ity_I16 ) {
+      /* The 16-bit floating point value is in the lower 16-bits of
+         the 32-bit input value */
+      exp_mask = newTemp( Ity_I32 );
+      zero = newTemp( Ity_I32 );
+      assign( exp_mask, mkU32( I16_EXP_MASK ) );
+      assign( zero,  mkU32( 0 ) );
+
+   } else if( size == Ity_I32 ) {
+      exp_mask = newTemp( Ity_I32 );
+      zero = newTemp( Ity_I32 );
+      assign( exp_mask, mkU32( I32_EXP_MASK ) );
+      assign( zero, mkU32( 0 ) );
+
+   } else if( size == Ity_I64 ) {
+      exp_mask = newTemp( Ity_I64 );
+      zero = newTemp( Ity_I64 );
+      assign( exp_mask, mkU64( I64_EXP_MASK ) );
+      assign( zero, mkU64( 0 ) );
  
     } else {
-      opAND = Iop_And64;
-      opSHR = Iop_Shr64;
-      opCmpEQ = Iop_CmpEQ64;
-      opCmpNE = Iop_CmpNE64;
-      shift_by = mkU8( 52 );
-      mask1 = mkU64( 0x7FF );
-      mask2 = mkU64( 0x000fffffffffffffULL );
+      /* V128 is converted to upper and lower 64 bit values, */
+      /* uses 64-bit operators and temps */
+      exp_mask = newTemp( Ity_I64 );
+      zero = newTemp( Ity_I64 );
+      assign( exp_mask, mkU64( V128_EXP_MASK ) );
        assign( zero, mkU64( 0 ) );
     }
  
-   IRExpr * exp_part = binop( opAND,
-                              binop( opSHR,
-                                     mkexpr( src ),
-                                     shift_by ),
-                              mask1 );
-   IRExpr * frac = binop( opAND,
-                          mkexpr( src ),
-                          mask2 );
+   not_zero_exp = unop( Iop_Not1,
+                        exponent_compare( size, src,
+                                          exp_mask, mkexpr( zero ) ) );
+   not_max_exp = unop( Iop_Not1,
+                       exponent_compare( size, src,
+                                         exp_mask, mkexpr( exp_mask ) ) );
  
-   return mkAND1( binop( opCmpEQ, exp_part, mkexpr( zero ) ),
-                  binop( opCmpNE,
-                         frac,
-                         mkexpr( zero ) ) );
+   return  mkAND1( not_zero_exp, not_max_exp );
  }
  
-static IRExpr * is_Denorm_V128 ( IRTemp src )
-{
-   /* Denormalized number has a zero exponent and non zero fraction.
-    * Takes a 128-bit floating point number and returns 1-bit result.
+
+static IRExpr * create_FPCC( IRTemp NaN,   IRTemp inf,
+                             IRTemp zero,  IRTemp norm,
+                             IRTemp dnorm, IRTemp pos,
+                             IRTemp neg ) {
+   IRExpr *bit0, *bit1, *bit2, *bit3;
+
+   /* If the result is NaN then must force bits 1, 2 and 3 to zero
+    * to get correct result.
      */
-   IRExpr * exp_part = binop( Iop_And64,
-                              binop( Iop_Shr64,
-                                     unop( Iop_V128HIto64,
-                                           mkexpr( src ) ),
-                                     mkU8( 48 ) ),
-                              mkU64( 0x7FFF ) );
-   IRExpr * frac_hi = binop( Iop_And64,
-                             unop( Iop_V128HIto64, mkexpr( src ) ),
-                             mkU64( 0x0000ffffffffffffULL ) );
-   IRExpr * frac_lo = unop( Iop_V128to64, mkexpr( src ) );
-   return mkAND1( binop( Iop_CmpEQ64, exp_part, mkU64( 0 ) ),
-                  mkOR1( binop( Iop_CmpNE64,
-                                frac_hi,
-                                mkU64( 0 ) ),
-                         binop( Iop_CmpNE64,
-                                frac_lo,
-                                mkU64( 0 ) ) ) );
-}
+   bit0 = unop( Iop_1Uto32, mkOR1( mkexpr( NaN ), mkexpr( inf ) ) );
+   bit1 = unop( Iop_1Uto32, mkAND1( mkNOT1( mkexpr( NaN ) ), mkexpr( zero ) ) );
+   bit2 = unop( Iop_1Uto32,
+                mkAND1( mkNOT1( mkexpr( NaN ) ),
+                        mkAND1( mkOR1( mkOR1( mkAND1( mkexpr( pos ),
+                                                      mkexpr( dnorm ) ),
+                                              mkAND1( mkexpr( pos ),
+                                                      mkexpr( norm ) ) ),
+                                       mkAND1( mkexpr( pos ),
+                                               mkexpr( inf ) ) ),
+                                mkAND1( mkNOT1 ( mkexpr( zero ) ),
+                                        mkNOT1( mkexpr( NaN ) ) ) ) ) );
+   bit3 = unop( Iop_1Uto32,
+                mkAND1( mkNOT1( mkexpr( NaN ) ),
+                        mkAND1( mkOR1( mkOR1( mkAND1( mkexpr( neg ),
+                                                      mkexpr( dnorm ) ),
+                                              mkAND1( mkexpr( neg ),
+                                                      mkexpr( norm ) ) ),
+                                       mkAND1( mkexpr( neg ),
+                                               mkexpr( inf ) ) ),
+                                mkAND1( mkNOT1 ( mkexpr( zero ) ),
+                                        mkNOT1( mkexpr( NaN ) ) ) ) ) );
+
+   return binop( Iop_Or32,
+                 binop( Iop_Or32,
+                        bit0,
+                        binop( Iop_Shl32, bit1, mkU8( 1 ) ) ),
+                 binop( Iop_Or32,
+                        binop( Iop_Shl32, bit2, mkU8( 2 ) ),
+                        binop( Iop_Shl32, bit3, mkU8( 3 ) ) ) );
+}
+
+static IRExpr * create_C( IRTemp NaN,   IRTemp zero,
+                          IRTemp dnorm, IRTemp pos,
+                          IRTemp neg )
+{
+
+   return unop( Iop_1Uto32,
+                mkOR1( mkOR1( mkexpr( NaN ),
+                              mkAND1( mkexpr( neg ), mkexpr( dnorm ) ) ),
+                       mkOR1( mkAND1( mkexpr( neg ), mkexpr( zero ) ),
+                              mkAND1( mkexpr( pos ), mkexpr( dnorm ) ) ) ) );
+}
+
+static void generate_store_FPRF( IRType size, IRTemp src )
+{
+   IRExpr *FPCC, *C;
+   IRTemp NaN = newTemp( Ity_I1 ), inf = newTemp( Ity_I1 );
+   IRTemp dnorm = newTemp( Ity_I1 ), norm = newTemp( Ity_I1 );
+   IRTemp pos = newTemp( Ity_I1 ),  neg = newTemp( Ity_I1 );
+   IRTemp zero = newTemp( Ity_I1 );
+
+   IRTemp sign_bit = newTemp( Ity_I1 );
+   IRTemp value;
+
+   vassert( ( size == Ity_I16 ) || ( size == Ity_I32 )
+            || ( size == Ity_I64 ) || ( size == Ity_F128 ) );
+
+   vassert( ( typeOfIRExpr(irsb->tyenv, mkexpr( src ) )   == Ity_I32 )
+            || ( typeOfIRExpr(irsb->tyenv, mkexpr( src ) ) == Ity_I64 )
+            || ( typeOfIRExpr(irsb->tyenv, mkexpr( src ) ) == Ity_F128 ) );
+
+   if( size == Ity_I16 ) {
+      /* The 16-bit floating point value is in the lower 16-bits of
+         the 32-bit input value */
+      value = newTemp( Ity_I32 );
+      assign( value, mkexpr( src ) );
+      assign( sign_bit,
+              unop ( Iop_32to1,
+                     binop( Iop_And32,
+                            binop( Iop_Shr32, mkexpr( value ), mkU8( 15 ) ),
+                            mkU32( 0x1 ) ) ) );
+
+   } else if( size == Ity_I32 ) {
+      value = newTemp( size );
+      assign( value, mkexpr( src ) );
+      assign( sign_bit,
+              unop ( Iop_32to1,
+                     binop( Iop_And32,
+                            binop( Iop_Shr32, mkexpr( value ), mkU8( 31 ) ),
+                            mkU32( 0x1 ) ) ) );
+
+   } else if( size == Ity_I64 ) {
+      value = newTemp( size );
+      assign( value, mkexpr( src ) );
+      assign( sign_bit,
+              unop ( Iop_64to1,
+                     binop( Iop_And64,
+                            binop( Iop_Shr64, mkexpr( value ), mkU8( 63 ) ),
+                            mkU64( 0x1 ) ) ) );
+
+   } else {
+   /* Move the F128 bit pattern to an integer V128 bit pattern */
+      value = newTemp( Ity_V128 );
+      assign( value,
+              binop( Iop_64HLtoV128,
+                     unop( Iop_ReinterpF64asI64,
+                           unop( Iop_F128HItoF64, mkexpr( src ) ) ),
+                     unop( Iop_ReinterpF64asI64,
+                           unop( Iop_F128LOtoF64, mkexpr( src ) ) ) ) );
+
+      size = Ity_V128;
+      assign( sign_bit,
+              unop ( Iop_64to1,
+                     binop( Iop_And64,
+                            binop( Iop_Shr64,
+                                   unop( Iop_V128HIto64, mkexpr( value ) ),
+                                   mkU8( 63 ) ),
+                            mkU64( 0x1 ) ) ) );
+   }
+
+   /* Calculate the floating point result field FPRF */
+   assign( NaN, is_NaN( size, value ) );
+   assign( inf, is_Inf( size, value ) );
+   assign( zero, is_Zero( size, value ) );
+   assign( norm, is_Norm( size, value ) );
+   assign( dnorm, is_Denorm( size, value ) );
+   assign( pos, mkAND1( mkNOT1( mkexpr( sign_bit ) ), mkU1( 1 ) ) );
+   assign( neg, mkAND1( mkexpr( sign_bit ), mkU1( 1 ) ) );
+
+   /* create the FPRF bit field
+    *
+    *   FPRF field[4:0]   type of value
+    *      10001           QNaN
+    *      01001           - infininity
+    *      01000           - Normalized
+    *      11000           - Denormalized
+    *      10010           - zero
+    *      00010           + zero
+    *      10100           + Denormalized
+    *      00100           + Normalized
+    *      00101           + infinity
+    */
+   FPCC = create_FPCC( NaN, inf, zero, norm, dnorm, pos, neg );
+   C = create_C( NaN, zero, dnorm, pos, neg );
  
+   /* Write the C and FPCC fields of the FPRF field */
+   putC( C );
+   putFPCC( FPCC );
+}
  
  /* This function takes an Ity_I32 input argument interpreted
     as a single-precision floating point value. If src is a
@@ -3671,7 +3938,7 @@ static IRExpr * handle_SNaN_to_QNaN_32(IRExpr * src)
  
     /* check if input is SNaN, if it is convert to QNaN */
     assign( is_SNAN,
-           mkAND1( is_NaN_32( tmp ),
+           mkAND1( is_NaN( Ity_I32, tmp ),
                     binop( Iop_CmpEQ32,
                            binop( Iop_And32, mkexpr( tmp ),
                                   mkU32( SNAN_MASK32 ) ),
@@ -3711,7 +3978,8 @@ static IRTemp getNegatedResult(IRTemp intermediateResult)
                binop( Iop_CmpEQ32,
                       binop( Iop_Xor32,
                              mkexpr( signbit_32 ),
-                            unop( Iop_1Uto32, is_NaN( intermediateResult ) ) ),
+                            unop( Iop_1Uto32, is_NaN( Ity_I64,
+                                                      intermediateResult ) ) ),
                       mkU32( 1 ) ) ) );
  
     assign( negatedResult,
@@ -3754,7 +4022,8 @@ static IRTemp getNegatedResult_32(IRTemp intermediateResult)
                binop( Iop_CmpEQ32,
                       binop( Iop_Xor32,
                              mkexpr( signbit_32 ),
-                            unop( Iop_1Uto32, is_NaN_32( intermediateResult ) ) ),
+                            unop( Iop_1Uto32, is_NaN( Ity_I32,
+                                                      intermediateResult ) ) ),
                       mkU32( 1 ) ) ) );
  
     assign( negatedResult,
@@ -4074,9 +4343,8 @@ static IRExpr * is_BCDstring128 (UInt Signed, IRExpr *src)
  
  static IRExpr * BCDstring_zero (IRExpr *src)
  {
-   /* The src is a 128-bit value containing a BCD string and a sign in the
-    * least significant byte.  The function returns a 1 if the BCD string
-    * values are all zero, 0 otherwise.
+   /* The src is a 128-bit value containing a BCD string.  The function
+    * returns a 1 if the BCD string values are all zero, 0 otherwise.
      */
     IRTemp tsrc = newTemp( Ity_V128 );
     assign( tsrc, src);
@@ -5207,6 +5475,67 @@ static Bool dis_modulo_int ( UInt theInstr )
              break;
           }
  
+      case 0x21A: // cnttzw, cnttzw.   Count Trailing Zero Word
+         {
+            /* Note cnttzw RA, RS  - RA is dest, RS is source.  But the
+             * order of the operands in theInst is opc1 RS RA opc2 which has
+             * the operand fields backwards to what the standard order.
+             */
+            UChar rA_address = ifieldRegA(theInstr);
+            UChar rS_address = ifieldRegDS(theInstr);
+            IRTemp rA     = newTemp(Ity_I64);
+            IRTemp rS     = newTemp(Ity_I64);
+            UChar flag_rC = ifieldBIT0(theInstr);
+            IRTemp result = newTemp(Ity_I32);
+
+            DIP("cnttzw%s r%u,r%u\n",  flag_rC ? "." : "",
+                rA_address, rS_address);
+
+            assign( rS, getIReg( rS_address ) );
+            assign( result, unop( Iop_Ctz32,
+                                  unop( Iop_64to32, mkexpr( rS ) ) ) );
+            assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) );
+
+            if ( flag_rC )
+               set_CR0( mkexpr( rA ) );
+
+            putIReg( rA_address, mkexpr( rA ) );
+
+            return True;  /* Return here since this inst is not consistent
+                           * with the other instructions
+                           */
+         }
+         break;
+
+      case 0x23A: // cnttzd, cnttzd.   Count Trailing Zero Double word
+         {
+            /* Note cnttzd RA, RS  - RA is dest, RS is source.  But the
+             * order of the operands in theInst is opc1 RS RA opc2 which has
+             * the operand order listed backwards to what is standard.
+             */
+            UChar rA_address = ifieldRegA(theInstr);
+            UChar rS_address = ifieldRegDS(theInstr);
+            IRTemp rA     = newTemp(Ity_I64);
+            IRTemp rS     = newTemp(Ity_I64);
+            UChar flag_rC = ifieldBIT0(theInstr);
+
+            DIP("cnttzd%s r%u,r%u\n",  flag_rC ? "." : "",
+                rA_address, rS_address);
+
+            assign( rS, getIReg( rS_address ) );
+            assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) );
+
+            if ( flag_rC == 1 )
+               set_CR0( mkexpr( rA ) );
+
+            putIReg( rA_address, mkexpr( rA ) );
+
+            return True;  /* Return here since this inst is not consistent
+                           * with the other instructions
+                           */
+         }
+         break;
+
        case 0x309: // modsd  Modulo Signed Double Word
           {
              IRTemp rA     = newTemp( Ity_I64 );
@@ -9317,6 +9646,31 @@ static Bool dis_cache_manage ( UInt         theInstr,
     IRRoundingMode.  PPCRoundingMode encoding is different to
     IRRoundingMode, so need to map it.
  */
+
+static IRExpr* /* :: Ity_I32 */ set_round_to_Oddmode ( void )
+{
+/* PPC/ valgrind have two-bits to designate the rounding mode.
+   ISA 3.0 adds instructions than can use a round to odd mode
+   but did not change the number of bits for the rm.  Basically,
+   they added two instructions that only differ by the rounding
+   mode the operation uses.  In essesce, they encoded the rm
+   in the name.  In order to avoid having to create Iops, that
+   encode the rm in th name, we will "expand" the definition of
+   the rounding mode bits.  We will just pass the rm and then
+   map the to odd mode to the appropriate PPCFpOp name that
+   will tell us which instruction to map to.
+
+   rounding mode | PPC | IR
+   ------------------------
+   to nearest    | 000  | 00
+   to zero       | 001  | 11
+   to +infinity  | 010  | 10
+   to -infinity  | 011  | 01
+   to odd        | 1xx  | xx
+*/
+   return mkU32(8);
+}
+
  static IRExpr* /* :: Ity_I32 */ get_IR_roundingmode ( void )
  {
  /* 
@@ -10134,6 +10488,7 @@ static Bool dis_fp_multadd ( UInt theInstr )
   *  Otherwise fg_flag is set to 0.
   *
   */
+
  static void do_fp_tsqrt(IRTemp frB_Int, Bool sp, IRTemp * fe_flag_tmp, IRTemp * fg_flag_tmp)
  {
     // The following temps are for holding intermediate results
@@ -10147,7 +10502,12 @@ static void do_fp_tsqrt(IRTemp frB_Int, Bool sp, IRTemp * fe_flag_tmp, IRTemp *
     IRTemp  frbInf_tmp = newTemp(Ity_I1);
     *fe_flag_tmp = newTemp(Ity_I32);
     *fg_flag_tmp = newTemp(Ity_I32);
-   assign( frB_exp_shR, fp_exp_part( frB_Int, sp ) );
+
+   if ( sp )
+      assign( frB_exp_shR, fp_exp_part( Ity_I32, frB_Int ) );
+   else
+      assign( frB_exp_shR, fp_exp_part( Ity_I64, frB_Int ) );
+
     assign(e_b, binop( Iop_Sub32, mkexpr(frB_exp_shR), mkU32( bias ) ));
  
     //////////////////  fe_flag tests BEGIN //////////////////////
@@ -10155,9 +10515,17 @@ static void do_fp_tsqrt(IRTemp frB_Int, Bool sp, IRTemp * fe_flag_tmp, IRTemp *
      * (NOTE: These tests are similar to those used for ftdiv.  See do_fp_tdiv()
      * for details.)
      */
-   frbNaN = sp ? is_NaN_32(frB_Int) : is_NaN(frB_Int);
-   assign( frbInf_tmp, is_Inf(frB_Int, sp) );
-   assign( frbZero_tmp, is_Zero(frB_Int, sp ) );
+   if ( sp ) {
+      frbNaN = is_NaN( Ity_I32, frB_Int );
+      assign( frbInf_tmp, is_Inf( Ity_I32, frB_Int ) );
+      assign( frbZero_tmp, is_Zero( Ity_I32, frB_Int ) );
+
+   } else {
+      frbNaN = is_NaN( Ity_I64, frB_Int );
+      assign( frbInf_tmp, is_Inf( Ity_I64, frB_Int ) );
+      assign( frbZero_tmp, is_Zero( Ity_I64, frB_Int ) );
+   }
+
     {
        // Test_value = -970 for double precision
        UInt test_value = sp ? 0xffffff99 : 0xfffffc36;
@@ -10271,8 +10639,14 @@ static void _do_fp_tdiv(IRTemp frA_int, IRTemp frB_int, Bool sp, IRTemp * fe_fla
     IRExpr * fe_flag, * fg_flag;
  
     // Create temps that will be used throughout the following tests.
-   assign( frA_exp_shR, fp_exp_part( frA_int, sp ) );
-   assign( frB_exp_shR, fp_exp_part( frB_int, sp ) );
+   if ( sp ) {
+      assign( frA_exp_shR, fp_exp_part( Ity_I32, frA_int ) );
+      assign( frB_exp_shR, fp_exp_part( Ity_I32, frB_int ) );
+   } else{
+      assign( frA_exp_shR, fp_exp_part( Ity_I64, frA_int ) );
+      assign( frB_exp_shR, fp_exp_part( Ity_I64, frB_int ) );
+   }
+
     /* Let e_[a|b] be the unbiased exponent: i.e. exp - 1023. */
     assign(e_a, binop( Iop_Sub32, mkexpr(frA_exp_shR), mkU32( bias ) ));
     assign(e_b, binop( Iop_Sub32, mkexpr(frB_exp_shR), mkU32( bias ) ));
@@ -10285,28 +10659,26 @@ static void _do_fp_tdiv(IRTemp frA_int, IRTemp frB_int, Bool sp, IRTemp * fe_fla
      * Test if the double-precision floating-point operand in register FRA is
      * a NaN:
      */
-   fraNaN = sp ? is_NaN_32(frA_int) : is_NaN(frA_int);
+   fraNaN = sp ? is_NaN( Ity_I32, frA_int ) : is_NaN( Ity_I64, frA_int );
     /*
-    * Test if the double-precision floating-point operand in register FRA is
-    * an Infinity.
+    * Test if the double-precision floating-point operands in register FRA
+    * and FRB is an Infinity.  Test if FRB is zero.
      */
-   assign(fraInf_tmp, is_Inf(frA_int, sp));
+   if ( sp ) {
+      assign(fraInf_tmp, is_Inf( Ity_I32, frA_int ) );
+      assign( frbInf_tmp, is_Inf( Ity_I32, frB_int ) );
+      assign( frbZero_tmp, is_Zero( Ity_I32, frB_int ) );
  
+   } else {
+      assign(fraInf_tmp, is_Inf( Ity_I64, frA_int ) );
+      assign( frbInf_tmp, is_Inf( Ity_I64, frB_int ) );
+      assign( frbZero_tmp, is_Zero( Ity_I64, frB_int ) );
+   }
     /*
      * Test if the double-precision floating-point operand in register FRB is
      * a NaN:
      */
-   frbNaN = sp ? is_NaN_32(frB_int) : is_NaN(frB_int);
-   /*
-    * Test if the double-precision floating-point operand in register FRB is
-    * an Infinity.
-    */
-   assign( frbInf_tmp, is_Inf(frB_int, sp) );
-   /*
-    * Test if the double-precision floating-point operand in register FRB is
-    * a Zero.
-    */
-   assign( frbZero_tmp, is_Zero(frB_int, sp) );
+   frbNaN = sp ? is_NaN( Ity_I32, frB_int ) : is_NaN( Ity_I64, frB_int );
  
     /*
      * Test if e_b <= -1022 for double precision;
@@ -10329,7 +10701,11 @@ static void _do_fp_tdiv(IRTemp frA_int, IRTemp frB_int, Bool sp, IRTemp * fe_fla
     /*
      * Test if FRA != Zero and (e_a - e_b) >= bias
      */
-   assign( fraNotZero_tmp, unop( Iop_Not1, is_Zero( frA_int, sp ) ) );
+   if ( sp )
+      assign( fraNotZero_tmp, unop( Iop_Not1, is_Zero( Ity_I32, frA_int ) ) );
+   else
+      assign( fraNotZero_tmp, unop( Iop_Not1, is_Zero( Ity_I64, frA_int ) ) );
+
     ea_eb_GTE = mkAND1( mkexpr( fraNotZero_tmp ),
                         binop( Iop_CmpLT32S, mkU32( bias ),
                                binop( Iop_Sub32, mkexpr( e_a ),
@@ -14723,12 +15099,64 @@ static Bool dis_av_extend_sign_count_zero ( UInt theInstr, UInt allow_isa_3_0 )
                          mkexpr( vB ) ) );
        break;
  
+   case 28: // vctzb,  Vector Count Trailing Zeros Byte
+      {
+         DIP("vctzb  v%d,v%d", rT_addr, vB_addr);
+
+         /* This instruction is only available in the ISA 3.0 */
+         if ( !mode64 || !allow_isa_3_0 ) {
+            vex_printf("\n vctzb instruction not supported on non ISA 3.0 platform\n\n");
+            return False;
+         }
+         assign( vT, unop( Iop_Ctz8x16, mkexpr( vB ) ) );
+      }
+      break;
+
+   case 29: // vctzh,  Vector Count Trailing Zeros Halfword
+      {
+         DIP("vctzh  v%d,v%d", rT_addr, vB_addr);
+
+         /* This instruction is only available in the ISA 3.0 */
+         if ( !mode64 || !allow_isa_3_0 ) {
+            vex_printf("\n vctzh instruction not supported on non ISA 3.0 platform\n\n");
+            return False;
+         }
+         assign( vT, unop( Iop_Ctz16x8, mkexpr( vB ) ) );
+      }
+      break;
+
+   case 30: // vctzw,  Vector Count Trailing Zeros Word
+      {
+         DIP("vctzw  v%d,v%d", rT_addr, vB_addr);
+
+         /* This instruction is only available in the ISA 3.0 */
+         if ( !mode64 || !allow_isa_3_0 ) {
+            vex_printf("\n vctzw instruction not supported on non ISA 3.0 platform\n\n");
+            return False;
+         }
+         assign( vT, unop( Iop_Ctz32x4, mkexpr( vB ) ) );
+      }
+      break;
+
+   case 31: // vctzd,  Vector Count Trailing Zeros Double word
+      {
+         DIP("vctzd  v%d,v%d", rT_addr, vB_addr);
+
+         /* This instruction is only available in the ISA 3.0 */
+         if ( !mode64 || !allow_isa_3_0 ) {
+            vex_printf("\n vctzd instruction not supported on non ISA 3.0 platform\n\n");
+            return False;
+         }
+         assign( vT, unop( Iop_Ctz64x2, mkexpr( vB ) ) );
+      }
+      break;
+
     default:
        vex_printf("dis_av_extend_sign(ppc)(Unsupported vector extend sign instruction)\n");
        return False;
     }
  
-   putVReg( rT_addr, mkexpr( vT) );
+   putVReg( rT_addr, mkexpr( vT ) );
     return True;
  }
  
@@ -15239,22 +15667,22 @@ dis_vx_conv ( UInt theInstr, UInt opc2 )
           assign( res1, unop(Iop_64HIto32, mkexpr(lo64)) );
           assign( res0, unop(Iop_64to32,   mkexpr(lo64)) );
  
-         b3_result = IRExpr_ITE(is_NaN_32(b3),
+         b3_result = IRExpr_ITE(is_NaN(Ity_I32, b3),
                                  // then: result is 0x{8|0}80000000
                                  mkU32(un_signed ? 0x00000000 : 0x80000000),
                                  // else: result is from the Iop_QFtoI32{s|u}x4_RZ
                                  mkexpr(res3));
-         b2_result = IRExpr_ITE(is_NaN_32(b2),
+         b2_result = IRExpr_ITE(is_NaN(Ity_I32, b2),
                                  // then: result is 0x{8|0}80000000
                                  mkU32(un_signed ? 0x00000000 : 0x80000000),
                                  // else: result is from the Iop_QFtoI32{s|u}x4_RZ
                                  mkexpr(res2));
-         b1_result = IRExpr_ITE(is_NaN_32(b1),
+         b1_result = IRExpr_ITE(is_NaN(Ity_I32, b1),
                                  // then: result is 0x{8|0}80000000
                                  mkU32(un_signed ? 0x00000000 : 0x80000000),
                                  // else: result is from the Iop_QFtoI32{s|u}x4_RZ
                                  mkexpr(res1));
-         b0_result = IRExpr_ITE(is_NaN_32(b0),
+         b0_result = IRExpr_ITE(is_NaN(Ity_I32, b0),
                                  // then: result is 0x{8|0}80000000
                                  mkU32(un_signed ? 0x00000000 : 0x80000000),
                                  // else: result is from the Iop_QFtoI32{s|u}x4_RZ
@@ -16447,8 +16875,8 @@ static IRExpr * _get_maxmin_fp_NaN(IRTemp frA_I64, IRTemp frB_I64)
     IRTemp frA_isQNaN = newTemp(Ity_I1);
     IRTemp frB_isQNaN = newTemp(Ity_I1);
  
-   assign( frA_isNaN, is_NaN( frA_I64 ) );
-   assign( frB_isNaN, is_NaN( frB_I64 ) );
+   assign( frA_isNaN, is_NaN( Ity_I64, frA_I64 ) );
+   assign( frB_isNaN, is_NaN( Ity_I64, frB_I64 ) );
     // If operand is a NAN and bit 12 is '0', then it's an SNaN
     assign( frA_isSNaN,
             mkAND1( mkexpr(frA_isNaN),
@@ -16540,9 +16968,10 @@ static IRExpr * get_max_min_fp(IRTemp frA_I64, IRTemp frB_I64, Bool isMin)
     IRTemp anyNaN = newTemp(Ity_I1);
     IRTemp frA_isZero = newTemp(Ity_I1);
     IRTemp frB_isZero = newTemp(Ity_I1);
-   assign(frA_isZero, is_Zero(frA_I64, False /*not single precision*/ ));
-   assign(frB_isZero, is_Zero(frB_I64, False /*not single precision*/ ));
-   assign(anyNaN, mkOR1(is_NaN(frA_I64), is_NaN(frB_I64)));
+   assign( frA_isZero, is_Zero( Ity_I64, frA_I64 ) );
+   assign( frB_isZero, is_Zero( Ity_I64, frB_I64 ) );
+   assign( anyNaN, mkOR1( is_NaN( Ity_I64, frA_I64 ),
+                          is_NaN(Ity_I64, frB_I64 ) ) );
  #define MINUS_ZERO 0x8000000000000000ULL
  
     return IRExpr_ITE( /* If both arguments are zero . . . */
@@ -16662,7 +17091,7 @@ static IRExpr * _do_vsx_fp_roundToInt(IRTemp frB_I64, UInt opc2)
  #define SNAN_MASK 0x0008000000000000ULL
     hi32 = unop( Iop_64HIto32, mkexpr(frB_I64) );
     assign( is_SNAN,
-           mkAND1( is_NaN( frB_I64 ),
+           mkAND1( is_NaN( Ity_I64, frB_I64 ),
                     binop( Iop_CmpEQ32,
                            binop( Iop_And32, hi32, mkU32( 0x00080000 ) ),
                            mkU32( 0 ) ) ) );
@@ -16707,6 +17136,7 @@ dis_vxv_misc ( UInt theInstr, UInt opc2 )
           assign(frB2, unop(Iop_V128to64, getVSReg( XB )));
  
           DIP("%s v%d,v%d\n", redp ? "xvredp" : "xvrsqrtedp", XT, XB);
+
           if (!redp) {
              assign( sqrtHi,
                      binop( Iop_SqrtF64,
@@ -17696,7 +18126,7 @@ dis_vvec_cmp( UInt theInstr, UInt opc2 )
   * Miscellaneous VSX Scalar Instructions
   */
  static Bool
-dis_vxs_misc( UInt theInstr, UInt opc2 )
+dis_vxs_misc( UInt theInstr, UInt opc2, int allow_isa_3_0 )
  {
  #define VG_PPC_SIGN_MASK 0x7fffffffffffffffULL
     /* XX3-Form and XX2-Form */
@@ -17781,8 +18211,8 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
                               mkU64( 0x7FFF00000000000 ) ) );
  
           /* exp A or exp B is NaN */
-         bit7 = mkOR1( is_NaN_V128( vA ),
-                       is_NaN_V128( vB ) );
+         bit7 = mkOR1( is_NaN( Ity_V128, vA ),
+                       is_NaN( Ity_V128, vB ) );
  
           assign( eq_lt_gt, binop( Iop_Or32,
                                    binop( Iop_Shl32,
@@ -17881,6 +18311,10 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
  
        case 0x2b6: // xsxexpdp (VSX Scalar Extract Exponent Double-Precision)
                    // xsxsigdp (VSX Scalar Extract Significand Doulbe-Precision)
+                  // xsvhpdp  (VSX Scalar Convert Half-Precision format
+                  //           to Double-Precision format)
+                  // xscvdphp (VSX Scalar round & convert Double-precision
+                  //           format to Half-precision format)
        {
           IRTemp rT = newTemp( Ity_I64 );
           UInt inst_select = IFIELD( theInstr, 16, 5);
@@ -17903,8 +18337,9 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
  
              /* Value is normal if it isn't infinite, zero or denormalized */
              normal = mkNOT1( mkOR1(
-                                   mkOR1( is_NaN( tmp ), is_Inf( tmp, False ) ),
-                                   mkOR1( is_Zero( tmp, False ),
+                                   mkOR1( is_NaN( Ity_I64, tmp ),
+                                          is_Inf( Ity_I64, tmp ) ),
+                                   mkOR1( is_Zero( Ity_I64, tmp ),
                                            is_Denorm( Ity_I64, tmp ) ) ) );
  
              assign( rT, binop( Iop_Or64,
@@ -17914,11 +18349,46 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
                                 binop( Iop_Shl64,
                                        unop( Iop_1Uto64, normal),
                                        mkU8( 52 ) ) ) );
+            putIReg( XT, mkexpr( rT ) );
+
+        } else if (inst_select == 16) {
+            IRTemp result = newTemp( Ity_V128 );
+            IRTemp value = newTemp( Ity_I64 );
+            /* Note: PPC only coverts the 16-bit value in the upper 64-bits
+             * of the source V128 to a 64-bit value stored in the upper
+             * 64-bits of the V128 result.  The contents of the lower 64-bits
+             * is undefined.
+             */
+
+            DIP("xscvhpdp v%d, v%d\n", (UInt)XT, (UInt)XB);
+            assign( result, unop( Iop_F16toF64x2, mkexpr( vB ) ) );
+
+            putVSReg( XT, mkexpr( result ) );
+
+            assign( value, unop( Iop_V128HIto64, mkexpr( result ) ) );
+            generate_store_FPRF( Ity_I64, value );
+            return True;
+
+         } else if (inst_select == 17) {   // xscvdphp
+            IRTemp value = newTemp( Ity_I32 );
+            IRTemp result = newTemp( Ity_V128 );
+            /* Note: PPC only coverts the 64-bit value in the upper 64-bits of
+             * the V128 and stores the 16-bit result in the upper word of the
+             * V128 result.  The contents of the lower 64-bits is undefined.
+             */
+            DIP("xscvdphp v%d, v%d\n", (UInt)XT, (UInt)XB);
+            assign( result,  unop( Iop_F64toF16x2, mkexpr( vB ) ) );
+            assign( value, unop( Iop_64to32, unop( Iop_V128HIto64,
+                                                   mkexpr( result ) ) ) );
+            putVSReg( XT, mkexpr( result ) );
+            generate_store_FPRF( Ity_I32, value );
+            return True;
+
           } else {
              vex_printf( "dis_vxv_scalar_extract_exp_sig invalid inst_select (ppc)(opc2)\n" );
+            vex_printf("inst_select = %d\n", inst_select);
              return False;
           }
-         putIReg( XT, mkexpr(rT));
        }
        break;
  
@@ -17952,9 +18422,9 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
                                            mkU8( 63 ) ),
                                     mkU64( 0 ) ) ) );
  
-         assign( NaN, unop( Iop_1Uto64, is_NaN( vB_hi ) ) );
-         assign( inf, unop( Iop_1Uto64, is_Inf( vB_hi, False ) ) );
-         assign( zero, unop( Iop_1Uto64, is_Zero( vB_hi, False ) ) );
+         assign( NaN, unop( Iop_1Uto64, is_NaN( Ity_I64, vB_hi ) ) );
+         assign( inf, unop( Iop_1Uto64, is_Inf( Ity_I64, vB_hi ) ) );
+         assign( zero, unop( Iop_1Uto64, is_Zero( Ity_I64, vB_hi ) ) );
  
           if (opc2 == 0x254) {
              DIP("xststdcsp %d,v%d,%d\n", BF, (UInt)XB, DCMX_mask);
@@ -18254,9 +18724,9 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
                                                  mkU8( 31 ) ),
                                           mkU32( 0 ) ) ) );
  
-            assign( NaN[i], unop( Iop_1Uto32, is_NaN_32( value[i] ) ));
-            assign( inf[i], unop( Iop_1Uto32, is_Inf( value[i], True ) ) );
-            assign( zero[i], unop( Iop_1Uto32, is_Zero( value[i], True ) ) );
+            assign( NaN[i], unop( Iop_1Uto32, is_NaN( Ity_I32, value[i] ) ));
+            assign( inf[i], unop( Iop_1Uto32, is_Inf( Ity_I32, value[i] ) ) );
+            assign( zero[i], unop( Iop_1Uto32, is_Zero( Ity_I32, value[i] ) ) );
  
              assign( dnorm[i], unop( Iop_1Uto32, is_Denorm( Ity_I32,
                                                             value[i] ) ) );
@@ -18384,6 +18854,8 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
                    // xxbrw
                    // xxbrd
                    // xxbrq
+                  // xvcvhpsp (VSX Vector Convert Half-Precision format to Single-Precision format)
+                  // xvcvsphp (VSX Vector round and convert Single-Precision format to Half-Precision format)
        {
           UInt inst_select = IFIELD( theInstr, 16, 5);
  
@@ -18422,15 +18894,14 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
                                             mkU64( 0x0 ),
                                             mkU64( 0xFFFFFFFFFFFFFFFF ) ) ) ) );
  
-               new_XT[i+1] = newTemp(Ity_V128);
-
                 /* Value is normal if it isn't infinite, zero or denormalized */
                 normal[i] = mkNOT1( mkOR1(
-                                         mkOR1( is_NaN( value[i] ),
-                                                is_Inf( value[i], False ) ),
-                                         mkOR1( is_Zero( value[i], False ),
+                                         mkOR1( is_NaN( Ity_I64, value[i] ),
+                                                is_Inf( Ity_I64, value[i] ) ),
+                                         mkOR1( is_Zero( Ity_I64, value[i] ),
                                                  is_Denorm( Ity_I64,
                                                             value[i] ) ) ) );
+               new_XT[i+1] = newTemp(Ity_V128);
  
                 assign( new_XT[i+1],
                         binop( Iop_OrV128,
@@ -18517,9 +18988,9 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
  
                 /* Value is normal if it isn't infinite, zero or denormalized */
                 normal[i] = mkNOT1( mkOR1(
-                                         mkOR1( is_NaN_32( value[i] ),
-                                                is_Inf( value[i], True ) ),
-                                         mkOR1( is_Zero( value[i], True ),
+                                         mkOR1( is_NaN( Ity_I32, value[i] ),
+                                                is_Inf( Ity_I32, value[i] ) ),
+                                         mkOR1( is_Zero( Ity_I32, value[i] ),
                                                  is_Denorm( Ity_I32,
                                                             value[i] ) ) ) );
                 new_value[i] = newTemp(Ity_I32);
@@ -18643,6 +19114,79 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
  
              putVSReg( XT, mkexpr( new_xT[4] ) );
  
+         } else if (inst_select == 24) {
+            // xvcvhpsp, (VSX Vector Convert half-precision format to
+            //           Single-precision format)
+            /* only supported on ISA 3.0 and newer */
+            IRTemp result = newTemp( Ity_V128 );
+            IRTemp src  = newTemp( Ity_I64 );
+
+            if (!allow_isa_3_0) return False;
+
+            DIP("xvcvhpsp v%d,v%d\n", XT,XB);
+            /* The instruction does not set the C or FPCC fields.  The
+             * instruction takes four 16-bit values stored in a 128-bit value
+             * as follows:   x V | x V | x V | x V  where V is a 16-bit
+             * value and x is an unused 16-bit value. To use Iop_F16toF32x4
+             * the four 16-bit values will be gathered into a single 64 bit
+             * value.  The backend will scatter the four 16-bit values back
+             * into a 128-bit operand before issuing the instruction.
+             */
+            /* Gather 16-bit float values from V128 source into new 64-bit
+             * source value for the Iop.
+             */
+            assign( src,
+                    unop( Iop_V128to64,
+                          binop( Iop_Perm8x16,
+                                 mkexpr( vB ),
+                                 binop ( Iop_64HLtoV128,
+                                         mkU64( 0 ),
+                                         mkU64( 0x020306070A0B0E0F) ) ) ) );
+
+            assign( result, unop( Iop_F16toF32x4, mkexpr( src ) ) );
+
+            putVSReg( XT, mkexpr( result ) );
+
+         } else if (inst_select == 25) {
+            // xvcvsphp, (VSX Vector round and Convert single-precision
+            // format to half-precision format)
+            /* only supported on ISA 3.0 and newer */
+            IRTemp result = newTemp( Ity_V128 );
+            IRTemp tmp64  = newTemp( Ity_I64 );
+            IRTemp f0  = newTemp( Ity_I64 );
+            IRTemp f1  = newTemp( Ity_I64 );
+            IRTemp f2  = newTemp( Ity_I64 );
+            IRTemp f3  = newTemp( Ity_I64 );
+
+            if (!allow_isa_3_0) return False;
+            DIP("xvcvsphp v%d,v%d\n", XT,XB);
+
+            /* Iop_F32toF16x4 is V128 -> I64, scatter the 16-bit floats in the
+             * I64 result to the V128 register to store.
+             */
+            assign( tmp64, unop( Iop_F32toF16x4, mkexpr( vB ) ) );
+            assign( f0,binop( Iop_And64,
+                              mkU64( 0xFFFF ), mkexpr( tmp64 ) ) );
+            assign( f1, binop( Iop_And64,
+                               mkU64( 0xFFFF0000 ), mkexpr( tmp64 ) ) );
+            assign( f2, binop( Iop_And64,
+                               mkU64( 0xFFFF00000000 ), mkexpr( tmp64 ) ) );
+            assign( f3, binop( Iop_And64,
+                               mkU64( 0xFFFF000000000000 ), mkexpr( tmp64 ) ) );
+
+            /* Scater 16-bit float values from returned 64-bit value
+             * of V128 result.
+             */
+            assign( result,
+                    binop( Iop_Perm8x16,
+                           binop( Iop_64HLtoV128,
+                                  mkU64( 0 ),
+                                  mkexpr( tmp64 ) ),
+                           binop ( Iop_64HLtoV128,
+                                   mkU64( 0x0000080900000A0B ),
+                                   mkU64( 0x00000C0D00000E0F ) ) ) );
+            putVSReg( XT, mkexpr( result ) );
+
           } else if ( inst_select == 31 ) {
              int i;
              int shift_left = 8;
@@ -18741,9 +19285,9 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
                                                 mkU8( 63 ) ),
                                          mkU64( 0 ) ) ) );
  
-            assign( NaN[i], unop( Iop_1Uto64, is_NaN( value[i] ) ) );
-            assign( inf[i], unop( Iop_1Uto64, is_Inf( value[i], False ) ) );
-            assign( zero[i], unop( Iop_1Uto64, is_Zero( value[i], False ) ) );
+            assign( NaN[i], unop( Iop_1Uto64, is_NaN( Ity_I64, value[i] ) ) );
+            assign( inf[i], unop( Iop_1Uto64, is_Inf( Ity_I64, value[i] ) ) );
+            assign( zero[i], unop( Iop_1Uto64, is_Zero( Ity_I64, value[i] ) ) );
              assign( dnorm[i], unop( Iop_1Uto64, is_Denorm( Ity_I64,
                                                             value[i] ) ) );
  
@@ -20390,45 +20934,460 @@ dis_vx_store ( UInt theInstr )
     return True;
  }
  
-/* VSX Scalar Quad-Precision instructions */
  static Bool
-dis_vx_scalar_quad_precision ( UInt theInstr )
+dis_vx_Scalar_Round_to_quad_integer( UInt theInstr )
  {
+   /* The ISA 3.0 instructions supported in this function require
+    * the underlying hardware platform that supports the ISA3.0
+    * instruction set.
+    */
     /* XX1-Form */
     UChar opc1 = ifieldOPC( theInstr );
-   UInt opc2 = ifieldOPClo10( theInstr );
-   UChar vT_addr = ifieldRegDS( theInstr ) + 32;
-   UChar vA_addr = ifieldRegA( theInstr ) + 32;
-   UChar vB_addr = ifieldRegB( theInstr ) + 32;
-   IRTemp vA = newTemp( Ity_V128 );
-   IRTemp vB = newTemp( Ity_V128 );
-   IRTemp vT  = newTemp( Ity_V128 );
-
-   assign( vB, getVSReg(vB_addr));
+   UInt opc2 = IFIELD( theInstr, 1, 8 );
+   UChar vT_addr = ifieldRegDS( theInstr );
+   UChar vB_addr = ifieldRegB( theInstr );
+   IRTemp vB = newTemp( Ity_F128 );
+   IRTemp vT = newTemp( Ity_F128 );
+   UChar EX = IFIELD( theInstr, 0, 1 );
  
+   assign( vB, getF128Reg( vB_addr ) );
     if (opc1 != 0x3F) {
-      vex_printf( "dis_vx_scalar_quad_precision(ppc)(instr)\n" );
+      vex_printf( "dis_vx_Scalar_Round_to_quad_integer(ppc)(instr)\n" );
        return False;
     }
-
     switch (opc2) {
-
-   case 0x064:     // xscpsgnqp (VSX Scalar Copy Sign Quad-Precision)
+   case 0x005:     // VSX Scalar Round to Quad-Precision Integer [with Inexact]
        {
-         IRTemp sign_vA = newTemp( Ity_I64 );
-         IRTemp vB_hi = newTemp( Ity_I64 );
+         UChar R   = IFIELD( theInstr, 16, 1 );
+         UChar RMC = IFIELD( theInstr, 9, 2 );
  
-         DIP("xscpsgnqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+         /* Store the rm specification bits.  Will extract them later when
+          * the isntruction is issued.
+          */
+         IRExpr* rm = mkU32( R << 3 | RMC << 1 | EX);
  
-         assign( vA, getVSReg(vA_addr) );
+         if ( EX == 0 ) {  // xsrqpi
+            DIP("xsrqpi %d,v%d,v%d,%d\n",  R, vT_addr, vB_addr, RMC);
+            assign( vT, binop( Iop_F128toI128S, rm, mkexpr( vB ) ) );
  
-         assign( sign_vA, binop( Iop_And64,
-                                 unop( Iop_V128HIto64,
-                                       mkexpr( vA ) ),
-                                 mkU64( 0x8000000000000000ULL ) ) );
-         assign( vB_hi, binop( Iop_Or64,
-                               binop( Iop_And64,
-                                      unop( Iop_V128HIto64,
+         } else {     // xsrqpix
+            DIP("xsrqpix %d,v%d,v%d,%d\n", R, vT_addr, vB_addr, RMC);
+            assign( vT, binop( Iop_F128toI128S, rm, mkexpr( vB ) ) );
+         }
+         generate_store_FPRF( Ity_F128, vT );
+      }   /* case 0x005 */
+      break;
+   case 0x025:     // xsrqpxp  VSX Scalar Round Quad-Precision to
+                   // Double-Extended Precision
+      {
+         UChar R   = IFIELD( theInstr, 16, 1 );
+         UChar RMC = IFIELD( theInstr, 9, 2 );
+
+         /* Store the rm specification bits.  Will extract them later when
+          * the isntruction is issued.
+          */
+         IRExpr* rm = mkU32( R << 3 | RMC << 1 );
+
+        DIP("xsrqpxp %d,v%d,v%d,%d\n",  R, vT_addr, vB_addr, RMC);
+         assign( vT, binop( Iop_RndF128, rm, mkexpr( vB ) ) );
+         generate_store_FPRF( Ity_F128, vT );
+      }   /* case 0x025 */
+     break;
+   default:
+      vex_printf( "dis_vx_Scalar_Round_to_quad_integer(ppc)(opc2)\n" );
+      return False;
+   }     /* switch opc2 */
+   putF128Reg( vT_addr, mkexpr( vT ) );
+   return True;
+}
+
+static Bool
+dis_vx_Floating_Point_Arithmetic_quad_precision( UInt theInstr )
+{
+   /* The ISA 3.0 instructions supported in this function require
+    * the underlying hardware platform that supports the ISA 3.0
+    * instruction set.
+    */
+   /* XX1-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UInt opc2 = ifieldOPClo10( theInstr );
+   UChar vT_addr = ifieldRegDS( theInstr );
+   UChar vA_addr = ifieldRegA( theInstr );
+   UChar vB_addr = ifieldRegB( theInstr );
+   IRTemp vA = newTemp( Ity_F128 );
+   IRTemp vB = newTemp( Ity_F128 );
+   IRTemp vT = newTemp( Ity_F128 );
+   IRExpr* rm = get_IR_roundingmode();
+   UChar R0 = IFIELD( theInstr, 0, 1 );
+
+   assign( vB, getF128Reg( vB_addr ) );
+
+   if ( opc1 != 0x3F ) {
+      vex_printf( "Erorr, dis_vx_Floating_Point_Arithmetic_quad_precision(ppc)(instr)\n" );
+      return False;
+   }
+   switch ( opc2 ) {
+   case 0x004:     // xsaddqp (VSX Scalar Add Quad-Precision[using round to Odd])
+      {
+         assign( vA, getF128Reg( vA_addr ) );
+
+         if ( R0 == 0 ) {
+            /* rounding mode specified by RN. Issue inst with R0 = 0 */
+            DIP("xsaddqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT, triop( Iop_AddF128, rm, mkexpr( vA ), mkexpr( vB ) ) );
+
+         } else {
+           /* rounding mode specified by Round to odd. Issue inst with R0 = 1 */
+            DIP("xsaddqpo v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT, triop( Iop_AddF128, set_round_to_Oddmode(),
+                               mkexpr( vA ), mkexpr( vB ) ) );
+         }
+         generate_store_FPRF( Ity_F128, vT );
+         break;
+      }
+   case 0x024:     // xsmulqp (VSX Scalar Multiply Quad-Precision[using round to Odd])
+      {
+         assign( vA, getF128Reg( vA_addr ) );
+
+         if ( R0 == 0 ) {
+            /* rounding mode specified by RN. Issue inst with R0 = 0 */
+            DIP("xsmulqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT, triop( Iop_MulF128, rm, mkexpr( vA ), mkexpr( vB ) ) );
+
+         } else {
+            /* rounding mode specified by Round to odd. Issue inst with R0 = 1 */
+            DIP("xsmulqpo v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT, triop( Iop_MulF128, set_round_to_Oddmode(), mkexpr( vA ),
+                               mkexpr( vB ) ) );
+         }
+         generate_store_FPRF( Ity_F128, vT );
+         break;
+      }
+   case 0x184:   // xsmaddqp (VSX Scalar Multiply add Quad-Precision[using round to Odd])
+      {
+         /* instruction computes (vA * vB) + vC */
+         IRTemp vC = newTemp( Ity_F128 );
+
+         assign( vA, getF128Reg( vA_addr ) );
+         assign( vC, getF128Reg( vT_addr ) );
+
+         if ( R0 == 0 ) {
+            /* rounding mode specified by RN. Issue inst with R0 = 0 */
+            DIP("xsmaddqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT,
+               qop( Iop_MAddF128, rm, mkexpr( vA ),
+                    mkexpr( vC ), mkexpr( vB ) ) );
+
+         } else {
+            /* rounding mode specified by Round to odd. Issue inst with R0 = 1 */
+            DIP("xsmaddqpo v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT,
+               qop( Iop_MAddF128, set_round_to_Oddmode(), mkexpr( vA ),
+                    mkexpr( vC ), mkexpr( vB ) ) );
+         }
+         generate_store_FPRF( Ity_F128, vT );
+         break;
+      }
+   case 0x1A4:   // xsmsubqp (VSX Scalar Multiply Subtract Quad-Precision[using round to Odd])
+      {
+         IRTemp vC = newTemp( Ity_F128 );
+
+         assign( vA, getF128Reg( vA_addr ) );
+         assign( vC, getF128Reg( vT_addr ) );
+
+         if ( R0 == 0 ) {
+            /* rounding mode specified by RN. Issue inst with R0 = 0 */
+            DIP("xsmsubqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT,
+               qop( Iop_MSubF128, rm, mkexpr( vA ),
+                    mkexpr( vC ), mkexpr( vB ) ) );
+
+         } else {
+            /* rounding mode specified by Round to odd. Issue inst with R0 = 1 */
+            DIP("xsmsubqpo v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT,
+                    qop( Iop_MSubF128, set_round_to_Oddmode(),
+                         mkexpr( vA ), mkexpr( vC ), mkexpr( vB ) ) );
+         }
+         generate_store_FPRF( Ity_F128, vT );
+         break;
+      }
+   case 0x1C4:   // xsnmaddqp (VSX Scalar Negative Multiply Add Quad-Precision[using round to Odd])
+      {
+         IRTemp vC = newTemp( Ity_F128 );
+
+         assign( vA, getF128Reg( vA_addr ) );
+         assign( vC, getF128Reg( vT_addr ) );
+
+         if ( R0 == 0 ) {
+            /* rounding mode specified by RN. Issue inst with R0 = 0 */
+            DIP("xsnmaddqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT,
+               qop( Iop_NegMAddF128, rm, mkexpr( vA ),
+                    mkexpr( vC ), mkexpr( vB ) ) );
+
+         } else {
+            /* rounding mode specified by Round to odd. Issue inst with R0 = 1 */
+            DIP("xsnmaddqpo v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT,
+                    qop( Iop_NegMAddF128, set_round_to_Oddmode(),
+                         mkexpr( vA ), mkexpr( vC ), mkexpr( vB ) ) );
+         }
+         generate_store_FPRF( Ity_F128, vT );
+        break;
+      }
+   case 0x1E4:   // xsmsubqp (VSX Scalar Negatve Multiply Subtract Quad-Precision[using round to Odd])
+      {
+         IRTemp vC = newTemp( Ity_F128 );
+
+         assign( vA, getF128Reg( vA_addr ) );
+         assign( vC, getF128Reg( vT_addr ) );
+
+         if ( R0 == 0 ) {
+            /* rounding mode specified by RN. Issue inst with R0 = 0 */
+            DIP("xsnmsubqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT,
+               qop( Iop_NegMSubF128, rm, mkexpr( vA ),
+                    mkexpr( vC ), mkexpr( vB ) ) );
+
+         } else {
+            /* rounding mode specified by Round to odd. Issue inst with R0 = 1 */
+            DIP("xsnmsubqpo v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT,
+                    qop( Iop_NegMSubF128, set_round_to_Oddmode(),
+                         mkexpr( vA ), mkexpr( vC ), mkexpr( vB ) ) );
+         }
+         generate_store_FPRF( Ity_F128, vT );
+         break;
+      }
+   case 0x204:     // xssubqp (VSX Scalar Subtract Quad-Precision[using round to Odd])
+      {
+         assign( vA, getF128Reg( vA_addr ) );
+         if ( R0 == 0 ) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            DIP("xssubqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT, triop( Iop_SubF128, rm, mkexpr( vA ), mkexpr( vB ) ) );
+
+         } else {
+            /* use rounding mode specified by Round to odd. Issue inst with R0 = 1 */
+            DIP("xssubqpo v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT, triop( Iop_SubF128, set_round_to_Oddmode(), mkexpr( vA ),
+                               mkexpr( vB ) ) );
+         }
+         generate_store_FPRF( Ity_F128, vT );
+        break;
+      }
+   case 0x224:     // xsdivqp (VSX Scalar Divide Quad-Precision[using round to Odd])
+      {
+         assign( vA, getF128Reg( vA_addr ) );
+         if ( R0 == 0 ) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            DIP("xsdivqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT, triop( Iop_DivF128, rm, mkexpr( vA ), mkexpr( vB ) ) );
+
+         } else {
+            /* use rounding mode specified by Round to odd. Issue inst with R0 = 1 */
+            DIP("xsdivqpo v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+            assign( vT, triop( Iop_DivF128, set_round_to_Oddmode(), mkexpr( vA ),
+                               mkexpr( vB ) ) );
+         }
+         generate_store_FPRF( Ity_F128, vT );
+         break;
+      }
+   case 0x324:  // xssqrtqp (VSX Scalar Square root Quad-Precision[using round to Odd])
+      {
+         UInt inst_select = IFIELD( theInstr, 16, 5 );
+
+         switch (inst_select) {
+         case 27:
+            {
+               if ( R0 == 0 ) { // xssqrtqp
+                  /* rounding mode specified by RN. Issue inst with R0 = 0 */
+                  DIP("xssqrtqp v%d,v%d\n",  vT_addr, vB_addr);
+                  assign( vT, binop( Iop_SqrtF128, rm, mkexpr( vB ) ) );
+
+               } else {      // xssqrtqpo
+                  /* rounding mode is Round to odd. Issue inst with R0 = 1 */
+                  DIP("xssqrtqpo v%d,v%d\n",  vT_addr, vB_addr);
+                  assign( vT, binop( Iop_SqrtF128, set_round_to_Oddmode(),
+                                     mkexpr( vB ) ) );
+               }
+               generate_store_FPRF( Ity_F128, vT );
+               break;
+            }   /* end case 27 */
+         default:
+            vex_printf("dis_vx_Floating_Point_Arithmetic_quad_precision(0x324 unknown inst_select)\n");
+            return False;
+         }  /* end switch inst_select */
+         break;
+      }   /* end case 0x324 */
+
+   case 0x344:
+      {
+         UInt inst_select = IFIELD( theInstr, 16, 5);
+
+         switch (inst_select) {
+         case 1:    // xscvqpuwz  VSX Scalar Truncate & Convert Quad-Precision
+                    // format to Unsigned Word format
+            {
+               DIP("xscvqpuwz v%d,v%d\n",  vT_addr, vB_addr);
+               assign( vT, unop( Iop_TruncF128toI32U, mkexpr( vB ) ) );
+               break;
+            }
+         case 2:    // xscvudqp  VSX Scalar Convert from Unsigned Doubleword
+                    // format to Quad-Precision format
+            {
+               IRTemp tmp = newTemp( Ity_I64 );
+
+               DIP("xscvudqp v%d,v%d\n",  vT_addr, vB_addr);
+               assign( tmp, unop( Iop_ReinterpF64asI64,
+                                  unop( Iop_F128HItoF64, mkexpr( vB ) ) ) );
+               assign( vT, unop( Iop_I64UtoF128, mkexpr( tmp ) ) );
+               generate_store_FPRF( Ity_F128, vT );
+               break;
+            }
+         case 9:    // xsvqpswz  VSX Scalar Truncate & Convert Quad-Precision
+                    // format to Signed Word format
+            {
+               DIP("xscvqpswz v%d,v%d\n",  vT_addr, vB_addr);
+               assign( vT, unop( Iop_TruncF128toI32S, mkexpr( vB ) ) );
+               break;
+            }
+         case 10:   // xscvsdqp  VSX Scalar from Signed Doubleword format
+                    // Quad-Precision format
+            {
+               IRTemp tmp = newTemp( Ity_I64 );
+
+               DIP("xscvsdqp v%d,v%d\n",  vT_addr, vB_addr);
+
+               assign( tmp, unop( Iop_ReinterpF64asI64,
+                                  unop( Iop_F128HItoF64, mkexpr( vB ) ) ) );
+               assign( vT, unop( Iop_I64StoF128, mkexpr( tmp ) ) );
+               generate_store_FPRF( Ity_F128, vT );
+               break;
+            }
+        case 17:    // xsvqpudz  VSX Scalar Truncate & Convert Quad-Precision
+                    // format to Unigned Doubleword format
+            {
+              DIP("xscvqpudz v%d,v%d\n",  vT_addr, vB_addr);
+               assign( vT, unop( Iop_TruncF128toI64U, mkexpr( vB ) ) );
+               break;
+            }
+         case 20: //  xscvqpdp  Scalar round & Conver Quad-Precision
+                  //  format to Double-Precision format [using round to Odd]
+            {
+               IRTemp ftmp = newTemp( Ity_F64 );
+               IRTemp tmp = newTemp( Ity_I64 );
+
+               /* This instruction takes a 128-bit floating point value and
+                * converts it to a 64-bit floating point value.  The 64-bit
+                * result is stored in the upper 64-bit of the 128-bit result
+                * register.  The lower 64-bit are undefined.
+                */
+               if (R0 == 0) { //  xscvqpdp
+                  /* rounding mode specified by RN. Issue inst with R0 = 0 */
+                  DIP("xscvqpdp v%d,v%d\n",  vT_addr, vB_addr);
+
+                  assign( ftmp, binop( Iop_F128toF64, rm, mkexpr( vB ) ) );
+
+               } else {       //  xscvqpdpo
+                  /* rounding mode is Round to odd. Issue inst with R0 = 1 */
+                  DIP("xscvqpdpo v%d,v%d\n",  vT_addr, vB_addr);
+                  assign( ftmp,
+                          binop( Iop_F128toF64,
+                                 set_round_to_Oddmode(), mkexpr( vB ) ) );
+               }
+
+               /* store 64-bit float in upper 64-bits of 128-bit register,
+                * lower 64-bits are zero.
+                */
+               assign( vT,
+                       binop( Iop_F64HLtoF128,
+                              mkexpr( ftmp ),
+                              unop( Iop_ReinterpI64asF64, mkU64( 0 ) ) ) );
+
+               assign( tmp, unop( Iop_ReinterpF64asI64,
+                                  unop( Iop_F128HItoF64, mkexpr( vT ) ) ) );
+               generate_store_FPRF( Ity_I64, tmp );
+               break;
+            }
+         case 22:    // xscvdpqp VSX Scalar Convert from Double-Precision
+                     // format to Quad-Precision format
+            {
+               DIP("xscvdpqp v%d,v%d\n",  vT_addr, vB_addr);
+               /* The 64-bit value is in the upper 64 bit of the src */
+               assign( vT, unop( Iop_F64toF128,
+                                 unop( Iop_F128HItoF64, mkexpr( vB ) ) ) );
+
+               generate_store_FPRF( Ity_F128, vT );
+               break;
+            }
+         case 25:    // xsvqpsdz  VSX Scalar Truncate & Convert Quad-Precision
+                     // format to Signed Doubleword format
+            {
+               DIP("xscvqpsdz v%d,v%d\n",  vT_addr, vB_addr);
+               assign( vT, unop( Iop_TruncF128toI64S, mkexpr( vB ) ) );
+               break;
+            }
+         default:
+           vex_printf( "dis_vx_Floating_Point_Arithmetic_quad_precision invalid inst_select (ppc)(opc2)\n" );
+           return False;
+         }  /* switch inst_select */
+      }   /* end case 0x344 */
+      break;
+   default:    /* switch opc2 */
+      vex_printf( "dis_vx_Floating_Point_Arithmetic_quad_precision(ppc)(opc2)\n" );
+      return False;
+   }
+   putF128Reg( vT_addr, mkexpr( vT ) );
+   return True;
+}
+
+
+/* VSX Scalar Quad-Precision instructions */
+static Bool
+dis_vx_scalar_quad_precision ( UInt theInstr )
+{
+   /* This function emulates the 128-bit floating point instructions
+    * using existing 128-bit vector instructions (Iops).  The 128-bit
+    * floating point instructions use the same 128-bit vector register
+    * set.
+    */
+   /* XX1-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UInt opc2 = ifieldOPClo10( theInstr );
+   UChar vT_addr = ifieldRegDS( theInstr ) + 32;
+   UChar vA_addr = ifieldRegA( theInstr ) + 32;
+   UChar vB_addr = ifieldRegB( theInstr ) + 32;
+   IRTemp vA = newTemp( Ity_V128 );
+   IRTemp vB = newTemp( Ity_V128 );
+   IRTemp vT = newTemp( Ity_V128 );
+
+   assign( vB, getVSReg( vB_addr ) );
+
+   if (opc1 != 0x3F) {
+      vex_printf( "dis_vx_scalar_quad_precision(ppc)(instr)\n" );
+      return False;
+   }
+
+   switch (opc2) {
+
+   case 0x064:     // xscpsgnqp (VSX Scalar Copy Sign Quad-Precision)
+      {
+         IRTemp sign_vA = newTemp( Ity_I64 );
+         IRTemp vB_hi = newTemp( Ity_I64 );
+
+         DIP("xscpsgnqp v%d,v%d,v%d\n",  vT_addr, vA_addr, vB_addr);
+
+         assign( vA, getVSReg(vA_addr) );
+
+         assign( sign_vA, binop( Iop_And64,
+                                 unop( Iop_V128HIto64,
+                                       mkexpr( vA ) ),
+                                 mkU64( 0x8000000000000000ULL ) ) );
+         assign( vB_hi, binop( Iop_Or64,
+                               binop( Iop_And64,
+                                      unop( Iop_V128HIto64,
                                              mkexpr( vB ) ),
                                        mkU64( 0x7FFFFFFFFFFFFFFFULL ) ),
                                 mkexpr( sign_vA ) ) );
@@ -20446,7 +21405,6 @@ dis_vx_scalar_quad_precision ( UInt theInstr )
           IRExpr *bit4, *bit5, *bit6, *bit7;
           IRExpr *bit_zero, *bit_inf, *same_sign;
           UInt BF = IFIELD( theInstr, 23, 3 );
-
           IRTemp eq_lt_gt = newTemp( Ity_I32 );
           IRTemp CC = newTemp( Ity_I32 );
  
@@ -20488,11 +21446,11 @@ dis_vx_scalar_quad_precision ( UInt theInstr )
                                       mkexpr( vB ) ) ) );
  
           /* test both zero don't care about sign */
-         bit_zero = mkAND1( is_Zero_V128( vA ), is_Zero_V128( vB ) );
+         bit_zero = mkAND1( is_Zero( Ity_V128, vA ), is_Zero( Ity_V128, vB ) );
  
-         /* test both infinit don't care about sign */
+         /* test both for infinity, don't care about sign */
           bit_inf = mkAND1(
-                          mkAND1( is_Inf_V128( vA ), is_Inf_V128( vB ) ),
+                          mkAND1( is_Inf( Ity_V128, vA ), is_Inf( Ity_V128, vB ) ),
                            binop( Iop_CmpEQ64,
                                   binop( Iop_And64,
                                          unop( Iop_V128to64,
@@ -20504,30 +21462,32 @@ dis_vx_scalar_quad_precision ( UInt theInstr )
                                          mkU64( 0x80000000) ) ) );
  
           /* exp A or exp B is NaN */
-         bit7 = mkOR1( is_NaN_V128( vA ),
-                       is_NaN_V128( vB ) );
+         bit7 = mkOR1( is_NaN( Ity_V128, vA ),
+                       is_NaN( Ity_V128, vB ) );
+
+         assign( eq_lt_gt,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32,
+                               binop( Iop_Shl32,
+                                      unop( Iop_1Uto32, bit4 ),
+                                      mkU8( 3 ) ),
+                               binop( Iop_Shl32,
+                                      unop( Iop_1Uto32, bit5 ),
+                                      mkU8( 2 ) ) ),
+                        binop( Iop_Or32,
+                               binop( Iop_Shl32,
+                                      unop( Iop_1Uto32, bit6 ),
+                                      mkU8( 1 ) ),
+                               binop( Iop_Or32,
+                                      binop( Iop_Shl32,
+                                             unop( Iop_1Uto32,
+                                                   bit_zero ),
+                                             mkU8( 1 ) ),
+                                      binop( Iop_Shl32,
+                                             unop( Iop_1Uto32,
+                                                   mkAND1( bit_inf, same_sign ) ),
+                                             mkU8( 1 ) ) ) ) ) );
  
-         assign( eq_lt_gt, binop( Iop_Or32,
-                                  binop( Iop_Or32,
-                                         binop( Iop_Shl32,
-                                                unop( Iop_1Uto32, bit4 ),
-                                                mkU8( 3) ),
-                                         binop( Iop_Shl32,
-                                                unop( Iop_1Uto32, bit5 ),
-                                                mkU8( 2) ) ),
-                                  binop( Iop_Or32,
-                                         binop( Iop_Shl32,
-                                                unop( Iop_1Uto32, bit6 ),
-                                                mkU8( 1 ) ),
-                                         binop( Iop_Or32,
-                                                binop( Iop_Shl32,
-                                                       unop( Iop_1Uto32,
-                                                             bit_zero ),
-                                                       mkU8( 1 ) ),
-                                                binop( Iop_Shl32,
-                                                       unop( Iop_1Uto32,
-                                                             mkAND1( bit_inf, same_sign ) ),
-                                                       mkU8( 1 ) ) ) ) ) );
           assign(CC, binop( Iop_Or32,
                             binop( Iop_And32,
                                    unop( Iop_Not32,
@@ -20586,8 +21546,8 @@ dis_vx_scalar_quad_precision ( UInt theInstr )
                               mkU64( 0x7FFF000000000000 ) ) );
  
           /* exp A or exp B is NaN */
-         bit7 = mkOR1( is_NaN_V128( vA ),
-                       is_NaN_V128( vB ) );
+         bit7 = mkOR1( is_NaN( Ity_V128, vA ),
+                       is_NaN( Ity_V128, vB ) );
  
           /* NaN over rules the other comparisons */
           assign( eq_lt_gt, binop( Iop_Or32,
@@ -20602,11 +21562,11 @@ dis_vx_scalar_quad_precision ( UInt theInstr )
                                                  unop( Iop_1Uto32, bit6 ),
                                                  mkU8( 1 ) ) ) ) );
           assign(CC, binop( Iop_Or32,
-                             binop( Iop_And32,
-                                    unop( Iop_Not32,
-                                          unop( Iop_1Sto32, bit7 ) ),
-                                    mkexpr( eq_lt_gt ) ),
-                             unop( Iop_1Uto32, bit7 ) ) );
+                           binop( Iop_And32,
+                                  unop( Iop_Not32,
+                                        unop( Iop_1Sto32, bit7 ) ),
+                                  mkexpr( eq_lt_gt ) ),
+                           unop( Iop_1Uto32, bit7 ) ) );
  
           /* put result of the comparison into CC and FPCC */
           putGST_field( PPC_GST_CR, mkexpr( CC ), BF );
@@ -20629,7 +21589,7 @@ dis_vx_scalar_quad_precision ( UInt theInstr )
  
           DIP("xststdcqp  %d,v%d,%d\n",  BF, vB_addr, DCMX_mask);
  
-         assign( zero, unop( Iop_1Uto64, is_Zero_V128( vB ) ) );
+         assign( zero, unop( Iop_1Uto64, is_Zero( Ity_V128, vB ) ) );
           assign( pos, unop( Iop_1Uto64,
                              binop( Iop_CmpEQ64,
                                     binop( Iop_Shr64,
@@ -20638,10 +21598,10 @@ dis_vx_scalar_quad_precision ( UInt theInstr )
                                            mkU8( 63 ) ),
                                     mkU64( 0 ) ) ) );
  
-         assign( NaN, unop( Iop_1Uto64, is_NaN_V128( vB ) ) );
-         assign( inf, unop( Iop_1Uto64, is_Inf_V128( vB ) ) );
+         assign( NaN, unop( Iop_1Uto64, is_NaN( Ity_V128, vB ) ) );
+         assign( inf, unop( Iop_1Uto64, is_Inf( Ity_V128, vB ) ) );
  
-         assign( dnorm, unop( Iop_1Uto64, is_Denorm_V128( vB ) ) );
+         assign( dnorm, unop( Iop_1Uto64, is_Denorm( Ity_V128, vB ) ) );
           assign( DCM, create_DCM( Ity_I64, NaN, inf, zero, dnorm, pos ) );
           assign( CC, binop( Iop_Or64,
                              binop( Iop_And64,    /* vB sign bit */
@@ -20676,9 +21636,9 @@ dis_vx_scalar_quad_precision ( UInt theInstr )
           case 0:
              DIP("xsabsqp  v%d,v%d\n",  vT_addr, vB_addr);
              assign( vT, binop( Iop_AndV128, mkexpr( vB ),
-                            binop( Iop_64HLtoV128,
-                                   mkU64( 0x7FFFFFFFFFFFFFFF ),
-                                   mkU64( 0xFFFFFFFFFFFFFFFF ) ) ) );
+                               binop( Iop_64HLtoV128,
+                                      mkU64( 0x7FFFFFFFFFFFFFFF ),
+                                      mkU64( 0xFFFFFFFFFFFFFFFF ) ) ) );
              break;
  
           case 2:
@@ -21340,8 +22300,8 @@ static Bool dis_av_arith ( UInt theInstr )
     a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
     b3 = b2 = b1 = b0 = IRTemp_INVALID;
  
-   assign( vA, getVReg(vA_addr));
-   assign( vB, getVReg(vB_addr));
+   assign( vA, getVReg( vA_addr ) );
+   assign( vB, getVReg( vB_addr ) );
  
     if (opc1 != 0x4) {
        vex_printf("dis_av_arith(ppc)(opc1 != 0x4)\n");
@@ -21353,10 +22313,10 @@ static Bool dis_av_arith ( UInt theInstr )
     case 0x180: { // vaddcuw (Add Carryout Unsigned Word, AV p136)
        DIP("vaddcuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
        /* unsigned_ov(x+y) = (y >u not(x)) */
-      putVReg( vD_addr, binop(Iop_ShrN32x4,
-                              binop(Iop_CmpGT32Ux4, mkexpr(vB),
-                                    unop(Iop_NotV128, mkexpr(vA))),
-                              mkU8(31)) );
+      putVReg( vD_addr, binop( Iop_ShrN32x4,
+                               binop( Iop_CmpGT32Ux4, mkexpr( vB ),
+                                    unop( Iop_NotV128, mkexpr( vA ) ) ),
+                              mkU8( 31 ) ) );
        break;
     }
     case 0x000: // vaddubm (Add Unsigned Byte Modulo, AV p141)
@@ -22392,22 +23352,22 @@ static Bool dis_av_polymultarith ( UInt theInstr )
  
     switch (opc2) {
        /* Polynomial Multiply-Add */
-      case 0x408:  // vpmsumb   Vector Polynomial Multipy-sum Byte
+      case 0x408:  // vpmsumb   Vector Polynomial Multiply-sum Byte
           DIP("vpmsumb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
           putVReg( vD_addr, binop(Iop_PolynomialMulAdd8x16,
                                   mkexpr(vA), mkexpr(vB)) );
           break;
-      case 0x448:  // vpmsumd   Vector Polynomial Multipy-sum Double Word
+      case 0x448:  // vpmsumd   Vector Polynomial Multiply-sum Double Word
           DIP("vpmsumd v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
           putVReg( vD_addr, binop(Iop_PolynomialMulAdd64x2,
                                   mkexpr(vA), mkexpr(vB)) );
           break;
-      case 0x488:  // vpmsumw   Vector Polynomial Multipy-sum Word
+      case 0x488:  // vpmsumw   Vector Polynomial Multiply-sum Word
           DIP("vpmsumw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
           putVReg( vD_addr, binop(Iop_PolynomialMulAdd32x4,
                                   mkexpr(vA), mkexpr(vB)) );
           break;
-      case 0x4C8:  // vpmsumh   Vector Polynomial Multipy-sum Half Word
+      case 0x4C8:  // vpmsumh   Vector Polynomial Multiply-sum Half Word
           DIP("vpmsumh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
           putVReg( vD_addr, binop(Iop_PolynomialMulAdd16x8,
                                   mkexpr(vA), mkexpr(vB)) );
@@ -23254,6 +24214,63 @@ static Bool dis_abs_diff ( UInt theInstr )
    return True;
  }
  
+/*
+  AltiVec 128 bit integer multiply by 10 Instructions
+*/
+static Bool dis_av_mult10 ( UInt theInstr )
+{
+   /* VX-Form */
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar vT_addr  = ifieldRegDS(theInstr);
+   UChar vA_addr  = ifieldRegA(theInstr);
+   UChar vB_addr  = ifieldRegB(theInstr);
+   UInt  opc2     = IFIELD( theInstr, 0, 11 );
+
+   IRTemp vA    = newTemp(Ity_V128);
+   assign( vA, getVReg(vA_addr));
+
+   if (opc1 != 0x4) {
+      vex_printf("dis_av_mult10(ppc)(instr)\n");
+      return False;
+   }
+   switch (opc2) {
+   case 0x001: { // vmul10cuq (Vector Multiply-by-10 and write carry
+      DIP("vmul10cuq v%d,v%d\n", vT_addr, vA_addr);
+      putVReg( vT_addr,
+               unop( Iop_MulI128by10Carry, mkexpr( vA ) ) );
+      break;
+  }
+   case 0x041: { // vmul10uq (Vector Multiply-by-10 Extended and write carry
+                 //           Unsigned Quadword VX form)
+      IRTemp vB    = newTemp(Ity_V128);
+      assign( vB, getVReg(vB_addr));
+      DIP("vmul10ecuq v%d,v%d,v%d\n", vT_addr, vA_addr, vB_addr);
+      putVReg( vT_addr,
+               binop( Iop_MulI128by10ECarry, mkexpr( vA ), mkexpr( vB ) ) );
+      break;
+   }
+   case 0x201: { // vmul10uq (Vector Multiply-by-10 Unsigned Quadword VX form)
+      DIP("vmul10uq v%d,v%d\n", vT_addr, vA_addr);
+      putVReg( vT_addr,
+               unop( Iop_MulI128by10, mkexpr( vA ) ) );
+      break;
+   }
+  case 0x241: { // vmul10uq (Vector Multiply-by-10 Extended
+                //           Unsigned Quadword VX form)
+      IRTemp vB    = newTemp(Ity_V128);
+      assign( vB, getVReg(vB_addr));
+      DIP("vmul10euq v%d,v%d,v%d\n", vT_addr, vA_addr, vB_addr);
+      putVReg( vT_addr,
+               binop( Iop_MulI128by10E, mkexpr( vA ), mkexpr( vB ) ) );
+      break;
+   }
+   default:
+      vex_printf("dis_av_mult10(ppc)(opc2)\n");
+      return False;
+   }
+   return True;
+}
+
  /*
    AltiVec Pack/Unpack Instructions
  */
@@ -24560,14 +25577,15 @@ static Bool dis_av_bcd ( UInt theInstr )
                                 mkU64( value_mask_low ) ),
                          mkexpr( vB ) );
  
-         /* Check if all of the non-sign digits are zero */
-         zero = BCDstring_zero( binop( Iop_AndV128,
-                                       binop( Iop_64HLtoV128,
-                                              mkU64( 0xFFFFFFFFFFFFFFFF ),
-                                              mkU64( 0xFFFFFFFFFFFFFFF0 ) ),
-                                       value ) );
  
-         if ( opc2 == 0x101) {     // bcdtrunc.
+         if ( opc2 == 0x101 ) {     // bcdtrunc.
+            /* Check if all of the non-sign digits are zero */
+            zero = BCDstring_zero( binop( Iop_AndV128,
+                                          binop( Iop_64HLtoV128,
+                                                 mkU64( 0xFFFFFFFFFFFFFFFF ),
+                                                 mkU64( 0xFFFFFFFFFFFFFFF0 ) ),
+                                          value ) );
+
              /* Sign codes of 0xA, 0xC, 0xE or 0xF are positive, sign
               * codes 0xB and 0xD are negative.
               */
@@ -24585,13 +25603,7 @@ static Bool dis_av_bcd ( UInt theInstr )
              /* Pos position AKA gt = 1 if ((not neg) & (not eq zero)) */
              pos = mkAND1( mkNOT1( sign ), mkNOT1( zero ) );
  
-            /* if PS = 0
-                  vB positive, sign is C
-                  vB negative, sign is D
-               if PS = 1
-                  vB positive, sign is F
-                  vB negative, sign is D
-               Note can't use pos or neg here since they are ANDed with zero,
+            /* Note can't use pos or neg here since they are ANDed with zero,
                 use sign instead.
              */
              if (ps == 0) {
@@ -24610,13 +25622,17 @@ static Bool dis_av_bcd ( UInt theInstr )
                       is_BCDstring128( /* Signed */True, mkexpr( vB ) ) );
  
           } else {   // bcdutrunc.
+            /* Check if all of the digits are zero */
+            zero = BCDstring_zero( value );
+
              /* unsigned value, need to make CC code happy */
              neg = mkU1( 0 );
-            /* Pos position AKA gt = 1 if ((not neg) & (not eq zero)) */
-            pos = mkAND1( mkNOT1( neg ), mkNOT1( zero ) );
+
+            /* Pos position AKA gt = 1 if (not eq zero) */
+            pos = mkNOT1( zero );
              valid =
                 unop( Iop_64to32,
-                     is_BCDstring128( /* Signed */True, mkexpr( vB ) ) );
+                     is_BCDstring128( /* Unsigned */False, mkexpr( vB ) ) );
           }
  
           /* If vB is not valid, the result is undefined, but we need to
@@ -24641,11 +25657,133 @@ static Bool dis_av_bcd ( UInt theInstr )
        }
        break;
  
-   case 0x181:  // bcdctz., bcdctn., bcdcfz., bcdcfn., bcdsetsgn.
+   case 0x181:   // bcdctz., bcdctn., bcdcfz., bcdcfn., bcdsetsgn.,
+                 // bcdcfsq., bcdctsq.
        {
           UInt inst_select = IFIELD( theInstr, 16, 5);
  
           switch (inst_select) {
+         case 0:  // bcdctsq.  (Decimal Convert to Signed Quadword VX-form)
+            {
+               IRExpr *sign;
+
+               /* The instruction takes a 32-bit integer in a vector source
+                * register and returns the signed packed decimal number
+                * in a vector register.  The source integer needs to be moved
+                * from the V128 to an I32 for the Iop.
+                */
+
+               DIP("bcdctsq v%d, v%d\n", vRT_addr, vRB_addr);
+
+               putVReg( vRT_addr, unop( Iop_BCD128toI128S, mkexpr( vB ) ) );
+
+               sign =  binop( Iop_And64,
+                              unop( Iop_V128to64, mkexpr( vB ) ),
+                              mkU64( 0xF ) );
+               zero = mkAND1( binop( Iop_CmpEQ64,
+                                     unop( Iop_V128HIto64, mkexpr( vB ) ),
+                                     mkU64( 0x0 ) ),
+                              binop( Iop_CmpEQ64,
+                                     binop( Iop_And64,
+                                            unop( Iop_V128to64, mkexpr( vB ) ),
+                                            mkU64( 0xFFFFFFF0 ) ),
+                                     mkU64( 0x0 ) ) );
+               pos = mkAND1( mkNOT1( zero ),
+                             mkOR1( mkOR1( binop( Iop_CmpEQ64,
+                                                  sign, mkU64( 0xA ) ),
+                                           binop( Iop_CmpEQ64,
+                                                  sign, mkU64( 0xC ) ) ),
+                                    mkOR1( binop( Iop_CmpEQ64,
+                                                  sign, mkU64( 0xE ) ),
+                                           binop( Iop_CmpEQ64,
+                                                  sign, mkU64( 0xF ) ) ) ) );
+               neg = mkAND1( mkNOT1( zero ),
+                             mkOR1( binop( Iop_CmpEQ64, sign, mkU64( 0xB ) ),
+                                    binop( Iop_CmpEQ64, sign, mkU64( 0xD ) ) ) );
+
+               /* Check each of the nibbles for a valid digit 0 to 9 */
+               valid =
+                  unop( Iop_64to32,
+                        is_BCDstring128( /* Signed */True, mkexpr( vB ) ) );
+               overflow = mkU1( 0 );  // not used
+            }
+            break;
+
+         case 2:  // bcdcfsq. (Decimal Convert from Signed Quadword VX-form)
+            {
+               IRExpr *pos_upper_gt, *pos_upper_eq, *pos_lower_gt;
+               IRExpr *neg_upper_lt, *neg_upper_eq, *neg_lower_lt;
+
+               DIP("bcdcfsq v%d, v%d, %d\n", vRT_addr, vRB_addr, ps);
+
+               /* The instruction takes a signed packed decimal number and
+                * returns the integer value in the vector register.  The Iop
+                * returns an I32 which needs to be moved to the destination
+                * vector register.
+                */
+               putVReg( vRT_addr,
+                        binop( Iop_I128StoBCD128, mkexpr( vB ), mkU8( ps ) ) );
+
+               zero = mkAND1( binop( Iop_CmpEQ64, mkU64( 0 ),
+                                     unop( Iop_V128to64, mkexpr( vB ) ) ),
+                              binop( Iop_CmpEQ64, mkU64( 0 ),
+                                     unop( Iop_V128HIto64, mkexpr( vB ) ) ) );
+               pos = mkAND1( mkNOT1 ( zero ),
+                             binop( Iop_CmpEQ64, mkU64( 0 ),
+                                    binop( Iop_And64,
+                                           unop( Iop_V128HIto64,
+                                                 mkexpr( vB ) ),
+                                           mkU64( 0x8000000000000000UL ) ) ) );
+               neg = mkAND1( mkNOT1 ( zero ),
+                             binop( Iop_CmpEQ64, mkU64( 0x8000000000000000UL ),
+                                    binop( Iop_And64,
+                                           unop( Iop_V128HIto64,
+                                                 mkexpr( vB ) ),
+                                           mkU64( 0x8000000000000000UL ) ) ) );
+
+               /* Overflow occurs if: vB > 10^31-1 OR vB < -10^31-1
+                * do not have a 128 bit compare.  Will have to compare the
+                * upper 64 bit and athe lower 64 bits.  If the upper 64-bits
+                * are equal then overflow if the lower 64 bits of vB is greater
+                * otherwise if the upper bits of vB is greater then the max
+                * for the upper 64-bits then overflow
+                *
+                *     10^31-1 = 0x7E37BE2022C0914B267FFFFFFF
+                */
+               pos_upper_gt = binop( Iop_CmpLT64U,
+                                     mkU64( 0x7E37BE2022 ),
+                                     unop( Iop_V128HIto64, mkexpr( vB ) ) );
+               pos_upper_eq = binop( Iop_CmpEQ64,
+                                     unop( Iop_V128HIto64, mkexpr( vB ) ),
+                                     mkU64( 0x7E37BE2022 ) );
+               pos_lower_gt = binop( Iop_CmpLT64U,
+                                     mkU64( 0x0914B267FFFFFFF ),
+                                     unop( Iop_V128to64, mkexpr( vB ) ) );
+               /* -10^31-1 =  0X81C841DFDD3F6EB4D97FFFFFFF */
+               neg_upper_lt = binop( Iop_CmpLT64U,
+                                     mkU64( 0X81C841DFDD ),
+                                     unop( Iop_V128HIto64, mkexpr( vB ) ) );
+               neg_upper_eq = binop( Iop_CmpEQ64,
+                                     unop( Iop_V128HIto64, mkexpr( vB ) ),
+                                     mkU64( 0X81C841DFDD ) );
+               neg_lower_lt = binop( Iop_CmpLT64U,
+                                     mkU64( 0x3F6EB4D97FFFFFFF ),
+                                     unop( Iop_V128to64, mkexpr( vB ) ) );
+
+               /* calculate overflow, masking for positive and negative */
+               overflow = mkOR1( mkAND1( pos,
+                                         mkOR1( pos_upper_gt,
+                                                mkAND1( pos_upper_eq,
+                                                        pos_lower_gt ) ) ),
+                                 mkAND1( neg,
+                                         mkOR1( neg_upper_lt,
+                                                mkAND1( neg_upper_eq,
+                                                        neg_lower_lt )
+                                                ) ) );
+               valid = mkU32( 1 );
+            }
+            break;
+
           case 4:  // bcdctz. (Decimal Convert to Zoned VX-form)
              {
                 IRExpr *ox_flag, *sign, *vrb_nibble30;
@@ -25829,7 +26967,7 @@ static struct vsx_insn vsx_all[] = {
        { 0x3ac, "xvcmpgtdp." },
        { 0x3b0, "xvcvdpsxds" },
        { 0x3b2, "xvabsdp" },
-      { 0x3b6, "xxbr[h|w|d|q]|xvxexpdp|xvxexpsp|xvxsigdp|xvxsigsp" },
+      { 0x3b6, "xxbr[h|w|d|q]|xvxexpdp|xvxexpsp|xvxsigdp|xvxsigsp|xvcvhpsp|xvcvsphp|xscvdphp|xscvhpdp" },
        { 0x3c0, "xvcpsgndp" },
        { 0x3c4, "xvnmsubadp" },
        { 0x3cc, "xvcmpgedp." },
@@ -26344,13 +27482,13 @@ DisResult disInstr_PPC_WRK (
        UInt vsxOpc2 = get_VSX60_opc2(opc2);
  
        if (( opc2hi == 13 ) && ( opc2lo == 5)) { //xvtstdcsp
-         if (dis_vxs_misc(theInstr, 0x354))
+         if (dis_vxs_misc(theInstr, 0x354, allow_isa_3_0))
              goto decode_success;
           goto decode_failure;
        }
  
        if (( opc2hi == 15 ) && ( opc2lo == 5)) { //xvtstdcdp
-            if (dis_vxs_misc(theInstr, 0x3D4))
+         if (dis_vxs_misc(theInstr, 0x3D4, allow_isa_3_0))
                 goto decode_success;
              goto decode_failure;
        }
@@ -26366,7 +27504,7 @@ DisResult disInstr_PPC_WRK (
           /* This is a special case of the XX1 form where the  RA, RB
            * fields hold an immediate value.
            */
-         if (dis_vxs_misc(theInstr, opc2)) goto decode_success;
+         if (dis_vxs_misc(theInstr, opc2, allow_isa_3_0)) goto decode_success;
           goto decode_failure;
        }
  
@@ -26392,13 +27530,14 @@ DisResult disInstr_PPC_WRK (
           case 0x0D6: case 0x0B2: // xsrdpic, xsrdpiz
           case 0x092: case 0x232: // xsrdpi, xsrsp
           case 0x3B6:             // xxbrh, xvxexpdp, xvxexpsp, xvxsigdp
-                                 // xvxsigsp
+                                 // xvxsigsp, xvcvhpsp
           case 0x2b6:             // xsxexpdp, xsxsigdp
           case 0x254: case 0x2d4: // xststdcsp, xststdcdp
           case 0x354:             // xvtstdcsp
           case 0x360:case 0x396:  // xviexpsp, xsiexpdp
           case 0x3D4: case 0x3E0: // xvtstdcdp, xviexpdp
-            if (dis_vxs_misc(theInstr, vsxOpc2)) goto decode_success;
+            if (dis_vxs_misc(theInstr, vsxOpc2, allow_isa_3_0))
+               goto decode_success;
              goto decode_failure;
           case 0x08C: case 0x0AC: // xscmpudp, xscmpodp
              if (dis_vx_cmp(theInstr, vsxOpc2)) goto decode_success;
@@ -26546,7 +27685,21 @@ DisResult disInstr_PPC_WRK (
           break; // Fall through
        }
  
+      opc2 = IFIELD(theInstr, 1, 8);
+      switch (opc2) {
+      case 0x5: // xsrqpi, xsrqpix
+      case 0x25: // xsrqpxp
+         if ( !mode64 || !allow_isa_3_0 ) goto decode_failure;
+         if ( dis_vx_Scalar_Round_to_quad_integer( theInstr ) )
+            goto decode_success;
+         goto decode_failure;
+      default:
+         break; // Fall through
+      }
+
        opc2 = IFIELD(theInstr, 1, 10);
+      UInt inst_select = IFIELD( theInstr, 16, 5 );
+
        switch (opc2) {
        /* 128-bit DFP instructions */
        case 0x2:    // daddq - DFP Add
@@ -26656,17 +27809,47 @@ DisResult disInstr_PPC_WRK (
           if (dis_fp_scr( theInstr, allow_GX )) goto decode_success;
           goto decode_failure;
  
-        /* VSX Scalar Quad-Precision instructions */
+      case 0x324: // xsabsqp, xsxexpqp,xsnabsqp, xsnegqp, xsxsigqp
+         if ( inst_select == 27 ) {    // xssqrtqp
+            if ( dis_vx_Floating_Point_Arithmetic_quad_precision( theInstr ) )
+               goto decode_success;
+         }
+
+         /* Instructions implemented with Pre ISA 3.0 Iops */
+         /* VSX Scalar Quad-Precision instructions */
        case 0x064: // xscpsgnqp
        case 0x0A4: // xscmpexpqp
        case 0x084: // xscmpoqp
        case 0x284: // xscmpuqp
        case 0x2C4: // xststdcqp
-      case 0x324: // xsabsqp, xsxexpqp,xsnabsqp, xsnegqp, xsxsigqp
        case 0x364: // xsiexpqp
           if (dis_vx_scalar_quad_precision( theInstr )) goto decode_success;
           goto decode_failure;
  
+      /* Instructions implemented using ISA 3.0 instructions */
+                  // xsaddqpo (VSX Scalar Add Quad-Precision [using round to ODD]
+      case 0x004: // xsaddqp  (VSX Scalar Add Quad-Precision [using RN mode]
+                  // xsmulqpo (VSX Scalar Multiply Quad-Precision [using round to ODD]
+      case 0x024: // xsmulqp  (VSX Scalar Multiply Quad-Precision [using RN mode]
+                  // xsmaddqpo (VSX Scalar Multiply Add Quad-Precision [using round to ODD]
+      case 0x184: // xsmaddqp  (VSX Scalar Multiply Add Quad-Precision [using RN mode]
+                  // xsmsubqpo (VSX Scalar Multiply Sub Quad-Precision [using round to ODD]
+      case 0x1A4: // xsmsubqp  (VSX Scalar Multiply Sub Quad-Precision [using RN mode]
+                  // xsnmaddqpo (VSX Scalar Negative Multiply Add Quad-Precision [using round to ODD]
+      case 0x1C4: // xsnmaddqp  (VSX Scalar Negative Multiply Add Quad-Precision [using RN mode]
+                  // xsnmsubqpo (VSX Scalar Negative Multiply Sub Quad-Precision [using round to ODD]
+      case 0x1E4: // xsnmsubqp  (VSX Scalar Negative Multiply Sub Quad-Precision [usin RN mode]
+                  // xssubqpo (VSX Scalar Subrtact Quad-Precision [using round to ODD]
+      case 0x204: // xssubqp  (VSX Scalar Subrtact Quad-Precision [using RN mode]
+                  // xsdivqpo (VSX Scalar Divde Quad-Precision [using round to ODD]
+      case 0x224: // xsdivqp  (VSX Scalar Divde Quad-Precision [using RN mode]
+      case 0x344: // xscvudqp, xscvsdqp, xscvqpdp, xscvqpdpo, xsvqpdp
+                  // xscvqpswz, xscvqpuwz, xscvqpudz, xscvqpsdz
+         if ( !mode64 || !allow_isa_3_0 ) goto decode_failure;
+         if ( dis_vx_Floating_Point_Arithmetic_quad_precision( theInstr ) )
+            goto decode_success;
+         goto decode_failure;
+
        default:
           break; // Fall through...
        }
@@ -26822,6 +28005,7 @@ DisResult disInstr_PPC_WRK (
  
        case 0x10B: case 0x30B: // moduw, modsw
        case 0x109: case 0x309: // modsd, modud
+      case 0x21A: case 0x23A: // cnttzw, cnttzd
           if (dis_modulo_int( theInstr )) goto decode_success;
           goto decode_failure;
  
@@ -27187,29 +28371,24 @@ DisResult disInstr_PPC_WRK (
        }
  
        opc2 = IFIELD(theInstr, 0, 9);
-      switch (opc2) {
-      /* BCD arithmetic */
-      case 0x001: case 0x041:             // bcdadd, bcdsub
-      case 0x101: case 0x141:             // bcdtrunc., bcdutrunc.
-      case 0x081: case 0x0C1: case 0x1C1: // bcdus., bcds., bcdsr.
-      case 0x181:                         // bcdcfn., bcdcfz.
-
-         /* These instructions must have bit 21 set to 1 for a correct
-          * opcode match.
+      if (IFIELD(theInstr, 10, 1) == 1) {
+         /* The following instructions have bit 21 set and a PS bit (bit 22)
+          * Bit 21 distinquishes them from instructions with an 11 bit opc2
+          * field.
            */
-         if ((theInstr & 0x400) != 0x400)
-            break;  /* keep looking */
-
-         if (!allow_isa_2_07) goto decode_noP8;
-         if (dis_av_bcd( theInstr )) goto decode_success;
-         goto decode_failure;
-
-         if (!allow_isa_2_07) goto decode_noP8;
-         if (dis_av_bcd( theInstr )) goto decode_success;
-         goto decode_failure;
-
-      default:
-         break;  // Fall through...
+         switch (opc2) {
+            /* BCD arithmetic */
+            case 0x001: case 0x041:             // bcdadd, bcdsub
+            case 0x101: case 0x141:             // bcdtrunc., bcdutrunc.
+            case 0x081: case 0x0C1: case 0x1C1: // bcdus., bcds., bcdsr.
+            case 0x181:                         // bcdcfn., bcdcfz.
+                                                // bcdctz., bcdcfsq., bcdctsq.
+               if (!allow_isa_2_07) goto decode_noP8;
+               if (dis_av_bcd( theInstr )) goto decode_success;
+              goto decode_failure;
+         default:
+              break;  // Fall through...
+            }
        }
  
        opc2 = IFIELD(theInstr, 0, 11);
@@ -27350,6 +28529,14 @@ DisResult disInstr_PPC_WRK (
            if (dis_av_permute( theInstr )) goto decode_success;
            goto decode_failure;
  
+      /* AltiVec 128 bit integer multiply by 10 Instructions */
+      case 0x201: case 0x001:               //vmul10uq, vmul10cuq
+      case 0x241: case 0x041:               //vmul10euq, vmul10ceuq
+          if (!allow_V) goto decode_noV;
+          if (!allow_isa_3_0) goto decode_noP9;
+          if (dis_av_mult10( theInstr )) goto decode_success;
+          goto decode_failure;
+
        /* AV Pack, Unpack */
        case 0x00E: case 0x04E: case 0x08E: // vpkuhum, vpkuwum, vpkuhus
        case 0x0CE:                         // vpkuwus
@@ -27515,6 +28702,13 @@ DisResult disInstr_PPC_WRK (
                  theInstr);
        goto not_supported;
  
+   decode_noP9:
+      vassert(!allow_isa_3_0);
+      vex_printf("disInstr(ppc): found the Power 9 instruction 0x%x that can't be handled\n"
+                 "by Valgrind on this host.  This instruction requires a host that\n"
+                "supports Power 9 instructions.\n",
+                theInstr);
+      goto not_supported;
  
     decode_failure:
     /* All decode failures end up here. */
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c

index 06487b5d5d61ffb0a7d170005e186da0c1dd9f1e..73db64c5d33d62cd82cd3aa1d07c7ee05becf3f5 100644 (file)
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -498,6 +498,8 @@ const HChar* showPPCUnaryOp ( PPCUnaryOp op ) {
     case Pun_NEG:   return "neg";
     case Pun_CLZ32: return "cntlzw";
     case Pun_CLZ64: return "cntlzd";
+   case Pun_CTZ32: return "cnttzw";
+   case Pun_CTZ64: return "cnttzd";
     case Pun_EXTSW: return "extsw";
     default: vpanic("showPPCUnaryOp");
     }
@@ -550,6 +552,31 @@ const HChar* showPPCFpOp ( PPCFpOp op ) {
        case Pfp_FRIN:   return "frin";
        case Pfp_FRIP:   return "frip";
        case Pfp_FRIZ:   return "friz";
+      case Pfp_FPADDQ:     return "xsaddqp";
+      case Pfp_FPSUBQ:     return "xsubqp";
+      case Pfp_FPMULQ:     return "xsmulqp";
+      case Pfp_FPDIVQ:     return "xsdivqp";
+      case Pfp_FPMULADDQ:        return "xsmaddqp";
+      case Pfp_FPMULSUBQ:        return "xsmsubqp";
+      case Pfp_FPNEGMULADDQ:     return "xsnmaddqp";
+      case Pfp_FPNEGMULSUBQ:     return "xsnmsubqp";
+      case Pfp_FPADDQRNDODD:     return "xsaddqpo";
+      case Pfp_FPSUBQRNDODD:     return "xsubqpo";
+      case Pfp_FPMULQRNDODD:     return "xsmulqpo";
+      case Pfp_FPDIVQRNDODD:     return "xsaddqpo";
+      case Pfp_FPMULADDQRNDODD:      return "xsmaddqpo";
+      case Pfp_FPMULSUBQRNDODD:      return "xsmsubqpo";
+      case Pfp_FPNEGMULADDQRNDODD:   return "xsnmaddqpo";
+      case Pfp_FPNEGMULSUBQRNDODD:   return "xsnmsubqpo";
+      case Pfp_FPQTOD:               return "xscvqpdp";
+      case Pfp_FPQTODRNDODD:         return "xscvqpdpo";
+      case Pfp_FPDTOQ:               return "xscvdpqp";
+      case Pfp_IDSTOQ:               return "xscvsdqp";
+      case Pfp_IDUTOQ:               return "xscvudqp";
+      case Pfp_TRUNCFPQTOISD:        return "xscvqpsdz";
+      case Pfp_TRUNCFPQTOISW:        return "xscvqpswz";
+      case Pfp_TRUNCFPQTOIUD:        return "xscvqpudz";
+      case Pfp_TRUNCFPQTOIUW:        return "xscvqpuwz";
        case Pfp_DFPADD:     return "dadd";
        case Pfp_DFPADDQ:    return "daddq";
        case Pfp_DFPSUB:     return "dsub";
@@ -650,6 +677,20 @@ const HChar* showPPCAvOp ( PPCAvOp op ) {
     /* BCD */
     case Pav_BCDAdd:     return "bcdadd.";  // qw
     case Pav_BCDSub:     return "bcdsub.";  // qw
+   case Pav_I128StoBCD128: return  "bcdcfsq.";  //qw
+   case Pav_BCD128toI128S: return  "bcdctsq.";  //qw
+
+   /* I128 mult by 10 */
+   case Pav_MulI128by10:       return  "vmul10uq";  //qw
+   case Pav_MulI128by10Carry:  return  "vmul10cuq";  //qw
+   case Pav_MulI128by10E:      return  "vmul10euq";  //qw
+   case Pav_MulI128by10ECarry: return  "vmul10ecuq";  //qw
+
+      /* F128 to I128 signed */
+   case Pav_F128toI128S:    return "xsrqpi|x";   //qw
+
+      /* F128 round to F128 */
+   case Pav_ROUNDFPQ:       return "xsrqpxp";
  
     /* Polynomial arith */
     case Pav_POLYMULADD: return "vpmsum";   // b, h, w, d
@@ -664,10 +705,31 @@ const HChar* showPPCAvOp ( PPCAvOp op ) {
     case Pav_ZEROCNTHALF: case Pav_ZEROCNTDBL:
        return "vclz_";                           // b, h, w, d
  
+   /* trailing zero count */
+   case Pav_TRAILINGZEROCNTBYTE: case Pav_TRAILINGZEROCNTWORD:
+   case Pav_TRAILINGZEROCNTHALF: case Pav_TRAILINGZEROCNTDBL:
+      return "vctz_";                           // b, h, w, d
+
     /* vector gather (byte-by-byte bit matrix transpose) */
     case Pav_BITMTXXPOSE:
        return "vgbbd";
  
+   /* Vector Half-precision format to single precision conversion */
+   case Pav_F16toF32x4:
+      return"xvcvhpsp";
+
+   /* Vector Single-precision format to Half-precision conversion */
+   case Pav_F32toF16x4:
+      return"xvcvsphp";
+
+   /* Vector Half-precision format to Double precision conversion */
+   case Pav_F16toF64x2:
+      return"xvcvhpdp";
+
+      /* Vector Half-precision format to Double precision conversion */
+   case Pav_F64toF16x2:
+      return"xvcvdphp";
+
     default: vpanic("showPPCAvOp");
     }
  }
@@ -925,6 +987,32 @@ PPCInstr* PPCInstr_FpBinary ( PPCFpOp op, HReg dst,
     i->Pin.FpBinary.srcR = srcR;
     return i;
  }
+PPCInstr* PPCInstr_Fp128Unary(PPCFpOp op, HReg dst, HReg src) {
+   PPCInstr* i = LibVEX_Alloc_inline( sizeof(PPCInstr) );
+   i->tag = Pin_Fp128Unary;
+   i->Pin.Fp128Unary.op = op;
+   i->Pin.Fp128Unary.dst = dst;
+   i->Pin.Fp128Unary.src = src;
+   return i;
+}
+PPCInstr* PPCInstr_Fp128Binary(PPCFpOp op, HReg dst, HReg srcL, HReg srcR) {
+   PPCInstr* i = LibVEX_Alloc_inline( sizeof(PPCInstr) );
+   i->tag = Pin_Fp128Binary;
+   i->Pin.Fp128Binary.op = op;
+   i->Pin.Fp128Binary.dst = dst;
+   i->Pin.Fp128Binary.srcL = srcL;
+   i->Pin.Fp128Binary.srcR = srcR;
+   return i;
+}
+PPCInstr* PPCInstr_Fp128Trinary(PPCFpOp op, HReg dst, HReg srcL, HReg srcR) {
+   PPCInstr* i = LibVEX_Alloc_inline( sizeof(PPCInstr) );
+   i->tag = Pin_Fp128Trinary;
+   i->Pin.Fp128Trinary.op = op;
+   i->Pin.Fp128Trinary.dst = dst;
+   i->Pin.Fp128Trinary.srcL = srcL;
+   i->Pin.Fp128Trinary.srcR = srcR;
+   return i;
+}
  PPCInstr* PPCInstr_FpMulAcc ( PPCFpOp op, HReg dst, HReg srcML, 
                                            HReg srcMR, HReg srcAcc )
  {
@@ -1268,6 +1356,16 @@ PPCInstr* PPCInstr_AvBinary ( PPCAvOp op, HReg dst,
     i->Pin.AvBinary.srcR = srcR;
     return i;
  }
+PPCInstr* PPCInstr_AvBinaryInt ( PPCAvOp op, HReg dst,
+                                 HReg src, PPCRI* val ) {
+   PPCInstr* i            = LibVEX_Alloc_inline(sizeof(PPCInstr));
+   i->tag                 = Pin_AvBinaryInt;
+   i->Pin.AvBinaryInt.op  = op;
+   i->Pin.AvBinaryInt.dst = dst;
+   i->Pin.AvBinaryInt.src = src;
+   i->Pin.AvBinaryInt.val = val;
+   return i;
+}
  PPCInstr* PPCInstr_AvBin8x16 ( PPCAvOp op, HReg dst,
                                 HReg srcL, HReg srcR ) {
     PPCInstr* i           = LibVEX_Alloc_inline(sizeof(PPCInstr));
@@ -1716,6 +1814,28 @@ void ppPPCInstr ( const PPCInstr* i, Bool mode64 )
        vex_printf(",");
        ppHRegPPC(i->Pin.FpBinary.srcR);
        return;
+   case Pin_Fp128Unary:
+      vex_printf("%s ", showPPCFpOp(i->Pin.Fp128Unary.op));
+      ppHRegPPC(i->Pin.Fp128Unary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.Fp128Unary.src);
+      return;
+   case Pin_Fp128Binary:
+      vex_printf("%s ", showPPCFpOp(i->Pin.Fp128Binary.op));
+      ppHRegPPC(i->Pin.Fp128Binary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.Fp128Binary.srcL);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.Fp128Binary.srcR);
+      return;
+   case Pin_Fp128Trinary:
+      vex_printf("%s ", showPPCFpOp(i->Pin.Fp128Trinary.op));
+      ppHRegPPC(i->Pin.Fp128Trinary.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.Fp128Trinary.srcL);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.Fp128Trinary.srcR);
+      return;
     case Pin_FpMulAcc:
        vex_printf("%s ", showPPCFpOp(i->Pin.FpMulAcc.op));
        ppHRegPPC(i->Pin.FpMulAcc.dst);
@@ -1873,6 +1993,14 @@ void ppPPCInstr ( const PPCInstr* i, Bool mode64 )
        vex_printf(",");
        ppHRegPPC(i->Pin.AvBinary.srcR);
        return;
+   case Pin_AvBinaryInt:
+      vex_printf("%s ", showPPCAvOp(i->Pin.AvBinaryInt.op));
+      ppHRegPPC(i->Pin.AvBinaryInt.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvBinaryInt.src);
+      vex_printf(",");
+      ppPPCRI(i->Pin.AvBinaryInt.val);
+      return;
     case Pin_AvBin8x16:
        vex_printf("%s(b) ", showPPCAvOp(i->Pin.AvBin8x16.op));
        ppHRegPPC(i->Pin.AvBin8x16.dst);
@@ -2364,6 +2492,21 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
        addHRegUse(u, HRmRead,  i->Pin.FpBinary.srcL);
        addHRegUse(u, HRmRead,  i->Pin.FpBinary.srcR);
        return;
+
+   case Pin_Fp128Unary:
+      addHRegUse(u, HRmWrite, i->Pin.Fp128Unary.dst);
+      addHRegUse(u, HRmRead, i->Pin.Fp128Unary.src);
+      return;
+   case Pin_Fp128Binary:
+      addHRegUse(u, HRmWrite, i->Pin.Fp128Binary.dst);
+      addHRegUse(u, HRmRead, i->Pin.Fp128Binary.srcL);
+      addHRegUse(u, HRmRead, i->Pin.Fp128Binary.srcR);
+      return;
+   case Pin_Fp128Trinary:
+      addHRegUse(u, HRmModify, i->Pin.Fp128Trinary.dst);
+      addHRegUse(u, HRmRead, i->Pin.Fp128Trinary.srcL);
+      addHRegUse(u, HRmRead, i->Pin.Fp128Trinary.srcR);
+      return;
     case Pin_FpMulAcc:
        addHRegUse(u, HRmWrite, i->Pin.FpMulAcc.dst);
        addHRegUse(u, HRmRead,  i->Pin.FpMulAcc.srcML);
@@ -2429,6 +2572,10 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
           addHRegUse(u, HRmRead,  i->Pin.AvBinary.srcR);
        }
        return;
+   case Pin_AvBinaryInt:
+      addHRegUse(u, HRmWrite, i->Pin.AvBinaryInt.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvBinaryInt.src);
+      return;
     case Pin_AvBin8x16:
        addHRegUse(u, HRmWrite, i->Pin.AvBin8x16.dst);
        addHRegUse(u, HRmRead,  i->Pin.AvBin8x16.srcL);
@@ -2715,6 +2862,20 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
        mapReg(m, &i->Pin.FpBinary.srcL);
        mapReg(m, &i->Pin.FpBinary.srcR);
        return;
+   case Pin_Fp128Unary:
+      mapReg(m, &i->Pin.Fp128Unary.dst);
+      mapReg(m, &i->Pin.Fp128Unary.src);
+      return;
+   case Pin_Fp128Binary:
+      mapReg(m, &i->Pin.Fp128Binary.dst);
+      mapReg(m, &i->Pin.Fp128Binary.srcL);
+      mapReg(m, &i->Pin.Fp128Binary.srcR);
+      return;
+   case Pin_Fp128Trinary:
+      mapReg(m, &i->Pin.Fp128Trinary.dst);
+      mapReg(m, &i->Pin.Fp128Trinary.srcL);
+      mapReg(m, &i->Pin.Fp128Trinary.srcR);
+      return;
     case Pin_FpMulAcc:
        mapReg(m, &i->Pin.FpMulAcc.dst);
        mapReg(m, &i->Pin.FpMulAcc.srcML);
@@ -2765,6 +2926,10 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
        mapReg(m, &i->Pin.AvBinary.srcL);
        mapReg(m, &i->Pin.AvBinary.srcR);
        return;
+   case Pin_AvBinaryInt:
+      mapReg(m, &i->Pin.AvBinaryInt.dst);
+      mapReg(m, &i->Pin.AvBinaryInt.src);
+      return;
     case Pin_AvBin8x16:
        mapReg(m, &i->Pin.AvBin8x16.dst);
        mapReg(m, &i->Pin.AvBin8x16.srcL);
@@ -3672,6 +3837,95 @@ static UChar* mkFormVX ( UChar* p, UInt opc1, UInt r1, UInt r2,
     return emit32(p, theInstr, endness_host);
  }
  
+static UChar* mkFormVSXRND ( UChar* p, UInt opc1, UInt R, UInt r1,
+                             UInt r2, UInt RMC, UInt opc2, UChar EX,
+                             VexEndness endness_host )
+{
+   /* The register mapping is all done using VR register numbers for the
+    * V128 support.  This means that the operands for this instruction have
+    * been loaded into a VR register. The 32 VR registers map to VSR registers
+    * 32 to 63. For these instructions, the hardware adds 32 to the source
+    * and destination register numbers.  Do not need to adjust the register
+    * numbers for these instructions.
+    */
+
+   UInt theInstr;
+
+   vassert(opc1 < 0x40);
+   vassert(r1   < 0x20);
+   vassert(r2   < 0x20);
+   vassert(opc2 < 0x100);
+   vassert(EX  < 0x2);
+   vassert(R   < 0x2);
+   vassert(RMC < 0x4);
+
+   theInstr = ((opc1<<26)  | (r1<<21) | (R<<16) | (r2<<11) | (RMC<<9) |
+               (opc2 << 1) | EX);
+   return emit32(p, theInstr, endness_host);
+}
+
+static UChar* mkFormVX_BX_TX ( UChar* p, UInt opc1, UInt r1, UInt r2,
+                               UInt r3, UInt opc2, VexEndness endness_host )
+{
+   /* The register mapping is all done using VR register numbers for the
+    * V128 support.  This means that the operands for this instruction have
+    * been loaded into a VR register. The 32 VR registers map to VSR registers
+    * 32 to 63. So to make the issued instruction reference the
+    * corresponding VR register we have to add 32 to the source and
+    * destination operand numbers, then load the new operand number into the
+    * correct bit fields.
+    *
+    * r1 = 32xTX + T; r3 = 32xBX + B;
+    * TX is bit 0, BX is bit 1, T is in bits [25:21], B is in bit [14:11]
+    * opc2 is in bits [10:2]
+    */
+   UInt T, TX, B, BX;
+
+   UInt theInstr;
+
+   r1 += 32;   // adjust the VSR register number to map to the VR number
+   r3 += 32;
+
+   vassert(opc1 < 0x40);
+   vassert(r1   < 0x40);
+   vassert(r2   < 0x20);
+   vassert(r3   < 0x40);
+   vassert(opc2 < 0x800);
+
+   T  = r1 & 0x1F;
+   TX = r1 >> 5;
+   B  = r3 & 0x1F;
+   BX = r3 >> 5;
+   theInstr = ((opc1<<26) | (T<<21) | (r2<<16) | (B<<11) | (opc2<<2)
+               | (BX<<1) | TX);
+   return emit32(p, theInstr, endness_host);
+}
+
+static UChar* mkFormVXR0 ( UChar* p, UInt opc1, UInt r1, UInt r2,
+                           UInt r3, UInt opc2, UChar R0,
+                           VexEndness endness_host )
+{
+   /* The register mapping is all done using VR register numbers for the
+    * V128 support.  This means that the operands for this instruction have
+    * been loaded into a VR register. The 32 VR registers map to VSR registers
+    * 32 to 63.  For these instructions, the hardware adds 32 to the source
+    * and destination register numbers.  Do not need to adjust the register
+    * numbers for these instructions.
+    */
+
+   UInt theInstr;
+
+   vassert(opc1 < 0x40);
+   vassert(r1   < 0x20);  // register numbers are between 0 and 31 (5-bits)
+   vassert(r2   < 0x20);
+   vassert(r3   < 0x20);
+   vassert(opc2 < 0x800);
+   vassert(R0 < 0x2);
+
+   theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) | (opc2<<1) | R0);
+   return emit32(p, theInstr, endness_host);
+}
+
  static UChar* mkFormVXI ( UChar* p, UInt opc1, UInt r1, UInt r2,
                            UInt r3, UInt opc2, VexEndness endness_host )
  {
@@ -4010,6 +4264,15 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
           vassert(mode64);
           p = mkFormX(p, 31, r_src, r_dst, 0, 58, 0, endness_host);
           break;
+      case Pun_CTZ32:  // cnttzw r_dst, r_src
+         /* Note oder of src and dst is backwards from normal */
+         p = mkFormX(p, 31, r_src, r_dst, 0, 538, 0, endness_host);
+         break;
+      case Pun_CTZ64:  // cnttzd r_dst, r_src
+         /* Note oder of src and dst is backwards from normal */
+         vassert(mode64);
+         p = mkFormX(p, 31, r_src, r_dst, 0, 570, 0, endness_host);
+         break;
        case Pun_EXTSW:  // extsw r_dst, r_src
           vassert(mode64);
           p = mkFormX(p, 31, r_src, r_dst, 0, 986, 0, endness_host);
@@ -4605,6 +4868,178 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        goto done;
     }
  
+   case Pin_Fp128Unary: {
+      /* Note Fp128 instructions use the vector scalar registers.  The register
+       * mapping for the V128 type assumes the a vector instruction.  The
+       * PPC hardware has a single register file that the vector scalar
+       * registers and the vector registers map to.  The 32 vector
+       * registers instructions map to the same registers as the vector
+       * scalar registers 32 to 63.  mkFormVXR0 does the needed
+       * adjustment.
+      */
+      UInt fr_dst = vregEnc(i->Pin.Fp128Unary.dst);
+      UInt fr_src = vregEnc(i->Pin.Fp128Unary.src);
+
+      switch (i->Pin.Fp128Unary.op) {
+      case Pfp_FPSQRTQ:         // xssqrtqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, 27, fr_src, 804, 0, endness_host );
+         break;
+      case Pfp_FPSQRTQRNDODD:   // xssqrtqpo, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, 27, fr_src, 804, 1, endness_host );
+         break;
+      case Pfp_FPQTOD:         // xscvqpdp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, 20, fr_src, 836, 0, endness_host );
+         break;
+      case Pfp_FPQTODRNDODD:   // xscvqpdpo, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, 20, fr_src, 836, 1, endness_host );
+        break;
+      case Pfp_FPDTOQ:         // xscvdpqp
+         p = mkFormVXR0( p, 63, fr_dst, 22, fr_src, 836, 0, endness_host );
+         break;
+      case Pfp_IDSTOQ:         // xscvsdqp
+         p = mkFormVXR0( p, 63, fr_dst, 10, fr_src, 836, 0, endness_host );
+         break;
+      case Pfp_IDUTOQ:         // xscvudqp
+         p = mkFormVXR0( p, 63, fr_dst, 2, fr_src, 836, 0, endness_host );
+         break;
+      case Pfp_TRUNCFPQTOISD:   // xscvqpsdz
+         p = mkFormVXR0( p, 63, fr_dst, 25, fr_src, 836, 0, endness_host );
+         break;
+     case Pfp_TRUNCFPQTOISW:   // xscvqpswz
+         p = mkFormVXR0( p, 63, fr_dst, 9, fr_src, 836, 0, endness_host );
+         break;
+     case Pfp_TRUNCFPQTOIUD:   // xscvqpudz
+         p = mkFormVXR0( p, 63, fr_dst, 17, fr_src, 836, 0, endness_host );
+         break;
+      case Pfp_TRUNCFPQTOIUW:   // xscvqpuwz
+         p = mkFormVXR0( p, 63, fr_dst, 1, fr_src, 836, 0, endness_host );
+         break;
+      default:
+        goto bad;
+      }
+      goto done;
+   }
+
+   case Pin_Fp128Binary: {
+      /* Note Fp128 instructions use the vector registers */
+      UInt fr_dst  = vregEnc(i->Pin.Fp128Binary.dst);
+      UInt fr_srcL = vregEnc(i->Pin.Fp128Binary.srcL);
+      UInt fr_srcR = vregEnc(i->Pin.Fp128Binary.srcR);
+
+      /* Note this issues a Vector scalar instruction.  The register
+       * mapping for the V128 type assumes the a vector instruction.  The
+       * PPC hardware has a single register file that the vector scalar
+       * registers and the vector registers map to.  The 32 vector
+       * registers instructions map to the same registers as the vector
+       * scalar registers 32 to 63.  For these instructions the HW adds
+       * 32 to the register numbers to access the VSRR register.  No need
+       * to adjust the numbers to map to the VR register that contians the
+       * operands.
+       */
+
+      switch (i->Pin.Fp128Binary.op) {
+      case Pfp_FPADDQ:         // xsaddqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 4, 0, endness_host );
+         break;
+      case Pfp_FPADDQRNDODD:   // xsaddqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 4, 1, endness_host );
+         break;
+      case Pfp_FPSUBQ:         // xssubqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 516, 0, endness_host );
+         break;
+      case Pfp_FPSUBQRNDODD:   // xssubqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 516, 1, endness_host );
+         break;
+      case Pfp_FPMULQ:         // xsmulqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 36, 0, endness_host );
+         break;
+      case Pfp_FPMULQRNDODD:   // xsmulqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 36, 1, endness_host );
+         break;
+      case Pfp_FPDIVQ:         // xsdivqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 548, 0, endness_host );
+         break;
+      case Pfp_FPDIVQRNDODD:   // xsdivqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 548, 1, endness_host );
+         break;
+      case Pfp_FPMULADDQ:      // xsmaddqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 388, 0, endness_host );
+         break;
+      case Pfp_FPMULADDQRNDODD:   // xsmaddqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 388, 1, endness_host );
+         break;
+      case Pfp_FPMULSUBQ:         // xsmsubqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 420, 0, endness_host );
+         break;
+      case Pfp_FPMULSUBQRNDODD:   // xsmsubsqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 420, 1, endness_host );
+         break;
+      case Pfp_FPNEGMULADDQ:      // xsnmaddqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 452, 0, endness_host );
+         break;
+      case Pfp_FPNEGMULADDQRNDODD:   // xsnmaddqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 452, 1, endness_host );
+         break;
+      case Pfp_FPNEGMULSUBQ:         // xsnmsubqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 484, 0, endness_host );
+         break;
+      case Pfp_FPNEGMULSUBQRNDODD:   // xsnmsubsqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 484, 1, endness_host );
+         break;
+     default:
+          goto bad;
+      }
+      goto done;
+   }
+
+   case Pin_Fp128Trinary: {
+      /* Note Fp128 instructions use the vector registers */
+      UInt fr_dst  = vregEnc(i->Pin.Fp128Binary.dst);
+      UInt fr_srcL = vregEnc(i->Pin.Fp128Binary.srcL);
+      UInt fr_srcR = vregEnc(i->Pin.Fp128Binary.srcR);
+
+      /* Note this issues a Vector scalar instruction.  The register
+       * mapping for the V128 type assumes the a vector instruction.  The
+       * PPC hardware has a single register file that the vector scalar
+       * registers and the vector registers map to.  The 32 vector
+       * registers instructions map to the same registers as the vector
+       * scalar registers 32 to 63.  For these instructions the HW adds
+       * 32 to the register numbers to access the VSRR register.  No need
+       * to adjust the numbers to map to the VR register that contians the
+       * operands.
+       */
+
+      switch (i->Pin.Fp128Binary.op) {
+      case Pfp_FPMULADDQ:      // xsmaddqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 388, 0, endness_host );
+         break;
+      case Pfp_FPMULADDQRNDODD:   // xsmaddqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 388, 1, endness_host );
+         break;
+      case Pfp_FPMULSUBQ:         // xsmsubqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 420, 0, endness_host );
+         break;
+      case Pfp_FPMULSUBQRNDODD:   // xsmsubsqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 420, 1, endness_host );
+         break;
+      case Pfp_FPNEGMULADDQ:      // xsnmaddqp, use rounding specified by RN
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 452, 0, endness_host );
+         break;
+      case Pfp_FPNEGMULADDQRNDODD:   // xsnmaddqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 452, 1, endness_host );
+         break;
+      case Pfp_FPNEGMULSUBQ:         // xsnmsubqp, use rounding specified by RN
+        p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 484, 0, endness_host );
+         break;
+      case Pfp_FPNEGMULSUBQRNDODD:   // xsnmsubsqpo, round to odd
+         p = mkFormVXR0( p, 63, fr_dst, fr_srcL, fr_srcR, 484, 1, endness_host );
+         break;
+      default:
+          goto bad;
+      }
+      goto done;
+   }
+
     case Pin_FpMulAcc: {
        UInt fr_dst    = fregEnc(i->Pin.FpMulAcc.dst);
        UInt fr_srcML  = fregEnc(i->Pin.FpMulAcc.srcML);
@@ -4807,7 +5242,8 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
     case Pin_AvUnary: {
        UInt v_dst = vregEnc(i->Pin.AvUnary.dst);
        UInt v_src = vregEnc(i->Pin.AvUnary.src);
-      UInt opc2;
+      UInt opc2, opc3;
+
        switch (i->Pin.AvUnary.op) {
        case Pav_MOV:       opc2 = 1156; break; // vor vD,vS,vS
        case Pav_NOT:       opc2 = 1284; break; // vnor vD,vS,vS
@@ -4822,7 +5258,19 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        case Pav_ZEROCNTHALF: opc2 = 1858; break; // vclzh
        case Pav_ZEROCNTWORD: opc2 = 1922; break; // vclzw
        case Pav_ZEROCNTDBL:  opc2 = 1986; break; // vclzd
+      case Pav_TRAILINGZEROCNTBYTE: opc2 = 1538; break; // vctzb
+      case Pav_TRAILINGZEROCNTHALF: opc2 = 1538; break; // vctzh
+      case Pav_TRAILINGZEROCNTWORD: opc2 = 1538; break; // vctzw
+      case Pav_TRAILINGZEROCNTDBL:  opc2 = 1538; break; // vctzd
        case Pav_BITMTXXPOSE: opc2 = 1292; break; // vgbbd
+      case Pav_BCD128toI128S: opc2 = 385; break; //bcdctsq.
+      case Pav_MulI128by10:      opc2 = 513; break; // vmul10uq
+      case Pav_MulI128by10Carry: opc2 =   1; break; // vmul10cuq
+      case Pav_F16toF64x2: opc2 = 347; opc3 = 16; break; // xvcvhpdp
+      case Pav_F64toF16x2: opc2 = 347; opc3 = 17; break; // xvcvdphp
+      case Pav_F16toF32x4: opc2 = 475; opc3 = 24; break; // xvcvhpsp
+      case Pav_F32toF16x4: opc2 = 475; opc3 = 25; break; // xvcvsphp
+
        default:
           goto bad;
        }
@@ -4831,6 +5279,55 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        case Pav_NOT:
           p = mkFormVX( p, 4, v_dst, v_src, v_src, opc2, endness_host );
           break;
+      case Pav_F16toF32x4:
+         {
+            /* I64 source has four 16-bit float values in the upper 64-bit
+             * of the source vector register, lower 64-bits are undefined.
+             */
+            /* Scatter the four F16 values in the Vector register */
+            p = mkFormVX( p, 4, v_dst, 0, v_src, 590, endness_host );// vupkhsh
+
+            /* The layout of the vector register is now: S0F0 S1F1 S2F2 S3F3
+             * where S is the sign extension of the 16-bit F value.  We don't
+             * care about the extended signs.
+             */
+
+            /* Input, in v_dst, is now correct for the xvcvhpsp instruction */
+            p = mkFormVX_BX_TX( p, 60, v_dst, opc3, v_dst, opc2,
+                                endness_host );
+         }
+         break;
+      case Pav_F64toF16x2:
+      case Pav_F16toF64x2:
+      case Pav_F32toF16x4:
+         /* Note this issues a Vector scalar instruction.  The register
+          * mapping for the V128 type assumes the a vector instruction.  The
+          * PPC hardware has a single register file that the vector scalar
+          * registers and the vector registers map to.  The 32 vector registers
+          * instructions map to the same registers as the vector scalar
+          * registers 32 to 63.  mkFormVX_BX_TX does the needed adjustment.
+          */
+         p = mkFormVX_BX_TX( p, 60, v_dst, opc3, v_src, opc2, endness_host );
+         break;
+      case Pav_BCD128toI128S:  // bcdctsq
+         p = mkFormVX( p, 4, v_dst, 0, v_src, (1<<10 | 385), endness_host );
+         break;
+      case Pav_MulI128by10:
+      case Pav_MulI128by10Carry:
+         p = mkFormVX( p, 4, v_dst, v_src, 0, opc2, endness_host );
+         break;
+      case Pav_TRAILINGZEROCNTBYTE:
+         p = mkFormVX( p, 4, v_dst, 28, v_src, opc2, endness_host );
+         break;
+      case Pav_TRAILINGZEROCNTHALF:
+         p = mkFormVX( p, 4, v_dst, 29, v_src, opc2, endness_host );
+         break;
+      case Pav_TRAILINGZEROCNTWORD:
+         p = mkFormVX( p, 4, v_dst, 30, v_src, opc2, endness_host );
+         break;
+      case Pav_TRAILINGZEROCNTDBL:
+         p = mkFormVX( p, 4, v_dst, 31, v_src, opc2, endness_host );
+         break;
        default:
           p = mkFormVX( p, 4, v_dst, 0, v_src, opc2, endness_host );
           break;
@@ -4858,6 +5355,9 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        case Pav_AND:       opc2 = 1028; break; // vand
        case Pav_OR:        opc2 = 1156; break; // vor
        case Pav_XOR:       opc2 = 1220; break; // vxor
+      /* Mult by 10 */
+      case Pav_MulI128by10E:      opc2 = 577; break; // vmul10euq
+      case Pav_MulI128by10ECarry: opc2 =  65; break; // vmul10ecuq
        default:
           goto bad;
        }
@@ -4865,6 +5365,61 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
        goto done;
     }
  
+   case Pin_AvBinaryInt: {
+       UInt   ps = i->Pin.AvBinaryInt.val->Pri.Imm;
+       UInt   dst = vregEnc(i->Pin.AvBinaryInt.dst);
+       UInt   src = vregEnc(i->Pin.AvBinaryInt.src);
+
+       switch (i->Pin.AvBinaryInt.op) {
+       /* BCD */
+       case Pav_I128StoBCD128:   // bcdcfsq
+          {
+              /* v_srcR actually contains the value of the one-bit ps field */
+              int opc2 = 385;
+              p = mkFormVX( p, 4, dst, 2, src,
+                            (1 << 10 | (ps << 9) | opc2), endness_host );
+           }
+       break;
+
+       case Pav_F128toI128S:  // xsrqpi, xsrqpix
+           {
+              int opc2 = 5;
+              UInt EX  = ps & 0x1;
+              UInt R   = (ps >> 3) & 0x1;
+              UInt RMC = (ps >> 1) & 0x3;
+              /* Note this issues a Vector scalar instruction.  The register
+               * mapping for the V128 type assumes the a vector instruction.  The
+               * PPC hardware has a single register file that the vector scalar
+               * registers and the vector registers map to.  The 32 vector
+               * registers instructions map to the same registers as the vector
+               * scalar registers 32 to 63.  For these instructions the HW adds
+               * 32 to the register numbers to access the VSRR register.  No need
+               * to adjust the numbers to map to the VR register that contians
+               * the operands.
+               */
+              p = mkFormVSXRND( p, 63, R, dst, src, RMC, opc2, EX,
+                                endness_host );
+           }
+       break;
+
+        case Pav_ROUNDFPQ:   // xsrqpxp
+           {
+              int opc2 = 37;
+              UInt EX  = ps & 0x1;
+              UInt RMC = (ps >> 1) & 0x3;
+              UInt R   = (ps >> 3) & 0x1;
+              p = mkFormVSXRND( p, 63, R, dst, src, RMC, opc2, EX,
+                                endness_host );
+           }
+       break;
+
+        default:
+           goto bad;
+
+      }
+      goto done;
+   }
+
     case Pin_AvBin8x16: {
        UInt v_dst  = vregEnc(i->Pin.AvBin8x16.dst);
        UInt v_srcL = vregEnc(i->Pin.AvBin8x16.srcL);
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h

index 0b1939d0f3af545308b78fbad11b1ddc0b95489d..59a016b583c19cc52e211c3c46da04e4a8f88c00 100644 (file)
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -291,6 +291,8 @@ typedef
        Pun_NOT,
        Pun_CLZ32,
        Pun_CLZ64,
+      Pun_CTZ32,
+      Pun_CTZ64,
        Pun_EXTSW
     }
     PPCUnaryOp;
@@ -334,6 +336,24 @@ typedef
        /* Ternary */
        Pfp_MADDD,  Pfp_MSUBD,
        Pfp_MADDS,  Pfp_MSUBS,
+      Pfp_FPADDQ, Pfp_FPADDQRNDODD,
+      Pfp_FPSUBQ, Pfp_FPSUBQRNDODD,
+      Pfp_FPMULQ, Pfp_FPMULQRNDODD,
+      Pfp_FPDIVQ, Pfp_FPDIVQRNDODD,
+      Pfp_FPMULADDQ, Pfp_FPMULADDQRNDODD,
+      Pfp_FPMULSUBQ, Pfp_FPMULSUBQRNDODD,
+      Pfp_FPNEGMULADDQ, Pfp_FPNEGMULADDQRNDODD,
+      Pfp_FPNEGMULSUBQ, Pfp_FPNEGMULSUBQRNDODD,
+      Pfp_FPSQRTQ, Pfp_FPSQRTQRNDODD,
+      Pfp_FPQTOD, Pfp_FPQTODRNDODD,
+      Pfp_FPQTOW, Pfp_FPQTOWRNDODD,
+      Pfp_FPDTOQ,
+      Pfp_IDSTOQ,
+      Pfp_IDUTOQ,
+      Pfp_TRUNCFPQTOISD,
+      Pfp_TRUNCFPQTOISW,
+      Pfp_TRUNCFPQTOIUD,
+      Pfp_TRUNCFPQTOIUW,
        Pfp_DFPADD, Pfp_DFPADDQ,
        Pfp_DFPSUB, Pfp_DFPSUBQ,
        Pfp_DFPMUL, Pfp_DFPMULQ,
@@ -409,11 +429,51 @@ typedef
        /* BCD Arithmetic */
        Pav_BCDAdd, Pav_BCDSub,
  
+      /* Conversion signed 128-bit value to signed BCD 128-bit */
+      Pav_I128StoBCD128,
+
+      /* Conversion signed BCD 128-bit to signed 128-bit value */
+      Pav_BCD128toI128S,
+
        /* zero count */
        Pav_ZEROCNTBYTE, Pav_ZEROCNTWORD, Pav_ZEROCNTHALF, Pav_ZEROCNTDBL,
  
+      /* trailing zero count */
+      Pav_TRAILINGZEROCNTBYTE, Pav_TRAILINGZEROCNTWORD,
+      Pav_TRAILINGZEROCNTHALF, Pav_TRAILINGZEROCNTDBL,
+
        /* Vector bit matrix transpose by byte */
        Pav_BITMTXXPOSE,
+
+      /* Vector Half-precision format to single precision conversion */
+      Pav_F16toF32x4,
+
+      /* Vector Single precision format to Half-precision conversion */
+      Pav_F32toF16x4,
+
+      /* Vector Half-precision format to Double precision conversion */
+      Pav_F16toF64x2,
+
+      /* Vector Double precision format to Half-precision conversion */
+      Pav_F64toF16x2,
+
+      /* 128 bit mult by 10 */
+      Pav_MulI128by10,
+
+      /* 128 bit mult by 10, carry out */
+      Pav_MulI128by10Carry,
+
+      /* 128 bit mult by 10 plus carry in */
+      Pav_MulI128by10E,
+
+      /* 128 bit mult by 10 plus carry in, carry out */
+      Pav_MulI128by10ECarry,
+
+      /* F128 to I128 */
+      Pav_F128toI128S,
+
+      /* Round F128 to F128 */
+      Pav_ROUNDFPQ,
     }
     PPCAvOp;
  
@@ -466,6 +526,9 @@ typedef
  
        Pin_FpUnary,    /* FP unary op */
        Pin_FpBinary,   /* FP binary op */
+      Pin_Fp128Unary,   /* FP unary op for 128-bit floating point */
+      Pin_Fp128Binary,  /* FP binary op for 128-bit floating point */
+      Pin_Fp128Trinary, /* FP trinary op for 128-bit floating point */
        Pin_FpMulAcc,   /* FP multipy-accumulate style op */
        Pin_FpLdSt,     /* FP load/store */
        Pin_FpSTFIW,    /* stfiwx */
@@ -481,6 +544,7 @@ typedef
        Pin_AvUnary,    /* AV unary general reg=>reg */
  
        Pin_AvBinary,   /* AV binary general reg,reg=>reg */
+      Pin_AvBinaryInt,/* AV binary  reg,int=>reg */
        Pin_AvBin8x16,  /* AV binary, 8x4 */
        Pin_AvBin16x8,  /* AV binary, 16x4 */
        Pin_AvBin32x4,  /* AV binary, 32x4 */
@@ -698,6 +762,23 @@ typedef
              HReg    srcL;
              HReg    srcR;
           } FpBinary;
+         struct {
+            PPCFpOp op;
+            HReg dst;
+            HReg src;
+         } Fp128Unary;
+         struct {
+            PPCFpOp op;
+            HReg dst;
+            HReg srcL;
+            HReg srcR;
+         } Fp128Binary;
+         struct {
+            PPCFpOp op;
+            HReg dst;
+            HReg srcL;
+            HReg srcR;
+         } Fp128Trinary;
           struct {
              PPCFpOp op;
              HReg    dst;
@@ -775,6 +856,12 @@ typedef
              HReg    srcL;
              HReg    srcR;
           } AvBinary;
+         struct {
+            PPCAvOp op;
+            HReg    dst;
+            HReg    src;
+            PPCRI*  val;
+         } AvBinaryInt;
           struct {
              PPCAvOp op;
              HReg    dst;
@@ -1025,6 +1112,11 @@ extern PPCInstr* PPCInstr_Set        ( PPCCondCode cond, HReg dst );
  extern PPCInstr* PPCInstr_MfCR       ( HReg dst );
  extern PPCInstr* PPCInstr_MFence     ( void );
  
+extern PPCInstr* PPCInstr_Fp128Unary    ( PPCFpOp op, HReg dst, HReg src );
+extern PPCInstr* PPCInstr_Fp128Binary   ( PPCFpOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_Fp128Trinary  ( PPCFpOp op, HReg dst, HReg srcL,
+                                          HReg srcR);
+
  extern PPCInstr* PPCInstr_FpUnary    ( PPCFpOp op, HReg dst, HReg src );
  extern PPCInstr* PPCInstr_FpBinary   ( PPCFpOp op, HReg dst, HReg srcL, HReg srcR );
  extern PPCInstr* PPCInstr_FpMulAcc   ( PPCFpOp op, HReg dst, HReg srcML, 
@@ -1043,6 +1135,7 @@ extern PPCInstr* PPCInstr_RdWrLR     ( Bool wrLR, HReg gpr );
  extern PPCInstr* PPCInstr_AvLdSt     ( Bool isLoad, UChar sz, HReg, PPCAMode* );
  extern PPCInstr* PPCInstr_AvUnary    ( PPCAvOp op, HReg dst, HReg src );
  extern PPCInstr* PPCInstr_AvBinary   ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvBinaryInt( PPCAvOp op, HReg dst, HReg src, PPCRI* val );
  extern PPCInstr* PPCInstr_AvBin8x16  ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
  extern PPCInstr* PPCInstr_AvBin16x8  ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
  extern PPCInstr* PPCInstr_AvBin32x4  ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c

index 5a701ed491fd34472fa8152923c5d70de7906183..9887f27ece32d96ff70b69c8bf4fca6d09d13438 100644 (file)
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -497,6 +497,8 @@ static HReg          iselDfp64Expr_wrk ( ISelEnv* env, IRExpr* e,
                                           IREndness IEndianess );
  static HReg          iselDfp64Expr     ( ISelEnv* env, IRExpr* e,
                                           IREndness IEndianess );
+static HReg iselFp128Expr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess);
+static HReg iselFp128Expr     ( ISelEnv* env, IRExpr* e, IREndness IEndianess);
  
  /* 64-bit mode ONLY: compute an D128 into a GPR64 pair. */
  static void iselDfp128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
@@ -1225,6 +1227,20 @@ static void set_FPU_DFP_rounding_mode ( ISelEnv* env, IRExpr* mode,
     _set_FPU_rounding_mode(env, mode, True, IEndianess);
  }
  
+static
+Bool FPU_rounding_mode_isOdd (IRExpr* mode) {
+   /* If the rounding mode is set to odd, the the expr must be a constant U8
+    * value equal to 8.  Otherwise, it must be a bin op expressiong that
+    * calculates the value.
+    */
+
+   if (mode->tag != Iex_Const)
+      return False;
+
+   vassert(mode->Iex.Const.con->tag == Ico_U32);
+   vassert(mode->Iex.Const.con->Ico.U8 == 0x8);
+   return True;
+}
  
  /*---------------------------------------------------------*/
  /*--- ISEL: vector helpers                              ---*/
@@ -2060,6 +2076,20 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e,
           return r_dst;
        }
  
+      case Iop_Ctz32:
+      case Iop_Ctz64: {
+         HReg r_src, r_dst;
+         PPCUnaryOp op_clz = (op_unop == Iop_Ctz32) ? Pun_CTZ32 :
+                                                      Pun_CTZ64;
+         if (op_unop == Iop_Ctz64 && !mode64)
+            goto irreducible;
+         /* Count trailing zeroes. */
+         r_dst = newVRegI(env);
+         r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+         addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
+         return r_dst;
+      }
+
        case Iop_Left8:
        case Iop_Left16:
        case Iop_Left32: 
@@ -2300,7 +2330,7 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e,
              addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
                                           argiregs, 
                                           mk_RetLoc_simple(RLPri_Int) ) );
-       } else {
+        } else {
              HWord*      fdescr;
              fdescr = (HWord*)h_calc_DPBtoBCD;
              addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
@@ -2311,6 +2341,61 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e,
           addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
           return r_dst;
        }
+      case Iop_F32toF16x4: {
+         HReg vdst = newVRegV(env);    /* V128 */
+         HReg dst  = newVRegI(env);    /* I64*/
+         HReg r0 = newVRegI(env);    /* I16*/
+         HReg r1 = newVRegI(env);    /* I16*/
+         HReg r2 = newVRegI(env);    /* I16*/
+         HReg r3 = newVRegI(env);    /* I16*/
+         HReg vsrc  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
+         PPCAMode *am_off0, *am_off2, *am_off4, *am_off6, *am_off8, *am_off12;
+         HReg r_aligned16;
+
+         sub_from_sp( env, 32 );     // Move SP down
+
+         /* issue instruction */
+         addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, vdst, vsrc));
+
+         /* Get a  quadword aligned address within our stack space */
+         r_aligned16 = get_sp_aligned16( env );
+         am_off0  = PPCAMode_IR( 0, r_aligned16 );
+         am_off2  = PPCAMode_IR( 2, r_aligned16 );
+         am_off4  = PPCAMode_IR( 4, r_aligned16 );
+         am_off6  = PPCAMode_IR( 6, r_aligned16 );
+         am_off8  = PPCAMode_IR( 8, r_aligned16 );
+         am_off12 = PPCAMode_IR( 12, r_aligned16 );
+
+         /* Store v128 result to stack. */
+         if (IEndianess == Iend_LE) {
+            addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, vdst, am_off0));
+         } else {
+            addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, vdst, am_off8));
+         }
+
+         /* fetch four I16 from V128, store into contiguous I64 via stack,  */
+
+         /* get 16-bit values */
+         addInstr(env, PPCInstr_Load( 2, r3, am_off12, mode64));
+         addInstr(env, PPCInstr_Load( 2, r2, am_off8, mode64));
+         addInstr(env, PPCInstr_Load( 2, r1, am_off4, mode64));
+         addInstr(env, PPCInstr_Load( 2, r0, am_off0, mode64));
+
+         /* store in contiguous 64-bit values */
+         addInstr(env, PPCInstr_Store( 2, am_off6, r3, mode64));
+         addInstr(env, PPCInstr_Store( 2, am_off4, r2, mode64));
+         addInstr(env, PPCInstr_Store( 2, am_off2, r1, mode64));
+         addInstr(env, PPCInstr_Store( 2, am_off0, r0, mode64));
+
+         /* Fetch I64 */
+         if (IEndianess == Iend_LE) {
+            addInstr(env, PPCInstr_Load(8, dst, am_off0, mode64));
+         } else {
+            addInstr(env, PPCInstr_Load(8, dst, am_off8, mode64));
+         }
+         add_to_sp( env, 32 );          // Reset SP
+         return dst;
+      }
  
        default: 
           break;
@@ -3032,6 +3117,8 @@ static void iselInt128Expr ( HReg* rHi, HReg* rLo,
  static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                   ISelEnv* env, IRExpr* e, IREndness IEndianess )
  {
+   Bool mode64 = env->mode64;
+
     vassert(e);
     vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
  
@@ -3041,6 +3128,21 @@ static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
        return;
     }
  
+   /* 128-bit GET */
+   if (e->tag == Iex_Get) {
+      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
+                                       GuestStatePtr(mode64) );
+      PPCAMode* am_addr4 = advance4(env, am_addr);
+      HReg tLo = newVRegI(env);
+      HReg tHi = newVRegI(env);
+
+      addInstr(env, PPCInstr_Load( 8, tHi, am_addr,  mode64));
+      addInstr(env, PPCInstr_Load( 8, tLo, am_addr4, mode64));
+      *rHi = tHi;
+      *rLo = tLo;
+      return;
+   }
+
     /* --------- BINARY ops --------- */
     if (e->tag == Iex_Binop) {
        switch (e->Iex.Binop.op) {
@@ -4071,6 +4173,36 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
  
     if (e->tag == Iex_Binop) {
  
+      if (e->Iex.Binop.op == Iop_F128toF64) {
+         HReg fr_dst = newVRegF(env);
+         HReg fr_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
+         HReg tmp = newVRegV(env);
+         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
+         PPCAMode* eight_r1 = PPCAMode_IR( 8, StackFramePtr(env->mode64) );
+         PPCAvFpOp fpop = Pavfp_INVALID;
+
+         if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPQTODRNDODD;
+         } else {
+            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
+            fpop = Pfp_FPQTOD;
+         }
+
+         addInstr(env, PPCInstr_Fp128Unary(fpop, tmp, fr_src));
+
+         /* result is in a 128-bit vector register, move to 64-bit reg to
+          * match the Iop specification.  The result will get moved back
+          * to a 128-bit register and stored once the value is returned.
+          */
+         sub_from_sp( env, 16 );
+         addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, zero_r1));
+         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, eight_r1));
+         add_to_sp( env, 16 );
+
+         return fr_dst;
+      }
+
        if (e->Iex.Binop.op == Iop_RoundF64toF32) {
           HReg r_dst = newVRegF(env);
           HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
@@ -4160,6 +4292,43 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
  
     if (e->tag == Iex_Unop) {
        switch (e->Iex.Unop.op) {
+      case Iop_F128HItoF64:
+      case Iop_F128LOtoF64:
+         {
+            /* put upper/lower 64-bits of F128 into an F64. */
+            HReg     r_aligned16;
+            HReg     fdst = newVRegF(env);
+            HReg     fsrc = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
+            PPCAMode *am_off0, *am_off8, *am_off_arg;
+            sub_from_sp( env, 32 );     // Move SP down 32 bytes
+
+            // get a quadword aligned address within our stack space
+            r_aligned16 = get_sp_aligned16( env );
+            am_off0 = PPCAMode_IR( 0, r_aligned16 );
+            am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
+
+            /* store 128-bit floating point value to memory, load low word
+             * or high to 64-bit destination floating point register
+             */
+            addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, fsrc, am_off0));
+            if (IEndianess == Iend_LE) {
+               if (e->Iex.Binop.op == Iop_F128HItoF64)
+                  am_off_arg = am_off8;
+               else
+                  am_off_arg = am_off0;
+            } else {
+               if (e->Iex.Binop.op == Iop_F128HItoF64)
+                  am_off_arg = am_off0;
+               else
+                  am_off_arg = am_off8;
+            }
+            addInstr(env,
+                    PPCInstr_FpLdSt( True /*load*/,
+                                      8, fdst,
+                                      am_off_arg ));
+            add_to_sp( env, 32 );       // Reset SP
+            return fdst;
+         }
           case Iop_ReinterpI64asF64: {
              /* Given an I64, produce an IEEE754 double with the same
                 bit pattern. */
@@ -4278,6 +4447,322 @@ static HReg iselDfp32Expr_wrk(ISelEnv* env, IRExpr* e, IREndness IEndianess)
     vpanic( "iselDfp32Expr_wrk(ppc)" );
  }
  
+static HReg iselFp128Expr( ISelEnv* env, IRExpr* e, IREndness IEndianess )
+{
+   HReg r = iselFp128Expr_wrk( env, e, IEndianess );
+   vassert(hregClass(r) == HRcVec128);
+   vassert(hregIsVirtual(r));
+   return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselFp128Expr_wrk( ISelEnv* env, IRExpr* e, IREndness IEndianess)
+{
+   Bool mode64 = env->mode64;
+   PPCAvFpOp fpop = Pavfp_INVALID;
+   IRType  ty = typeOfIRExpr(env->type_env,e);
+
+   vassert(e);
+   vassert( ty == Ity_F128 );
+
+   /* read 128-bit IRTemp */
+   if (e->tag == Iex_RdTmp) {
+      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+   }
+
+  if (e->tag == Iex_Get) {
+      /* Guest state vectors are 16byte aligned,
+         so don't need to worry here */
+      HReg dst = newVRegV(env);
+
+      addInstr(env,
+               PPCInstr_AvLdSt( True/*load*/, 16, dst,
+                                PPCAMode_IR( e->Iex.Get.offset,
+                                             GuestStatePtr(mode64) )));
+      return dst;
+   }
+
+   if (e->tag == Iex_Unop) {
+      switch (e->Iex.Unop.op) {
+      case Iop_TruncF128toI64S:
+         fpop = Pfp_TRUNCFPQTOISD; goto do_Un_F128;
+      case Iop_TruncF128toI32S:
+         fpop = Pfp_TRUNCFPQTOISW; goto do_Un_F128;
+      case Iop_TruncF128toI64U:
+         fpop = Pfp_TRUNCFPQTOIUD; goto do_Un_F128;
+      case Iop_TruncF128toI32U:
+         fpop = Pfp_TRUNCFPQTOIUW; goto do_Un_F128;
+
+      do_Un_F128: {
+         HReg r_dst = newVRegV(env);
+         HReg r_src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
+         addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
+         return r_dst;
+      }
+
+      case Iop_F64toF128: {
+         fpop = Pfp_FPDTOQ;
+         HReg r_dst = newVRegV(env);
+         HReg r_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
+         HReg v128tmp = newVRegV(env);
+         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
+
+         /* value is in 64-bit float reg, need to move to 128-bit vector reg */
+         sub_from_sp( env, 16 );
+         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, r_src, zero_r1));
+         addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, v128tmp, zero_r1));
+         add_to_sp( env, 16 );
+
+         addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, v128tmp));
+         return r_dst;
+      }
+
+      case Iop_I64StoF128:
+         fpop = Pfp_IDSTOQ; goto do_Un_int_F128;
+      case Iop_I64UtoF128:
+         fpop = Pfp_IDUTOQ; goto do_Un_int_F128;
+
+      do_Un_int_F128: {
+         HReg r_dst = newVRegV(env);
+         HReg tmp = newVRegV(env);
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+         PPCAMode *am_offhi, *am_offlo;
+         HReg r_aligned16;
+
+         /* source is in a 64-bit integer reg, move to 128-bit float reg
+          * do this via the stack (easy, convenient, etc).
+          */
+         sub_from_sp( env, 32 );        // Move SP down
+
+         /* Get a quadword aligned address within our stack space */
+         r_aligned16 = get_sp_aligned16( env );
+
+         am_offlo  = PPCAMode_IR( 0,  r_aligned16 );
+         am_offhi  = PPCAMode_IR( 8,  r_aligned16 );
+
+         /* Inst only uses the upper 64-bit of the source */
+         addInstr(env, PPCInstr_Load(8, r_src, am_offhi, mode64));
+
+         /* Fetch result back from stack. */
+         addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, tmp, am_offlo));
+
+         add_to_sp( env, 32 );          // Reset SP
+
+         addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, tmp));
+         return r_dst;
+      }
+
+      default:
+         break;
+      } /* switch (e->Iex.Unop.op) */
+   } /* if (e->tag == Iex_Unop) */
+
+   if (e->tag == Iex_Binop) {
+      switch (e->Iex.Binop.op) {
+
+      case Iop_F64HLtoF128:
+         {
+            HReg dst    = newVRegV(env);
+            HReg r_src_hi = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
+            HReg r_src_lo = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
+            PPCAMode *am_offhi, *am_offlo;
+            HReg r_aligned16;
+
+            /* do this via the stack (easy, convenient, etc) */
+            sub_from_sp( env, 16 );        // Move SP down
+
+            /* Get a quadword aligned address within our stack space */
+            r_aligned16 = get_sp_aligned16( env );
+
+            am_offlo  = PPCAMode_IR( 0,  r_aligned16 );
+            am_offhi  = PPCAMode_IR( 8,  r_aligned16 );
+
+            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
+                                          r_src_lo, am_offlo));
+            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
+                                          r_src_hi, am_offhi));
+
+            /* Fetch result back from stack. */
+            addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16,
+                                          dst, am_offlo));
+
+            add_to_sp( env, 16 );          // Reset SP
+            return dst;
+         }
+      case Iop_F128toI128S:
+         {
+            HReg dst    = newVRegV(env);
+            HReg r_src  = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
+            PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
+            /* Note: rm is a set of three bit fields that specify the
+             * rounding mode and which of the two instructions to issue.
+             */
+            addInstr(env, PPCInstr_AvBinaryInt(Pav_F128toI128S, dst,
+                                               r_src, rm));
+            return dst;
+         }
+      case Iop_RndF128:
+         {
+            HReg dst    = newVRegV(env);
+            HReg r_src  = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
+            PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
+            /* Note: rm is a set of three bit fields that specify the
+             * rounding mode and which of the two instructions to issue.
+             */
+            addInstr(env, PPCInstr_AvBinaryInt(Pav_ROUNDFPQ, dst,
+                                               r_src, rm));
+            return dst;
+         }
+      case Iop_SqrtF128:
+         if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPSQRTQRNDODD;
+            goto do_Bin_F128;
+         } else {
+            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
+            fpop = Pfp_FPSQRTQ;
+            goto do_Bin_F128;
+         }
+      case Iop_F128toF32:
+         if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPQTOWRNDODD;
+            goto do_Bin_F128;
+         } else {
+            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
+            fpop = Pfp_FPQTOW;
+            goto do_Bin_F128;
+         }
+      do_Bin_F128: {
+         HReg r_dst = newVRegV(env);
+         HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
+         addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
+         return r_dst;
+      }
+
+      default:
+         break;
+      } /* switch (e->Iex.Binop.op) */
+   } /* if (e->tag == Iex_Binop) */
+
+   if (e->tag == Iex_Triop) {
+      IRTriop *triop = e->Iex.Triop.details;
+
+      switch (triop->op) {
+      case Iop_AddF128:
+         if (FPU_rounding_mode_isOdd(triop->arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPADDQRNDODD; goto do_Tri_F128;
+         } else {
+            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
+            fpop = Pfp_FPADDQ; goto do_Tri_F128;
+         }
+      case Iop_SubF128:
+         if (FPU_rounding_mode_isOdd(triop->arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPSUBQRNDODD; goto do_Tri_F128;
+         } else {
+            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
+            fpop = Pfp_FPSUBQ; goto do_Tri_F128;
+         }
+      case Iop_MulF128:
+         if (FPU_rounding_mode_isOdd(triop->arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPMULQRNDODD; goto do_Tri_F128;
+         } else {
+            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
+            fpop = Pfp_FPMULQ; goto do_Tri_F128;
+         }
+      case Iop_DivF128:
+         if (FPU_rounding_mode_isOdd(triop->arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPDIVQRNDODD; goto do_Tri_F128;
+         } else {
+            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
+            fpop = Pfp_FPDIVQ; goto do_Tri_F128;
+         }
+      case Iop_MAddF128:
+         if (FPU_rounding_mode_isOdd(triop->arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPMULADDQRNDODD; goto do_Tri_F128;
+         } else {
+            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
+            fpop = Pfp_FPMULADDQ; goto do_Tri_F128;
+         }
+
+   do_Tri_F128: {
+         HReg r_dst  = newVRegV(env);
+         HReg r_srcL = iselFp128Expr(env, triop->arg2, IEndianess);
+         HReg r_srcR = iselFp128Expr(env, triop->arg3, IEndianess);
+
+         addInstr(env, PPCInstr_Fp128Binary(fpop, r_dst, r_srcL, r_srcR));
+         return r_dst;
+      }
+
+      default:
+         break;
+      } /* switch (e->Iex.Triop.op) */
+
+   } /* if (e->tag == Iex_Trinop) */
+
+   if (e->tag == Iex_Qop) {
+      IRQop *qop = e->Iex.Qop.details;
+
+      switch (qop->op) {
+      case Iop_MAddF128:
+         if (FPU_rounding_mode_isOdd(qop->arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPMULADDQRNDODD; goto do_Quad_F128;
+         } else {
+            set_FPU_rounding_mode( env, qop->arg1, IEndianess );
+            fpop = Pfp_FPMULADDQ; goto do_Quad_F128;
+         }
+      case Iop_MSubF128:
+         if (FPU_rounding_mode_isOdd(qop->arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPMULSUBQRNDODD; goto do_Quad_F128;
+         } else {
+            set_FPU_rounding_mode( env, qop->arg1, IEndianess );
+            fpop = Pfp_FPMULSUBQ; goto do_Quad_F128;
+         }
+      case Iop_NegMAddF128:
+         if (FPU_rounding_mode_isOdd(qop->arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPNEGMULADDQRNDODD; goto do_Quad_F128;
+         } else {
+            set_FPU_rounding_mode( env, qop->arg1, IEndianess );
+            fpop = Pfp_FPNEGMULADDQ; goto do_Quad_F128;
+         }
+      case Iop_NegMSubF128:
+         if (FPU_rounding_mode_isOdd(qop->arg1)) {
+            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
+            fpop = Pfp_FPNEGMULSUBQRNDODD; goto do_Quad_F128;
+         } else {
+            set_FPU_rounding_mode( env, qop->arg1, IEndianess );
+            fpop = Pfp_FPNEGMULSUBQ; goto do_Quad_F128;
+         }
+
+      do_Quad_F128: {
+         HReg r_dst = iselFp128Expr(env, qop->arg3,
+                                    IEndianess);
+         HReg r_srcL = iselFp128Expr(env, qop->arg2,
+                                     IEndianess);
+         HReg r_srcR = iselFp128Expr(env, qop->arg4,
+                                     IEndianess);
+
+         addInstr(env, PPCInstr_Fp128Trinary(fpop, r_dst, r_srcL, r_srcR));
+         return r_dst;
+         }
+
+      default:
+         break;
+      }
+   }   /* if (e->tag == Iex_Qop) */
+
+   ppIRExpr( e );
+   vpanic( "iselFp128Expr(ppc64)" );
+}
+
  static HReg iselDfp64Expr(ISelEnv* env, IRExpr* e, IREndness IEndianess)
  {
     HReg r = iselDfp64Expr_wrk( env, e, IEndianess );
@@ -4932,6 +5417,101 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
     if (e->tag == Iex_Unop) {
        switch (e->Iex.Unop.op) {
  
+      case Iop_F16toF64x2:
+         {
+            HReg dst = newVRegV(env);
+            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
+            /* Note: PPC only coverts the 16-bt value in the upper word
+             *       to a 64-bit value stored in the upper word.  The
+             *       contents of the lower word is undefined.
+             */
+            addInstr(env, PPCInstr_AvUnary(Pav_F16toF64x2, dst, arg));
+            return dst;
+         }
+
+      case Iop_F64toF16x2:
+         {
+            HReg dst = newVRegV(env);
+            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
+            /* Note: PPC only coverts the 64-bt value in the upper 64-bit of V128
+             * to a 16-bit value stored in the upper 64-bits of the result
+             * V128.  The contents of the lower 64-bits is undefined.
+             */
+            addInstr(env, PPCInstr_AvUnary(Pav_F64toF16x2, dst, arg));
+            return dst;
+         }
+
+      case Iop_F16toF32x4:
+         {
+            HReg src = newVRegV(env);
+            HReg dst = newVRegV(env);
+            HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+            PPCAMode *am_off0, *am_off8;
+            HReg r_aligned16;
+
+            vassert(mode64);
+            /* need to put I64 src into upper 64-bits of vector register,
+               use stack */
+            sub_from_sp( env, 32 );     // Move SP down
+
+            /* Get a quadword aligned address within our stack space */
+            r_aligned16 = get_sp_aligned16( env );
+            am_off0  = PPCAMode_IR( 0, r_aligned16 );
+            am_off8  = PPCAMode_IR( 8, r_aligned16 );
+
+            /* Store I64 to stack */
+
+            if (IEndianess == Iend_LE) {
+               addInstr(env, PPCInstr_Store( 8, am_off8, arg, mode64 ));
+            } else {
+               addInstr(env, PPCInstr_Store( 8, am_off0, arg, mode64 ));
+            }
+
+            /* Fetch new v128 src back from stack. */
+            addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, src, am_off0));
+
+            /* issue instruction */
+            addInstr(env, PPCInstr_AvUnary(Pav_F16toF32x4, dst, src));
+            add_to_sp( env, 32 );          // Reset SP
+
+            return dst;
+         }
+
+      case Iop_F32toF16x4:
+         {
+            HReg dst = newVRegI(env);
+            HReg tmp = newVRegV(env);
+            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
+            PPCAMode *am_off0, *am_off8;
+            HReg r_aligned16;
+
+            /* Instruction returns a V128, the Iop_F32toF16x4 needs to return
+             * I64.  Move the upper 64-bits from the instruction to an I64 via
+             * the stack and return it.
+             */
+            sub_from_sp( env, 32 );     // Move SP down
+
+            addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, tmp, arg));
+
+            /* Get a quadword aligned address within our stack space */
+            r_aligned16 = get_sp_aligned16( env );
+            am_off0  = PPCAMode_IR( 0, r_aligned16 );
+            am_off8  = PPCAMode_IR( 8, r_aligned16 );
+
+            /* Store v128 tmp to stack. */
+            addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, am_off0));
+
+            /* Fetch I64 from stack */
+            if (IEndianess == Iend_LE) {
+               addInstr(env, PPCInstr_Load( 8, dst, am_off8, mode64 ));
+            } else {
+               addInstr(env, PPCInstr_Load( 8, dst, am_off0, mode64 ));
+            }
+
+            add_to_sp( env, 32 );          // Reset SP
+            return dst;
+         }
+
        case Iop_NotV128: {
           HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
           HReg dst = newVRegV(env);
@@ -5054,6 +5634,10 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
        case Iop_Clz16x8: op = Pav_ZEROCNTHALF;   goto do_zerocnt;
        case Iop_Clz32x4: op = Pav_ZEROCNTWORD;   goto do_zerocnt;
        case Iop_Clz64x2: op = Pav_ZEROCNTDBL;    goto do_zerocnt;
+      case Iop_Ctz8x16: op = Pav_TRAILINGZEROCNTBYTE; goto do_zerocnt;
+      case Iop_Ctz16x8: op = Pav_TRAILINGZEROCNTHALF; goto do_zerocnt;
+      case Iop_Ctz32x4: op = Pav_TRAILINGZEROCNTWORD; goto do_zerocnt;
+      case Iop_Ctz64x2: op = Pav_TRAILINGZEROCNTDBL;  goto do_zerocnt;
        case Iop_PwBitMtxXpose64x2: op = Pav_BITMTXXPOSE;  goto do_zerocnt;
        do_zerocnt:
        {
@@ -5063,6 +5647,24 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
          return dst;
        }
  
+      /* BCD Iops */
+      case Iop_BCD128toI128S:
+         {
+            HReg dst  = newVRegV(env);
+            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
+            addInstr(env, PPCInstr_AvUnary( Pav_BCD128toI128S, dst, arg ) );
+            return dst;
+         }
+
+      case Iop_MulI128by10:       op = Pav_MulI128by10;      goto do_MulI128;
+      case Iop_MulI128by10Carry:  op = Pav_MulI128by10Carry; goto do_MulI128;
+      do_MulI128: {
+            HReg dst = newVRegV(env);
+            HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
+            addInstr(env, PPCInstr_AvUnary(op, dst, arg));
+            return dst;
+         }
+
        default:
           break;
        } /* switch (e->Iex.Unop.op) */
@@ -5393,6 +5995,28 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
           return dst;
        }
  
+      /* BCD Iops */
+      case Iop_I128StoBCD128:
+         {
+            HReg dst = newVRegV(env);
+            HReg arg = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+            PPCRI* ps = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
+
+            addInstr(env, PPCInstr_AvBinaryInt( Pav_I128StoBCD128, dst, arg,
+                                                ps ) );
+            return dst;
+         }
+
+      case Iop_MulI128by10E:       op = Pav_MulI128by10E;      goto do_MulI128E;
+      case Iop_MulI128by10ECarry:  op = Pav_MulI128by10ECarry; goto do_MulI128E;
+      do_MulI128E: {
+            HReg dst  = newVRegV(env);
+            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
+            addInstr(env, PPCInstr_AvBinary(op, dst, argL, argR));
+            return dst;
+         }
+
        case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
        case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
        do_AvBCDV128: {
@@ -5579,8 +6203,30 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt, IREndness IEndianess )
           addInstr(env, PPCInstr_Store( 4, am_addr,  rHi, mode64 ));
           addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
           return;
-     }
-     if (ty == Ity_V128) {
+      }
+      if (ty == Ity_I128) {
+         HReg rHi, rLo;
+         PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
+                                           GuestStatePtr(mode64) );
+         PPCAMode* am_addr4 = advance4(env, am_addr);
+
+         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
+         addInstr(env, PPCInstr_Store( 4, am_addr,  rHi, mode64 ));
+         addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
+         return;
+      }
+      if (ty == Ity_F128) {
+         /* Guest state vectors are 16byte aligned,
+            so don't need to worry here */
+         HReg v_src = iselFp128Expr(env, stmt->Ist.Put.data, IEndianess);
+
+         PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
+                                           GuestStatePtr(mode64) );
+         addInstr(env,
+                  PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
+         return;
+      }
+      if (ty == Ity_V128) {
           /* Guest state vectors are 16byte aligned,
              so don't need to worry here */
           HReg v_src = iselVecExpr(env, stmt->Ist.Put.data, IEndianess);
@@ -5714,6 +6360,12 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt, IREndness IEndianess )
           addInstr(env, PPCInstr_Dfp64Unary(Pfp_MOV, fr_dst, fr_src));
           return;
        }
+      if (ty == Ity_F128) {
+         HReg v_dst = lookupIRTemp(env, tmp);
+         HReg v_src = iselFp128Expr(env, stmt->Ist.WrTmp.data, IEndianess);
+         addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
+         return;
+      }
        if (ty == Ity_V128) {
           HReg v_dst = lookupIRTemp(env, tmp);
           HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data, IEndianess);
@@ -6242,6 +6894,7 @@ HInstrArray* iselSB_PPC ( const IRSB* bb,
        case Ity_F64:
           hregLo = mkHReg(True, HRcFlt64, 0, j++);
           break;
+      case Ity_F128:
        case Ity_V128:
           hregLo = mkHReg(True, HRcVec128, 0, j++);
           break;
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c

index cd29630e3e996e4a33cd08c5f09d13505a9548dd..4940ffeae43a07ffb4ab4938d552e1905aca897d 100644 (file)
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -281,6 +281,17 @@ void ppIROp ( IROp op )
        case Iop_SubF128:   vex_printf("SubF128");  return;
        case Iop_MulF128:   vex_printf("MulF128");  return;
        case Iop_DivF128:   vex_printf("DivF128");  return;
+
+      case Iop_TruncF128toI64S:   vex_printf("TruncF128toI64S");  return;
+      case Iop_TruncF128toI32S:   vex_printf("TruncF128toI32S");  return;
+      case Iop_TruncF128toI64U:   vex_printf("TruncF128toI64U");  return;
+      case Iop_TruncF128toI32U:   vex_printf("TruncF128toI32U");  return;
+
+      case Iop_MAddF128:   vex_printf("MAddF128");  return;
+      case Iop_MSubF128:   vex_printf("MSubF128");  return;
+      case Iop_NegMAddF128:   vex_printf("NegMAddF128");  return;
+      case Iop_NegMSubF128:   vex_printf("NegMSubF128");  return;
+
        case Iop_AbsF128:   vex_printf("AbsF128");  return;
        case Iop_NegF128:   vex_printf("NegF128");  return;
        case Iop_SqrtF128:  vex_printf("SqrtF128"); return;
@@ -301,6 +312,8 @@ void ppIROp ( IROp op )
        case Iop_F64toF128:  vex_printf("F64toF128");  return;
        case Iop_F128toF64:  vex_printf("F128toF64");  return;
        case Iop_F128toF32:  vex_printf("F128toF32");  return;
+      case Iop_F128toI128S: vex_printf("F128toI128");  return;
+      case Iop_RndF128:    vex_printf("RndF128");  return;
  
          /* s390 specific */
        case Iop_MAddF32:    vex_printf("s390_MAddF32"); return;
@@ -411,6 +424,8 @@ void ppIROp ( IROp op )
  
        case Iop_F32toF16x4: vex_printf("F32toF16x4"); return;
        case Iop_F16toF32x4: vex_printf("F16toF32x4"); return;
+      case Iop_F16toF64x2: vex_printf("F16toF64x2"); return;
+      case Iop_F64toF16x2: vex_printf("F64toF16x2"); return;
  
        case Iop_RSqrtEst32Fx4: vex_printf("RSqrtEst32Fx4"); return;
        case Iop_RSqrtEst32Ux4: vex_printf("RSqrtEst32Ux4"); return;
@@ -865,6 +880,10 @@ void ppIROp ( IROp op )
        case Iop_Cls8x16: vex_printf("Cls8x16"); return;
        case Iop_Cls16x8: vex_printf("Cls16x8"); return;
        case Iop_Cls32x4: vex_printf("Cls32x4"); return;
+      case Iop_Ctz8x16: vex_printf("Iop_Ctz8x16"); return;
+      case Iop_Ctz16x8: vex_printf("Iop_Ctz16x8"); return;
+      case Iop_Ctz32x4: vex_printf("Iop_Ctz32x4"); return;
+      case Iop_Ctz64x2: vex_printf("Iop_Ctz64x2"); return;
  
        case Iop_ShlV128: vex_printf("ShlV128"); return;
        case Iop_ShrV128: vex_printf("ShrV128"); return;
@@ -1247,6 +1266,8 @@ void ppIROp ( IROp op )
        case Iop_SHA512:  vex_printf("SHA512"); return;
        case Iop_BCDAdd:  vex_printf("BCDAdd"); return;
        case Iop_BCDSub:  vex_printf("BCDSub"); return;
+      case Iop_I128StoBCD128:  vex_printf("bcdcfsq."); return;
+      case Iop_BCD128toI128S:  vex_printf("bcdctsq."); return;
  
        case Iop_PwBitMtxXpose64x2: vex_printf("BitMatrixTranspose64x2"); return;
  
@@ -3052,6 +3073,8 @@ void typeOfPrimop ( IROp op,
        case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
        case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
        case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
+      case Iop_MulI128by10E:
+      case Iop_MulI128by10ECarry:
           BINARY(Ity_V128,Ity_V128, Ity_V128);
  
        case Iop_PolynomialMull8x8:
@@ -3085,6 +3108,13 @@ void typeOfPrimop ( IROp op,
        case Iop_PwBitMtxXpose64x2:
        case Iop_ZeroHI64ofV128:  case Iop_ZeroHI96ofV128:
        case Iop_ZeroHI112ofV128: case Iop_ZeroHI120ofV128:
+      case Iop_F16toF64x2:
+      case Iop_F64toF16x2:
+      case Iop_MulI128by10:
+      case Iop_MulI128by10Carry:
+      case Iop_Ctz8x16: case Iop_Ctz16x8:
+      case Iop_Ctz32x4: case Iop_Ctz64x2:
+      case Iop_BCD128toI128S:
           UNARY(Ity_V128, Ity_V128);
  
        case Iop_ShlV128: case Iop_ShrV128:
@@ -3119,7 +3149,8 @@ void typeOfPrimop ( IROp op,
        case Iop_QandQRSarNnarrow16Sto8Ux8:
        case Iop_QandQRSarNnarrow32Sto16Ux4:
        case Iop_QandQRSarNnarrow64Sto32Ux2:
-         BINARY(Ity_V128,Ity_I8, Ity_V128);
+      case Iop_I128StoBCD128:
+         BINARY(Ity_V128, Ity_I8, Ity_V128);
  
        case Iop_F32ToFixed32Ux4_RZ:
        case Iop_F32ToFixed32Sx4_RZ:
@@ -3184,6 +3215,12 @@ void typeOfPrimop ( IROp op,
        case Iop_DivF128:
           TERNARY(ity_RMode,Ity_F128,Ity_F128, Ity_F128);
  
+      case Iop_MAddF128:
+      case Iop_MSubF128:
+      case Iop_NegMAddF128:
+      case Iop_NegMSubF128:
+         QUATERNARY(ity_RMode,Ity_F128,Ity_F128,Ity_F128, Ity_F128);
+
        case Iop_Add64Fx2: case Iop_Sub64Fx2:
        case Iop_Mul64Fx2: case Iop_Div64Fx2: 
        case Iop_Add32Fx4: case Iop_Sub32Fx4:
@@ -3222,6 +3259,16 @@ void typeOfPrimop ( IROp op,
        case Iop_F128toF32: BINARY(ity_RMode,Ity_F128, Ity_F32);
        case Iop_F128toF64: BINARY(ity_RMode,Ity_F128, Ity_F64);
  
+      case Iop_TruncF128toI32S:
+      case Iop_TruncF128toI64S:
+      case Iop_TruncF128toI32U:
+      case Iop_TruncF128toI64U:
+         UNARY(Ity_F128, Ity_F128);
+
+      case Iop_F128toI128S:
+      case Iop_RndF128:
+         BINARY(ity_RMode,Ity_F128, Ity_F128);
+
        case Iop_D32toD64:
           UNARY(Ity_D32, Ity_D64);
  
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h

index a945efaeaa420ab5eb07fc050b3c3ab107dc43a7..5efcd5c959652a6634259393452a8964b6c3c4d8 100644 (file)
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -670,6 +670,10 @@ typedef
  
        /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
        Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
+      Iop_MAddF128,    // (A * B) + C
+      Iop_MSubF128,    // (A * B) - C
+      Iop_NegMAddF128, // -((A * B) + C)
+      Iop_NegMSubF128, // -((A * B) - C)
  
        /* :: F128 -> F128 */
        Iop_NegF128, Iop_AbsF128,
@@ -688,8 +692,18 @@ typedef
        Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64  */
        Iop_F128toI32U, /* IRRoundingMode(I32) x F128 -> unsigned I32  */
        Iop_F128toI64U, /* IRRoundingMode(I32) x F128 -> unsigned I64  */
+      Iop_F128toI128S,/* IRRoundingMode(I32) x F128 -> signed I128 */
        Iop_F128toF64,  /* IRRoundingMode(I32) x F128 -> F64         */
        Iop_F128toF32,  /* IRRoundingMode(I32) x F128 -> F32         */
+      Iop_RndF128,    /* IRRoundingMode(I32) x F128 -> F128         */
+
+      /* Truncate to the specified value, source and result
+       * are stroed in a F128 register.
+       */
+      Iop_TruncF128toI32S,  /* truncate F128 -> I32         */
+      Iop_TruncF128toI32U,  /* truncate F128 -> I32         */
+      Iop_TruncF128toI64U,  /* truncate F128 -> I64         */
+      Iop_TruncF128toI64S,  /* truncate F128 -> I64         */
  
        /* --- guest x86/amd64 specifics, not mandated by 754. --- */
  
@@ -920,6 +934,9 @@ typedef
        Iop_Cls8x8, Iop_Cls16x4, Iop_Cls32x2,
        Iop_Clz64x2,
  
+      /*Vector COUNT trailing zeros */
+      Iop_Ctz8x16, Iop_Ctz16x8, Iop_Ctz32x4, Iop_Ctz64x2, 
+
        /* VECTOR x VECTOR SHIFT / ROTATE */
        Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
        Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
@@ -1270,6 +1287,12 @@ typedef
         * signed code. */
        Iop_BCDAdd, Iop_BCDSub,
  
+      /* Conversion signed 128-bit integer to signed BCD 128-bit */
+      Iop_I128StoBCD128,
+
+      /* Conversion signed BCD 128-bit to 128-bit integer */
+      Iop_BCD128toI128S,
+
        /* Conversion I64 -> D64 */
        Iop_ReinterpI64asD64,
  
@@ -1339,6 +1362,9 @@ typedef
        /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
        Iop_F32toF16x4, Iop_F16toF32x4,         /* F32x4 <-> F16x4      */
  
+      /* -- Double to/from half conversion -- */
+      Iop_F64toF16x2, Iop_F16toF64x2,
+
        /* --- 32x4 lowest-lane-only scalar FP --- */
  
        /* In binary cases, upper 3/4 is copied from first operand.  In
@@ -1769,6 +1795,22 @@ typedef
           See floating-point equivalents for details. */
        Iop_RecipEst32Ux4, Iop_RSqrtEst32Ux4,
  
+      /* 128-bit multipy by 10 instruction, result is lower 128-bits */
+      Iop_MulI128by10,
+
+      /* 128-bit multipy by 10 instruction, result is carry out from the MSB */
+      Iop_MulI128by10Carry,
+
+      /* 128-bit multipy by 10 instruction, result is lower 128-bits of the
+       * source times 10 plus the carry in
+       */
+      Iop_MulI128by10E,
+
+      /* 128-bit multipy by 10 instruction, result is carry out from the MSB
+       * of the source times 10 plus the carry in
+       */
+      Iop_MulI128by10ECarry,
+
        /* ------------------ 256-bit SIMD Integer. ------------------ */
  
        /* Pack/unpack */
author	Carl Love <cel@us.ibm.com>
	Mon, 15 Aug 2016 21:53:20 +0000 (21:53 +0000)
committer	Carl Love <cel@us.ibm.com>
	Mon, 15 Aug 2016 21:53:20 +0000 (21:53 +0000)
VEX/priv/guest_ppc_toIR.c		patch \| blob \| blame \| history
VEX/priv/host_ppc_defs.c		patch \| blob \| blame \| history
VEX/priv/host_ppc_defs.h		patch \| blob \| blame \| history
VEX/priv/host_ppc_isel.c		patch \| blob \| blame \| history
VEX/priv/ir_defs.c		patch \| blob \| blame \| history
VEX/pub/libvex_ir.h		patch \| blob \| blame \| history