PPC64, fix issues with denormal values in the vector fp instructions.

author Carl Love <carll@us.ibm.com>

Tue, 28 May 2019 16:26:13 +0000 (11:26 -0500)

committer Carl Love <carll@us.ibm.com>

Tue, 28 May 2019 18:49:33 +0000 (13:49 -0500)
author Carl Love <carll@us.ibm.com>
Tue, 28 May 2019 16:26:13 +0000 (11:26 -0500)
committer Carl Love <carll@us.ibm.com>
Tue, 28 May 2019 18:49:33 +0000 (13:49 -0500)
diff --git a/NEWS b/NEWS

index 53c92cef8bb2b817660354266a268ce7ecddc5df..788e92fc75979ff5265420afa440631612e68417 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -203,6 +203,8 @@ where XXXXXX is the bug number as listed below.
  405782  "VEX temporary storage exhausted" when attempting to debug slic3r-pe
  406198  none/tests/ppc64/test_isa_3_0_other test sporadically including CA
          bit in output.
+406256  PPC64, vector floating point instructions don't handle subnormal
+        according to VSCR[NJ] bit setting.
  406352  cachegrind/callgrind fails ann tests because of missing a.c
  406354  dhat is broken on x86 (32bit)
  406355  mcsignopass, mcsigpass, mcbreak fail due to difference in gdb output
diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c

index 9cabe28281218e217b39968591499a99301a5180..80c7965556179fea47e1ef56d5ecbc7d57fba627 100644 (file)
--- a/VEX/priv/guest_ppc_helpers.c
+++ b/VEX/priv/guest_ppc_helpers.c
@@ -833,7 +833,15 @@ void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state )
  
     vex_state->guest_VRSAVE = 0;
  
-   vex_state->guest_VSCR = 0x0;  // Non-Java mode = 0
+# if defined(VGP_ppc64be_linux)
+   /* By default, the HW for BE sets the VSCR[NJ] bit to 1.
+      VSCR is a 128-bit register, NJ bit is bit 111 (IBM numbering).
+      However, guest_VSCR models only the low-order 32 bits, hence bit 16. */
+   vex_state->guest_VSCR = 0x1 << (127 - 111);
+# else
+   /* LE API requires NJ be set to 0. */
+   vex_state->guest_VSCR = 0x0;
+#endif
  
     vex_state->guest_EMNOTE = EmNote_NONE;
  
@@ -1000,7 +1008,15 @@ void LibVEX_GuestPPC64_initialise ( /*OUT*/VexGuestPPC64State* vex_state )
  
     vex_state->guest_VRSAVE = 0;
  
-   vex_state->guest_VSCR = 0x0;  // Non-Java mode = 0
+# if defined(VGP_ppc64be_linux)
+   /* By default, the HW for BE sets the VSCR[NJ] bit to 1.
+      VSCR is a 128-bit register, NJ bit is bit 111 (IBM numbering).
+      However, guest_VSCR models only the low-order 32 bits, hence bit 16. */
+   vex_state->guest_VSCR = 0x1 << (127 - 111);
+# else
+   /* LE API requires NJ be set to 0. */
+   vex_state->guest_VSCR = 0x0;
+#endif
  
     vex_state->guest_EMNOTE = EmNote_NONE;
  
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c

index 81ae16fd1632c6358c4a49330f6cfd1b42aaf83b..ad79b5e560052084fce833bd71883315cdc97a65 100644 (file)
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -154,6 +154,35 @@
   *   register" (i.e, the part on the left side).
   *
   */
+
+/* Notes on handling subnormal results:
+ *
+ * The various vector floating point instructions:
+ *    vmaddfp, vnmsubfp, vaddfp, vsubfp, vmaxfp, vminfp, vrefp, vrsqrtefp,
+ *    vexptefp, vlogefp, vcmpeqfp, vcmpgefp, vcmpgtfp, vcmpbfp, vrfin, vrfiz,
+ *    vrfip, vrfim
+ * generate subnormal results that are controlled by the VSCR[NJ] bit setting.
+ *
+ * The following describes how the host and guest are set up so that the
+ * function dnorm_adj_Vector() can properly handle the results of the Iops in
+ * the guest state.
+ *
+ *   At startup, on all host variants, we set VSCR[NJ].host = 0 (don't flush to
+ *   zero).  It stays at 0 permanently.
+ *
+ *   At startup, we set VSCR[NJ].guest = (if BE then 1 else 0)
+ *
+ *   When running, guest insns can set/clear/query VSCR[NJ].guest as they
+ *   like.
+ *
+ *   When running, any (guest) insn whose result depends on VSCR[NJ] will query
+ *   VSCR[NJ].guest and the results will be truncated accordingly, by
+ *   dnorm_adj_Vector().  Because VSCR[NJ].host is always 0, we will always
+ *   be able to provide correct guest results for either value of
+ *   VSCR[NJ].guest.
+ */
+
+
  /* Translates PPC32/64 code to IR. */
  
  /* References
@@ -469,23 +498,6 @@ typedef enum {
  #define MASK_VSCR_VALID 0x00010001
  
  
-/*------------------------------------------------------------*/
-/*---  FP Helpers                                          ---*/
-/*------------------------------------------------------------*/
-
-/* Produce the 32-bit pattern corresponding to the supplied
-   float. */
-static UInt float_to_bits ( Float f )
-{
-   union { UInt i; Float f; } u;
-   vassert(4 == sizeof(UInt));
-   vassert(4 == sizeof(Float));
-   vassert(4 == sizeof(u));
-   u.f = f;
-   return u.i;
-}
-
-
  /*------------------------------------------------------------*/
  /*--- Misc Helpers                                         ---*/
  /*------------------------------------------------------------*/
@@ -3666,6 +3678,7 @@ static IRExpr * fp_exp_part( IRType size, IRTemp src )
  #define I32_EXP_MASK       0x7F800000
  #define I32_FRACTION_MASK  0x007FFFFF
  #define I32_MSB_FRACTION_MASK  0x00400000
+#define I32_SIGN_MASK      0x80000000
  #define I64_EXP_MASK       0x7FF0000000000000ULL
  #define I64_FRACTION_MASK  0x000FFFFFFFFFFFFFULL
  #define I64_MSB_FRACTION_MASK  0x0008000000000000ULL
@@ -3943,6 +3956,117 @@ static IRExpr * is_Denorm( IRType size, IRTemp src )
     return  mkAND1( zero_exp, not_zero_frac );
  }
  
+static IRExpr * is_Zero_Vector( IRType element_size, IRExpr *src )
+{
+/* Return a V128 with all 1's in every element of the floating point vector
+   src that is zero (exponent and fraction both zero, sign bit ignored).
+   Only element_size == Ity_I32 is handled; other sizes print an error. */
+   IRTemp exp_maskV128 = newTemp( Ity_V128 );
+   IRTemp exp_zeroV128 = newTemp( Ity_V128 );
+   IRTemp frac_maskV128 = newTemp( Ity_V128 );
+   IRTemp frac_zeroV128 = newTemp( Ity_V128 );
+   IRTemp zeroV128 = newTemp( Ity_V128 );
+
+   assign( zeroV128, mkV128( 0 ) );
+
+   if ( element_size == Ity_I32 ) {
+      assign( exp_maskV128, unop( Iop_Dup32x4, mkU32( I32_EXP_MASK ) ) );
+      assign( frac_maskV128, unop( Iop_Dup32x4, mkU32( I32_FRACTION_MASK ) ) );
+
+   } else
+      vex_printf("ERROR, is_Zero_Vector:  Unknown input size\n");
+
+   /* CmpEQ32x4 returns all 1's in elements where comparison is true */
+   assign( exp_zeroV128,
+           binop( Iop_CmpEQ32x4,
+                  binop( Iop_AndV128,
+                         mkexpr( exp_maskV128 ), src ),
+                  mkexpr( zeroV128 ) ) );
+
+   assign( frac_zeroV128,
+           binop( Iop_CmpEQ32x4,
+                  binop( Iop_AndV128,
+                         mkexpr( frac_maskV128 ), src ),
+                  mkexpr( zeroV128 ) ) );
+
+   return binop( Iop_AndV128, mkexpr( exp_zeroV128 ),
+                 mkexpr( frac_zeroV128 ) );
+}
+
+static IRExpr * is_Denorm_Vector( IRType element_size, IRExpr *src )
+{
+/* Return a V128 with all 1's in every element of the floating point vector
+   src that is denormalized (zero exponent, non-zero fraction).
+   Only element_size == Ity_I32 is handled; other sizes print an error. */
+   IRTemp exp_maskV128 = newTemp( Ity_V128 );
+   IRTemp exp_zeroV128 = newTemp( Ity_V128 );
+   IRTemp frac_maskV128 = newTemp( Ity_V128 );
+   IRTemp frac_nonzeroV128 = newTemp( Ity_V128 );
+   IRTemp zeroV128 = newTemp( Ity_V128 );
+
+   assign( zeroV128, mkV128(0 ) );
+
+   if ( element_size == Ity_I32 ) {
+      assign( exp_maskV128, unop( Iop_Dup32x4, mkU32( I32_EXP_MASK ) ) );
+      assign( frac_maskV128, unop( Iop_Dup32x4, mkU32( I32_FRACTION_MASK ) ) );
+
+   } else
+      vex_printf("ERROR, is_Denorm_Vector:  Unknown input size\n");
+
+   /* CmpEQ32x4 returns all 1's in elements where comparison is true */
+   assign( exp_zeroV128,
+           binop( Iop_CmpEQ32x4,
+                  binop( Iop_AndV128,
+                         mkexpr( exp_maskV128 ), src ),
+                  mkexpr( zeroV128 ) ) );
+
+   assign( frac_nonzeroV128,
+           unop( Iop_NotV128,
+                 binop( Iop_CmpEQ32x4,
+                        binop( Iop_AndV128,
+                               mkexpr( frac_maskV128 ), src ),
+                        mkexpr( zeroV128 ) ) ) );
+
+   return binop( Iop_AndV128, mkexpr( exp_zeroV128 ),
+                 mkexpr( frac_nonzeroV128 ) );
+}
+
+static IRExpr * is_NaN_Vector( IRType element_size, IRExpr *src )
+{
+   IRTemp max_expV128 = newTemp( Ity_V128 );
+   IRTemp not_zero_fracV128 = newTemp( Ity_V128 );
+   IRTemp zeroV128  = newTemp( Ity_V128 );
+   IRTemp exp_maskV128 = newTemp( Ity_V128 );
+   IRTemp frac_maskV128 = newTemp( Ity_V128 );
+   IROp   opCmpEQ;
+   /* Result: all 1's in every element of src that is a NaN, else 0's. */
+   assign( zeroV128, mkV128( 0 ) );
+   /* opCmpEQ and the mask temps are only set up for Ity_I32 elements. */
+   if ( element_size == Ity_I32 ) {
+      assign( exp_maskV128, unop( Iop_Dup32x4, mkU32( I32_EXP_MASK ) ) );
+      assign( frac_maskV128, unop( Iop_Dup32x4, mkU32( I32_FRACTION_MASK ) ) );
+      opCmpEQ = Iop_CmpEQ32x4;
+
+   } else
+      vex_printf("ERROR, is_NaN_Vector:  Unknown input size\n");
+
+   /* Check exponent is all ones, i.e. (src AND exp_mask) == exp_mask. */
+   assign( max_expV128,
+           binop( opCmpEQ,
+                  binop( Iop_AndV128, src, mkexpr( exp_maskV128 ) ),
+                  mkexpr( exp_maskV128 ) ) );
+
+   /* Check fractional part is not zero. */
+   assign( not_zero_fracV128,
+           unop( Iop_NotV128,
+           binop( opCmpEQ,
+                        binop( Iop_AndV128, src, mkexpr( frac_maskV128 ) ),
+                        mkexpr( zeroV128 ) ) ) );
+
+   return  binop( Iop_AndV128, mkexpr( max_expV128 ),
+                  mkexpr( not_zero_fracV128 ) );
+}
+
  #if 0
  /* Normalized number has exponent between 1 and max_exp -1, or in other words
     the exponent is not zero and not equal to the max exponent value. */
@@ -4171,6 +4295,31 @@ static IRTemp getNegatedResult_32(IRTemp intermediateResult)
     return negatedResult;
  }
  
+static IRExpr* negate_Vector ( IRType element_size, IRExpr* value )
+{
+   /* This function takes a vector of floats and flips the sign bit of every
+      element that is not a NaN; NaN elements are returned unchanged.  */
+
+   IRTemp not_nan_mask = newTemp( Ity_V128 );
+   IRTemp sign_maskV128 = newTemp( Ity_V128 );
+
+   if ( element_size == Ity_I32 ) {
+      assign( sign_maskV128, unop( Iop_Dup32x4, mkU32( I32_SIGN_MASK ) ) );
+
+   } else
+      vex_printf("ERROR, negate_Vector:  Unknown input size\n");
+
+   /* Determine which vector elements are not a NaN, then flip the sign bit
+      of the non-NaN elements. */
+   assign ( not_nan_mask,
+            unop( Iop_NotV128, is_NaN_Vector( element_size, value ) ) );
+
+   return binop( Iop_XorV128,
+                 binop( Iop_AndV128,
+                        mkexpr( sign_maskV128 ), mkexpr( not_nan_mask ) ),
+                 value );
+}
+
  /* This function takes two quad_precision floating point numbers of type
     V128 and return 1 if src_A > src_B, 0 otherwise. */
  static IRExpr * Quad_precision_gt ( IRTemp src_A, IRTemp  src_B )
@@ -4806,6 +4955,106 @@ static IRExpr * UNSIGNED_CMP_GT_V128 ( IRExpr *vA, IRExpr *vB ) {
     return mkexpr( result );
  }
  
+/*------------------------------------------------------------*/
+/*---  FP Helpers                                          ---*/
+/*------------------------------------------------------------*/
+
+/* Produce the 32-bit pattern corresponding to the supplied float, by
+   storing it in a union and reading the same bytes back as a UInt. */
+static UInt float_to_bits ( Float f )
+{
+   union { UInt i; Float f; } u;
+   vassert(4 == sizeof(UInt));
+   vassert(4 == sizeof(Float));
+   vassert(4 == sizeof(u));
+   u.f = f;
+   return u.i;
+}
+
+static IRExpr* dnorm_adj_Vector ( IRExpr* src )
+{
+   /* This function takes a vector of 32-bit floats.  It does the required
+      adjustment on denormalized values based on the setting of the
+      VSCR[NJ] bit.
+
+      The VSCR[NJ] bit controls how subnormal (denormalized) results for
+      vector floating point operations are handled. VSCR[NJ] is bit 16,
+      counting the least significant bit as 0 (bit 111 IBM numbering).
+
+      VSCR[NJ] = 0  Denormalized values are handled as
+                    specified by Java and the IEEE standard.
+
+      VSCR[NJ] = 1  If an element in a source VR contains a denormalized
+                    value, the value 0 is used instead. If an instruction
+                    causes an Underflow Exception, the corresponding element
+                    in the target VR is set to 0.  In both cases the 0 has
+                    the same sign as the denormalized or underflowing value.
+                    NOTE: the code below leaves a -0 element unchanged.
+
+      The ABI for LE requires VSCR[NJ] = 0.  For BE mode, VSCR[NJ] = 1 by
+      default.  The PPC guest state is initialized to match the HW setup.
+   */
+   IRTemp sign_bit_maskV128 = newTemp( Ity_V128 );
+   IRTemp ones_maskV128 = newTemp( Ity_V128 );
+   IRTemp clear_dnorm_maskV128 = newTemp( Ity_V128 );
+   IRTemp adj_valueV128 = newTemp( Ity_V128 );
+   IRTemp dnormV128 = newTemp( Ity_V128 );
+   IRTemp zeroV128  = newTemp( Ity_V128 );
+   IRTemp VSCR_NJ = newTemp( Ity_I64 );
+   IRTemp VSCR_NJ_mask = newTemp( Ity_V128 );
+   IRTemp resultV128 = newTemp( Ity_V128 );
+
+   /* Get the VSCR[NJ] bit, widened by Iop_1Sto64 into a 64-bit mask. */
+   assign( VSCR_NJ,
+           unop( Iop_1Sto64,
+                 unop( Iop_32to1,
+                       binop( Iop_Shr32,
+                              getGST( PPC_GST_VSCR ),
+                              mkU8( 16 ) ) ) ) );
+
+   assign ( VSCR_NJ_mask, binop( Iop_64HLtoV128,
+                                 mkexpr( VSCR_NJ ) ,
+                                 mkexpr( VSCR_NJ ) ) );
+
+   /* Create the masks identifying the denormalized and the zero elements.
+      NOTE: zeroV128 is assigned but not used further in this function. */
+   assign( dnormV128, is_Denorm_Vector( Ity_I32, src ) );
+   assign( zeroV128, is_Zero_Vector( Ity_I32, src ) );
+
+   /* If the value is dnorm, then we need to clear the significand and
+      exponent but leave the sign bit. Put 1's in elements that are not
+      denormalized values.  */
+   assign( sign_bit_maskV128, unop( Iop_Dup32x4, mkU32( 0x80000000 ) ) );
+
+   assign( clear_dnorm_maskV128,
+           binop( Iop_OrV128,
+                  binop( Iop_AndV128,
+                         mkexpr( dnormV128 ),
+                         mkexpr( sign_bit_maskV128 ) ),
+                  unop( Iop_NotV128, mkexpr( dnormV128 ) ) ) );
+
+   assign( ones_maskV128, mkV128( 0xFFFF ) );
+
+   assign( adj_valueV128, binop( Iop_AndV128,
+                                 mkexpr( clear_dnorm_maskV128 ),
+                                 binop( Iop_AndV128,
+                                        src,
+                                        mkexpr( ones_maskV128 ) ) ) );
+
+   /* If the VSCR[NJ] bit is 1, then clear the denormalized values,
+      otherwise just return the input unchanged.  */
+   assign( resultV128,
+           binop( Iop_OrV128,
+                  binop( Iop_AndV128,
+                         mkexpr( VSCR_NJ_mask ),
+                         mkexpr( adj_valueV128 ) ),
+                  binop( Iop_AndV128,
+                         unop( Iop_NotV128, mkexpr( VSCR_NJ_mask ) ),
+                         src ) ) );
+
+   return mkexpr(resultV128);
+}
+
  /*------------------------------------------------------------*/
  /* Transactional memory helpers
   *
@@ -27364,22 +27613,33 @@ static Bool dis_av_fp_arith ( UInt theInstr )
        DIP("vmaddfp v%d,v%d,v%d,v%d\n",
            vD_addr, vA_addr, vC_addr, vB_addr);
        putVReg( vD_addr,
-               triop(Iop_Add32Fx4, mkU32(Irrm_NEAREST),
-                     mkexpr(vB),
-                     triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST),
-                           mkexpr(vA), mkexpr(vC))) );
+               dnorm_adj_Vector(
+                  triop( Iop_Add32Fx4,
+                         mkU32( Irrm_NEAREST ),
+                         dnorm_adj_Vector( mkexpr( vB ) ),
+                         dnorm_adj_Vector( triop( Iop_Mul32Fx4,
+                                                  mkU32( Irrm_NEAREST ),
+                                                  dnorm_adj_Vector( mkexpr( vA ) ),
+                                                  dnorm_adj_Vector( mkexpr( vC ) ) )
+                            ) ) ) );
        return True;
  
     case 0x2F: { // vnmsubfp (Negative Multiply-Subtract FP, AV p215)
        DIP("vnmsubfp v%d,v%d,v%d,v%d\n",
            vD_addr, vA_addr, vC_addr, vB_addr);
        putVReg( vD_addr,
-               triop(Iop_Sub32Fx4, mkU32(Irrm_NEAREST),
-                     mkexpr(vB),
-                     triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST),
-                           mkexpr(vA), mkexpr(vC))) );
-      return True;
-   }
+               negate_Vector( Ity_I32,
+                  dnorm_adj_Vector(
+                     triop( Iop_Sub32Fx4,
+                            mkU32( Irrm_NEAREST ),
+                            dnorm_adj_Vector(
+                               triop( Iop_Mul32Fx4,
+                                      mkU32( Irrm_NEAREST ),
+                                      dnorm_adj_Vector( mkexpr( vA ) ),
+                                      dnorm_adj_Vector( mkexpr( vC ) ) ) ),
+                            dnorm_adj_Vector( mkexpr( vB ) ) ) ) ) );
+       return True;
+    }
  
     default:
       break; // Fall through...
@@ -27389,24 +27649,32 @@ static Bool dis_av_fp_arith ( UInt theInstr )
     switch (opc2) {
     case 0x00A: // vaddfp (Add FP, AV p137)
        DIP("vaddfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      putVReg( vD_addr, triop(Iop_Add32Fx4,
-                              mkU32(Irrm_NEAREST), mkexpr(vA), mkexpr(vB)) );
+      putVReg( vD_addr,
+               dnorm_adj_Vector( triop( Iop_Add32Fx4, mkU32( Irrm_NEAREST ),
+                                        dnorm_adj_Vector( mkexpr( vA ) ),
+                                        dnorm_adj_Vector( mkexpr( vB ) ) ) ) );
        return True;
  
    case 0x04A: // vsubfp (Subtract FP, AV p261)
        DIP("vsubfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      putVReg( vD_addr, triop(Iop_Sub32Fx4,
-                              mkU32(Irrm_NEAREST), mkexpr(vA), mkexpr(vB)) );
+      putVReg( vD_addr,
+               dnorm_adj_Vector( triop( Iop_Sub32Fx4, mkU32( Irrm_NEAREST ),
+                                        dnorm_adj_Vector( mkexpr( vA ) ),
+                                        dnorm_adj_Vector( mkexpr( vB ) ) ) ) );
        return True;
  
     case 0x40A: // vmaxfp (Maximum FP, AV p178)
        DIP("vmaxfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      putVReg( vD_addr, binop(Iop_Max32Fx4, mkexpr(vA), mkexpr(vB)) );
+      putVReg( vD_addr,
+               dnorm_adj_Vector( binop( Iop_Max32Fx4,
+                                        mkexpr( vA ), mkexpr( vB ) ) ) );
        return True;
  
     case 0x44A: // vminfp (Minimum FP, AV p187)
        DIP("vminfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      putVReg( vD_addr, binop(Iop_Min32Fx4, mkexpr(vA), mkexpr(vB)) );
+      putVReg( vD_addr,
+               dnorm_adj_Vector( binop( Iop_Min32Fx4,
+                                    mkexpr( vA ), mkexpr( vB ) ) ) );
        return True;
  
     default:
@@ -27422,22 +27690,28 @@ static Bool dis_av_fp_arith ( UInt theInstr )
     switch (opc2) {
     case 0x10A: // vrefp (Reciprocal Esimate FP, AV p228)
        DIP("vrefp v%d,v%d\n", vD_addr, vB_addr);
-      putVReg( vD_addr, unop(Iop_RecipEst32Fx4, mkexpr(vB)) );
+      putVReg( vD_addr, dnorm_adj_Vector( unop( Iop_RecipEst32Fx4,
+                                            dnorm_adj_Vector( mkexpr( vB ) ) ) ) );
        return True;
  
     case 0x14A: // vrsqrtefp (Reciprocal Sqrt Estimate FP, AV p237)
        DIP("vrsqrtefp v%d,v%d\n", vD_addr, vB_addr);
-      putVReg( vD_addr, unop(Iop_RSqrtEst32Fx4, mkexpr(vB)) );
+      putVReg( vD_addr, dnorm_adj_Vector( unop( Iop_RSqrtEst32Fx4,
+                                            dnorm_adj_Vector( mkexpr( vB ) ) ) ) );
        return True;
  
     case 0x18A: // vexptefp (2 Raised to the Exp Est FP, AV p173)
        DIP("vexptefp v%d,v%d\n", vD_addr, vB_addr);
        DIP(" => not implemented\n");
+      /* NOTE, need to address denormalized value handling when this is
+         implemented.  */
        return False;
  
     case 0x1CA: // vlogefp (Log2 Estimate FP, AV p175)
        DIP("vlogefp v%d,v%d\n", vD_addr, vB_addr);
        DIP(" => not implemented\n");
+      /* NOTE, need to address denormalized value handling when this is
+         implemented.  */
        return False;
  
     default:
@@ -27477,25 +27751,34 @@ static Bool dis_av_fp_cmp ( UInt theInstr )
     case 0x0C6: // vcmpeqfp (Compare Equal-to FP, AV p159)
        DIP("vcmpeqfp%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
                                        vD_addr, vA_addr, vB_addr);
-      assign( vD, binop(Iop_CmpEQ32Fx4, mkexpr(vA), mkexpr(vB)) );
+      assign( vD, binop( Iop_CmpEQ32Fx4,
+                         dnorm_adj_Vector( mkexpr( vA ) ),
+                         dnorm_adj_Vector( mkexpr( vB ) ) ) );
        break;
  
     case 0x1C6: // vcmpgefp (Compare Greater-than-or-Equal-to, AV p163)
        DIP("vcmpgefp%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
                                        vD_addr, vA_addr, vB_addr);
-      assign( vD, binop(Iop_CmpGE32Fx4, mkexpr(vA), mkexpr(vB)) );
+      assign( vD, binop( Iop_CmpGE32Fx4,
+                         dnorm_adj_Vector( mkexpr( vA ) ),
+                         dnorm_adj_Vector( mkexpr( vB ) ) ) );
        break;
  
     case 0x2C6: // vcmpgtfp (Compare Greater-than FP, AV p164)
        DIP("vcmpgtfp%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
                                        vD_addr, vA_addr, vB_addr);
-      assign( vD, binop(Iop_CmpGT32Fx4, mkexpr(vA), mkexpr(vB)) );
+      assign( vD, binop( Iop_CmpGT32Fx4,
+                         dnorm_adj_Vector( mkexpr( vA ) ),
+                         dnorm_adj_Vector( mkexpr( vB ) ) ) );
        break;
  
     case 0x3C6: { // vcmpbfp (Compare Bounds FP, AV p157)
        IRTemp gt      = newTemp(Ity_V128);
        IRTemp lt      = newTemp(Ity_V128);
        IRTemp zeros   = newTemp(Ity_V128);
+      IRTemp srcA    = newTemp(Ity_V128);
+      IRTemp srcB    = newTemp(Ity_V128);
+
        DIP("vcmpbfp%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
                                       vD_addr, vA_addr, vB_addr);
        cmp_bounds = True;
@@ -27509,13 +27792,17 @@ static Bool dis_av_fp_cmp ( UInt theInstr )
           need this for the other compares too (vcmpeqfp etc)...
           Better still, tighten down the spec for compare irops.
         */
-      assign( gt, unop(Iop_NotV128,
-                       binop(Iop_CmpLE32Fx4, mkexpr(vA), mkexpr(vB))) );
-      assign( lt, unop(Iop_NotV128,
-                       binop(Iop_CmpGE32Fx4, mkexpr(vA),
-                             triop(Iop_Sub32Fx4, mkU32(Irrm_NEAREST),
-                                   mkexpr(zeros),
-                                   mkexpr(vB)))) );
+      assign ( srcA, dnorm_adj_Vector( mkexpr( vA ) ) );
+      assign ( srcB, dnorm_adj_Vector( mkexpr( vB ) ) );
+
+      assign( gt, unop( Iop_NotV128,
+                        binop( Iop_CmpLE32Fx4, mkexpr( srcA ),
+                               mkexpr( srcB ) ) ) );
+      assign( lt, unop( Iop_NotV128,
+                        binop( Iop_CmpGE32Fx4, mkexpr( srcA ),
+                               triop( Iop_Sub32Fx4, mkU32( Irrm_NEAREST ),
+                                      mkexpr( zeros ),
+                                      mkexpr( srcB ) ) ) ) );
  
        // finally, just shift gt,lt to correct position
        assign( vD, binop(Iop_ShlN32x4,
@@ -27617,22 +27904,26 @@ static Bool dis_av_fp_convert ( UInt theInstr )
     switch (opc2) {
     case 0x20A: // vrfin (Round to FP Integer Nearest, AV p231)
        DIP("vrfin v%d,v%d\n", vD_addr, vB_addr);
-      putVReg( vD_addr, unop(Iop_RoundF32x4_RN, mkexpr(vB)) );
+      putVReg( vD_addr, unop(Iop_RoundF32x4_RN,
+                             dnorm_adj_Vector( mkexpr( vB ) ) ) );
        break;
  
     case 0x24A: // vrfiz (Round to FP Integer toward zero, AV p233)
        DIP("vrfiz v%d,v%d\n", vD_addr, vB_addr);
-      putVReg( vD_addr, unop(Iop_RoundF32x4_RZ, mkexpr(vB)) );
+      putVReg( vD_addr, unop(Iop_RoundF32x4_RZ,
+                             dnorm_adj_Vector( mkexpr( vB ) ) ) );
        break;
  
     case 0x28A: // vrfip (Round to FP Integer toward +inf, AV p232)
        DIP("vrfip v%d,v%d\n", vD_addr, vB_addr);
-      putVReg( vD_addr, unop(Iop_RoundF32x4_RP, mkexpr(vB)) );
+      putVReg( vD_addr, unop(Iop_RoundF32x4_RP,
+                             dnorm_adj_Vector( mkexpr( vB ) ) ) );
        break;
  
     case 0x2CA: // vrfim (Round to FP Integer toward -inf, AV p230)
        DIP("vrfim v%d,v%d\n", vD_addr, vB_addr);
-      putVReg( vD_addr, unop(Iop_RoundF32x4_RM, mkexpr(vB)) );
+      putVReg( vD_addr, unop(Iop_RoundF32x4_RM,
+                             dnorm_adj_Vector( mkexpr(vB ) ) ) );
        break;
  
     default:
diff --git a/coregrind/m_dispatch/dispatch-ppc32-linux.S b/coregrind/m_dispatch/dispatch-ppc32-linux.S

index 002345dd0f387e2f3ac86b588df5827b988ed073..b679a2eb6a51ed2e56da8a60e550280b3b99d806 100644 (file)
--- a/coregrind/m_dispatch/dispatch-ppc32-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc32-linux.S
@@ -217,7 +217,7 @@ LafterFP2:
  
  #ifdef HAS_ALTIVEC
          vspltisw 3,0x0  /* generate zero */
-        mtvscr  3
+        mtvscr  3       /* sets VSCR[NJ]=0 */
  #endif
          
  LafterVMX2:
@@ -275,7 +275,8 @@ LafterFP8:
          beq     LafterVMX8
  
  #ifdef HAS_ALTIVEC
-        /* Check VSCR[NJ] == 1 */
+        /* Expect VSCR[NJ] to be 0, call invariant_violation if
+           VSCR[NJ] == 1 . */
          /* first generate 4x 0x00010000 */
          vspltisw  4,0x1                   /* 4x 0x00000001 */
          vspltisw  5,0x0                   /* zero */
diff --git a/coregrind/m_dispatch/dispatch-ppc64be-linux.S b/coregrind/m_dispatch/dispatch-ppc64be-linux.S

index 27fcc6996825cfb7b410fe247d6acc243af63b82..9722fd803dcaf7ab412e8460c1e342b27ed02eb8 100644 (file)
--- a/coregrind/m_dispatch/dispatch-ppc64be-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc64be-linux.S
@@ -234,7 +234,8 @@ VG_(disp_run_translations):
          beq     .LafterVMX2
  
          vspltisw 3,0x0  /* generate zero */
-        mtvscr  3
+        mtvscr  3       /* sets VSCR[NJ]=0 */
+
  .LafterVMX2:
  
          /* make a stack frame for the code we are calling */
@@ -284,7 +285,8 @@ VG_(disp_run_translations):
          cmpldi  11,0    /* Do we have altivec? */
          beq     .LafterVMX8
  
-        /* Check VSCR[NJ] == 1 */
+        /* Expect VSCR[NJ] to be 0, call invariant_violation if
+           VSCR[NJ] == 1 . */
          /* first generate 4x 0x00010000 */
          vspltisw  4,0x1                   /* 4x 0x00000001 */
          vspltisw  5,0x0                   /* zero */
@@ -294,7 +296,7 @@ VG_(disp_run_translations):
          vand      7,7,6                   /* gives NJ flag */
          vspltw    7,7,0x3                 /* flags-word to all lanes */
          vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
-        bt        24,.invariant_violation /* branch if all_equal */
+        bt        24,.invariant_violation /* branch if all_equal, i.e. NJ=1 */
  
  .LafterVMX8:
         /* otherwise we're OK */
diff --git a/coregrind/m_dispatch/dispatch-ppc64le-linux.S b/coregrind/m_dispatch/dispatch-ppc64le-linux.S

index a2ad8fceee9a3f6666910bfba71242f8d3fef477..e790ae84cdf81d85782c948e2d6cc630036f41d7 100644 (file)
--- a/coregrind/m_dispatch/dispatch-ppc64le-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc64le-linux.S
@@ -255,7 +255,8 @@ VG_(disp_run_translations):
          beq     .LafterVMX2
  
          vspltisw 3,0x0  /* generate zero */
-        mtvscr  3
+        mtvscr  3       /* sets VSCR[NJ]=0 */
+
  .LafterVMX2:
  
          /* make a stack frame for the code we are calling */
@@ -310,7 +311,8 @@ VG_(disp_run_translations):
          cmpldi  11,0    /* Do we have altivec? */
          beq     .LafterVMX8
  
-        /* Check VSCR[NJ] == 1 */
+        /* Expect VSCR[NJ] to be 0, call invariant_violation if
+           VSCR[NJ] == 1 . */
          /* first generate 4x 0x00010000 */
          vspltisw  4,0x1                   /* 4x 0x00000001 */
          vspltisw  5,0x0                   /* zero */
@@ -320,7 +322,7 @@ VG_(disp_run_translations):
          vand      7,7,6                   /* gives NJ flag */
          vspltw    7,7,0x3                 /* flags-word to all lanes */
          vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
-        bt        24,.invariant_violation /* branch if all_equal */
+        bt        24,.invariant_violation /* branch if all_equal, i.e. NJ=1 */
  
  .LafterVMX8:
         /* otherwise we're OK */
author	Carl Love <carll@us.ibm.com>
	Tue, 28 May 2019 16:26:13 +0000 (11:26 -0500)
committer	Carl Love <carll@us.ibm.com>
	Tue, 28 May 2019 18:49:33 +0000 (13:49 -0500)
NEWS		patch \| blob \| blame \| history
VEX/priv/guest_ppc_helpers.c		patch \| blob \| blame \| history
VEX/priv/guest_ppc_toIR.c		patch \| blob \| blame \| history
coregrind/m_dispatch/dispatch-ppc32-linux.S		patch \| blob \| blame \| history
coregrind/m_dispatch/dispatch-ppc64be-linux.S		patch \| blob \| blame \| history
coregrind/m_dispatch/dispatch-ppc64le-linux.S		patch \| blob \| blame \| history