From: Julian Seward
Date: Fri, 3 Feb 2006 16:12:27 +0000 (+0000)
Subject: * Track introduction of IR ternary primops and rounding modes, at least as
X-Git-Tag: svn/VALGRIND_3_2_0~306
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5163ea2aed90f89c5c8dfd582b379e39e1aed255;p=thirdparty%2Fvalgrind.git

* Track introduction of IR ternary primops and rounding modes, at least as
  to the extent needed to make ppc32 work.

* As a result, remove the replacements for glibc's floor/ceil fns on
  ppc32/64, since vex can now correctly simulate the real ones.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5605
---
diff --git a/coregrind/m_dispatch/dispatch-ppc32-linux.S b/coregrind/m_dispatch/dispatch-ppc32-linux.S
index 0f3226bbc7..ff8e4a28d3 100644
--- a/coregrind/m_dispatch/dispatch-ppc32-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc32-linux.S
@@ -401,7 +401,7 @@ run_innerloop_exit:
         /* This check avoidance may be removable if stfiwx is implemented. */
-#       if !defined(ENABLE_INNER)
+#       if 0 //!defined(ENABLE_INNER)
         /* Check FPSCR & 0xFF == 0 (lowest 8bits are controls) */
         mffs    4               /* fpscr -> fpr */
         li      5,48
diff --git a/coregrind/vg_preloaded.c b/coregrind/vg_preloaded.c
index cad1e40214..1b17ee0ba1 100644
--- a/coregrind/vg_preloaded.c
+++ b/coregrind/vg_preloaded.c
@@ -66,183 +66,6 @@ void VG_NOTIFY_ON_LOAD(freeres)( void )
    *(int *)0 = 'x';
 }
 
-/* ---------------------------------------------------------------------
-   Avoid glibc's floor/ceil functions on ppc32/64.  In recent glibcs
-   (about 2.3.4 and after) these rely on doing fadd/fsub with with
-   round to +inf/-inf set, which vex does not currently handle
-   correctly.  This just reroutes to the glibc default implementations.
-   This is a really ugly hack.
-   ------------------------------------------------------------------ */
-
-#if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * floor(x)
- * Return x rounded toward -inf to integral value
- * Method:
- *      Bit twiddling.
- * Exception:
- *      Inexact flag raised if x not equal to floor(x).
- */
-
-typedef union
-{
-  double value;
-  struct
-  {
-    /*u_int32_t*/ UInt msw;
-    /*u_int32_t*/ UInt lsw;
-  } parts;
-} ieee_double_shape_type;
-
-/* Get two 32 bit ints from a double.  */
-#define EXTRACT_WORDS(ix0,ix1,d) \
-do { \
-  ieee_double_shape_type ew_u; \
-  ew_u.value = (d); \
-  (ix0) = ew_u.parts.msw; \
-  (ix1) = ew_u.parts.lsw; \
-} while (0)
-
-/* Set a double from two 32 bit ints.  */
-#define INSERT_WORDS(d,ix0,ix1) \
-do { \
-  ieee_double_shape_type iw_u; \
-  iw_u.parts.msw = (ix0); \
-  iw_u.parts.lsw = (ix1); \
-  (d) = iw_u.value; \
-} while (0)
-
-static double bit_twiddling_floor ( double x )
-{
-  static const double huge = 1.0e300;
-  /*int32_t*/   Int i0,i1,j0;
-  /*u_int32_t*/ UInt i,j;
-  EXTRACT_WORDS(i0,i1,x);
-  j0 = ((i0>>20)&0x7ff)-0x3ff;
-  if(j0<20) {
-    if(j0<0) {          /* raise inexact if x != 0 */
-      if(huge+x>0.0) {  /* return 0*sign(x) if |x|<1 */
-        if(i0>=0) {i0=i1=0;}
-        else if(((i0&0x7fffffff)|i1)!=0)
-          { i0=0xbff00000;i1=0;}
-      }
-    } else {
-      i = (0x000fffff)>>j0;
-      if(((i0&i)|i1)==0) return x;      /* x is integral */
-      if(huge+x>0.0) {                  /* raise inexact flag */
-        if(i0<0) i0 += (0x00100000)>>j0;
-        i0 &= (~i); i1=0;
-      }
-    }
-  } else if (j0>51) {
-    if(j0==0x400) return x+x;   /* inf or NaN */
-    else return x;              /* x is integral */
-  } else {
-    i = ((/*u_int32_t*/UInt)(0xffffffff))>>(j0-20);
-    if((i1&i)==0) return x;     /* x is integral */
-    if(huge+x>0.0) {            /* raise inexact flag */
-      if(i0<0) {
-        if(j0==20) i0+=1;
-        else {
-          j = i1+(1<<(52-j0));
-          if(j<i1) i0+=1;       /* got a carry */
-          i1=j;
-        }
-      }
-      i1 &= (~i);
-    }
-  }
-  INSERT_WORDS(x,i0,i1);
-  return x;
-}
-
-/*
- * ceil(x)
- * Return x rounded toward +inf to integral value
- * Method:
- *      Bit twiddling.
- * Exception:
- *      Inexact flag raised if x not equal to ceil(x).
- */
-
-static double bit_twiddling_ceil ( double x )
-{
-  static const double huge = 1.0e300;
-  /*int32_t*/   Int i0,i1,j0;
-  /*u_int32_t*/ UInt i,j;
-  EXTRACT_WORDS(i0,i1,x);
-  j0 = ((i0>>20)&0x7ff)-0x3ff;
-  if(j0<20) {
-    if(j0<0) {          /* raise inexact if x != 0 */
-      if(huge+x>0.0) {  /* return 0*sign(x) if |x|<1 */
-        if(i0<0) {i0=0x80000000;i1=0;}
-        else if((i0|i1)!=0) { i0=0x3ff00000;i1=0;}
-      }
-    } else {
-      i = (0x000fffff)>>j0;
-      if(((i0&i)|i1)==0) return x;      /* x is integral */
-      if(huge+x>0.0) {                  /* raise inexact flag */
-        if(i0>0) i0 += (0x00100000)>>j0;
-        i0 &= (~i); i1=0;
-      }
-    }
-  } else if (j0>51) {
-    if(j0==0x400) return x+x;   /* inf or NaN */
-    else return x;              /* x is integral */
-  } else {
-    i = ((/*u_int32_t*/UInt)(0xffffffff))>>(j0-20);
-    if((i1&i)==0) return x;     /* x is integral */
-    if(huge+x>0.0) {            /* raise inexact flag */
-      if(i0>0) {
-        if(j0==20) i0+=1;
-        else {
-          j = i1 + (1<<(52-j0));
-          if(j<i1) i0+=1;       /* got a carry */
-          i1=j;
-        }
-      }
-      i1 &= (~i);
-    }
-  }
-  INSERT_WORDS(x,i0,i1);
-  return x;
-}
-
-double floor ( double x )
-{
-  return bit_twiddling_floor(x);
-}
-
-double ceil ( double x )
-{
-  return bit_twiddling_ceil(x);
-}
-
-#endif
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ ... @@
+static
+IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
+                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
+{
+   IRAtom* at;
+   IRType  t1 = typeOfIRExpr(mce->bb->tyenv, va1);
+   IRType  t2 = typeOfIRExpr(mce->bb->tyenv, va2);
+   IRType  t3 = typeOfIRExpr(mce->bb->tyenv, va3);
+   tl_assert(isShadowAtom(mce,va1));
+   tl_assert(isShadowAtom(mce,va2));
+   tl_assert(isShadowAtom(mce,va3));
+
+   /* The general case is inefficient because PCast is an expensive
+      operation.  Here are some special cases which use PCast only
+      twice rather than three times. */
+
+   /* I32 x I64 x I64 -> I64 */
+   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
+   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
+       && finalVty == Ity_I64) {
+      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
+      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
+         mode indication which is fully defined, this should get
+         folded out later. */
+      at = mkPCastTo(mce, Ity_I64, va1);
+      /* Now fold in 2nd and 3rd args. */
+      at = mkUifU(mce, Ity_I64, at, va2);
+      at = mkUifU(mce, Ity_I64, at, va3);
+      /* and PCast once again. */
+      at = mkPCastTo(mce, Ity_I64, at);
+      return at;
+   }
+
+   if (0) {
+      VG_(printf)("mkLazy3 ");
+      ppIRType(t1);
+      VG_(printf)("_");
+      ppIRType(t2);
+      VG_(printf)("_");
+      ppIRType(t3);
+      VG_(printf)("_");
+      ppIRType(finalVty);
+      VG_(printf)("\n");
+   }
+
+   /* General case: force everything via 32-bit intermediaries. */
+   at = mkPCastTo(mce, Ity_I32, va1);
+   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
+   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
+   at = mkPCastTo(mce, finalVty, at);
+   return at;
+}
+
+
 /* Do the lazy propagation game from a null-terminated vector of atoms.
This is presumably the arguments to a helper call, so the IRCallee info is also supplied in order that we can know which @@ -1590,6 +1645,46 @@ IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) /*--- Generate shadow values from all kinds of IRExprs. ---*/ /*------------------------------------------------------------*/ +static +IRAtom* expr2vbits_Triop ( MCEnv* mce, + IROp op, + IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 ) +{ + IRType and_or_ty; + IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); + IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); + IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); + + IRAtom* vatom1 = expr2vbits( mce, atom1 ); + IRAtom* vatom2 = expr2vbits( mce, atom2 ); + IRAtom* vatom3 = expr2vbits( mce, atom3 ); + + tl_assert(isOriginalAtom(mce,atom1)); + tl_assert(isOriginalAtom(mce,atom2)); + tl_assert(isOriginalAtom(mce,atom3)); + tl_assert(isShadowAtom(mce,vatom1)); + tl_assert(isShadowAtom(mce,vatom2)); + tl_assert(isShadowAtom(mce,vatom3)); + tl_assert(sameKindedAtoms(atom1,vatom1)); + tl_assert(sameKindedAtoms(atom2,vatom2)); + tl_assert(sameKindedAtoms(atom3,vatom3)); + switch (op) { + case Iop_AddF64: + case Iop_AddF64r32: + case Iop_SubF64: + case Iop_SubF64r32: + case Iop_MulF64: + case Iop_MulF64r32: + case Iop_DivF64: + case Iop_DivF64r32: + return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); + default: + ppIROp(op); + VG_(tool_panic)("memcheck:expr2vbits_Triop"); + } +} + + static IRAtom* expr2vbits_Binop ( MCEnv* mce, IROp op, @@ -1906,7 +2001,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, /* Scalar floating point */ - case Iop_RoundF64: + case Iop_RoundF64toInt: + case Iop_RoundF64toF32: case Iop_F64toI64: case Iop_I64toF64: /* First arg is I32 (rounding mode), second is F64 or I64 @@ -1930,10 +2026,6 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_PRemF64: case Iop_PRem1F64: case Iop_AtanF64: - case Iop_AddF64: - case Iop_DivF64: - case Iop_SubF64: - case Iop_MulF64: return mkLazy2(mce, Ity_I64, vatom1, vatom2); case Iop_CmpF64: @@ -2185,7 +2277,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) case Iop_SqrtF64: case Iop_AbsF64: case Iop_2xm1F64: - case Iop_Est8FRecip: case Iop_Est5FRSqrt: case Iop_Clz64: case Iop_Ctz64: @@ -2193,6 +2284,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) case Iop_Clz32: case Iop_Ctz32: + case Iop_TruncF64asF32: return mkPCastTo(mce, Ity_I32, vatom); case Iop_1Uto64: @@ -2428,6 +2520,13 @@ IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) case Iex_Const: return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e))); + case Iex_Triop: + return expr2vbits_Triop( + mce, + e->Iex.Triop.op, + e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3 + ); + case Iex_Binop: return expr2vbits_Binop( mce, @@ -2931,6 +3030,10 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) case Iex_Binop: return isBogusAtom(e->Iex.Binop.arg1) || isBogusAtom(e->Iex.Binop.arg2); + case Iex_Triop: + return isBogusAtom(e->Iex.Triop.arg1) + || isBogusAtom(e->Iex.Triop.arg2) + || isBogusAtom(e->Iex.Triop.arg3); case Iex_Mux0X: return isBogusAtom(e->Iex.Mux0X.cond) || isBogusAtom(e->Iex.Mux0X.expr0)
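
As a rough illustration of what the new mkLazy3 helper computes for the standard
rm x F64 x F64 -> F64 idiom (e.g. Iop_AddF64(rm, arg1, arg2)), here is a minimal
standalone sketch.  This is not Valgrind code: the names pcast64, uifu64 and
shadow_triop_f64 are made up for this example, and V-bits are modelled as plain
64-bit masks in which a set bit means "this bit of the value is undefined".

#include <stdint.h>
#include <stdio.h>

/* PCast: pessimistic cast - if any bit is undefined, every bit of the
   result is treated as undefined. */
static uint64_t pcast64 ( uint64_t vbits )
{
   return vbits == 0 ? 0 : ~0ULL;
}

/* UifU: undefined-if-either-undefined, i.e. bitwise OR of V bits. */
static uint64_t uifu64 ( uint64_t va, uint64_t vb )
{
   return va | vb;
}

/* Shadow of a ternary FP op, in the style of mkLazy3's special case:
   pcast the rounding-mode shadow, fold in both operand shadows with
   UifU, then pcast the accumulated value once more. */
static uint64_t shadow_triop_f64 ( uint64_t v_rm, uint64_t v_arg1,
                                   uint64_t v_arg2 )
{
   uint64_t at = pcast64(v_rm);   /* rm shadow is normally all zeroes  */
   at = uifu64(at, v_arg1);       /* fold in 1st FP operand's V bits   */
   at = uifu64(at, v_arg2);       /* fold in 2nd FP operand's V bits   */
   return pcast64(at);            /* all-defined or all-undefined      */
}

int main ( void )
{
   /* Fully defined inputs -> fully defined (all-zero) shadow. */
   printf("%016llx\n",
          (unsigned long long)shadow_triop_f64(0, 0, 0));
   /* A single undefined bit in one operand -> fully undefined shadow. */
   printf("%016llx\n",
          (unsigned long long)shadow_triop_f64(0, 1ULL << 3, 0));
   return 0;
}

Under this approximation the result is either fully defined or fully undefined;
since the I32 rounding-mode shadow is normally all zeroes, its initial PCast
folds away, which is why the special case in mkLazy3 needs only two PCasts.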