From: Julian Seward
Date: Fri, 3 Feb 2006 16:12:27 +0000 (+0000)
Subject: * Track introduction of IR ternary primops and rounding modes, at least as
X-Git-Tag: svn/VALGRIND_3_2_0~306
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5163ea2aed90f89c5c8dfd582b379e39e1aed255;p=thirdparty%2Fvalgrind.git

* Track introduction of IR ternary primops and rounding modes, at least as
  to the extent needed to make ppc32 work.

* As a result, remove the replacements for glibc's floor/ceil fns on
  ppc32/64, since vex can now correctly simulate the real ones.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5605
---
diff --git a/coregrind/m_dispatch/dispatch-ppc32-linux.S b/coregrind/m_dispatch/dispatch-ppc32-linux.S
index 0f3226bbc7..ff8e4a28d3 100644
--- a/coregrind/m_dispatch/dispatch-ppc32-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc32-linux.S
@@ -401,7 +401,7 @@ run_innerloop_exit:
         /* This check avoidance may be removable if stfiwx is implemented. */
-#       if !defined(ENABLE_INNER)
+#       if 0 //!defined(ENABLE_INNER)
         /* Check FPSCR & 0xFF == 0 (lowest 8bits are controls) */
         mffs    4               /* fpscr -> fpr */
         li      5,48
diff --git a/coregrind/vg_preloaded.c b/coregrind/vg_preloaded.c
index cad1e40214..1b17ee0ba1 100644
--- a/coregrind/vg_preloaded.c
+++ b/coregrind/vg_preloaded.c
@@ -66,183 +66,6 @@ void VG_NOTIFY_ON_LOAD(freeres)( void )
    *(int *)0 = 'x';
 }
 
-/* ---------------------------------------------------------------------
-   Avoid glibc's floor/ceil functions on ppc32/64.  In recent glibcs
-   (about 2.3.4 and after) these rely on doing fadd/fsub with with
-   round to +inf/-inf set, which vex does not currently handle
-   correctly.  This just reroutes to the glibc default implementations.
-   This is a really ugly hack.
-   ------------------------------------------------------------------ */
-
-#if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * floor(x)
- * Return x rounded toward -inf to integral value
- * Method:
- *      Bit twiddling.
- * Exception:
- *      Inexact flag raised if x not equal to floor(x).
- */
-
-typedef union
-{
-  double value;
-  struct
-  {
-    /*u_int32_t*/ UInt msw;
-    /*u_int32_t*/ UInt lsw;
-  } parts;
-} ieee_double_shape_type;
-
-/* Get two 32 bit ints from a double.  */
-#define EXTRACT_WORDS(ix0,ix1,d) \
-do { \
-  ieee_double_shape_type ew_u; \
-  ew_u.value = (d); \
-  (ix0) = ew_u.parts.msw; \
-  (ix1) = ew_u.parts.lsw; \
-} while (0)
-
-/* Set a double from two 32 bit ints.  */
-#define INSERT_WORDS(d,ix0,ix1) \
-do { \
-  ieee_double_shape_type iw_u; \
-  iw_u.parts.msw = (ix0); \
-  iw_u.parts.lsw = (ix1); \
-  (d) = iw_u.value; \
-} while (0)
-
-static double bit_twiddling_floor ( double x )
-{
-  static const double huge = 1.0e300;
-  /*int32_t*/   Int i0,i1,j0;
-  /*u_int32_t*/ UInt i,j;
-  EXTRACT_WORDS(i0,i1,x);
-  j0 = ((i0>>20)&0x7ff)-0x3ff;
-  if(j0<20) {
-    if(j0<0) {          /* raise inexact if x != 0 */
-      if(huge+x>0.0) {  /* return 0*sign(x) if |x|<1 */
-        if(i0>=0) {i0=i1=0;}
-        else if(((i0&0x7fffffff)|i1)!=0)
-          { i0=0xbff00000;i1=0;}
-      }
-    } else {
-      i = (0x000fffff)>>j0;
-      if(((i0&i)|i1)==0) return x;      /* x is integral */
-      if(huge+x>0.0) {                  /* raise inexact flag */
-        if(i0<0) i0 += (0x00100000)>>j0;
-        i0 &= (~i); i1=0;
-      }
-    }
-  } else if (j0>51) {
-    if(j0==0x400) return x+x;   /* inf or NaN */
-    else return x;              /* x is integral */
-  } else {
-    i = ((/*u_int32_t*/UInt)(0xffffffff))>>(j0-20);
-    if((i1&i)==0) return x;     /* x is integral */
-    if(huge+x>0.0) {            /* raise inexact flag */
-      if(i0<0) {
-        if(j0==20) i0+=1;
-        else {
-          j = i1+(1<<(52-j0));
-          if(j<i1) i0+=1;       /* got a carry */
-          i1=j;
-        }
-      }
-      i1 &= (~i);
-    }
-  }
-  INSERT_WORDS(x,i0,i1);
-  return x;
-}
-
-/*
- * ceil(x)
- * Return x rounded toward +inf to integral value
- * Method:
- *      Bit twiddling.
- * Exception:
- *      Inexact flag raised if x not equal to ceil(x).
- */
-
-static double bit_twiddling_ceil ( double x )
-{
-  static const double huge = 1.0e300;
-  /*int32_t*/   Int i0,i1,j0;
-  /*u_int32_t*/ UInt i,j;
-  EXTRACT_WORDS(i0,i1,x);
-  j0 = ((i0>>20)&0x7ff)-0x3ff;
-  if(j0<20) {
-    if(j0<0) {          /* raise inexact if x != 0 */
-      if(huge+x>0.0) {  /* return 0*sign(x) if |x|<1 */
-        if(i0<0) {i0=0x80000000;i1=0;}
-        else if((i0|i1)!=0) { i0=0x3ff00000;i1=0;}
-      }
-    } else {
-      i = (0x000fffff)>>j0;
-      if(((i0&i)|i1)==0) return x;      /* x is integral */
-      if(huge+x>0.0) {                  /* raise inexact flag */
-        if(i0>0) i0 += (0x00100000)>>j0;
-        i0 &= (~i); i1=0;
-      }
-    }
-  } else if (j0>51) {
-    if(j0==0x400) return x+x;   /* inf or NaN */
-    else return x;              /* x is integral */
-  } else {
-    i = ((/*u_int32_t*/UInt)(0xffffffff))>>(j0-20);
-    if((i1&i)==0) return x;     /* x is integral */
-    if(huge+x>0.0) {            /* raise inexact flag */
-      if(i0>0) {
-        if(j0==20) i0+=1;
-        else {
-          j = i1 + (1<<(52-j0));
-          if(j<i1) i0+=1;       /* got a carry */
-          i1=j;
-        }
-      }
-      i1 &= (~i);
-    }
-  }
-  INSERT_WORDS(x,i0,i1);
-  return x;
-}
-
-double floor ( double x )
-{
-  return bit_twiddling_floor(x);
-}
-
-double ceil ( double x )
-{
-  return bit_twiddling_ceil(x);
-}
-
-#endif
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ ... @@
+static
+IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
+                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
+{
+   IRAtom* at;
+   IRType  t1 = typeOfIRExpr(mce->bb->tyenv, va1);
+   IRType  t2 = typeOfIRExpr(mce->bb->tyenv, va2);
+   IRType  t3 = typeOfIRExpr(mce->bb->tyenv, va3);
+   tl_assert(isShadowAtom(mce,va1));
+   tl_assert(isShadowAtom(mce,va2));
+   tl_assert(isShadowAtom(mce,va3));
+
+   /* The general case is inefficient because PCast is an expensive
+      operation.  Here are some special cases which use PCast only
+      twice rather than three times. */
+
+   /* I32 x I64 x I64 -> I64 */
+   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
+   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
+       && finalVty == Ity_I64) {
+      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
+      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
+         mode indication which is fully defined, this should get
+         folded out later. */
+      at = mkPCastTo(mce, Ity_I64, va1);
+      /* Now fold in 2nd and 3rd args. */
+      at = mkUifU(mce, Ity_I64, at, va2);
+      at = mkUifU(mce, Ity_I64, at, va3);
+      /* and PCast once again. */
+      at = mkPCastTo(mce, Ity_I64, at);
+      return at;
+   }
+
+   if (0) {
+      VG_(printf)("mkLazy3 ");
+      ppIRType(t1);
+      VG_(printf)("_");
+      ppIRType(t2);
+      VG_(printf)("_");
+      ppIRType(t3);
+      VG_(printf)("_");
+      ppIRType(finalVty);
+      VG_(printf)("\n");
+   }
+
+   /* General case: force everything via 32-bit intermediaries. */
+   at = mkPCastTo(mce, Ity_I32, va1);
+   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
+   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
+   at = mkPCastTo(mce, finalVty, at);
+   return at;
+}
+
+
 /* Do the lazy propagation game from a null-terminated vector of atoms.
This is presumably the arguments to a helper call, so the IRCallee info is also supplied in order that we can know which @@ -1590,6 +1645,46 @@ IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) /*--- Generate shadow values from all kinds of IRExprs. ---*/ /*------------------------------------------------------------*/ +static +IRAtom* expr2vbits_Triop ( MCEnv* mce, + IROp op, + IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 ) +{ + IRType and_or_ty; + IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); + IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); + IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*); + + IRAtom* vatom1 = expr2vbits( mce, atom1 ); + IRAtom* vatom2 = expr2vbits( mce, atom2 ); + IRAtom* vatom3 = expr2vbits( mce, atom3 ); + + tl_assert(isOriginalAtom(mce,atom1)); + tl_assert(isOriginalAtom(mce,atom2)); + tl_assert(isOriginalAtom(mce,atom3)); + tl_assert(isShadowAtom(mce,vatom1)); + tl_assert(isShadowAtom(mce,vatom2)); + tl_assert(isShadowAtom(mce,vatom3)); + tl_assert(sameKindedAtoms(atom1,vatom1)); + tl_assert(sameKindedAtoms(atom2,vatom2)); + tl_assert(sameKindedAtoms(atom3,vatom3)); + switch (op) { + case Iop_AddF64: + case Iop_AddF64r32: + case Iop_SubF64: + case Iop_SubF64r32: + case Iop_MulF64: + case Iop_MulF64r32: + case Iop_DivF64: + case Iop_DivF64r32: + return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3); + default: + ppIROp(op); + VG_(tool_panic)("memcheck:expr2vbits_Triop"); + } +} + + static IRAtom* expr2vbits_Binop ( MCEnv* mce, IROp op, @@ -1906,7 +2001,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, /* Scalar floating point */ - case Iop_RoundF64: + case Iop_RoundF64toInt: + case Iop_RoundF64toF32: case Iop_F64toI64: case Iop_I64toF64: /* First arg is I32 (rounding mode), second is F64 or I64 @@ -1930,10 +2026,6 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_PRemF64: case Iop_PRem1F64: case Iop_AtanF64: - case Iop_AddF64: - case Iop_DivF64: - case Iop_SubF64: - case Iop_MulF64: return mkLazy2(mce, Ity_I64, vatom1, vatom2); case Iop_CmpF64: @@ -2185,7 +2277,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) case Iop_SqrtF64: case Iop_AbsF64: case Iop_2xm1F64: - case Iop_Est8FRecip: case Iop_Est5FRSqrt: case Iop_Clz64: case Iop_Ctz64: @@ -2193,6 +2284,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) case Iop_Clz32: case Iop_Ctz32: + case Iop_TruncF64asF32: return mkPCastTo(mce, Ity_I32, vatom); case Iop_1Uto64: @@ -2428,6 +2520,13 @@ IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) case Iex_Const: return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e))); + case Iex_Triop: + return expr2vbits_Triop( + mce, + e->Iex.Triop.op, + e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3 + ); + case Iex_Binop: return expr2vbits_Binop( mce, @@ -2931,6 +3030,10 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) case Iex_Binop: return isBogusAtom(e->Iex.Binop.arg1) || isBogusAtom(e->Iex.Binop.arg2); + case Iex_Triop: + return isBogusAtom(e->Iex.Triop.arg1) + || isBogusAtom(e->Iex.Triop.arg2) + || isBogusAtom(e->Iex.Triop.arg3); case Iex_Mux0X: return isBogusAtom(e->Iex.Mux0X.cond) || isBogusAtom(e->Iex.Mux0X.expr0)
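
As a rough illustration of what the new mkLazy3 helper computes for the standard
rm x F64 x F64 -> F64 idiom (e.g. Iop_AddF64(rm, arg1, arg2)), here is a minimal
standalone sketch.  This is not Valgrind code: the names pcast64, uifu64 and
shadow_triop_f64 are made up for this example, and V-bits are modelled as plain
64-bit masks in which a set bit means "this bit of the value is undefined".

#include <stdint.h>
#include <stdio.h>

/* PCast: pessimistic cast - if any bit is undefined, every bit of the
   result is treated as undefined. */
static uint64_t pcast64 ( uint64_t vbits )
{
   return vbits == 0 ? 0 : ~0ULL;
}

/* UifU: undefined-if-either-undefined, i.e. bitwise OR of V bits. */
static uint64_t uifu64 ( uint64_t va, uint64_t vb )
{
   return va | vb;
}

/* Shadow of a ternary FP op, in the style of mkLazy3's special case:
   pcast the rounding-mode shadow, fold in both operand shadows with
   UifU, then pcast the accumulated value once more. */
static uint64_t shadow_triop_f64 ( uint64_t v_rm, uint64_t v_arg1,
                                   uint64_t v_arg2 )
{
   uint64_t at = pcast64(v_rm);   /* rm shadow is normally all zeroes  */
   at = uifu64(at, v_arg1);       /* fold in 1st FP operand's V bits   */
   at = uifu64(at, v_arg2);       /* fold in 2nd FP operand's V bits   */
   return pcast64(at);            /* all-defined or all-undefined      */
}

int main ( void )
{
   /* Fully defined inputs -> fully defined (all-zero) shadow. */
   printf("%016llx\n",
          (unsigned long long)shadow_triop_f64(0, 0, 0));
   /* A single undefined bit in one operand -> fully undefined shadow. */
   printf("%016llx\n",
          (unsigned long long)shadow_triop_f64(0, 1ULL << 3, 0));
   return 0;
}

Under this approximation the result is either fully defined or fully undefined;
since the I32 rounding-mode shadow is normally all zeroes, its initial PCast
folds away, which is why the special case in mkLazy3 needs only two PCasts.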