From: Julian Seward Date: Sun, 26 Jan 2014 18:36:52 +0000 (+0000) Subject: Handle and instrument an extra rounding-mode argument as added by X-Git-Tag: svn/VALGRIND_3_10_0~655 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c2d3e03fbe9b83e7ff1476e3ca1d45911d9b8dfa;p=thirdparty%2Fvalgrind.git Handle and instrument an extra rounding-mode argument as added by vex r2809 to the following primops: Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4, Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2, Iop_Add64Fx4, Iop_Sub64Fx4, Iop_Mul64Fx4, Iop_Div64Fx4, Iop_Add32Fx8, Iop_Sub32Fx8, Iop_Mul32Fx8, Iop_Div32Fx8, git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13781 --- diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index b654816815..0916bf1bd3 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -398,6 +398,7 @@ static IRExpr* definedOfType ( IRType ty ) { case Ity_I64: return IRExpr_Const(IRConst_U64(0)); case Ity_I128: return i128_const_zero(); case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000)); + case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000)); default: VG_(tool_panic)("memcheck:definedOfType"); } } @@ -767,6 +768,21 @@ static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits ) return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp)); } + if (src_ty == Ity_I32 && dst_ty == Ity_V128) { + /* PCast the arg, then clone it 4 times. */ + IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); + tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp)); + return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp)); + } + + if (src_ty == Ity_I32 && dst_ty == Ity_V256) { + /* PCast the arg, then clone it 8 times. */ + IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits)); + tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp)); + tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp)); + return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp)); + } + if (src_ty == Ity_I64 && dst_ty == Ity_I32) { /* PCast the arg. This gives all 0s or all 1s. Then throw away the top half. */ @@ -2244,6 +2260,69 @@ IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX ) return at; } +/* --- 64Fx2 binary FP ops, with rounding mode --- */ + +static +IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, + IRAtom* vatomX, IRAtom* vatomY ) +{ + /* This is the same as binary64Fx2, except that we subsequently + pessimise vRM (definedness of the rounding mode), widen to 128 + bits and UifU it into the result. As with the scalar cases, if + the RM is a constant then it is defined and so this extra bit + will get constant-folded out later. */ + // "do" the vector args + IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY); + // PCast the RM, and widen it to 128 bits + IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); + // Roll it into the result + t1 = mkUifUV128(mce, t1, t2); + return t1; +} + +/* --- ... and ... 32Fx4 versions of the same --- */ + +static +IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, + IRAtom* vatomX, IRAtom* vatomY ) +{ + IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY); + // PCast the RM, and widen it to 128 bits + IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM); + // Roll it into the result + t1 = mkUifUV128(mce, t1, t2); + return t1; +} + +/* --- ... and ... 64Fx4 versions of the same --- */ + +static +IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, + IRAtom* vatomX, IRAtom* vatomY ) +{ + IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY); + // PCast the RM, and widen it to 256 bits + IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM); + // Roll it into the result + t1 = mkUifUV256(mce, t1, t2); + return t1; +} + +/* --- ... and ... 32Fx8 versions of the same --- */ + +static +IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM, + IRAtom* vatomX, IRAtom* vatomY ) +{ + IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY); + // PCast the RM, and widen it to 256 bits + IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM); + // Roll it into the result + t1 = mkUifUV256(mce, t1, t2); + return t1; +} + + /* --- --- Vector saturated narrowing --- --- */ /* We used to do something very clever here, but on closer inspection @@ -2715,6 +2794,31 @@ IRAtom* expr2vbits_Triop ( MCEnv* mce, complainIfUndefined(mce, atom3, NULL); return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)); + /* Vector FP with rounding mode as the first arg */ + case Iop_Add64Fx2: + case Iop_Sub64Fx2: + case Iop_Mul64Fx2: + case Iop_Div64Fx2: + return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3); + + case Iop_Add32Fx4: + case Iop_Sub32Fx4: + case Iop_Mul32Fx4: + case Iop_Div32Fx4: + return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3); + + case Iop_Add64Fx4: + case Iop_Sub64Fx4: + case Iop_Mul64Fx4: + case Iop_Div64Fx4: + return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3); + + case Iop_Add32Fx8: + case Iop_Sub32Fx8: + case Iop_Mul32Fx8: + case Iop_Div32Fx8: + return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3); + default: ppIROp(op); VG_(tool_panic)("memcheck:expr2vbits_Triop"); @@ -3175,16 +3279,12 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_QNarrowBin16Sto8Ux16: return vectorNarrowBinV128(mce, op, vatom1, vatom2); - case Iop_Sub64Fx2: - case Iop_Mul64Fx2: case Iop_Min64Fx2: case Iop_Max64Fx2: - case Iop_Div64Fx2: case Iop_CmpLT64Fx2: case Iop_CmpLE64Fx2: case Iop_CmpEQ64Fx2: case Iop_CmpUN64Fx2: - case Iop_Add64Fx2: return binary64Fx2(mce, vatom1, vatom2); case Iop_Sub64F0x2: @@ -3199,18 +3299,14 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_Add64F0x2: return binary64F0x2(mce, vatom1, vatom2); - case Iop_Sub32Fx4: - case Iop_Mul32Fx4: case Iop_Min32Fx4: case Iop_Max32Fx4: - case Iop_Div32Fx4: case Iop_CmpLT32Fx4: case Iop_CmpLE32Fx4: case Iop_CmpEQ32Fx4: case Iop_CmpUN32Fx4: case Iop_CmpGT32Fx4: case Iop_CmpGE32Fx4: - case Iop_Add32Fx4: case Iop_Recps32Fx4: case Iop_Rsqrts32Fx4: return binary32Fx4(mce, vatom1, vatom2); @@ -3417,18 +3513,10 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, /* V256-bit SIMD */ - case Iop_Add64Fx4: - case Iop_Sub64Fx4: - case Iop_Mul64Fx4: - case Iop_Div64Fx4: case Iop_Max64Fx4: case Iop_Min64Fx4: return binary64Fx4(mce, vatom1, vatom2); - case Iop_Add32Fx8: - case Iop_Sub32Fx8: - case Iop_Mul32Fx8: - case Iop_Div32Fx8: case Iop_Max32Fx8: case Iop_Min32Fx8: return binary32Fx8(mce, vatom1, vatom2); @@ -5730,6 +5818,7 @@ static Bool isBogusAtom ( IRAtom* at ) case Ico_F32i: return False; case Ico_F64i: return False; case Ico_V128: return False; + case Ico_V256: return False; default: ppIRExpr(at); tl_assert(0); } /* VG_(printf)("%llx\n", n); */