From: Carl Love
Date: Fri, 22 Mar 2019 17:06:31 +0000 (-0500)
Subject: PPC64, fix for vmsummbm instruction.
X-Git-Tag: VALGRIND_3_15_0~44
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2da60f569fd5b2e9c8d6361e860731e98cf8053d;p=thirdparty%2Fvalgrind.git

PPC64, fix for vmsummbm instruction.

The instruction needs its per-lane sums chopped to 32 bits.  The current
lane-based implementation does not do the chopping, so the adds and the
32-bit chop have to be done explicitly.

Valgrind bug 405362
---

diff --git a/NEWS b/NEWS
index a2c055ed31..c59168be3b 100644
--- a/NEWS
+++ b/NEWS
@@ -115,6 +115,7 @@ where XXXXXX is the bug number as listed below.
 405716  drd: Fix an integer overflow in the stack margin calculation
 405356  PPC64, xvcvsxdsp, xvcvuxdsp are supposed to write the 32-bit result
         to the upper and lower 32-bits of the 64-bit result
+405362  PPC64, vmsummbm instruction doesn't handle overflow case correctly
 n-i-bz  add syswrap for PTRACE_GET|SET_THREAD_AREA on amd64.
 n-i-bz  Fix callgrind_annotate non deterministic order for equal total
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index d6c671abab..fb6f989db7 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -24029,6 +24029,12 @@ static Bool dis_av_multarith ( UInt theInstr )
       IRTemp abEO = newTemp(Ity_V128);
       IRTemp abOE = newTemp(Ity_V128);
       IRTemp abOO = newTemp(Ity_V128);
+      IRTemp prod = newTemp(Ity_V128);
+      IRTemp sum0 = newTemp(Ity_I32);
+      IRTemp sum1 = newTemp(Ity_I32);
+      IRTemp sum2 = newTemp(Ity_I32);
+      IRTemp sum3 = newTemp(Ity_I32);
+
       aEvn = aOdd = bEvn = bOdd = IRTemp_INVALID;
       DIP("vmsummbm v%d,v%d,v%d,v%d\n",
           vD_addr, vA_addr, vB_addr, vC_addr);
@@ -24045,11 +24051,53 @@ static Bool dis_av_multarith ( UInt theInstr )
       assign( abOO, binop(Iop_MullEven16Sx8, mkexpr(aOdd), mkexpr(bOdd)) );
 
       /* add results together, + vC */
+/* Unfortunately, we need to chop the results of the adds to 32-bits.  The
+   following lane based calculations don't handle the overflow correctly.  Need
+   to explicitly do the adds and 32-bit chops.
+
       putVReg( vD_addr,
                binop(Iop_QAdd32Sx4, mkexpr(vC),
                      binop(Iop_QAdd32Sx4,
                            binop(Iop_QAdd32Sx4, mkexpr(abEE), mkexpr(abEO)),
                            binop(Iop_QAdd32Sx4, mkexpr(abOE), mkexpr(abOO)))) );
+*/
+
+      assign(prod,
+             binop(Iop_QAdd32Sx4,
+                   binop(Iop_QAdd32Sx4, mkexpr(abEE), mkexpr(abEO)),
+                   binop(Iop_QAdd32Sx4, mkexpr(abOE), mkexpr(abOO))));
+      assign( sum0,
+              unop(Iop_64to32,
+                   binop(Iop_Add64,
+                         unop(Iop_32Sto64,
+                              unop(Iop_64HIto32, unop(Iop_V128HIto64, mkexpr(prod)))),
+                         unop(Iop_32Sto64,
+                              unop(Iop_64HIto32, unop(Iop_V128HIto64, mkexpr(vC)))))));
+      assign( sum1,
+              unop(Iop_64to32,
+                   binop(Iop_Add64,
+                         unop(Iop_32Sto64,
+                              unop(Iop_64to32, unop(Iop_V128HIto64, mkexpr(prod)))),
+                         unop(Iop_32Sto64,
+                              unop(Iop_64to32, unop(Iop_V128HIto64, mkexpr(vC)))))));
+      assign( sum2,
+              unop(Iop_64to32,
+                   binop(Iop_Add64,
+                         unop(Iop_32Sto64,
+                              unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(prod)))),
+                         unop(Iop_32Sto64,
+                              unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(vC)))))));
+      assign( sum3,
+              unop(Iop_64to32,
+                   binop(Iop_Add64,
+                         unop(Iop_32Sto64,
+                              unop(Iop_64to32, unop(Iop_V128to64, mkexpr(prod)))),
+                         unop(Iop_32Sto64,
+                              unop(Iop_64to32, unop(Iop_V128to64, mkexpr(vC)))))));
+
+      putVReg( vD_addr, binop(Iop_64HLtoV128,
+                              binop(Iop_32HLto64, mkexpr(sum0), mkexpr(sum1)),
+                              binop(Iop_32HLto64, mkexpr(sum2), mkexpr(sum3))));
+
       break;
    }
    case 0x26: { // vmsumuhm (Multiply Sum Unsigned HW Modulo, AV p205)
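
For reference, the semantics the new IR implements can be summarised by a small
standalone C model of vmsummbm (Vector Multiply-Sum Mixed-Sign Byte Modulo).
This is only an illustrative sketch written for this note, not code from the
patch or from Valgrind: the names ref_vmsummbm, VecBytes and VecWords are
invented here, and the byte/word index mapping glosses over endianness.  Each
32-bit word lane sums the four products of a signed byte of vA with the
corresponding unsigned byte of vB, then adds the matching word of vC with
ordinary modulo-2^32 wrap-around; keeping only the low 32 bits of that final
add is the "chop" that the new Iop_Add64 / Iop_64to32 sequence performs.

   #include <stdint.h>

   /* Illustrative reference model only; not part of the patch. */
   typedef struct { uint8_t  b[16]; } VecBytes;   /* 16 byte elements */
   typedef struct { uint32_t w[4];  } VecWords;   /*  4 word elements */

   static VecWords ref_vmsummbm ( VecBytes vA, VecBytes vB, VecWords vC )
   {
      VecWords vD;
      for (int i = 0; i < 4; i++) {
         uint32_t acc = vC.w[i];                 /* modulo-2^32 accumulator   */
         for (int j = 0; j < 4; j++) {
            int32_t prod = (int32_t)(int8_t)vA.b[4*i + j]   /* signed byte    */
                         * (int32_t)vB.b[4*i + j];          /* unsigned byte  */
            acc += (uint32_t)prod;               /* wraps, never saturates    */
         }
         vD.w[i] = acc;                          /* chopped to 32 bits        */
      }
      return vD;
   }

The overflow case of bug 405362 shows up directly in this model: with, say,
vC.w[i] = 0x7fffffff and a positive product sum, acc wraps into the negative
range, whereas the old Iop_QAdd32Sx4-based lane add on vC would have clamped
the result at 0x7fffffff.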