From 25559462c5ffea239f9788fdaa1a4ef91968a31a Mon Sep 17 00:00:00 2001 From: Cerion Armour-Brown Date: Wed, 9 Nov 2005 21:34:20 +0000 Subject: [PATCH] Fix usage of Iop_MullEven* to give IR correct meaning of which lanes being multiplied, i.e. lowest significant lane = zero (rather than the ibm-speke 'most significant = zero') git-svn-id: svn://svn.valgrind.org/vex/trunk@1448 --- VEX/priv/guest-ppc32/toIR.c | 154 +++++++++++++++++------------------- VEX/priv/host-ppc32/isel.c | 8 +- VEX/pub/libvex_ir.h | 3 +- 3 files changed, 76 insertions(+), 89 deletions(-) diff --git a/VEX/priv/guest-ppc32/toIR.c b/VEX/priv/guest-ppc32/toIR.c index e4a75036a0..3a65ff64b5 100644 --- a/VEX/priv/guest-ppc32/toIR.c +++ b/VEX/priv/guest-ppc32/toIR.c @@ -454,9 +454,9 @@ static void expand8Ux16( IRExpr* vIn, /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd ) *vOdd = newTemp(Ity_V128); assign( ones8x16, unop(Iop_Dup8x16, mkU8(0x1)) ); - assign( *vEvn, binop(Iop_MullEven8Ux16, mkexpr(ones8x16), vIn) ); - assign( *vOdd, binop(Iop_MullEven8Ux16, mkexpr(ones8x16), - binop(Iop_ShlV128, vIn, mkU8(8))) ); + assign( *vOdd, binop(Iop_MullEven8Ux16, mkexpr(ones8x16), vIn) ); + assign( *vEvn, binop(Iop_MullEven8Ux16, mkexpr(ones8x16), + binop(Iop_ShrV128, vIn, mkU8(8))) ); } /* expand V128_8Sx16 to 2x V128_16Sx8's */ @@ -471,9 +471,9 @@ static void expand8Sx16( IRExpr* vIn, /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd ) *vOdd = newTemp(Ity_V128); assign( ones8x16, unop(Iop_Dup8x16, mkU8(0x1)) ); - assign( *vEvn, binop(Iop_MullEven8Sx16, mkexpr(ones8x16), vIn) ); - assign( *vOdd, binop(Iop_MullEven8Sx16, mkexpr(ones8x16), - binop(Iop_ShlV128, vIn, mkU8(8))) ); + assign( *vOdd, binop(Iop_MullEven8Sx16, mkexpr(ones8x16), vIn) ); + assign( *vEvn, binop(Iop_MullEven8Sx16, mkexpr(ones8x16), + binop(Iop_ShrV128, vIn, mkU8(8))) ); } /* expand V128_16Uto8 to 2x V128_32Ux4's */ @@ -488,9 +488,9 @@ static void expand16Ux8( IRExpr* vIn, /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd ) *vOdd = newTemp(Ity_V128); assign( ones16x8, unop(Iop_Dup16x8, mkU16(0x1)) ); - assign( *vEvn, binop(Iop_MullEven16Ux8, mkexpr(ones16x8), vIn) ); - assign( *vOdd, binop(Iop_MullEven16Ux8, mkexpr(ones16x8), - binop(Iop_ShlV128, vIn, mkU8(16))) ); + assign( *vOdd, binop(Iop_MullEven16Ux8, mkexpr(ones16x8), vIn) ); + assign( *vEvn, binop(Iop_MullEven16Ux8, mkexpr(ones16x8), + binop(Iop_ShrV128, vIn, mkU8(16))) ); } /* expand V128_16Sto8 to 2x V128_32Sx4's */ @@ -505,9 +505,9 @@ static void expand16Sx8( IRExpr* vIn, /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd ) *vOdd = newTemp(Ity_V128); assign( ones16x8, unop(Iop_Dup16x8, mkU16(0x1)) ); - assign( *vEvn, binop(Iop_MullEven16Sx8, mkexpr(ones16x8), vIn) ); - assign( *vOdd, binop(Iop_MullEven16Sx8, mkexpr(ones16x8), - binop(Iop_ShlV128, vIn, mkU8(16))) ); + assign( *vOdd, binop(Iop_MullEven16Sx8, mkexpr(ones16x8), vIn) ); + assign( *vEvn, binop(Iop_MullEven16Sx8, mkexpr(ones16x8), + binop(Iop_ShrV128, vIn, mkU8(16))) ); } /* break V128 to 4xI32's, then sign-extend to I64's */ @@ -641,6 +641,28 @@ static IRExpr* mkV128from4x64U ( IRExpr* t3, IRExpr* t2, mkQNarrow64Uto32( t0 ))); } +/* Simulate irops Iop_MullOdd*, since we don't have them */ +#define MK_Iop_MullOdd8Ux16( expr_vA, expr_vB ) \ + binop(Iop_MullEven8Ux16, \ + binop(Iop_ShrV128, expr_vA, mkU8(8)), \ + binop(Iop_ShrV128, expr_vB, mkU8(8))) + +#define MK_Iop_MullOdd8Sx16( expr_vA, expr_vB ) \ + binop(Iop_MullEven8Sx16, \ + binop(Iop_ShrV128, expr_vA, mkU8(8)), \ + binop(Iop_ShrV128, expr_vB, mkU8(8))) + +#define MK_Iop_MullOdd16Ux8( expr_vA, expr_vB ) \ + binop(Iop_MullEven16Ux8, \ + binop(Iop_ShrV128, expr_vA, mkU8(16)), \ + binop(Iop_ShrV128, expr_vB, mkU8(16))) + +#define MK_Iop_MullOdd16Sx8( expr_vA, expr_vB ) \ + binop(Iop_MullEven16Sx8, \ + binop(Iop_ShrV128, expr_vA, mkU8(16)), \ + binop(Iop_ShrV128, expr_vB, mkU8(16))) + + static Int integerGuestRegOffset ( UInt archreg ) { @@ -5607,50 +5629,42 @@ static Bool dis_av_arith ( UInt theInstr ) /* Multiply */ case 0x008: // vmuloub (Multiply Odd Unsigned Byte, AV p213) DIP("vmuloub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_MullEven8Ux16, - binop(Iop_ShlV128, mkexpr(vA), mkU8(8)), - binop(Iop_ShlV128, mkexpr(vB), mkU8(8)) )); + putVReg( vD_addr, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB))); break; case 0x048: // vmulouh (Multiply Odd Unsigned Half Word, AV p214) DIP("vmulouh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_MullEven16Ux8, - binop(Iop_ShlV128, mkexpr(vA), mkU8(16)), - binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) )); + putVReg( vD_addr, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB))); break; case 0x108: // vmulosb (Multiply Odd Signed Byte, AV p211) DIP("vmulosb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_MullEven8Sx16, - binop(Iop_ShlV128, mkexpr(vA), mkU8(8)), - binop(Iop_ShlV128, mkexpr(vB), mkU8(8)) )); + putVReg( vD_addr, binop(Iop_MullEven8Sx16, mkexpr(vA), mkexpr(vB))); break; case 0x148: // vmulosh (Multiply Odd Signed Half Word, AV p212) DIP("vmulosh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_MullEven16Sx8, - binop(Iop_ShlV128, mkexpr(vA), mkU8(16)), - binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) )); + putVReg( vD_addr, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB))); break; case 0x208: // vmuleub (Multiply Even Unsigned Byte, AV p209) DIP("vmuleub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)) ); + putVReg( vD_addr, MK_Iop_MullOdd8Ux16( mkexpr(vA), mkexpr(vB) )); break; case 0x248: // vmuleuh (Multiply Even Unsigned Half Word, AV p210) DIP("vmuleuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) ); + putVReg( vD_addr, MK_Iop_MullOdd16Ux8( mkexpr(vA), mkexpr(vB) )); break; case 0x308: // vmulesb (Multiply Even Signed Byte, AV p207) DIP("vmulesb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_MullEven8Sx16, mkexpr(vA), mkexpr(vB)) ); + putVReg( vD_addr, MK_Iop_MullOdd8Sx16( mkexpr(vA), mkexpr(vB) )); break; case 0x348: // vmulesh (Multiply Even Signed Half Word, AV p208) DIP("vmulesh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - putVReg( vD_addr, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) ); + putVReg( vD_addr, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) )); break; @@ -6001,21 +6015,17 @@ static Bool dis_av_multarith ( UInt theInstr ) assign( bHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vB)) ); assign( cHi, binop(Iop_InterleaveHI16x8, mkexpr(cSigns), mkexpr(vC)) ); - assign( zLo, binop(Iop_Add32x4, + assign( zLo, binop(Iop_Add32x4, mkexpr(cLo), binop(Iop_SarN32x4, binop(Iop_MullEven16Sx8, - binop(Iop_ShlV128, mkexpr(aLo), mkU8(16)), - binop(Iop_ShlV128, mkexpr(bLo), mkU8(16)) ), - mkU8(15)), - mkexpr(cLo)) ); + mkexpr(aLo), mkexpr(bLo)), + mkU8(15))) ); - assign( zHi, binop(Iop_Add32x4, + assign( zHi, binop(Iop_Add32x4, mkexpr(cHi), binop(Iop_SarN32x4, binop(Iop_MullEven16Sx8, - binop(Iop_ShlV128, mkexpr(aHi), mkU8(16)), - binop(Iop_ShlV128, mkexpr(bHi), mkU8(16)) ), - mkU8(15)), - mkexpr(cHi)) ); + mkexpr(aHi), mkexpr(bHi)), + mkU8(15))) ); putVReg( vD_addr, binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) ); break; @@ -6036,23 +6046,19 @@ static Bool dis_av_multarith ( UInt theInstr ) assign( zKonst, binop(Iop_ShlN32x4, unop(Iop_Dup32x4, mkU32(0x1)), mkU8(14)) ); - assign( zLo, binop(Iop_Add32x4, + assign( zLo, binop(Iop_Add32x4, mkexpr(cLo), binop(Iop_SarN32x4, binop(Iop_Add32x4, mkexpr(zKonst), binop(Iop_MullEven16Sx8, - binop(Iop_ShlV128, mkexpr(aLo), mkU8(16)), - binop(Iop_ShlV128, mkexpr(bLo), mkU8(16)) )), - mkU8(15)), - mkexpr(cLo)) ); + mkexpr(aLo), mkexpr(bLo))), + mkU8(15))) ); - assign( zHi, binop(Iop_Add32x4, + assign( zHi, binop(Iop_Add32x4, mkexpr(cHi), binop(Iop_SarN32x4, binop(Iop_Add32x4, mkexpr(zKonst), binop(Iop_MullEven16Sx8, - binop(Iop_ShlV128, mkexpr(aHi), mkU8(16)), - binop(Iop_ShlV128, mkexpr(bHi), mkU8(16)) )), - mkU8(15)), - mkexpr(cHi)) ); + mkexpr(aHi), mkexpr(bHi))), + mkU8(15))) ); putVReg( vD_addr, binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) ); break; @@ -6066,14 +6072,10 @@ static Bool dis_av_multarith ( UInt theInstr ) assign( bHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vB)) ); assign( cHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vC)) ); assign( zLo, binop(Iop_Add32x4, - binop(Iop_MullEven16Ux8, - binop(Iop_ShlV128, mkexpr(aLo), mkU8(16)), - binop(Iop_ShlV128, mkexpr(bLo), mkU8(16)) ), + binop(Iop_MullEven16Ux8, mkexpr(aLo), mkexpr(bLo) ), mkexpr(cLo)) ); assign( zHi, binop(Iop_Add32x4, - binop(Iop_MullEven16Ux8, - binop(Iop_ShlV128, mkexpr(aHi), mkU8(16)), - binop(Iop_ShlV128, mkexpr(bHi), mkU8(16)) ), + binop(Iop_MullEven16Ux8, mkexpr(aHi), mkexpr(bHi) ), mkexpr(cHi)) ); putVReg( vD_addr, binop(Iop_Narrow32Ux4, mkexpr(zHi), mkexpr(zLo)) ); break; @@ -6087,10 +6089,8 @@ static Bool dis_av_multarith ( UInt theInstr ) DIP("vmsumubm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr); /* multiply vA,vB (unsigned, widening) */ - assign( abEvn, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)) ); - assign( abOdd, binop(Iop_MullEven8Ux16, - binop(Iop_ShlV128, mkexpr(vA), mkU8(8)), - binop(Iop_ShlV128, mkexpr(vB), mkU8(8)) )); + assign( abEvn, MK_Iop_MullOdd8Ux16( mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)) ); /* evn,odd: V128_16Ux8 -> 2 x V128_32Ux4, zero-extended */ expand16Ux8( mkexpr(abEvn), &abEE, &abEO ); @@ -6118,29 +6118,23 @@ static Bool dis_av_multarith ( UInt theInstr ) expand8Ux16( mkexpr(vB), &bEvn, &bOdd ); /* multiply vA, vB, again separating adjacent lanes */ - assign( abEE, binop(Iop_MullEven16Sx8, mkexpr(aEvn), mkexpr(bEvn) )); - assign( abEO, binop(Iop_MullEven16Sx8, - binop(Iop_ShlV128, mkexpr(aEvn), mkU8(16)), - binop(Iop_ShlV128, mkexpr(bEvn), mkU8(16)) )); - assign( abOE, binop(Iop_MullEven16Sx8, mkexpr(aOdd), mkexpr(bOdd) )); - assign( abOO, binop(Iop_MullEven16Sx8, - binop(Iop_ShlV128, mkexpr(aOdd), mkU8(16)), - binop(Iop_ShlV128, mkexpr(bOdd), mkU8(16)) )); + assign( abEE, MK_Iop_MullOdd16Sx8( mkexpr(aEvn), mkexpr(bEvn) )); + assign( abEO, binop(Iop_MullEven16Sx8, mkexpr(aEvn), mkexpr(bEvn)) ); + assign( abOE, MK_Iop_MullOdd16Sx8( mkexpr(aOdd), mkexpr(bOdd) )); + assign( abOO, binop(Iop_MullEven16Sx8, mkexpr(aOdd), mkexpr(bOdd)) ); /* add results together, + vC */ putVReg( vD_addr, binop(Iop_QAdd32Sx4, mkexpr(vC), binop(Iop_QAdd32Sx4, binop(Iop_QAdd32Sx4, mkexpr(abEE), mkexpr(abEO)), - binop(Iop_QAdd32Sx4, mkexpr(abOE), mkexpr(abOO)) ))); + binop(Iop_QAdd32Sx4, mkexpr(abOE), mkexpr(abOO)))) ); break; } case 0x26: { // vmsumuhm (Multiply Sum Unsigned HW Modulo, AV p205) DIP("vmsumuhm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr); - assign( abEvn, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) ); - assign( abOdd, binop(Iop_MullEven16Ux8, - binop(Iop_ShlV128, mkexpr(vA), mkU8(16)), - binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) )); + assign( abEvn, MK_Iop_MullOdd16Ux8( mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) ); putVReg( vD_addr, binop(Iop_Add32x4, mkexpr(vC), binop(Iop_Add32x4, mkexpr(abEvn), mkexpr(abOdd))) ); @@ -6149,10 +6143,8 @@ static Bool dis_av_multarith ( UInt theInstr ) case 0x27: { // vmsumuhs (Multiply Sum Unsigned HW Saturate, AV p206) DIP("vmsumuhs v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr); /* widening multiply, separating lanes */ - assign( abEvn, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) ); - assign( abOdd, binop(Iop_MullEven16Ux8, - binop(Iop_ShlV128, mkexpr(vA), mkU8(16)), - binop(Iop_ShlV128, mkexpr(vB), mkU8(16))) ); + assign( abEvn, MK_Iop_MullOdd16Ux8(mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) ); /* break V128 to 4xI32's, zero-extending to I64's */ breakV128to4x64U( mkexpr(abEvn), &ab7, &ab5, &ab3, &ab1 ); @@ -6177,10 +6169,8 @@ static Bool dis_av_multarith ( UInt theInstr ) } case 0x28: { // vmsumshm (Multiply Sum Signed HW Modulo, AV p202) DIP("vmsumshm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr); - assign( abEvn, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) ); - assign( abOdd, binop(Iop_MullEven16Sx8, - binop(Iop_ShlV128, mkexpr(vA), mkU8(16)), - binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) )); + assign( abEvn, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) ); putVReg( vD_addr, binop(Iop_Add32x4, mkexpr(vC), binop(Iop_Add32x4, mkexpr(abOdd), mkexpr(abEvn))) ); @@ -6189,10 +6179,8 @@ static Bool dis_av_multarith ( UInt theInstr ) case 0x29: { // vmsumshs (Multiply Sum Signed HW Saturate, AV p203) DIP("vmsumshs v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr); /* widening multiply, separating lanes */ - assign( abEvn, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) ); - assign( abOdd, binop(Iop_MullEven16Sx8, - binop(Iop_ShlV128, mkexpr(vA), mkU8(16)), - binop(Iop_ShlV128, mkexpr(vB), mkU8(16))) ); + assign( abEvn, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) ); /* break V128 to 4xI32's, sign-extending to I64's */ breakV128to4x64S( mkexpr(abEvn), &ab7, &ab5, &ab3, &ab1 ); diff --git a/VEX/priv/host-ppc32/isel.c b/VEX/priv/host-ppc32/isel.c index 001766c0d3..d6a0bec2e1 100644 --- a/VEX/priv/host-ppc32/isel.c +++ b/VEX/priv/host-ppc32/isel.c @@ -3307,8 +3307,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) case Iop_Max8Sx16: op = Pav_MAXS; goto do_AvBin8x16; case Iop_Min8Ux16: op = Pav_MINU; goto do_AvBin8x16; case Iop_Min8Sx16: op = Pav_MINS; goto do_AvBin8x16; - case Iop_MullEven8Ux16: op = Pav_EMULU; goto do_AvBin8x16; - case Iop_MullEven8Sx16: op = Pav_EMULS; goto do_AvBin8x16; + case Iop_MullEven8Ux16: op = Pav_OMULU; goto do_AvBin8x16; + case Iop_MullEven8Sx16: op = Pav_OMULS; goto do_AvBin8x16; case Iop_CmpEQ8x16: op = Pav_CMPEQU; goto do_AvBin8x16; case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16; case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16; @@ -3341,8 +3341,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) case Iop_Max16Sx8: op = Pav_MAXS; goto do_AvBin16x8; case Iop_Min16Ux8: op = Pav_MINU; goto do_AvBin16x8; case Iop_Min16Sx8: op = Pav_MINS; goto do_AvBin16x8; - case Iop_MullEven16Ux8: op = Pav_EMULU; goto do_AvBin16x8; - case Iop_MullEven16Sx8: op = Pav_EMULS; goto do_AvBin16x8; + case Iop_MullEven16Ux8: op = Pav_OMULU; goto do_AvBin16x8; + case Iop_MullEven16Sx8: op = Pav_OMULS; goto do_AvBin16x8; case Iop_CmpEQ16x8: op = Pav_CMPEQU; goto do_AvBin16x8; case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8; case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8; diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 47298ecfad..6bbdea89f3 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -553,8 +553,7 @@ typedef Iop_MulLo16Sx8, Iop_MulLo32Sx4, Iop_MulHi16Ux8, Iop_MulHi32Ux4, Iop_MulHi16Sx8, Iop_MulHi32Sx4, - - /* (widening signed/unsigned of even lanes) */ + /* (widening signed/unsigned of even lanes, with lowest lane=zero) */ Iop_MullEven8Ux16, Iop_MullEven16Ux8, Iop_MullEven8Sx16, Iop_MullEven16Sx8, -- 2.47.3