From: Julian Seward
Date: Wed, 20 Oct 2010 21:38:42 +0000 (+0000)
Subject: Merge from trunk, r2067 (Add support for SMSAD{X}, SMLSD{X}, USAD{A}8.)
X-Git-Tag: svn/VALGRIND_3_6_1^2~15
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=886e7307581475f4d878eb9886dde426f4afe345;p=thirdparty%2Fvalgrind.git

Merge from trunk, r2067 (Add support for SMSAD{X}, SMLSD{X}, USAD{A}8.)

git-svn-id: svn://svn.valgrind.org/vex/branches/VEX_3_6_BRANCH@2068
---

diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c
index 277a90d5f4..c1f92111f6 100644
--- a/VEX/priv/guest_arm_toIR.c
+++ b/VEX/priv/guest_arm_toIR.c
@@ -1667,6 +1667,24 @@ static void armSignedSatQ( IRTemp regT, /* value to clamp - Ity_I32 */
 }
 
 
+/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
+   overflow occurred for 32-bit addition. Needs both args and the
+   result. HD p27. */
+static
+IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
+                                      IRTemp argL, IRTemp argR )
+{
+   IRTemp res = newTemp(Ity_I32);
+   assign(res, resE);
+   return
+      binop( Iop_Shr32,
+             binop( Iop_And32,
+                    binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
+                    binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
+             mkU8(31) );
+}
+
+
 /*------------------------------------------------------------*/
 /*--- Larger helpers ---*/
 /*------------------------------------------------------------*/
@@ -9651,27 +9669,31 @@ static Bool decode_V6MEDIA_instruction (
    }
 
    /* --------------- smuad, smuadx,, --------------- */
+   /* --------------- smsad, smsadx,, --------------- */
    {
      UInt regD = 99, regN = 99, regM = 99, bitM = 99;
-     Bool gate = False;
+     Bool gate = False, isAD = False;
 
      if (isT) {
-        if (INSNT0(15,4) == 0xFB2 && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
+        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
+            && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
            regN = INSNT0(3,0);
            regD = INSNT1(11,8);
            regM = INSNT1(3,0);
            bitM = INSNT1(4,4);
+           isAD = INSNT0(15,4) == 0xFB2;
            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
               gate = True;
         }
      } else {
         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
             INSNA(15,12) == BITS4(1,1,1,1) &&
-            (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1) ) {
+            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
            regD = INSNA(19,16);
            regN = INSNA(3,0);
            regM = INSNA(11,8);
            bitM = INSNA(5,5);
+           isAD = INSNA(6,6) == 0;
            if (regD != 15 && regN != 15 && regM != 15)
               gate = True;
         }
@@ -9701,22 +9723,24 @@ static Bool decode_V6MEDIA_instruction (
                       binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
                       binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
        IRExpr* ire_result
-          = binop( Iop_Add32, mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
+          = binop( isAD ? Iop_Add32 : Iop_Sub32,
+                   mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
 
        if (isT)
           putIRegT( regD, ire_result, condT );
        else
           putIRegA( regD, ire_result, condT, Ijk_Boring );
 
-       or_into_QFLAG32( binop( Iop_Shr32,
-                               binop( Iop_And32,
-                                      binop( Iop_Xor32, ire_result,
-                                             mkexpr(irt_prod_hi) ),
-                                      binop( Iop_Xor32, ire_result,
-                                             mkexpr(irt_prod_lo) ) ),
-                               mkU8(31)), condT );
+       if (isAD) {
+          or_into_QFLAG32(
+             signed_overflow_after_Add32( ire_result,
+                                          irt_prod_lo, irt_prod_hi ),
+             condT
+          );
+       }
 
-       DIP("smuad%s%s r%u, r%u, r%u\n",
+       DIP("smu%cd%s%s r%u, r%u, r%u\n",
+           isAD ? 'a' : 's',
            bitM ? "x" : "", nCC(conq), regD, regN, regM);
        return True;
      }
@@ -9724,29 +9748,33 @@ static Bool decode_V6MEDIA_instruction (
    }
 
    /* --------------- smlad{X} ,,, -------------- */
+   /* --------------- smlsd{X} ,,, -------------- */
    {
      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
-     Bool gate = False;
+     Bool gate = False, isAD = False;
 
      if (isT) {
-        if (INSNT0(15,4) == 0xFB2 && INSNT1(7,5) == BITS3(0,0,0)) {
+        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
+            && INSNT1(7,5) == BITS3(0,0,0)) {
            regN = INSNT0(3,0);
            regD = INSNT1(11,8);
            regM = INSNT1(3,0);
            regA = INSNT1(15,12);
            bitM = INSNT1(4,4);
+           isAD = INSNT0(15,4) == 0xFB2;
            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
                && !isBadRegT(regA))
               gate = True;
         }
      } else {
         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
-            (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
+            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
            regD = INSNA(19,16);
            regA = INSNA(15,12);
            regN = INSNA(3,0);
            regM = INSNA(11,8);
            bitM = INSNA(5,5);
+           isAD = INSNA(6,6) == 0;
            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
               gate = True;
         }
@@ -9779,7 +9807,7 @@ static Bool decode_V6MEDIA_instruction (
                binop( Iop_Mul32,
                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
-       assign( irt_sum, binop( Iop_Add32,
+       assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
                                mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
        IRExpr* ire_result
           = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
@@ -9789,22 +9817,21 @@ static Bool decode_V6MEDIA_instruction (
        else
           putIRegA( regD, ire_result, condT, Ijk_Boring );
 
-       or_into_QFLAG32( binop( Iop_Shr32,
-                               binop( Iop_And32,
-                                      binop( Iop_Xor32, mkexpr(irt_sum),
-                                             mkexpr(irt_prod_lo) ),
-                                      binop( Iop_Xor32, mkexpr(irt_sum),
-                                             mkexpr(irt_prod_hi) ) ),
-                               mkU8(31)), condT );
-       or_into_QFLAG32( binop( Iop_Shr32,
-                               binop( Iop_And32,
-                                      binop( Iop_Xor32, ire_result,
-                                             mkexpr(irt_sum) ),
-                                      binop( Iop_Xor32, ire_result,
-                                             mkexpr(irt_regA) ) ),
-                               mkU8(31)), condT );
-
-       DIP("smlad%s%s r%u, r%u, r%u, r%u\n",
+       if (isAD) {
+          or_into_QFLAG32(
+             signed_overflow_after_Add32( mkexpr(irt_sum),
+                                          irt_prod_lo, irt_prod_hi ),
+             condT
+          );
+       }
+
+       or_into_QFLAG32(
+          signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
+          condT
+       );
+
+       DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
+           isAD ? 'a' : 's',
            bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
        return True;
      }
@@ -9868,14 +9895,10 @@ static Bool decode_V6MEDIA_instruction (
        else
           putIRegA( regD, ire_result, condT, Ijk_Boring );
 
-       or_into_QFLAG32( binop( Iop_Shr32,
-                               binop( Iop_And32,
-                                      binop(Iop_Xor32,
-                                            ire_result, mkexpr(irt_prod)),
-                                      binop(Iop_Xor32,
-                                            ire_result, mkexpr(irt_regA)) ),
-                               mkU8(31)),
-                        condT );
+       or_into_QFLAG32(
+          signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
+          condT
+       );
 
        DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
             bitN ? 't' : 'b', bitM ? 't' : 'b',
@@ -9943,14 +9966,10 @@ static Bool decode_V6MEDIA_instruction (
        else
           putIRegA( regD, ire_result, condT, Ijk_Boring );
 
-       or_into_QFLAG32( binop( Iop_Shr32,
-                               binop( Iop_And32,
-                                      binop(Iop_Xor32,
-                                            ire_result, mkexpr(prod32)),
-                                      binop(Iop_Xor32,
-                                            ire_result, mkexpr(irt_regA)) ),
-                               mkU8(31)),
-                        condT );
+       or_into_QFLAG32(
+          signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
+          condT
+       );
 
        DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
             bitM ? 't' : 'b',
@@ -10111,6 +10130,59 @@ static Bool decode_V6MEDIA_instruction (
      /* fall through */
    }
 
+   /* --------------- usad8 Rd,Rn,Rm ---------------- */
+   /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
+   {
+     UInt rD = 99, rN = 99, rM = 99, rA = 99;
+     Bool gate = False;
+
+     if (isT) {
+        if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
+           rN = INSNT0(3,0);
+           rA = INSNT1(15,12);
+           rD = INSNT1(11,8);
+           rM = INSNT1(3,0);
+           if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
+              gate = True;
+        }
+     } else {
+        if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
+            INSNA(7,4) == BITS4(0,0,0,1) ) {
+           rD = INSNA(19,16);
+           rA = INSNA(15,12);
+           rM = INSNA(11,8);
+           rN = INSNA(3,0);
+           if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
+              gate = True;
+        }
+     }
+     /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
+
+     if (gate) {
+        IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
+        IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
+        IRExpr* rAe = rA == 15 ? mkU32(0)
+                               : (isT ? getIRegT(rA) : getIRegA(rA));
+        IRExpr* res = binop(Iop_Add32,
+                            binop(Iop_Sad8Ux4, rNe, rMe),
+                            rAe);
+        if (isT)
+           putIRegT( rD, res, condT );
+        else
+           putIRegA( rD, res, condT, Ijk_Boring );
+
+        if (rA == 15) {
+           DIP( "usad8%s r%u, r%u, r%u\n",
+                nCC(conq), rD, rN, rM );
+        } else {
+           DIP( "usada8%s r%u, r%u, r%u, r%u\n",
+                nCC(conq), rD, rN, rM, rA );
+        }
+        return True;
+     }
+     /* fall through */
+   }
+
    /* ---------- Doesn't match anything. ---------- */
    return False;
 
diff --git a/VEX/priv/host_arm_isel.c b/VEX/priv/host_arm_isel.c
index 747293830a..4bba9a35de 100644
--- a/VEX/priv/host_arm_isel.c
+++ b/VEX/priv/host_arm_isel.c
@@ -1347,6 +1347,8 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
                fn = &h_generic_calc_QSub8Sx4; break;
             case Iop_QSub8Ux4:
                fn = &h_generic_calc_QSub8Ux4; break;
+            case Iop_Sad8Ux4:
+               fn = &h_generic_calc_Sad8Ux4; break;
             default:
                break;
          }
diff --git a/VEX/priv/host_generic_simd64.c b/VEX/priv/host_generic_simd64.c
index e685ad6a3d..03d6d2ff17 100644
--- a/VEX/priv/host_generic_simd64.c
+++ b/VEX/priv/host_generic_simd64.c
@@ -439,6 +439,12 @@ static inline Char hsub8S ( Char xx, Char yy )
    return (Char)r;
 }
 
+static inline UInt absdiff8U ( UChar xx, UChar yy )
+{
+   UInt xxu = (UChar)xx;
+   UInt yyu = (UChar)yy;
+   return xxu >= yyu ? xxu - yyu : yyu - xxu;
+}
 
 /* ----------------------------------------------------- */
 /* Start of the externally visible functions. These simply
@@ -1317,6 +1323,15 @@ UInt h_generic_calc_CmpNEZ8x4 ( UInt xx )
           );
 }
 
+UInt h_generic_calc_Sad8Ux4 ( UInt xx, UInt yy )
+{
+   return absdiff8U( sel8x4_3(xx), sel8x4_3(yy) )
+          + absdiff8U( sel8x4_2(xx), sel8x4_2(yy) )
+          + absdiff8U( sel8x4_1(xx), sel8x4_1(yy) )
+          + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) );
+}
+
+
 /*---------------------------------------------------------------*/
 /*--- end host_generic_simd64.c ---*/
 /*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_simd64.h b/VEX/priv/host_generic_simd64.h
index c29fbf6b2f..e854fc726e 100644
--- a/VEX/priv/host_generic_simd64.h
+++ b/VEX/priv/host_generic_simd64.h
@@ -149,6 +149,8 @@ extern UInt h_generic_calc_QAdd8Sx4 ( UInt, UInt );
 extern UInt h_generic_calc_QSub8Ux4 ( UInt, UInt );
 extern UInt h_generic_calc_QSub8Sx4 ( UInt, UInt );
 
+extern UInt h_generic_calc_Sad8Ux4 ( UInt, UInt );
+
 extern UInt h_generic_calc_CmpNEZ16x2 ( UInt );
 extern UInt h_generic_calc_CmpNEZ8x4 ( UInt );
 
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index eabf831da5..f78db106e2 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -305,6 +305,7 @@ void ppIROp ( IROp op )
       case Iop_HAdd8Sx4: vex_printf("HAdd8Sx4"); return;
       case Iop_HSub8Ux4: vex_printf("HSub8Ux4"); return;
       case Iop_HSub8Sx4: vex_printf("HSub8Sx4"); return;
+      case Iop_Sad8Ux4: vex_printf("Sad8Ux4"); return;
 
       case Iop_CmpNEZ16x2: vex_printf("CmpNEZ16x2"); return;
       case Iop_CmpNEZ8x4: vex_printf("CmpNEZ8x4"); return;
@@ -1944,6 +1945,7 @@ void typeOfPrimop ( IROp op,
       case Iop_QSub8Sx4: case Iop_QSub8Ux4:
       case Iop_HAdd8Ux4: case Iop_HAdd8Sx4:
       case Iop_HSub8Ux4: case Iop_HSub8Sx4:
+      case Iop_Sad8Ux4:
          BINARY(Ity_I32,Ity_I32, Ity_I32);
 
       case Iop_Add64: case Iop_Sub64: case Iop_Mul64:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index b3d78b6b06..95042aab27 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -699,6 +699,9 @@ typedef
       Iop_HAdd8Ux4, Iop_HAdd8Sx4,
       Iop_HSub8Ux4, Iop_HSub8Sx4,
 
+      /* 8x4 sum of absolute unsigned differences. */
+      Iop_Sad8Ux4,
+
       /* MISC (vector integer cmp != 0) */
       Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
 