From: Julian Seward Date: Wed, 11 Jul 2012 13:19:10 +0000 (+0000) Subject: ARM: Implement QADD and QSUB. Fixes #286917. X-Git-Tag: svn/VALGRIND_3_8_1^2~54 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2d137667362315d0b30f318469bc0de7c7c50d16;p=thirdparty%2Fvalgrind.git ARM: Implement QADD and QSUB. Fixes #286917. git-svn-id: svn://svn.valgrind.org/vex/trunk@2424 --- diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c index a7b5590ef3..0426b40e17 100644 --- a/VEX/priv/guest_arm_toIR.c +++ b/VEX/priv/guest_arm_toIR.c @@ -1687,6 +1687,21 @@ IRExpr* signed_overflow_after_Add32 ( IRExpr* resE, mkU8(31) ); } +/* Similarly .. also from HD p27 .. Signed-overflow test for a 32-bit subtract: resE is taken to be the wrapped difference (argL minus argR), and the value built is bit 31 of (argL ^ argR) & (res ^ argL), moved down to bit 0 by the Shr32. */ +static +IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE, + IRTemp argL, IRTemp argR ) +{ + IRTemp res = newTemp(Ity_I32); + assign(res, resE); + return + binop( Iop_Shr32, + binop( Iop_And32, + binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ), + binop( Iop_Xor32, mkexpr(res), mkexpr(argL) )), + mkU8(31) ); +} + /*------------------------------------------------------------*/ /*--- Larger helpers ---*/ @@ -10255,6 +10270,108 @@ static Bool decode_V6MEDIA_instruction ( /* fall through */ } + /* ------------------ qadd<c> <Rd>,<Rm>,<Rn> ------------------- */ + { + UInt regD = 99, regN = 99, regM = 99; + Bool gate = False; + + if (isT) { + if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) { + regN = INSNT0(3,0); + regD = INSNT1(11,8); + regM = INSNT1(3,0); + if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)) + gate = True; + } + } else { + if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) && + INSNA(11,8) == BITS4(0,0,0,0) && + INSNA(7,4) == BITS4(0,1,0,1)) { + regD = INSNA(15,12); + regN = INSNA(19,16); + regM = INSNA(3,0); + if (regD != 15 && regN != 15 && regM != 15) + gate = True; + } + } + + if (gate) { + IRTemp rNt = newTemp(Ity_I32); + IRTemp rMt = newTemp(Ity_I32); + IRTemp res_q = newTemp(Ity_I32); + + assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) ); + assign( rMt, isT ? 
getIRegT(regM) : getIRegA(regM) ); + + assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt))); /* res_q: saturating signed sum of rM and rN */ + if (isT) + putIRegT( regD, mkexpr(res_q), condT ); + else + putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring ); + + or_into_QFLAG32( /* the add is redone with plain Iop_Add32 so the overflow test is fed the wrapped, unsaturated result */ + signed_overflow_after_Add32( + binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt), + condT + ); + + DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN); + return True; + } + /* fall through */ + } + + /* ------------------ qsub<c> <Rd>,<Rm>,<Rn> ------------------- */ + { + UInt regD = 99, regN = 99, regM = 99; + Bool gate = False; + + if (isT) { + if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) { + regN = INSNT0(3,0); + regD = INSNT1(11,8); + regM = INSNT1(3,0); + if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)) + gate = True; + } + } else { + if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) && + INSNA(11,8) == BITS4(0,0,0,0) && + INSNA(7,4) == BITS4(0,1,0,1)) { + regD = INSNA(15,12); + regN = INSNA(19,16); + regM = INSNA(3,0); + if (regD != 15 && regN != 15 && regM != 15) + gate = True; + } + } + + if (gate) { + IRTemp rNt = newTemp(Ity_I32); + IRTemp rMt = newTemp(Ity_I32); + IRTemp res_q = newTemp(Ity_I32); + + assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) ); + assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) ); + + assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt))); /* res_q: saturating signed difference, rM minus rN */ + if (isT) + putIRegT( regD, mkexpr(res_q), condT ); + else + putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring ); + + or_into_QFLAG32( /* the subtract is redone with plain Iop_Sub32 so the overflow test is fed the wrapped, unsaturated result */ + signed_overflow_after_Sub32( + binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt), + condT + ); + + DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN); + return True; + } + /* fall through */ + } + + /* ---------- Doesn't match anything. 
---------- */ return False; diff --git a/VEX/priv/host_arm_isel.c b/VEX/priv/host_arm_isel.c index f40aa6ef84..059006bbd7 100644 --- a/VEX/priv/host_arm_isel.c +++ b/VEX/priv/host_arm_isel.c @@ -1362,6 +1362,10 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) fn = &h_generic_calc_QSub8Ux4; break; case Iop_Sad8Ux4: fn = &h_generic_calc_Sad8Ux4; break; + case Iop_QAdd32S: + fn = &h_generic_calc_QAdd32S; break; + case Iop_QSub32S: + fn = &h_generic_calc_QSub32S; break; default: break; } diff --git a/VEX/priv/host_generic_simd64.c b/VEX/priv/host_generic_simd64.c index 3bebe90685..e3e1975700 100644 --- a/VEX/priv/host_generic_simd64.c +++ b/VEX/priv/host_generic_simd64.c @@ -139,6 +139,16 @@ static inline UChar index8x8 ( ULong w64, UChar ix ) { /* Scalar helpers. */ +static inline Int qadd32S ( Int xx, Int yy ) /* Signed saturating add: the sum is formed in a Long, clamped to the range loLim..hiLim, then narrowed back to Int. */ +{ + Long t = ((Long)xx) + ((Long)yy); + const Long loLim = -0x80000000LL; + const Long hiLim = 0x7FFFFFFFLL; + if (t < loLim) t = loLim; + if (t > hiLim) t = hiLim; + return (Int)t; +} + static inline Short qadd16S ( Short xx, Short yy ) { Int t = ((Int)xx) + ((Int)yy); @@ -169,6 +179,16 @@ static inline UChar qadd8U ( UChar xx, UChar yy ) return (UChar)t; } +static inline Int qsub32S ( Int xx, Int yy ) /* Signed saturating subtract (xx minus yy): the difference is formed in a Long, clamped to the range loLim..hiLim, then narrowed back to Int. */ +{ + Long t = ((Long)xx) - ((Long)yy); + const Long loLim = -0x80000000LL; + const Long hiLim = 0x7FFFFFFFLL; + if (t < loLim) t = loLim; + if (t > hiLim) t = hiLim; + return (Int)t; +} + static inline Short qsub16S ( Short xx, Short yy ) { Int t = ((Int)xx) - ((Int)yy); @@ -1379,6 +1399,17 @@ UInt h_generic_calc_Sad8Ux4 ( UInt xx, UInt yy ) + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) ); } +UInt h_generic_calc_QAdd32S ( UInt xx, UInt yy ) /* Externally visible wrapper over qadd32S; the ARM instruction selector points fn at it for Iop_QAdd32S. The UInt arguments convert implicitly to qadd32S's Int parameters. */ +{ + return qadd32S( xx, yy ); +} + +UInt h_generic_calc_QSub32S ( UInt xx, UInt yy ) /* Externally visible wrapper over qsub32S; the ARM instruction selector points fn at it for Iop_QSub32S. The UInt arguments convert implicitly to qsub32S's Int parameters. */ +{ + return qsub32S( xx, yy ); +} + + /*------------------------------------------------------------------*/ /* Decimal Floating Point (DFP) externally visible helper functions */ /* that implement Iop_BCDtoDPB and Iop_DPBtoBCD */ 
diff --git a/VEX/priv/host_generic_simd64.h b/VEX/priv/host_generic_simd64.h index 1492ad0036..4a5fa7aad2 100644 --- a/VEX/priv/host_generic_simd64.h +++ b/VEX/priv/host_generic_simd64.h @@ -153,11 +153,14 @@ extern UInt h_generic_calc_QSub8Sx4 ( UInt, UInt ); extern UInt h_generic_calc_Sad8Ux4 ( UInt, UInt ); +extern UInt h_generic_calc_QAdd32S ( UInt, UInt ); +extern UInt h_generic_calc_QSub32S ( UInt, UInt ); + extern UInt h_generic_calc_CmpNEZ16x2 ( UInt ); extern UInt h_generic_calc_CmpNEZ8x4 ( UInt ); -extern ULong h_DPBtoBCD( ULong dpb ); -extern ULong h_BCDtoDPB( ULong bcd ); +extern ULong h_DPBtoBCD ( ULong dpb ); +extern ULong h_BCDtoDPB ( ULong bcd ); ULong dpb_to_bcd(ULong chunk); // helper for h_DPBtoBCD ULong bcd_to_dpb(ULong chunk); // helper for h_BCDtoDPB diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c index c689c2b160..dc0fc33a86 100644 --- a/VEX/priv/ir_defs.c +++ b/VEX/priv/ir_defs.c @@ -334,6 +334,8 @@ void ppIROp ( IROp op ) case Iop_TruncF64asF32: vex_printf("TruncF64asF32"); return; case Iop_CalcFPRF: vex_printf("CalcFPRF"); return; + case Iop_QAdd32S: vex_printf("QAdd32S"); return; + case Iop_QSub32S: vex_printf("QSub32S"); return; case Iop_Add16x2: vex_printf("Add16x2"); return; case Iop_Sub16x2: vex_printf("Sub16x2"); return; case Iop_QAdd16Sx2: vex_printf("QAdd16Sx2"); return; @@ -2142,6 +2144,7 @@ void typeOfPrimop ( IROp op, case Iop_Add32: case Iop_Sub32: case Iop_Mul32: case Iop_Or32: case Iop_And32: case Iop_Xor32: case Iop_Max32U: + case Iop_QAdd32S: case Iop_QSub32S: case Iop_Add16x2: case Iop_Sub16x2: case Iop_QAdd16Sx2: case Iop_QAdd16Ux2: case Iop_QSub16Sx2: case Iop_QSub16Ux2: diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 860b08b4f8..f0af9f06de 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -746,6 +746,10 @@ typedef /* ------------------ 32-bit SIMD Integer ------------------ */ + /* 32x1 saturating add/sub (ok, well, not really SIMD :) */ + Iop_QAdd32S, + Iop_QSub32S, + /* 16x2 add/sub, also 
signed/unsigned saturating variants */ Iop_Add16x2, Iop_Sub16x2, Iop_QAdd16Sx2, Iop_QAdd16Ux2,