ARM: Implement QADD and QSUB. Fixes #286917.

author Julian Seward <jseward@acm.org>

Wed, 11 Jul 2012 13:19:10 +0000 (13:19 +0000)

committer Julian Seward <jseward@acm.org>

Wed, 11 Jul 2012 13:19:10 +0000 (13:19 +0000)
author Julian Seward <jseward@acm.org>
Wed, 11 Jul 2012 13:19:10 +0000 (13:19 +0000)
committer Julian Seward <jseward@acm.org>
Wed, 11 Jul 2012 13:19:10 +0000 (13:19 +0000)
diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c

index a7b5590ef392b8c9b185242dd8607350d4768aa7..0426b40e172ca4909011619af46f3dd0b8790864 100644 (file)
--- a/VEX/priv/guest_arm_toIR.c
+++ b/VEX/priv/guest_arm_toIR.c
@@ -1687,6 +1687,21 @@ IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
               mkU8(31) );
  }
  
+/* Similarly .. also from HD p27 ..  Builds the IR expression that
+   detects signed overflow after a 32-bit subtract.  resE is the
+   result expression supplied by the caller; it is bound to a fresh
+   temporary first so that it is referenced rather than duplicated.
+   The expression returned is
+      ((argL ^ argR) & (res ^ argL)) >> 31
+*/
+static
+IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
+                                      IRTemp argL, IRTemp argR )
+{
+   IRTemp  diff = newTemp(Ity_I32);
+   IRExpr* argsXor;
+   IRExpr* resXor;
+
+   assign(diff, resE);
+
+   /* argL ^ argR, and res ^ argL */
+   argsXor = binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) );
+   resXor  = binop( Iop_Xor32, mkexpr(diff), mkexpr(argL) );
+
+   /* Combine the two and shift bit 31 down to bit 0. */
+   return binop( Iop_Shr32,
+                 binop( Iop_And32, argsXor, resXor ),
+                 mkU8(31) );
+}
+
  
  /*------------------------------------------------------------*/
  /*--- Larger helpers                                       ---*/
@@ -10255,6 +10270,108 @@ static Bool decode_V6MEDIA_instruction (
       /* fall through */
     }
  
+   /* ------------------ qadd<c> <Rd>,<Rm>,<Rn> ------------------- */
+   {
+     UInt regD = 99, regN = 99, regM = 99;
+     Bool gate = False;
+
+     if (isT) {
+        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
+           regN = INSNT0(3,0);
+           regD = INSNT1(11,8);
+           regM = INSNT1(3,0);
+           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+              gate = True;
+        }
+     } else {
+        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
+            INSNA(11,8)  == BITS4(0,0,0,0)         &&
+            INSNA(7,4)   == BITS4(0,1,0,1)) {
+           regD = INSNA(15,12);
+           regN = INSNA(19,16);
+           regM = INSNA(3,0);
+           if (regD != 15 && regN != 15 && regM != 15)
+              gate = True;
+        }
+     }
+
+     if (gate) {
+        IRTemp rNt   = newTemp(Ity_I32);
+        IRTemp rMt   = newTemp(Ity_I32);
+        IRTemp res_q = newTemp(Ity_I32);
+
+        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
+        if (isT)
+           putIRegT( regD, mkexpr(res_q), condT );
+        else
+           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+        or_into_QFLAG32(
+           signed_overflow_after_Add32(
+              binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
+           condT
+        );
+
+        DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
+        return True;
+     }
+     /* fall through */
+   }
+
+   /* ------------------ qsub<c> <Rd>,<Rm>,<Rn> ------------------- */
+   {
+     UInt regD = 99, regN = 99, regM = 99;
+     Bool gate = False;
+
+     if (isT) {
+        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
+           regN = INSNT0(3,0);
+           regD = INSNT1(11,8);
+           regM = INSNT1(3,0);
+           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+              gate = True;
+        }
+     } else {
+        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
+            INSNA(11,8)  == BITS4(0,0,0,0)         &&
+            INSNA(7,4)   == BITS4(0,1,0,1)) {
+           regD = INSNA(15,12);
+           regN = INSNA(19,16);
+           regM = INSNA(3,0);
+           if (regD != 15 && regN != 15 && regM != 15)
+              gate = True;
+        }
+     }
+
+     if (gate) {
+        IRTemp rNt   = newTemp(Ity_I32);
+        IRTemp rMt   = newTemp(Ity_I32);
+        IRTemp res_q = newTemp(Ity_I32);
+
+        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
+        if (isT)
+           putIRegT( regD, mkexpr(res_q), condT );
+        else
+           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+        or_into_QFLAG32(
+           signed_overflow_after_Sub32(
+              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
+           condT
+        );
+
+        DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
+        return True;
+     }
+     /* fall through */
+   }
+
     /* ---------- Doesn't match anything. ---------- */
     return False;
  
diff --git a/VEX/priv/host_arm_isel.c b/VEX/priv/host_arm_isel.c

index f40aa6ef84d1f4e109263ec59028df614cbe9737..059006bbd73c1e3a4ed410da1616d3342d8bd8cb 100644 (file)
--- a/VEX/priv/host_arm_isel.c
+++ b/VEX/priv/host_arm_isel.c
@@ -1362,6 +1362,10 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
              fn = &h_generic_calc_QSub8Ux4; break;
           case Iop_Sad8Ux4:
              fn = &h_generic_calc_Sad8Ux4; break;
+         case Iop_QAdd32S:
+            fn = &h_generic_calc_QAdd32S; break;
+         case Iop_QSub32S:
+            fn = &h_generic_calc_QSub32S; break;
           default:
              break;
        }
diff --git a/VEX/priv/host_generic_simd64.c b/VEX/priv/host_generic_simd64.c

index 3bebe9068547a1d983cb570c73e992fffd9bcc75..e3e19757004204a9e85ea7f2fc51d318c4b57da7 100644 (file)
--- a/VEX/priv/host_generic_simd64.c
+++ b/VEX/priv/host_generic_simd64.c
@@ -139,6 +139,16 @@ static inline UChar index8x8 ( ULong w64, UChar ix ) {
  
  /* Scalar helpers. */
  
+/* Signed saturating 32-bit add.  The sum is formed at 64-bit width,
+   then clamped to the range [-0x80000000, 0x7FFFFFFF] before being
+   narrowed back to Int. */
+static inline Int qadd32S ( Int xx, Int yy )
+{
+   const Long hiLim =  0x7FFFFFFFLL;
+   const Long loLim = -0x80000000LL;
+   Long sum = (Long)xx + (Long)yy;
+   if (sum > hiLim) return (Int)hiLim;
+   if (sum < loLim) return (Int)loLim;
+   return (Int)sum;
+}
+
  static inline Short qadd16S ( Short xx, Short yy ) 
  {
     Int t = ((Int)xx) + ((Int)yy);
@@ -169,6 +179,16 @@ static inline UChar qadd8U ( UChar xx, UChar yy )
     return (UChar)t;
  }
  
+/* Signed saturating 32-bit subtract (xx - yy).  The difference is
+   formed at 64-bit width, then clamped to the range
+   [-0x80000000, 0x7FFFFFFF] before being narrowed back to Int. */
+static inline Int qsub32S ( Int xx, Int yy )
+{
+   const Long hiLim =  0x7FFFFFFFLL;
+   const Long loLim = -0x80000000LL;
+   Long diff = (Long)xx - (Long)yy;
+   if (diff > hiLim) return (Int)hiLim;
+   if (diff < loLim) return (Int)loLim;
+   return (Int)diff;
+}
+
  static inline Short qsub16S ( Short xx, Short yy )
  {
     Int t = ((Int)xx) - ((Int)yy);
@@ -1379,6 +1399,17 @@ UInt h_generic_calc_Sad8Ux4 ( UInt xx, UInt yy )
            + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) );
  }
  
+/* Externally visible helper for Iop_QAdd32S: reinterprets both
+   operands as signed 32-bit values, adds them with saturation via
+   qadd32S, and hands the result back as a UInt. */
+UInt h_generic_calc_QAdd32S ( UInt xx, UInt yy )
+{
+   Int argL = (Int)xx;
+   Int argR = (Int)yy;
+   Int sat  = qadd32S( argL, argR );
+   return (UInt)sat;
+}
+
+/* Externally visible helper for Iop_QSub32S: reinterprets both
+   operands as signed 32-bit values, computes xx - yy with saturation
+   via qsub32S, and hands the result back as a UInt. */
+UInt h_generic_calc_QSub32S ( UInt xx, UInt yy )
+{
+   Int argL = (Int)xx;
+   Int argR = (Int)yy;
+   Int sat  = qsub32S( argL, argR );
+   return (UInt)sat;
+}
+
+
  /*------------------------------------------------------------------*/
  /* Decimal Floating Point (DFP) externally visible helper functions */
  /* that implement Iop_BCDtoDPB and Iop_DPBtoBCD                     */
diff --git a/VEX/priv/host_generic_simd64.h b/VEX/priv/host_generic_simd64.h

index 1492ad00369418cc46bca21cc006b5c957eb499d..4a5fa7aad26c4a941e6713fb4c2ee4edb31cefb4 100644 (file)
--- a/VEX/priv/host_generic_simd64.h
+++ b/VEX/priv/host_generic_simd64.h
@@ -153,11 +153,14 @@ extern UInt h_generic_calc_QSub8Sx4 ( UInt, UInt );
  
  extern UInt h_generic_calc_Sad8Ux4  ( UInt, UInt );
  
+extern UInt h_generic_calc_QAdd32S  ( UInt, UInt );
+extern UInt h_generic_calc_QSub32S  ( UInt, UInt );
+
  extern UInt h_generic_calc_CmpNEZ16x2 ( UInt );
  extern UInt h_generic_calc_CmpNEZ8x4  ( UInt );
  
-extern ULong h_DPBtoBCD( ULong dpb );
-extern ULong h_BCDtoDPB( ULong bcd );
+extern ULong h_DPBtoBCD ( ULong dpb );
+extern ULong h_BCDtoDPB ( ULong bcd );
  
  ULong dpb_to_bcd(ULong chunk);  // helper for h_DPBtoBCD
  ULong bcd_to_dpb(ULong chunk);  // helper for h_BCDtoDPB
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c

index c689c2b1606257fa6bb3e6ec9f0fa85f20baf41e..dc0fc33a86cd4c8fceaa1544e61f37c917fab208 100644 (file)
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -334,6 +334,8 @@ void ppIROp ( IROp op )
        case Iop_TruncF64asF32: vex_printf("TruncF64asF32"); return;
        case Iop_CalcFPRF:      vex_printf("CalcFPRF"); return;
  
+      case Iop_QAdd32S: vex_printf("QAdd32S"); return;
+      case Iop_QSub32S: vex_printf("QSub32S"); return; 
        case Iop_Add16x2:   vex_printf("Add16x2"); return;
        case Iop_Sub16x2:   vex_printf("Sub16x2"); return;
        case Iop_QAdd16Sx2: vex_printf("QAdd16Sx2"); return;
@@ -2142,6 +2144,7 @@ void typeOfPrimop ( IROp op,
        case Iop_Add32: case Iop_Sub32: case Iop_Mul32:
        case Iop_Or32:  case Iop_And32: case Iop_Xor32:
        case Iop_Max32U:
+      case Iop_QAdd32S: case Iop_QSub32S:
        case Iop_Add16x2: case Iop_Sub16x2:
        case Iop_QAdd16Sx2: case Iop_QAdd16Ux2:
        case Iop_QSub16Sx2: case Iop_QSub16Ux2:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h

index 860b08b4f84a61db5bbdb1821b1cc0fc385ee5e4..f0af9f06de8c02791ae821ef31fc2a60fe94a5c9 100644 (file)
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -746,6 +746,10 @@ typedef
  
        /* ------------------ 32-bit SIMD Integer ------------------ */
  
+      /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
+      Iop_QAdd32S,
+      Iop_QSub32S,
+
        /* 16x2 add/sub, also signed/unsigned saturating variants */
        Iop_Add16x2, Iop_Sub16x2,
        Iop_QAdd16Sx2, Iop_QAdd16Ux2,
author	Julian Seward <jseward@acm.org>
	Wed, 11 Jul 2012 13:19:10 +0000 (13:19 +0000)
committer	Julian Seward <jseward@acm.org>
	Wed, 11 Jul 2012 13:19:10 +0000 (13:19 +0000)
VEX/priv/guest_arm_toIR.c		patch \| blob \| blame \| history
VEX/priv/host_arm_isel.c		patch \| blob \| blame \| history
VEX/priv/host_generic_simd64.c		patch \| blob \| blame \| history
VEX/priv/host_generic_simd64.h		patch \| blob \| blame \| history
VEX/priv/ir_defs.c		patch \| blob \| blame \| history
VEX/pub/libvex_ir.h		patch \| blob \| blame \| history