From: Julian Seward <jseward@acm.org>
Date: Thu, 11 Apr 2013 13:57:43 +0000 (+0000)
Subject: Implement ARM SDIV and UDIV instructions.  Fixes #314178.  Partially
X-Git-Tag: svn/VALGRIND_3_9_0^2~89
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=361f9de1e1e48bec01aa1249ee6a4f9272bd0551;p=thirdparty%2Fvalgrind.git

Implement ARM SDIV and UDIV instructions.  Fixes #314178.  Partially
based on a patch by Ben Cheng, bccheng@android.com.  Also renames two
misnamed PPC helpers.


git-svn-id: svn://svn.valgrind.org/vex/trunk@2706
---

diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c
index 311b7cb1f5..e72ef94b6e 100644
--- a/VEX/priv/guest_arm_toIR.c
+++ b/VEX/priv/guest_arm_toIR.c
@@ -13845,6 +13845,51 @@ DisResult disInstr_ARM_WRK (
       /* fall through */
    }
 
+   /* --------------------- Integer Divides --------------------- */
+   // SDIV
+   if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
+       && INSN(15,12) == BITS4(1,1,1,1)
+       && INSN(7,4) == BITS4(0,0,0,1)) {
+      UInt rD = INSN(19,16);
+      UInt rM = INSN(11,8);
+      UInt rN = INSN(3,0);
+      if (rD == 15 || rM == 15 || rN == 15) {
+         /* Unpredictable; don't decode; fall through */
+      } else {
+         IRTemp res  = newTemp(Ity_I32);
+         IRTemp argL = newTemp(Ity_I32);
+         IRTemp argR = newTemp(Ity_I32);
+         assign(argL, getIRegA(rN));
+         assign(argR, getIRegA(rM));
+         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
+         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
+         DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
+         goto decode_success;
+      }
+    }
+
+   // UDIV
+   if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
+       && INSN(15,12) == BITS4(1,1,1,1)
+       && INSN(7,4) == BITS4(0,0,0,1)) {
+      UInt rD = INSN(19,16);
+      UInt rM = INSN(11,8);
+      UInt rN = INSN(3,0);
+      if (rD == 15 || rM == 15 || rN == 15) {
+         /* Unpredictable; don't decode; fall through */
+      } else {
+         IRTemp res  = newTemp(Ity_I32);
+         IRTemp argL = newTemp(Ity_I32);
+         IRTemp argR = newTemp(Ity_I32);
+         assign(argL, getIRegA(rN));
+         assign(argR, getIRegA(rM));
+         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
+         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
+         DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
+         goto decode_success;
+      }
+   }
+
    // MLA, MLS
    if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
        && INSN(7,4) == BITS4(1,0,0,1)) {
@@ -18400,6 +18445,44 @@ DisResult disInstr_THUMB_WRK (
       }
    }
 
+   /* -------------- SDIV.W Rd, Rn, Rm -------------- */
+   if (INSN0(15,4) == 0xFB9
+       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
+      UInt rN = INSN0(3,0);
+      UInt rD = INSN1(11,8);
+      UInt rM = INSN1(3,0);
+      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
+         IRTemp res  = newTemp(Ity_I32);
+         IRTemp argL = newTemp(Ity_I32);
+         IRTemp argR = newTemp(Ity_I32);
+         assign(argL, getIRegT(rN));
+         assign(argR, getIRegT(rM));
+         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
+         putIRegT(rD, mkexpr(res), condT);
+         DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
+         goto decode_success;
+      }
+   }
+
+   /* -------------- UDIV.W Rd, Rn, Rm -------------- */
+   if (INSN0(15,4) == 0xFBB
+       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
+      UInt rN = INSN0(3,0);
+      UInt rD = INSN1(11,8);
+      UInt rM = INSN1(3,0);
+      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
+         IRTemp res  = newTemp(Ity_I32);
+         IRTemp argL = newTemp(Ity_I32);
+         IRTemp argR = newTemp(Ity_I32);
+         assign(argL, getIRegT(rN));
+         assign(argR, getIRegT(rM));
+         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
+         putIRegT(rD, mkexpr(res), condT);
+         DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
+         goto decode_success;
+      }
+   }
+
    /* ------------------ {U,S}MULL ------------------ */
    if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
        && INSN1(7,4) == BITS4(0,0,0,0)) {
diff --git a/VEX/priv/host_arm_isel.c b/VEX/priv/host_arm_isel.c
index 095fc9cdf2..b2c9edebcc 100644
--- a/VEX/priv/host_arm_isel.c
+++ b/VEX/priv/host_arm_isel.c
@@ -1374,6 +1374,10 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             fn = &h_generic_calc_QSub32S; break;
          case Iop_QSub16Ux2:
             fn = &h_generic_calc_QSub16Ux2; break;
+         case Iop_DivU32:
+            fn = &h_calc_udiv32_w_arm_semantics; break;
+         case Iop_DivS32:
+            fn = &h_calc_sdiv32_w_arm_semantics; break;
          default:
             break;
       }
diff --git a/VEX/priv/host_generic_simd64.c b/VEX/priv/host_generic_simd64.c
index fdc9eed54e..42aff252df 100644
--- a/VEX/priv/host_generic_simd64.c
+++ b/VEX/priv/host_generic_simd64.c
@@ -36,9 +36,10 @@
 /* Generic helper functions for doing 64-bit SIMD arithmetic in cases
    where the instruction selectors cannot generate code in-line.
    These are purely back-end entities and cannot be seen/referenced
-   from IR. */
+   from IR.  There are also helpers for 32-bit arithmetic in here. */
 
 #include "libvex_basictypes.h"
+#include "main_util.h"              // LIKELY, UNLIKELY
 #include "host_generic_simd64.h"
 
 
@@ -1433,7 +1434,7 @@ UInt h_generic_calc_QSub32S ( UInt xx, UInt yy )
 #define GET( x, y ) ( ( ( x ) & ( 0x1UL << ( y ) ) ) >> ( y ) )
 #define PUT( x, y ) ( ( x )<< ( y ) )
 
-ULong dpb_to_bcd( ULong chunk )
+static ULong dpb_to_bcd( ULong chunk )
 {
    Short a, b, c, d, e, f, g, h, i, j, k, m;
    Short p, q, r, s, t, u, v, w, x, y;
@@ -1473,7 +1474,7 @@ ULong dpb_to_bcd( ULong chunk )
    return value;
 }
 
-ULong bcd_to_dpb( ULong chunk )
+static ULong bcd_to_dpb( ULong chunk )
 {
    Short a, b, c, d, e, f, g, h, i, j, k, m;
    Short p, q, r, s, t, u, v, w, x, y;
@@ -1516,7 +1517,7 @@ ULong bcd_to_dpb( ULong chunk )
    return value;
 }
 
-ULong h_DPBtoBCD( ULong dpb )
+ULong h_calc_DPBtoBCD( ULong dpb )
 {
    ULong result, chunk;
    Int i;
@@ -1531,7 +1532,7 @@ ULong h_DPBtoBCD( ULong dpb )
    return result;
 }
 
-ULong h_BCDtoDPB( ULong bcd )
+ULong h_calc_BCDtoDPB( ULong bcd )
 {
    ULong result, chunk;
    Int i;
@@ -1549,7 +1550,36 @@ ULong h_BCDtoDPB( ULong bcd )
 #undef GET
 #undef PUT
 
+
+/* ----------------------------------------------------- */
+/* Signed and unsigned integer division, that behave like
+   the ARMv7 UDIV and SDIV instructions. */
+/* ----------------------------------------------------- */
+
+UInt h_calc_udiv32_w_arm_semantics ( UInt x, UInt y )
+{
+   // Division by zero --> zero; checked first, so x / y never sees y == 0.
+   if (UNLIKELY(y == 0)) return 0;
+   // C requires rounding towards zero, which is also what we need.
+   return x / y;
+}
+
+Int h_calc_sdiv32_w_arm_semantics ( Int x, Int y )
+{
+   // Division by zero --> zero
+   if (UNLIKELY(y == 0)) return 0;
+   // The single case that produces an unrepresentable result
+   if (UNLIKELY( ((UInt)x) == ((UInt)0x80000000)
+                 && ((UInt)y) == ((UInt)0xFFFFFFFF) ))
+      return (Int)(UInt)0x80000000;
+   // Else return the result rounded towards zero.  C89 says
+   // this is implementation defined (in the signed case), but gcc
+   // promises to round towards zero.  Nevertheless, LibVEX_Init in
+   // main_main.c checks for that at startup.
+   return x / y;
+}
+
+
 /*---------------------------------------------------------------*/
 /*--- end                               host_generic_simd64.c ---*/
 /*---------------------------------------------------------------*/
-
diff --git a/VEX/priv/host_generic_simd64.h b/VEX/priv/host_generic_simd64.h
index deef9449ed..71128c53b5 100644
--- a/VEX/priv/host_generic_simd64.h
+++ b/VEX/priv/host_generic_simd64.h
@@ -161,11 +161,13 @@ extern UInt h_generic_calc_QSub32S  ( UInt, UInt );
 extern UInt h_generic_calc_CmpNEZ16x2 ( UInt );
 extern UInt h_generic_calc_CmpNEZ8x4  ( UInt );
 
-extern ULong h_DPBtoBCD ( ULong dpb );
-extern ULong h_BCDtoDPB ( ULong bcd );
+extern ULong h_calc_DPBtoBCD ( ULong dpb );
+extern ULong h_calc_BCDtoDPB ( ULong bcd );
 
-ULong dpb_to_bcd(ULong chunk);  // helper for h_DPBtoBCD
-ULong bcd_to_dpb(ULong chunk);  // helper for h_BCDtoDPB
+// Signed and unsigned integer division, that behave like
+// the ARMv7 UDIV and SDIV instructions.
+extern UInt  h_calc_udiv32_w_arm_semantics ( UInt, UInt );
+extern  Int  h_calc_sdiv32_w_arm_semantics (  Int,  Int );
 
 #endif /* ndef __VEX_HOST_GENERIC_SIMD64_H */
 
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 6c485dbee4..a458ea0152 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -2077,7 +2077,7 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
 
-         fdescr = (HWord*)h_BCDtoDPB;
+         fdescr = (HWord*)h_calc_BCDtoDPB;
          addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
                                       argiregs, RetLocInt) );
 
@@ -2106,7 +2106,7 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
 
-         fdescr = (HWord*)h_DPBtoBCD;
+         fdescr = (HWord*)h_calc_DPBtoBCD;
          addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
                                       argiregs, RetLocInt ) );
 
@@ -3446,7 +3446,7 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
          addInstr( env, mk_iMOVds_RR( argregs[argreg], tmpLo ) );
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
-         target = toUInt( Ptr_to_ULong(h_BCDtoDPB ) );
+         target = toUInt( Ptr_to_ULong(h_calc_BCDtoDPB ) );
 
          addInstr( env, PPCInstr_Call( cc, (Addr64)target,
                                        argiregs, RetLoc2Int ) );
@@ -3486,7 +3486,7 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
 
-         target = toUInt( Ptr_to_ULong( h_DPBtoBCD ) );
+         target = toUInt( Ptr_to_ULong( h_calc_DPBtoBCD ) );
 
          addInstr(env, PPCInstr_Call( cc, (Addr64)target,
                                       argiregs, RetLoc2Int ) );
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
index 149b651690..1be918cc59 100644
--- a/VEX/priv/main_main.c
+++ b/VEX/priv/main_main.c
@@ -75,6 +75,14 @@ static Bool   are_valid_hwcaps ( VexArch arch, UInt hwcaps );
 static const HChar* show_hwcaps ( VexArch arch, UInt hwcaps );
 
 
+/* --------- helpers --------- */
+/* Plain host 32-bit divides, used by the startup checks in LibVEX_Init. */
+__attribute__((noinline))
+static UInt udiv32 ( UInt x, UInt y ) { return x/y; }
+__attribute__((noinline))
+static  Int sdiv32 (  Int x,  Int y ) { return x/y; }
+
+
 /* --------- Initialise the library. --------- */
 
 /* Exported to library client. */
@@ -171,6 +179,16 @@ void LibVEX_Init (
       vassert(sizeof(IRStmt) == 32);
    }
 
+   /* Check that signed integer division on the host rounds towards
+      zero.  If not, h_calc_sdiv32_w_arm_semantics() won't work
+      correctly. */
+   /* 100.0 / 7.0 == 14.2857 */
+   vassert(udiv32(100, 7) == 14);
+   vassert(sdiv32(100, 7) == 14);
+   vassert(sdiv32(-100, 7) == -14); /* and not -15 */
+   vassert(sdiv32(100, -7) == -14); /* ditto */
+   vassert(sdiv32(-100, -7) == 14); /* not sure what this proves */
+
    /* Really start up .. */
    vex_debuglevel         = debuglevel;
    vex_valgrind_support   = valgrind_support;