Improvements to condition code handling on ARM.

author Julian Seward <jseward@acm.org>

Sun, 1 May 2011 18:47:10 +0000 (18:47 +0000)

committer Julian Seward <jseward@acm.org>

Sun, 1 May 2011 18:47:10 +0000 (18:47 +0000)
author Julian Seward <jseward@acm.org>
Sun, 1 May 2011 18:47:10 +0000 (18:47 +0000)
committer Julian Seward <jseward@acm.org>
Sun, 1 May 2011 18:47:10 +0000 (18:47 +0000)
diff --git a/VEX/priv/guest_arm_defs.h b/VEX/priv/guest_arm_defs.h

index 02078c4595b01500fae64b3ebb77c9db80ec23b7..8724d8edb522f89fccaa68dcc1f090bff8f15f0d 100644 (file)
--- a/VEX/priv/guest_arm_defs.h
+++ b/VEX/priv/guest_arm_defs.h
@@ -148,6 +148,10 @@ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
     that the definedness of the stored flags always depends on
     all 3 DEP values.
  
+   Fields carrying only 1 or 2 bits of useful information (old_C,
+   shifter_co, old_V, oldC:oldV) must have their top 31 or 30 bits
+   (respectively) zero.  The text "31x0:" or "30x0:" denotes this.
+
     A summary of the field usages is:
  
     OP                DEP1              DEP2              DEP3
@@ -156,11 +160,11 @@ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
     OP_COPY           current NZCV      unused            unused
     OP_ADD            argL              argR              unused
     OP_SUB            argL              argR              unused
-   OP_ADC            argL              argR              old_C
-   OP_SBB            argL              argR              old_C
-   OP_LOGIC          result            shifter_co        old_V
-   OP_MUL            result            unused            old_C:old_V
-   OP_MULL           resLO32           resHI32           old_C:old_V
+   OP_ADC            argL              argR              31x0:old_C
+   OP_SBB            argL              argR              31x0:old_C
+   OP_LOGIC          result            31x0:shifter_co   31x0:old_V
+   OP_MUL            result            unused            30x0:old_C:old_V
+   OP_MULL           resLO32           resHI32           30x0:old_C:old_V
  */
  
  enum {
diff --git a/VEX/priv/guest_arm_helpers.c b/VEX/priv/guest_arm_helpers.c

index 93f81150d5a3ab7630ec5224e7f3c771de643d6f..96d5824377b65a4d503be7e6d7927798ee50af66 100644 (file)
--- a/VEX/priv/guest_arm_helpers.c
+++ b/VEX/priv/guest_arm_helpers.c
@@ -110,6 +110,7 @@ UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
           UInt argL = cc_dep1;
           UInt argR = cc_dep2;
           UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
           UInt res  = (argL + argR) + oldC;
           UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
           UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
@@ -127,6 +128,7 @@ UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
           UInt argL = cc_dep1;
           UInt argR = cc_dep2;
           UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
           UInt res  = argL - argR - (oldC ^ 1);
           UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
           UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
@@ -143,11 +145,13 @@ UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
           /* (res, shco, oldV) */
           UInt res  = cc_dep1;
           UInt shco = cc_dep2;
+         vassert((shco & ~1) == 0);
           UInt oldV = cc_dep3;
+         vassert((oldV & ~1) == 0);
           UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
           UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         UInt cf   = lshift( shco & 1, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( oldV & 1, ARMG_CC_SHIFT_V );
+         UInt cf   = lshift( shco, ARMG_CC_SHIFT_C );
+         UInt vf   = lshift( oldV, ARMG_CC_SHIFT_V );
           return nf | zf | cf | vf;
        }
        case ARMG_CC_OP_MUL: {
@@ -155,10 +159,11 @@ UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
           UInt res  = cc_dep1;
           UInt oldC = (cc_dep3 >> 1) & 1;
           UInt oldV = (cc_dep3 >> 0) & 1;
+         vassert((cc_dep3 & ~3) == 0);
           UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
           UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         UInt cf   = lshift( oldC & 1, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( oldV & 1, ARMG_CC_SHIFT_V );
+         UInt cf   = lshift( oldC, ARMG_CC_SHIFT_C );
+         UInt vf   = lshift( oldV, ARMG_CC_SHIFT_V );
           return nf | zf | cf | vf;
        }
        case ARMG_CC_OP_MULL: {
@@ -167,10 +172,11 @@ UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
           UInt resHi32 = cc_dep2;
           UInt oldC    = (cc_dep3 >> 1) & 1;
           UInt oldV    = (cc_dep3 >> 0) & 1;
+         vassert((cc_dep3 & ~3) == 0);
           UInt nf      = lshift( resHi32 & (1<<31), ARMG_CC_SHIFT_N - 31 );
           UInt zf      = lshift( (resHi32|resLo32) == 0, ARMG_CC_SHIFT_Z );
-         UInt cf      = lshift( oldC & 1, ARMG_CC_SHIFT_C );
-         UInt vf      = lshift( oldV & 1, ARMG_CC_SHIFT_V );
+         UInt cf      = lshift( oldC, ARMG_CC_SHIFT_C );
+         UInt vf      = lshift( oldV, ARMG_CC_SHIFT_V );
           return nf | zf | cf | vf;
        }
        default:
@@ -185,7 +191,7 @@ UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
  
  /* CALLED FROM GENERATED CODE: CLEAN HELPER */
  /* Calculate the C flag from the thunk components, in the lowest bit
-   of the word (bit 0). */
+   of the word (bit 0).  Bits 31:1 of the returned value are zero. */
  UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
                               UInt cc_dep2, UInt cc_dep3 )
  {
@@ -196,7 +202,7 @@ UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
  
  /* CALLED FROM GENERATED CODE: CLEAN HELPER */
  /* Calculate the V flag from the thunk components, in the lowest bit
-   of the word (bit 0). */
+   of the word (bit 0).  Bits 31:1 of the returned value are zero. */
  UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
                               UInt cc_dep2, UInt cc_dep3 )
  {
@@ -221,7 +227,7 @@ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
  /* Calculate the specified condition from the thunk components, in the
     lowest bit of the word (bit 0). */
  extern 
-UInt armg_calculate_condition ( UInt cond_n_op /* ARMCondcode << 4 | cc_op */,
+UInt armg_calculate_condition ( UInt cond_n_op /* (ARMCondcode << 4) | cc_op */,
                                  UInt cc_dep1,
                                  UInt cc_dep2, UInt cc_dep3 )
  {
@@ -332,12 +338,17 @@ IRExpr* guest_arm_spechelper ( HChar*   function_name,
     /* --------- specialising "armg_calculate_condition" --------- */
  
     if (vex_streq(function_name, "armg_calculate_condition")) {
-      /* specialise calls to above "armg_calculate condition" function */
-      IRExpr *cond_n_op, *cc_dep1, *cc_dep2;
+
+      /* specialise calls to the "armg_calculate_condition" function.
+         Not sure whether this is strictly necessary, but: the
+         replacement IR must produce only the values 0 or 1.  Bits
+         31:1 are required to be zero. */
+      IRExpr *cond_n_op, *cc_dep1, *cc_dep2, *cc_ndep;
        vassert(arity == 4);
-      cond_n_op = args[0]; /* ARMCondcode << 4  |  ARMG_CC_OP_* */
+      cond_n_op = args[0]; /* (ARMCondcode << 4)  |  ARMG_CC_OP_* */
        cc_dep1   = args[1];
        cc_dep2   = args[2];
+      cc_ndep   = args[3];
  
        /*---------------- SUB ----------------*/
  
@@ -384,7 +395,27 @@ IRExpr* guest_arm_spechelper ( HChar*   function_name,
                       binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
        }
  
+      /*---------------- SBB ----------------*/
+
+      if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
+         /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
+         /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
+         /* HS after SBB (same as C after SBB below)
+            --> oldC ? (argL >=u argR) : (argL >u argR)
+            --> oldC ? (argR <=u argL) : (argR <u argL)
+         */
+         return
+            IRExpr_Mux0X(
+               unop(Iop_32to8, cc_ndep),
+               /* case oldC == 0 */
+               unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1)),
+               /* case oldC != 0 */
+               unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1))
+            );
+      }
+
        /*---------------- LOGIC ----------------*/
+
        if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
           /* EQ after LOGIC --> test res == 0 */
           return unop(Iop_1Uto32,
@@ -397,6 +428,7 @@ IRExpr* guest_arm_spechelper ( HChar*   function_name,
        }
  
        /*----------------- AL -----------------*/
+
        /* A critically important case for Thumb code.
  
           What we're trying to spot is the case where cond_n_op is an
@@ -443,6 +475,107 @@ IRExpr* guest_arm_spechelper ( HChar*   function_name,
        }
     }
  
+   /* --------- specialising "armg_calculate_flag_c" --------- */
+
+   else
+   if (vex_streq(function_name, "armg_calculate_flag_c")) {
+
+      /* specialise calls to the "armg_calculate_flag_c" function.
+         Note that the returned value must be either 0 or 1; nonzero
+         bits 31:1 are not allowed.  In turn, incoming oldV and oldC
+         values (from the thunk) are assumed to have bits 31:1
+         clear. */
+      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+      vassert(arity == 4);
+      cc_op   = args[0]; /* ARMG_CC_OP_* */
+      cc_dep1 = args[1];
+      cc_dep2 = args[2];
+      cc_ndep = args[3];
+
+      if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
+         /* Thunk args are (result, shco, oldV) */
+         /* C after LOGIC --> shco */
+         return cc_dep2;
+      }
+
+      if (isU32(cc_op, ARMG_CC_OP_SUB)) {
+         /* Thunk args are (argL, argR, unused) */
+         /* C after SUB --> argL >=u argR
+                        --> argR <=u argL */
+         return unop(Iop_1Uto32,
+                     binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
+      }
+
+      if (isU32(cc_op, ARMG_CC_OP_SBB)) {
+         /* This happens occasionally in softfloat code, eg __divdf3+140 */
+         /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
+         /* C after SBB (same as HS after SBB above)
+            --> oldC ? (argL >=u argR) : (argL >u argR)
+            --> oldC ? (argR <=u argL) : (argR <u argL)
+         */
+         return
+            IRExpr_Mux0X(
+               unop(Iop_32to8, cc_ndep),
+               /* case oldC == 0 */
+               unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1)),
+               /* case oldC != 0 */
+               unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1))
+            );
+      }
+
+   }
+
+   /* --------- specialising "armg_calculate_flag_v" --------- */
+
+   else
+   if (vex_streq(function_name, "armg_calculate_flag_v")) {
+
+      /* specialise calls to the "armg_calculate_flag_v" function.
+         Note that the returned value must be either 0 or 1; nonzero
+         bits 31:1 are not allowed.  In turn, incoming oldV and oldC
+         values (from the thunk) are assumed to have bits 31:1
+         clear. */
+      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+      vassert(arity == 4);
+      cc_op   = args[0]; /* ARMG_CC_OP_* */
+      cc_dep1 = args[1];
+      cc_dep2 = args[2];
+      cc_ndep = args[3];
+
+      if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
+         /* Thunk args are (result, shco, oldV) */
+         /* V after LOGIC --> oldV */
+         return cc_ndep;
+      }
+
+      if (isU32(cc_op, ARMG_CC_OP_SBB)) {
+         /* This happens occasionally in softfloat code, eg __divdf3+140 */
+         /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
+         /* V after SBB
+            --> let res = argL - argR - (oldC ^ 1)
+                in  (argL ^ argR) & (argL ^ res) & 1
+         */
+         return
+            binop(
+               Iop_And32,
+               binop(
+                  Iop_And32,
+                  // argL ^ argR
+                  binop(Iop_Xor32, cc_dep1, cc_dep2),
+                  // argL ^ (argL - argR - (oldC ^ 1))
+                  binop(Iop_Xor32,
+                        cc_dep1,
+                        binop(Iop_Sub32,
+                              binop(Iop_Sub32, cc_dep1, cc_dep2),
+                              binop(Iop_Xor32, cc_ndep, mkU32(1)))
+                  )
+               ),
+               mkU32(1)
+            );
+      }
+
+   }
+
  #  undef unop
  #  undef binop
  #  undef mkU32
diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c

index 696c6cdeab22d380d75f79960c742a0d4cb8eb57..d9433881d14c50836d73abe1f5ffcb96c6235bee 100644 (file)
--- a/VEX/priv/guest_arm_toIR.c
+++ b/VEX/priv/guest_arm_toIR.c
@@ -1088,14 +1088,13 @@ static HChar* nCC ( ARMCondcode cond ) {
  static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
  {
     vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
-   /* And 'cond' had better produce a value in which only bits 7:4
-      bits are nonzero.  However, obviously we can't assert for
-      that. */
+   /* And 'cond' had better produce a value in which only bits 7:4 are
+      nonzero.  However, obviously we can't assert for that. */
  
     /* So what we're constructing for the first argument is 
-      "(cond << 4) | stored-operation-operation".  However,
-      as per comments above, must be supplied pre-shifted to this
-      function.
+      "(cond << 4) | stored-operation".
+      However, as per comments above, 'cond' must be supplied
+      pre-shifted to this function.
  
        This pairing scheme requires that the ARM_CC_OP_ values all fit
        in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
@@ -1700,6 +1699,12 @@ IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
  
     The calling convention for res and newC is a bit funny.  They could
     be passed by value, but instead are passed by ref.
+
+   The C (shco) value computed must be zero in bits 31:1, as the IR
+   optimisations for flag handling (guest_arm_spechelper) rely on
+   that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
+   for it.  Same applies to all these functions that compute shco
+   after a shift or rotate, not just this one.
  */
  
  static void compute_result_and_C_after_LSL_by_imm5 (
@@ -1751,7 +1756,7 @@ static void compute_result_and_C_after_LSL_by_reg (
        /* mux0X(amt == 0,
                 mux0X(amt < 32, 
                       0,
-                     Rm[(32-amt) & 31])
+                     Rm[(32-amt) & 31]),
                 oldC)
        */
        /* About the best you can do is pray that iropt is able
@@ -1767,16 +1772,19 @@ static void compute_result_and_C_after_LSL_by_reg (
                 unop(Iop_1Uto8,
                      binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
                 mkU32(0),
-               binop(Iop_Shr32,
-                     mkexpr(rMt),
-                     unop(Iop_32to8,
-                          binop(Iop_And32,
-                                binop(Iop_Sub32,
-                                      mkU32(32),
-                                      mkexpr(amtT)),
-                                mkU32(31)
-                          )
-                     )
+               binop(Iop_And32,
+                     binop(Iop_Shr32,
+                           mkexpr(rMt),
+                           unop(Iop_32to8,
+                                binop(Iop_And32,
+                                      binop(Iop_Sub32,
+                                            mkU32(32),
+                                            mkexpr(amtT)),
+                                      mkU32(31)
+                                )
+                           )
+                     ),
+                     mkU32(1)
                 )
              ),
              mkexpr(oldC)
@@ -1862,7 +1870,7 @@ static void compute_result_and_C_after_LSR_by_reg (
        /* mux0X(amt == 0,
                 mux0X(amt < 32, 
                       0,
-                     Rm[(amt-1) & 31])
+                     Rm[(amt-1) & 31]),
                 oldC)
        */
        IRTemp oldC = newTemp(Ity_I32);
@@ -1876,16 +1884,19 @@ static void compute_result_and_C_after_LSR_by_reg (
                 unop(Iop_1Uto8,
                      binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
                 mkU32(0),
-               binop(Iop_Shr32,
-                     mkexpr(rMt),
-                     unop(Iop_32to8,
-                          binop(Iop_And32,
-                                binop(Iop_Sub32,
-                                      mkexpr(amtT),
-                                      mkU32(1)),
-                                mkU32(31)
-                          )
-                     )
+               binop(Iop_And32,
+                     binop(Iop_Shr32,
+                           mkexpr(rMt),
+                           unop(Iop_32to8,
+                                binop(Iop_And32,
+                                      binop(Iop_Sub32,
+                                            mkexpr(amtT),
+                                            mkU32(1)),
+                                      mkU32(31)
+                                )
+                           )
+                     ),
+                     mkU32(1)
                 )
              ),
              mkexpr(oldC)
@@ -1984,20 +1995,26 @@ static void compute_result_and_C_after_ASR_by_reg (
              IRExpr_Mux0X(
                 unop(Iop_1Uto8,
                      binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
-               binop(Iop_Shr32,
-                     mkexpr(rMt),
-                     mkU8(31)
+               binop(Iop_And32,
+                     binop(Iop_Shr32,
+                           mkexpr(rMt),
+                           mkU8(31)
+                     ),
+                     mkU32(1)
                 ),
-               binop(Iop_Shr32,
-                     mkexpr(rMt),
-                     unop(Iop_32to8,
-                          binop(Iop_And32,
-                                binop(Iop_Sub32,
-                                      mkexpr(amtT),
-                                      mkU32(1)),
-                                mkU32(31)
-                          )
-                     )
+               binop(Iop_And32,
+                     binop(Iop_Shr32,
+                           mkexpr(rMt),
+                           unop(Iop_32to8,
+                                binop(Iop_And32,
+                                      binop(Iop_Sub32,
+                                            mkexpr(amtT),
+                                            mkU32(1)),
+                                      mkU32(31)
+                                )
+                           )
+                     ),
+                     mkU32(1)
                 )
              ),
              mkexpr(oldC)
author	Julian Seward <jseward@acm.org>
	Sun, 1 May 2011 18:47:10 +0000 (18:47 +0000)
committer	Julian Seward <jseward@acm.org>
	Sun, 1 May 2011 18:47:10 +0000 (18:47 +0000)
VEX/priv/guest_arm_defs.h		patch \| blob \| blame \| history
VEX/priv/guest_arm_helpers.c		patch \| blob \| blame \| history
VEX/priv/guest_arm_toIR.c		patch \| blob \| blame \| history