Merge branches/OTRACK_BY_INSTRUMENTATION into the trunk. This

author Julian Seward <jseward@acm.org>

Thu, 1 May 2008 20:13:04 +0000 (20:13 +0000)

committer Julian Seward <jseward@acm.org>

Thu, 1 May 2008 20:13:04 +0000 (20:13 +0000)
author Julian Seward <jseward@acm.org>
Thu, 1 May 2008 20:13:04 +0000 (20:13 +0000)
committer Julian Seward <jseward@acm.org>
Thu, 1 May 2008 20:13:04 +0000 (20:13 +0000)
diff --git a/VEX/priv/guest-amd64/toIR.c b/VEX/priv/guest-amd64/toIR.c

index 652737aa62a89fc6ad47893954f455ced8a55377..bb6e0718f599471f3bc319c22548344c109263f3 100644 (file)
--- a/VEX/priv/guest-amd64/toIR.c
+++ b/VEX/priv/guest-amd64/toIR.c
@@ -1653,7 +1653,7 @@ static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
        may require reading all four thunk fields. */
     stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
     stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
-   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(res)) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
     stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
  }
  
@@ -1944,12 +1944,15 @@ void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
     }
  }
  
-/* Let new_rsp be the %rsp value after a call/return.  This function
-   generates an AbiHint to say that -128(%rsp) .. -1(%rsp) should now
-   be regarded as uninitialised.
+/* Let new_rsp be the %rsp value after a call/return.  Let nia be the
+   guest address of the next instruction to be executed.
+
+   This function generates an AbiHint to say that -128(%rsp)
+   .. -1(%rsp) should now be regarded as uninitialised.
  */
  static 
-void make_redzone_AbiHint ( VexAbiInfo* vbi, IRTemp new_rsp, HChar* who )
+void make_redzone_AbiHint ( VexAbiInfo* vbi,
+                            IRTemp new_rsp, IRTemp nia, HChar* who )
  {
     Int szB = vbi->guest_stack_redzone_size;
     vassert(szB >= 0);
@@ -1961,10 +1964,12 @@ void make_redzone_AbiHint ( VexAbiInfo* vbi, IRTemp new_rsp, HChar* who )
  
     if (0) vex_printf("AbiHint: %s\n", who);
     vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
+   vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
     if (szB > 0)
        stmt( IRStmt_AbiHint( 
                 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)), 
-               szB
+               szB,
+               mkexpr(nia)
              ));
  }
  
@@ -3713,7 +3718,7 @@ ULong dis_Grp5 ( VexAbiInfo* vbi,
              assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
              putIReg64(R_RSP, mkexpr(t2));
              storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
-            make_redzone_AbiHint(vbi, t2, "call-Ev(reg)");
+            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
              jmp_treg(Ijk_Call,t3);
              dres->whatNext = Dis_StopHere;
              showSz = False;
@@ -3767,7 +3772,7 @@ ULong dis_Grp5 ( VexAbiInfo* vbi,
              assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
              putIReg64(R_RSP, mkexpr(t2));
              storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
-            make_redzone_AbiHint(vbi, t2, "call-Ev(mem)");
+            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
              jmp_treg(Ijk_Call,t3);
              dres->whatNext = Dis_StopHere;
              showSz = False;
@@ -7679,7 +7684,7 @@ void dis_ret ( VexAbiInfo* vbi, ULong d64 )
     assign(t2, loadLE(Ity_I64,mkexpr(t1)));
     assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
     putIReg64(R_RSP, mkexpr(t3));
-   make_redzone_AbiHint(vbi, t3, "ret");
+   make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
     jmp_treg(Ijk_Ret,t2);
  }
  
@@ -13494,7 +13499,9 @@ DisResult disInstr_AMD64_WRK (
        assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
        putIReg64(R_RSP, mkexpr(t1));
        storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
-      make_redzone_AbiHint(vmi, t1, "call-d32");
+      t2 = newTemp(Ity_I64);
+      assign(t2, mkU64((Addr64)d64));
+      make_redzone_AbiHint(vmi, t1, t2/*nia*/, "call-d32");
        if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
           /* follow into the call target. */
           dres.whatNext   = Dis_Resteer;
diff --git a/VEX/priv/guest-ppc/toIR.c b/VEX/priv/guest-ppc/toIR.c

index 6cd5e8dfaf4bd499d798d77d7542641a19ba1374..414c272a670957c986ef6dcf6b943236b5f44d07 100644 (file)
--- a/VEX/priv/guest-ppc/toIR.c
+++ b/VEX/priv/guest-ppc/toIR.c
@@ -1211,24 +1211,31 @@ static IRExpr* addr_align( IRExpr* addr, UChar align )
  /* Generate AbiHints which mark points at which the ELF or PowerOpen
     ABIs say that the stack red zone (viz, -N(r1) .. -1(r1), for some
     N) becomes undefined.  That is at function calls and returns.  ELF
-   ppc32 doesn't have this "feature" (how fortunate for it).
+   ppc32 doesn't have this "feature" (how fortunate for it).  nia is
+   the address of the next instruction to be executed.
  */
-static void make_redzone_AbiHint ( VexAbiInfo* vbi, HChar* who )
+static void make_redzone_AbiHint ( VexAbiInfo* vbi, 
+                                   IRTemp nia, HChar* who )
  {
     Int szB = vbi->guest_stack_redzone_size;
     if (0) vex_printf("AbiHint: %s\n", who);
     vassert(szB >= 0);
     if (szB > 0) {
-      if (mode64)
+      if (mode64) {
+         vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
           stmt( IRStmt_AbiHint( 
                    binop(Iop_Sub64, getIReg(1), mkU64(szB)), 
-                  szB
+                  szB,
+                  mkexpr(nia)
           ));
-      else
+      } else {
+         vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I32);
           stmt( IRStmt_AbiHint( 
                    binop(Iop_Sub32, getIReg(1), mkU32(szB)), 
-                  szB
+                  szB,
+                  mkexpr(nia)
           ));
+      }
     }
  }
  
@@ -4308,9 +4315,12 @@ static Bool dis_branch ( UInt theInstr,
        if (flag_LK) {
           putGST( PPC_GST_LR, e_nia );
           if (vbi->guest_ppc_zap_RZ_at_bl
-             && vbi->guest_ppc_zap_RZ_at_bl( (ULong)tgt) )
-            make_redzone_AbiHint( vbi, 
+             && vbi->guest_ppc_zap_RZ_at_bl( (ULong)tgt) ) {
+            IRTemp t_tgt = newTemp(ty);
+            assign(t_tgt, mode64 ? mkU64(tgt) : mkU32(tgt) );
+            make_redzone_AbiHint( vbi, t_tgt,
                                    "branch-and-link (unconditional call)" );
+         }
        }
  
        if (resteerOkFn( callback_opaque, tgt )) {
@@ -4379,6 +4389,8 @@ static Bool dis_branch ( UInt theInstr,
           
           assign( cond_ok, branch_cond_ok( BO, BI ) );
  
+         /* FIXME: this is confusing.  lr_old holds the old value
+            of ctr, not lr :-) */
           assign( lr_old, addr_align( getGST( PPC_GST_CTR ), 4 ));
  
           if (flag_LK)
@@ -4388,7 +4400,12 @@ static Bool dis_branch ( UInt theInstr,
                    binop(Iop_CmpEQ32, mkexpr(cond_ok), mkU32(0)),
                    Ijk_Boring,
                    c_nia ));
-         
+
+         if (flag_LK && vbi->guest_ppc_zap_RZ_at_bl) {
+            make_redzone_AbiHint( vbi, lr_old,
+                                  "b-ctr-l (indirect call)" );
+        }
+
           irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring;
           irsb->next     = mkexpr(lr_old);
           break;
@@ -4424,8 +4441,10 @@ static Bool dis_branch ( UInt theInstr,
                    Ijk_Boring,
                    c_nia ));
  
-        if (vanilla_return && vbi->guest_ppc_zap_RZ_at_blr)
-            make_redzone_AbiHint( vbi, "branch-to-lr (unconditional return)" );
+         if (vanilla_return && vbi->guest_ppc_zap_RZ_at_blr) {
+            make_redzone_AbiHint( vbi, lr_old,
+                                  "branch-to-lr (unconditional return)" );
+         }
  
           /* blrl is pretty strange; it's like a return that sets the
              return address of its caller to the insn following this
diff --git a/VEX/priv/guest-x86/toIR.c b/VEX/priv/guest-x86/toIR.c

index 26b5af40fa6162468e16208e77b430524b9fe616..7e6c1f52ae2b2ed9f1ffa6ea642516dbd6772bea 100644 (file)
--- a/VEX/priv/guest-x86/toIR.c
+++ b/VEX/priv/guest-x86/toIR.c
@@ -958,7 +958,7 @@ static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
        may require reading all four thunk fields. */
     stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
     stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
-   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(res)) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
     stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
  }
  
diff --git a/VEX/priv/host-amd64/isel.c b/VEX/priv/host-amd64/isel.c

index 94ee65ca66b79ccacc24f561a5fa063842b67aa5..a9909f8cb9f9b03c8b9df369768c68358f8341ab 100644 (file)
--- a/VEX/priv/host-amd64/isel.c
+++ b/VEX/priv/host-amd64/isel.c
@@ -1150,6 +1150,24 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
  
        /* Handle misc other ops. */
  
+      if (e->Iex.Binop.op == Iop_Max32U) {
+         /* This generates a truly rotten piece of code.  Just as well
+            it doesn't happen very often. */
+         HReg src1  = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg src1L = newVRegI(env);
+         HReg src2  = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         HReg src2L = newVRegI(env);
+         HReg dst   = newVRegI(env);
+         addInstr(env, mk_iMOVsd_RR(src1,dst));
+         addInstr(env, mk_iMOVsd_RR(src1,src1L));
+         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src1L));
+         addInstr(env, mk_iMOVsd_RR(src2,src2L));
+         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src2L));
+         addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, AMD64RMI_Reg(src2L), src1L));
+         addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
+         return dst;
+      }
+
        if (e->Iex.Binop.op == Iop_DivModS64to32
            || e->Iex.Binop.op == Iop_DivModU64to32) {
           /* 64 x 32 -> (32(rem),32(div)) division */
diff --git a/VEX/priv/host-generic/reg_alloc2.c b/VEX/priv/host-generic/reg_alloc2.c

index 5959b735528d219f001b58d2e1e4eae4258997dd..c64333daac3836567b3d1f5a77642f1b3d907928 100644 (file)
--- a/VEX/priv/host-generic/reg_alloc2.c
+++ b/VEX/priv/host-generic/reg_alloc2.c
@@ -215,6 +215,17 @@ Int findMostDistantlyMentionedVReg (
  }
  
  
+/* Assert spill offset alignment: 16 for Vec128/Flt64 classes, 8 otherwise. */
+static inline void sanity_check_spill_offset ( VRegLR* vreg )
+{
+   if (vreg->reg_class == HRcVec128 || vreg->reg_class == HRcFlt64) {
+      vassert(0 == ((UShort)vreg->spill_offset % 16));
+   } else {
+      vassert(0 == ((UShort)vreg->spill_offset % 8));
+   }
+}
+
+
  /* Double the size of the real-reg live-range array, if needed. */
  static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
  {
@@ -396,8 +407,9 @@ HInstrArray* doRegisterAllocation (
        not at each insn processed. */
     Bool do_sanity_check;
  
-   vassert(0 == LibVEX_N_SPILL_BYTES % 16);
-   vassert(0 == guest_sizeB % 8);
+   vassert(0 == (guest_sizeB % 16));
+   vassert(0 == (LibVEX_N_SPILL_BYTES % 16));
+   vassert(0 == (N_SPILL64S % 2));
  
     /* The live range numbers are signed shorts, and so limiting the
        number of insns to 10000 comfortably guards against them
@@ -789,6 +801,16 @@ HInstrArray* doRegisterAllocation (
        64 bits to spill (classes Flt64 and Vec128), we have to allocate
        two spill slots.
  
+      For Vec128-class on PowerPC, the spill slot's actual address
+      must be 16-byte aligned.  Since the spill slot's address is
+      computed as an offset from the guest state pointer, and since
+      the user of the generated code must set that pointer to a
+      16-aligned value, we have the residual obligation here of
+      choosing a 16-aligned spill slot offset for Vec128-class values.
+      Since each spill slot is 8 bytes long, that means for
+      Vec128-class values we must allocate a spill slot number which
+      is zero mod 2.
+
        Do a rank-based allocation of vregs to spill slot numbers.  We
        put as few values as possible in spill slots, but nevertheless
        need to have a spill slot available for all vregs, just in case.
@@ -817,16 +839,19 @@ HInstrArray* doRegisterAllocation (
            || vreg_lrs[j].reg_class == HRcFlt64) {
  
           /* Find two adjacent free slots in which between them provide
-            up to 128 bits in which to spill the vreg. */
+            up to 128 bits in which to spill the vreg.  Since we are
+            trying to find an even:odd pair, move along in steps of 2
+            (slots). */
  
-         for (k = 0; k < N_SPILL64S-1; k++)
+         for (k = 0; k < N_SPILL64S-1; k += 2)
              if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
                  && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
                 break;
-         if (k == N_SPILL64S-1) {
+         if (k >= N_SPILL64S-1) {
              vpanic("LibVEX_N_SPILL_BYTES is too low.  " 
                     "Increase and recompile.");
           }
+         if (0) vex_printf("16-byte spill offset in spill slot %d\n", (Int)k);
           ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
           ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
  
@@ -849,10 +874,12 @@ HInstrArray* doRegisterAllocation (
        }
  
        /* This reflects LibVEX's hard-wired knowledge of the baseBlock
-         layout: the guest state, then an equal sized area following
-         it for shadow state, and then the spill area. */
-      vreg_lrs[j].spill_offset = toShort(guest_sizeB * 2 + k * 8);
+         layout: the guest state, then two equal sized areas following
+         it for two sets of shadow state, and then the spill area. */
+      vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
  
+      /* Independent check that we've made a sane choice of slot */
+      sanity_check_spill_offset( &vreg_lrs[j] );
        /* if (j > max_ss_no) */
        /*    max_ss_no = j; */
     }
diff --git a/VEX/priv/host-ppc/hdefs.h b/VEX/priv/host-ppc/hdefs.h

index 68bbcca873bf4bec6d5e5b3ff1d582449912cdd9..51c3bf12117387c258e3c263bf773129052455e7 100644 (file)
--- a/VEX/priv/host-ppc/hdefs.h
+++ b/VEX/priv/host-ppc/hdefs.h
@@ -288,7 +288,7 @@ typedef
     PPCRI;
  
  extern PPCRI* PPCRI_Imm ( ULong );
-extern PPCRI* PPCRI_Reg ( HReg );
+extern PPCRI* PPCRI_Reg( HReg );
  
  extern void ppPPCRI ( PPCRI* );
  
diff --git a/VEX/priv/host-ppc/isel.c b/VEX/priv/host-ppc/isel.c

index 97b61688f33c4bbf3c5ac699ba5071abbcc85985..b0bc3bd21b9afc9d49033006b8dda33c7b274485 100644 (file)
--- a/VEX/priv/host-ppc/isel.c
+++ b/VEX/priv/host-ppc/isel.c
@@ -570,8 +570,8 @@ PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
  
     if (bias < -100 || bias > 100) /* somewhat arbitrarily */
        vpanic("genGuestArrayOffset(ppc host)(3)");
-   if (descr->base < 0 || descr->base > 2000) /* somewhat arbitrarily */
-     vpanic("genGuestArrayOffset(ppc host)(4)");
+   if (descr->base < 0 || descr->base > 4000) /* somewhat arbitrarily */
+      vpanic("genGuestArrayOffset(ppc host)(4)");
  
     /* Compute off into a reg, %off.  Then return:
  
@@ -1367,6 +1367,18 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
           return dst;
        }
  
+      if (e->Iex.Binop.op == Iop_Max32U) {
+         HReg        r1   = iselWordExpr_R(env, e->Iex.Binop.arg1);
+         HReg        r2   = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg        rdst = newVRegI(env);
+         PPCCondCode cc   = mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
+         addInstr(env, mk_iMOVds_RR(rdst, r1));
+         addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+                                    7/*cr*/, rdst, PPCRH_Reg(r2)));
+         addInstr(env, PPCInstr_CMov(cc, rdst, PPCRI_Reg(r2)));
+         return rdst;
+      }
+
        if (e->Iex.Binop.op == Iop_32HLto64) {
           HReg   r_Hi  = iselWordExpr_R(env, e->Iex.Binop.arg1);
           HReg   r_Lo  = iselWordExpr_R(env, e->Iex.Binop.arg2);
@@ -1908,7 +1920,7 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
           addInstr(env, mk_iMOVds_RR(r_dst,rX));
           addInstr(env, PPCInstr_Alu(Palu_AND, r_tmp,
                                      r_cond, PPCRH_Imm(False,0xFF)));
-         addInstr(env, PPCInstr_Cmp(False/*unsined*/, True/*32bit cmp*/,
+         addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                      7/*cr*/, r_tmp, PPCRH_Imm(False,0)));
           addInstr(env, PPCInstr_CMov(cc,r_dst,r0));
           return r_dst;
@@ -2672,7 +2684,7 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
              return;
           }
  
-         /* Add64/Sub64 */
+         /* Add64 */
           case Iop_Add64: {
              HReg xLo, xHi, yLo, yHi;
              HReg tLo = newVRegI(env);
@@ -2751,6 +2763,28 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
           return;
        }
  
+      /* Left64 */
+      case Iop_Left64: {
+         HReg argHi, argLo;
+         HReg zero32 = newVRegI(env);
+         HReg resHi  = newVRegI(env);
+         HReg resLo  = newVRegI(env);
+         iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg);
+         vassert(env->mode64 == False);
+         addInstr(env, PPCInstr_LI(zero32, 0, env->mode64));
+         /* resHi:resLo = - argHi:argLo */
+         addInstr(env, PPCInstr_AddSubC( False/*sub*/, True/*set carry*/,
+                                         resLo, zero32, argLo ));
+         addInstr(env, PPCInstr_AddSubC( False/*sub*/, False/*read carry*/,
+                                         resHi, zero32, argHi ));
+         /* resHi:resLo |= argHi:argLo */
+         addInstr(env, PPCInstr_Alu(Palu_OR, resLo, resLo, PPCRH_Reg(argLo)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, resHi, resHi, PPCRH_Reg(argHi)));
+         *rHi = resHi;
+         *rLo = resLo;
+         return;
+      }
+
        /* 32Sto64(e) */
        case Iop_32Sto64: {
           HReg tHi = newVRegI(env);
diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c

index 957692d982079123488ee1d8a7da1cd0f23821b3..0644c5c852471149cb4ddb69e63a8dd29fd60c93 100644 (file)
--- a/VEX/priv/host-x86/isel.c
+++ b/VEX/priv/host-x86/isel.c
@@ -926,6 +926,17 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
        }
  
        /* Handle misc other ops. */
+
+      if (e->Iex.Binop.op == Iop_Max32U) {
+         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg dst  = newVRegI(env);
+         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         addInstr(env, mk_iMOVsd_RR(src1,dst));
+         addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
+         addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
+         return dst;
+      }
+
        if (e->Iex.Binop.op == Iop_8HLto16) {
           HReg hi8  = newVRegI(env);
           HReg lo8  = newVRegI(env);
diff --git a/VEX/priv/ir/irdefs.c b/VEX/priv/ir/irdefs.c

index 727935703079dc5115c19dcd3254044fd3ab0462..55c5aaa57b34855470f7d4d5c113a1dcfa993e82 100644 (file)
--- a/VEX/priv/ir/irdefs.c
+++ b/VEX/priv/ir/irdefs.c
@@ -210,6 +210,7 @@ void ppIROp ( IROp op )
        case Iop_Left16: vex_printf("Left16"); return;
        case Iop_Left32: vex_printf("Left32"); return;
        case Iop_Left64: vex_printf("Left64"); return;
+      case Iop_Max32U: vex_printf("Max32U"); return;
  
        case Iop_CmpORD32U: vex_printf("CmpORD32U"); return;
        case Iop_CmpORD32S: vex_printf("CmpORD32S"); return;
@@ -768,7 +769,9 @@ void ppIRStmt ( IRStmt* s )
        case Ist_AbiHint:
           vex_printf("====== AbiHint(");
           ppIRExpr(s->Ist.AbiHint.base);
-         vex_printf(", %d) ======", s->Ist.AbiHint.len);
+         vex_printf(", %d, ", s->Ist.AbiHint.len);
+         ppIRExpr(s->Ist.AbiHint.nia);
+         vex_printf(") ======");
           break;
        case Ist_Put:
           vex_printf( "PUT(%d) = ", s->Ist.Put.offset);
@@ -1155,11 +1158,12 @@ IRStmt* IRStmt_IMark ( Addr64 addr, Int len ) {
     s->Ist.IMark.len  = len;
     return s;
  }
-IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len ) {
+IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia ) {
     IRStmt* s           = LibVEX_Alloc(sizeof(IRStmt));
     s->tag              = Ist_AbiHint;
     s->Ist.AbiHint.base = base;
     s->Ist.AbiHint.len  = len;
+   s->Ist.AbiHint.nia  = nia;
     return s;
  }
  IRStmt* IRStmt_Put ( Int off, IRExpr* data ) {
@@ -1383,7 +1387,8 @@ IRStmt* deepCopyIRStmt ( IRStmt* s )
           return IRStmt_NoOp();
        case Ist_AbiHint:
           return IRStmt_AbiHint(deepCopyIRExpr(s->Ist.AbiHint.base),
-                               s->Ist.AbiHint.len);
+                               s->Ist.AbiHint.len,
+                               deepCopyIRExpr(s->Ist.AbiHint.nia));
        case Ist_IMark:
           return IRStmt_IMark(s->Ist.IMark.addr, s->Ist.IMark.len);
        case Ist_Put: 
@@ -1498,6 +1503,7 @@ void typeOfPrimop ( IROp op,
        case Iop_CmpORD32S:
        case Iop_Add32: case Iop_Sub32: case Iop_Mul32:
        case Iop_Or32:  case Iop_And32: case Iop_Xor32:
+      case Iop_Max32U:
           BINARY(Ity_I32,Ity_I32, Ity_I32);
  
        case Iop_Add64: case Iop_Sub64: case Iop_Mul64:
@@ -1982,7 +1988,8 @@ Bool isFlatIRStmt ( IRStmt* st )
  
     switch (st->tag) {
        case Ist_AbiHint:
-         return isIRAtom(st->Ist.AbiHint.base);
+         return isIRAtom(st->Ist.AbiHint.base)
+                && isIRAtom(st->Ist.AbiHint.nia);
        case Ist_Put:
           return isIRAtom(st->Ist.Put.data);
        case Ist_PutI:
@@ -2192,6 +2199,7 @@ void useBeforeDef_Stmt ( IRSB* bb, IRStmt* stmt, Int* def_counts )
           break;
        case Ist_AbiHint:
           useBeforeDef_Expr(bb,stmt,stmt->Ist.AbiHint.base,def_counts);
+         useBeforeDef_Expr(bb,stmt,stmt->Ist.AbiHint.nia,def_counts);
           break;
        case Ist_Put:
           useBeforeDef_Expr(bb,stmt,stmt->Ist.Put.data,def_counts);
@@ -2445,6 +2453,9 @@ void tcStmt ( IRSB* bb, IRStmt* stmt, IRType gWordTy )
           if (typeOfIRExpr(tyenv, stmt->Ist.AbiHint.base) != gWordTy)
              sanityCheckFail(bb,stmt,"IRStmt.AbiHint.base: "
                                      "not :: guest word type");
+         if (typeOfIRExpr(tyenv, stmt->Ist.AbiHint.nia) != gWordTy)
+            sanityCheckFail(bb,stmt,"IRStmt.AbiHint.nia: "
+                                    "not :: guest word type");
           break;
        case Ist_Put:
           tcExpr( bb, stmt, stmt->Ist.Put.data, gWordTy );
diff --git a/VEX/priv/ir/iropt.c b/VEX/priv/ir/iropt.c

index ea0d54c7459a5bbd43b86fcfbae5969223552b97..a4937af9fb2eded9c68e1d986ac2fb94eb7b324e 100644 (file)
--- a/VEX/priv/ir/iropt.c
+++ b/VEX/priv/ir/iropt.c
@@ -448,7 +448,8 @@ static void flatten_Stmt ( IRSB* bb, IRStmt* st )
           break;
        case Ist_AbiHint:
           e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
-         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len));
+         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
+         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
           break;
        case Ist_Exit:
           e1 = flatten_Expr(bb, st->Ist.Exit.guard);
@@ -712,6 +713,7 @@ static void handle_gets_Stmt (
           AbiHints.*/
        case Ist_AbiHint:
           vassert(isIRAtom(st->Ist.AbiHint.base));
+         vassert(isIRAtom(st->Ist.AbiHint.nia));
           /* fall through */
        case Ist_MBE:
        case Ist_Dirty:
@@ -1200,6 +1202,15 @@ static IRExpr* fold_Expr ( IRExpr* e )
                          - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                 break;
  
+            /* -- Max32U -- */
+            case Iop_Max32U: {
+               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
+               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
+               UInt res  = u32a > u32b ? u32a : u32b;
+               e2 = IRExpr_Const(IRConst_U32(res));
+               break;
+            }
+
              /* -- Mul -- */
              case Iop_Mul32:
                 e2 = IRExpr_Const(IRConst_U32(
@@ -1421,8 +1432,9 @@ static IRExpr* fold_Expr ( IRExpr* e )
              e2 = e->Iex.Binop.arg1;
           } else
  
-         /* Or32/Add32(x,0) ==> x */
-         if ((e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Or32)
+         /* Or32/Add32/Max32U(x,0) ==> x */
+         if ((e->Iex.Binop.op == Iop_Add32 
+              || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
               && e->Iex.Binop.arg2->tag == Iex_Const
               && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
              e2 = e->Iex.Binop.arg1;
@@ -1500,8 +1512,8 @@ static IRExpr* fold_Expr ( IRExpr* e )
              e2 = e->Iex.Binop.arg2;
           } else
  
-         /* Or32(0,x) ==> x */
-         if (e->Iex.Binop.op == Iop_Or32
+         /* Or32/Max32U(0,x) ==> x */
+         if ((e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
               && e->Iex.Binop.arg1->tag == Iex_Const
               && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
              e2 = e->Iex.Binop.arg2;
@@ -1516,6 +1528,7 @@ static IRExpr* fold_Expr ( IRExpr* e )
  
           /* Or8/16/32/64(t,t) ==> t, for some IRTemp t */
           /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
+         /* Max32U(t,t) ==> t, for some IRTemp t */
           if (   (e->Iex.Binop.op == Iop_And64
                || e->Iex.Binop.op == Iop_And32
                || e->Iex.Binop.op == Iop_And16
@@ -1523,7 +1536,8 @@ static IRExpr* fold_Expr ( IRExpr* e )
                || e->Iex.Binop.op == Iop_Or64
                || e->Iex.Binop.op == Iop_Or32
                || e->Iex.Binop.op == Iop_Or16
-              || e->Iex.Binop.op == Iop_Or8)
+              || e->Iex.Binop.op == Iop_Or8
+              || e->Iex.Binop.op == Iop_Max32U)
               && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
              e2 = e->Iex.Binop.arg1;
           }
@@ -1697,9 +1711,11 @@ static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
     switch (st->tag) {
        case Ist_AbiHint:
           vassert(isIRAtom(st->Ist.AbiHint.base));
+         vassert(isIRAtom(st->Ist.AbiHint.nia));
           return IRStmt_AbiHint(
                     fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
-                   st->Ist.AbiHint.len
+                   st->Ist.AbiHint.len,
+                   fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
                  );
        case Ist_Put:
           vassert(isIRAtom(st->Ist.Put.data));
@@ -1943,6 +1959,7 @@ static void addUses_Stmt ( Bool* set, IRStmt* st )
     switch (st->tag) {
        case Ist_AbiHint:
           addUses_Expr(set, st->Ist.AbiHint.base);
+         addUses_Expr(set, st->Ist.AbiHint.nia);
           return;
        case Ist_PutI:
           addUses_Expr(set, st->Ist.PutI.ix);
@@ -3211,6 +3228,7 @@ static void deltaIRStmt ( IRStmt* st, Int delta )
           break;
        case Ist_AbiHint:
           deltaIRExpr(st->Ist.AbiHint.base, delta);
+         deltaIRExpr(st->Ist.AbiHint.nia, delta);
           break;
        case Ist_Put:
           deltaIRExpr(st->Ist.Put.data, delta);
@@ -3667,6 +3685,7 @@ static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
     switch (st->tag) {
        case Ist_AbiHint:
           aoccCount_Expr(uses, st->Ist.AbiHint.base);
+         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
           return;
        case Ist_WrTmp: 
           aoccCount_Expr(uses, st->Ist.WrTmp.data); 
@@ -3898,7 +3917,8 @@ static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
        case Ist_AbiHint:
           return IRStmt_AbiHint(
                     atbSubst_Expr(env, st->Ist.AbiHint.base),
-                   st->Ist.AbiHint.len
+                   st->Ist.AbiHint.len,
+                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                  );
        case Ist_Store:
           return IRStmt_Store(
@@ -4231,6 +4251,7 @@ static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
        switch (st->tag) {
           case Ist_AbiHint:
              vassert(isIRAtom(st->Ist.AbiHint.base));
+            vassert(isIRAtom(st->Ist.AbiHint.nia));
              break;
           case Ist_PutI: 
              *hasGetIorPutI = True;
diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h

index bf981894339da776b7d00f5f23027c01fa55d36c..5cf21ea15f2a0c12ae48bb2a0addbf337c34d4bd 100644 (file)
--- a/VEX/pub/libvex.h
+++ b/VEX/pub/libvex.h
@@ -335,14 +335,18 @@ typedef
  /* A note about guest state layout.
  
     LibVEX defines the layout for the guest state, in the file
-   pub/libvex_guest_<arch>.h.  The struct will have an 8-aligned size.
-   Each translated bb is assumed to be entered with a specified
-   register pointing at such a struct.  Beyond that is a shadow
-   state area with the same size as the struct.  Beyond that is
-   a spill area that LibVEX may spill into.  It must have size
+   pub/libvex_guest_<arch>.h.  The struct will have a 16-aligned
+   size.  Each translated bb is assumed to be entered with a specified
+   register pointing at such a struct.  Beyond that are two copies of
+   the shadow state area with the same size as the struct.  Beyond
+   that is a spill area that LibVEX may spill into.  It must have size
     LibVEX_N_SPILL_BYTES, and this must be a 16-aligned number.
  
-   On entry, the baseblock pointer register must be 8-aligned.
+   On entry, the baseblock pointer register must be 16-aligned.
+
+   There must be no holes in between the primary guest state, its two
+   copies, and the spill area.  In short, all 4 areas must have a
+   16-aligned size and be 16-aligned, and placed back-to-back.
  */
  
  #define LibVEX_N_SPILL_BYTES 2048
diff --git a/VEX/pub/libvex_guest_amd64.h b/VEX/pub/libvex_guest_amd64.h

index 7a648ad915954a51e2e465eaf9ce64572b176064..41c4ad6baaf2de320ba24d45d9170f0dfd6c86fc 100644 (file)
--- a/VEX/pub/libvex_guest_amd64.h
+++ b/VEX/pub/libvex_guest_amd64.h
@@ -85,8 +85,7 @@ typedef
        /* 144 */ ULong  guest_CC_DEP2;
        /* 152 */ ULong  guest_CC_NDEP;
        /* The D flag is stored here, encoded as either -1 or +1 */
-      /* 160 */ ULong  guest_DFLAG;       /* 48 */
-      /* RIP */
+      /* 160 */ ULong  guest_DFLAG;
        /* 168 */ ULong  guest_RIP;
        /* Probably a lot more stuff too. 
           D,ID flags
@@ -96,16 +95,16 @@ typedef
        */
  
        /* Bit 21 (ID) of eflags stored here, as either 0 or 1. */
-      ULong guest_IDFLAG;
+      /* 176 */ ULong guest_IDFLAG;
  
        /* HACK to make tls on amd64-linux work.  %fs only ever seems to
           hold zero, and so guest_FS_ZERO holds the 64-bit offset
           associated with a %fs value of zero. */
-      ULong guest_FS_ZERO;
+      /* 184 */ ULong guest_FS_ZERO;
  
        /* XMM registers */
-      ULong guest_SSEROUND;
-      U128  guest_XMM0;
+      /* 192 */ULong guest_SSEROUND;
+      /* 200 */U128  guest_XMM0;
        U128  guest_XMM1;
        U128  guest_XMM2;
        U128  guest_XMM3;
@@ -126,14 +125,14 @@ typedef
        /* Note.  Setting guest_FTOP to be ULong messes up the
           delicately-balanced PutI/GetI optimisation machinery.
           Therefore best to leave it as a UInt. */
-      UInt  guest_FTOP;
+      /* 456 */UInt  guest_FTOP;
        ULong guest_FPREG[8];
-      UChar guest_FPTAG[8];
-      ULong guest_FPROUND;
-      ULong guest_FC3210;
+      /* 528 */ UChar guest_FPTAG[8];
+      /* 536 */ ULong guest_FPROUND;
+      /* 544 */ ULong guest_FC3210;
  
        /* Emulation warnings */
-      UInt   guest_EMWARN;
+      /* 552 */ UInt  guest_EMWARN;
  
        /* Translation-invalidation area description.  Not used on amd64
           (there is no invalidate-icache insn), but needed so as to
@@ -153,8 +152,8 @@ typedef
           replace-style ones. */
        ULong guest_NRADDR;
  
-      /* Padding to make it have an 8-aligned size */
-      /* UInt   padding; */
+      /* Padding to make it have a 16-aligned size */
+      ULong padding;
     }
     VexGuestAMD64State;
  
diff --git a/VEX/pub/libvex_guest_ppc32.h b/VEX/pub/libvex_guest_ppc32.h

index a797d10d0d391e77260ceb6a1be871f5c58884fd..2163adf162ba96d8916599809cb9b3aee260703d 100644 (file)
--- a/VEX/pub/libvex_guest_ppc32.h
+++ b/VEX/pub/libvex_guest_ppc32.h
@@ -128,38 +128,41 @@ typedef
        /* 376 */ ULong guest_FPR31;
  
        // Vector Registers
-      /* 384 */ U128 guest_VR0 __attribute__ ((aligned (16)));
-      /* 400 */ U128 guest_VR1 __attribute__ ((aligned (16)));
-      /* 416 */ U128 guest_VR2 __attribute__ ((aligned (16)));
-      /* 432 */ U128 guest_VR3 __attribute__ ((aligned (16)));
-      /* 448 */ U128 guest_VR4 __attribute__ ((aligned (16)));
-      /* 464 */ U128 guest_VR5 __attribute__ ((aligned (16)));
-      /* 480 */ U128 guest_VR6 __attribute__ ((aligned (16)));
-      /* 496 */ U128 guest_VR7 __attribute__ ((aligned (16)));
-      /* 512 */ U128 guest_VR8 __attribute__ ((aligned (16)));
-      /* 528 */ U128 guest_VR9 __attribute__ ((aligned (16)));
-      /* 544 */ U128 guest_VR10 __attribute__ ((aligned (16)));
-      /* 560 */ U128 guest_VR11 __attribute__ ((aligned (16)));
-      /* 576 */ U128 guest_VR12 __attribute__ ((aligned (16)));
-      /* 592 */ U128 guest_VR13 __attribute__ ((aligned (16)));
-      /* 608 */ U128 guest_VR14 __attribute__ ((aligned (16)));
-      /* 624 */ U128 guest_VR15 __attribute__ ((aligned (16)));
-      /* 640 */ U128 guest_VR16 __attribute__ ((aligned (16)));
-      /* 656 */ U128 guest_VR17 __attribute__ ((aligned (16)));
-      /* 672 */ U128 guest_VR18 __attribute__ ((aligned (16)));
-      /* 688 */ U128 guest_VR19 __attribute__ ((aligned (16)));
-      /* 704 */ U128 guest_VR20 __attribute__ ((aligned (16)));
-      /* 720 */ U128 guest_VR21 __attribute__ ((aligned (16)));
-      /* 736 */ U128 guest_VR22 __attribute__ ((aligned (16)));
-      /* 752 */ U128 guest_VR23 __attribute__ ((aligned (16)));
-      /* 768 */ U128 guest_VR24 __attribute__ ((aligned (16)));
-      /* 784 */ U128 guest_VR25 __attribute__ ((aligned (16)));
-      /* 800 */ U128 guest_VR26 __attribute__ ((aligned (16)));
-      /* 816 */ U128 guest_VR27 __attribute__ ((aligned (16)));
-      /* 832 */ U128 guest_VR28 __attribute__ ((aligned (16)));
-      /* 848 */ U128 guest_VR29 __attribute__ ((aligned (16)));
-      /* 864 */ U128 guest_VR30 __attribute__ ((aligned (16)));
-      /* 880 */ U128 guest_VR31 __attribute__ ((aligned (16)));
+      // IMPORTANT: the user of libvex must place the guest state so as
+      // to ensure that guest_VR{0..31}, and any shadows thereof, are
+      // 16-aligned.
+      /* 384 */ U128 guest_VR0;
+      /* 400 */ U128 guest_VR1;
+      /* 416 */ U128 guest_VR2;
+      /* 432 */ U128 guest_VR3;
+      /* 448 */ U128 guest_VR4;
+      /* 464 */ U128 guest_VR5;
+      /* 480 */ U128 guest_VR6;
+      /* 496 */ U128 guest_VR7;
+      /* 512 */ U128 guest_VR8;
+      /* 528 */ U128 guest_VR9;
+      /* 544 */ U128 guest_VR10;
+      /* 560 */ U128 guest_VR11;
+      /* 576 */ U128 guest_VR12;
+      /* 592 */ U128 guest_VR13;
+      /* 608 */ U128 guest_VR14;
+      /* 624 */ U128 guest_VR15;
+      /* 640 */ U128 guest_VR16;
+      /* 656 */ U128 guest_VR17;
+      /* 672 */ U128 guest_VR18;
+      /* 688 */ U128 guest_VR19;
+      /* 704 */ U128 guest_VR20;
+      /* 720 */ U128 guest_VR21;
+      /* 736 */ U128 guest_VR22;
+      /* 752 */ U128 guest_VR23;
+      /* 768 */ U128 guest_VR24;
+      /* 784 */ U128 guest_VR25;
+      /* 800 */ U128 guest_VR26;
+      /* 816 */ U128 guest_VR27;
+      /* 832 */ U128 guest_VR28;
+      /* 848 */ U128 guest_VR29;
+      /* 864 */ U128 guest_VR30;
+      /* 880 */ U128 guest_VR31;
  
        /* 896 */ UInt guest_CIA;    // IP (no arch visible register)
        /* 900 */ UInt guest_LR;     // Link Register
diff --git a/VEX/pub/libvex_guest_ppc64.h b/VEX/pub/libvex_guest_ppc64.h

index 68c3846136695a1104a8125faf1f2356d898df6f..d03c01db99e9f10517ace3877cf1438c86a4e987 100644 (file)
--- a/VEX/pub/libvex_guest_ppc64.h
+++ b/VEX/pub/libvex_guest_ppc64.h
@@ -166,38 +166,41 @@ typedef
        /* 504 */ ULong guest_FPR31;
  
        // Vector Registers
-      /*  512 */ U128 guest_VR0 __attribute__ ((aligned (16)));
-      /*  528 */ U128 guest_VR1 __attribute__ ((aligned (16)));
-      /*  544 */ U128 guest_VR2 __attribute__ ((aligned (16)));
-      /*  560 */ U128 guest_VR3 __attribute__ ((aligned (16)));
-      /*  576 */ U128 guest_VR4 __attribute__ ((aligned (16)));
-      /*  592 */ U128 guest_VR5 __attribute__ ((aligned (16)));
-      /*  608 */ U128 guest_VR6 __attribute__ ((aligned (16)));
-      /*  624 */ U128 guest_VR7 __attribute__ ((aligned (16)));
-      /*  640 */ U128 guest_VR8 __attribute__ ((aligned (16)));
-      /*  656 */ U128 guest_VR9 __attribute__ ((aligned (16)));
-      /*  672 */ U128 guest_VR10 __attribute__ ((aligned (16)));
-      /*  688 */ U128 guest_VR11 __attribute__ ((aligned (16)));
-      /*  704 */ U128 guest_VR12 __attribute__ ((aligned (16)));
-      /*  720 */ U128 guest_VR13 __attribute__ ((aligned (16)));
-      /*  736 */ U128 guest_VR14 __attribute__ ((aligned (16)));
-      /*  752 */ U128 guest_VR15 __attribute__ ((aligned (16)));
-      /*  768 */ U128 guest_VR16 __attribute__ ((aligned (16)));
-      /*  784 */ U128 guest_VR17 __attribute__ ((aligned (16)));
-      /*  800 */ U128 guest_VR18 __attribute__ ((aligned (16)));
-      /*  816 */ U128 guest_VR19 __attribute__ ((aligned (16)));
-      /*  832 */ U128 guest_VR20 __attribute__ ((aligned (16)));
-      /*  848 */ U128 guest_VR21 __attribute__ ((aligned (16)));
-      /*  864 */ U128 guest_VR22 __attribute__ ((aligned (16)));
-      /*  880 */ U128 guest_VR23 __attribute__ ((aligned (16)));
-      /*  896 */ U128 guest_VR24 __attribute__ ((aligned (16)));
-      /*  912 */ U128 guest_VR25 __attribute__ ((aligned (16)));
-      /*  928 */ U128 guest_VR26 __attribute__ ((aligned (16)));
-      /*  944 */ U128 guest_VR27 __attribute__ ((aligned (16)));
-      /*  960 */ U128 guest_VR28 __attribute__ ((aligned (16)));
-      /*  976 */ U128 guest_VR29 __attribute__ ((aligned (16)));
-      /*  992 */ U128 guest_VR30 __attribute__ ((aligned (16)));
-      /* 1008 */ U128 guest_VR31 __attribute__ ((aligned (16)));
+      // IMPORTANT: the user of libvex must place the guest state so as
+      // to ensure that guest_VR{0..31}, and any shadows thereof, are
+      // 16-aligned.
+      /*  512 */ U128 guest_VR0;
+      /*  528 */ U128 guest_VR1;
+      /*  544 */ U128 guest_VR2;
+      /*  560 */ U128 guest_VR3;
+      /*  576 */ U128 guest_VR4;
+      /*  592 */ U128 guest_VR5;
+      /*  608 */ U128 guest_VR6;
+      /*  624 */ U128 guest_VR7;
+      /*  640 */ U128 guest_VR8;
+      /*  656 */ U128 guest_VR9;
+      /*  672 */ U128 guest_VR10;
+      /*  688 */ U128 guest_VR11;
+      /*  704 */ U128 guest_VR12;
+      /*  720 */ U128 guest_VR13;
+      /*  736 */ U128 guest_VR14;
+      /*  752 */ U128 guest_VR15;
+      /*  768 */ U128 guest_VR16;
+      /*  784 */ U128 guest_VR17;
+      /*  800 */ U128 guest_VR18;
+      /*  816 */ U128 guest_VR19;
+      /*  832 */ U128 guest_VR20;
+      /*  848 */ U128 guest_VR21;
+      /*  864 */ U128 guest_VR22;
+      /*  880 */ U128 guest_VR23;
+      /*  896 */ U128 guest_VR24;
+      /*  912 */ U128 guest_VR25;
+      /*  928 */ U128 guest_VR26;
+      /*  944 */ U128 guest_VR27;
+      /*  960 */ U128 guest_VR28;
+      /*  976 */ U128 guest_VR29;
+      /*  992 */ U128 guest_VR30;
+      /* 1008 */ U128 guest_VR31;
  
        /* 1024 */ ULong guest_CIA;    // IP (no arch visible register)
        /* 1032 */ ULong guest_LR;     // Link Register
diff --git a/VEX/pub/libvex_guest_x86.h b/VEX/pub/libvex_guest_x86.h

index af93c24dd584a78a34a8c4e1a81f7da174c875fb..062482e3f5983fda5f973d35e965c6064cd5d42f 100644 (file)
--- a/VEX/pub/libvex_guest_x86.h
+++ b/VEX/pub/libvex_guest_x86.h
@@ -220,7 +220,7 @@ typedef
           replace-style ones. */
        UInt guest_NRADDR;
  
-      /* Padding to make it have an 8-aligned size */
+      /* Padding to make it have a 16-aligned size */
        UInt padding;
     }
     VexGuestX86State;
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h

index e89bef119c4ef2d8e2dba5059944fe4ff89a9e18..6a8dc763226884e957886faf54d14ccdfff94018 100644 (file)
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -446,6 +446,7 @@ typedef
        Iop_CmpNEZ8, Iop_CmpNEZ16,  Iop_CmpNEZ32,  Iop_CmpNEZ64,
        Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-Os; other -> all-1s */
        Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
+      Iop_Max32U, /* unsigned max */
  
        /* PowerPC-style 3-way integer comparisons.  Without them it is
           difficult to simulate PPC efficiently.
@@ -1411,14 +1412,17 @@ typedef
              that a given chunk of address space, [base .. base+len-1],
              has become undefined.  This is used on amd64-linux and
              some ppc variants to pass stack-redzoning hints to whoever
-            wants to see them.
+            wants to see them.  It also indicates the address of the
+            next (dynamic) instruction that will be executed.  This is
+            to help Memcheck do origin tracking.
  
-            ppIRExpr output: ====== AbiHint(<base>, <len>) ======
-                         eg. ====== AbiHint(t1, 16) ======
+            ppIRExpr output: ====== AbiHint(<base>, <len>, <nia>) ======
+                         eg. ====== AbiHint(t1, 16, t2) ======
           */
           struct {
              IRExpr* base;     /* Start  of undefined chunk */
              Int     len;      /* Length of undefined chunk */
+            IRExpr* nia;      /* Address of next (guest) insn */
           } AbiHint;
  
           /* Write a guest register, at a fixed offset in the guest state.
@@ -1505,7 +1509,7 @@ typedef
  /* Statement constructors. */
  extern IRStmt* IRStmt_NoOp    ( void );
  extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len );
-extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len );
+extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
  extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
  extern IRStmt* IRStmt_PutI    ( IRRegArray* descr, IRExpr* ix, Int bias, 
                                  IRExpr* data );
diff --git a/VEX/test_main.c b/VEX/test_main.c

index c67ea97900ebd3e2400c469a5e80948b45a62db3..6b9cb72d568739af3e97838383fdf3d97e71c742 100644 (file)
--- a/VEX/test_main.c
+++ b/VEX/test_main.c
@@ -48,9 +48,13 @@ static UChar transbuf[N_TRANSBUF];
  static Bool verbose = True;
  
  /* Forwards */
-#if 0 /* UNUSED */
+#if 1 /* UNUSED */
  static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
-static IRSB* mc_instrument ( IRSB*, VexGuestLayout*, IRType, IRType );
+static
+IRSB* mc_instrument ( void* closureV,
+                      IRSB* bb_in, VexGuestLayout* layout, 
+                      VexGuestExtents* vge,
+                      IRType gWordTy, IRType hWordTy );
  #endif
  
  static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) { return False; }
@@ -167,7 +171,7 @@ int main ( int argc, char** argv )
        vta.host_bytes      = transbuf;
        vta.host_bytes_size = N_TRANSBUF;
        vta.host_bytes_used = &trans_used;
-#if 1 /* no instrumentation */
+#if 0 /* no instrumentation */
        vta.instrument1     = NULL;
        vta.instrument2     = NULL;
  #endif
@@ -175,7 +179,7 @@ int main ( int argc, char** argv )
        vta.instrument1     = ac_instrument;
        vta.instrument2     = NULL;
  #endif
-#if 0 /* memcheck */
+#if 1 /* memcheck */
        vta.instrument1     = mc_instrument;
        vta.instrument2     = NULL;
  #endif
@@ -379,7 +383,15 @@ IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
  //////////////////////////////////////////////////////////////////////
  //////////////////////////////////////////////////////////////////////
  
-#if 0 /* UNUSED */
+#if 1 /* UNUSED */
+
+static
+__attribute((noreturn))
+void panic ( HChar* s )
+{
+  printf("\npanic: %s\n", s);
+  failure_exit();
+}
  
  #define tl_assert(xxx) assert(xxx)
  #define VG_(xxxx) xxxx
@@ -560,7 +572,7 @@ static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
  {
     if (a1->tag == Iex_Const)
        return True;
-   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp < mce->n_originalTmps)
+   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
        return True;
     return False;
  }
@@ -571,7 +583,7 @@ static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
  {
     if (a1->tag == Iex_Const)
        return True;
-   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp >= mce->n_originalTmps)
+   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
        return True;
     return False;
  }
@@ -580,7 +592,7 @@ static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
     are identically-kinded. */
  static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
  {
-   if (a1->tag == Iex_Tmp && a1->tag == Iex_Tmp)
+   if (a1->tag == Iex_RdTmp && a1->tag == Iex_RdTmp)
        return True;
     if (a1->tag == Iex_Const && a1->tag == Iex_Const)
        return True;
@@ -634,7 +646,7 @@ static IRExpr* definedOfType ( IRType ty ) {
  
  /* assign value to tmp */
  #define assign(_bb,_tmp,_expr)   \
-   addStmtToIRSB((_bb), IRStmt_Tmp((_tmp),(_expr)))
+   addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))
  
  /* add stmt to a bb */
  #define stmt(_bb,_stmt)    \
@@ -648,7 +660,7 @@ static IRExpr* definedOfType ( IRType ty ) {
  #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
  #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
  #define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
-#define mkexpr(_tmp)             IRExpr_Tmp((_tmp))
+#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
  
  /* bind the given expression to a new temporary, and return the
     temporary.  This effectively converts an arbitrary expression into
@@ -1029,10 +1041,10 @@ static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
        getting a new value. */
     tl_assert(isIRAtom(vatom));
     /* sameKindedAtoms ... */
-   if (vatom->tag == Iex_Tmp) {
-      tl_assert(atom->tag == Iex_Tmp);
-      newShadowTmp(mce, atom->Iex.Tmp.tmp);
-      assign(mce->bb, findShadowTmp(mce, atom->Iex.Tmp.tmp), 
+   if (vatom->tag == Iex_RdTmp) {
+      tl_assert(atom->tag == Iex_RdTmp);
+      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
+      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp), 
                        definedOfType(ty));
     }
  }
@@ -1110,7 +1122,7 @@ void do_shadow_PUT ( MCEnv* mce,  Int offset,
  */
  static
  void do_shadow_PUTI ( MCEnv* mce, 
-                      IRArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
+                      IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
  {
     IRAtom* vatom;
     IRType  ty, tyS;
@@ -1132,8 +1144,8 @@ void do_shadow_PUTI ( MCEnv* mce,
     } else {
        /* Do a cloned version of the Put that refers to the shadow
           area. */
-      IRArray* new_descr 
-         = mkIRArray( descr->base + mce->layout->total_sizeB, 
+      IRRegArray* new_descr 
+         = mkIRRegArray( descr->base + mce->layout->total_sizeB, 
                        tyS, descr->nElems);
        stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
     }
@@ -1163,7 +1175,7 @@ IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
     given GETI (passed in in pieces). 
  */
  static
-IRExpr* shadow_GETI ( MCEnv* mce, IRArray* descr, IRAtom* ix, Int bias )
+IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
  {
     IRType ty   = descr->elemTy;
     IRType tyS  = shadowType(ty);
@@ -1177,8 +1189,8 @@ IRExpr* shadow_GETI ( MCEnv* mce, IRArray* descr, IRAtom* ix, Int bias )
     } else {
        /* return a cloned version of the Get that refers to the shadow
           area. */
-      IRArray* new_descr 
-         = mkIRArray( descr->base + mce->layout->total_sizeB, 
+      IRRegArray* new_descr 
+         = mkIRRegArray( descr->base + mce->layout->total_sizeB, 
                        tyS, descr->nElems);
        return IRExpr_GetI( new_descr, ix, bias );
     }
@@ -1684,7 +1696,7 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,
  
        /* Scalar floating point */
  
-      case Iop_RoundF64:
+         //      case Iop_RoundF64:
        case Iop_F64toI64:
        case Iop_I64toF64:
           /* First arg is I32 (rounding mode), second is F64 or I64
@@ -2068,8 +2080,8 @@ IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
           return shadow_GETI( mce, e->Iex.GetI.descr, 
                                    e->Iex.GetI.ix, e->Iex.GetI.bias );
  
-      case Iex_Tmp:
-         return IRExpr_Tmp( findShadowTmp(mce, e->Iex.Tmp.tmp) );
+      case Iex_RdTmp:
+         return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );
  
        case Iex_Const:
           return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
@@ -2084,9 +2096,9 @@ IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
        case Iex_Unop:
           return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
  
-      case Iex_LDle:
-         return expr2vbits_LDle( mce, e->Iex.LDle.ty, 
-                                      e->Iex.LDle.addr, 0/*addr bias*/ );
+      case Iex_Load:
+         return expr2vbits_LDle( mce, e->Iex.Load.ty, 
+                                      e->Iex.Load.addr, 0/*addr bias*/ );
  
        case Iex_CCall:
           return mkLazyN( mce, e->Iex.CCall.args, 
@@ -2154,7 +2166,7 @@ void do_shadow_STle ( MCEnv* mce,
     IRAtom   *vdataLo64, *vdataHi64;
     IRAtom   *eBias, *eBias0, *eBias8;
     void*    helper = NULL;
-   Char*    hname = NULL;
+   HChar*   hname = NULL;
  
     tyAddr = mce->hWordTy;
     mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
@@ -2447,7 +2459,7 @@ static Bool isBogusAtom ( IRAtom* at )
     ULong n = 0;
     IRConst* con;
     tl_assert(isIRAtom(at));
-   if (at->tag == Iex_Tmp)
+   if (at->tag == Iex_RdTmp)
        return False;
     tl_assert(at->tag == Iex_Const);
     con = at->Iex.Const.con;
@@ -2470,11 +2482,11 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
     Int     i;
     IRExpr* e;
     switch (st->tag) {
-      case Ist_Tmp:
-         e = st->Ist.Tmp.data;
+      case Ist_WrTmp:
+         e = st->Ist.WrTmp.data;
           switch (e->tag) {
              case Iex_Get:
-            case Iex_Tmp:
+            case Iex_RdTmp:
                 return False;
              case Iex_Unop: 
                 return isBogusAtom(e->Iex.Unop.arg);
@@ -2485,8 +2497,8 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
                 return isBogusAtom(e->Iex.Mux0X.cond)
                        || isBogusAtom(e->Iex.Mux0X.expr0)
                        || isBogusAtom(e->Iex.Mux0X.exprX);
-            case Iex_LDle: 
-               return isBogusAtom(e->Iex.LDle.addr);
+            case Iex_Load: 
+               return isBogusAtom(e->Iex.Load.addr);
              case Iex_CCall:
                 for (i = 0; e->Iex.CCall.args[i]; i++)
                    if (isBogusAtom(e->Iex.CCall.args[i]))
@@ -2497,9 +2509,9 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
           }
        case Ist_Put:
           return isBogusAtom(st->Ist.Put.data);
-      case Ist_STle:
-         return isBogusAtom(st->Ist.STle.addr) 
-                || isBogusAtom(st->Ist.STle.data);
+      case Ist_Store:
+         return isBogusAtom(st->Ist.Store.addr) 
+                || isBogusAtom(st->Ist.Store.data);
        case Ist_Exit:
           return isBogusAtom(st->Ist.Exit.guard);
        default: 
@@ -2509,7 +2521,9 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
     }
  }
  
-IRSB* mc_instrument ( IRSB* bb_in, VexGuestLayout* layout, 
+IRSB* mc_instrument ( void* closureV,
+                      IRSB* bb_in, VexGuestLayout* layout, 
+                      VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
  {
     Bool verboze = False; //True; 
@@ -2522,8 +2536,8 @@ IRSB* mc_instrument ( IRSB* bb_in, VexGuestLayout* layout,
  
     /* Set up BB */
     IRSB* bb     = emptyIRSB();
-   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
-   bb->next     = dopyIRExpr(bb_in->next);
+   bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
+   bb->next     = deepCopyIRExpr(bb_in->next);
     bb->jumpkind = bb_in->jumpkind;
  
     /* Set up the running environment.  Only .bb is modified as we go
@@ -2563,9 +2577,9 @@ IRSB* mc_instrument ( IRSB* bb_in, VexGuestLayout* layout,
  
        switch (st->tag) {
  
-         case Ist_Tmp:
-            assign( bb, findShadowTmp(&mce, st->Ist.Tmp.tmp), 
-                        expr2vbits( &mce, st->Ist.Tmp.data) );
+         case Ist_WrTmp:
+            assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp), 
+                        expr2vbits( &mce, st->Ist.WrTmp.data) );
              break;
  
           case Ist_Put:
@@ -2583,9 +2597,9 @@ IRSB* mc_instrument ( IRSB* bb_in, VexGuestLayout* layout,
                              st->Ist.PutI.data );
              break;
  
-         case Ist_STle:
-            do_shadow_STle( &mce, st->Ist.STle.addr, 0/* addr bias */,
-                                  st->Ist.STle.data,
+         case Ist_Store:
+            do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
+                                  st->Ist.Store.data,
                                    NULL /* shadow data */ );
              break;
author	Julian Seward <jseward@acm.org>
	Thu, 1 May 2008 20:13:04 +0000 (20:13 +0000)
committer	Julian Seward <jseward@acm.org>
	Thu, 1 May 2008 20:13:04 +0000 (20:13 +0000)
VEX/priv/guest-amd64/toIR.c		patch \| blob \| blame \| history
VEX/priv/guest-ppc/toIR.c		patch \| blob \| blame \| history
VEX/priv/guest-x86/toIR.c		patch \| blob \| blame \| history
VEX/priv/host-amd64/isel.c		patch \| blob \| blame \| history
VEX/priv/host-generic/reg_alloc2.c		patch \| blob \| blame \| history
VEX/priv/host-ppc/hdefs.h		patch \| blob \| blame \| history
VEX/priv/host-ppc/isel.c		patch \| blob \| blame \| history
VEX/priv/host-x86/isel.c		patch \| blob \| blame \| history
VEX/priv/ir/irdefs.c		patch \| blob \| blame \| history
VEX/priv/ir/iropt.c		patch \| blob \| blame \| history
VEX/pub/libvex.h		patch \| blob \| blame \| history
VEX/pub/libvex_guest_amd64.h		patch \| blob \| blame \| history
VEX/pub/libvex_guest_ppc32.h		patch \| blob \| blame \| history
VEX/pub/libvex_guest_ppc64.h		patch \| blob \| blame \| history
VEX/pub/libvex_guest_x86.h		patch \| blob \| blame \| history
VEX/pub/libvex_ir.h		patch \| blob \| blame \| history
VEX/test_main.c		patch \| blob \| blame \| history