This patch implements the flag --delta-stacktrace=yes/no.
Yes tells Helgrind to compute the full history stack traces by
changing just the top frame of the previously captured stack trace,
provided no call/return (or other stack-changing) instruction was
executed since that capture.
This can speed up Helgrind by up to 25%.
This flag is currently set to yes by default only on linux x86 and
amd64, as some platform-dependent validation of the heuristics used is
needed before setting the default to yes on a platform. See the
function check_cached_rcec_ok in libhb_core.c for more details about
how to validate/check the behaviour on a new platform.
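
In essence, yes mode replaces most full unwinds by a one-slot update,
along the lines of the following sketch (illustrative names only; the
real code is get_RCEC in libhb_core.c, which keeps the validity flag
in the shadow1 SP register):

    #include <string.h>

    #define N_FRAMES 8
    typedef unsigned long Addr;

    static Addr cached_frames[N_FRAMES]; /* last captured stack trace */
    static int  cache_valid;             /* cleared on call/ret/... */

    /* Hypothetical full unwinder, standing in for VG_(get_StackTrace). */
    extern void full_unwind(Addr* frames, int n_frames);

    static void get_stacktrace(Addr cur_ip, Addr* out_frames)
    {
       if (cache_valid) {
          /* No stack-changing instruction since the last capture:
             only the top frame's program counter moved. */
          cached_frames[0] = cur_ip;
       } else {
          /* Fall back to a full unwind and re-validate the cache. */
          full_unwind(cached_frames, N_FRAMES);
          cache_valid = 1;
       }
       memcpy(out_frames, cached_frames, sizeof cached_frames);
    }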
* ==================== TOOL CHANGES ====================
+* Helgrind:
+
+  - Addition of the option --delta-stacktrace=no|yes [yes on linux
+    amd64/x86], specifying how full history stack traces are computed.
+    Using yes can speed up Helgrind by up to 25%.
* ==================== OTHER CHANGES ====================
where XXXXXX is the bug number as listed below.
255603 exp-sgcheck Assertion '!already_present' failed
+376257 helgrind history full speed up using a cached stack
379373 Fix syscall param msg->desc.port.name points to uninitialised byte(s)
on macOS 10.12
379748 Fix missing pselect syscall (OS X 10.11)
}
}
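+/* Returns True if there is Call Frame unwind info covering address a. */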
+Bool VG_(has_CF_info)(Addr a)
+{
+ return cfsi_m_cache__find (a) != NULL;
+}
+
+
inline
static Addr compute_cfa ( const D3UnwindRegs* uregs,
#define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR)
+#define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
+
Addr VG_(get_IP) ( ThreadId tid ) {
return INSTR_PTR( VG_(threads)[tid].arch );
}
return FRAME_PTR( VG_(threads)[tid].arch );
}
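+/* Get/set the shadow1 copy of the stack pointer (SP_s1).  Helgrind uses
+   this shadow register as a validity flag for its cached stack trace;
+   see libhb_core.c. */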
+Addr VG_(get_SP_s1) ( ThreadId tid ) {
+ return STACK_PTR_S1( VG_(threads)[tid].arch );
+}
+void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
+ STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
+}
+
void VG_(set_IP) ( ThreadId tid, Addr ip ) {
INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
/*--- Exported functions. ---*/
/*------------------------------------------------------------*/
-UInt VG_(get_StackTrace) ( ThreadId tid,
- /*OUT*/StackTrace ips, UInt max_n_ips,
- /*OUT*/StackTrace sps,
- /*OUT*/StackTrace fps,
- Word first_ip_delta )
+UInt VG_(get_StackTrace_with_deltas)(
+ ThreadId tid,
+ /*OUT*/StackTrace ips, UInt n_ips,
+ /*OUT*/StackTrace sps,
+ /*OUT*/StackTrace fps,
+ Word first_ip_delta,
+ Word first_sp_delta
+ )
{
/* Get the register values with which to start the unwind. */
UnwindStartRegs startRegs;
VG_(stack_limits)( (Addr)startRegs.r_sp,
&stack_lowest_byte, &stack_highest_byte );
- /* Take into account the first_ip_delta. */
- startRegs.r_pc += (Long)(Word)first_ip_delta;
+ /* Take into account the first_ip_delta and first_sp_delta. */
+ startRegs.r_pc += (Long)first_ip_delta;
+ startRegs.r_sp += (Long)first_sp_delta;
if (0)
VG_(printf)("tid %u: stack_highest=0x%08lx ip=0x%010llx "
tid, stack_highest_byte,
startRegs.r_pc, startRegs.r_sp);
- return VG_(get_StackTrace_wrk)(tid, ips, max_n_ips,
+ return VG_(get_StackTrace_wrk)(tid, ips, n_ips,
sps, fps,
&startRegs,
stack_highest_byte);
}
+UInt VG_(get_StackTrace) ( ThreadId tid,
+ /*OUT*/StackTrace ips, UInt max_n_ips,
+ /*OUT*/StackTrace sps,
+ /*OUT*/StackTrace fps,
+ Word first_ip_delta )
+{
+ return VG_(get_StackTrace_with_deltas) (tid,
+ ips, max_n_ips,
+ sps,
+ fps,
+ first_ip_delta,
+ 0 /* first_sp_delta */
+ );
+}
+
static void printIpDesc(UInt n, Addr ip, void* uu_opaque)
{
InlIPCursor *iipc = VG_(new_IIPC)(ip);
__attribute__((__noreturn__))
PRINTF_CHECK(1, 2);
-/* Show the state of various threads related information, such
- as the guest stacktrace for each thread.
- Mostly for debugging V.
- The following activates optional output:
- host_stacktrace : shows the host stacktrace.
- stack_usage True means:
- shows how much of the valgrind stack was used.
- shows the client stack range
- exited_thread_slots : show information for thread slots that were used
- but the thread has now exited. */
-extern void VG_(show_sched_status) ( Bool host_stacktrace,
- Bool stack_usage,
- Bool exited_threads);
-
#endif // __PUB_CORE_LIBCASSERT_H
/*--------------------------------------------------------------------*/
<option>--history-level=none</option>.</para>
</listitem>
</varlistentry>
+
+ <varlistentry id="opt.delta-stacktrace"
+ xreflabel="--delta-stacktrace">
+ <term>
+ <option><![CDATA[--delta-stacktrace=no|yes
+ [default: yes on linux amd64/x86] ]]></option>
+ </term>
+ <listitem>
+ <para>This flag only has any effect
+ at <option>--history-level=full</option>.</para>
+    <para><option>--delta-stacktrace</option> configures the way Helgrind
+    captures the stacktraces
+    when <option>--history-level=full</option> is in effect. Such a
+    stacktrace is typically needed each time a new piece of memory is
+    read or written in a basic block of instructions.</para>
+ <para><option>--delta-stacktrace=no</option> causes
+ Helgrind to compute a full history stacktrace from the unwind info
+ each time a stacktrace is needed.</para>
+    <para><option>--delta-stacktrace=yes</option> tells Helgrind to
+    derive a new stacktrace from the previous stacktrace, as long as no
+    call instruction, return instruction, or other instruction changing
+    the call stack was executed since the previous stacktrace was
+    captured. If no such instruction was executed, the new stacktrace
+    can be derived from the previous stacktrace by just changing the top
+    frame to the current program counter. This option can speed up
+    Helgrind by up to 25% when
+    using <option>--history-level=full</option>.</para>
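+    <para>For example, assuming <filename>myprog</filename> stands for
+    the program under test, the optimisation can be enabled explicitly
+    with:</para>
+<screen><![CDATA[valgrind --tool=helgrind --history-level=full \
+   --delta-stacktrace=yes myprog]]></screen>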
+    <para>The following aspects have to be considered when
+    using <option>--delta-stacktrace=yes</option>:
+ <itemizedlist>
+      <listitem>In some cases (for example in a function prologue), the
+        Valgrind unwinder might not properly unwind the stack, due to
+        limitations of the unwinder and/or wrong unwind info. When using
+        --delta-stacktrace=yes, the wrong stack trace captured in the
+        function prologue will be kept until the next call or return.
+      </listitem>
+      <listitem>On the other hand, --delta-stacktrace=yes sometimes helps
+        to obtain a correct stacktrace, for example when the unwind info
+        allows a correct stacktrace to be captured at the beginning of an
+        instruction sequence, but not later on in the sequence.</listitem>
+      <listitem>Determining which instructions change the call stack is
+        partially based on platform-dependent heuristics, which have to be
+        tuned/validated specifically for the platform. Also, unwinding in
+        a function prologue must be good enough to allow using
+        --delta-stacktrace=yes. Currently, the option --delta-stacktrace=yes
+        has been reasonably validated only on linux x86 (32 bits) and linux
+        amd64 (64 bits). For more details about how to validate
+        --delta-stacktrace=yes, see the debug option --hg-sanity-flags,
+        the function check_cached_rcec_ok in libhb_core.c, and the usage
+        sketch below.</listitem>
+ </itemizedlist>
+ </para>
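+    <para>To validate the heuristics on a new platform, the cached
+    stacktraces can be cross-checked against stacktraces computed from
+    scratch at each memory access, by combining this option with the
+    sanity flag for memory accesses. A usage sketch,
+    with <filename>myprog</filename> standing for the program under
+    test:</para>
+<screen><![CDATA[valgrind --tool=helgrind --history-level=full \
+   --delta-stacktrace=yes --hg-sanity-flags=001000 myprog]]></screen>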
+ </listitem>
+ </varlistentry>
<varlistentry id="opt.conflict-cache-size"
xreflabel="--conflict-cache-size">
UWord HG_(clo_history_level) = 2;
+#if (defined(VGA_x86) || defined(VGA_amd64)) && defined(VGO_linux)
+// Set to True on setups where the heuristics were (reasonably) validated.
+Bool HG_(clo_delta_stacktrace) = True;
+#else
+Bool HG_(clo_delta_stacktrace) = False;
+#endif
+
UWord HG_(clo_conflict_cache_size) = 2000000;
UWord HG_(clo_sanity_flags) = 0;
very useful). */
extern UWord HG_(clo_history_level);
+/* For full history level, determines how the stack trace is computed.
+   no  : a stacktrace is always computed from scratch, typically
+         using the unwind information.
+   yes : a stacktrace might be derived from a previously captured
+         stacktrace, by applying a delta to the innermost frame. This can
+         significantly speed up --history-level=full. Typically, a delta
+         can be applied if no control flow instruction (call, return, ...)
+         was executed since the previous stacktrace was captured. */
+extern Bool HG_(clo_delta_stacktrace);
+
/* When doing "full" history collection, this determines the size of
the conflicting-access cache, measured in terms of maximum possible
number of elements in the previous-access map. Must be between 10k
/* Place where parent was when this thread was created. */
ExeContext* created_at;
Bool announced;
+   /* != 0 if an SP fixup is needed for unwinding: contains the SP delta
+      to use when evh__mem_help_c(read|write)_X is called in the
+      'middle of an instruction' (e.g. in a push),
+      after the SP was changed but before the push has completed. */
+   Word first_sp_delta;
/* Index for generating references in error messages. */
Int errmsg_index;
thread->coretid = VG_INVALID_THREADID;
thread->created_at = NULL;
thread->announced = False;
+ thread->first_sp_delta = 0;
thread->errmsg_index = indx++;
thread->admin = admin_threads;
thread->synchr_nesting = 0;
LIBHB_CWRITE_4(hbthr, a);
}
+/* Same as evh__mem_help_cwrite_4, but the unwind will use a
+   first_sp_delta of one word. */
+static VG_REGPARM(1)
+void evh__mem_help_cwrite_4_fixupSP(Addr a) {
+ Thread* thr = get_current_Thread_in_C_C();
+ Thr* hbthr = thr->hbthr;
+
+ thr->first_sp_delta = sizeof(Word);
+ if (LIKELY(thr->synchr_nesting == 0))
+ LIBHB_CWRITE_4(hbthr, a);
+ thr->first_sp_delta = 0;
+}
+
static VG_REGPARM(1)
void evh__mem_help_cwrite_8(Addr a) {
Thread* thr = get_current_Thread_in_C_C();
LIBHB_CWRITE_8(hbthr, a);
}
+/* Same as evh__mem_help_cwrite_8, but the unwind will use a
+   first_sp_delta of one word. */
+static VG_REGPARM(1)
+void evh__mem_help_cwrite_8_fixupSP(Addr a) {
+ Thread* thr = get_current_Thread_in_C_C();
+ Thr* hbthr = thr->hbthr;
+
+ thr->first_sp_delta = sizeof(Word);
+ if (LIKELY(thr->synchr_nesting == 0))
+ LIBHB_CWRITE_8(hbthr, a);
+ thr->first_sp_delta = 0;
+}
+
static VG_REGPARM(2)
void evh__mem_help_cwrite_N(Addr a, SizeT size) {
Thread* thr = get_current_Thread_in_C_C();
IRExpr* addr,
Int szB,
Bool isStore,
+ Bool fixupSP_needed,
Int hWordTy_szB,
Int goff_sp,
+ Int goff_sp_s1,
+                              /* goff_sp_s1 is the offset in the guest
+                                 state where the cached stack validity
+                                 is stored. */
IRExpr* guard ) /* NULL => True */
{
IRType tyAddr = Ity_INVALID;
argv = mkIRExprVec_1( addr );
break;
case 4:
- hName = "evh__mem_help_cwrite_4";
- hAddr = &evh__mem_help_cwrite_4;
+ if (fixupSP_needed) {
+ /* Unwind has to be done with a SP fixed up with one word.
+ See Ist_Put heuristic in hg_instrument. */
+ hName = "evh__mem_help_cwrite_4_fixupSP";
+ hAddr = &evh__mem_help_cwrite_4_fixupSP;
+ } else {
+ hName = "evh__mem_help_cwrite_4";
+ hAddr = &evh__mem_help_cwrite_4;
+ }
argv = mkIRExprVec_1( addr );
break;
case 8:
- hName = "evh__mem_help_cwrite_8";
- hAddr = &evh__mem_help_cwrite_8;
+ if (fixupSP_needed) {
+ /* Unwind has to be done with a SP fixed up with one word.
+ See Ist_Put heuristic in hg_instrument. */
+ hName = "evh__mem_help_cwrite_8_fixupSP";
+ hAddr = &evh__mem_help_cwrite_8_fixupSP;
+ } else {
+ hName = "evh__mem_help_cwrite_8";
+ hAddr = &evh__mem_help_cwrite_8;
+ }
argv = mkIRExprVec_1( addr );
break;
default:
hName, VG_(fnptr_to_fnentry)( hAddr ),
argv );
+ if (HG_(clo_delta_stacktrace)) {
+      /* The memory access helper might read the shadow1 SP offset, which
+         indicates whether the cached stacktrace is valid. */
+ di->fxState[0].fx = Ifx_Read;
+ di->fxState[0].offset = goff_sp_s1;
+ di->fxState[0].size = hWordTy_szB;
+ di->fxState[0].nRepeats = 0;
+ di->fxState[0].repeatLen = 0;
+ di->nFxState = 1;
+ }
+
if (! HG_(clo_check_stack_refs)) {
/* We're ignoring memory references which are (obviously) to the
stack. In fact just skip stack refs that are within 4 pages
return VG_(is_soname_ld_so)(soname);
}
+static
+void addInvalidateCachedStack (IRSB* bbOut,
+ Int goff_sp_s1,
+ Int hWordTy_szB)
+{
+   /* Invalidate the cached stack: write 0 at the shadow1 SP offset. */
+ addStmtToIRSB( bbOut,
+ IRStmt_Put(goff_sp_s1,
+ hWordTy_szB == 4 ?
+ mkU32(0) : mkU64(0)));
+   /// ???? anything more efficient than assigning a Word ????
+}
+
static
IRSB* hg_instrument ( VgCallbackClosure* closure,
IRSB* bbIn,
Bool inLDSO = False;
Addr inLDSOmask4K = 1; /* mismatches on first check */
- const Int goff_sp = layout->offset_SP;
+  // Set to True when the SP must be fixed up when taking a stack trace
+  // for the memory accesses in the rest of the instruction.
+ Bool fixupSP_needed = False;
+
+ const Int goff_SP = layout->offset_SP;
+  /* The SP in shadow1 indicates whether the cached stack is valid.
+     We have to invalidate the cached stack e.g. when seeing a call or ret. */
+ const Int goff_SP_s1 = layout->total_sizeB + layout->offset_SP;
+ const Int hWordTy_szB = sizeofIRType(hWordTy);
if (gWordTy != hWordTy) {
/* We don't currently support this case. */
tl_assert(st);
tl_assert(isFlatIRStmt(st));
switch (st->tag) {
+ case Ist_Exit:
+         /* No memory reference, but if we do anything other than
+            Ijk_Boring, indicate to Helgrind that the previously
+            recorded stack is invalid.
+            For Ijk_Boring, also invalidate the stack if the exit
+            instruction has no CF info. This heuristic avoids cached
+            stack trace mismatches in some cases, such as in longjmp
+            implementations. Similar logic below for the bb exit. */
+ if (HG_(clo_delta_stacktrace)
+ && (st->Ist.Exit.jk != Ijk_Boring || ! VG_(has_CF_info)(cia)))
+ addInvalidateCachedStack(bbOut, goff_SP_s1, hWordTy_szB);
+ break;
case Ist_NoOp:
case Ist_AbiHint:
+ /* None of these can contain any memory references. */
+ break;
case Ist_Put:
+ /* This cannot contain any memory references. */
+         /* If we see a Put to the SP, then from now on in this
+            instruction, the SP value needed for unwinding has to be fixed
+            up by one word. This very simple heuristic ensures correct
+            unwinding in the typical case of a push instruction. If we
+            need to cover more cases, then we need to track more precisely
+            how the SP is modified by the instruction (and calculate a
+            precise SP delta), rather than assuming that the SP is
+            decremented by one Word. */
+ if (HG_(clo_delta_stacktrace) && st->Ist.Put.offset == goff_SP) {
+ fixupSP_needed = True;
+ }
+ break;
case Ist_PutI:
- case Ist_Exit:
- /* None of these can contain any memory references. */
+ /* This cannot contain any memory references. */
break;
case Ist_IMark:
+ fixupSP_needed = False;
+
/* no mem refs, but note the insn address. */
cia = st->Ist.IMark.addr;
+
/* Don't instrument the dynamic linker. It generates a
lot of races which we just expensively suppress, so
it's pointless.
cas->addr,
(isDCAS ? 2 : 1)
* sizeofIRType(typeOfIRExpr(bbIn->tyenv, cas->dataLo)),
- False/*!isStore*/,
- sizeofIRType(hWordTy), goff_sp,
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
NULL/*no-guard*/
);
}
bbOut,
st->Ist.LLSC.addr,
sizeofIRType(dataTy),
- False/*!isStore*/,
- sizeofIRType(hWordTy), goff_sp,
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
NULL/*no-guard*/
);
}
bbOut,
st->Ist.Store.addr,
sizeofIRType(typeOfIRExpr(bbIn->tyenv, st->Ist.Store.data)),
- True/*isStore*/,
- sizeofIRType(hWordTy), goff_sp,
+ True/*isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
NULL/*no-guard*/
);
}
IRType type = typeOfIRExpr(bbIn->tyenv, data);
tl_assert(type != Ity_INVALID);
instrument_mem_access( bbOut, addr, sizeofIRType(type),
- True/*isStore*/,
- sizeofIRType(hWordTy),
- goff_sp, sg->guard );
+ True/*isStore*/, fixupSP_needed,
+ hWordTy_szB,
+ goff_SP, goff_SP_s1, sg->guard );
break;
}
typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
tl_assert(type != Ity_INVALID);
instrument_mem_access( bbOut, addr, sizeofIRType(type),
- False/*!isStore*/,
- sizeofIRType(hWordTy),
- goff_sp, lg->guard );
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB,
+ goff_SP, goff_SP_s1, lg->guard );
break;
}
bbOut,
data->Iex.Load.addr,
sizeofIRType(data->Iex.Load.ty),
- False/*!isStore*/,
- sizeofIRType(hWordTy), goff_sp,
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
NULL/*no-guard*/
);
}
if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
if (!inLDSO) {
instrument_mem_access(
- bbOut, d->mAddr, dataSize, False/*!isStore*/,
- sizeofIRType(hWordTy), goff_sp, NULL/*no-guard*/
+ bbOut, d->mAddr, dataSize,
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
+ NULL/*no-guard*/
);
}
}
if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
if (!inLDSO) {
instrument_mem_access(
- bbOut, d->mAddr, dataSize, True/*isStore*/,
- sizeofIRType(hWordTy), goff_sp, NULL/*no-guard*/
+ bbOut, d->mAddr, dataSize,
+ True/*isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
+ NULL/*no-guard*/
);
}
}
addStmtToIRSB( bbOut, st );
} /* iterate over bbIn->stmts */
+  // See the Ist_Exit case above.
+ if (HG_(clo_delta_stacktrace)
+ && (bbOut->jumpkind != Ijk_Boring || ! VG_(has_CF_info)(cia)))
+ addInvalidateCachedStack(bbOut, goff_SP_s1, hWordTy_szB);
+
return bbOut;
}
else if VG_XACT_CLO(arg, "--history-level=full",
HG_(clo_history_level), 2);
+ else if VG_BOOL_CLO(arg, "--delta-stacktrace",
+ HG_(clo_delta_stacktrace)) {}
+
else if VG_BINT_CLO(arg, "--conflict-cache-size",
HG_(clo_conflict_cache_size), 10*1000, 150*1000*1000) {}
" full: show both stack traces for a data race (can be very slow)\n"
" approx: full trace for one thread, approx for the other (faster)\n"
" none: only show trace for one thread in a race (fastest)\n"
+" --delta-stacktrace=no|yes [yes on linux amd64/x86]\n"
+" no : always compute a full history stacktrace from unwind info\n"
+" yes : derive a stacktrace from the previous stacktrace\n"
+" if there was no call/return or similar instruction\n"
" --conflict-cache-size=N size of 'full' history cache [2000000]\n"
" --check-stack-refs=no|yes race-check reads and writes on the\n"
" main stack and thread stacks? [yes]\n"
VG_(printf)(" --hg-sanity-flags values:\n");
VG_(printf)(" 010000 after changes to "
"lock-order-acquisition-graph\n");
- VG_(printf)(" 001000 at memory accesses (NB: not currently used)\n");
+ VG_(printf)(" 001000 at memory accesses\n");
VG_(printf)(" 000100 at mem permission setting for "
"ranges >= %d bytes\n", SCE_BIGRANGE_T);
VG_(printf)(" 000010 at lock/unlock events\n");
thr = libhb_get_Thr_hgthread( hbt );
tl_assert(thr);
tid = map_threads_maybe_reverse_lookup_SLOW(thr);
- nActual = (UWord)VG_(get_StackTrace)( tid, frames, (UInt)nRequest,
- NULL, NULL, 0 );
+ nActual = (UWord)VG_(get_StackTrace_with_deltas)
+ ( tid, frames, (UInt)nRequest,
+ NULL, NULL, 0,
+ thr->first_sp_delta);
tl_assert(nActual <= nRequest);
for (; nActual < nRequest; nActual++)
frames[nActual] = 0;
{
Thr* hbthr_root;
+ if (HG_(clo_delta_stacktrace)
+ && VG_(clo_vex_control).guest_chase_thresh != 0) {
+ if (VG_(clo_verbosity) >= 2)
+ VG_(message)(Vg_UserMsg,
+ "helgrind --delta-stacktrace=yes only works with "
+ "--vex-guest-chase-thresh=0\n"
+                   "=> (re-setting it to 0)\n");
+ VG_(clo_vex_control).guest_chase_thresh = 0;
+ }
+
/////////////////////////////////////////////
hbthr_root = libhb_init( for_libhb__get_stacktrace,
for_libhb__get_EC );
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
+#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_hashtable.h"
#include "pub_tool_stacktrace.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
+#include "pub_tool_debuginfo.h"
+#include "pub_tool_gdbserver.h"
#include "pub_tool_options.h" // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
# define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
#endif
+/* Define to 1 to activate tracing of the cached rcec. */
+#define DEBUG_CACHED_RCEC 0
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
#define N_KWs_N_STACKs_PER_THREAD 62500
+#define N_FRAMES 8
+// (UInt) `echo "Reference Counted Execution Context" | md5sum`
+#define RCEC_MAGIC 0xab88abb2UL
+
+/* RCEC usage is commented in more detail in the section
+   'Change-event map2' later in this file. */
+typedef
+ struct _RCEC {
+ UWord magic; /* sanity check only */
+ struct _RCEC* next;
+ UWord rc;
+ UWord rcX; /* used for crosschecking */
+ UWord frames_hash; /* hash of all the frames */
+ UWord frames[N_FRAMES];
+ }
+ RCEC;
+
struct _Thr {
/* Current VTSs for this thread. They change as we go along. viR
is the VTS to be used for reads, viW for writes. Usually they
Thread should be merged into a single structure. */
Thread* hgthread;
+ /* cached_rcec maintains the last RCEC that was retrieved for this thread. */
+ RCEC cached_rcec; // cached_rcec value, not ref-counted.
+   /* The SP register in the shadow1 guest state (SP_s1) is used to
+      maintain the validity of the cached rcec.
+      If SP_s1 is 0, then the cached rcec is invalid (cannot be used).
+      If SP_s1 is != 0, then the cached rcec is valid. The valid cached
+      rcec can be used to generate a new RCEC by changing just the last
+      frame. */
+
/* The ULongs (scalar Kws) in this accumulate in strictly
increasing order, without duplicates. This is important because
we need to be able to find a given scalar Kw in this array
return thr;
}
+/* True if the cached rcec for thr is valid and can be used to build the
+ current stack trace just by changing the last frame to the current IP. */
+static inline Bool cached_rcec_valid(Thr *thr)
+{
+ UWord cached_stackvalid = VG_(get_SP_s1) (thr->hgthread->coretid);
+ return cached_stackvalid != 0;
+}
+/* Set the validity of the cached rcec of thr. */
+static inline void set_cached_rcec_validity(Thr *thr, Bool valid)
+{
+ VG_(set_SP_s1) (thr->hgthread->coretid, valid);
+}
+
static Thr* Thr__new ( void )
{
Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
= VG_(newXA)( HG_(zalloc),
"libhb.Thr__new.3 (local_Kws_and_stacks)",
HG_(free), sizeof(ULong_n_EC) );
+ /* Make an 'empty' cached rcec in thr. */
+ thr->cached_rcec.magic = RCEC_MAGIC;
+ thr->cached_rcec.rc = 0;
+ thr->cached_rcec.rcX = 0;
+ thr->cached_rcec.next = NULL;
/* Add this Thr* <-> ThrID binding to the mapping, and
cross-check */
//// Part (1): A hash table of RCECs
///
-#define N_FRAMES 8
-
-// (UInt) `echo "Reference Counted Execution Context" | md5sum`
-#define RCEC_MAGIC 0xab88abb2UL
-
//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */
-typedef
- struct _RCEC {
- UWord magic; /* sanity check only */
- struct _RCEC* next;
- UWord rc;
- UWord rcX; /* used for crosschecking */
- UWord frames_hash; /* hash of all the frames */
- UWord frames[N_FRAMES];
- }
- RCEC;
-
//////////// BEGIN RCEC pool allocator
static PoolAlloc* rcec_pool_allocator;
static RCEC* alloc_RCEC ( void ) {
/* Count of allocated RCEC having ref count > 0 */
static UWord RCEC_referenced = 0;
-/* Gives an arbitrary total order on RCEC .frames fields */
-static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
+/* True if the frames of ec1 and ec2 are different. */
+static Bool RCEC__differs_by_frames ( RCEC* ec1, RCEC* ec2 ) {
Word i;
tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
- if (ec1->frames_hash < ec2->frames_hash) return -1;
- if (ec1->frames_hash > ec2->frames_hash) return 1;
+ if (ec1->frames_hash != ec2->frames_hash) return True;
for (i = 0; i < N_FRAMES; i++) {
- if (ec1->frames[i] < ec2->frames[i]) return -1;
- if (ec1->frames[i] > ec2->frames[i]) return 1;
+ if (ec1->frames[i] != ec2->frames[i]) return True;
}
- return 0;
+ return False;
}
-
/* Dec the ref of this RCEC. */
static void ctxt__rcdec ( RCEC* ec )
{
if (!copy) break;
tl_assert(copy->magic == RCEC_MAGIC);
stats__ctxt_tab_cmps++;
- if (0 == RCEC__cmp_by_frames(copy, example)) break;
+ if (!RCEC__differs_by_frames(copy, example)) break;
copy = copy->next;
}
return w;
}
+static UWord stats__cached_rcec_identical = 0;
+static UWord stats__cached_rcec_updated = 0;
+static UWord stats__cached_rcec_fresh = 0;
+static UWord stats__cached_rcec_diff = 0;
+static UWord stats__cached_rcec_diff_known_reason = 0;
+
+/* Check if the cached rcec in thr corresponds to the current
+ stacktrace of the thread. Returns True if ok, False otherwise.
+ This is just used for debugging the cached rcec logic, activated
+ using --hg-sanity-flags=xx1xxx i.e. SCE_ACCESS flag.
+ When this flag is activated, a call to this function will happen each time
+ a stack trace is needed for a memory access. */
+__attribute__((noinline))
+static Bool check_cached_rcec_ok (Thr* thr, Addr previous_frame0)
+{
+ Bool ok = True;
+ UInt i;
+ UWord frames[N_FRAMES];
+ UWord sps[N_FRAMES];
+ UWord fps[N_FRAMES];
+
+ for (i = 0; i < N_FRAMES; i++)
+ frames[i] = sps[i] = fps[i] = 0;
+ VG_(get_StackTrace)( thr->hgthread->coretid, &frames[0], N_FRAMES,
+ &sps[0], &fps[0], 0);
+ for (i = 0; i < N_FRAMES; i++) {
+ if ( thr->cached_rcec.frames[i] != frames[i] ) {
+ /* There are a bunch of "normal" reasons for which a stack
+ derived from the cached rcec differs from frames. */
+ const HChar *reason = NULL;
+
+ /* Old linkers (e.g. RHEL5) gave no cfi unwind information in the PLT
+ section (fix was added in binutils around June 2011).
+           Without PLT unwind info, stacktraces in the PLT section are
+ missing an entry. E.g. the cached stacktrace is:
+ ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
+ ==4463== by 0x33B7F9: __libc_thread_freeres
+ (in /lib/libc-2.11.2.so)
+ ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
+ ==4463== by 0x2F107D: clone (clone.S:130)
+ while the 'check stacktrace' is
+ ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
+ ==4463== by 0x33B82D: strerror_thread_freeres
+ (in /lib/libc-2.11.2.so)
+ ==4463== by 0x33B7F9: __libc_thread_freeres
+ (in /lib/libc-2.11.2.so)
+ ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
+ ==4463== by 0x2F107D: clone (clone.S:130)
+ No cheap/easy way to detect or fix that. */
+
+ /* It seems that sometimes, the CFI unwind info looks wrong
+ for a 'ret' instruction. E.g. here is the unwind info
+ for a 'retq' on gcc20 (amd64, Debian 7)
+ [0x4e3ddfe .. 0x4e3ddfe]: let cfa=oldSP+48 in RA=*(cfa+-8)
+ SP=cfa+0 BP=*(cfa+-24)
+ This unwind info looks doubtful, as the RA should be at oldSP.
+ No easy way to detect this problem.
+ This gives a difference between cached rcec and
+ current stack trace: the cached rcec is correct. */
+
+ /* When returning from main, unwind info becomes erratic.
+ So, by default, only report errors for main and above,
+ unless asked to show below main. */
+ if (reason == NULL) {
+ UInt fr_main;
+ Vg_FnNameKind fr_kind;
+ for (fr_main = 0; fr_main < N_FRAMES; fr_main++) {
+ fr_kind = VG_(get_fnname_kind_from_IP)
+ (frames[fr_main]);
+ if (fr_kind == Vg_FnNameMain || fr_kind == Vg_FnNameBelowMain)
+ break;
+ }
+ UInt kh_main;
+ Vg_FnNameKind kh_kind;
+ for (kh_main = 0; kh_main < N_FRAMES; kh_main++) {
+ kh_kind = VG_(get_fnname_kind_from_IP)
+ (thr->cached_rcec.frames[kh_main]);
+ if (kh_kind == Vg_FnNameMain || kh_kind == Vg_FnNameBelowMain)
+ break;
+ }
+ if (kh_main == fr_main
+ && kh_kind == fr_kind
+ && (kh_main < i || (kh_main == i
+ && kh_kind == Vg_FnNameBelowMain))) {
+ // found main or below main before the difference
+ reason = "Below main";
+ }
+ }
+
+ /* We have places where the stack is missing some internal
+ pthread functions. For such stacktraces, GDB reports only
+ one function, telling:
+ #0 0xf7fa81fe in _L_unlock_669 ()
+ from /lib/i386-linux-gnu/libpthread.so.0
+ Backtrace stopped: previous frame identical to
+ this frame (corrupt stack?)
+
+ This is when sps and fps are identical.
+ The cached stack trace is then
+ ==3336== at 0x40641FE: _L_unlock_669
+ (pthread_mutex_unlock.c:310)
+ ==3336== by 0x40302BE: pthread_mutex_unlock
+ (hg_intercepts.c:710)
+ ==3336== by 0x80486AF: main (cond_timedwait_test.c:14)
+ while the 'check stacktrace' is
+ ==3336== at 0x40641FE: _L_unlock_669
+ (pthread_mutex_unlock.c:310)
+ ==3336== by 0x4064206: _L_unlock_669
+ (pthread_mutex_unlock.c:310)
+ ==3336== by 0x4064132: __pthread_mutex_unlock_usercnt
+ (pthread_mutex_unlock.c:57)
+ ==3336== by 0x40302BE: pthread_mutex_unlock
+ (hg_intercepts.c:710)
+ ==3336== by 0x80486AF: main (cond_timedwait_test.c:14) */
+ if (reason == NULL) {
+ if ((i > 0
+ && sps[i] == sps[i-1] && fps[i] == fps[i-1])
+ || (i < N_FRAMES-1
+ && sps[i] == sps[i+1] && fps[i] == fps[i+1])) {
+ reason = "previous||next frame: identical sp and fp";
+ }
+ }
+ if (reason == NULL) {
+ if ((i > 0
+ && fps[i] == fps[i-1])
+ || (i < N_FRAMES-1
+ && fps[i] == fps[i+1])) {
+ reason = "previous||next frame: identical fp";
+ }
+ }
+
+         /* When we have a read or write 'in the middle of a push
+            instruction', then the normal backtrace is not very good, while
+            the Helgrind stacktrace is better, as it undoes the
+            not-yet-completed push instruction before taking the
+            stacktrace. */
+ if (reason == NULL && thr->hgthread->first_sp_delta != 0) {
+ reason = "fixupSP probably needed for check stacktrace";
+ }
+
+ /* Unwinding becomes hectic when running the exit handlers.
+ None of GDB, cached stacktrace and check stacktrace corresponds.
+ So, if we find __run_exit_handlers, ignore the difference. */
+ if (reason == NULL) {
+ const HChar *fnname;
+ for (UInt f = 0; f < N_FRAMES; f++) {
+ if (VG_(get_fnname)( frames[f], &fnname)
+ && VG_(strcmp) ("__run_exit_handlers", fnname) == 0) {
+ reason = "exit handlers";
+ break;
+ }
+ }
+ }
+
+ // Show what we have found for this difference
+ if (reason == NULL) {
+ ok = False;
+ stats__cached_rcec_diff++;
+ } else {
+ ok = True;
+ stats__cached_rcec_diff_known_reason++;
+ }
+ if (!ok || VG_(clo_verbosity) > 2) {
+ Bool save_show_below_main = VG_(clo_show_below_main);
+ VG_(clo_show_below_main) = True;
+ /* The below error msg reports an unexpected diff in 'frame %d'.
+ The (maybe wrong) pc found in the cached stacktrace is
+ 'cached_pc %p' while an unwind gives the (maybe wrong)
+ 'check_pc %p'.
+ After, 'previous_frame0 %p' tells where the cached stacktrace
+ was taken.
+            This is then followed by the full resulting cached stack trace
+            and the full stack trace found by unwinding.
+ Such a diff can have various origins:
+ * a bug in the unwinder, when the cached stack trace was taken
+ at 'previous_frame0'
+ * a bug in the unwinder, when the check stack trace was taken
+ (i.e. at current pc).
+            * a missing 'invalidate cached stack trace' somewhere in the
+              instructions between 'previous_frame0' and current_pc.
+            To investigate the last case, typically disassemble the range
+            of instructions where an invalidation of the cached stack
+            might be missing. */
+ VG_(printf)("%s diff tid %d frame %d "
+ "cached_pc %p check_pc %p\n",
+ reason ? reason : "unexpected",
+ thr->hgthread->coretid,
+ i,
+ (void*)thr->cached_rcec.frames[i],
+ (void*)frames[i]);
+ VG_(printf)("cached stack trace previous_frame0 %p\n",
+ (void*)previous_frame0);
+ VG_(pp_StackTrace)(&previous_frame0, 1);
+ VG_(printf)("resulting cached stack trace:\n");
+ VG_(pp_StackTrace)(thr->cached_rcec.frames, N_FRAMES);
+ VG_(printf)("check stack trace:\n");
+ VG_(pp_StackTrace)(frames, N_FRAMES);
+
+ VG_(show_sched_status) (False, // host_stacktrace
+ False, // stack_usage
+ False); // exited_threads
+         if (VG_(clo_vgdb_error) == 1234567890) // HACK TO ALLOW DEBUGGING
+ VG_(gdbserver) ( thr->hgthread->coretid );
+ VG_(clo_show_below_main) = save_show_below_main;
+ }
+ break; // Stop giving more errors for this stacktrace.
+ }
+ }
+ return ok;
+}
+
__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
- UWord hash, i;
- RCEC example;
- example.magic = RCEC_MAGIC;
- example.rc = 0;
- example.rcX = 0;
- example.next = NULL;
- main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
+ UInt i;
+ UWord hash;
+ Addr previous_frame0 = 0; // Assignment needed to silence gcc
+ RCEC *res;
+ const Bool thr_cached_rcec_valid = cached_rcec_valid(thr);
+ const Addr cur_ip = VG_(get_IP)(thr->hgthread->coretid);
+
+ if (DEBUG_CACHED_RCEC)
+ VG_(printf)("get rcec tid %d at IP %p SP %p"
+ " first_sp_delta %ld cached valid %d\n",
+ thr->hgthread->coretid,
+ (void*)cur_ip,
+ (void*)VG_(get_SP)(thr->hgthread->coretid),
+ thr->hgthread->first_sp_delta, thr_cached_rcec_valid);
+
+ /* If we have a valid cached rcec, derive the new rcec from the cached one
+ and update the cached one.
+ Otherwise, compute a fresh rcec. */
+
+ if (thr_cached_rcec_valid) {
+ /* Update the stacktrace of the cached rcec with the current IP */
+ previous_frame0 = thr->cached_rcec.frames[0];
+ thr->cached_rcec.frames[0] = cur_ip;
+
+# if defined(VGP_x86_linux)
+ // See m_stacktrace.c kludge
+ extern Addr VG_(client__dl_sysinfo_int80);
+ /// #include pub_core_clientstate needed for the above ????
+ /// or move the above into a pub_tool_??? tool_stacktrace.h maybe ????
+ if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
+ && cur_ip >= VG_(client__dl_sysinfo_int80)
+ && cur_ip < VG_(client__dl_sysinfo_int80)+3
+ ) {
+ thr->cached_rcec.frames[0]
+ = (ULong) *(Addr*)(UWord)VG_(get_SP)(thr->hgthread->coretid);
+ }
+# endif
+
+ if (previous_frame0 == thr->cached_rcec.frames[0])
+ stats__cached_rcec_identical++;
+ else
+ stats__cached_rcec_updated++;
+ } else {
+ /* Compute a fresh stacktrace. */
+ main_get_stacktrace( thr, &thr->cached_rcec.frames[0], N_FRAMES );
+ if (DEBUG_CACHED_RCEC) {
+ Bool save_show_below_main = VG_(clo_show_below_main);
+ VG_(clo_show_below_main) = True;
+ VG_(printf)("caching stack trace:\n");
+ VG_(pp_StackTrace)(&thr->cached_rcec.frames[0], N_FRAMES);
+ VG_(clo_show_below_main) = save_show_below_main;
+ }
+ stats__cached_rcec_fresh++;
+ }
+
hash = 0;
for (i = 0; i < N_FRAMES; i++) {
- hash ^= example.frames[i];
+ hash ^= thr->cached_rcec.frames[i];
hash = ROLW(hash, 19);
}
- example.frames_hash = hash;
- return ctxt__find_or_add( &example );
+ thr->cached_rcec.frames_hash = hash;
+ res = ctxt__find_or_add( &thr->cached_rcec );
+
+ if (UNLIKELY(HG_(clo_sanity_flags) & SCE_ACCESS)
+ && thr_cached_rcec_valid) {
+      /* If the cached and check stacktraces differ, invalidate the cached
+         rcec: this way, fewer duplicated diffs are reported afterwards. */
+ if (!check_cached_rcec_ok (thr, previous_frame0))
+ set_cached_rcec_validity(thr, False);
+ } else {
+ if (HG_(clo_delta_stacktrace) && !thr_cached_rcec_valid)
+ set_cached_rcec_validity(thr, True);
+ }
+
+ return res;
}
///////////////////////////////////////////////////////
(UWord)N_RCEC_TAB,
stats__ctxt_tab_curr, RCEC_referenced,
stats__ctxt_tab_max );
+ VG_(printf) (" libhb: stats__cached_rcec "
+ "identical %'lu updated %'lu fresh %'lu\n",
+ stats__cached_rcec_identical, stats__cached_rcec_updated,
+ stats__cached_rcec_fresh);
+ if (stats__cached_rcec_diff > 0)
+      VG_(printf) ("   libhb: stats__cached_rcec diff unknown reason %'lu\n",
+ stats__cached_rcec_diff);
+ if (stats__cached_rcec_diff_known_reason > 0)
+ VG_(printf) (" libhb: stats__cached_rcec diff known reason %'lu\n",
+ stats__cached_rcec_diff_known_reason);
+
{
# define MAXCHAIN 10
UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
tl_assert(!thr->llexit_done);
thr->llexit_done = True;
+   /* Just to be sure, mark the cached stack as invalid. */
+ set_cached_rcec_validity(thr, False);
+
/* free up Filter and local_Kws_n_stacks (well, actually not the
latter ..) */
tl_assert(thr->filter);
Addr data_addr
);
+/* True if we have some Call Frame unwind debuginfo for Addr a. */
+extern Bool VG_(has_CF_info)(Addr a);
+
/* Succeeds if the address is within a shared object or the main executable.
It first searches if Addr a belongs to the text segment of debug info.
If not found, it asks the address space manager whether it
Int line, const HChar* fn,
const HChar* format, ... );
+/* Show various thread-related information, such
+   as the guest stacktrace for each thread.
+ Mostly for debugging V.
+ The following activates optional output:
+ host_stacktrace : shows the host stacktrace.
+ stack_usage True means:
+ shows how much of the valgrind stack was used.
+ shows the client stack range
+ exited_thread_slots : show information for thread slots that were used
+ but the thread has now exited. */
+extern void VG_(show_sched_status) ( Bool host_stacktrace,
+ Bool stack_usage,
+ Bool exited_threads);
+
#endif // __PUB_TOOL_LIBCASSERT_H
/*--------------------------------------------------------------------*/
Addr VG_(get_IP) ( ThreadId tid );
Addr VG_(get_SP) ( ThreadId tid );
+// Get and set the shadow1 SP register
+Addr VG_(get_SP_s1) ( ThreadId tid );
+void VG_(set_SP_s1) ( ThreadId tid, Addr sp );
// For get/set, 'area' is where the asked-for guest state will be copied
// into/from. If shadowNo == 0, the real (non-shadow) guest state is
/*OUT*/StackTrace fps,
Word first_ip_delta );
+// Same as VG_(get_StackTrace), but additionally applies first_sp_delta to
+// the SP used for unwinding the first frame.
+extern UInt VG_(get_StackTrace_with_deltas)(
+ ThreadId tid,
+ /*OUT*/StackTrace ips, UInt n_ips,
+ /*OUT*/StackTrace sps,
+ /*OUT*/StackTrace fps,
+ Word first_ip_delta,
+ Word first_sp_delta
+ );
+
// Apply a function to every element in the StackTrace. The parameter
// 'n' gives the index of the passed ip. 'opaque' is an arbitrary
// pointer provided to each invocation of 'action' (a poor man's