This patch implements the flag --delta-stacktrace=yes/no.
Yes tells Helgrind to compute the full history stack traces by
changing just the top frame of the previously captured stack trace,
provided no call/return (or other stack-changing) instruction was
executed since that capture.
This can speed up Helgrind by up to 25%.
This flag is currently set to yes by default only on linux x86 and
amd64, as some platform-dependent validation of the heuristics used is
needed before setting the default to yes on a platform. See the
function check_cached_rcec_ok in libhb_core.c for more details about
how to validate/check the behaviour on a new platform.
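
In essence, yes mode replaces most full unwinds by a one-slot update,
along the lines of the following sketch (illustrative names only; the
real code is get_RCEC in libhb_core.c, which keeps the validity flag
in the shadow1 SP register):

    #include <string.h>

    #define N_FRAMES 8
    typedef unsigned long Addr;

    static Addr cached_frames[N_FRAMES]; /* last captured stack trace */
    static int  cache_valid;             /* cleared on call/ret/... */

    /* Hypothetical full unwinder, standing in for VG_(get_StackTrace). */
    extern void full_unwind(Addr* frames, int n_frames);

    static void get_stacktrace(Addr cur_ip, Addr* out_frames)
    {
       if (cache_valid) {
          /* No stack-changing instruction since the last capture:
             only the top frame's program counter moved. */
          cached_frames[0] = cur_ip;
       } else {
          /* Fall back to a full unwind and re-validate the cache. */
          full_unwind(cached_frames, N_FRAMES);
          cache_valid = 1;
       }
       memcpy(out_frames, cached_frames, sizeof cached_frames);
    }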
* ==================== TOOL CHANGES ====================
+* Helgrind:
+
+  - Addition of the option --delta-stacktrace=no|yes [yes on linux
+    amd64/x86], specifying how full history stack traces are computed.
+    Using yes can speed up Helgrind by up to 25%.
* ==================== OTHER CHANGES ====================
where XXXXXX is the bug number as listed below.
255603 exp-sgcheck Assertion '!already_present' failed
+376257 helgrind history full speed up using a cached stack
379373 Fix syscall param msg->desc.port.name points to uninitialised byte(s)
on macOS 10.12
379748 Fix missing pselect syscall (OS X 10.11)
}
}
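+/* Returns True if there is Call Frame unwind info covering address a. */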
+Bool VG_(has_CF_info)(Addr a)
+{
+ return cfsi_m_cache__find (a) != NULL;
+}
+
+
inline
static Addr compute_cfa ( const D3UnwindRegs* uregs,
#define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR)
+#define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
+
Addr VG_(get_IP) ( ThreadId tid ) {
return INSTR_PTR( VG_(threads)[tid].arch );
}
return FRAME_PTR( VG_(threads)[tid].arch );
}
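+/* Get/set the shadow1 copy of the stack pointer (SP_s1).  Helgrind uses
+   this shadow register as a validity flag for its cached stack trace;
+   see libhb_core.c. */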
+Addr VG_(get_SP_s1) ( ThreadId tid ) {
+ return STACK_PTR_S1( VG_(threads)[tid].arch );
+}
+void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
+ STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
+}
+
void VG_(set_IP) ( ThreadId tid, Addr ip ) {
INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
/*--- Exported functions. ---*/
/*------------------------------------------------------------*/
-UInt VG_(get_StackTrace) ( ThreadId tid,
- /*OUT*/StackTrace ips, UInt max_n_ips,
- /*OUT*/StackTrace sps,
- /*OUT*/StackTrace fps,
- Word first_ip_delta )
+UInt VG_(get_StackTrace_with_deltas)(
+ ThreadId tid,
+ /*OUT*/StackTrace ips, UInt n_ips,
+ /*OUT*/StackTrace sps,
+ /*OUT*/StackTrace fps,
+ Word first_ip_delta,
+ Word first_sp_delta
+ )
{
/* Get the register values with which to start the unwind. */
UnwindStartRegs startRegs;
VG_(stack_limits)( (Addr)startRegs.r_sp,
&stack_lowest_byte, &stack_highest_byte );
- /* Take into account the first_ip_delta. */
- startRegs.r_pc += (Long)(Word)first_ip_delta;
+ /* Take into account the first_ip_delta and first_sp_delta. */
+ startRegs.r_pc += (Long)first_ip_delta;
+ startRegs.r_sp += (Long)first_sp_delta;
if (0)
VG_(printf)("tid %u: stack_highest=0x%08lx ip=0x%010llx "
tid, stack_highest_byte,
startRegs.r_pc, startRegs.r_sp);
- return VG_(get_StackTrace_wrk)(tid, ips, max_n_ips,
+ return VG_(get_StackTrace_wrk)(tid, ips, n_ips,
sps, fps,
&startRegs,
stack_highest_byte);
}
+UInt VG_(get_StackTrace) ( ThreadId tid,
+ /*OUT*/StackTrace ips, UInt max_n_ips,
+ /*OUT*/StackTrace sps,
+ /*OUT*/StackTrace fps,
+ Word first_ip_delta )
+{
+ return VG_(get_StackTrace_with_deltas) (tid,
+ ips, max_n_ips,
+ sps,
+ fps,
+ first_ip_delta,
+ 0 /* first_sp_delta */
+ );
+}
+
static void printIpDesc(UInt n, Addr ip, void* uu_opaque)
{
InlIPCursor *iipc = VG_(new_IIPC)(ip);
__attribute__((__noreturn__))
PRINTF_CHECK(1, 2);
-/* Show the state of various threads related information, such
- as the guest stacktrace for each thread.
- Mostly for debugging V.
- The following activates optional output:
- host_stacktrace : shows the host stacktrace.
- stack_usage True means:
- shows how much of the valgrind stack was used.
- shows the client stack range
- exited_thread_slots : show information for thread slots that were used
- but the thread has now exited. */
-extern void VG_(show_sched_status) ( Bool host_stacktrace,
- Bool stack_usage,
- Bool exited_threads);
-
#endif // __PUB_CORE_LIBCASSERT_H
/*--------------------------------------------------------------------*/
<option>--history-level=none</option>.</para>
</listitem>
</varlistentry>
+
+ <varlistentry id="opt.delta-stacktrace"
+ xreflabel="--delta-stacktrace">
+ <term>
+ <option><![CDATA[--delta-stacktrace=no|yes
+ [default: yes on linux amd64/x86] ]]></option>
+ </term>
+ <listitem>
+ <para>This flag only has any effect
+ at <option>--history-level=full</option>.</para>
+    <para><option>--delta-stacktrace</option> configures the way Helgrind
+    captures the stacktraces
+    when <option>--history-level=full</option> is in effect. Such a
+    stacktrace is typically needed each time a new piece of memory is
+    read or written in a basic block of instructions.</para>
+ <para><option>--delta-stacktrace=no</option> causes
+ Helgrind to compute a full history stacktrace from the unwind info
+ each time a stacktrace is needed.</para>
+    <para><option>--delta-stacktrace=yes</option> tells Helgrind to
+    derive a new stacktrace from the previous stacktrace, as long as no
+    call instruction, return instruction, or other instruction changing
+    the call stack was executed since the previous stacktrace was
+    captured. If no such instruction was executed, the new stacktrace
+    can be derived from the previous stacktrace by just changing the top
+    frame to the current program counter. This option can speed up
+    Helgrind by up to 25% when
+    using <option>--history-level=full</option>.</para>
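+    <para>For example, assuming <filename>myprog</filename> stands for
+    the program under test, the optimisation can be enabled explicitly
+    with:</para>
+<screen><![CDATA[valgrind --tool=helgrind --history-level=full \
+   --delta-stacktrace=yes myprog]]></screen>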
+    <para>The following aspects have to be considered when
+    using <option>--delta-stacktrace=yes</option>:
+ <itemizedlist>
+      <listitem>In some cases (for example in a function prologue), the
+        Valgrind unwinder might not properly unwind the stack, due to
+        limitations of the unwinder and/or wrong unwind info. When using
+        --delta-stacktrace=yes, the wrong stack trace captured in the
+        function prologue will be kept until the next call or return.
+      </listitem>
+      <listitem>On the other hand, --delta-stacktrace=yes sometimes helps
+        to obtain a correct stacktrace, for example when the unwind info
+        allows a correct stacktrace to be captured at the beginning of an
+        instruction sequence, but not later on in the sequence.</listitem>
+      <listitem>Determining which instructions change the call stack is
+        partially based on platform-dependent heuristics, which have to be
+        tuned/validated specifically for the platform. Also, unwinding in
+        a function prologue must be good enough to allow using
+        --delta-stacktrace=yes. Currently, the option --delta-stacktrace=yes
+        has been reasonably validated only on linux x86 (32 bits) and linux
+        amd64 (64 bits). For more details about how to validate
+        --delta-stacktrace=yes, see the debug option --hg-sanity-flags,
+        the function check_cached_rcec_ok in libhb_core.c, and the usage
+        sketch below.</listitem>
+ </itemizedlist>
+ </para>
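+    <para>To validate the heuristics on a new platform, the cached
+    stacktraces can be cross-checked against stacktraces computed from
+    scratch at each memory access, by combining this option with the
+    sanity flag for memory accesses. A usage sketch,
+    with <filename>myprog</filename> standing for the program under
+    test:</para>
+<screen><![CDATA[valgrind --tool=helgrind --history-level=full \
+   --delta-stacktrace=yes --hg-sanity-flags=001000 myprog]]></screen>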
+ </listitem>
+ </varlistentry>
<varlistentry id="opt.conflict-cache-size"
xreflabel="--conflict-cache-size">
UWord HG_(clo_history_level) = 2;
+#if (defined(VGA_x86) || defined(VGA_amd64)) && defined(VGO_linux)
+// Set to True on setups where the heuristics were (reasonably) validated.
+Bool HG_(clo_delta_stacktrace) = True;
+#else
+Bool HG_(clo_delta_stacktrace) = False;
+#endif
+
UWord HG_(clo_conflict_cache_size) = 2000000;
UWord HG_(clo_sanity_flags) = 0;
very useful). */
extern UWord HG_(clo_history_level);
+/* For full history level, determines how the stack trace is computed.
+   no  : a stacktrace is always computed from scratch, typically
+         using the unwind information.
+   yes : a stacktrace might be derived from a previously captured
+         stacktrace, by applying a delta to the innermost frame. This can
+         significantly speed up --history-level=full. Typically, a delta
+         can be applied if no control flow instruction (call, return, ...)
+         was executed since the previous stacktrace was captured. */
+extern Bool HG_(clo_delta_stacktrace);
+
/* When doing "full" history collection, this determines the size of
the conflicting-access cache, measured in terms of maximum possible
number of elements in the previous-access map. Must be between 10k
/* Place where parent was when this thread was created. */
ExeContext* created_at;
Bool announced;
+   /* != 0 if an SP fixup is needed for unwinding: contains the SP delta
+      to use when evh__mem_help_c(read|write)_X is called in the
+      'middle of an instruction' (e.g. in a push),
+      after the SP was changed but before the push has completed. */
+   Word first_sp_delta;
/* Index for generating references in error messages. */
Int errmsg_index;
thread->coretid = VG_INVALID_THREADID;
thread->created_at = NULL;
thread->announced = False;
+ thread->first_sp_delta = 0;
thread->errmsg_index = indx++;
thread->admin = admin_threads;
thread->synchr_nesting = 0;
LIBHB_CWRITE_4(hbthr, a);
}
+/* Same as evh__mem_help_cwrite_4, but the unwind will use a
+   first_sp_delta of one word. */
+static VG_REGPARM(1)
+void evh__mem_help_cwrite_4_fixupSP(Addr a) {
+ Thread* thr = get_current_Thread_in_C_C();
+ Thr* hbthr = thr->hbthr;
+
+ thr->first_sp_delta = sizeof(Word);
+ if (LIKELY(thr->synchr_nesting == 0))
+ LIBHB_CWRITE_4(hbthr, a);
+ thr->first_sp_delta = 0;
+}
+
static VG_REGPARM(1)
void evh__mem_help_cwrite_8(Addr a) {
Thread* thr = get_current_Thread_in_C_C();
LIBHB_CWRITE_8(hbthr, a);
}
+/* Same as evh__mem_help_cwrite_8, but the unwind will use a
+   first_sp_delta of one word. */
+static VG_REGPARM(1)
+void evh__mem_help_cwrite_8_fixupSP(Addr a) {
+ Thread* thr = get_current_Thread_in_C_C();
+ Thr* hbthr = thr->hbthr;
+
+ thr->first_sp_delta = sizeof(Word);
+ if (LIKELY(thr->synchr_nesting == 0))
+ LIBHB_CWRITE_8(hbthr, a);
+ thr->first_sp_delta = 0;
+}
+
static VG_REGPARM(2)
void evh__mem_help_cwrite_N(Addr a, SizeT size) {
Thread* thr = get_current_Thread_in_C_C();
IRExpr* addr,
Int szB,
Bool isStore,
+ Bool fixupSP_needed,
Int hWordTy_szB,
Int goff_sp,
+ Int goff_sp_s1,
+                              /* goff_sp_s1 is the offset in the guest
+                                 state where the cached stack validity
+                                 is stored. */
IRExpr* guard ) /* NULL => True */
{
IRType tyAddr = Ity_INVALID;
argv = mkIRExprVec_1( addr );
break;
case 4:
- hName = "evh__mem_help_cwrite_4";
- hAddr = &evh__mem_help_cwrite_4;
+ if (fixupSP_needed) {
+ /* Unwind has to be done with a SP fixed up with one word.
+ See Ist_Put heuristic in hg_instrument. */
+ hName = "evh__mem_help_cwrite_4_fixupSP";
+ hAddr = &evh__mem_help_cwrite_4_fixupSP;
+ } else {
+ hName = "evh__mem_help_cwrite_4";
+ hAddr = &evh__mem_help_cwrite_4;
+ }
argv = mkIRExprVec_1( addr );
break;
case 8:
- hName = "evh__mem_help_cwrite_8";
- hAddr = &evh__mem_help_cwrite_8;
+ if (fixupSP_needed) {
+ /* Unwind has to be done with a SP fixed up with one word.
+ See Ist_Put heuristic in hg_instrument. */
+ hName = "evh__mem_help_cwrite_8_fixupSP";
+ hAddr = &evh__mem_help_cwrite_8_fixupSP;
+ } else {
+ hName = "evh__mem_help_cwrite_8";
+ hAddr = &evh__mem_help_cwrite_8;
+ }
argv = mkIRExprVec_1( addr );
break;
default:
hName, VG_(fnptr_to_fnentry)( hAddr ),
argv );
+ if (HG_(clo_delta_stacktrace)) {
+      /* The memory access helper might read the shadow1 SP offset, which
+         indicates whether the cached stacktrace is valid. */
+ di->fxState[0].fx = Ifx_Read;
+ di->fxState[0].offset = goff_sp_s1;
+ di->fxState[0].size = hWordTy_szB;
+ di->fxState[0].nRepeats = 0;
+ di->fxState[0].repeatLen = 0;
+ di->nFxState = 1;
+ }
+
if (! HG_(clo_check_stack_refs)) {
/* We're ignoring memory references which are (obviously) to the
stack. In fact just skip stack refs that are within 4 pages
return VG_(is_soname_ld_so)(soname);
}
+static
+void addInvalidateCachedStack (IRSB* bbOut,
+ Int goff_sp_s1,
+ Int hWordTy_szB)
+{
+   /* Invalidate the cached stack: write 0 at the shadow1 SP offset. */
+ addStmtToIRSB( bbOut,
+ IRStmt_Put(goff_sp_s1,
+ hWordTy_szB == 4 ?
+ mkU32(0) : mkU64(0)));
+   /// ???? anything more efficient than assigning a Word ????
+}
+
static
IRSB* hg_instrument ( VgCallbackClosure* closure,
IRSB* bbIn,
Bool inLDSO = False;
Addr inLDSOmask4K = 1; /* mismatches on first check */
- const Int goff_sp = layout->offset_SP;
+  // Set to True when the SP must be fixed up when taking a stack trace
+  // for the memory accesses in the rest of the instruction.
+ Bool fixupSP_needed = False;
+
+ const Int goff_SP = layout->offset_SP;
+  /* The SP in shadow1 indicates whether the cached stack is valid.
+     We have to invalidate the cached stack e.g. when seeing a call or ret. */
+ const Int goff_SP_s1 = layout->total_sizeB + layout->offset_SP;
+ const Int hWordTy_szB = sizeofIRType(hWordTy);
if (gWordTy != hWordTy) {
/* We don't currently support this case. */
tl_assert(st);
tl_assert(isFlatIRStmt(st));
switch (st->tag) {
+ case Ist_Exit:
+         /* No memory reference, but if we do anything other than
+            Ijk_Boring, indicate to Helgrind that the previously
+            recorded stack is invalid.
+            For Ijk_Boring, also invalidate the stack if the exit
+            instruction has no CF info. This heuristic avoids cached
+            stack trace mismatches in some cases, such as in longjmp
+            implementations. Similar logic below for the bb exit. */
+ if (HG_(clo_delta_stacktrace)
+ && (st->Ist.Exit.jk != Ijk_Boring || ! VG_(has_CF_info)(cia)))
+ addInvalidateCachedStack(bbOut, goff_SP_s1, hWordTy_szB);
+ break;
case Ist_NoOp:
case Ist_AbiHint:
+ /* None of these can contain any memory references. */
+ break;
case Ist_Put:
+ /* This cannot contain any memory references. */
+         /* If we see a Put to the SP, then from now on in this
+            instruction, the SP value needed for unwinding has to be fixed
+            up by one word. This very simple heuristic ensures correct
+            unwinding in the typical case of a push instruction. If we
+            need to cover more cases, then we need to track more precisely
+            how the SP is modified by the instruction (and calculate a
+            precise SP delta), rather than assuming that the SP is
+            decremented by one Word. */
+ if (HG_(clo_delta_stacktrace) && st->Ist.Put.offset == goff_SP) {
+ fixupSP_needed = True;
+ }
+ break;
case Ist_PutI:
- case Ist_Exit:
- /* None of these can contain any memory references. */
+ /* This cannot contain any memory references. */
break;
case Ist_IMark:
+ fixupSP_needed = False;
+
/* no mem refs, but note the insn address. */
cia = st->Ist.IMark.addr;
+
/* Don't instrument the dynamic linker. It generates a
lot of races which we just expensively suppress, so
it's pointless.
cas->addr,
(isDCAS ? 2 : 1)
* sizeofIRType(typeOfIRExpr(bbIn->tyenv, cas->dataLo)),
- False/*!isStore*/,
- sizeofIRType(hWordTy), goff_sp,
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
NULL/*no-guard*/
);
}
bbOut,
st->Ist.LLSC.addr,
sizeofIRType(dataTy),
- False/*!isStore*/,
- sizeofIRType(hWordTy), goff_sp,
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
NULL/*no-guard*/
);
}
bbOut,
st->Ist.Store.addr,
sizeofIRType(typeOfIRExpr(bbIn->tyenv, st->Ist.Store.data)),
- True/*isStore*/,
- sizeofIRType(hWordTy), goff_sp,
+ True/*isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
NULL/*no-guard*/
);
}
IRType type = typeOfIRExpr(bbIn->tyenv, data);
tl_assert(type != Ity_INVALID);
instrument_mem_access( bbOut, addr, sizeofIRType(type),
- True/*isStore*/,
- sizeofIRType(hWordTy),
- goff_sp, sg->guard );
+ True/*isStore*/, fixupSP_needed,
+ hWordTy_szB,
+ goff_SP, goff_SP_s1, sg->guard );
break;
}
typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
tl_assert(type != Ity_INVALID);
instrument_mem_access( bbOut, addr, sizeofIRType(type),
- False/*!isStore*/,
- sizeofIRType(hWordTy),
- goff_sp, lg->guard );
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB,
+ goff_SP, goff_SP_s1, lg->guard );
break;
}
bbOut,
data->Iex.Load.addr,
sizeofIRType(data->Iex.Load.ty),
- False/*!isStore*/,
- sizeofIRType(hWordTy), goff_sp,
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
NULL/*no-guard*/
);
}
if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
if (!inLDSO) {
instrument_mem_access(
- bbOut, d->mAddr, dataSize, False/*!isStore*/,
- sizeofIRType(hWordTy), goff_sp, NULL/*no-guard*/
+ bbOut, d->mAddr, dataSize,
+ False/*!isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
+ NULL/*no-guard*/
);
}
}
if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
if (!inLDSO) {
instrument_mem_access(
- bbOut, d->mAddr, dataSize, True/*isStore*/,
- sizeofIRType(hWordTy), goff_sp, NULL/*no-guard*/
+ bbOut, d->mAddr, dataSize,
+ True/*isStore*/, fixupSP_needed,
+ hWordTy_szB, goff_SP, goff_SP_s1,
+ NULL/*no-guard*/
);
}
}
addStmtToIRSB( bbOut, st );
} /* iterate over bbIn->stmts */
+  // See the Ist_Exit case above.
+ if (HG_(clo_delta_stacktrace)
+ && (bbOut->jumpkind != Ijk_Boring || ! VG_(has_CF_info)(cia)))
+ addInvalidateCachedStack(bbOut, goff_SP_s1, hWordTy_szB);
+
return bbOut;
}
else if VG_XACT_CLO(arg, "--history-level=full",
HG_(clo_history_level), 2);
+ else if VG_BOOL_CLO(arg, "--delta-stacktrace",
+ HG_(clo_delta_stacktrace)) {}
+
else if VG_BINT_CLO(arg, "--conflict-cache-size",
HG_(clo_conflict_cache_size), 10*1000, 150*1000*1000) {}
" full: show both stack traces for a data race (can be very slow)\n"
" approx: full trace for one thread, approx for the other (faster)\n"
" none: only show trace for one thread in a race (fastest)\n"
+" --delta-stacktrace=no|yes [yes on linux amd64/x86]\n"
+" no : always compute a full history stacktrace from unwind info\n"
+" yes : derive a stacktrace from the previous stacktrace\n"
+" if there was no call/return or similar instruction\n"
" --conflict-cache-size=N size of 'full' history cache [2000000]\n"
" --check-stack-refs=no|yes race-check reads and writes on the\n"
" main stack and thread stacks? [yes]\n"
VG_(printf)(" --hg-sanity-flags values:\n");
VG_(printf)(" 010000 after changes to "
"lock-order-acquisition-graph\n");
- VG_(printf)(" 001000 at memory accesses (NB: not currently used)\n");
+ VG_(printf)(" 001000 at memory accesses\n");
VG_(printf)(" 000100 at mem permission setting for "
"ranges >= %d bytes\n", SCE_BIGRANGE_T);
VG_(printf)(" 000010 at lock/unlock events\n");
thr = libhb_get_Thr_hgthread( hbt );
tl_assert(thr);
tid = map_threads_maybe_reverse_lookup_SLOW(thr);
- nActual = (UWord)VG_(get_StackTrace)( tid, frames, (UInt)nRequest,
- NULL, NULL, 0 );
+ nActual = (UWord)VG_(get_StackTrace_with_deltas)
+ ( tid, frames, (UInt)nRequest,
+ NULL, NULL, 0,
+ thr->first_sp_delta);
tl_assert(nActual <= nRequest);
for (; nActual < nRequest; nActual++)
frames[nActual] = 0;
{
Thr* hbthr_root;
+ if (HG_(clo_delta_stacktrace)
+ && VG_(clo_vex_control).guest_chase_thresh != 0) {
+ if (VG_(clo_verbosity) >= 2)
+ VG_(message)(Vg_UserMsg,
+ "helgrind --delta-stacktrace=yes only works with "
+ "--vex-guest-chase-thresh=0\n"
+                   "=> (re-setting it to 0)\n");
+ VG_(clo_vex_control).guest_chase_thresh = 0;
+ }
+
/////////////////////////////////////////////
hbthr_root = libhb_init( for_libhb__get_stacktrace,
for_libhb__get_EC );
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
+#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_hashtable.h"
#include "pub_tool_stacktrace.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
+#include "pub_tool_debuginfo.h"
+#include "pub_tool_gdbserver.h"
#include "pub_tool_options.h" // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
# define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
#endif
+/* Define to 1 to activate tracing of the cached rcec. */
+#define DEBUG_CACHED_RCEC 0
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
#define N_KWs_N_STACKs_PER_THREAD 62500
+#define N_FRAMES 8
+// (UInt) `echo "Reference Counted Execution Context" | md5sum`
+#define RCEC_MAGIC 0xab88abb2UL
+
+/* RCEC usage is commented in more detail in the section
+   'Change-event map2' later in this file. */
+typedef
+ struct _RCEC {
+ UWord magic; /* sanity check only */
+ struct _RCEC* next;
+ UWord rc;
+ UWord rcX; /* used for crosschecking */
+ UWord frames_hash; /* hash of all the frames */
+ UWord frames[N_FRAMES];
+ }
+ RCEC;
+
struct _Thr {
/* Current VTSs for this thread. They change as we go along. viR
is the VTS to be used for reads, viW for writes. Usually they
Thread should be merged into a single structure. */
Thread* hgthread;
+ /* cached_rcec maintains the last RCEC that was retrieved for this thread. */
+ RCEC cached_rcec; // cached_rcec value, not ref-counted.
+   /* The SP register in the shadow1 guest state (SP_s1) is used to
+      maintain the validity of the cached rcec.
+      If SP_s1 is 0, then the cached rcec is invalid (cannot be used).
+      If SP_s1 is != 0, then the cached rcec is valid. The valid cached
+      rcec can be used to generate a new RCEC by changing just the last
+      frame. */
+
/* The ULongs (scalar Kws) in this accumulate in strictly
increasing order, without duplicates. This is important because
we need to be able to find a given scalar Kw in this array
return thr;
}
+/* True if the cached rcec for thr is valid and can be used to build the
+ current stack trace just by changing the last frame to the current IP. */
+static inline Bool cached_rcec_valid(Thr *thr)
+{
+ UWord cached_stackvalid = VG_(get_SP_s1) (thr->hgthread->coretid);
+ return cached_stackvalid != 0;
+}
+/* Set the validity of the cached rcec of thr. */
+static inline void set_cached_rcec_validity(Thr *thr, Bool valid)
+{
+ VG_(set_SP_s1) (thr->hgthread->coretid, valid);
+}
+
static Thr* Thr__new ( void )
{
Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
= VG_(newXA)( HG_(zalloc),
"libhb.Thr__new.3 (local_Kws_and_stacks)",
HG_(free), sizeof(ULong_n_EC) );
+ /* Make an 'empty' cached rcec in thr. */
+ thr->cached_rcec.magic = RCEC_MAGIC;
+ thr->cached_rcec.rc = 0;
+ thr->cached_rcec.rcX = 0;
+ thr->cached_rcec.next = NULL;
/* Add this Thr* <-> ThrID binding to the mapping, and
cross-check */
//// Part (1): A hash table of RCECs
///
-#define N_FRAMES 8
-
-// (UInt) `echo "Reference Counted Execution Context" | md5sum`
-#define RCEC_MAGIC 0xab88abb2UL
-
//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */
-typedef
- struct _RCEC {
- UWord magic; /* sanity check only */
- struct _RCEC* next;
- UWord rc;
- UWord rcX; /* used for crosschecking */
- UWord frames_hash; /* hash of all the frames */
- UWord frames[N_FRAMES];
- }
- RCEC;
-
//////////// BEGIN RCEC pool allocator
static PoolAlloc* rcec_pool_allocator;
static RCEC* alloc_RCEC ( void ) {
/* Count of allocated RCEC having ref count > 0 */
static UWord RCEC_referenced = 0;
-/* Gives an arbitrary total order on RCEC .frames fields */
-static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
+/* True if the frames of ec1 and ec2 are different. */
+static Bool RCEC__differs_by_frames ( RCEC* ec1, RCEC* ec2 ) {
Word i;
tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
- if (ec1->frames_hash < ec2->frames_hash) return -1;
- if (ec1->frames_hash > ec2->frames_hash) return 1;
+ if (ec1->frames_hash != ec2->frames_hash) return True;
for (i = 0; i < N_FRAMES; i++) {
- if (ec1->frames[i] < ec2->frames[i]) return -1;
- if (ec1->frames[i] > ec2->frames[i]) return 1;
+ if (ec1->frames[i] != ec2->frames[i]) return True;
}
- return 0;
+ return False;
}
-
/* Dec the ref of this RCEC. */
static void ctxt__rcdec ( RCEC* ec )
{
if (!copy) break;
tl_assert(copy->magic == RCEC_MAGIC);
stats__ctxt_tab_cmps++;
- if (0 == RCEC__cmp_by_frames(copy, example)) break;
+ if (!RCEC__differs_by_frames(copy, example)) break;
copy = copy->next;
}
return w;
}
+static UWord stats__cached_rcec_identical = 0;
+static UWord stats__cached_rcec_updated = 0;
+static UWord stats__cached_rcec_fresh = 0;
+static UWord stats__cached_rcec_diff = 0;
+static UWord stats__cached_rcec_diff_known_reason = 0;
+
+/* Check if the cached rcec in thr corresponds to the current
+ stacktrace of the thread. Returns True if ok, False otherwise.
+ This is just used for debugging the cached rcec logic, activated
+ using --hg-sanity-flags=xx1xxx i.e. SCE_ACCESS flag.
+ When this flag is activated, a call to this function will happen each time
+ a stack trace is needed for a memory access. */
+__attribute__((noinline))
+static Bool check_cached_rcec_ok (Thr* thr, Addr previous_frame0)
+{
+ Bool ok = True;
+ UInt i;
+ UWord frames[N_FRAMES];
+ UWord sps[N_FRAMES];
+ UWord fps[N_FRAMES];
+
+ for (i = 0; i < N_FRAMES; i++)
+ frames[i] = sps[i] = fps[i] = 0;
+ VG_(get_StackTrace)( thr->hgthread->coretid, &frames[0], N_FRAMES,
+ &sps[0], &fps[0], 0);
+ for (i = 0; i < N_FRAMES; i++) {
+ if ( thr->cached_rcec.frames[i] != frames[i] ) {
+ /* There are a bunch of "normal" reasons for which a stack
+ derived from the cached rcec differs from frames. */
+ const HChar *reason = NULL;
+
+ /* Old linkers (e.g. RHEL5) gave no cfi unwind information in the PLT
+ section (fix was added in binutils around June 2011).
+           Without PLT unwind info, stacktraces in the PLT section are
+ missing an entry. E.g. the cached stacktrace is:
+ ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
+ ==4463== by 0x33B7F9: __libc_thread_freeres
+ (in /lib/libc-2.11.2.so)
+ ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
+ ==4463== by 0x2F107D: clone (clone.S:130)
+ while the 'check stacktrace' is
+ ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
+ ==4463== by 0x33B82D: strerror_thread_freeres
+ (in /lib/libc-2.11.2.so)
+ ==4463== by 0x33B7F9: __libc_thread_freeres
+ (in /lib/libc-2.11.2.so)
+ ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
+ ==4463== by 0x2F107D: clone (clone.S:130)
+ No cheap/easy way to detect or fix that. */
+
+ /* It seems that sometimes, the CFI unwind info looks wrong
+ for a 'ret' instruction. E.g. here is the unwind info
+ for a 'retq' on gcc20 (amd64, Debian 7)
+ [0x4e3ddfe .. 0x4e3ddfe]: let cfa=oldSP+48 in RA=*(cfa+-8)
+ SP=cfa+0 BP=*(cfa+-24)
+ This unwind info looks doubtful, as the RA should be at oldSP.
+ No easy way to detect this problem.
+ This gives a difference between cached rcec and
+ current stack trace: the cached rcec is correct. */
+
+ /* When returning from main, unwind info becomes erratic.
+ So, by default, only report errors for main and above,
+ unless asked to show below main. */
+ if (reason == NULL) {
+ UInt fr_main;
+ Vg_FnNameKind fr_kind;
+ for (fr_main = 0; fr_main < N_FRAMES; fr_main++) {
+ fr_kind = VG_(get_fnname_kind_from_IP)
+ (frames[fr_main]);
+ if (fr_kind == Vg_FnNameMain || fr_kind == Vg_FnNameBelowMain)
+ break;
+ }
+ UInt kh_main;
+ Vg_FnNameKind kh_kind;
+ for (kh_main = 0; kh_main < N_FRAMES; kh_main++) {
+ kh_kind = VG_(get_fnname_kind_from_IP)
+ (thr->cached_rcec.frames[kh_main]);
+ if (kh_kind == Vg_FnNameMain || kh_kind == Vg_FnNameBelowMain)
+ break;
+ }
+ if (kh_main == fr_main
+ && kh_kind == fr_kind
+ && (kh_main < i || (kh_main == i
+ && kh_kind == Vg_FnNameBelowMain))) {
+ // found main or below main before the difference
+ reason = "Below main";
+ }
+ }
+
+ /* We have places where the stack is missing some internal
+ pthread functions. For such stacktraces, GDB reports only
+ one function, telling:
+ #0 0xf7fa81fe in _L_unlock_669 ()
+ from /lib/i386-linux-gnu/libpthread.so.0
+ Backtrace stopped: previous frame identical to
+ this frame (corrupt stack?)
+
+ This is when sps and fps are identical.
+ The cached stack trace is then
+ ==3336== at 0x40641FE: _L_unlock_669
+ (pthread_mutex_unlock.c:310)
+ ==3336== by 0x40302BE: pthread_mutex_unlock
+ (hg_intercepts.c:710)
+ ==3336== by 0x80486AF: main (cond_timedwait_test.c:14)
+ while the 'check stacktrace' is
+ ==3336== at 0x40641FE: _L_unlock_669
+ (pthread_mutex_unlock.c:310)
+ ==3336== by 0x4064206: _L_unlock_669
+ (pthread_mutex_unlock.c:310)
+ ==3336== by 0x4064132: __pthread_mutex_unlock_usercnt
+ (pthread_mutex_unlock.c:57)
+ ==3336== by 0x40302BE: pthread_mutex_unlock
+ (hg_intercepts.c:710)
+ ==3336== by 0x80486AF: main (cond_timedwait_test.c:14) */
+ if (reason == NULL) {
+ if ((i > 0
+ && sps[i] == sps[i-1] && fps[i] == fps[i-1])
+ || (i < N_FRAMES-1
+ && sps[i] == sps[i+1] && fps[i] == fps[i+1])) {
+ reason = "previous||next frame: identical sp and fp";
+ }
+ }
+ if (reason == NULL) {
+ if ((i > 0
+ && fps[i] == fps[i-1])
+ || (i < N_FRAMES-1
+ && fps[i] == fps[i+1])) {
+ reason = "previous||next frame: identical fp";
+ }
+ }
+
+         /* When we have a read or write 'in the middle of a push
+            instruction', then the normal backtrace is not very good, while
+            the Helgrind stacktrace is better, as it undoes the
+            not-yet-completed push instruction before taking the
+            stacktrace. */
+ if (reason == NULL && thr->hgthread->first_sp_delta != 0) {
+ reason = "fixupSP probably needed for check stacktrace";
+ }
+
+ /* Unwinding becomes hectic when running the exit handlers.
+ None of GDB, cached stacktrace and check stacktrace corresponds.
+ So, if we find __run_exit_handlers, ignore the difference. */
+ if (reason == NULL) {
+ const HChar *fnname;
+ for (UInt f = 0; f < N_FRAMES; f++) {
+ if (VG_(get_fnname)( frames[f], &fnname)
+ && VG_(strcmp) ("__run_exit_handlers", fnname) == 0) {
+ reason = "exit handlers";
+ break;
+ }
+ }
+ }
+
+ // Show what we have found for this difference
+ if (reason == NULL) {
+ ok = False;
+ stats__cached_rcec_diff++;
+ } else {
+ ok = True;
+ stats__cached_rcec_diff_known_reason++;
+ }
+ if (!ok || VG_(clo_verbosity) > 2) {
+ Bool save_show_below_main = VG_(clo_show_below_main);
+ VG_(clo_show_below_main) = True;
+ /* The below error msg reports an unexpected diff in 'frame %d'.
+ The (maybe wrong) pc found in the cached stacktrace is
+ 'cached_pc %p' while an unwind gives the (maybe wrong)
+ 'check_pc %p'.
+ After, 'previous_frame0 %p' tells where the cached stacktrace
+ was taken.
+            This is then followed by the full resulting cached stack trace
+            and the full stack trace found by unwinding.
+ Such a diff can have various origins:
+ * a bug in the unwinder, when the cached stack trace was taken
+ at 'previous_frame0'
+ * a bug in the unwinder, when the check stack trace was taken
+ (i.e. at current pc).
+            * a missing 'invalidate cached stack trace' somewhere in the
+              instructions between 'previous_frame0' and current_pc.
+            To investigate the last case, typically disassemble the range
+            of instructions where an invalidation of the cached stack
+            might be missing. */
+ VG_(printf)("%s diff tid %d frame %d "
+ "cached_pc %p check_pc %p\n",
+ reason ? reason : "unexpected",
+ thr->hgthread->coretid,
+ i,
+ (void*)thr->cached_rcec.frames[i],
+ (void*)frames[i]);
+ VG_(printf)("cached stack trace previous_frame0 %p\n",
+ (void*)previous_frame0);
+ VG_(pp_StackTrace)(&previous_frame0, 1);
+ VG_(printf)("resulting cached stack trace:\n");
+ VG_(pp_StackTrace)(thr->cached_rcec.frames, N_FRAMES);
+ VG_(printf)("check stack trace:\n");
+ VG_(pp_StackTrace)(frames, N_FRAMES);
+
+ VG_(show_sched_status) (False, // host_stacktrace
+ False, // stack_usage
+ False); // exited_threads
+         if (VG_(clo_vgdb_error) == 1234567890) // HACK TO ALLOW DEBUGGING
+ VG_(gdbserver) ( thr->hgthread->coretid );
+ VG_(clo_show_below_main) = save_show_below_main;
+ }
+ break; // Stop giving more errors for this stacktrace.
+ }
+ }
+ return ok;
+}
+
__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
- UWord hash, i;
- RCEC example;
- example.magic = RCEC_MAGIC;
- example.rc = 0;
- example.rcX = 0;
- example.next = NULL;
- main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
+ UInt i;
+ UWord hash;
+ Addr previous_frame0 = 0; // Assignment needed to silence gcc
+ RCEC *res;
+ const Bool thr_cached_rcec_valid = cached_rcec_valid(thr);
+ const Addr cur_ip = VG_(get_IP)(thr->hgthread->coretid);
+
+ if (DEBUG_CACHED_RCEC)
+ VG_(printf)("get rcec tid %d at IP %p SP %p"
+ " first_sp_delta %ld cached valid %d\n",
+ thr->hgthread->coretid,
+ (void*)cur_ip,
+ (void*)VG_(get_SP)(thr->hgthread->coretid),
+ thr->hgthread->first_sp_delta, thr_cached_rcec_valid);
+
+ /* If we have a valid cached rcec, derive the new rcec from the cached one
+ and update the cached one.
+ Otherwise, compute a fresh rcec. */
+
+ if (thr_cached_rcec_valid) {
+ /* Update the stacktrace of the cached rcec with the current IP */
+ previous_frame0 = thr->cached_rcec.frames[0];
+ thr->cached_rcec.frames[0] = cur_ip;
+
+# if defined(VGP_x86_linux)
+ // See m_stacktrace.c kludge
+ extern Addr VG_(client__dl_sysinfo_int80);
+ /// #include pub_core_clientstate needed for the above ????
+ /// or move the above into a pub_tool_??? tool_stacktrace.h maybe ????
+ if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
+ && cur_ip >= VG_(client__dl_sysinfo_int80)
+ && cur_ip < VG_(client__dl_sysinfo_int80)+3
+ ) {
+ thr->cached_rcec.frames[0]
+ = (ULong) *(Addr*)(UWord)VG_(get_SP)(thr->hgthread->coretid);
+ }
+# endif
+
+ if (previous_frame0 == thr->cached_rcec.frames[0])
+ stats__cached_rcec_identical++;
+ else
+ stats__cached_rcec_updated++;
+ } else {
+ /* Compute a fresh stacktrace. */
+ main_get_stacktrace( thr, &thr->cached_rcec.frames[0], N_FRAMES );
+ if (DEBUG_CACHED_RCEC) {
+ Bool save_show_below_main = VG_(clo_show_below_main);
+ VG_(clo_show_below_main) = True;
+ VG_(printf)("caching stack trace:\n");
+ VG_(pp_StackTrace)(&thr->cached_rcec.frames[0], N_FRAMES);
+ VG_(clo_show_below_main) = save_show_below_main;
+ }
+ stats__cached_rcec_fresh++;
+ }
+
hash = 0;
for (i = 0; i < N_FRAMES; i++) {
- hash ^= example.frames[i];
+ hash ^= thr->cached_rcec.frames[i];
hash = ROLW(hash, 19);
}
- example.frames_hash = hash;
- return ctxt__find_or_add( &example );
+ thr->cached_rcec.frames_hash = hash;
+ res = ctxt__find_or_add( &thr->cached_rcec );
+
+ if (UNLIKELY(HG_(clo_sanity_flags) & SCE_ACCESS)
+ && thr_cached_rcec_valid) {
+      /* If the cached and check stacktraces differ, invalidate the cached
+         rcec: this way, fewer duplicated diffs are reported afterwards. */
+ if (!check_cached_rcec_ok (thr, previous_frame0))
+ set_cached_rcec_validity(thr, False);
+ } else {
+ if (HG_(clo_delta_stacktrace) && !thr_cached_rcec_valid)
+ set_cached_rcec_validity(thr, True);
+ }
+
+ return res;
}
///////////////////////////////////////////////////////
(UWord)N_RCEC_TAB,
stats__ctxt_tab_curr, RCEC_referenced,
stats__ctxt_tab_max );
+ VG_(printf) (" libhb: stats__cached_rcec "
+ "identical %'lu updated %'lu fresh %'lu\n",
+ stats__cached_rcec_identical, stats__cached_rcec_updated,
+ stats__cached_rcec_fresh);
+ if (stats__cached_rcec_diff > 0)
+      VG_(printf) ("   libhb: stats__cached_rcec diff unknown reason %'lu\n",
+ stats__cached_rcec_diff);
+ if (stats__cached_rcec_diff_known_reason > 0)
+ VG_(printf) (" libhb: stats__cached_rcec diff known reason %'lu\n",
+ stats__cached_rcec_diff_known_reason);
+
{
# define MAXCHAIN 10
UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
tl_assert(!thr->llexit_done);
thr->llexit_done = True;
+   /* Just to be sure, mark the cached stack as invalid. */
+ set_cached_rcec_validity(thr, False);
+
/* free up Filter and local_Kws_n_stacks (well, actually not the
latter ..) */
tl_assert(thr->filter);
Addr data_addr
);
+/* True if we have some Call Frame unwind debuginfo for Addr a. */
+extern Bool VG_(has_CF_info)(Addr a);
+
/* Succeeds if the address is within a shared object or the main executable.
It first searches if Addr a belongs to the text segment of debug info.
If not found, it asks the address space manager whether it
Int line, const HChar* fn,
const HChar* format, ... );
+/* Show various thread-related information, such
+   as the guest stacktrace for each thread.
+ Mostly for debugging V.
+ The following activates optional output:
+ host_stacktrace : shows the host stacktrace.
+ stack_usage True means:
+ shows how much of the valgrind stack was used.
+ shows the client stack range
+ exited_thread_slots : show information for thread slots that were used
+ but the thread has now exited. */
+extern void VG_(show_sched_status) ( Bool host_stacktrace,
+ Bool stack_usage,
+ Bool exited_threads);
+
#endif // __PUB_TOOL_LIBCASSERT_H
/*--------------------------------------------------------------------*/
Addr VG_(get_IP) ( ThreadId tid );
Addr VG_(get_SP) ( ThreadId tid );
+// Get and set the shadow1 SP register
+Addr VG_(get_SP_s1) ( ThreadId tid );
+void VG_(set_SP_s1) ( ThreadId tid, Addr sp );
// For get/set, 'area' is where the asked-for guest state will be copied
// into/from. If shadowNo == 0, the real (non-shadow) guest state is
/*OUT*/StackTrace fps,
Word first_ip_delta );
+// Same as VG_(get_StackTrace), but additionally applies first_sp_delta to
+// the SP used for unwinding the first frame.
+extern UInt VG_(get_StackTrace_with_deltas)(
+ ThreadId tid,
+ /*OUT*/StackTrace ips, UInt n_ips,
+ /*OUT*/StackTrace sps,
+ /*OUT*/StackTrace fps,
+ Word first_ip_delta,
+ Word first_sp_delta
+ );
+
// Apply a function to every element in the StackTrace. The parameter
// 'n' gives the index of the passed ip. 'opaque' is an arbitrary
// pointer provided to each invocation of 'action' (a poor man's