#if defined(VGP_x86_linux) || defined(VGP_x86_darwin)
+#define N_FP_CF_VERIF 1021
+// prime number so that size of fp_CF_verif is just below 4K or 8K
+// Note that this prime nr differs from the one chosen in
+// m_debuginfo/debuginfo.c for the cfsi cache : in case we have
+// a collision here between two IPs, we expect to not (often) have the
+// same collision in the cfsi cache (and vice-versa).
+
+// unwinding with fp chain is ok:
+#define FPUNWIND 0
+// there is no CFI info for this IP:
+#define NOINFO 1
+// Unwind with FP is not ok, must use CF unwind:
+#define CFUNWIND 2
+
+static Addr fp_CF_verif_cache [N_FP_CF_VERIF];
+
+/* An unwind done by following the fp chain technique can be incorrect
+ as not all frames are respecting the standard bp/sp ABI.
+ The CF information is now generated by default by gcc
+ (as part of the dwarf info). However, unwinding using CF information
+ is significantly slower : a slowdown of 20% has been observed
+ on an helgrind test case.
+ So, by default, the unwinding will be done using the fp chain.
+ But before accepting to unwind an IP with fp_chain, the result
+ of the unwind will be checked with the CF information.
+ This check can give 3 results:
+ FPUNWIND (0): there is CF info, and it gives the same result as fp unwind.
+ => it is assumed that future unwind for this IP can be done
+ with the fast fp chain, without further CF checking
+ NOINFO (1): there is no CF info (so, fp unwind is the only do-able thing)
+ CFUNWIND (2): there is CF info, but unwind result differs.
+ => it is assumed that future unwind for this IP must be done
+ with the CF info.
+ Of course, if each fp unwind implies a check done with a CF unwind,
+ it would just be slower => we cache the check result in an
+ array of checked Addr.
+ The check for an IP will be stored at
+ fp_CF_verif_cache[IP % N_FP_CF_VERIF] as one of:
+ IP ^ FPUNWIND
+ IP ^ NOINFO
+ IP ^ CFUNWIND
+
+ Note: we can re-use the last (ROUNDDOWN (log (N_FP_CF_VERIF))) bits
+ to store the check result, as they are guaranteed to be non significant
+ in the comparison between 2 IPs stored in fp_CF_verif_cache).
+ In other words, if two IPs are only differing on the last 2 bits,
+ then they will not land in the same cache bucket.
+*/
+
+static UInt fp_CF_verif_generation = 0;
+// Our cache has to be maintained in sync with the CFI cache.
+// Each time the CFI cache is changed, its generation will be incremented.
+// We will clear our cache when our saved generation differs from
+// the CFI cache generation.
+
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
/*OUT*/Addr* ips, UInt max_n_ips,
/*OUT*/Addr* sps, /*OUT*/Addr* fps,
UnwindStartRegs* startRegs,
Addr fp_max_orig )
{
- Bool debug = False;
+ const Bool debug = False; // VG_(debugLog_getLevel) () > 3; // True;
+ const HChar* unwind_case; // used when debug is True.
+ // Debugging this function is not straightforward.
+ // Here is the easiest way I have found:
+ // 1. Change the above to True.
+ // 2. Start your program under Valgrind with --tool=none --vgdb-error=0
+ // 3. Use GDB/vgdb to put a breakpoint where you want to debug the stacktrace
+ // 4. Continue till breakpoint is encountered
+ // 5. From GDB, use 'monitor v.info scheduler' and examine the unwind traces.
+ // You might have to do twice 'monitor v.info scheduler' to see
+ // the effect of caching the results of the verification.
+ // You can also modify the debug dynamically using by using
+ // 'monitor v.set debuglog 4.
+
+ const Bool do_stats = False; // compute and output some stats regularly.
+ static struct {
+ UInt nr; // nr of stacktraces computed
+ UInt nf; // nr of frames computed
+ UInt Ca; // unwind for which cache indicates CFUnwind must be used.
+ UInt FF; // unwind for which cache indicates FPUnwind can be used.
+ UInt Cf; // unwind at end of stack+store CFUNWIND (xip not end of stack).
+ UInt Fw; // unwind at end of stack+store FPUNWIND
+ UInt FO; // unwind + store FPUNWIND
+ UInt CF; // unwind + store CFUNWIND. Details below.
+ UInt xi; UInt xs; UInt xb; // register(s) which caused a 'store CFUNWIND'.
+ UInt Ck; // unwind fp invalid+store FPUNWIND
+ UInt MS; // microsoft unwind
+ } stats;
+
Int i;
Addr fp_max;
UInt n_found = 0;
vg_assert(sizeof(Addr) == sizeof(UWord));
vg_assert(sizeof(Addr) == sizeof(void*));
+ D3UnwindRegs fpverif_uregs; // result of CF unwind for a check reason.
+ Addr xip_verified; // xip for which we have calculated fpverif_uregs
+
D3UnwindRegs uregs;
uregs.xip = (Addr)startRegs->r_pc;
uregs.xsp = (Addr)startRegs->r_sp;
}
# endif
- /* fp is %ebp. sp is %esp. ip is %eip. */
+ if (UNLIKELY (fp_CF_verif_generation != VG_(CF_info_generation)())) {
+ fp_CF_verif_generation = VG_(CF_info_generation)();
+ VG_(memset)(&fp_CF_verif_cache, 0, sizeof(fp_CF_verif_cache));
+ }
- if (sps) sps[0] = uregs.xsp;
- if (fps) fps[0] = uregs.xbp;
- ips[0] = uregs.xip;
- i = 1;
/* Loop unwinding the stack. Note that the IP value we get on
* each pass (whether from CFI info or a stack frame) is a
* a tail call occurs and we wind up using the CFI info for the
* next function which is completely wrong.
*/
+ if (sps) sps[0] = uregs.xsp;
+ if (fps) fps[0] = uregs.xbp;
+ ips[0] = uregs.xip;
+ i = 1;
+ if (do_stats) stats.nr++;
+
while (True) {
if (i >= max_n_ips)
break;
- /* Try to derive a new (ip,sp,fp) triple from the current
- set. */
+ UWord hash = uregs.xip % N_FP_CF_VERIF;
+ Addr xip_verif = uregs.xip ^ fp_CF_verif_cache [hash];
+ if (debug)
+ VG_(printf)(" uregs.xip 0x%08lx xip_verif[0x%08lx]\n",
+ uregs.xip, xip_verif);
+ // If xip is in cache, then xip_verif will be <= CFUNWIND.
+ // Otherwise, if not in cache, xip_verif will be > CFUNWIND.
+
+ /* Try to derive a new (ip,sp,fp) triple from the current set. */
+
+ /* Do we have to do CFI unwinding ?
+ We do CFI unwinding if one of the following condition holds:
+ a. fp_CF_verif_cache contains xip but indicates CFUNWIND must
+ be done (i.e. fp unwind check failed when we did the first
+ unwind for this IP).
+ b. fp_CF_verif_cache does not contain xip.
+ We will try CFI unwinding in fpverif_uregs and compare with
+ FP unwind result to insert xip in the cache with the correct
+ indicator. */
+ if (UNLIKELY(xip_verif >= CFUNWIND)) {
+ if (xip_verif == CFUNWIND) {
+ /* case a : do "real" cfi unwind */
+ if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
+ if (debug) unwind_case = "Ca";
+ if (do_stats) stats.Ca++;
+ goto unwind_done;
+ }
+ /* ??? cache indicates we have to do CFI unwind (so, we
+ previously found CFI info, and failed the fp unwind
+ check). Now, we just failed with CFI. So, once we
+ succeed, once we fail. No idea what is going on =>
+ cleanup the cache entry and fallover to fp unwind (this
+ time). */
+ fp_CF_verif_cache [hash] = 0;
+ if (debug) VG_(printf)(" cache reset as CFI ok then nok\n");
+ //??? stats
+ xip_verif = NOINFO;
+ } else {
+ /* case b : do "verif" cfi unwind in fpverif_uregs */
+ fpverif_uregs = uregs;
+ xip_verified = uregs.xip;
+ if ( !VG_(use_CF_info)( &fpverif_uregs, fp_min, fp_max ) ) {
+ fp_CF_verif_cache [hash] = uregs.xip ^ NOINFO;
+ if (debug) VG_(printf)(" cache NOINFO fpverif_uregs\n");
+ xip_verif = NOINFO;
+ }
+ }
+ }
- /* On x86, first try the old-fashioned method of following the
- %ebp-chain. Code which doesn't use this (that is, compiled
- with -fomit-frame-pointer) is not ABI compliant and so
- relatively rare. Besides, trying the CFI first almost always
- fails, and is expensive. */
- /* Deal with frames resulting from functions which begin "pushl%
+ /* On x86, try the old-fashioned method of following the
+ %ebp-chain. This can be done if the fp_CF_verif_cache for xip
+ indicate fp unwind is ok. This must be done if the cache indicates
+ there is no info. This is also done to confirm what to put in the cache
+ if xip was not in the cache. */
+ /* This deals with frames resulting from functions which begin "pushl%
ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
if (fp_min <= uregs.xbp &&
uregs.xbp <= fp_max - 1 * sizeof(UWord)/*see comment below*/)
// entries [1] and above in a stack trace, by subtracting 1 from
// them. Hence stacks that used to end with a zero value now end in
// -1 and so we must detect that too.
- if (0 == uregs.xip || 1 == uregs.xip) break;
+ if (0 == uregs.xip || 1 == uregs.xip) {
+ if (xip_verif > CFUNWIND) {
+ // Check if we obtain the same result with fp unwind.
+ // If same result, then mark xip as fp unwindable
+ if (uregs.xip == fpverif_uregs.xip) {
+ fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
+ if (debug) VG_(printf)(" cache FPUNWIND 0\n");
+ unwind_case = "Fw";
+ if (do_stats) stats.Fw++;
+ break;
+ } else {
+ fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
+ uregs = fpverif_uregs;
+ if (debug) VG_(printf)(" cache CFUNWIND 0\n");
+ unwind_case = "Cf";
+ if (do_stats) stats.Cf++;
+ goto unwind_done;
+ }
+ } else {
+ // end of stack => out of the loop.
+ break;
+ }
+ }
+
uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %ebp*/
+ sizeof(Addr) /*ra*/;
uregs.xbp = (((UWord*)uregs.xbp)[0]);
- if (sps) sps[i] = uregs.xsp;
- if (fps) fps[i] = uregs.xbp;
- ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
- if (debug)
- VG_(printf)(" ipsF[%d]=0x%08lx\n", i-1, ips[i-1]);
- uregs.xip = uregs.xip - 1;
- /* as per comment at the head of this loop */
- if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
- continue;
- }
-
- /* That didn't work out, so see if there is any CF info to hand
- which can be used. */
- if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
- if (0 == uregs.xip || 1 == uregs.xip) break;
- if (sps) sps[i] = uregs.xsp;
- if (fps) fps[i] = uregs.xbp;
- ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
- if (debug)
- VG_(printf)(" ipsC[%d]=0x%08lx\n", i-1, ips[i-1]);
- uregs.xip = uregs.xip - 1;
- /* as per comment at the head of this loop */
- if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
- continue;
+ if (xip_verif > CFUNWIND) {
+ if (uregs.xip == fpverif_uregs.xip
+ && uregs.xsp == fpverif_uregs.xsp
+ && uregs.xbp == fpverif_uregs.xbp) {
+ fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
+ if (debug) VG_(printf)(" cache FPUNWIND >2\n");
+ if (debug) unwind_case = "FO";
+ if (do_stats) stats.FO++;
+ } else {
+ fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
+ if (debug) VG_(printf)(" cache CFUNWIND >2\n");
+ if (do_stats && uregs.xip != fpverif_uregs.xip) stats.xi++;
+ if (do_stats && uregs.xsp != fpverif_uregs.xsp) stats.xs++;
+ if (do_stats && uregs.xbp != fpverif_uregs.xbp) stats.xb++;
+ uregs = fpverif_uregs;
+ if (debug) unwind_case = "CF";
+ if (do_stats) stats.CF++;
+ }
+ } else {
+ if (debug) unwind_case = "FF";
+ if (do_stats) stats.FF++;
+ }
+ goto unwind_done;
+ } else {
+ // fp unwind has failed.
+ // If we were checking the validity of the cfi unwinding,
+ // we mark in the cache that the fp unwind cannot be done, and that
+ // cfi unwind is desired.
+ if (xip_verif > CFUNWIND) {
+ // We know that fpverif_uregs contains valid information,
+ // as a failed cf unwind would have put NOINFO in xip_verif.
+ fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
+ if (debug) VG_(printf)(" cache CFUNWIND as fp failed\n");
+ uregs = fpverif_uregs;
+ if (debug) unwind_case = "Ck";
+ if (do_stats) stats.Ck++;
+ goto unwind_done;
+ }
+ // xip_verif is FPUNWIND or NOINFO.
+ // We failed the cfi unwind and/or the fp unwind.
+ // => fallback to FPO info.
}
/* And, similarly, try for MSVC FPO unwind info. */
if ( VG_(use_FPO_info)( &uregs.xip, &uregs.xsp, &uregs.xbp,
fp_min, fp_max ) ) {
- if (0 == uregs.xip || 1 == uregs.xip) break;
- if (sps) sps[i] = uregs.xsp;
- if (fps) fps[i] = uregs.xbp;
- ips[i++] = uregs.xip;
- if (debug)
- VG_(printf)(" ipsC[%d]=0x%08lx\n", i-1, ips[i-1]);
- uregs.xip = uregs.xip - 1;
- if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
- continue;
+ if (debug) unwind_case = "MS";
+ if (do_stats) stats.MS++;
+ goto unwind_done;
}
/* No luck. We have to give up. */
break;
+
+ unwind_done:
+ /* Add a frame in ips/sps/fps */
+ /* fp is %ebp. sp is %esp. ip is %eip. */
+ if (0 == uregs.xip || 1 == uregs.xip) break;
+ if (sps) sps[i] = uregs.xsp;
+ if (fps) fps[i] = uregs.xbp;
+ ips[i++] = uregs.xip - 1;
+ /* -1: refer to calling insn, not the RA */
+ if (debug)
+ VG_(printf)(" ips%s[%d]=0x%08lx\n", unwind_case, i-1, ips[i-1]);
+ uregs.xip = uregs.xip - 1;
+ /* as per comment at the head of this loop */
+ if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
}
+ if (do_stats) stats.nf += i;
+ if (do_stats && stats.nr % 10000 == 0) {
+ VG_(printf)("nr %u nf %u "
+ "Ca %u FF %u "
+ "Cf %u "
+ "Fw %u FO %u "
+ "CF %u (xi %u xs %u xb %u) "
+ "Ck %u MS %u\n",
+ stats.nr, stats.nf,
+ stats.Ca, stats.FF,
+ stats.Cf,
+ stats.Fw, stats.FO,
+ stats.CF, stats.xi, stats.xs, stats.xb,
+ stats.Ck, stats.MS);
+ }
n_found = i;
return n_found;
}