Merge Helgrind from branches/YARD into the trunk. Also includes some

author Julian Seward <jseward@acm.org>

Sat, 25 Oct 2008 16:22:41 +0000 (16:22 +0000)

committer Julian Seward <jseward@acm.org>

Sat, 25 Oct 2008 16:22:41 +0000 (16:22 +0000)
author Julian Seward <jseward@acm.org>
Sat, 25 Oct 2008 16:22:41 +0000 (16:22 +0000)
committer Julian Seward <jseward@acm.org>
Sat, 25 Oct 2008 16:22:41 +0000 (16:22 +0000)
diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c

index c73fba37ea6b4b123ac1e80af27655714facaad1..67498c763fc774dad0f656a5c125b5c095bc2d82 100644 (file)
--- a/coregrind/m_debuginfo/debuginfo.c
+++ b/coregrind/m_debuginfo/debuginfo.c
@@ -98,6 +98,13 @@
  */
  
  
+/*------------------------------------------------------------*/
+/*--- fwdses                                               ---*/
+/*------------------------------------------------------------*/
+
+static void cfsi_cache__invalidate ( void );
+
+
  /*------------------------------------------------------------*/
  /*--- Root structure                                       ---*/
  /*------------------------------------------------------------*/
@@ -320,10 +327,11 @@ static void discard_DebugInfo ( DebugInfo* di )
  /* Repeatedly scan debugInfo_list, looking for DebugInfos with text
     AVMAs intersecting [start,start+length), and call discard_DebugInfo
     to get rid of them.  This modifies the list, hence the multiple
-   iterations.
+   iterations.  Returns True iff any such DebugInfos were found.
  */
-static void discard_syms_in_range ( Addr start, SizeT length )
+static Bool discard_syms_in_range ( Addr start, SizeT length )
  {
+   Bool       anyFound = False;
     Bool       found;
     DebugInfo* curr;
  
@@ -347,8 +355,11 @@ static void discard_syms_in_range ( Addr start, SizeT length )
        }
  
        if (!found) break;
+      anyFound = True;
        discard_DebugInfo( curr );
     }
+
+   return anyFound;
  }
  
  
@@ -478,6 +489,84 @@ DebugInfo* find_or_create_DebugInfo_for ( UChar* filename, UChar* memname )
  }
  
  
+/* Debuginfo reading for 'di' has just been successfully completed.
+   Check that the invariants stated in
+   "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
+   priv_storage.h are observed. */
+static void check_CFSI_related_invariants ( DebugInfo* di )
+{
+   DebugInfo* di2 = NULL;
+   vg_assert(di);
+   /* This fn isn't called until after debuginfo for this object has
+      been successfully read.  And that shouldn't happen until we have
+      both an r-x and an rw- mapping for the object.  Hence: */
+   vg_assert(di->have_rx_map);
+   vg_assert(di->have_rw_map);
+   /* degenerate case: r-x section is empty */
+   if (di->rx_map_size == 0) {
+      vg_assert(di->cfsi == NULL);
+      return;
+   }
+   /* normal case: r-x section is nonempty */
+   /* invariant (0) */
+   vg_assert(di->rx_map_size > 0);
+   /* invariant (1) */
+   for (di2 = debugInfo_list; di2; di2 = di2->next) {
+      if (di2 == di)
+         continue;
+      if (di2->rx_map_size == 0)
+         continue;
+      vg_assert(di->rx_map_avma + di->rx_map_size <= di2->rx_map_avma
+                || di2->rx_map_avma + di2->rx_map_size <= di->rx_map_avma);
+   }
+   di2 = NULL;
+   /* invariant (2) */
+   if (di->cfsi) {
+      vg_assert(di->cfsi_minavma <= di->cfsi_maxavma); /* duh! */
+      vg_assert(di->cfsi_minavma >= di->rx_map_avma);
+      vg_assert(di->cfsi_maxavma < di->rx_map_avma + di->rx_map_size);
+   }
+   /* invariants (3) and (4) */
+   if (di->cfsi) {
+      Word i;
+      vg_assert(di->cfsi_used > 0);
+      vg_assert(di->cfsi_size > 0);
+      for (i = 0; i < di->cfsi_used; i++) {
+         DiCfSI* cfsi = &di->cfsi[i];
+         vg_assert(cfsi->len > 0);
+         vg_assert(cfsi->base >= di->cfsi_minavma);
+         vg_assert(cfsi->base + cfsi->len - 1 <= di->cfsi_maxavma);
+         if (i > 0) {
+            DiCfSI* cfsip = &di->cfsi[i-1];
+            vg_assert(cfsip->base + cfsip->len <= cfsi->base);
+         }
+      }
+   } else {
+      vg_assert(di->cfsi_used == 0);
+      vg_assert(di->cfsi_size == 0);
+   }
+}
+
+
+/*--------------------------------------------------------------*/
+/*---                                                        ---*/
+/*--- TOP LEVEL: INITIALISE THE DEBUGINFO SYSTEM             ---*/
+/*---                                                        ---*/
+/*--------------------------------------------------------------*/
+
+void VG_(di_initialise) ( void )
+{
+   /* There's actually very little to do here, since everything
+      centers around the DebugInfos in debugInfo_list, they are
+      created and destroyed on demand, and each one is treated more or
+      less independently. */
+   vg_assert(debugInfo_list == NULL);
+
+   /* flush the CFI fast query cache. */
+   cfsi_cache__invalidate();
+}
+
+
  /*--------------------------------------------------------------*/
  /*---                                                        ---*/
  /*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) (LINUX) ---*/
@@ -718,6 +807,8 @@ ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV )
  
        TRACE_SYMTAB("\n------ Canonicalising the "
                     "acquired info ------\n");
+      /* invalidate the CFI unwind cache. */
+      cfsi_cache__invalidate();
        /* prepare read data for use */
        ML_(canonicaliseTables)( di );
        /* notify m_redir about it */
@@ -727,6 +818,10 @@ ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV )
        di->have_dinfo = True;
        tl_assert(di->handle > 0);
        di_handle = di->handle;
+      /* Check invariants listed in
+         Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS in
+         priv_storage.h. */
+      check_CFSI_related_invariants(di);
  
     } else {
        TRACE_SYMTAB("\n------ ELF reading failed ------\n");
@@ -734,6 +829,7 @@ ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV )
           this DebugInfo?  No - it contains info on the rw/rx
           mappings, at least. */
        di_handle = 0;
+      vg_assert(di->have_dinfo == False);
     }
  
     TRACE_SYMTAB("\n");
@@ -750,8 +846,11 @@ ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV )
     [a, a+len).  */
  void VG_(di_notify_munmap)( Addr a, SizeT len )
  {
+   Bool anyFound;
     if (0) VG_(printf)("DISCARD %#lx %#lx\n", a, a+len);
-   discard_syms_in_range(a, len);
+   anyFound = discard_syms_in_range(a, len);
+   if (anyFound)
+      cfsi_cache__invalidate();
  }
  
  
@@ -765,8 +864,11 @@ void VG_(di_notify_mprotect)( Addr a, SizeT len, UInt prot )
  #  if defined(VGP_x86_linux)
     exe_ok = exe_ok || toBool(prot & VKI_PROT_READ);
  #  endif
-   if (0 && !exe_ok)
-      discard_syms_in_range(a, len);
+   if (0 && !exe_ok) {
+      Bool anyFound = discard_syms_in_range(a, len);
+      if (anyFound)
+         cfsi_cache__invalidate();
+   }
  }
  
  #endif /* defined(VGO_linux) */
@@ -797,6 +899,10 @@ ULong VG_(di_aix5_notify_segchange)(
  {
     ULong hdl = 0;
  
+   /* play safe; always invalidate the CFI cache.  Not
+      that it should be used on AIX, but still .. */
+   cfsi_cache__invalidate();
+
     if (acquire) {
  
        Bool       ok;
@@ -840,6 +946,10 @@ ULong VG_(di_aix5_notify_segchange)(
           di->have_dinfo = True;
           hdl = di->handle;
           vg_assert(hdl > 0);
+         /* Check invariants listed in
+            Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS in
+            priv_storage.h. */
+         check_CFSI_related_invariants(di);
        } else {
           /*  Something went wrong (eg. bad XCOFF file). */
           discard_DebugInfo( di );
@@ -850,8 +960,11 @@ ULong VG_(di_aix5_notify_segchange)(
  
        /* Dump all the debugInfos whose text segments intersect
           code_start/code_len. */
+      /* CFI cache is always invalidated at start of this routine.
+         Hence it's safe to ignore the return value of
+         discard_syms_in_range. */
        if (code_len > 0)
-         discard_syms_in_range( code_start, code_len );
+         (void)discard_syms_in_range( code_start, code_len );
  
     }
  
@@ -893,11 +1006,11 @@ void VG_(di_discard_ALL_debuginfo)( void )
     If findText==False, only data symbols are searched for.
  */
  static void search_all_symtabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
-                                           /*OUT*/Int* symno,
+                                           /*OUT*/Word* symno,
                                   Bool match_anywhere_in_sym,
                                   Bool findText )
  {
-   Int        sno;
+   Word       sno;
     DebugInfo* di;
     Bool       inRange;
  
@@ -944,9 +1057,9 @@ static void search_all_symtabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
     *pdi to the relevant DebugInfo, and *locno to the loctab entry
     *number within that.  If not found, *pdi is set to NULL. */
  static void search_all_loctabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
-                                           /*OUT*/Int* locno )
+                                           /*OUT*/Word* locno )
  {
-   Int        lno;
+   Word       lno;
     DebugInfo* di;
     for (di = debugInfo_list; di != NULL; di = di->next) {
        if (di->text_present
@@ -977,7 +1090,7 @@ Bool get_sym_name ( Bool demangle, Addr a, Char* buf, Int nbuf,
                      Bool findText, /*OUT*/OffT* offsetP )
  {
     DebugInfo* di;
-   Int        sno;
+   Word       sno;
     Int        offset;
  
     search_all_symtabs ( a, &di, &sno, match_anywhere_in_sym, findText );
@@ -1019,7 +1132,7 @@ Bool get_sym_name ( Bool demangle, Addr a, Char* buf, Int nbuf,
  Addr VG_(get_tocptr) ( Addr guest_code_addr )
  {
     DebugInfo* si;
-   Int        sno;
+   Word       sno;
     search_all_symtabs ( guest_code_addr, 
                          &si, &sno,
                          True/*match_anywhere_in_fun*/,
@@ -1186,7 +1299,7 @@ DebugInfo* VG_(find_seginfo) ( Addr a )
  Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
  {
     DebugInfo* si;
-   Int      locno;
+   Word       locno;
     search_all_loctabs ( a, &si, &locno );
     if (si == NULL) 
        return False;
@@ -1198,7 +1311,7 @@ Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
  Bool VG_(get_linenum)( Addr a, UInt* lineno )
  {
     DebugInfo* si;
-   Int      locno;
+   Word       locno;
     search_all_loctabs ( a, &si, &locno );
     if (si == NULL) 
        return False;
@@ -1217,7 +1330,7 @@ Bool VG_(get_filename_linenum) ( Addr a,
                                   /*OUT*/UInt* lineno )
  {
     DebugInfo* si;
-   Int      locno;
+   Word       locno;
  
     vg_assert( (dirname == NULL && dirname_available == NULL)
                ||
@@ -1541,73 +1654,175 @@ UWord evalCfiExpr ( XArray* exprs, Int ix,
  }
  
  
-/* The main function for DWARF2/3 CFI-based stack unwinding.
-   Given an IP/SP/FP triple, produce the IP/SP/FP values for the
-   previous frame, if possible. */
-/* Returns True if OK.  If not OK, *{ip,sp,fp}P are not changed. */
-/* NOTE: this function may rearrange the order of entries in the
-   DebugInfo list. */
-Bool VG_(use_CF_info) ( /*MOD*/Addr* ipP,
-                        /*MOD*/Addr* spP,
-                        /*MOD*/Addr* fpP,
-                        Addr min_accessible,
-                        Addr max_accessible )
-{
-   Bool     ok;
-   Int      i;
-   DebugInfo* si;
-   DiCfSI*  cfsi = NULL;
-   Addr     cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev;
+/* Search all the DebugInfos in the entire system, to find the DiCfSI
+   that pertains to 'ip'. 
  
-   CfiExprEvalContext eec;
+   If found, set *diP to the DebugInfo in which it resides, and
+   *ixP to the index in that DebugInfo's cfsi array.
  
-   static UInt n_search = 0;
-   static UInt n_steps = 0;
+   If not found, set *diP to (DebugInfo*)1 and *ixP to zero.
+*/
+__attribute__((noinline))
+static void find_DiCfSI ( /*OUT*/DebugInfo** diP, 
+                          /*OUT*/Word* ixP,
+                          Addr ip )
+{
+   DebugInfo* di;
+   Word       i = -1;
+
+   static UWord n_search = 0;
+   static UWord n_steps = 0;
     n_search++;
  
-   if (0) VG_(printf)("search for %#lx\n", *ipP);
+   if (0) VG_(printf)("search for %#lx\n", ip);
  
-   for (si = debugInfo_list; si != NULL; si = si->next) {
+   for (di = debugInfo_list; di != NULL; di = di->next) {
+      Word j;
        n_steps++;
  
        /* Use the per-DebugInfo summary address ranges to skip
           inapplicable DebugInfos quickly. */
-      if (si->cfsi_used == 0)
+      if (di->cfsi_used == 0)
           continue;
-      if (*ipP < si->cfsi_minavma || *ipP > si->cfsi_maxavma)
+      if (ip < di->cfsi_minavma || ip > di->cfsi_maxavma)
           continue;
  
-      i = ML_(search_one_cfitab)( si, *ipP );
-      if (i != -1) {
-         vg_assert(i >= 0 && i < si->cfsi_used);
-         cfsi = &si->cfsi[i];
-         break;
+      /* It might be in this DebugInfo.  Search it. */
+      j = ML_(search_one_cfitab)( di, ip );
+      vg_assert(j >= -1 && j < (Word)di->cfsi_used);
+
+      if (j != -1) {
+         i = j;
+         break; /* found it */
        }
     }
  
-   if (cfsi == NULL)
-      return False;
+   if (i == -1) {
  
-   if (0 && ((n_search & 0x7FFFF) == 0))
-      VG_(printf)("VG_(use_CF_info): %u searches, "
-                  "%u DebugInfos looked at\n", 
-                  n_search, n_steps);
+      /* we didn't find it. */
+      *diP = (DebugInfo*)1;
+      *ixP = 0;
+
+   } else {
+
+      /* found it. */
+      /* ensure that di is 4-aligned (at least), so it can't possibly
+         be equal to (DebugInfo*)1. */
+      vg_assert(di && VG_IS_4_ALIGNED(di));
+      vg_assert(i >= 0 && i < di->cfsi_used);
+      *diP = di;
+      *ixP = i;
+
+      /* Start of performance-enhancing hack: on every 16th search
+         (mask 0xF on n_search; originally 64, chosen hackily after
+         profiling), if it succeeded, move the found DebugInfo one
+         step closer to the start of the list.  This makes future
+         searches cheaper.  For starting konqueror on amd64, this in
+         fact reduces the total amount of searching done by the above
+         find-the-right-DebugInfo loop by more than a factor of 20. */
+      if ((n_search & 0xF) == 0) {
+         /* Move di one step closer to the start of the list. */
+         move_DebugInfo_one_step_forward( di );
+      }
+      /* End of performance-enhancing hack. */
+
+      if (0 && ((n_search & 0x7FFFF) == 0))
+         VG_(printf)("find_DiCfSI: %lu searches, "
+                     "%lu DebugInfos looked at\n", 
+                     n_search, n_steps);
  
-   /* Start of performance-enhancing hack: once every 64 (chosen
-      hackily after profiling) successful searches, move the found
-      DebugInfo one step closer to the start of the list.  This makes
-      future searches cheaper.  For starting konqueror on amd64, this
-      in fact reduces the total amount of searching done by the above
-      find-the-right-DebugInfo loop by more than a factor of 20. */
-   if ((n_search & 0x3F) == 0) {
-      /* Move si one step closer to the start of the list. */
-      move_DebugInfo_one_step_forward( si );
     }
-   /* End of performance-enhancing hack. */
+
+}
+
+
+/* Now follows a mechanism for caching queries to find_DiCfSI, since
+   they are extremely frequent on amd64-linux, during stack unwinding.
+
+   Each cache entry binds an ip value to a (di, ix) pair.  Possible
+   values:
+
+   di is non-null, ix >= 0  ==>  cache slot in use, "di->cfsi[ix]"
+   di is (DebugInfo*)1      ==>  cache slot in use, no associated di
+   di is NULL               ==>  cache slot not in use
+
+   Hence simply zeroing out the entire cache invalidates all
+   entries.
+
+   Why not map ip values directly to DiCfSI*'s?  Because this would
+   cause problems if/when the cfsi array is moved due to resizing.
+   Instead we cache the .cfsi array index value, which should be
+   invariant across resizing.  (That said, I don't think the current
+   implementation will resize the array during queries, since the
+   DiCfSI records are added all at once, when the debuginfo for an
+   object is read, and are not changed ever thereafter.) */
+
+#define N_CFSI_CACHE 511
+
+typedef
+   struct { Addr ip; DebugInfo* di; Word ix; }
+   CFSICacheEnt;
+
+static CFSICacheEnt cfsi_cache[N_CFSI_CACHE];
+
+static void cfsi_cache__invalidate ( void ) {
+   VG_(memset)(&cfsi_cache, 0, sizeof(cfsi_cache));
+}
+
+
+/* The main function for DWARF2/3 CFI-based stack unwinding.
+   Given an IP/SP/FP triple, produce the IP/SP/FP values for the
+   previous frame, if possible. */
+/* Returns True if OK.  If not OK, *{ip,sp,fp}P are not changed. */
+/* NOTE: this function may rearrange the order of entries in the
+   DebugInfo list. */
+Bool VG_(use_CF_info) ( /*MOD*/Addr* ipP,
+                        /*MOD*/Addr* spP,
+                        /*MOD*/Addr* fpP,
+                        Addr min_accessible,
+                        Addr max_accessible )
+{
+   Bool       ok;
+   DebugInfo* di;
+   DiCfSI*    cfsi = NULL;
+   Addr       cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev;
+
+   CfiExprEvalContext eec;
+
+   static UWord n_q = 0, n_m = 0;
+   n_q++;
+   if (0 && 0 == (n_q & 0x1FFFFF))
+      VG_(printf)("QQQ %lu %lu\n", n_q, n_m);
+
+   { UWord hash = (*ipP) % N_CFSI_CACHE;
+     CFSICacheEnt* ce = &cfsi_cache[hash];
+
+     if (LIKELY(ce->ip == *ipP) && LIKELY(ce->di != NULL)) {
+        /* found an entry in the cache .. */
+     } else {
+        /* not found in cache.  Search and update. */
+        n_m++;
+        ce->ip = *ipP;
+        find_DiCfSI( &ce->di, &ce->ix, *ipP );
+     }
+
+     if (UNLIKELY(ce->di == (DebugInfo*)1)) {
+        /* no DiCfSI for this address */
+        cfsi = NULL;
+        di = NULL;
+     } else {
+        /* found a DiCfSI for this address */
+        di = ce->di;
+        cfsi = &di->cfsi[ ce->ix ];
+     }
+   }
+
+   if (UNLIKELY(cfsi == NULL))
+      return False; /* no info.  Nothing we can do. */
  
     if (0) {
        VG_(printf)("found cfisi: "); 
-      ML_(ppDiCfSI)(si->cfsi_exprs, cfsi);
+      ML_(ppDiCfSI)(di->cfsi_exprs, cfsi);
     }
  
     ipPrev = spPrev = fpPrev = 0;
@@ -1628,7 +1843,7 @@ Bool VG_(use_CF_info) ( /*MOD*/Addr* ipP,
        case CFIC_EXPR: 
           if (0) {
              VG_(printf)("CFIC_EXPR: ");
-            ML_(ppCfiExpr)(si->cfsi_exprs, cfsi->cfa_off);
+            ML_(ppCfiExpr)(di->cfsi_exprs, cfsi->cfa_off);
              VG_(printf)("\n");
           }
           eec.ipHere = ipHere;
@@ -1637,7 +1852,7 @@ Bool VG_(use_CF_info) ( /*MOD*/Addr* ipP,
           eec.min_accessible = min_accessible;
           eec.max_accessible = max_accessible;
           ok = True;
-         cfa = evalCfiExpr(si->cfsi_exprs, cfsi->cfa_off, &eec, &ok );
+         cfa = evalCfiExpr(di->cfsi_exprs, cfsi->cfa_off, &eec, &ok );
           if (!ok) return False;
           break;
        default: 
@@ -1667,14 +1882,14 @@ Bool VG_(use_CF_info) ( /*MOD*/Addr* ipP,
                 break;                                   \
              case CFIR_EXPR:                             \
                 if (0)                                   \
-                  ML_(ppCfiExpr)(si->cfsi_exprs,_off);  \
+                  ML_(ppCfiExpr)(di->cfsi_exprs,_off);  \
                 eec.ipHere = ipHere;                     \
                 eec.spHere = spHere;                     \
                 eec.fpHere = fpHere;                     \
                 eec.min_accessible = min_accessible;     \
                 eec.max_accessible = max_accessible;     \
                 ok = True;                               \
-               _prev = evalCfiExpr(si->cfsi_exprs, _off, &eec, &ok ); \
+               _prev = evalCfiExpr(di->cfsi_exprs, _off, &eec, &ok ); \
                 if (!ok) return False;                   \
                 break;                                   \
              default:                                    \
diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h

index fedd90c6e3f8ae41c3f6478ca099b49ca957d61d..eb174600855ff438c86e13cb15e555e8b20a4871 100644 (file)
--- a/coregrind/m_debuginfo/priv_storage.h
+++ b/coregrind/m_debuginfo/priv_storage.h
@@ -308,7 +308,46 @@ struct _DebugInfo {
        in some obscure circumstances (to do with data/sdata/bss) it is
        possible for the mapping to be present but have zero size.
        Certainly text_ is mandatory on all platforms; not sure about
-      the rest though. */
+      the rest though. 
+
+      Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
+ 
+      either (rx_map_size == 0 && cfsi == NULL) (the degenerate case)
+
+      or the normal case, which is the AND of the following:
+      (0) rx_map_size > 0
+      (1) no two DebugInfos with rx_map_size > 0 
+          have overlapping [rx_map_avma,+rx_map_size)
+      (2) [cfsi_minavma,cfsi_maxavma] does not extend 
+          beyond [rx_map_avma,+rx_map_size); that is, the former is a 
+          subrange or equal to the latter.
+      (3) all DiCfSI in the cfsi array have ranges that fall within
+          [rx_map_avma,+rx_map_size).
+      (4) all DiCfSI in the cfsi array are non-overlapping
+
+      The cumulative effect of these restrictions is to ensure that
+      all the DiCfSI records in the entire system are non overlapping.
+      Hence any address falls into either exactly one DiCfSI record,
+      or none.  Hence it is safe to cache the results of searches for
+      DiCfSI records.  This is the whole point of these restrictions.
+      The caching of DiCfSI searches is done in VG_(use_CF_info).  The
+      cache is flushed after any change to debugInfo_list.  DiCfSI
+      searches are cached because they are central to stack unwinding
+      on amd64-linux.
+
+      Where are these invariants imposed and checked?
+
+      They are checked after a successful read of debuginfo into
+      a DebugInfo*, in check_CFSI_related_invariants.
+
+      (1) is not really imposed anywhere.  We simply assume that the
+      kernel will not map the text segments from two different objects
+      into the same space.  Sounds reasonable.
+
+      (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
+      (3) is ensured by ML_(addDiCfSI).
+      (4) is ensured by canonicaliseCFI.
+   */
     /* .text */
     Bool   text_present;
     Addr   text_avma;
@@ -372,8 +411,8 @@ struct _DebugInfo {
        records require any expression nodes, they are stored in
        cfsi_exprs. */
     DiCfSI* cfsi;
-   UInt    cfsi_used;
-   UInt    cfsi_size;
+   UWord   cfsi_used;
+   UWord   cfsi_size;
     Addr    cfsi_minavma;
     Addr    cfsi_maxavma;
     XArray* cfsi_exprs; /* XArray of CfiExpr */
@@ -464,17 +503,17 @@ extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
  
  /* Find a symbol-table index containing the specified pointer, or -1
     if not found.  Binary search.  */
-extern Int ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
-                                    Bool match_anywhere_in_sym,
-                                    Bool findText );
+extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
+                                     Bool match_anywhere_in_sym,
+                                     Bool findText );
  
  /* Find a location-table index containing the specified pointer, or -1
     if not found.  Binary search.  */
-extern Int ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
+extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
  
  /* Find a CFI-table index containing the specified pointer, or -1 if
     not found.  Binary search.  */
-extern Int ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
+extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
  
  /* ------ Misc ------ */
  
diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c

index 4076d0034594c03397d5f2041143f7ea4d14a047..c534dcea6d977c08a71dcad8ba2e9d006ba7ab55 100644 (file)
--- a/coregrind/m_debuginfo/storage.c
+++ b/coregrind/m_debuginfo/storage.c
@@ -978,9 +978,9 @@ static Int compare_DiSym ( void* va, void* vb )
   */
  static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b )
  {
-   Int cmp;
-   Int lena, lenb;             /* full length */
-   Int vlena, vlenb;           /* length without version */
+   Word cmp;
+   Word lena, lenb;            /* full length */
+   Word vlena, vlenb;          /* length without version */
     const UChar *vpa, *vpb;
  
     Bool preferA = False;
@@ -1062,7 +1062,7 @@ static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b )
  
  static void canonicaliseSymtab ( struct _DebugInfo* di )
  {
-   Int   i, j, n_merged, n_truncated;
+   Word  i, j, n_merged, n_truncated;
     Addr  s1, s2, e1, e2;
  
  #  define SWAP(ty,aa,bb) \
@@ -1095,14 +1095,14 @@ static void canonicaliseSymtab ( struct _DebugInfo* di )
              di->symtab[di->symtab_used++] = di->symtab[i];
           }
        }
-      TRACE_SYMTAB( "canonicaliseSymtab: %d symbols merged\n", n_merged);
+      TRACE_SYMTAB( "canonicaliseSymtab: %ld symbols merged\n", n_merged);
     }
     while (n_merged > 0);
  
     /* Detect and "fix" overlapping address ranges. */
     n_truncated = 0;
  
-   for (i = 0; i < ((Int)di->symtab_used) -1; i++) {
+   for (i = 0; i < ((Word)di->symtab_used) -1; i++) {
  
        vg_assert(di->symtab[i].addr <= di->symtab[i+1].addr);
  
@@ -1149,7 +1149,7 @@ static void canonicaliseSymtab ( struct _DebugInfo* di )
        /* It may be that the i+1 entry now needs to be moved further
           along to maintain the address order requirement. */
        j = i+1;
-      while (j < ((Int)di->symtab_used)-1 
+      while (j < ((Word)di->symtab_used)-1 
               && di->symtab[j].addr > di->symtab[j+1].addr) {
           SWAP(DiSym,di->symtab[j],di->symtab[j+1]);
           j++;
@@ -1160,7 +1160,7 @@ static void canonicaliseSymtab ( struct _DebugInfo* di )
     if (n_truncated > 0) goto cleanup_more;
  
     /* Ensure relevant postconditions hold. */
-   for (i = 0; i < ((Int)di->symtab_used)-1; i++) {
+   for (i = 0; i < ((Word)di->symtab_used)-1; i++) {
        /* No zero-sized symbols. */
        vg_assert(di->symtab[i].size > 0);
        /* In order. */
@@ -1189,7 +1189,7 @@ static Int compare_DiLoc ( void* va, void* vb )
  
  static void canonicaliseLoctab ( struct _DebugInfo* di )
  {
-   Int i, j;
+   Word i, j;
  
  #  define SWAP(ty,aa,bb) \
        do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
@@ -1202,7 +1202,7 @@ static void canonicaliseLoctab ( struct _DebugInfo* di )
                            sizeof(*di->loctab), compare_DiLoc);
  
     /* If two adjacent entries overlap, truncate the first. */
-   for (i = 0; i < ((Int)di->loctab_used)-1; i++) {
+   for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
        vg_assert(di->loctab[i].size < 10000);
        if (di->loctab[i].addr + di->loctab[i].size > di->loctab[i+1].addr) {
           /* Do this in signed int32 because the actual .size fields
@@ -1222,7 +1222,7 @@ static void canonicaliseLoctab ( struct _DebugInfo* di )
     /* Zap any zero-sized entries resulting from the truncation
        process. */
     j = 0;
-   for (i = 0; i < (Int)di->loctab_used; i++) {
+   for (i = 0; i < (Word)di->loctab_used; i++) {
        if (di->loctab[i].size > 0) {
           if (j != i)
              di->loctab[j] = di->loctab[i];
@@ -1232,7 +1232,7 @@ static void canonicaliseLoctab ( struct _DebugInfo* di )
     di->loctab_used = j;
  
     /* Ensure relevant postconditions hold. */
-   for (i = 0; i < ((Int)di->loctab_used)-1; i++) {
+   for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
        /* 
        VG_(printf)("%d   (%d) %d 0x%x\n", 
                     i, di->loctab[i+1].confident, 
@@ -1272,7 +1272,7 @@ static Int compare_DiCfSI ( void* va, void* vb )
  
  static void canonicaliseCFI ( struct _DebugInfo* di )
  {
-   Int   i, j;
+   Word  i, j;
     const Addr minAvma = 0;
     const Addr maxAvma = ~minAvma;
  
@@ -1287,7 +1287,7 @@ static void canonicaliseCFI ( struct _DebugInfo* di )
        address range contained in cfsi[0 .. cfsi_used-1]. */
     di->cfsi_minavma = maxAvma; 
     di->cfsi_maxavma = minAvma;
-   for (i = 0; i < (Int)di->cfsi_used; i++) {
+   for (i = 0; i < (Word)di->cfsi_used; i++) {
        Addr here_min = di->cfsi[i].base;
        Addr here_max = di->cfsi[i].base + di->cfsi[i].len - 1;
        if (here_min < di->cfsi_minavma)
@@ -1297,7 +1297,7 @@ static void canonicaliseCFI ( struct _DebugInfo* di )
     }
  
     if (di->trace_cfi)
-      VG_(printf)("canonicaliseCfiSI: %d entries, %#lx .. %#lx\n",
+      VG_(printf)("canonicaliseCfiSI: %ld entries, %#lx .. %#lx\n",
                    di->cfsi_used,
                   di->cfsi_minavma, di->cfsi_maxavma);
  
@@ -1305,9 +1305,9 @@ static void canonicaliseCFI ( struct _DebugInfo* di )
     VG_(ssort)(di->cfsi, di->cfsi_used, sizeof(*di->cfsi), compare_DiCfSI);
  
     /* If two adjacent entries overlap, truncate the first. */
-   for (i = 0; i < (Int)di->cfsi_used-1; i++) {
+   for (i = 0; i < (Word)di->cfsi_used-1; i++) {
        if (di->cfsi[i].base + di->cfsi[i].len > di->cfsi[i+1].base) {
-         Int new_len = di->cfsi[i+1].base - di->cfsi[i].base;
+         Word new_len = di->cfsi[i+1].base - di->cfsi[i].base;
           /* how could it be otherwise?  The entries are sorted by the
              .base field. */         
           vg_assert(new_len >= 0);
@@ -1319,7 +1319,7 @@ static void canonicaliseCFI ( struct _DebugInfo* di )
     /* Zap any zero-sized entries resulting from the truncation
        process. */
     j = 0;
-   for (i = 0; i < (Int)di->cfsi_used; i++) {
+   for (i = 0; i < (Word)di->cfsi_used; i++) {
        if (di->cfsi[i].len > 0) {
           if (j != i)
              di->cfsi[j] = di->cfsi[i];
@@ -1330,7 +1330,7 @@ static void canonicaliseCFI ( struct _DebugInfo* di )
     di->cfsi_used = j;
  
     /* Ensure relevant postconditions hold. */
-   for (i = 0; i < (Int)di->cfsi_used; i++) {
+   for (i = 0; i < (Word)di->cfsi_used; i++) {
        /* No zero-length ranges. */
        vg_assert(di->cfsi[i].len > 0);
        /* Makes sense w.r.t. summary address range */
@@ -1375,9 +1375,9 @@ void ML_(canonicaliseTables) ( struct _DebugInfo* di )
  /* Find a symbol-table index containing the specified pointer, or -1
     if not found.  Binary search.  */
  
-Int ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
-                             Bool match_anywhere_in_sym,
-                             Bool findText )
+Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
+                              Bool match_anywhere_in_sym,
+                              Bool findText )
  {
     Addr a_mid_lo, a_mid_hi;
     Word mid, size, 
@@ -1408,7 +1408,7 @@ Int ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
  /* Find a location-table index containing the specified pointer, or -1
     if not found.  Binary search.  */
  
-Int ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr )
+Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr )
  {
     Addr a_mid_lo, a_mid_hi;
     Word mid, 
@@ -1432,10 +1432,10 @@ Int ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr )
  /* Find a CFI-table index containing the specified pointer, or -1
     if not found.  Binary search.  */
  
-Int ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr )
+Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr )
  {
     Addr a_mid_lo, a_mid_hi;
-   Int  mid, size, 
+   Word mid, size, 
          lo = 0, 
          hi = di->cfsi_used-1;
     while (True) {
diff --git a/coregrind/m_execontext.c b/coregrind/m_execontext.c

index 08de8f045f47072cbf10b0c1903e8a9cebd9ab08..0e430c8050e4dc9b6c14def3870cc6b81ef3bf1a 100644 (file)
--- a/coregrind/m_execontext.c
+++ b/coregrind/m_execontext.c
@@ -469,6 +469,11 @@ ExeContext* VG_(get_ExeContext_from_ECU)( UInt ecu )
     return NULL;
  }
  
+ExeContext* VG_(make_ExeContext_from_StackTrace)( Addr* ips, UInt n_ips )
+{
+   return record_ExeContext_wrk2(ips, n_ips);
+}
+
  /*--------------------------------------------------------------------*/
  /*--- end                                           m_execontext.c ---*/
  /*--------------------------------------------------------------------*/
diff --git a/coregrind/m_main.c b/coregrind/m_main.c

index 8eb759a985c01a1384532853333c5cf3e6dc6169..9a16050fdb77b43be630a27302b011ffd7fc28bb 100644 (file)
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -1359,6 +1359,12 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp )
     //
     //============================================================
  
+   //--------------------------------------------------------------
+   // Initialise m_debuginfo
+   //  p: dynamic memory allocation
+   VG_(debugLog)(1, "main", "Initialise m_debuginfo\n");
+   VG_(di_initialise)();
+
     //--------------------------------------------------------------
     // Look for alternative libdir                                  
     { HChar *cp = VG_(getenv)(VALGRIND_LIB);
@@ -1729,6 +1735,7 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp )
     //   p: setup_code_redirect_table [so that redirs can be recorded]
     //   p: mallocfree
     //   p: probably: setup fds and process CLOs, so that logging works
+   //   p: initialise m_debuginfo
     //
     // While doing this, make a note of the debuginfo-handles that
     // come back from VG_(di_notify_mmap)/VG_(di_aix5_notify_segchange).
diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c

index 79de72cc31eff05f3d0eb0cf1179183bb7c660e7..f835dcaf8fad551e1c59646b12e4d026bcc51071 100644 (file)
--- a/coregrind/m_stacktrace.c
+++ b/coregrind/m_stacktrace.c
@@ -161,7 +161,8 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
           fails, and is expensive. */
        /* Deal with frames resulting from functions which begin "pushl%
           ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
-      if (fp_min <= fp && fp <= fp_max) {
+      if (fp_min <= fp && fp <= fp_max
+                                - 1 * sizeof(UWord)/*see comment below*/) {
           /* fp looks sane, so use it. */
           ip = (((UWord*)fp)[1]);
           sp = fp + sizeof(Addr) /*saved %ebp*/ 
@@ -251,7 +252,11 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
           the start of the fn, like GDB does, there's no reliable way
           to tell.  Hence the hack of first trying out CFI, and if that
           fails, then use this as a fallback. */
-      if (fp_min <= fp && fp <= fp_max) {
+      /* Note: re "- 1 * sizeof(UWord)", need to take account of the
+         fact that we are prodding at & ((UWord*)fp)[1] and so need to
+         adjust the limit check accordingly.  Omitting this has been
+         observed to cause segfaults on rare occasions. */
+      if (fp_min <= fp && fp <= fp_max - 1 * sizeof(UWord)) {
           /* fp looks sane, so use it. */
           ip = (((UWord*)fp)[1]);
           sp = fp + sizeof(Addr) /*saved %rbp*/ 
@@ -371,7 +376,7 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
  
           /* Try to derive a new (ip,fp) pair from the current set. */
  
-         if (fp_min <= fp && fp <= fp_max) {
+         if (fp_min <= fp && fp <= fp_max - lr_offset * sizeof(UWord)) {
              /* fp looks sane, so use it. */
  
              if (i == 1 && lr_is_first_RA)
diff --git a/coregrind/m_xarray.c b/coregrind/m_xarray.c

index 8f749d230408c4d17f5f707a0b31934522e0ee6e..6393050959cb3856b9b7cdd455257ac83dc93fdf 100644 (file)
--- a/coregrind/m_xarray.c
+++ b/coregrind/m_xarray.c
@@ -162,7 +162,7 @@ static inline void ensureSpaceXA ( struct _XArray* xa )
           else if (xa->elemSzB == 2) newsz = 4;
           else newsz = 2;
        } else {
-         newsz = 1 + (3 * xa->totsizeE) / 2;  /* 2 * xa->totsizeE; */
+         newsz = 2 + (3 * xa->totsizeE) / 2;  /* 2 * xa->totsizeE; */
        }
        if (0 && xa->totsizeE >= 10000) 
           VG_(printf)("addToXA: increasing from %ld to %ld\n", 
diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h

index 3965a716451e113d09020afeb359512946165296..dd4f89496bf702d2c113e311e5a67681ebc6147c 100644 (file)
--- a/coregrind/pub_core_debuginfo.h
+++ b/coregrind/pub_core_debuginfo.h
@@ -39,6 +39,9 @@
  
  #include "pub_tool_debuginfo.h"
  
+/* Initialise the entire module.  Must be called first of all. */
+extern void VG_(di_initialise) ( void );
+
  /* LINUX: Notify the debuginfo system about a new mapping, or the
     disappearance of such, or a permissions change on an existing
     mapping.  This is the way new debug information gets loaded.  If
diff --git a/glibc-2.34567-NPTL-helgrind.supp b/glibc-2.34567-NPTL-helgrind.supp

index 9121da576ded591eb7ba92ef8bfc54626e0548ef..ba6d50c73d418e50bc4063de626512f1726d2963 100644 (file)
--- a/glibc-2.34567-NPTL-helgrind.supp
+++ b/glibc-2.34567-NPTL-helgrind.supp
@@ -210,6 +210,11 @@
     Helgrind:Race
     fun:__lll_*lock_*
  }
+{
+   helgrind-glibc28-112
+   Helgrind:Race
+   fun:pthread_create@*
+}
  
  ######------------ glibc-2.7 specific ---------######
  #
diff --git a/helgrind/Makefile.am b/helgrind/Makefile.am

index 1776adc3d49cdeab2a28de879e2c2a27f11438fa..502f6f9a801912493840739635ec81c213d1b2a7 100644 (file)
--- a/helgrind/Makefile.am
+++ b/helgrind/Makefile.am
@@ -70,7 +70,9 @@ vgpreload_helgrind_ppc64_aix5_so_LDFLAGS      = \
         $(PRELOAD_LDFLAGS_PPC64_AIX5) \
         $(LIBREPLACEMALLOC_LDFLAGS_PPC64_AIX5)
  
-HELGRIND_SOURCES_COMMON = hg_wordset.c hg_main.c
+HELGRIND_SOURCES_COMMON = \
+       hg_basics.c hg_lock_n_thread.c hg_wordset.c libhb_core.c \
+       hg_errors.c hg_main.c
  
  helgrind_x86_linux_SOURCES      = $(HELGRIND_SOURCES_COMMON)
  helgrind_x86_linux_CPPFLAGS     = $(AM_CPPFLAGS_X86_LINUX)
@@ -118,4 +120,7 @@ hgincludedir = $(includedir)/valgrind
  
  hginclude_HEADERS = helgrind.h
  
-noinst_HEADERS = hg_wordset.h
+noinst_HEADERS = \
+       hg_basics.h hg_lock_n_thread.h hg_errors.h hg_wordset.h
+
+EXTRA_DIST = README_MSMProp2.txt README_YARD.txt
diff --git a/helgrind/README_MSMProp2.txt b/helgrind/README_MSMProp2.txt

new file mode 100644 (file)

index 0000000..6b4ac5f
--- /dev/null
+++ b/helgrind/README_MSMProp2.txt
@@ -0,0 +1,156 @@
+
+MSMProp2, a simplified but functionally equivalent version of MSMProp1
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Julian Seward, OpenWorks Ltd, 19 August 2008
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Note that this file does NOT describe the state machine used in the
+svn://svn.valgrind.org/branches/YARD version of Helgrind.  That state
+machine is different again from any previously described machine.
+
+See the file README_YARD.txt for more details on YARD.
+
+                     ----------------------
+
+In early 2008 Konstantin Serebryany proposed "MSMProp1", a memory
+state machine for data race detection.  It is described at
+http://code.google.com/p/data-race-test/wiki/MSMProp1
+
+Implementation experience shows MSMProp1 is useful, but difficult to
+implement efficiently.  In particular keeping the memory usage under
+control is complex and difficult.
+
+This note points out a key simplification of MSMProp1, which makes it
+easier to implement without changing the functionality.
+
+
+The idea
+~~~~~~~~
+
+The core of the idea pertains to the "Condition" entry for MSMProp1
+state machine rules E5 and E6(r).  These are, respectively:
+
+    HB(SS, currS)  and its negation
+    ! HB(SS, currS).
+
+Here, SS is a set of segments, and currS is a single segment.  Each
+segment contains a vector timestamp.  The expression "HB(SS, currS)"
+is intended to denote
+
+   for each segment S in SS  .  happens_before(S,currS)
+
+where happens_before(S,T) means that S's vector timestamp is ordered
+before-or-equal to T's vector timestamp.
+
+In words, the expression
+
+   for each segment S in SS  .  happens_before(S,currS)
+
+is equivalent to saying that currS has a timestamp which is
+greater-than-or-equal to the timestamps of all the segments in SS.
+
+The key observation is that this is equivalent to
+
+   happens_before( JOIN(SS), currS )
+
+where JOIN is the lattice-theoretic "max" or "least upper bound"
+operation on vector clocks.  Given the definition of HB,
+happens_before and (binary) JOIN, this is easy to prove.
+
+
+The consequences
+~~~~~~~~~~~~~~~~
+
+With that observation in place, it is a short step to observe that
+storing segment sets in MSMProp1 is unnecessary.  Instead of
+storing a segment set in each shadow value, just store and
+update a single vector timestamp.  The following two equivalences
+hold:
+
+   MSMProp1                        MSMProp2
+
+   adding a segment S              join-ing S's vector timestamp
+   to the segment-set              to the current vector timestamp
+
+   HB(SS,currS)                    happens_before(
+                                      current vector timestamp,
+                                      currS's timestamp )
+
+Once it is no longer necessary to represent segment sets, it then
+also becomes unnecessary to represent segments.  This constitutes
+a significant simplification to the implementation.
+
+
+The resulting state machine, MSMProp2
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+MSMProp2 is isomorphic to MSMProp1, with the following changes:
+
+States are    New,   Read(VTS,LS),   Write(VTS,LS)
+
+where LS is a lockset (as before) and VTS is a vector timestamp.
+
+For a thread T with current lockset 'currLS' and current VTS 'currVTS'
+making a memory access, the new rules are
+
+Name  Old-State         Op  Guard         New-State              Race-If
+
+E1  New                 rd  True          Read(currVTS,currLS)   False
+
+E2  New                 wr  True          Write(currVTS,currLS)  False
+
+E3  Read(oldVTS,oldLS)  rd  True          Read(newVTS,newLS)     False
+
+E4  Read(oldVTS,oldLS)  wr  True          Write(newVTS,newLS)    #newLS == 0 
+                                                                 && !hb(oldVTS,currVTS)
+
+E5  Write(oldVTS,oldLS) rd  hb(oldVTS,    Read(currVTS,currLS)   False
+                               currVTS)
+
+E6r Write(oldVTS,oldLS) rd  !hb(oldVTS,   Write(newVTS,newLS)    #newLS == 0 
+                                currVTS)                         && !hb(oldVTS,currVTS)
+
+E6w Write(oldVTS,oldLS) wr  True          Write(newVTS,newLS)    #newLS == 0 
+                                                                 && !hb(oldVTS,currVTS)
+
+   where newVTS = join2(oldVTS,currVTS)
+
+         newLS  = if   hb(oldVTS,currVTS)
+                  then currLS
+                  else intersect(oldLS,currLS)
+
+         hb(vts1, vts2) =  vts1 happens before or is equal to vts2
+
+
+Interpretation of the states
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+I always found the state names in MSMProp1 confusing.  Both MSMProp1
+and MSMProp2 are easier to understand if the states Read and Write are
+renamed, like this:
+
+   old name           new name
+
+   Read               WriteConstraint
+   Write              AllConstraint
+
+The effect of a state Read(VTS,LS) is to constrain all later-observed
+writes so that either (1) the writing thread holds at least one lock
+in common with LS, or (2) those writes must happen-after VTS.  If
+neither of those two conditions holds, a race is reported.
+
+Hence a Read state places a constraint on writes.
+
+The effect of a state Write(VTS,LS) is similar, but it applies to all
+later-observed accesses: either (1) the accessing thread holds at
+least one lock in common with LS, or (2) those accesses must
+happen-after VTS.  If neither of those two conditions holds, a race is
+reported.
+
+Hence a Write state places a constraint on all accesses.
+
+If we ignore the LS component of these states, the intuitive
+interpretation of the VTS component is that it states the earliest
+vector-time that the next write / access may safely happen.
+
diff --git a/helgrind/README_YARD.txt b/helgrind/README_YARD.txt

new file mode 100644 (file)

index 0000000..992769c
--- /dev/null
+++ b/helgrind/README_YARD.txt
@@ -0,0 +1,34 @@
+
+YARD, Yet Another Race Detector, built on the Helgrind framework
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Julian Seward, OpenWorks Ltd, 19 August 2008
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The YARD race detector lives in svn://svn.valgrind.org/branches/YARD.
+
+It uses a new and relatively simple race detection engine, based on
+the idea of shadowing each memory location with two vector timestamps,
+indicating respectively the "earliest safe read point" and "earliest
+safe write point".  As far as I know this is a novel approach.  Some
+features of the implementation:
+
+* Modularity.  The entire race detection engine is placed in a
+  standalone library (libhb_core.c) with a simple interface (libhb.h).
+  This makes it easier to debug and verify the engine; indeed it can
+  be built as a standalone executable with test harness using "make -f
+  Makefile_sa".
+
+* Simplified and scalable storage management, so that large programs,
+  with many synchronisation events, can be handled.
+
+* Ability to report both call stacks involved in a race, without
+  excessive time or space overhead.
+
+* Pure happens-before operation, so as not to give any false
+  positives.
+
+To use, build as usual and run as "--tool=helgrind".
+
+You can disable lock order checking with --track-lockorders=no, as it
+sometimes produces an annoying amount of output.
diff --git a/helgrind/helgrind.h b/helgrind/helgrind.h

index b4de044586636833870aa09ac5e0c7d27affc56f..46359a999951b04cca736f34194e53dc7578f551 100644 (file)
--- a/helgrind/helgrind.h
+++ b/helgrind/helgrind.h
@@ -82,6 +82,7 @@ typedef
        _VG_USERREQ__HG_PTHREAD_COND_BROADCAST_PRE, /* pth_cond_t* */
        _VG_USERREQ__HG_PTHREAD_COND_WAIT_PRE,     /* pth_cond_t*, pth_mx_t* */
        _VG_USERREQ__HG_PTHREAD_COND_WAIT_POST,    /* pth_cond_t*, pth_mx_t* */
+      _VG_USERREQ__HG_PTHREAD_COND_DESTROY_PRE,   /* pth_cond_t* */
        _VG_USERREQ__HG_PTHREAD_RWLOCK_INIT_POST,   /* pth_rwlk_t* */
        _VG_USERREQ__HG_PTHREAD_RWLOCK_DESTROY_PRE, /* pth_rwlk_t* */
        _VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE,    /* pth_rwlk_t*, long isW */
diff --git a/helgrind/hg_basics.c b/helgrind/hg_basics.c

new file mode 100644 (file)

index 0000000..7c25109
--- /dev/null
+++ b/helgrind/hg_basics.c
@@ -0,0 +1,86 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Basic definitions for all of Helgrind.                       ---*/
+/*---                                                  hg_basics.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Helgrind, a Valgrind tool for detecting errors
+   in threaded programs.
+
+   Copyright (C) 2007-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_threadstate.h"
+
+#include "hg_basics.h"            /* self */
+
+
+/*----------------------------------------------------------------*/
+/*--- Very basic stuff                                         ---*/
+/*----------------------------------------------------------------*/
+
+void* HG_(zalloc) ( HChar* cc, SizeT n )
+{
+   void* p;
+   tl_assert(n > 0);
+   p = VG_(malloc)( cc, n );
+   tl_assert(p);
+   VG_(memset)(p, 0, n);
+   return p;
+}
+
+void HG_(free) ( void* p )
+{
+   tl_assert(p);
+   VG_(free)(p);
+}
+
+Char* HG_(strdup) ( HChar* cc, const Char* s )
+{
+   return VG_(strdup)( cc, s );
+}
+
+
+/*----------------------------------------------------------------*/
+/*--- Command line options                                     ---*/
+/*----------------------------------------------------------------*/
+
+/* Description of these flags is in hg_basics.h. */
+
+Bool HG_(clo_track_lockorders) = True;
+
+Bool HG_(clo_cmp_race_err_addrs) = False;
+
+Addr HG_(clo_trace_addr) = 0;
+
+Word HG_(clo_trace_level) = 0;
+
+Word HG_(clo_sanity_flags) = 0;
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              hg_basics.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/helgrind/hg_basics.h b/helgrind/hg_basics.h

new file mode 100644 (file)

index 0000000..0a295f7
--- /dev/null
+++ b/helgrind/hg_basics.h
@@ -0,0 +1,92 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Basic definitions for all of Helgrind.                       ---*/
+/*---                                                  hg_basics.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Helgrind, a Valgrind tool for detecting errors
+   in threaded programs.
+
+   Copyright (C) 2007-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __HG_BASICS_H
+#define __HG_BASICS_H
+
+
+/*----------------------------------------------------------------*/
+/*--- Very basic stuff                                         ---*/
+/*----------------------------------------------------------------*/
+
+#define HG_(str) VGAPPEND(vgHelgrind_,str)
+
+void* HG_(zalloc) ( HChar* cc, SizeT n );
+void  HG_(free)   ( void* p );
+Char* HG_(strdup) ( HChar* cc, const Char* s );
+
+static inline Bool HG_(is_sane_ThreadId) ( ThreadId coretid ) {
+   return coretid >= 0 && coretid < VG_N_THREADS;
+}
+
+
+/*----------------------------------------------------------------*/
+/*--- Command line options                                     ---*/
+/*----------------------------------------------------------------*/
+
+/* Flags controlling which events trigger sanity checking */
+#define SCE_THREADS  (1<<0)  // Sanity check at thread create/join
+#define SCE_LOCKS    (1<<1)  // Sanity check at lock events
+#define SCE_BIGRANGE (1<<2)  // Sanity check at big mem range events
+#define SCE_ACCESS   (1<<3)  // Sanity check at mem accesses
+#define SCE_LAOG     (1<<4)  // Sanity check at significant LAOG events
+
+#define SCE_BIGRANGE_T 256  // big mem range minimum size
+
+
+/* Enable/disable lock order checking.  Sometimes it produces a lot of
+   errors, possibly genuine, which nevertheless can be very
+   annoying. */
+extern Bool HG_(clo_track_lockorders);
+
+/* When comparing race errors for equality, should the race address be
+   taken into account?  For users, no, but for verification purposes
+   (regtesting) this is sometimes important. */
+extern Bool HG_(clo_cmp_race_err_addrs);
+
+/* Tracing memory accesses, so we can see what's going on.
+   clo_trace_addr is the address to monitor.  clo_trace_level = 0 for
+   no tracing, 1 for summary, 2 for detailed. */
+extern Addr HG_(clo_trace_addr);
+extern Word HG_(clo_trace_level);
+
+/* Sanity check level.  This is an or-ing of
+   SCE_{THREADS,LOCKS,BIGRANGE,ACCESS,LAOG}. */
+extern Word HG_(clo_sanity_flags);
+
+
+
+
+#endif /* ! __HG_BASICS_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              hg_basics.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/helgrind/hg_errors.c b/helgrind/hg_errors.c

new file mode 100644 (file)

index 0000000..d5baf37
--- /dev/null
+++ b/helgrind/hg_errors.c
@@ -0,0 +1,768 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Error management for Helgrind.                               ---*/
+/*---                                                  hg_errors.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Helgrind, a Valgrind tool for detecting errors
+   in threaded programs.
+
+   Copyright (C) 2007-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_execontext.h"
+#include "pub_tool_errormgr.h"
+#include "pub_tool_wordfm.h"
+#include "pub_tool_xarray.h"
+#include "pub_tool_debuginfo.h"
+#include "pub_tool_threadstate.h"
+
+#include "hg_basics.h"
+#include "hg_wordset.h"
+#include "hg_lock_n_thread.h"
+#include "hg_errors.h"            /* self */
+
+
+/*----------------------------------------------------------------*/
+/*---                                                          ---*/
+/*----------------------------------------------------------------*/
+
+/* This has to do with printing error messages.  See comments on
+   announce_threadset() and summarise_threadset().  Perhaps it
+   should be a command line option. */
+#define N_THREADS_TO_ANNOUNCE 5
+
+
+/*----------------------------------------------------------------*/
+/*--- Error management                                         ---*/
+/*----------------------------------------------------------------*/
+
+/* maps (by value) strings to a copy of them in ARENA_TOOL */
+
+static WordFM* string_table = NULL;
+
+ULong HG_(stats__string_table_queries) = 0;
+
+ULong HG_(stats__string_table_get_map_size) ( void ) {
+   return string_table ? (ULong)VG_(sizeFM)(string_table) : 0;
+}
+
+static Word string_table_cmp ( UWord s1, UWord s2 ) {
+   return (Word)VG_(strcmp)( (HChar*)s1, (HChar*)s2 );
+}
+
+static HChar* string_table_strdup ( HChar* str ) {
+   HChar* copy = NULL;
+   HG_(stats__string_table_queries)++;
+   if (!str)
+      str = "(null)";
+   if (!string_table) {
+      string_table = VG_(newFM)( HG_(zalloc), "hg.sts.1",
+                                 HG_(free), string_table_cmp );
+      tl_assert(string_table);
+   }
+   if (VG_(lookupFM)( string_table,
+                      NULL, (Word*)&copy, (Word)str )) {
+      tl_assert(copy);
+      if (0) VG_(printf)("string_table_strdup: %p -> %p\n", str, copy );
+      return copy;
+   } else {
+      copy = HG_(strdup)("hg.sts.2", str);
+      tl_assert(copy);
+      VG_(addToFM)( string_table, (Word)copy, (Word)copy );
+      return copy;
+   }
+}
+
+/* maps from Lock .unique fields to LockP*s */
+
+static WordFM* map_LockN_to_P = NULL;
+
+ULong HG_(stats__LockN_to_P_queries) = 0;
+
+ULong HG_(stats__LockN_to_P_get_map_size) ( void ) {
+   return map_LockN_to_P ? (ULong)VG_(sizeFM)(map_LockN_to_P) : 0;
+}
+
+static Word lock_unique_cmp ( UWord lk1W, UWord lk2W )
+{
+   Lock* lk1 = (Lock*)lk1W;
+   Lock* lk2 = (Lock*)lk2W;
+   tl_assert( HG_(is_sane_LockNorP)(lk1) );
+   tl_assert( HG_(is_sane_LockNorP)(lk2) );
+   if (lk1->unique < lk2->unique) return -1;
+   if (lk1->unique > lk2->unique) return 1;
+   return 0;
+}
+
+static Lock* mk_LockP_from_LockN ( Lock* lkn )
+{
+   Lock* lkp = NULL;
+   HG_(stats__LockN_to_P_queries)++;
+   tl_assert( HG_(is_sane_LockN)(lkn) );
+   if (!map_LockN_to_P) {
+      map_LockN_to_P = VG_(newFM)( HG_(zalloc), "hg.mLPfLN.1",
+                                   HG_(free), lock_unique_cmp );
+      tl_assert(map_LockN_to_P);
+   }
+   if (!VG_(lookupFM)( map_LockN_to_P, NULL, (Word*)&lkp, (Word)lkn)) {
+      lkp = HG_(zalloc)( "hg.mLPfLN.2", sizeof(Lock) );
+      *lkp = *lkn;
+      lkp->admin = NULL;
+      lkp->magic = LockP_MAGIC;
+      /* Forget about the bag of lock holders - don't copy that.
+         Also, acquired_at should be NULL whenever heldBy is, and vice
+         versa.  Also forget about the associated libhb synch object. */
+      lkp->heldW  = False;
+      lkp->heldBy = NULL;
+      lkp->acquired_at = NULL;
+      lkp->hbso = NULL;
+      VG_(addToFM)( map_LockN_to_P, (Word)lkp, (Word)lkp );
+   }
+   tl_assert( HG_(is_sane_LockP)(lkp) );
+   return lkp;
+}
+
+/* Errors:
+
+      race: program counter
+            read or write
+            data size
+            previous state
+            current state
+
+      FIXME: how does state printing interact with lockset gc?
+      Are the locksets in prev/curr state always valid?
+      Ditto question for the threadsets
+          ThreadSets - probably are always valid if Threads
+          are never thrown away.
+          LockSets - could at least print the lockset elements that
+          correspond to actual locks at the time of printing.  Hmm.
+*/
+
+/* Error kinds */
+typedef
+   enum {
+      XE_Race=1101,      // race
+      XE_FreeMemLock,    // freeing memory containing a locked lock
+      XE_UnlockUnlocked, // unlocking a not-locked lock
+      XE_UnlockForeign,  // unlocking a lock held by some other thread
+      XE_UnlockBogus,    // unlocking an address not known to be a lock
+      XE_PthAPIerror,    // error from the POSIX pthreads API
+      XE_LockOrder,      // lock order error
+      XE_Misc            // misc other error (w/ string to describe it)
+   }
+   XErrorTag;
+
+/* Extra contexts for kinds */
+typedef
+   struct  {
+      XErrorTag tag;
+      union {
+         struct {
+            Addr  data_addr;
+            Int   szB;
+            Bool  isWrite;
+            ExeContext* mb_lastlock;
+            ExeContext* mb_confacc;
+            Thread* thr;
+            Thread* mb_confaccthr;
+            Char  descr1[96];
+            Char  descr2[96];
+         } Race;
+         struct {
+            Thread* thr;  /* doing the freeing */
+            Lock*   lock; /* lock which is locked */
+         } FreeMemLock;
+         struct {
+            Thread* thr;  /* doing the unlocking */
+            Lock*   lock; /* lock (that is already unlocked) */
+         } UnlockUnlocked;
+         struct {
+            Thread* thr;    /* doing the unlocking */
+            Thread* owner;  /* thread that actually holds the lock */
+            Lock*   lock;   /* lock (that is held by 'owner') */
+         } UnlockForeign;
+         struct {
+            Thread* thr;     /* doing the unlocking */
+            Addr    lock_ga; /* purported address of the lock */
+         } UnlockBogus;
+         struct {
+            Thread* thr; 
+            HChar*  fnname; /* persistent, in tool-arena */
+            Word    err;    /* pth error code */
+            HChar*  errstr; /* persistent, in tool-arena */
+         } PthAPIerror;
+         struct {
+            Thread*     thr;
+            Addr        before_ga; /* always locked first in prog. history */
+            Addr        after_ga;
+            ExeContext* before_ec;
+            ExeContext* after_ec;
+         } LockOrder;
+         struct {
+            Thread* thr;
+            HChar*  errstr; /* persistent, in tool-arena */
+         } Misc;
+      } XE;
+   }
+   XError;
+
+static void init_XError ( XError* xe ) {
+   VG_(memset)(xe, 0, sizeof(*xe) );
+   xe->tag = XE_Race-1; /* bogus */
+}
+
+
+/* Extensions of suppressions */
+typedef
+   enum {
+      XS_Race=1201, /* race */
+      XS_FreeMemLock,
+      XS_UnlockUnlocked,
+      XS_UnlockForeign,
+      XS_UnlockBogus,
+      XS_PthAPIerror,
+      XS_LockOrder,
+      XS_Misc
+   }
+   XSuppTag;
+
+
+/* Updates the copy with address info if necessary. */
+UInt HG_(update_extra) ( Error* err )
+{
+   XError* xe = (XError*)VG_(get_error_extra)(err);
+   tl_assert(xe);
+   //if (extra != NULL && Undescribed == extra->addrinfo.akind) {
+   //   describe_addr ( VG_(get_error_address)(err), &(extra->addrinfo) );
+   //}
+
+   if (xe->tag == XE_Race) {
+      /* See if we can come up with a source level description of the
+         raced-upon address.  This is potentially expensive, which is
+         why it's only done at the update_extra point, not when the
+         error is initially created. */
+      tl_assert(sizeof(xe->XE.Race.descr1) == sizeof(xe->XE.Race.descr2));
+      if (VG_(get_data_description)(
+                &xe->XE.Race.descr1[0],
+                &xe->XE.Race.descr2[0],
+                sizeof(xe->XE.Race.descr1)-1,
+                xe->XE.Race.data_addr )) {
+         tl_assert( xe->XE.Race.descr1
+                       [ sizeof(xe->XE.Race.descr1)-1 ] == 0);
+         tl_assert( xe->XE.Race.descr2
+                       [ sizeof(xe->XE.Race.descr2)-1 ] == 0);
+      }
+   }
+
+   return sizeof(XError);
+}
+
+void HG_(record_error_Race) ( Thread* thr, 
+                              Addr data_addr, Bool isWrite, Int szB,
+                              ExeContext* mb_lastlock,
+                              ExeContext* mb_confacc,
+                              Thread* mb_confaccthr )
+{
+   XError xe;
+   tl_assert( HG_(is_sane_Thread)(thr) );
+
+#  if defined(VGO_linux)
+   /* Skip any races on locations apparently in GOTPLT sections.  This
+      is said to be caused by ld.so poking PLT table entries (or
+      whatever) when it writes the resolved address of a dynamically
+      linked routine into the table (or whatever) the first time
+      that routine is called. */
+   {
+     VgSectKind sect = VG_(seginfo_sect_kind)( NULL, 0, data_addr );
+     if (0) VG_(printf)("XXXXXXXXX RACE on %#lx %s\n",
+                        data_addr, VG_(pp_SectKind)(sect));
+     if (sect == Vg_SectGOTPLT) return;
+   }
+#  endif
+
+   init_XError(&xe);
+   xe.tag = XE_Race;
+   xe.XE.Race.data_addr   = data_addr;
+   xe.XE.Race.szB         = szB;
+   xe.XE.Race.isWrite     = isWrite;
+   xe.XE.Race.mb_lastlock = mb_lastlock;
+   xe.XE.Race.mb_confacc  = mb_confacc;
+   xe.XE.Race.thr         = thr;
+   xe.XE.Race.mb_confaccthr = mb_confaccthr;
+   tl_assert(isWrite == False || isWrite == True);
+   //   tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
+   xe.XE.Race.descr1[0] = xe.XE.Race.descr2[0] = 0;
+   // FIXME: tid vs thr
+   tl_assert( HG_(is_sane_ThreadId)(thr->coretid) );
+   tl_assert( thr->coretid != VG_INVALID_THREADID );
+   VG_(maybe_record_error)( thr->coretid,
+                            XE_Race, data_addr, NULL, &xe );
+}
+
+void HG_(record_error_FreeMemLock) ( Thread* thr, Lock* lk )
+{
+   XError xe;
+   tl_assert( HG_(is_sane_Thread)(thr) );
+   tl_assert( HG_(is_sane_LockN)(lk) );
+   init_XError(&xe);
+   xe.tag = XE_FreeMemLock;
+   xe.XE.FreeMemLock.thr  = thr;
+   xe.XE.FreeMemLock.lock = mk_LockP_from_LockN(lk);
+   // FIXME: tid vs thr
+   tl_assert( HG_(is_sane_ThreadId)(thr->coretid) );
+   tl_assert( thr->coretid != VG_INVALID_THREADID );
+   VG_(maybe_record_error)( thr->coretid,
+                            XE_FreeMemLock, 0, NULL, &xe );
+}
+
+void HG_(record_error_UnlockUnlocked) ( Thread* thr, Lock* lk )
+{
+   XError xe;
+   tl_assert( HG_(is_sane_Thread)(thr) );
+   tl_assert( HG_(is_sane_LockN)(lk) );
+   init_XError(&xe);
+   xe.tag = XE_UnlockUnlocked;
+   xe.XE.UnlockUnlocked.thr  = thr;
+   xe.XE.UnlockUnlocked.lock = mk_LockP_from_LockN(lk);
+   // FIXME: tid vs thr
+   tl_assert( HG_(is_sane_ThreadId)(thr->coretid) );
+   tl_assert( thr->coretid != VG_INVALID_THREADID );
+   VG_(maybe_record_error)( thr->coretid,
+                            XE_UnlockUnlocked, 0, NULL, &xe );
+}
+
+void HG_(record_error_UnlockForeign) ( Thread* thr,
+                                       Thread* owner, Lock* lk )
+{
+   XError xe;
+   tl_assert( HG_(is_sane_Thread)(thr) );
+   tl_assert( HG_(is_sane_Thread)(owner) );
+   tl_assert( HG_(is_sane_LockN)(lk) );
+   init_XError(&xe);
+   xe.tag = XE_UnlockForeign;
+   xe.XE.UnlockForeign.thr   = thr;
+   xe.XE.UnlockForeign.owner = owner;
+   xe.XE.UnlockForeign.lock  = mk_LockP_from_LockN(lk);
+   // FIXME: tid vs thr
+   tl_assert( HG_(is_sane_ThreadId)(thr->coretid) );
+   tl_assert( thr->coretid != VG_INVALID_THREADID );
+   VG_(maybe_record_error)( thr->coretid,
+                            XE_UnlockForeign, 0, NULL, &xe );
+}
+
+void HG_(record_error_UnlockBogus) ( Thread* thr, Addr lock_ga )
+{
+   XError xe;
+   tl_assert( HG_(is_sane_Thread)(thr) );
+   init_XError(&xe);
+   xe.tag = XE_UnlockBogus;
+   xe.XE.UnlockBogus.thr     = thr;
+   xe.XE.UnlockBogus.lock_ga = lock_ga;
+   // FIXME: tid vs thr
+   tl_assert( HG_(is_sane_ThreadId)(thr->coretid) );
+   tl_assert( thr->coretid != VG_INVALID_THREADID );
+   VG_(maybe_record_error)( thr->coretid,
+                            XE_UnlockBogus, 0, NULL, &xe );
+}
+
+void HG_(record_error_LockOrder)(
+        Thread* thr, Addr before_ga, Addr after_ga,
+        ExeContext* before_ec, ExeContext* after_ec 
+     )
+{
+   XError xe;
+   tl_assert( HG_(is_sane_Thread)(thr) );
+   if (!HG_(clo_track_lockorders))
+      return;
+   init_XError(&xe);
+   xe.tag = XE_LockOrder;
+   xe.XE.LockOrder.thr       = thr;
+   xe.XE.LockOrder.before_ga = before_ga;
+   xe.XE.LockOrder.before_ec = before_ec;
+   xe.XE.LockOrder.after_ga  = after_ga;
+   xe.XE.LockOrder.after_ec  = after_ec;
+   // FIXME: tid vs thr
+   tl_assert( HG_(is_sane_ThreadId)(thr->coretid) );
+   tl_assert( thr->coretid != VG_INVALID_THREADID );
+   VG_(maybe_record_error)( thr->coretid,
+                            XE_LockOrder, 0, NULL, &xe );
+}
+
+void HG_(record_error_PthAPIerror) ( Thread* thr, HChar* fnname, 
+                                     Word err, HChar* errstr )
+{
+   XError xe;
+   tl_assert( HG_(is_sane_Thread)(thr) );
+   tl_assert(fnname);
+   tl_assert(errstr);
+   init_XError(&xe);
+   xe.tag = XE_PthAPIerror;
+   xe.XE.PthAPIerror.thr    = thr;
+   xe.XE.PthAPIerror.fnname = string_table_strdup(fnname);
+   xe.XE.PthAPIerror.err    = err;
+   xe.XE.PthAPIerror.errstr = string_table_strdup(errstr);
+   // FIXME: tid vs thr
+   tl_assert( HG_(is_sane_ThreadId)(thr->coretid) );
+   tl_assert( thr->coretid != VG_INVALID_THREADID );
+   VG_(maybe_record_error)( thr->coretid,
+                            XE_PthAPIerror, 0, NULL, &xe );
+}
+
+void HG_(record_error_Misc) ( Thread* thr, HChar* errstr )
+{
+   XError xe;
+   tl_assert( HG_(is_sane_Thread)(thr) );
+   tl_assert(errstr);
+   init_XError(&xe);
+   xe.tag = XE_Misc;
+   xe.XE.Misc.thr    = thr;
+   xe.XE.Misc.errstr = string_table_strdup(errstr);
+   // FIXME: tid vs thr
+   tl_assert( HG_(is_sane_ThreadId)(thr->coretid) );
+   tl_assert( thr->coretid != VG_INVALID_THREADID );
+   VG_(maybe_record_error)( thr->coretid,
+                            XE_Misc, 0, NULL, &xe );
+}
+
+/* Compare the tool-specific parts of two errors, which must be of the
+   same kind (asserted).  Returns True iff the fields compared for that
+   kind all match.  'not_used' is ignored.  For races the address is
+   compared only when HG_(clo_cmp_race_err_addrs) is set. */
+Bool HG_(eq_Error) ( VgRes not_used, Error* e1, Error* e2 )
+{
+   XError* xa;
+   XError* xb;
+
+   tl_assert(VG_(get_error_kind)(e1) == VG_(get_error_kind)(e2));
+
+   xa = (XError*)VG_(get_error_extra)(e1);
+   xb = (XError*)VG_(get_error_extra)(e2);
+   tl_assert(xa);
+   tl_assert(xb);
+
+   switch (VG_(get_error_kind)(e1)) {
+
+      case XE_Race:
+         if (xa->XE.Race.szB != xb->XE.Race.szB)
+            return False;
+         if (xa->XE.Race.isWrite != xb->XE.Race.isWrite)
+            return False;
+         if (!HG_(clo_cmp_race_err_addrs))
+            return True;
+         return xa->XE.Race.data_addr == xb->XE.Race.data_addr;
+
+      case XE_FreeMemLock:
+         if (xa->XE.FreeMemLock.thr != xb->XE.FreeMemLock.thr)
+            return False;
+         return xa->XE.FreeMemLock.lock == xb->XE.FreeMemLock.lock;
+
+      case XE_UnlockUnlocked:
+         if (xa->XE.UnlockUnlocked.thr != xb->XE.UnlockUnlocked.thr)
+            return False;
+         return xa->XE.UnlockUnlocked.lock == xb->XE.UnlockUnlocked.lock;
+
+      case XE_UnlockForeign:
+         if (xa->XE.UnlockForeign.thr != xb->XE.UnlockForeign.thr)
+            return False;
+         if (xa->XE.UnlockForeign.owner != xb->XE.UnlockForeign.owner)
+            return False;
+         return xa->XE.UnlockForeign.lock == xb->XE.UnlockForeign.lock;
+
+      case XE_UnlockBogus:
+         if (xa->XE.UnlockBogus.thr != xb->XE.UnlockBogus.thr)
+            return False;
+         return xa->XE.UnlockBogus.lock_ga == xb->XE.UnlockBogus.lock_ga;
+
+      case XE_PthAPIerror:
+         if (xa->XE.PthAPIerror.thr != xb->XE.PthAPIerror.thr)
+            return False;
+         if (0 != VG_(strcmp)(xa->XE.PthAPIerror.fnname,
+                              xb->XE.PthAPIerror.fnname))
+            return False;
+         return xa->XE.PthAPIerror.err == xb->XE.PthAPIerror.err;
+
+      case XE_LockOrder:
+         /* Only the reporting thread is compared. */
+         return xa->XE.LockOrder.thr == xb->XE.LockOrder.thr;
+
+      case XE_Misc:
+         if (xa->XE.Misc.thr != xb->XE.Misc.thr)
+            return False;
+         return 0 == VG_(strcmp)(xa->XE.Misc.errstr, xb->XE.Misc.errstr);
+
+      default:
+         tl_assert(0);
+   }
+
+   /*NOTREACHED*/
+   tl_assert(0);
+}
+
+
+/* Print the point-of-creation of 'thr', at most once per thread: the
+   .announced flag is set after printing and checked on entry.  The
+   thread with errmsg_index 1 is reported as the program's root thread
+   and must have no creation context; every other thread must have
+   one. */
+static void announce_one_thread ( Thread* thr )
+{
+   tl_assert(HG_(is_sane_Thread)(thr));
+   tl_assert(thr->errmsg_index >= 1);
+
+   if (thr->announced)
+      return;
+
+   if (thr->errmsg_index != 1) {
+      tl_assert(thr->created_at != NULL);
+      VG_(message)(Vg_UserMsg, "Thread #%d was created",
+                               thr->errmsg_index);
+      VG_(pp_ExeContext)( thr->created_at );
+   } else {
+      tl_assert(thr->created_at == NULL);
+      VG_(message)(Vg_UserMsg, "Thread #%d is the program's root thread",
+                               thr->errmsg_index);
+   }
+
+   VG_(message)(Vg_UserMsg, "");
+   thr->announced = True;
+}
+
+
+/* Print a human-readable report for 'err'.  The tool-specific payload
+   (an XError) is fetched with VG_(get_error_extra) and must be
+   non-NULL for every error kind.  Each case first announces the
+   thread(s) involved, then prints the message and the stack at which
+   the error was detected, followed by any further contexts the error
+   carries.  An unknown error kind is an assertion failure. */
+void HG_(pp_Error) ( Error* err )
+{
+   XError *xe = (XError*)VG_(get_error_extra)(err);
+
+   switch (VG_(get_error_kind)(err)) {
+
+   case XE_Misc: {
+      tl_assert(xe);
+      tl_assert( HG_(is_sane_Thread)( xe->XE.Misc.thr ) );
+      announce_one_thread( xe->XE.Misc.thr );
+      VG_(message)(Vg_UserMsg,
+                  "Thread #%d: %s",
+                  (Int)xe->XE.Misc.thr->errmsg_index,
+                  xe->XE.Misc.errstr);
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+      break;
+   }
+
+   case XE_LockOrder: {
+      tl_assert(xe);
+      tl_assert( HG_(is_sane_Thread)( xe->XE.LockOrder.thr ) );
+      announce_one_thread( xe->XE.LockOrder.thr );
+      VG_(message)(Vg_UserMsg,
+                  "Thread #%d: lock order \"%p before %p\" violated",
+                  (Int)xe->XE.LockOrder.thr->errmsg_index,
+                  (void*)xe->XE.LockOrder.before_ga,
+                  (void*)xe->XE.LockOrder.after_ga);
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+      /* The two acquisition stacks are shown only if both exist. */
+      if (xe->XE.LockOrder.before_ec && xe->XE.LockOrder.after_ec) {
+         VG_(message)(Vg_UserMsg,
+            "  Required order was established by acquisition of lock at %p",
+            (void*)xe->XE.LockOrder.before_ga);
+         VG_(pp_ExeContext)( xe->XE.LockOrder.before_ec );
+         VG_(message)(Vg_UserMsg,
+            "  followed by a later acquisition of lock at %p", 
+            (void*)xe->XE.LockOrder.after_ga);
+         VG_(pp_ExeContext)( xe->XE.LockOrder.after_ec );
+      }
+      break;
+   }
+
+   case XE_PthAPIerror: {
+      tl_assert(xe);
+      tl_assert( HG_(is_sane_Thread)( xe->XE.PthAPIerror.thr ) );
+      announce_one_thread( xe->XE.PthAPIerror.thr );
+      VG_(message)(Vg_UserMsg,
+                  "Thread #%d's call to %s failed",
+                  (Int)xe->XE.PthAPIerror.thr->errmsg_index,
+                  xe->XE.PthAPIerror.fnname);
+      VG_(message)(Vg_UserMsg,
+                  "   with error code %ld (%s)",
+                  xe->XE.PthAPIerror.err,
+                  xe->XE.PthAPIerror.errstr);
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+      break;
+   }
+
+   case XE_UnlockBogus: {
+      tl_assert(xe);
+      tl_assert( HG_(is_sane_Thread)( xe->XE.UnlockBogus.thr ) );
+      announce_one_thread( xe->XE.UnlockBogus.thr );
+      VG_(message)(Vg_UserMsg,
+                   "Thread #%d unlocked an invalid lock at %p ",
+                   (Int)xe->XE.UnlockBogus.thr->errmsg_index,
+                   (void*)xe->XE.UnlockBogus.lock_ga);
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+      break;
+   }
+
+   case XE_UnlockForeign: {
+      tl_assert(xe);
+      tl_assert( HG_(is_sane_LockP)( xe->XE.UnlockForeign.lock ) );
+      tl_assert( HG_(is_sane_Thread)( xe->XE.UnlockForeign.owner ) );
+      tl_assert( HG_(is_sane_Thread)( xe->XE.UnlockForeign.thr ) );
+      announce_one_thread( xe->XE.UnlockForeign.thr );
+      announce_one_thread( xe->XE.UnlockForeign.owner );
+      VG_(message)(Vg_UserMsg,
+                   "Thread #%d unlocked lock at %p "
+                   "currently held by thread #%d",
+                   (Int)xe->XE.UnlockForeign.thr->errmsg_index,
+                   (void*)xe->XE.UnlockForeign.lock->guestaddr,
+                   (Int)xe->XE.UnlockForeign.owner->errmsg_index );
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+      if (xe->XE.UnlockForeign.lock->appeared_at) {
+         VG_(message)(Vg_UserMsg,
+                      "  Lock at %p was first observed",
+                      (void*)xe->XE.UnlockForeign.lock->guestaddr);
+         VG_(pp_ExeContext)( xe->XE.UnlockForeign.lock->appeared_at );
+      }
+      break;
+   }
+
+   case XE_UnlockUnlocked: {
+      tl_assert(xe);
+      tl_assert( HG_(is_sane_LockP)( xe->XE.UnlockUnlocked.lock ) );
+      tl_assert( HG_(is_sane_Thread)( xe->XE.UnlockUnlocked.thr ) );
+      announce_one_thread( xe->XE.UnlockUnlocked.thr );
+      VG_(message)(Vg_UserMsg,
+                   "Thread #%d unlocked a not-locked lock at %p ",
+                   (Int)xe->XE.UnlockUnlocked.thr->errmsg_index,
+                   (void*)xe->XE.UnlockUnlocked.lock->guestaddr);
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+      if (xe->XE.UnlockUnlocked.lock->appeared_at) {
+         VG_(message)(Vg_UserMsg,
+                      "  Lock at %p was first observed",
+                      (void*)xe->XE.UnlockUnlocked.lock->guestaddr);
+         VG_(pp_ExeContext)( xe->XE.UnlockUnlocked.lock->appeared_at );
+      }
+      break;
+   }
+
+   case XE_FreeMemLock: {
+      tl_assert(xe);
+      tl_assert( HG_(is_sane_LockP)( xe->XE.FreeMemLock.lock ) );
+      tl_assert( HG_(is_sane_Thread)( xe->XE.FreeMemLock.thr ) );
+      announce_one_thread( xe->XE.FreeMemLock.thr );
+      VG_(message)(Vg_UserMsg,
+                   "Thread #%d deallocated location %p "
+                   "containing a locked lock",
+                   (Int)xe->XE.FreeMemLock.thr->errmsg_index,
+                   (void*)xe->XE.FreeMemLock.lock->guestaddr);
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+      if (xe->XE.FreeMemLock.lock->appeared_at) {
+         VG_(message)(Vg_UserMsg,
+                      "  Lock at %p was first observed",
+                      (void*)xe->XE.FreeMemLock.lock->guestaddr);
+         VG_(pp_ExeContext)( xe->XE.FreeMemLock.lock->appeared_at );
+      }
+      break;
+   }
+
+   case XE_Race: {
+      Addr      err_ga;
+      HChar*    what;
+      Int       szB;
+      /* Check the payload before the first dereference, as every
+         other case does. */
+      tl_assert(xe);
+      what      = xe->XE.Race.isWrite ? "write" : "read";
+      szB       = xe->XE.Race.szB;
+      err_ga = VG_(get_error_address)(err);
+
+      announce_one_thread( xe->XE.Race.thr );
+      if (xe->XE.Race.mb_confaccthr)
+         announce_one_thread( xe->XE.Race.mb_confaccthr );
+      VG_(message)(Vg_UserMsg,
+         "Possible data race during %s of size %d at %#lx by thread #%d",
+         what, szB, err_ga, (Int)xe->XE.Race.thr->errmsg_index
+      );
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+      /* The conflicting access is shown only if its stack is known;
+         the conflicting thread is named only if that is known too. */
+      if (xe->XE.Race.mb_confacc) {
+         if (xe->XE.Race.mb_confaccthr) {
+            VG_(message)(Vg_UserMsg,
+               " This conflicts with a previous access by thread #%d",
+               (Int)xe->XE.Race.mb_confaccthr->errmsg_index
+            );
+         } else {
+            VG_(message)(Vg_UserMsg,
+               " This conflicts with a previous access"
+            );
+         }
+         VG_(pp_ExeContext)( xe->XE.Race.mb_confacc );
+      }
+
+
+      /* If we have a better description of the address, show it. */
+      if (xe->XE.Race.descr1[0] != 0)
+         VG_(message)(Vg_UserMsg, " %s", &xe->XE.Race.descr1[0]);
+      if (xe->XE.Race.descr2[0] != 0)
+         VG_(message)(Vg_UserMsg, " %s", &xe->XE.Race.descr2[0]);
+
+      break; /* case XE_Race */
+   } /* case XE_Race */
+
+   default:
+      tl_assert(0);
+   } /* switch (VG_(get_error_kind)(err)) */
+}
+
+/* Map the kind of 'err' to its name string.  The strings returned
+   here are the same ones HG_(recognised_suppression) accepts.  An
+   unknown kind is an assertion failure. */
+Char* HG_(get_error_name) ( Error* err )
+{
+   Char* name = NULL;
+
+   switch (VG_(get_error_kind)(err)) {
+      case XE_Race:           name = "Race";           break;
+      case XE_FreeMemLock:    name = "FreeMemLock";    break;
+      case XE_UnlockUnlocked: name = "UnlockUnlocked"; break;
+      case XE_UnlockForeign:  name = "UnlockForeign";  break;
+      case XE_UnlockBogus:    name = "UnlockBogus";    break;
+      case XE_PthAPIerror:    name = "PthAPIerror";    break;
+      case XE_LockOrder:      name = "LockOrder";      break;
+      case XE_Misc:           name = "Misc";           break;
+      default: tl_assert(0); /* fill in missing case */
+   }
+
+   return name;
+}
+
+/* If 'name' is one of the suppression names known to this tool, set
+   the matching kind on 'su' and return True.  Otherwise return False
+   and leave 'su' alone.  The comparison is an exact VG_(strcmp)
+   match. */
+Bool HG_(recognised_suppression) ( Char* name, Supp *su )
+{
+#  define IS(_str)  (0 == VG_(strcmp)(name, (_str)))
+   if      (IS("Race"))           VG_(set_supp_kind)(su, XS_Race);
+   else if (IS("FreeMemLock"))    VG_(set_supp_kind)(su, XS_FreeMemLock);
+   else if (IS("UnlockUnlocked")) VG_(set_supp_kind)(su, XS_UnlockUnlocked);
+   else if (IS("UnlockForeign"))  VG_(set_supp_kind)(su, XS_UnlockForeign);
+   else if (IS("UnlockBogus"))    VG_(set_supp_kind)(su, XS_UnlockBogus);
+   else if (IS("PthAPIerror"))    VG_(set_supp_kind)(su, XS_PthAPIerror);
+   else if (IS("LockOrder"))      VG_(set_supp_kind)(su, XS_LockOrder);
+   else if (IS("Misc"))           VG_(set_supp_kind)(su, XS_Misc);
+   else
+      return False;
+   return True;
+#  undef IS
+}
+
+/* Stub: no tool-specific extra information is read for suppressions.
+   All four arguments are ignored and the result is always True. */
+Bool HG_(read_extra_suppression_info) ( Int fd, Char* buf, Int nBuf,
+                                        Supp* su )
+{
+   /* do nothing -- no extra suppression info present.  Return True to
+      indicate nothing bad happened. */
+   return True;
+}
+
+/* Return True iff the kind of 'err' is the one that corresponds to
+   the kind of suppression 'su'.  Each XS_ suppression kind pairs with
+   the XE_ error kind of the same name.  An unknown suppression kind
+   is an assertion failure. */
+Bool HG_(error_matches_suppression) ( Error* err, Supp* su )
+{
+   Int wanted = -1;
+
+   switch (VG_(get_supp_kind)(su)) {
+   case XS_Race:           wanted = XE_Race;           break;
+   case XS_FreeMemLock:    wanted = XE_FreeMemLock;    break;
+   case XS_UnlockUnlocked: wanted = XE_UnlockUnlocked; break;
+   case XS_UnlockForeign:  wanted = XE_UnlockForeign;  break;
+   case XS_UnlockBogus:    wanted = XE_UnlockBogus;    break;
+   case XS_PthAPIerror:    wanted = XE_PthAPIerror;    break;
+   case XS_LockOrder:      wanted = XE_LockOrder;      break;
+   case XS_Misc:           wanted = XE_Misc;           break;
+   //case XS_: wanted = XE_; break;
+   default: tl_assert(0); /* fill in missing cases */
+   }
+
+   return VG_(get_error_kind)(err) == wanted;
+}
+
+/* Stub: nothing extra is printed for a suppression; 'err' is
+   ignored. */
+void HG_(print_extra_suppression_info) ( Error* err )
+{
+   /* Do nothing */
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              hg_errors.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/helgrind/hg_errors.h b/helgrind/hg_errors.h

new file mode 100644 (file)

index 0000000..ddf92c3
--- /dev/null
+++ b/helgrind/hg_errors.h
@@ -0,0 +1,73 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Error management for Helgrind.                               ---*/
+/*---                                                  hg_errors.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Helgrind, a Valgrind tool for detecting errors
+   in threaded programs.
+
+   Copyright (C) 2007-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __HG_ERRORS_H
+#define __HG_ERRORS_H
+
+
+/* The standard bundle of error management functions that we are
+required to present to the core/tool interface at startup. */
+Bool  HG_(eq_Error) ( VgRes not_used, Error* e1, Error* e2 );
+void  HG_(pp_Error) ( Error* err );
+UInt  HG_(update_extra) ( Error* err );
+Bool  HG_(recognised_suppression) ( Char* name, Supp *su );
+Bool  HG_(read_extra_suppression_info) ( Int fd, Char* buf, Int nBuf,
+                                         Supp* su );
+Bool  HG_(error_matches_suppression) ( Error* err, Supp* su );
+Char* HG_(get_error_name) ( Error* err );
+void  HG_(print_extra_suppression_info) ( Error* err );
+
+/* Functions for recording various kinds of errors. */
+void HG_(record_error_Race) ( Thread* thr, 
+                              Addr data_addr, Bool isWrite, Int szB,
+                              ExeContext* mb_lastlock,
+                              ExeContext* mb_confacc,
+                              Thread* mb_confaccthr );
+void HG_(record_error_FreeMemLock)    ( Thread* thr, Lock* lk );
+void HG_(record_error_UnlockUnlocked) ( Thread*, Lock* );
+void HG_(record_error_UnlockForeign)  ( Thread*, Thread*, Lock* );
+void HG_(record_error_UnlockBogus)    ( Thread*, Addr );
+void HG_(record_error_PthAPIerror)    ( Thread*, HChar*, Word, HChar* );
+void HG_(record_error_LockOrder)      ( Thread*, Addr, Addr,
+                                        ExeContext*, ExeContext* );
+void HG_(record_error_Misc)           ( Thread*, HChar* );
+
+/* Statistics pertaining to error management. */
+extern ULong HG_(stats__LockN_to_P_queries);
+extern ULong HG_(stats__LockN_to_P_get_map_size) ( void );
+extern ULong HG_(stats__string_table_queries);
+extern ULong HG_(stats__string_table_get_map_size) ( void );
+
+#endif /* ! __HG_ERRORS_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              hg_errors.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/helgrind/hg_intercepts.c b/helgrind/hg_intercepts.c

index 8e988e69273ed6667d0ae405b2e4016904849202..172edfe9ac87bbda390377e33e037a4e38c485a6 100644 (file)
--- a/helgrind/hg_intercepts.c
+++ b/helgrind/hg_intercepts.c
@@ -534,9 +534,10 @@ PTH_FUNC(int, pthreadZumutexZuunlock, // pthread_mutex_unlock
  
  /* Handled:   pthread_cond_wait pthread_cond_timedwait
                pthread_cond_signal pthread_cond_broadcast
+              pthread_cond_destroy
  
-   Unhandled: pthread_cond_init pthread_cond_destroy
-              -- are these important?
+   Unhandled: pthread_cond_init
+              -- is this important?
  */
  
  // pthread_cond_wait
@@ -719,6 +720,73 @@ PTH_FUNC(int, pthreadZucondZubroadcastZAZa, // pthread_cond_broadcast@*
  }
  
  
+// pthread_cond_destroy
+/* Intercept for pthread_cond_destroy.  Issues the
+   _VG_USERREQ__HG_PTHREAD_COND_DESTROY_PRE client request with the
+   condition variable's address, then calls the original function and
+   returns its result unchanged.  A non-zero result is additionally
+   reported through DO_PthAPIerror. */
+PTH_FUNC(int, pthreadZucondZudestroyZAZa, // pthread_cond_destroy@*
+              pthread_cond_t* cond)
+{
+   int ret;
+   OrigFn fn;
+
+   VALGRIND_GET_ORIG_FN(fn);
+
+   if (TRACE_PTH_FNS) {
+      fprintf(stderr, "<< pthread_cond_destroy %p", cond);
+      fflush(stderr);
+   }
+
+   /* Notify the tool before the real destroy is performed. */
+   DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_COND_DESTROY_PRE,
+               pthread_cond_t*,cond);
+
+   CALL_FN_W_W(ret, fn, cond);
+
+   if (ret != 0) {
+      DO_PthAPIerror( "pthread_cond_destroy", ret );
+   }
+
+   if (TRACE_PTH_FNS) {
+      fprintf(stderr, " codestr -> %d >>\n", ret);
+   }
+
+   return ret;
+}
+
+
+/*----------------------------------------------------------------*/
+/*--- pthread_barrier_t functions                              ---*/
+/*----------------------------------------------------------------*/
+
+/* Intercept for pthread_barrier_wait.  The barrier is presented to
+   the tool using the condition-variable client requests: a
+   COND_BROADCAST_PRE on the barrier's address before the real call,
+   and a COND_WAIT_POST (with the barrier's address in both argument
+   slots) after it.  The original function's result is returned
+   unchanged; a result that is neither 0 nor
+   PTHREAD_BARRIER_SERIAL_THREAD is additionally reported through
+   DO_PthAPIerror. */
+PTH_FUNC(int, pthreadZubarrierZuwait, // pthread_barrier_wait.
+              pthread_barrier_t* b)
+{
+   int ret;
+   OrigFn fn;
+   VALGRIND_GET_ORIG_FN(fn);
+
+   if (TRACE_PTH_FNS) {
+      fprintf(stderr, "<< pthread_barrier_wait %p", b);
+      fflush(stderr);
+   }
+
+   // We blocked, signal.
+   DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_COND_BROADCAST_PRE,
+               void*,b);
+   CALL_FN_W_W(ret, fn, b);
+
+   /* Both 0 and PTHREAD_BARRIER_SERIAL_THREAD are successful returns;
+      anything else is an error number. */
+   if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) {
+      DO_PthAPIerror( "pthread_barrier_wait", ret );
+   }
+
+   // We unblocked, finish wait.
+   // NOTE(review): the POST notification is still sent when the call
+   // failed, as before -- confirm whether it should be skipped then.
+   DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_COND_WAIT_POST,
+               void *, b, void *, b);
+
+   if (TRACE_PTH_FNS) {
+      fprintf(stderr, "  pthread_barrier_wait -> %d >>\n", ret);
+   }
+
+   return ret;
+}
+
+
+
  /*----------------------------------------------------------------*/
  /*--- pthread_rwlock_t functions                               ---*/
  /*----------------------------------------------------------------*/
diff --git a/helgrind/hg_lock_n_thread.c b/helgrind/hg_lock_n_thread.c

new file mode 100644 (file)

index 0000000..1d4085a
--- /dev/null
+++ b/helgrind/hg_lock_n_thread.c
@@ -0,0 +1,123 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Definitions for Locks and Threads.                           ---*/
+/*---                                           hg_lock_n_thread.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Helgrind, a Valgrind tool for detecting errors
+   in threaded programs.
+
+   Copyright (C) 2007-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_execontext.h"
+#include "pub_tool_threadstate.h"
+#include "pub_tool_wordfm.h"
+
+#include "hg_basics.h"
+#include "hg_wordset.h"
+#include "hg_lock_n_thread.h"            /* self */
+
+
+/*----------------------------------------------------------------*/
+/*--- Sanity checking                                          ---*/
+/*----------------------------------------------------------------*/
+
+/* A Thread is sane iff the pointer is non-NULL and its .magic field
+   holds Thread_MAGIC. */
+inline Bool HG_(is_sane_Thread) ( Thread* thr ) {
+   if (thr == NULL)
+      return False;
+   return thr->magic == Thread_MAGIC;
+}
+
+/* Check every entry of 'bag': each must have a count of at least 1
+   and a key that passes HG_(is_sane_Thread).  Returns True iff all
+   entries pass (trivially True for an empty bag).  The iteration is
+   always closed with VG_(doneIterBag), including when a bad entry
+   ends the scan early. */
+static Bool is_sane_Bag_of_Threads ( WordBag* bag )
+{
+   Thread* thr;
+   Word    count;
+   Bool    sane = True;
+   VG_(initIterBag)( bag );
+   while (VG_(nextIterBag)( bag, (Word*)&thr, &count )) {
+      if (count < 1 || !HG_(is_sane_Thread)(thr)) {
+         /* Stop at the first bad entry, but fall through to the
+            doneIterBag call rather than returning from here. */
+         sane = False;
+         break;
+      }
+   }
+   VG_(doneIterBag)( bag );
+   return sane;
+}
+
+/* Checks common to both flavours of Lock (LockN and LockP).  Returns
+   True iff:
+   - 'lock' is non-NULL, carries one of the two Lock magic numbers and
+     has one of the three known kinds;
+   - .heldBy and .acquired_at are either both NULL or both non-NULL;
+   - an unheld lock has .heldW False;
+   - a held lock has a non-empty .heldBy bag of sane Threads;
+   - a w-held LK_nonRec or LK_rdwr lock has a bag of total size one;
+   - only LK_rdwr locks are r-held. */
+static Bool is_sane_Lock_BASE ( Lock* lock )
+{
+   if (lock == NULL)
+      return False;
+   if (lock->magic != LockN_MAGIC && lock->magic != LockP_MAGIC)
+      return False;
+   if (lock->kind != LK_mbRec
+       && lock->kind != LK_nonRec
+       && lock->kind != LK_rdwr)
+      return False;
+
+   /* Unheld.  No acquisition point may be recorded, and we
+      arbitrarily require heldW to be False. */
+   if (lock->heldBy == NULL)
+      return lock->acquired_at == NULL && !lock->heldW;
+
+   /* Held, so an acquisition point must be recorded ... */
+   if (lock->acquired_at == NULL)
+      return False;
+   /* ... and the bag must contain at least one thread ... */
+   if (VG_(isEmptyBag)(lock->heldBy))
+      return False;
+   /* ... all of which are sane. */
+   if (!is_sane_Bag_of_Threads(lock->heldBy))
+      return False;
+
+   /* Held in read-mode: only reader-writer locks may be. */
+   if (!lock->heldW)
+      return lock->kind == LK_rdwr;
+
+   /* Held in write-mode.  No size constraint is checked for
+      LK_mbRec; the other two kinds need exactly one holding. */
+   if (lock->kind == LK_mbRec)
+      return True;
+   return VG_(isSingletonTotalBag)(lock->heldBy) ? True : False;
+}
+
+/* A LockP is sane iff it is non-NULL, has magic LockP_MAGIC, has no
+   associated SO (.hbso is NULL) and passes the common Lock checks. */
+Bool HG_(is_sane_LockP) ( Lock* lock ) {
+   if (lock == NULL)
+      return False;
+   if (lock->magic != LockP_MAGIC)
+      return False;
+   if (lock->hbso != NULL)
+      return False;
+   return is_sane_Lock_BASE(lock);
+}
+
+/* A LockN is sane iff it is non-NULL, has magic LockN_MAGIC, has an
+   associated SO (.hbso is non-NULL) and passes the common Lock
+   checks. */
+Bool HG_(is_sane_LockN) ( Lock* lock ) {
+   if (lock == NULL)
+      return False;
+   if (lock->magic != LockN_MAGIC)
+      return False;
+   if (lock->hbso == NULL)
+      return False;
+   return is_sane_Lock_BASE(lock);
+}
+
+/* Accepts either flavour of Lock: applies only the common checks in
+   is_sane_Lock_BASE, with no constraint on .hbso and either magic
+   number allowed. */
+Bool HG_(is_sane_LockNorP) ( Lock* lock ) {
+   return is_sane_Lock_BASE(lock);
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                       hg_lock_n_thread.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/helgrind/hg_lock_n_thread.h b/helgrind/hg_lock_n_thread.h

new file mode 100644 (file)

index 0000000..64b1f1e
--- /dev/null
+++ b/helgrind/hg_lock_n_thread.h
@@ -0,0 +1,165 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Definitions for Locks and Threads.                           ---*/
+/*---                                           hg_lock_n_thread.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Helgrind, a Valgrind tool for detecting errors
+   in threaded programs.
+
+   Copyright (C) 2007-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __HG_LOCK_N_THREAD_H
+#define __HG_LOCK_N_THREAD_H
+
+
+/*----------------------------------------------------------------*/
+/*--- Primary data definitions                                 ---*/
+/*----------------------------------------------------------------*/
+
+/* Magic numbers, for doing assertions that structures really are of
+   the right type.  Useful as some of the code can get a bit
+   complex. */
+#define Thread_MAGIC   0x504fc5e5
+#define LockN_MAGIC    0x6545b557 /* normal nonpersistent locks */
+#define LockP_MAGIC    0x755b5456 /* persistent (copied) locks */
+
+
+/* These are handles for Word sets.  CONSTRAINTS: must be (very) small
+   ints numbered from zero, since < 30-bit versions of them are used to
+   encode thread-sets and lock-sets in 32-bit shadow words. */
+typedef  WordSet  WordSetID;
+
+
+/* Synchronisation Objects, exported abstractly by libhb. */
+typedef  struct _SO  SO;
+
+/* Thr, libhb's private thread record, exported abstractly */
+typedef  struct _Thr  Thr;
+
+
+/* Stores information about a thread.  Addresses of these also serve
+   as unique thread identifiers and so are never freed, so they should
+   be as small as possible.  Freeing Thread structures makes the
+   storage management just too complex, and most programs don't create
+   many threads, so tolerating this leak seems like a not-bad
+   tradeoff.
+
+   Since these are never freed, the .coretid field only indicates the
+   core's ThreadId associated with this Thread whilst it is alive.
+   Once the thread finishes, the ThreadId is set to
+   VG_INVALID_THREADID.
+
+   The core may later re-use the same ThreadId for what is a logically
+   completely different thread, which of course must have a different
+   Thread structure. */
+typedef
+   struct _Thread {
+      /* ADMIN */
+      /* NOTE(review): presumably chains all Thread records together
+         -- confirm against the code that allocates them. */
+      struct _Thread* admin;
+      /* Must be Thread_MAGIC; checked by HG_(is_sane_Thread). */
+      UInt            magic;
+      /* libhb's private record for this thread. */
+      Thr*            hbthr;
+      /* The core's ThreadId while the thread is alive, and
+         VG_INVALID_THREADID once it has finished. */
+      ThreadId        coretid;
+      /* USEFUL */
+      WordSetID locksetA; /* WordSet of Lock* currently held by thread */
+      WordSetID locksetW; /* subset of locksetA held in w-mode */
+      /* EXPOSITION */
+      /* Place where parent was when this thread was created. */
+      ExeContext* created_at;
+      /* Set to True once the thread's creation point has been printed
+         in an error message, so that it is printed only once. */
+      Bool        announced;
+      /* Index for generating references in error messages. */
+      Int         errmsg_index;
+   }
+   Thread;
+
+
+/* Stores information about a lock's current state.  These are
+   allocated and later freed (when the containing memory becomes
+   NoAccess).  This gives a problem for the XError type, which
+   contains Lock*s.  The solution is to copy any Lock which is to be
+   incorporated into an XError, so as to make it independent from the
+   'normal' collection of Locks, which can come and go.  When the lock
+   is copied, its .magic is changed from LockN_MAGIC to
+   LockP_MAGIC. */
+
+/* Lock kinds.  The values start at 1001 rather than 0; the lock
+   sanity check rejects any .kind that is not one of these three.
+   NOTE(review): presumably chosen so that a zeroed or uninitialised
+   Lock fails that check -- confirm. */
+typedef
+   enum {
+      LK_mbRec=1001, /* normal mutex, possibly recursive */
+      LK_nonRec,     /* normal mutex, definitely non recursive */
+      LK_rdwr        /* reader-writer lock */
+   }
+   LockKind;
+
+typedef
+   struct _Lock {
+      /* ADMIN */
+      /* NOTE(review): presumably chains all Lock records together --
+         confirm against the code that allocates them. */
+      struct _Lock* admin;
+      ULong         unique; /* used for persistence-hashing */
+      UInt          magic;  /* LockN_MAGIC or LockP_MAGIC */
+      /* EXPOSITION */
+      /* Place where lock first came to the attention of Helgrind. */
+      ExeContext*   appeared_at;
+      /* If the lock is held, place where the lock most recently made
+         an unlocked->locked transition.  Must be sync'd with .heldBy:
+         either both NULL or both non-NULL. */
+      ExeContext*   acquired_at;
+      /* USEFUL-STATIC */
+      /* The sanity checks require .hbso to be non-NULL for a LockN
+         and NULL for a LockP. */
+      SO*           hbso;      /* associated SO */
+      Addr          guestaddr; /* Guest address of lock */
+      LockKind      kind;      /* what kind of lock this is */
+      /* USEFUL-DYNAMIC */
+      /* True: held in write mode.  False: held in read mode, or not
+         held at all (see below). */
+      Bool          heldW; 
+      WordBag*      heldBy; /* bag of threads that hold this lock */
+      /* .heldBy is NULL: lock is unheld, and .heldW is meaningless
+                          but arbitrarily set to False
+         .heldBy is non-NULL:
+            .heldW is True:  lock is w-held by threads in heldBy
+            .heldW is False: lock is r-held by threads in heldBy
+            Either way, heldBy may not validly be an empty Bag.
+
+         for LK_nonRec, r-holdings are not allowed, and w-holdings may
+         only have sizeTotal(heldBy) == 1
+
+         for LK_mbRec, r-holdings are not allowed, and w-holdings may
+         only have sizeUnique(heldBy) == 1
+
+         for LK_rdwr, w-holdings may only have sizeTotal(heldBy) == 1 */
+   }
+   Lock;
+
+/*----------------------------------------------------------------*/
+/*--- Sanity checking                                          ---*/
+/*----------------------------------------------------------------*/
+
+Bool HG_(is_sane_Thread)   ( Thread* thr );
+Bool HG_(is_sane_LockP)    ( Lock* lock );
+Bool HG_(is_sane_LockN)    ( Lock* lock );
+Bool HG_(is_sane_LockNorP) ( Lock* lock );
+
+
+#endif /* ! __HG_LOCK_N_THREAD_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                       hg_lock_n_thread.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c

index 19de00d66e68ccdc896373f0ab908b224961435a..d290c4dc8c28ba3ba4881b331dc694bfef932265 100644 (file)
--- a/helgrind/hg_main.c
+++ b/helgrind/hg_main.c
@@ -35,11 +35,9 @@
  */
  
  #include "pub_tool_basics.h"
-#include "pub_tool_aspacemgr.h"
  #include "pub_tool_libcassert.h"
  #include "pub_tool_libcbase.h"
  #include "pub_tool_libcprint.h"
-#include "pub_tool_mallocfree.h"
  #include "pub_tool_threadstate.h"
  #include "pub_tool_tooliface.h"
  #include "pub_tool_hashtable.h"
@@ -51,10 +49,20 @@
  #include "pub_tool_debuginfo.h"  /* VG_(get_data_description) */
  #include "pub_tool_wordfm.h"
  
+#include "hg_basics.h"
+#include "hg_wordset.h"
+#include "hg_lock_n_thread.h"
+#include "hg_errors.h"
+
+#include "libhb.h"
+
  #include "helgrind.h"
  
-#define HG_(str) VGAPPEND(vgHelgrind_,str)
-#include "hg_wordset.h"
+
+// FIXME: new_mem_w_tid ignores the supplied tid; check whether that is intended.
+
+// FIXME: when client destroys a lock or a CV, remove these
+// from our mappings, so that the associated SO can be freed up
  
  /*----------------------------------------------------------------*/
  /*---                                                          ---*/
@@ -96,28 +104,6 @@
  // 0 for silent, 1 for some stuff, 2 for lots of stuff
  #define SHOW_EVENTS 0
  
-// Flags for controlling for which events sanity checking is done
-#define SCE_THREADS  (1<<0)  // Sanity check at thread create/join
-#define SCE_LOCKS    (1<<1)  // Sanity check at lock events
-#define SCE_BIGRANGE (1<<2)  // Sanity check at big mem range events
-#define SCE_ACCESS   (1<<3)  // Sanity check at mem accesses
-#define SCE_LAOG     (1<<4)  // Sanity check at significant LAOG events
-#define SCE_HBEFORE  (1<<5)  // Crosscheck VTS vs Explicit h-before-graph
-
-#define SCE_BIGRANGE_T 256  // big mem range minimum size
-
-
-/* For the shadow mem cache stuff we may want more intrusive
-   checks.  Unfortunately there's no almost-zero-cost way to make them
-   selectable at run time.  Hence set the #if 0 to #if 1 and
-   rebuild if you want them. */
-#if 0
-#  define SCE_CACHELINE 1  /* do sanity-check CacheLine stuff */
-#  define inline __attribute__((noinline))
-   /* probably want to ditch -fomit-frame-pointer too */
-#else
-#  define SCE_CACHELINE 0   /* don't sanity-check CacheLine stuff */
-#endif
  
  static void all__sanity_check ( Char* who ); /* fwds */
  
@@ -127,416 +113,25 @@ static void all__sanity_check ( Char* who ); /* fwds */
  #define SHOW_DATA_STRUCTURES 0
  
  
-/* ------------ Command line options ------------ */
-
-// 0 = no segments at all
-// 1 = segments at thread create/join
-// 2 = as 1 + segments at condition variable signal/broadcast/wait
-//          + segments at sem_wait/sem_post
-static Int clo_happens_before = 2;  /* default setting */
-
-/* Generate .vcg output of the happens-before graph?
-   0: no  1: yes, without VTSs  2: yes, with VTSs */
-static Int clo_gen_vcg = 0;
-
-/* When comparing race errors for equality, should the race address be
-   taken into account?  For users, no, but for verification purposes
-   (regtesting) this is sometimes important. */
-static Bool clo_cmp_race_err_addrs = False;
-
-/* Tracing memory accesses, so we can see what's going on.
-   clo_trace_addr is the address to monitor.  clo_trace_level = 0 for
-   no tracing, 1 for summary, 2 for detailed. */
-static Addr clo_trace_addr  = 0;
-static Int  clo_trace_level = 0;
-
-/* Sanity check level.  This is an or-ing of
-   SCE_{THREADS,LOCKS,BIGRANGE,ACCESS,LAOG}. */
-static Int clo_sanity_flags = 0;
-
-/* This has to do with printing error messages.  See comments on
-   announce_threadset() and summarise_threadset().  Perhaps it
-   should be a command line option. */
-#define N_THREADS_TO_ANNOUNCE 5
-
-
  /* ------------ Misc comments ------------ */
  
  // FIXME: don't hardwire initial entries for root thread.
  // Instead, let the pre_thread_ll_create handler do this.
  
-// FIXME: when a SecMap is completely set via and address range
-// setting operation to a non-ShR/M state, clear its .mbHasShared 
-// bit
-
-/* FIXME: figure out what the real rules are for Excl->ShR/M
-   transitions w.r.t locksets.
-
-   Muelenfeld thesis Sec 2.2.1 p 8/9 says that
-
-     When another thread accesses the memory location, the lock-set
-     is initialized with all active locks and the algorithm reports
-     the next access that results in an empty lock-set.
-
-   What does "all active locks" mean?  All locks held by the accessing
-   thread, or all locks held by the system as a whole?
-
-   However: Muelenfeld's enhanced Helgrind (eraser_mem_read_word)
-   seems to use simply the set of locks held by the thread causing the
-   transition into a shared state at the time of the transition:
-
-     *sword = SW(Vge_Shar, packLockSet(thread_locks_rd[tid]));
-
-   Original Eraser paper also says "all active locks".
-*/
-
-// Major stuff to fix:
-// - reader-writer locks
-
-/* Thread async exit:
-   
-   remove the map_threads entry
-   leave the Thread object in place
-   complain if holds any locks
-   
-   unlike with Join, do not change any memory states
-
-   I _think_ this is correctly handled now.
-*/
-
-/*----------------------------------------------------------------*/
-/*--- Some very basic stuff                                    ---*/
-/*----------------------------------------------------------------*/
-
-static void* hg_zalloc ( HChar* cc, SizeT n ) {
-   void* p;
-   tl_assert(n > 0);
-   p = VG_(malloc)( cc, n );
-   tl_assert(p);
-   VG_(memset)(p, 0, n);
-   return p;
-}
-static void hg_free ( void* p ) {
-   tl_assert(p);
-   VG_(free)(p);
-}
-
-/* Round a up to the next multiple of N.  N must be a power of 2 */
-#define ROUNDUP(a, N)   ((a + N - 1) & ~(N-1))
-/* Round a down to the next multiple of N.  N must be a power of 2 */
-#define ROUNDDN(a, N)   ((a) & ~(N-1))
-
  
  /*----------------------------------------------------------------*/
-/*--- Primary data definitions                                 ---*/
+/*--- Primary data structures                                  ---*/
  /*----------------------------------------------------------------*/
  
-/* Shadow values. */
-typedef  UInt  SVal;
-
-
-/* These are handles for thread segments.  CONSTRAINTS: Must be small
-   ints numbered from zero, since 30-bit versions of them must are
-   used to represent Exclusive shadow states.  Are used as keys in
-   WordFMs so must be castable to Words at the appropriate points. */
-typedef  UInt  SegmentID;
-
-
-/* These are handles for Word sets.  CONSTRAINTS: must be (very) small
-   ints numbered from zero, since < 30-bit versions of them are used to
-   encode thread-sets and lock-sets in 32-bit shadow words. */
-typedef  WordSet  WordSetID;
-
-
-/* Stores information about a thread.  Addresses of these also serve
-   as unique thread identifiers and so are never freed, so they should
-   be as small as possible. */
-typedef
-   struct _Thread {
-      /* ADMIN */
-      struct _Thread* admin;
-      UInt            magic;
-      /* USEFUL */
-      WordSetID locksetA; /* WordSet of Lock* currently held by thread */
-      WordSetID locksetW; /* subset of locksetA held in w-mode */
-      SegmentID csegid;  /* current thread segment for thread */
-      /* EXPOSITION */
-      /* Place where parent was when this thread was created. */
-      ExeContext* created_at;
-      Bool        announced;
-      /* Index for generating references in error messages. */
-      Int         errmsg_index;
-   }
-   Thread;
-
-
-/* Stores information about a lock's current state.  These are
-   allocated and later freed (when the containing memory becomes
-   NoAccess).  This gives a problem for the XError type, which
-   contains Lock*s.  Solution is to copy any Lock which is to be
-   incorporated into an XErrors, so as to make it independent from the
-   'normal' collection of Locks, which can come and go.  When the lock
-   is copied, its .magic is changed from LockN_Magic to
-   LockP_Magic. */
-
-/* Lock kinds. */
-typedef
-   enum {
-      LK_mbRec=1001, /* normal mutex, possibly recursive */
-      LK_nonRec,     /* normal mutex, definitely non recursive */
-      LK_rdwr        /* reader-writer lock */
-   }
-   LockKind;
-
-typedef
-   struct _Lock {
-      /* ADMIN */
-      struct _Lock* admin;
-      ULong         unique; /* used for persistence-hashing */
-      UInt          magic;  /* LockN_MAGIC or LockP_MAGIC */
-      /* EXPOSITION */
-      /* Place where lock first came to the attention of Helgrind. */
-      ExeContext*   appeared_at;
-      /* If the lock is held, place where the lock most recently made
-         an unlocked->locked transition.  Must be sync'd with .heldBy:
-         either both NULL or both non-NULL. */
-      ExeContext*   acquired_at;
-      /* USEFUL-STATIC */
-      Addr          guestaddr; /* Guest address of lock */
-      LockKind      kind;      /* what kind of lock this is */
-      /* USEFUL-DYNAMIC */
-      Bool          heldW; 
-      WordBag*      heldBy; /* bag of threads that hold this lock */
-      /* .heldBy is NULL: lock is unheld, and .heldW is meaningless
-                          but arbitrarily set to False
-         .heldBy is non-NULL:
-            .heldW is True:  lock is w-held by threads in heldBy
-            .heldW is False: lock is r-held by threads in heldBy
-            Either way, heldBy may not validly be an empty Bag.
-
-         for LK_nonRec, r-holdings are not allowed, and w-holdings may
-         only have sizeTotal(heldBy) == 1
-
-         for LK_mbRec, r-holdings are not allowed, and w-holdings may
-         only have sizeUnique(heldBy) == 1
-
-         for LK_rdwr, w-holdings may only have sizeTotal(heldBy) == 1 */
-   }
-   Lock;
-
-
-/* Stores information about thread segments.  .prev can be NULL only
-   when this is the first segment for the thread.  .other is NULL
-   unless this segment depends on a message (create, join, signal)
-   from some other thread.  Segments are never freed (!) */
-typedef
-   struct _Segment {
-      /* ADMIN */
-      struct _Segment* admin;
-      UInt             magic;
-      /* USEFUL */
-      UInt             dfsver; /* Version # for depth-first searches */
-      Thread*          thr;    /* The thread that I am part of */
-      struct _Segment* prev;   /* The previous segment in this thread */
-      struct _Segment* other;  /* Possibly a segment from some other 
-                                  thread, which happened-before me */
-      XArray*          vts;    /* XArray of ScalarTS */
-      /* DEBUGGING ONLY: what does 'other' arise from?  
-         c=thread creation, j=join, s=cvsignal, S=semaphore */
-      Char other_hint;
-   }
-   Segment;
-
-
-/* ------ CacheLine ------ */
-
-#define N_LINE_BITS      5 /* must be >= 3 */
-#define N_LINE_ARANGE    (1 << N_LINE_BITS)
-#define N_LINE_TREES     (N_LINE_ARANGE >> 3)
-
-typedef
-   struct {
-      UShort descrs[N_LINE_TREES];
-      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
-   }
-   CacheLine;
-
-#define TREE_DESCR_16_0 (1<<0)
-#define TREE_DESCR_32_0 (1<<1)
-#define TREE_DESCR_16_1 (1<<2)
-#define TREE_DESCR_64   (1<<3)
-#define TREE_DESCR_16_2 (1<<4)
-#define TREE_DESCR_32_1 (1<<5)
-#define TREE_DESCR_16_3 (1<<6)
-#define TREE_DESCR_8_0  (1<<7)
-#define TREE_DESCR_8_1  (1<<8)
-#define TREE_DESCR_8_2  (1<<9)
-#define TREE_DESCR_8_3  (1<<10)
-#define TREE_DESCR_8_4  (1<<11)
-#define TREE_DESCR_8_5  (1<<12)
-#define TREE_DESCR_8_6  (1<<13)
-#define TREE_DESCR_8_7  (1<<14)
-#define TREE_DESCR_DTY  (1<<15)
-
-typedef
-   struct {
-      SVal  dict[4]; /* can represent up to 4 diff values in the line */
-      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
-                                      dict indexes */
-      /* if dict[0] == 0 then dict[1] is the index of the CacheLineF
-         to use */
-   }
-   CacheLineZ; /* compressed rep for a cache line */
-
-typedef
-   struct {
-      Bool inUse;
-      SVal w32s[N_LINE_ARANGE];
-   }
-   CacheLineF; /* full rep for a cache line */
-
-
-/* Shadow memory.
-   Primary map is a WordFM Addr SecMap*.  
-   SecMaps cover some page-size-ish section of address space and hold
-     a compressed representation.
-   CacheLine-sized chunks of SecMaps are copied into a Cache, being
-   decompressed when moved into the cache and recompressed on the
-   way out.  Because of this, the cache must operate as a writeback
-   cache, not a writethrough one.
-*/
-/* See comments below on shadow_mem_make_NoAccess re performance
-   effects of N_SECMAP_BITS settings.  On a 2.4GHz Core2,
-   starting/quitting OOo (32-bit), I have these rough numbers:
-      N_SECMAP_BITS = 11    2m23
-      N_SECMAP_BITS = 12    1m58
-      N_SECMAP_BITS = 13    1m53
-
-   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
-   N_SECMAP_BITS must >= N_LINE_BITS.
-*/
-#define N_SECMAP_BITS   13
-#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
-
-// # CacheLines held by a SecMap
-#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
-typedef
-   struct {
-      UInt magic;
-      Bool mbHasLocks;  /* hint: any locks in range?  safe: True */
-      Bool mbHasShared; /* hint: any ShM/ShR states in range?  safe: True */
-      CacheLineZ  linesZ[N_SECMAP_ZLINES];
-      CacheLineF* linesF;
-      Int         linesF_size;
-   }
-   SecMap;
-
-typedef
-   struct {
-      Int line_no; /* which Z-line are we in? */
-      Int word_no; /* inside the line, which word is current? */
-   }
-   SecMapIter;
-
-static void initSecMapIter ( SecMapIter* itr ) {
-   itr->line_no = 0;
-   itr->word_no = 0;
-}
-
-/* Get the current val, and move to the next position.  This is called
-   a huge amount in some programs (eg OpenOffice).  Hence the
-   'inline'. */
-static UWord stats__secmap_iterator_steppings; /* fwds */
-
-inline
-static Bool stepSecMapIter ( /*OUT*/SVal** pVal, 
-                             /*MOD*/SecMapIter* itr, SecMap* sm )
-{
-   CacheLineZ* lineZ = NULL;
-   CacheLineF* lineF = NULL;
-   /* Either it points to a valid place, or to (-1,-1) */
-   stats__secmap_iterator_steppings++;
-   if (UNLIKELY(itr->line_no == -1)) {
-      tl_assert(itr->word_no == -1);
-      return False;
-   }
-   /* so now it must be a valid place in the SecMap. */
-   if (0) VG_(printf)("%p %d %d\n", sm, (Int)itr->line_no, (Int)itr->word_no);
-   tl_assert(itr->line_no >= 0 && itr->line_no < N_SECMAP_ZLINES);
-   lineZ = &sm->linesZ[itr->line_no];
-   if (UNLIKELY(lineZ->dict[0] == 0)) {
-      tl_assert(sm->linesF);
-      tl_assert(sm->linesF_size > 0);
-      tl_assert(lineZ->dict[1] >= 0);
-      tl_assert(lineZ->dict[1] < sm->linesF_size);
-      lineF = &sm->linesF[ lineZ->dict[1] ];
-      tl_assert(lineF->inUse);
-      tl_assert(itr->word_no >= 0 && itr->word_no < N_LINE_ARANGE);
-      *pVal = &lineF->w32s[itr->word_no];
-      itr->word_no++;
-      if (itr->word_no == N_LINE_ARANGE)
-         itr->word_no = 0;
-   } else {
-      tl_assert(itr->word_no >= 0 && itr->word_no <= 3);
-      tl_assert(lineZ->dict[itr->word_no] != 0);
-      *pVal = &lineZ->dict[itr->word_no];
-      itr->word_no++;
-      if (itr->word_no == 4 || lineZ->dict[itr->word_no] == 0)
-         itr->word_no = 0;
-   }
-
-   if (itr->word_no == 0) {
-      itr->line_no++;
-      if (itr->line_no == N_SECMAP_ZLINES) {
-         itr->line_no = -1;
-         itr->word_no = -1;
-      }
-   }
-
-   return True;
-}
-
-/* ------ Cache ------ */
-
-#define N_WAY_BITS 16
-#define N_WAY_NENT (1 << N_WAY_BITS)
-
-/* Each tag is the address of the associated CacheLine, rounded down
-   to a CacheLine address boundary.  A CacheLine size must be a power
-   of 2 and must be 8 or more.  Hence an easy way to initialise the
-   cache so it is empty is to set all the tag values to any value % 8
-   != 0, eg 1.  This means all queries in the cache initially miss.
-   It does however require us to detect and not writeback, any line
-   with a bogus tag. */
-typedef
-   struct {
-      CacheLine lyns0[N_WAY_NENT];
-      Addr      tags0[N_WAY_NENT];
-   }
-   Cache;
-
-
-/* --------- Primary data structures --------- */
-
  /* Admin linked list of Threads */
  static Thread* admin_threads = NULL;
  
  /* Admin linked list of Locks */
  static Lock* admin_locks = NULL;
  
-/* Admin linked list of Segments */
-static Segment* admin_segments = NULL;
-
-/* Shadow memory primary map */
-static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
-static Cache   cache_shmem;
-
  /* Mapping table for core ThreadIds to Thread* */
  static Thread** map_threads = NULL; /* Array[VG_N_THREADS] of Thread* */
  
-/* Mapping table for thread segments IDs to Segment* */
-static WordFM* map_segments = NULL; /* WordFM SegmentID Segment* */
-
  /* Mapping table for lock guest addresses to Lock* */
  static WordFM* map_locks = NULL; /* WordFM LockAddr Lock* */
  
@@ -559,27 +154,19 @@ static Lock* __bus_lock_Lock = NULL;
  static UWord stats__lockN_acquires = 0;
  static UWord stats__lockN_releases = 0;
  
-static ThreadId map_threads_maybe_reverse_lookup_SLOW ( Thread* ); /*fwds*/
-
-#define Thread_MAGIC   0x504fc5e5
-#define LockN_MAGIC    0x6545b557 /* normal nonpersistent locks */
-#define LockP_MAGIC    0x755b5456 /* persistent (copied) locks */
-#define Segment_MAGIC  0x49e94d81
-#define SecMap_MAGIC   0x571e58cb
-
-static UWord stats__mk_Segment = 0;
+static
+ThreadId map_threads_maybe_reverse_lookup_SLOW ( Thread* thr ); /*fwds*/
  
  /* --------- Constructors --------- */
  
-static inline Bool is_sane_LockN ( Lock* lock ); /* fwds */
-
-static Thread* mk_Thread ( SegmentID csegid ) {
+static Thread* mk_Thread ( Thr* hbthr ) {
     static Int indx      = 1;
-   Thread* thread       = hg_zalloc( "hg", sizeof(Thread) );
+   Thread* thread       = HG_(zalloc)( "hg.mk_Thread.1", sizeof(Thread) );
     thread->locksetA     = HG_(emptyWS)( univ_lsets );
     thread->locksetW     = HG_(emptyWS)( univ_lsets );
-   thread->csegid       = csegid;
     thread->magic        = Thread_MAGIC;
+   thread->hbthr        = hbthr;
+   thread->coretid      = VG_INVALID_THREADID;
     thread->created_at   = NULL;
     thread->announced    = False;
     thread->errmsg_index = indx++;
@@ -587,116 +174,37 @@ static Thread* mk_Thread ( SegmentID csegid ) {
     admin_threads        = thread;
     return thread;
  }
+
  // Make a new lock which is unlocked (hence ownerless)
  static Lock* mk_LockN ( LockKind kind, Addr guestaddr ) {
     static ULong unique = 0;
-   Lock* lock             = hg_zalloc( "hg", sizeof(Lock) );
+   Lock* lock             = HG_(zalloc)( "hg.mk_Lock.1", sizeof(Lock) );
     lock->admin            = admin_locks;
     lock->unique           = unique++;
     lock->magic            = LockN_MAGIC;
     lock->appeared_at      = NULL;
     lock->acquired_at      = NULL;
+   lock->hbso             = libhb_so_alloc();
     lock->guestaddr        = guestaddr;
     lock->kind             = kind;
     lock->heldW            = False;
     lock->heldBy           = NULL;
-   tl_assert(is_sane_LockN(lock));
+   tl_assert(HG_(is_sane_LockN)(lock));
     admin_locks            = lock;
     return lock;
  }
-static Segment* mk_Segment ( Thread* thr, Segment* prev, Segment* other ) {
-   Segment* seg    = hg_zalloc( "hg", sizeof(Segment) );
-   seg->dfsver     = 0;
-   seg->thr        = thr;
-   seg->prev       = prev;
-   seg->other      = other;
-   seg->vts        = NULL;
-   seg->other_hint = ' ';
-   seg->magic      = Segment_MAGIC;
-   seg->admin      = admin_segments;
-   admin_segments = seg;
-   stats__mk_Segment++;
-   return seg;
-}
-
-static inline Bool is_sane_Segment ( Segment* seg ) {
-   return seg != NULL && seg->magic == Segment_MAGIC;
-}
-static inline Bool is_sane_Thread ( Thread* thr ) {
-   return thr != NULL && thr->magic == Thread_MAGIC;
-}
-
-static Bool is_sane_Bag_of_Threads ( WordBag* bag )
-{
-   Thread* thr;
-   Word    count;
-   VG_(initIterBag)( bag );
-   while (VG_(nextIterBag)( bag, (Word*)&thr, &count )) {
-      if (count < 1) return False;
-      if (!is_sane_Thread(thr)) return False;
-   }
-   VG_(doneIterBag)( bag );
-   return True;
-}
-static Bool is_sane_Lock_BASE ( Lock* lock )
-{
-   if (lock == NULL
-       || (lock->magic != LockN_MAGIC && lock->magic != LockP_MAGIC))
-      return False;
-   switch (lock->kind) { 
-      case LK_mbRec: case LK_nonRec: case LK_rdwr: break; 
-      default: return False; 
-   }
-   if (lock->heldBy == NULL) {
-      if (lock->acquired_at != NULL) return False;
-      /* Unheld.  We arbitrarily require heldW to be False. */
-      return !lock->heldW;
-   } else {
-      if (lock->acquired_at == NULL) return False;
-   }
-
-   /* If heldBy is non-NULL, we require it to contain at least one
-      thread. */
-   if (VG_(isEmptyBag)(lock->heldBy))
-      return False;
-
-   /* Lock is either r- or w-held. */
-   if (!is_sane_Bag_of_Threads(lock->heldBy)) 
-      return False;
-   if (lock->heldW) {
-      /* Held in write-mode */
-      if ((lock->kind == LK_nonRec || lock->kind == LK_rdwr)
-          && !VG_(isSingletonTotalBag)(lock->heldBy))
-         return False;
-   } else {
-      /* Held in read-mode */
-      if (lock->kind != LK_rdwr) return False;
-   }
-   return True;
-}
-static inline Bool is_sane_LockP ( Lock* lock ) {
-   return lock != NULL 
-          && lock->magic == LockP_MAGIC
-          && is_sane_Lock_BASE(lock);
-}
-static inline Bool is_sane_LockN ( Lock* lock ) {
-   return lock != NULL 
-          && lock->magic == LockN_MAGIC
-          && is_sane_Lock_BASE(lock);
-}
-static inline Bool is_sane_LockNorP ( Lock* lock ) {
-   return is_sane_Lock_BASE(lock);
-}
  
  /* Release storage for a Lock.  Also release storage in .heldBy, if
     any. */
  static void del_LockN ( Lock* lk ) 
  {
-   tl_assert(is_sane_LockN(lk));
+   tl_assert(HG_(is_sane_LockN)(lk));
+   tl_assert(lk->hbso);
+   libhb_so_dealloc(lk->hbso);
     if (lk->heldBy)
        VG_(deleteBag)( lk->heldBy );
     VG_(memset)(lk, 0xAA, sizeof(*lk));
-   hg_free(lk);
+   HG_(free)(lk);
  }
  
  /* Update 'lk' to reflect that 'thr' now has a write-acquisition of
@@ -704,8 +212,8 @@ static void del_LockN ( Lock* lk )
     correct program and libpthread behaviour are allowed. */
  static void lockN_acquire_writer ( Lock* lk, Thread* thr ) 
  {
-   tl_assert(is_sane_LockN(lk));
-   tl_assert(is_sane_Thread(thr));
+   tl_assert(HG_(is_sane_LockN)(lk));
+   tl_assert(HG_(is_sane_Thread)(thr));
  
     stats__lockN_acquires++;
  
@@ -717,7 +225,7 @@ static void lockN_acquire_writer ( Lock* lk, Thread* thr )
        tl_assert(lk->heldBy == NULL);
        tid = map_threads_maybe_reverse_lookup_SLOW(thr);
        lk->acquired_at
-         = VG_(record_ExeContext(tid, 0/*first_ip_delta*/));
+         = VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
     } else {
        tl_assert(lk->heldBy != NULL);
     }
@@ -729,7 +237,7 @@ static void lockN_acquire_writer ( Lock* lk, Thread* thr )
           tl_assert(lk->heldBy == NULL); /* can't w-lock recursively */
           tl_assert(!lk->heldW);
           lk->heldW  = True;
-         lk->heldBy = VG_(newBag)( hg_zalloc, "hg", hg_free );
+         lk->heldBy = VG_(newBag)( HG_(zalloc), "hg.lNaw.1", HG_(free) );
           VG_(addToBag)( lk->heldBy, (Word)thr );
           break;
        case LK_mbRec:
@@ -750,13 +258,13 @@ static void lockN_acquire_writer ( Lock* lk, Thread* thr )
        default: 
           tl_assert(0);
    }
-  tl_assert(is_sane_LockN(lk));
+  tl_assert(HG_(is_sane_LockN)(lk));
  }
  
  static void lockN_acquire_reader ( Lock* lk, Thread* thr )
  {
-   tl_assert(is_sane_LockN(lk));
-   tl_assert(is_sane_Thread(thr));
+   tl_assert(HG_(is_sane_LockN)(lk));
+   tl_assert(HG_(is_sane_Thread)(thr));
     /* can only add reader to a reader-writer lock. */
     tl_assert(lk->kind == LK_rdwr);
     /* lk must be free or already r-held. */
@@ -773,7 +281,7 @@ static void lockN_acquire_reader ( Lock* lk, Thread* thr )
        tl_assert(lk->heldBy == NULL);
        tid = map_threads_maybe_reverse_lookup_SLOW(thr);
        lk->acquired_at
-         = VG_(record_ExeContext(tid, 0/*first_ip_delta*/));
+         = VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
     } else {
        tl_assert(lk->heldBy != NULL);
     }
@@ -783,11 +291,11 @@ static void lockN_acquire_reader ( Lock* lk, Thread* thr )
        VG_(addToBag)(lk->heldBy, (Word)thr);
     } else {
        lk->heldW  = False;
-      lk->heldBy = VG_(newBag)( hg_zalloc, "hg", hg_free );
+      lk->heldBy = VG_(newBag)( HG_(zalloc), "hg.lNar.1", HG_(free) );
        VG_(addToBag)( lk->heldBy, (Word)thr );
     }
     tl_assert(!lk->heldW);
-   tl_assert(is_sane_LockN(lk));
+   tl_assert(HG_(is_sane_LockN)(lk));
  }
  
  /* Update 'lk' to reflect a release of it by 'thr'.  This is done
@@ -797,8 +305,8 @@ static void lockN_acquire_reader ( Lock* lk, Thread* thr )
  static void lockN_release ( Lock* lk, Thread* thr )
  {
     Bool b;
-   tl_assert(is_sane_LockN(lk));
-   tl_assert(is_sane_Thread(thr));
+   tl_assert(HG_(is_sane_LockN)(lk));
+   tl_assert(HG_(is_sane_Thread)(thr));
     /* lock must be held by someone */
     tl_assert(lk->heldBy);
     stats__lockN_releases++;
@@ -814,7 +322,7 @@ static void lockN_release ( Lock* lk, Thread* thr )
        lk->heldW       = False;
        lk->acquired_at = NULL;
     }
-   tl_assert(is_sane_LockN(lk));
+   tl_assert(HG_(is_sane_LockN)(lk));
  }
  
  static void remove_Lock_from_locksets_of_all_owning_Threads( Lock* lk )
@@ -827,7 +335,7 @@ static void remove_Lock_from_locksets_of_all_owning_Threads( Lock* lk )
     /* for each thread that holds this lock do ... */
     VG_(initIterBag)( lk->heldBy );
     while (VG_(nextIterBag)( lk->heldBy, (Word*)&thr, NULL )) {
-      tl_assert(is_sane_Thread(thr));
+      tl_assert(HG_(is_sane_Thread)(thr));
        tl_assert(HG_(elemWS)( univ_lsets,
                               thr->locksetA, (Word)lk ));
        thr->locksetA
@@ -843,177 +351,16 @@ static void remove_Lock_from_locksets_of_all_owning_Threads( Lock* lk )
     VG_(doneIterBag)( lk->heldBy );
  }
  
-/* --------- xxxID functions --------- */
-
-/* Proposal (for debugging sanity):
-
-   SegmentIDs from 0x1000000 .. 0x1FFFFFF (16777216)
-
-   All other xxxID handles are invalid.
-*/
-static inline Bool is_sane_SegmentID ( SegmentID tseg ) {
-   return tseg >= 0x1000000 && tseg <= 0x1FFFFFF;
-}
-static inline Bool is_sane_ThreadId ( ThreadId coretid ) {
-   return coretid >= 0 && coretid < VG_N_THREADS;
-}
-static SegmentID alloc_SegmentID ( void ) {
-   static SegmentID next = 0x1000000;
-   tl_assert(is_sane_SegmentID(next));
-   return next++;
-}
-
-/* --------- Shadow memory --------- */
-
-static inline Bool is_valid_scache_tag ( Addr tag ) {
-   /* a valid tag should be naturally aligned to the start of
-      a CacheLine. */
-   return 0 == (tag & (N_LINE_ARANGE - 1));
-}
-
-static inline Bool is_sane_SecMap ( SecMap* sm ) {
-   return sm != NULL && sm->magic == SecMap_MAGIC;
-}
-
-/* Shadow value encodings:
-
-   11 WordSetID:TSID_BITS WordSetID:LSID_BITS  ShM  thread-set lock-set
-   10 WordSetID:TSID_BITS WordSetID:LSID_BITS  ShR  thread-set lock-set
-   01 TSegmentID:30                            Excl thread-segment
-   00 0--(20)--0 10 0000 0000                  New
-   00 0--(20)--0 01 0000 0000                  NoAccess
-   00 0--(20)--0 00 0000 0000                  Invalid
-
-   TSID_BITS + LSID_BITS must equal 30.
-   The elements in thread sets are Thread*, casted to Word.
-   The elements in lock sets are Lock*, casted to Word.
-*/
-
-#define N_LSID_BITS  17
-#define N_LSID_MASK  ((1 << (N_LSID_BITS)) - 1)
-#define N_LSID_SHIFT 0
-
-#define N_TSID_BITS  (30 - (N_LSID_BITS))
-#define N_TSID_MASK  ((1 << (N_TSID_BITS)) - 1)
-#define N_TSID_SHIFT (N_LSID_BITS)
-
-static inline Bool is_sane_WordSetID_LSet ( WordSetID wset ) {
-   return wset >= 0 && wset <= N_LSID_MASK;
-}
-static inline Bool is_sane_WordSetID_TSet ( WordSetID wset ) {
-   return wset >= 0 && wset <= N_TSID_MASK;
-}
-
-
-__attribute__((noinline))
-__attribute__((noreturn))
-static void mk_SHVAL_fail ( WordSetID tset, WordSetID lset, HChar* who ) {
-   VG_(printf)("\n");
-   VG_(printf)("Helgrind: Fatal internal error -- cannot continue.\n");
-   VG_(printf)("Helgrind: mk_SHVAL_ShR(tset=%d,lset=%d): FAILED\n",
-               (Int)tset, (Int)lset);
-   VG_(printf)("Helgrind: max allowed tset=%d, lset=%d\n",
-               (Int)N_TSID_MASK, (Int)N_LSID_MASK);
-   VG_(printf)("Helgrind: program has too many thread "
-              "sets or lock sets to track.\n");
-   tl_assert(0);
-}
-
-static inline SVal mk_SHVAL_ShM ( WordSetID tset, WordSetID lset ) {
-   if (LIKELY(is_sane_WordSetID_TSet(tset) 
-              && is_sane_WordSetID_LSet(lset))) {
-      return (SVal)( (3<<30) | (tset << N_TSID_SHIFT) 
-                             | (lset << N_LSID_SHIFT));
-   } else {
-      mk_SHVAL_fail(tset, lset, "mk_SHVAL_ShM");
-   }
-}
-static inline SVal mk_SHVAL_ShR ( WordSetID tset, WordSetID lset ) {
-   if (LIKELY(is_sane_WordSetID_TSet(tset) 
-              && is_sane_WordSetID_LSet(lset))) {
-      return (SVal)( (2<<30) | (tset << N_TSID_SHIFT) 
-                             | (lset << N_LSID_SHIFT) );
-   } else {
-      mk_SHVAL_fail(tset, lset, "mk_SHVAL_ShR");
-   }
-}
-static inline SVal mk_SHVAL_Excl ( SegmentID tseg ) {
-   tl_assert(is_sane_SegmentID(tseg));
-   return (SVal)( (1<<30) | tseg );
-}
-#define SHVAL_New      ((SVal)(2<<8))
-#define SHVAL_NoAccess ((SVal)(1<<8))
-#define SHVAL_Invalid  ((SVal)(0<<8))
-
-static inline Bool is_SHVAL_ShM ( SVal w32 ) { 
-   return (w32 >> 30) == 3;
-}
-static inline Bool is_SHVAL_ShR ( SVal w32 ) {
-   return (w32 >> 30) == 2;
-}
-static inline Bool is_SHVAL_Sh ( SVal w32 ) {
-   return (w32 >> 31) == 1;
-}
-static inline Bool is_SHVAL_Excl ( SVal w32 ) {
-   return (w32 >> 30) == 1; 
-}
-static inline Bool is_SHVAL_New ( SVal w32 ) {
-   return w32 == SHVAL_New;
-}
-static inline Bool is_SHVAL_NoAccess ( SVal w32 ) { 
-   return w32 == SHVAL_NoAccess;
-}
-static inline Bool is_SHVAL_valid ( SVal w32 ) {
-   return is_SHVAL_Excl(w32) || is_SHVAL_NoAccess(w32)
-          || is_SHVAL_Sh(w32) || is_SHVAL_New(w32);
-}
-
-static inline SegmentID un_SHVAL_Excl ( SVal w32 ) {
-   tl_assert(is_SHVAL_Excl(w32));
-   return w32 & ~(3<<30);
-}
-static inline WordSetID un_SHVAL_ShR_tset ( SVal w32 ) {
-   tl_assert(is_SHVAL_ShR(w32));
-   return (w32 >> N_TSID_SHIFT) & N_TSID_MASK;
-}
-static inline WordSetID un_SHVAL_ShR_lset ( SVal w32 ) {
-   tl_assert(is_SHVAL_ShR(w32));
-   return (w32 >> N_LSID_SHIFT) & N_LSID_MASK;
-}
-static inline WordSetID un_SHVAL_ShM_tset ( SVal w32 ) {
-   tl_assert(is_SHVAL_ShM(w32));
-   return (w32 >> N_TSID_SHIFT) & N_TSID_MASK;
-}
-static inline WordSetID un_SHVAL_ShM_lset ( SVal w32 ) {
-   tl_assert(is_SHVAL_ShM(w32));
-   return (w32 >> N_LSID_SHIFT) & N_LSID_MASK;
-}
-static inline WordSetID un_SHVAL_Sh_tset ( SVal w32 ) {
-   tl_assert(is_SHVAL_Sh(w32));
-   return (w32 >> N_TSID_SHIFT) & N_TSID_MASK;
-}
-static inline WordSetID un_SHVAL_Sh_lset ( SVal w32 ) {
-   tl_assert(is_SHVAL_Sh(w32));
-   return (w32 >> N_LSID_SHIFT) & N_LSID_MASK;
-}
-
  
  /*----------------------------------------------------------------*/
  /*--- Print out the primary data structures                    ---*/
  /*----------------------------------------------------------------*/
  
  static WordSetID del_BHL ( WordSetID lockset ); /* fwds */
-static 
-void get_ZF_by_index ( /*OUT*/CacheLineZ** zp, /*OUT*/CacheLineF** fp,
-                       SecMap* sm, Int zix ); /* fwds */
-static 
-Segment* map_segments_maybe_lookup ( SegmentID segid ); /* fwds */
  
  #define PP_THREADS      (1<<1)
  #define PP_LOCKS        (1<<2)
-#define PP_SEGMENTS     (1<<3)
-#define PP_SHMEM_SHARED (1<<4)
-#define PP_ALL (PP_THREADS | PP_LOCKS | PP_SEGMENTS | PP_SHMEM_SHARED)
+#define PP_ALL (PP_THREADS | PP_LOCKS)
  
  
  static const Int sHOW_ADMIN = 0;
@@ -1041,7 +388,6 @@ static void pp_Thread ( Int d, Thread* t )
     }
     space(d+3); VG_(printf)("locksetA %d\n",   (Int)t->locksetA);
     space(d+3); VG_(printf)("locksetW %d\n",   (Int)t->locksetW);
-   space(d+3); VG_(printf)("csegid   0x%x\n", (UInt)t->csegid);
     space(d+0); VG_(printf)("}\n");
  }
  
@@ -1151,314 +497,110 @@ static void pp_map_locks ( Int d )
     space(d); VG_(printf)("}\n");
  }
  
-static void pp_Segment ( Int d, Segment* s )
-{
-   space(d+0); VG_(printf)("Segment %p {\n", s);
-   if (sHOW_ADMIN) {
-   space(d+3); VG_(printf)("admin  %p\n",   s->admin);
-   space(d+3); VG_(printf)("magic  0x%x\n", (UInt)s->magic);
-   }
-   space(d+3); VG_(printf)("dfsver    %u\n", s->dfsver);
-   space(d+3); VG_(printf)("thr       %p\n", s->thr);
-   space(d+3); VG_(printf)("prev      %p\n", s->prev);
-   space(d+3); VG_(printf)("other[%c] %p\n", s->other_hint, s->other);
-   space(d+0); VG_(printf)("}\n");
-}
-
-static void pp_admin_segments ( Int d )
+static void pp_everything ( Int flags, Char* caller )
  {
-   Int      i, n;
-   Segment* s;
-   for (n = 0, s = admin_segments;  s;  n++, s = s->admin) {
-      /* nothing */
+   Int d = 0;
+   VG_(printf)("\n");
+   VG_(printf)("All_Data_Structures (caller = \"%s\") {\n", caller);
+   if (flags & PP_THREADS) {
+      VG_(printf)("\n");
+      pp_admin_threads(d+3);
+      VG_(printf)("\n");
+      pp_map_threads(d+3);
     }
-   space(d); VG_(printf)("admin_segments (%d records) {\n", n);
-   for (i = 0, s = admin_segments;  s;  i++, s = s->admin) {
-      if (0) {
-         space(n); 
-         VG_(printf)("admin_segments record %d of %d:\n", i, n);
-      }
-      pp_Segment(d+3, s);
+   if (flags & PP_LOCKS) {
+      VG_(printf)("\n");
+      pp_admin_locks(d+3);
+      VG_(printf)("\n");
+      pp_map_locks(d+3);
     }
-   space(d); VG_(printf)("}\n");
-}
  
-static void pp_map_segments ( Int d )
-{
-   SegmentID segid;
-   Segment*  seg;
-   space(d); VG_(printf)("map_segments (%d entries) {\n", 
-                         (Int)VG_(sizeFM)( map_segments ));
-   VG_(initIterFM)( map_segments );
-   while (VG_(nextIterFM)( map_segments, (Word*)&segid,
-                                         (Word*)&seg )) {
-      space(d+3);
-      VG_(printf)("segid 0x%x -> Segment %p\n", (UInt)segid, seg);
-   }
-   VG_(doneIterFM)( map_segments );
-   space(d); VG_(printf)("}\n");
+   VG_(printf)("\n");
+   VG_(printf)("}\n");
+   VG_(printf)("\n");
  }
  
-static void show_shadow_w32 ( /*OUT*/Char* buf, Int nBuf, SVal w32 )
-{
-   tl_assert(nBuf-1 >= 99);
-   VG_(memset)(buf, 0, nBuf);
-   if (is_SHVAL_ShM(w32)) {
-      VG_(sprintf)(buf, "ShM(%u,%u)", 
-                   un_SHVAL_ShM_tset(w32), un_SHVAL_ShM_lset(w32));
-   }
-   else
-   if (is_SHVAL_ShR(w32)) {
-      VG_(sprintf)(buf, "ShR(%u,%u)", 
-                   un_SHVAL_ShR_tset(w32), un_SHVAL_ShR_lset(w32));
-   }
-   else
-   if (is_SHVAL_Excl(w32)) {
-      VG_(sprintf)(buf, "Excl(%u)", un_SHVAL_Excl(w32));
-   }
-   else
-   if (is_SHVAL_New(w32)) {
-      VG_(sprintf)(buf, "%s", "New");
-   }
-   else
-   if (is_SHVAL_NoAccess(w32)) {
-      VG_(sprintf)(buf, "%s", "NoAccess");
-   }
-   else {
-      VG_(sprintf)(buf, "Invalid-shadow-word(%u)", w32);
-   }
-}
+#undef SHOW_ADMIN
  
-static
-void show_shadow_w32_for_user ( /*OUT*/Char* buf, Int nBuf, SVal w32 )
+
+/*----------------------------------------------------------------*/
+/*--- Initialise the primary data structures                   ---*/
+/*----------------------------------------------------------------*/
+
+static void initialise_data_structures ( Thr* hbthr_root )
  {
-   tl_assert(nBuf-1 >= 99);
-   VG_(memset)(buf, 0, nBuf);
-   if (is_SHVAL_ShM(w32)) {
-      WordSetID tset = un_SHVAL_ShM_tset(w32);
-      WordSetID lset = del_BHL( un_SHVAL_ShM_lset(w32) );
-      VG_(sprintf)(buf, "ShMod(#Tset=%ld,#Lset=%ld)",
-                   HG_(cardinalityWS)(univ_tsets, tset),
-                   HG_(cardinalityWS)(univ_lsets, lset));
-   }
-   else
-   if (is_SHVAL_ShR(w32)) {
-      WordSetID tset = un_SHVAL_ShR_tset(w32);
-      WordSetID lset = del_BHL( un_SHVAL_ShR_lset(w32) );
-      VG_(sprintf)(buf, "ShRO(#Tset=%ld,#Lset=%ld)",
-                   HG_(cardinalityWS)(univ_tsets, tset),
-                   HG_(cardinalityWS)(univ_lsets, lset));
-   }
-   else
-   if (is_SHVAL_Excl(w32)) {
-      SegmentID segid  = un_SHVAL_Excl(w32);
-      Segment*  mb_seg = map_segments_maybe_lookup(segid);
-      if (mb_seg && mb_seg->thr && is_sane_Thread(mb_seg->thr)) {
-         VG_(sprintf)(buf, "Exclusive(thr#%d)", mb_seg->thr->errmsg_index);
-      } else {
-         VG_(sprintf)(buf, "Exclusive(segid=%u)", un_SHVAL_Excl(w32));
-      }
-   }
-   else
-   if (is_SHVAL_New(w32)) {
-      VG_(sprintf)(buf, "%s", "New");
-   }
-   else
-   if (is_SHVAL_NoAccess(w32)) {
-      VG_(sprintf)(buf, "%s", "NoAccess");
-   }
-   else {
-      VG_(sprintf)(buf, "Invalid-shadow-word(%u)", w32);
-   }
-}
-
-static void pp_SecMap_shared ( Int d, SecMap* sm, Addr ga )
-{
-   Int  i;
-#if 0
-   Addr a;
-   SVal w32;
-   Char buf[100];
-#endif
-   CacheLineZ* lineZ;
-   CacheLineF* lineF;
-   space(d+0); VG_(printf)("SecMap %p (ga %#lx) {\n", sm, ga);
-
-   for (i = 0; i < N_SECMAP_ZLINES; i++) {
-      get_ZF_by_index( &lineZ, &lineF, sm, i );
-      space(d+3); VG_(printf)("// pp_SecMap_shared: not implemented\n");
-   }
-
-#if 0
-   for (i = 0; i < N_SECMAP_ARANGE; i++) {
-      w32 = sm->w32s[i];
-      a   = ga + 1 * i;
-      if (! (is_SHVAL_ShM(w32) || is_SHVAL_ShR(w32)))
-         continue;
-      space(d+3); VG_(printf)("%#lx -> 0x%08x ", (void*)a, w32);
-      show_shadow_w32(buf, sizeof(buf), w32);
-      VG_(printf)("%s\n", buf);
-   }
-#endif
-
-   space(d+0); VG_(printf)("}\n");
-}
-
-static void pp_map_shmem_shared ( Int d )
-{
-   Addr    ga;
-   SecMap* sm;
-   space(d); VG_(printf)("map_shmem_ShR_and_ShM_only {\n");
-   VG_(initIterFM)( map_shmem );
-   while (VG_(nextIterFM)( map_shmem, (Word*)&ga,
-                                      (Word*)&sm )) {
-      pp_SecMap_shared( d+3, sm, ga );
-   }
-   VG_(doneIterFM) ( map_shmem );
-   space(d); VG_(printf)("}\n");
-}
-
-static void pp_everything ( Int flags, Char* caller )
-{
-   Int d = 0;
-   VG_(printf)("\n");
-   VG_(printf)("All_Data_Structures (caller = \"%s\") {\n", caller);
-   if (flags & PP_THREADS) {
-      VG_(printf)("\n");
-      pp_admin_threads(d+3);
-      VG_(printf)("\n");
-      pp_map_threads(d+3);
-   }
-   if (flags & PP_LOCKS) {
-      VG_(printf)("\n");
-      pp_admin_locks(d+3);
-      VG_(printf)("\n");
-      pp_map_locks(d+3);
-   }
-   if (flags & PP_SEGMENTS) {
-      VG_(printf)("\n");
-      pp_admin_segments(d+3);
-      VG_(printf)("\n");
-      pp_map_segments(d+3);
-   }
-   if (flags & PP_SHMEM_SHARED) {
-      VG_(printf)("\n");
-      pp_map_shmem_shared( d+3 );
-   }
-
-   VG_(printf)("\n");
-   VG_(printf)("}\n");
-   VG_(printf)("\n");
-}
-
-#undef SHOW_ADMIN
-
-
-/*----------------------------------------------------------------*/
-/*--- Initialise the primary data structures                   ---*/
-/*----------------------------------------------------------------*/
-
-/* fwds */
-static void map_segments_add ( SegmentID segid, Segment* seg );
-static void shmem__invalidate_scache ( void );
-static void hbefore__invalidate_cache ( void );
-static void shmem__set_mbHasLocks ( Addr a, Bool b );
-static Bool shmem__get_mbHasLocks ( Addr a );
-static void shadow_mem_set8 ( Thread* uu_thr_acc, Addr a, SVal svNew );
-static XArray* singleton_VTS ( Thread* thr, UWord tym );
-
-static void initialise_data_structures ( void )
-{
-   SegmentID segid;
-   Segment*  seg;
     Thread*   thr;
  
     /* Get everything initialised and zeroed. */
     tl_assert(admin_threads == NULL);
     tl_assert(admin_locks == NULL);
-   tl_assert(admin_segments == NULL);
  
     tl_assert(sizeof(Addr) == sizeof(Word));
-   tl_assert(map_shmem == NULL);
-   map_shmem = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxed Word cmp*/);
-   tl_assert(map_shmem != NULL);
-   shmem__invalidate_scache();
  
     tl_assert(map_threads == NULL);
-   map_threads = hg_zalloc( "hg", VG_N_THREADS * sizeof(Thread*) );
+   map_threads = HG_(zalloc)( "hg.ids.1", VG_N_THREADS * sizeof(Thread*) );
     tl_assert(map_threads != NULL);
  
-   /* re <=: < on 64-bit platforms, == on 32-bit ones */
-   tl_assert(sizeof(SegmentID) <= sizeof(Word));
-   tl_assert(sizeof(Segment*) == sizeof(Word));
-   tl_assert(map_segments == NULL);
-   map_segments = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxed Word cmp*/);
-   tl_assert(map_segments != NULL);
-   hbefore__invalidate_cache();
-
     tl_assert(sizeof(Addr) == sizeof(Word));
     tl_assert(map_locks == NULL);
-   map_locks = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxed Word cmp*/);
+   map_locks = VG_(newFM)( HG_(zalloc), "hg.ids.2", HG_(free), 
+                           NULL/*unboxed Word cmp*/);
     tl_assert(map_locks != NULL);
  
     __bus_lock_Lock = mk_LockN( LK_nonRec, (Addr)&__bus_lock );
-   tl_assert(is_sane_LockN(__bus_lock_Lock));
+   tl_assert(HG_(is_sane_LockN)(__bus_lock_Lock));
     VG_(addToFM)( map_locks, (Word)&__bus_lock, (Word)__bus_lock_Lock );
  
     tl_assert(univ_tsets == NULL);
-   univ_tsets = HG_(newWordSetU)( hg_zalloc, hg_free, 8/*cacheSize*/ );
+   univ_tsets = HG_(newWordSetU)( HG_(zalloc), "hg.ids.3", HG_(free),
+                                  8/*cacheSize*/ );
     tl_assert(univ_tsets != NULL);
  
     tl_assert(univ_lsets == NULL);
-   univ_lsets = HG_(newWordSetU)( hg_zalloc, hg_free, 8/*cacheSize*/ );
+   univ_lsets = HG_(newWordSetU)( HG_(zalloc), "hg.ids.4", HG_(free),
+                                  8/*cacheSize*/ );
     tl_assert(univ_lsets != NULL);
  
     tl_assert(univ_laog == NULL);
-   univ_laog = HG_(newWordSetU)( hg_zalloc, hg_free, 24/*cacheSize*/ );
+   univ_laog = HG_(newWordSetU)( HG_(zalloc), "hg.ids.5 (univ_laog)",
+                                 HG_(free), 24/*cacheSize*/ );
     tl_assert(univ_laog != NULL);
  
     /* Set up entries for the root thread */
     // FIXME: this assumes that the first real ThreadId is 1
  
-   /* a segment for the new thread ... */
-   // FIXME: code duplication in ev__post_thread_create
-   segid = alloc_SegmentID();
-   seg   = mk_Segment( NULL, NULL, NULL );
-   map_segments_add( segid, seg );
-
     /* a Thread for the new thread ... */
-   thr = mk_Thread( segid );
-   seg->thr = thr;
+   thr = mk_Thread(hbthr_root);
+   thr->coretid = 1; /* FIXME: hardwires an assumption about the
+                        identity of the root thread. */
+   tl_assert( libhb_get_Thr_opaque(hbthr_root) == NULL );
+   libhb_set_Thr_opaque(hbthr_root, thr);
  
-   /* Give the thread a starting-off vector timestamp. */
-   seg->vts = singleton_VTS( seg->thr, 1 );
+   /* and bind it in the thread-map table. */
+   tl_assert(HG_(is_sane_ThreadId)(thr->coretid));
+   tl_assert(thr->coretid != VG_INVALID_THREADID);
  
-   /* and bind it in the thread-map table.
-      FIXME: assumes root ThreadId == 1. */
-   map_threads[1] = thr;
+   map_threads[thr->coretid] = thr;
  
     tl_assert(VG_INVALID_THREADID == 0);
  
     /* Mark the new bus lock correctly (to stop the sanity checks
        complaining) */
     tl_assert( sizeof(__bus_lock) == 4 );
-   shadow_mem_set8( NULL/*unused*/, __bus_lock_Lock->guestaddr, 
-                                    mk_SHVAL_Excl(segid) );
-   shmem__set_mbHasLocks( __bus_lock_Lock->guestaddr, True );
  
     all__sanity_check("initialise_data_structures");
  }
  
  
  /*----------------------------------------------------------------*/
-/*--- map_threads :: WordFM core-ThreadId Thread*              ---*/
+/*--- map_threads :: array[core-ThreadId] of Thread*           ---*/
  /*----------------------------------------------------------------*/
  
  /* Doesn't assert if the relevant map_threads entry is NULL. */
  static Thread* map_threads_maybe_lookup ( ThreadId coretid )
  {
     Thread* thr;
-   tl_assert( is_sane_ThreadId(coretid) );
+   tl_assert( HG_(is_sane_ThreadId)(coretid) );
     thr = map_threads[coretid];
     return thr;
  }
@@ -1467,26 +609,24 @@ static Thread* map_threads_maybe_lookup ( ThreadId coretid )
  static inline Thread* map_threads_lookup ( ThreadId coretid )
  {
     Thread* thr;
-   tl_assert( is_sane_ThreadId(coretid) );
+   tl_assert( HG_(is_sane_ThreadId)(coretid) );
     thr = map_threads[coretid];
     tl_assert(thr);
     return thr;
  }
  
-/* Do a reverse lookup.  Warning: POTENTIALLY SLOW.  Does not assert
-   if 'thr' is not found in map_threads. */
+/* Do a reverse lookup by reading thr->coretid.  Does not assert if
+   'thr' is not found in map_threads. */
  static ThreadId map_threads_maybe_reverse_lookup_SLOW ( Thread* thr )
  {
-   Int i;
-   tl_assert(is_sane_Thread(thr));
+   ThreadId tid;
+   tl_assert(HG_(is_sane_Thread)(thr));
     /* Check nobody used the invalid-threadid slot */
     tl_assert(VG_INVALID_THREADID >= 0 && VG_INVALID_THREADID < VG_N_THREADS);
     tl_assert(map_threads[VG_INVALID_THREADID] == NULL);
-   for (i = 0; i < VG_N_THREADS; i++) {
-      if (i != VG_INVALID_THREADID && map_threads[i] == thr)
-         return (ThreadId)i;
-   }
-   return VG_INVALID_THREADID;
+   tid = thr->coretid;
+   tl_assert(HG_(is_sane_ThreadId)(tid));
+   return tid;
  }
  
  /* Do a reverse lookup.  Warning: POTENTIALLY SLOW.  Asserts if 'thr'
@@ -1495,6 +635,8 @@ static ThreadId map_threads_reverse_lookup_SLOW ( Thread* thr )
  {
     ThreadId tid = map_threads_maybe_reverse_lookup_SLOW( thr );
     tl_assert(tid != VG_INVALID_THREADID);
+   tl_assert(map_threads[tid]);
+   tl_assert(map_threads[tid]->coretid == tid);
     return tid;
  }
  
@@ -1502,7 +644,7 @@ static void map_threads_delete ( ThreadId coretid )
  {
     Thread* thr;
     tl_assert(coretid != 0);
-   tl_assert( is_sane_ThreadId(coretid) );
+   tl_assert( HG_(is_sane_ThreadId)(coretid) );
     thr = map_threads[coretid];
     tl_assert(thr);
     map_threads[coretid] = NULL;
@@ -1521,24 +663,20 @@ Lock* map_locks_lookup_or_create ( LockKind lkk, Addr ga, ThreadId tid )
  {
     Bool  found;
     Lock* oldlock = NULL;
-   tl_assert(is_sane_ThreadId(tid));
+   tl_assert(HG_(is_sane_ThreadId)(tid));
     found = VG_(lookupFM)( map_locks, 
                            NULL, (Word*)&oldlock, (Word)ga );
     if (!found) {
        Lock* lock = mk_LockN(lkk, ga);
        lock->appeared_at = VG_(record_ExeContext)( tid, 0 );
-      tl_assert(is_sane_LockN(lock));
+      tl_assert(HG_(is_sane_LockN)(lock));
        VG_(addToFM)( map_locks, (Word)ga, (Word)lock );
        tl_assert(oldlock == NULL);
-      // mark the relevant secondary map has .mbHasLocks
-      shmem__set_mbHasLocks( ga, True );
        return lock;
     } else {
        tl_assert(oldlock != NULL);
-      tl_assert(is_sane_LockN(oldlock));
+      tl_assert(HG_(is_sane_LockN)(oldlock));
        tl_assert(oldlock->guestaddr == ga);
-      // check the relevant secondary map has .mbHasLocks?
-      tl_assert(shmem__get_mbHasLocks(ga) == True);
        return oldlock;
     }
  }
@@ -1549,10 +687,6 @@ static Lock* map_locks_maybe_lookup ( Addr ga )
     Lock* lk = NULL;
     found = VG_(lookupFM)( map_locks, NULL, (Word*)&lk, (Word)ga );
     tl_assert(found  ?  lk != NULL  :  lk == NULL);
-   if (found) {
-      // check the relevant secondary map has .mbHasLocks?
-      tl_assert(shmem__get_mbHasLocks(ga) == True);
-   }
     return lk;
  }
  
@@ -1570,982 +704,132 @@ static void map_locks_delete ( Addr ga )
  }
  
  
+
  /*----------------------------------------------------------------*/
-/*--- map_segments :: WordFM SegmentID Segment*                ---*/
-/*--- the DAG of thread segments                               ---*/
+/*--- Sanity checking the data structures                      ---*/
  /*----------------------------------------------------------------*/
  
-static void segments__generate_vcg ( void ); /* fwds */
+static UWord stats__sanity_checks = 0;
  
-/*--------------- SegmentID to Segment* maps ---------------*/
+static void laog__sanity_check ( Char* who ); /* fwds */
  
-static Segment* map_segments_lookup ( SegmentID segid )
-{
-   Bool     found;
-   Segment* seg = NULL;
-   tl_assert( is_sane_SegmentID(segid) );
-   found = VG_(lookupFM)( map_segments,
-                          NULL, (Word*)&seg, (Word)segid );
-   tl_assert(found);
-   tl_assert(seg != NULL);
-   return seg;
-}
+/* REQUIRED INVARIANTS:
  
-static Segment* map_segments_maybe_lookup ( SegmentID segid )
-{
-   Bool     found;
-   Segment* seg = NULL;
-   tl_assert( is_sane_SegmentID(segid) );
-   found = VG_(lookupFM)( map_segments,
-                          NULL, (Word*)&seg, (Word)segid );
-   if (!found) tl_assert(seg == NULL);
-   return seg;
-}
+   Thread vs Segment/Lock/SecMaps
  
-static void map_segments_add ( SegmentID segid, Segment* seg )
-{
-   /* This is a bit inefficient.  Oh well. */
-   tl_assert( !VG_(lookupFM)( map_segments, NULL, NULL, segid ));
-   VG_(addToFM)( map_segments, (Word)segid, (Word)seg );
-}
+      for each t in Threads {
  
-/*--------------- to do with Vector Timestamps ---------------*/
+         // Thread.lockset: each element is really a valid Lock
  
-/* Scalar Timestamp */
-typedef
-   struct {
-      Thread* thr;
-      UWord   tym;
-   }
-   ScalarTS;
+         // Thread.lockset: each Lock in set is actually held by that thread
+         for lk in Thread.lockset 
+            lk == LockedBy(t)
  
-/* Vector Timestamp = XArray* ScalarTS */
+         // Thread.csegid is a valid SegmentID
+         // and the associated Segment has .thr == t
  
-static Bool is_sane_VTS ( XArray* vts )
-{
-   UWord     i, n;
-   ScalarTS  *st1, *st2;
-   n = VG_(sizeXA)( vts );
-   if (n >= 2) {
-      for (i = 0; i < n-1; i++) {
-         st1 = VG_(indexXA)( vts, i );
-         st2 = VG_(indexXA)( vts, i+1 );
-         if (st1->thr >= st2->thr)
-            return False;
-         if (st1->tym == 0 || st2->tym == 0)
-            return False;
        }
-   }
-   return True;
-}
-
-static XArray* new_VTS ( void ) {
-   return VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(ScalarTS) );
-}
-static XArray* singleton_VTS ( Thread* thr, UWord tym ) {
-   ScalarTS st;
-   XArray*  vts;
-   tl_assert(thr);
-   tl_assert(tym >= 1);
-   vts = new_VTS();
-   tl_assert(vts);
-   st.thr = thr;
-   st.tym = tym;
-   VG_(addToXA)( vts, &st );
-   return vts;
-}
  
+      all thread Locksets are pairwise empty under intersection
+      (that is, no lock is claimed to be held by more than one thread)
+      -- this is guaranteed if all locks in locksets point back to their
+      owner threads
  
-static Bool cmpGEQ_VTS ( XArray* a, XArray* b )
-{
-   Word     ia, ib, useda, usedb;
-   UWord    tyma, tymb;
-   Thread*  thr;
-   ScalarTS *tmpa, *tmpb;
-
-   Bool all_leq = True;
-   Bool all_geq = True;
+   Lock vs Thread/Segment/SecMaps
  
-   tl_assert(a);
-   tl_assert(b);
-   useda = VG_(sizeXA)( a );
-   usedb = VG_(sizeXA)( b );
+      for each entry (gla, la) in map_locks
+         gla == la->guestaddr
  
-   ia = ib = 0;
+      for each lk in Locks {
  
-   while (1) {
+         lk->tag is valid
+         lk->guestaddr does not have shadow state NoAccess
+         if lk == LockedBy(t), then t->lockset contains lk
+         if lk == UnlockedBy(segid) then segid is valid SegmentID
+             and can be mapped to a valid Segment(seg)
+             and seg->thr->lockset does not contain lk
+         if lk == UnlockedNew then (no lockset contains lk)
  
-      /* This logic is to enumerate triples (thr, tyma, tymb) drawn
-         from a and b in order, where thr is the next Thread*
-         occurring in either a or b, and tyma/b are the relevant
-         scalar timestamps, taking into account implicit zeroes. */
-      tl_assert(ia >= 0 && ia <= useda);
-      tl_assert(ib >= 0 && ib <= usedb);
-      tmpa = tmpb = NULL;
+         secmaps for lk has .mbHasLocks == True
  
-      if (ia == useda && ib == usedb) {
-         /* both empty - done */
-         break;
        }
-      else
-      if (ia == useda && ib != usedb) {
-         /* a empty, use up b */
-         tmpb = VG_(indexXA)( b, ib );
-         thr  = tmpb->thr;
-         tyma = 0;
-         tymb = tmpb->tym;
-         ib++;
-      }
-      else
-      if (ia != useda && ib == usedb) {
-         /* b empty, use up a */
-         tmpa = VG_(indexXA)( a, ia );
-         thr  = tmpa->thr;
-         tyma = tmpa->tym;
-         tymb = 0;
-         ia++;
-      }
-      else {
-         /* both not empty; extract lowest-Thread*'d triple */
-         tmpa = VG_(indexXA)( a, ia );
-         tmpb = VG_(indexXA)( b, ib );
-         if (tmpa->thr < tmpb->thr) {
-            /* a has the lowest unconsidered Thread* */
-            thr  = tmpa->thr;
-            tyma = tmpa->tym;
-            tymb = 0;
-            ia++;
-         }
-         else
-         if (tmpa->thr > tmpb->thr) {
-            /* b has the lowest unconsidered Thread* */
-            thr  = tmpb->thr;
-            tyma = 0;
-            tymb = tmpb->tym;
-            ib++;
-         } else {
-            /* they both next mention the same Thread* */
-            tl_assert(tmpa->thr == tmpb->thr);
-            thr  = tmpa->thr; /* == tmpb->thr */
-            tyma = tmpa->tym;
-            tymb = tmpb->tym;
-            ia++;
-            ib++;
-         }
-      }
-
-      /* having laboriously determined (thr, tyma, tymb), do something
-         useful with it. */
-      if (tyma < tymb)
-         all_geq = False;
-      if (tyma > tymb)
-         all_leq = False;
-   }
-
-   if (all_leq && all_geq)
-      return True; /* PordEQ */
-   /* now we know they aren't equal, so either all_leq or all_geq or
-      both are false. */
-   if (all_leq)
-      return False; /* PordLT */
-   if (all_geq)
-      return True; /* PordGT */
-   /* hmm, neither all_geq or all_leq.  This means unordered. */
-   return False; /* PordUN */
-}
-
-
-/* Compute max((tick(thra,a),b) into a new XArray.  a and b are
-   unchanged.  If neither a nor b supply a value for 'thra',
-   assert. */
-static
-XArray* tickL_and_joinR_VTS ( Thread* thra, XArray* a, XArray* b )
-{
-   Word     ia, ib, useda, usedb, ticks_found;
-   UWord    tyma, tymb, tymMax;
-   Thread*  thr;
-   XArray*  res;
-   ScalarTS *tmpa, *tmpb;
  
-   tl_assert(a);
-   tl_assert(b);
-   tl_assert(thra);
-   useda = VG_(sizeXA)( a );
-   usedb = VG_(sizeXA)( b );
+   Segment vs Thread/Lock/SecMaps
  
-   res = new_VTS();
-   ia = ib = ticks_found = 0;
+      the Segment graph is a dag (no cycles)
+      all of the Segment graph must be reachable from the segids
+         mentioned in the Threads
  
-   while (1) {
+      for seg in Segments {
  
-      /* This logic is to enumerate triples (thr, tyma, tymb) drawn
-         from a and b in order, where thr is the next Thread*
-         occurring in either a or b, and tyma/b are the relevant
-         scalar timestamps, taking into account implicit zeroes. */
-      tl_assert(ia >= 0 && ia <= useda);
-      tl_assert(ib >= 0 && ib <= usedb);
-      tmpa = tmpb = NULL;
+         seg->thr is a sane Thread
  
-      if (ia == useda && ib == usedb) {
-         /* both empty - done */
-         break;
-      }
-      else
-      if (ia == useda && ib != usedb) {
-         /* a empty, use up b */
-         tmpb = VG_(indexXA)( b, ib );
-         thr  = tmpb->thr;
-         tyma = 0;
-         tymb = tmpb->tym;
-         ib++;
-      }
-      else
-      if (ia != useda && ib == usedb) {
-         /* b empty, use up a */
-         tmpa = VG_(indexXA)( a, ia );
-         thr  = tmpa->thr;
-         tyma = tmpa->tym;
-         tymb = 0;
-         ia++;
-      }
-      else {
-         /* both not empty; extract lowest-Thread*'d triple */
-         tmpa = VG_(indexXA)( a, ia );
-         tmpb = VG_(indexXA)( b, ib );
-         if (tmpa->thr < tmpb->thr) {
-            /* a has the lowest unconsidered Thread* */
-            thr  = tmpa->thr;
-            tyma = tmpa->tym;
-            tymb = 0;
-            ia++;
-         }
-         else
-         if (tmpa->thr > tmpb->thr) {
-            /* b has the lowest unconsidered Thread* */
-            thr  = tmpb->thr;
-            tyma = 0;
-            tymb = tmpb->tym;
-            ib++;
-         } else {
-            /* they both next mention the same Thread* */
-            tl_assert(tmpa->thr == tmpb->thr);
-            thr  = tmpa->thr; /* == tmpb->thr */
-            tyma = tmpa->tym;
-            tymb = tmpb->tym;
-            ia++;
-            ib++;
-         }
        }
  
-      /* having laboriously determined (thr, tyma, tymb), do something
-         useful with it. */
-      if (thr == thra) {
-         if (tyma > 0) {
-            /* VTS 'a' actually supplied this value; it is not a
-               default zero.  Do the required 'tick' action. */
-            tyma++;
-            ticks_found++;
-         } else {
-            /* 'a' didn't supply this value, so 'b' must have. */
-            tl_assert(tymb > 0);
-         }
-      }
-      tymMax = tyma > tymb ? tyma : tymb;
-      if (tymMax > 0) {
-         ScalarTS st;
-         st.thr = thr;
-         st.tym = tymMax;
-         VG_(addToXA)( res, &st );
-      }
+   SecMaps vs Segment/Thread/Lock
  
-   }
+      for sm in SecMaps {
  
-   tl_assert(is_sane_VTS( res ));
+         sm properly aligned
+         if any shadow word is ShR or ShM then .mbHasShared == True
  
-   if (thra != NULL) {
-      tl_assert(ticks_found == 1);
-   } else {
-      tl_assert(ticks_found == 0);
-   }
+         for each Excl(segid) state
+            map_segments_lookup maps to a sane Segment(seg)
+         for each ShM/ShR(tsetid,lsetid) state
+            each lk in lset is a valid Lock
+            each thr in tset is a valid thread, which is non-dead
  
-   return res;
-}
-
-
-/* Do 'vts[me]++', so to speak.  If 'me' does not have an entry in
-   'vts', set it to 1 in the returned VTS. */
-
-static XArray* tick_VTS ( Thread* me, XArray* vts ) {
-   ScalarTS* here = NULL;
-   ScalarTS  tmp;
-   XArray*   res;
-   Word      i, n; 
-   tl_assert(me);
-   tl_assert(is_sane_VTS(vts));
-   if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
-                      (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
-   res = new_VTS();
-   n = VG_(sizeXA)( vts );
-   for (i = 0; i < n; i++) {
-      here = VG_(indexXA)( vts, i );
-      if (me < here->thr) {
-         /* We just went past 'me', without seeing it. */
-         tmp.thr = me;
-         tmp.tym = 1;
-         VG_(addToXA)( res, &tmp );
-         tmp = *here;
-         VG_(addToXA)( res, &tmp );
-         i++;
-         break;
-      } 
-      else if (me == here->thr) {
-         tmp = *here;
-         tmp.tym++;
-         VG_(addToXA)( res, &tmp );
-         i++;
-         break;
-      }
-      else /* me > here->thr */ {
-         tmp = *here;
-         VG_(addToXA)( res, &tmp );
-      }
-   }
-   tl_assert(i >= 0 && i <= n);
-   if (i == n && here && here->thr < me) {
-      tmp.thr = me;
-      tmp.tym = 1;
-      VG_(addToXA)( res, &tmp );
-   } else {
-      for (/*keepgoing*/; i < n; i++) {
-         here = VG_(indexXA)( vts, i );
-         tmp = *here;
-         VG_(addToXA)( res, &tmp );
-      }
-   }
-   tl_assert(is_sane_VTS(res));
-   if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
-                      (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
-   return res;
-}
-
-static void show_VTS ( HChar* buf, Int nBuf, XArray* vts ) {
-   ScalarTS* st;
-   HChar     unit[64];
-   Word      i, n;
-   Int       avail = nBuf;
-   tl_assert(avail > 16);
-   buf[0] = '[';
-   buf[1] = 0;
-   n = VG_(sizeXA)( vts );
-   for (i = 0; i < n; i++) {
-      tl_assert(avail >= 10);
-      st = VG_(indexXA)( vts, i );
-      VG_(memset)(unit, 0, sizeof(unit));
-      VG_(sprintf)(unit, i < n-1 ? "%ld:%ld " : "%ld:%ld",
-                         (Word)st->thr->errmsg_index, st->tym);
-      if (avail < VG_(strlen)(unit) + 10/*let's say*/) {
-         VG_(strcat)(buf, " ...]");
-         return;
        }
-      VG_(strcat)(buf, unit);
-      avail -= VG_(strlen)(unit);
-   }
-   VG_(strcat)(buf, "]");
-}
-
-
-/*------------ searching the happens-before graph ------------*/
-
-static UWord stats__hbefore_queries   = 0; // total # queries
-static UWord stats__hbefore_cache0s   = 0; // hits at cache[0]
-static UWord stats__hbefore_cacheNs   = 0; // hits at cache[> 0]
-static UWord stats__hbefore_probes    = 0; // # checks in cache
-static UWord stats__hbefore_gsearches = 0; // # searches in graph
-static UWord stats__hbefore_gsearchFs = 0; // # fast searches in graph
-static UWord stats__hbefore_invals    = 0; // # cache invals
-static UWord stats__hbefore_stk_hwm   = 0; // stack high water mark
-
-/* Running marker for depth-first searches */
-/* NOTE: global variable */
-static UInt dfsver_current = 0;
+*/
  
-/* A stack of possibly-unexplored nodes used in the depth first search */
-/* NOTE: global variable */
-static XArray* dfsver_stack = NULL;
  
-// FIXME: check this - is it really correct?
-__attribute__((noinline))
-static Bool happens_before_do_dfs_from_to ( Segment* src, Segment* dst )
+/* Return True iff 'thr' holds 'lk' in some mode. */
+static Bool thread_is_a_holder_of_Lock ( Thread* thr, Lock* lk )
  {
-   Segment* here;
-   Word     ssz;
-
-   /* begin SPEEDUP HACK -- the following can safely be omitted */
-   /* fast track common case, without favouring either the
-      ->prev or ->other links */
-   tl_assert(src);
-   tl_assert(dst);
-   if ((src->prev && src->prev == dst)
-       || (src->other && src->other == dst)) {
-      stats__hbefore_gsearchFs++;
-      return True;
-   }
-   /* end SPEEDUP HACK */
-
-   /* empty out the stack */
-   tl_assert(dfsver_stack);
-   VG_(dropTailXA)( dfsver_stack, VG_(sizeXA)( dfsver_stack ));
-   tl_assert(VG_(sizeXA)( dfsver_stack ) == 0);
-
-   /* push starting point */
-   (void) VG_(addToXA)( dfsver_stack, &src );
-
-   while (True) {
-      /* While the stack is not empty, pop the next node off it and
-         consider. */
-      ssz = VG_(sizeXA)( dfsver_stack );
-      tl_assert(ssz >= 0);
-      if (ssz == 0)
-         return False; /* stack empty ==> no path from src to dst */
-
-      if (UNLIKELY( ((UWord)ssz) > stats__hbefore_stk_hwm ))
-         stats__hbefore_stk_hwm = (UWord)ssz;
-
-      /* here = pop(stack) */
-      here = *(Segment**) VG_(indexXA)( dfsver_stack, ssz-1 );
-      VG_(dropTailXA)( dfsver_stack, 1 );
-
-     again:
-      /* consider the node 'here' */
-      if (here == dst)
-         return True; /* found a path from src and dst */
-
-      /* have we been to 'here' before? */
-      tl_assert(here->dfsver <= dfsver_current);
-      if (here->dfsver == dfsver_current)
-         continue; /* We've been 'here' before - node is not interesting*/
-
-      /* Mark that we've been here */
-      here->dfsver = dfsver_current;
-
-      /* Now push both children on the stack */
-
-      /* begin SPEEDUP hack -- the following can safely be omitted */
-      /* idea is, if there is exactly one child, avoid the overhead of
-         pushing it on the stack and immediately popping it off again.
-         Kinda like doing a tail-call. */
-      if (here->prev && !here->other) {
-         here = here->prev;
-         goto again;
-      }
-      if (here->other && !here->prev) {
-         here = here->other;
-         goto again;
-      }
-      /* end of SPEEDUP HACK */
-
-      /* Push all available children on stack.  From some quick
-         experimentation it seems like exploring ->other first leads
-         to lower maximum stack use, although getting repeatable
-         results is difficult. */
-      if (here->prev)
-         (void) VG_(addToXA)( dfsver_stack, &(here->prev) );
-      if (here->other)
-         (void) VG_(addToXA)( dfsver_stack, &(here->other) );
-   }
+   if (lk->heldBy)
+      return VG_(elemBag)( lk->heldBy, (Word)thr ) > 0;
+   else
+      return False;
  }
  
+/* Sanity check Threads, as far as possible */
  __attribute__((noinline))
-static Bool happens_before_wrk ( Segment* seg1, Segment* seg2 )
-{
-   Bool reachable;
-
-   { static Int nnn = 0;
-     if (SHOW_EXPENSIVE_STUFF && (nnn++ % 1000) == 0)
-        VG_(printf)("happens_before_wrk: %d\n", nnn);
-   }
-
-   /* Now the question is, is there a chain of pointers through the
-      .prev and .other fields, that leads from seg2 back to seg1 ? */
-   tl_assert(dfsver_current < 0xFFFFFFFF);
-   dfsver_current++;
-   
-   if (dfsver_stack == NULL) {
-     dfsver_stack = VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(Segment*) );
-     tl_assert(dfsver_stack);
-   }
-
-   reachable = happens_before_do_dfs_from_to( seg2, seg1 );
-
-   return reachable;
-}
-
-/*--------------- the happens_before cache ---------------*/
-
-#define HBEFORE__N_CACHE 64
-typedef 
-   struct { SegmentID segid1; SegmentID segid2; Bool result; } 
-   HBeforeCacheEnt;
-
-static HBeforeCacheEnt hbefore__cache[HBEFORE__N_CACHE];
-
-static void hbefore__invalidate_cache ( void ) 
-{
-   Int i;
-   SegmentID bogus = 0;
-   tl_assert(!is_sane_SegmentID(bogus));
-   stats__hbefore_invals++;
-   for (i = 0; i < HBEFORE__N_CACHE; i++) {
-      hbefore__cache[i].segid1 = bogus;
-      hbefore__cache[i].segid2 = bogus;
-      hbefore__cache[i].result = False;
-   }
-}
-
-static Bool happens_before ( SegmentID segid1, SegmentID segid2 )
+static void threads__sanity_check ( Char* who )
  {
-   Bool    hbG, hbV;
-   Int     i, j, iNSERT_POINT;
-   Segment *seg1, *seg2;
-   tl_assert(is_sane_SegmentID(segid1));
-   tl_assert(is_sane_SegmentID(segid2));
-   tl_assert(segid1 != segid2);
-   stats__hbefore_queries++;
-   stats__hbefore_probes++;
-   if (segid1 == hbefore__cache[0].segid1 
-       && segid2 == hbefore__cache[0].segid2) {
-      stats__hbefore_cache0s++;
-      return hbefore__cache[0].result;
-   }
-   for (i = 1; i < HBEFORE__N_CACHE; i++) {
-      stats__hbefore_probes++;
-      if (segid1 == hbefore__cache[i].segid1 
-          && segid2 == hbefore__cache[i].segid2) {
-         /* Found it.  Move it 1 step closer to the front. */
-         HBeforeCacheEnt tmp = hbefore__cache[i];
-         hbefore__cache[i]   = hbefore__cache[i-1];
-         hbefore__cache[i-1] = tmp;
-         stats__hbefore_cacheNs++;
-         return tmp.result;
+#define BAD(_str) do { how = (_str); goto bad; } while (0)
+   Char*     how = "no error";
+   Thread*   thr;
+   WordSetID wsA, wsW;
+   UWord*    ls_words;
+   Word      ls_size, i;
+   Lock*     lk;
+   for (thr = admin_threads; thr; thr = thr->admin) {
+      if (!HG_(is_sane_Thread)(thr)) BAD("1");
+      wsA = thr->locksetA;
+      wsW = thr->locksetW;
+      // locks held in W mode are a subset of all locks held
+      if (!HG_(isSubsetOf)( univ_lsets, wsW, wsA )) BAD("7");
+      HG_(getPayloadWS)( &ls_words, &ls_size, univ_lsets, wsA );
+      for (i = 0; i < ls_size; i++) {
+         lk = (Lock*)ls_words[i];
+         // Thread.lockset: each element is really a valid Lock
+         if (!HG_(is_sane_LockN)(lk)) BAD("2");
+         // Thread.lockset: each Lock in set is actually held by that
+         // thread
+         if (!thread_is_a_holder_of_Lock(thr,lk)) BAD("3");
        }
     }
-   /* Not found.  Search the graph and add an entry to the cache. */
-   stats__hbefore_gsearches++;
-
-   seg1 = map_segments_lookup(segid1);
-   seg2 = map_segments_lookup(segid2);
-   tl_assert(is_sane_Segment(seg1));
-   tl_assert(is_sane_Segment(seg2));
-   tl_assert(seg1 != seg2);
-   tl_assert(seg1->vts);
-   tl_assert(seg2->vts);
-
-   hbV = cmpGEQ_VTS( seg2->vts, seg1->vts );
-   if (clo_sanity_flags & SCE_HBEFORE) {
-      /* Crosscheck the vector-timestamp comparison result against that
-         obtained from the explicit graph approach.  Can be very
-         slow. */
-      hbG = happens_before_wrk( seg1, seg2 );
-   } else {
-      /* Assume the vector-timestamp comparison result is correct, and
-         use it as-is. */
-      hbG = hbV;
-   }
-
-   if (hbV != hbG) {
-      VG_(printf)("seg1 %p  seg2 %p  hbV %d  hbG %d\n", 
-                  seg1,seg2,(Int)hbV,(Int)hbG);
-      segments__generate_vcg();
-   }
-   tl_assert(hbV == hbG);
-
-   iNSERT_POINT = (1*HBEFORE__N_CACHE)/4 - 1;
-   /* if (iNSERT_POINT > 4) iNSERT_POINT = 4; */
-
-   for (j = HBEFORE__N_CACHE-1; j > iNSERT_POINT; j--) {
-      hbefore__cache[j] = hbefore__cache[j-1];
-   }
-   hbefore__cache[iNSERT_POINT].segid1 = segid1;
-   hbefore__cache[iNSERT_POINT].segid2 = segid2;
-   hbefore__cache[iNSERT_POINT].result = hbG;
-
-   if (0)
-   VG_(printf)("hb %d %d\n", (Int)segid1-(1<<24), (Int)segid2-(1<<24));
-   return hbG;
+   return;
+  bad:
+   VG_(printf)("threads__sanity_check: who=\"%s\", bad=\"%s\"\n", who, how);
+   tl_assert(0);
+#undef BAD
  }
  
-/*--------------- generating .vcg output ---------------*/
  
-static void segments__generate_vcg ( void )
-{
-#define PFX "xxxxxx"
-   /* Edge colours:
-         Black  -- the chain of .prev links
-         Green  -- thread creation, link to parent
-         Red    -- thread exit, link to exiting thread
-         Yellow -- signal edge
-         Pink   -- semaphore-up edge
-   */
-   Segment* seg;
-   HChar vtsstr[128];
-   VG_(printf)(PFX "graph: { title: \"Segments\"\n");
-   VG_(printf)(PFX "orientation: top_to_bottom\n");
-   VG_(printf)(PFX "height: 900\n");
-   VG_(printf)(PFX "width: 500\n");
-   VG_(printf)(PFX "x: 20\n");
-   VG_(printf)(PFX "y: 20\n");
-   VG_(printf)(PFX "color: lightgrey\n");
-   for (seg = admin_segments; seg; seg=seg->admin) {
-
-      VG_(printf)(PFX "node: { title: \"%p\" color: lightcyan "
-                  "textcolor: darkgreen label: \"Seg %p\\n", 
-                  seg, seg);
-      if (seg->thr->errmsg_index == 1) {
-         VG_(printf)("ROOT_THREAD");
-      } else {
-         VG_(printf)("Thr# %d", seg->thr->errmsg_index);
-      }
-
-      if (clo_gen_vcg >= 2) {
-         show_VTS( vtsstr, sizeof(vtsstr)-1, seg->vts );
-         vtsstr[sizeof(vtsstr)-1] = 0;
-         VG_(printf)("\\n%s", vtsstr);
-      }
-
-      VG_(printf)("\" }\n");
-
-      if (seg->prev)
-         VG_(printf)(PFX "edge: { sourcename: \"%p\" targetname: \"%p\""
-                     "color: black }\n", seg->prev, seg );
-      if (seg->other) {
-         HChar* colour = "orange";
-         switch (seg->other_hint) {
-            case 'c': colour = "darkgreen";  break; /* creation */
-            case 'j': colour = "red";        break; /* join (exit) */
-            case 's': colour = "orange";     break; /* signal */
-            case 'S': colour = "pink";       break; /* sem_post->wait */
-            case 'u': colour = "cyan";       break; /* unlock */
-            default: tl_assert(0);
-         }
-         VG_(printf)(PFX "edge: { sourcename: \"%p\" targetname: \"%p\""
-                     " color: %s }\n", seg->other, seg, colour );
-      }
-   }
-   VG_(printf)(PFX "}\n");
-#undef PFX
-}
-
-
-/*----------------------------------------------------------------*/
-/*--- map_shmem :: WordFM Addr SecMap                          ---*/
-/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
-/*----------------------------------------------------------------*/
-
-
-static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
-static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
-static UWord stats__secmap_linesZ_allocd = 0; // # CacheLineZ's issued
-static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
-static UWord stats__secmap_linesF_allocd = 0; // # CacheLineF's issued
-static UWord stats__secmap_linesF_bytes  = 0; //  .. using this much storage
-static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
-static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
-static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
-static UWord stats__cache_F_fetches      = 0; // # F lines fetched
-static UWord stats__cache_F_wbacks       = 0; // # F lines written back
-static UWord stats__cache_invals         = 0; // # cache invals
-static UWord stats__cache_flushes        = 0; // # cache flushes
-static UWord stats__cache_totrefs        = 0; // # total accesses
-static UWord stats__cache_totmisses      = 0; // # misses
-static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
-static UWord stats__cline_read64s        = 0; // # calls to s_m_read64
-static UWord stats__cline_read32s        = 0; // # calls to s_m_read32
-static UWord stats__cline_read16s        = 0; // # calls to s_m_read16
-static UWord stats__cline_read8s         = 0; // # calls to s_m_read8
-static UWord stats__cline_write64s       = 0; // # calls to s_m_write64
-static UWord stats__cline_write32s       = 0; // # calls to s_m_write32
-static UWord stats__cline_write16s       = 0; // # calls to s_m_write16
-static UWord stats__cline_write8s        = 0; // # calls to s_m_write8
-static UWord stats__cline_set64s         = 0; // # calls to s_m_set64
-static UWord stats__cline_set32s         = 0; // # calls to s_m_set32
-static UWord stats__cline_set16s         = 0; // # calls to s_m_set16
-static UWord stats__cline_set8s          = 0; // # calls to s_m_set8
-static UWord stats__cline_get8s          = 0; // # calls to s_m_get8
-static UWord stats__cline_copy8s         = 0; // # calls to s_m_copy8
-static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
-static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
-static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
-static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
-static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
-static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
-
-
-static SVal shadow_mem_get8 ( Addr a ); /* fwds */
-
-static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
-   return a & ~(N_SECMAP_ARANGE - 1);
-}
-static inline UWord shmem__get_SecMap_offset ( Addr a ) {
-   return a & (N_SECMAP_ARANGE - 1);
-}
-
-/*--------------- SecMap allocation --------------- */
-
-static HChar* shmem__bigchunk_next = NULL;
-static HChar* shmem__bigchunk_end1 = NULL;
-
-static void* shmem__bigchunk_alloc ( SizeT n )
-{
-   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256;
-   tl_assert(n > 0);
-   n = ROUNDUP(n, 16);
-   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
-   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
-             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
-   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
-      if (0)
-      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n", 
-                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
-      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
-      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
-   }
-   tl_assert(shmem__bigchunk_next);
-   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
-   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
-   shmem__bigchunk_next += n;
-   return shmem__bigchunk_next - n;
-}
-
-static SecMap* shmem__alloc_SecMap ( void )
-{
-   Word    i, j;
-   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
-   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
-   tl_assert(sm);
-   sm->magic       = SecMap_MAGIC;
-   sm->mbHasLocks  = False; /* dangerous */
-   sm->mbHasShared = False; /* dangerous */
-   for (i = 0; i < N_SECMAP_ZLINES; i++) {
-      sm->linesZ[i].dict[0] = SHVAL_NoAccess;
-      sm->linesZ[i].dict[1] = 0; /* completely invalid SHVAL */
-      sm->linesZ[i].dict[2] = 0;
-      sm->linesZ[i].dict[3] = 0;
-      for (j = 0; j < N_LINE_ARANGE/4; j++)
-         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
-   }
-   sm->linesF      = NULL;
-   sm->linesF_size = 0;
-   stats__secmaps_allocd++;
-   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
-   stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
-   stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(CacheLineZ);
-   return sm;
-}
-
-static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
-{
-   SecMap* sm    = NULL;
-   Addr    gaKey = shmem__round_to_SecMap_base(ga);
-   if (VG_(lookupFM)( map_shmem,
-                      NULL/*keyP*/, (Word*)&sm, (Word)gaKey )) {
-      /* Found; address of SecMap is in sm */
-      tl_assert(sm);
-   } else {
-      /* create a new one */
-      sm = shmem__alloc_SecMap();
-      tl_assert(sm);
-      VG_(addToFM)( map_shmem, (Word)gaKey, (Word)sm );
-   }
-   return sm;
-}
-
-
-/*--------------- cache management/lookup --------------- */
-
-/*--------------- misc --------------- */
-
-static Bool shmem__get_mbHasLocks ( Addr a )
-{
-   SecMap* sm;
-   Addr aKey = shmem__round_to_SecMap_base(a);
-   if (VG_(lookupFM)( map_shmem,
-                      NULL/*keyP*/, (Word*)&sm, (Word)aKey )) {
-      /* Found */
-      return sm->mbHasLocks;
-   } else {
-      return False;
-   }
-}
-
-static void shmem__set_mbHasLocks ( Addr a, Bool b )
-{
-   SecMap* sm;
-   Addr aKey = shmem__round_to_SecMap_base(a);
-   tl_assert(b == False || b == True);
-   if (VG_(lookupFM)( map_shmem,
-                      NULL/*keyP*/, (Word*)&sm, (Word)aKey )) {
-      /* Found; address of SecMap is in sm */
-   } else {
-      /* create a new one */
-      sm = shmem__alloc_SecMap();
-      tl_assert(sm);
-      VG_(addToFM)( map_shmem, (Word)aKey, (Word)sm );
-   }
-   sm->mbHasLocks = b;
-}
-
-static void shmem__set_mbHasShared ( Addr a, Bool b )
-{
-   SecMap* sm;
-   Addr aKey = shmem__round_to_SecMap_base(a);
-   tl_assert(b == False || b == True);
-   if (VG_(lookupFM)( map_shmem,
-                      NULL/*keyP*/, (Word*)&sm, (Word)aKey )) {
-      /* Found; address of SecMap is in sm */
-   } else {
-      /* create a new one */
-      sm = shmem__alloc_SecMap();
-      tl_assert(sm);
-      VG_(addToFM)( map_shmem, (Word)aKey, (Word)sm );
-   }
-   sm->mbHasShared = b;
-}
-
-
-/*----------------------------------------------------------------*/
-/*--- Sanity checking the data structures                      ---*/
-/*----------------------------------------------------------------*/
-
-static UWord stats__sanity_checks = 0;
-
-static Bool is_sane_CacheLine ( CacheLine* cl ); /* fwds */
-static Bool cmpGEQ_VTS ( XArray* a, XArray* b ); /* fwds */
-static void laog__sanity_check ( Char* who ); /* fwds */
-
-/* REQUIRED INVARIANTS:
-
-   Thread vs Segment/Lock/SecMaps
-
-      for each t in Threads {
-
-         // Thread.lockset: each element is really a valid Lock
-
-         // Thread.lockset: each Lock in set is actually held by that thread
-         for lk in Thread.lockset 
-            lk == LockedBy(t)
-
-         // Thread.csegid is a valid SegmentID
-         // and the associated Segment has .thr == t
-
-      }
-
-      all thread Locksets are pairwise empty under intersection
-      (that is, no lock is claimed to be held by more than one thread)
-      -- this is guaranteed if all locks in locksets point back to their
-      owner threads
-
-   Lock vs Thread/Segment/SecMaps
-
-      for each entry (gla, la) in map_locks
-         gla == la->guest_addr
-
-      for each lk in Locks {
-
-         lk->tag is valid
-         lk->guest_addr does not have shadow state NoAccess
-         if lk == LockedBy(t), then t->lockset contains lk
-         if lk == UnlockedBy(segid) then segid is valid SegmentID
-             and can be mapped to a valid Segment(seg)
-             and seg->thr->lockset does not contain lk
-         if lk == UnlockedNew then (no lockset contains lk)
-
-         secmaps for lk has .mbHasLocks == True
-
-      }
-
-   Segment vs Thread/Lock/SecMaps
-
-      the Segment graph is a dag (no cycles)
-      all of the Segment graph must be reachable from the segids
-         mentioned in the Threads
-
-      for seg in Segments {
-
-         seg->thr is a sane Thread
-
-      }
-
-   SecMaps vs Segment/Thread/Lock
-
-      for sm in SecMaps {
-
-         sm properly aligned
-         if any shadow word is ShR or ShM then .mbHasShared == True
-
-         for each Excl(segid) state
-            map_segments_lookup maps to a sane Segment(seg)
-         for each ShM/ShR(tsetid,lsetid) state
-            each lk in lset is a valid Lock
-            each thr in tset is a valid thread, which is non-dead
-
-      }
-*/
-
-
-/* Return True iff 'thr' holds 'lk' in some mode. */
-static Bool thread_is_a_holder_of_Lock ( Thread* thr, Lock* lk )
-{
-   if (lk->heldBy)
-      return VG_(elemBag)( lk->heldBy, (Word)thr ) > 0;
-   else
-      return False;
-}
-
-/* Sanity check Threads, as far as possible */
-__attribute__((noinline))
-static void threads__sanity_check ( Char* who )
-{
-#define BAD(_str) do { how = (_str); goto bad; } while (0)
-   Char*     how = "no error";
-   Thread*   thr;
-   WordSetID wsA, wsW;
-   UWord*    ls_words;
-   Word      ls_size, i;
-   Lock*     lk;
-   Segment*  seg;
-   for (thr = admin_threads; thr; thr = thr->admin) {
-      if (!is_sane_Thread(thr)) BAD("1");
-      wsA = thr->locksetA;
-      wsW = thr->locksetW;
-      // locks held in W mode are a subset of all locks held
-      if (!HG_(isSubsetOf)( univ_lsets, wsW, wsA )) BAD("7");
-      HG_(getPayloadWS)( &ls_words, &ls_size, univ_lsets, wsA );
-      for (i = 0; i < ls_size; i++) {
-         lk = (Lock*)ls_words[i];
-         // Thread.lockset: each element is really a valid Lock
-         if (!is_sane_LockN(lk)) BAD("2");
-         // Thread.lockset: each Lock in set is actually held by that
-         // thread
-         if (!thread_is_a_holder_of_Lock(thr,lk)) BAD("3");
-         // Thread.csegid is a valid SegmentID
-         if (!is_sane_SegmentID(thr->csegid)) BAD("4");
-         // and the associated Segment has .thr == t
-         seg = map_segments_maybe_lookup(thr->csegid);
-         if (!is_sane_Segment(seg)) BAD("5");
-         if (seg->thr != thr) BAD("6");
-      }
-   }
-   return;
-  bad:
-   VG_(printf)("threads__sanity_check: who=\"%s\", bad=\"%s\"\n", who, how);
-   tl_assert(0);
-#undef BAD
-}
-
-
-/* Sanity check Locks, as far as possible */
-__attribute__((noinline))
-static void locks__sanity_check ( Char* who )
+/* Sanity check Locks, as far as possible */
+__attribute__((noinline))
+static void locks__sanity_check ( Char* who )
  {
  #define BAD(_str) do { how = (_str); goto bad; } while (0)
     Char*     how = "no error";
@@ -2568,14 +852,9 @@ static void locks__sanity_check ( Char* who )
     for (lk = admin_locks; lk; lk = lk->admin) {
        // lock is sane.  Quite comprehensive, also checks that
        // referenced (holder) threads are sane.
-      if (!is_sane_LockN(lk)) BAD("3");
+      if (!HG_(is_sane_LockN)(lk)) BAD("3");
        // map_locks binds guest address back to this lock
        if (lk != map_locks_maybe_lookup(lk->guestaddr)) BAD("4");
-      // lk->guest_addr does not have shadow state NoAccess
-      // FIXME: this could legitimately arise from a buggy guest
-      // that attempts to lock in (eg) freed memory.  Detect this
-      // and warn about it in the pre/post-mutex-lock event handler.
-      if (is_SHVAL_NoAccess(shadow_mem_get8(lk->guestaddr))) BAD("5");
        // look at all threads mentioned as holders of this lock.  Ensure
        // this lock is mentioned in their locksets.
        if (lk->heldBy) {
@@ -2584,9 +863,9 @@ static void locks__sanity_check ( Char* who )
           VG_(initIterBag)( lk->heldBy );
           while (VG_(nextIterBag)( lk->heldBy, 
                                    (Word*)&thr, &count )) {
-            // is_sane_LockN above ensures these
+            // HG_(is_sane_LockN) above ensures these
              tl_assert(count >= 1);
-            tl_assert(is_sane_Thread(thr));
+            tl_assert(HG_(is_sane_Thread)(thr));
              if (!HG_(elemWS)(univ_lsets, thr->locksetA, (Word)lk)) 
                 BAD("6");
              // also check the w-only lockset
@@ -2604,8 +883,6 @@ static void locks__sanity_check ( Char* who )
           // since lk is unheld, then (no lockset contains lk)
           // hmm, this is really too expensive to check.  Hmm.
        }
-      // secmaps for lk has .mbHasLocks == True
-      if (!shmem__get_mbHasLocks(lk->guestaddr)) BAD("10");
     }
  
     return;
@@ -2616,172 +893,10 @@ static void locks__sanity_check ( Char* who )
  }
  
  
-/* Sanity check Segments, as far as possible */
-__attribute__((noinline))
-static void segments__sanity_check ( Char* who )
-{
-#define BAD(_str) do { how = (_str); goto bad; } while (0)
-   Char*    how = "no error";
-   Int      i;
-   Segment* seg;
-   // FIXME
-   //   the Segment graph is a dag (no cycles)
-   //   all of the Segment graph must be reachable from the segids
-   //      mentioned in the Threads
-   // # entries in admin_segments == # entries in map_segments
-   for (i = 0, seg = admin_segments;  seg;  i++, seg = seg->admin)
-      ;
-   if (i != VG_(sizeFM)(map_segments)) BAD("1");
-   // for seg in Segments {
-   for (seg = admin_segments; seg; seg = seg->admin) {
-      if (!is_sane_Segment(seg)) BAD("2");
-      if (!is_sane_Thread(seg->thr)) BAD("3");
-      if (!seg->vts) BAD("4");
-      if (seg->prev && seg->prev->vts
-          && !cmpGEQ_VTS(seg->vts, seg->prev->vts))
-         BAD("5");
-      if (seg->other && seg->other->vts
-          && !cmpGEQ_VTS(seg->vts, seg->other->vts))
-         BAD("6");
-   }
-   return;
-  bad:
-   VG_(printf)("segments__sanity_check: who=\"%s\", bad=\"%s\"\n", 
-               who, how);
-   tl_assert(0);
-#undef BAD
-}
-
-
-/* Sanity check shadow memory, as far as possible */
-static Int cmp_Addr_for_ssort ( void* p1, void* p2 ) {
-   Addr a1 = *(Addr*)p1;
-   Addr a2 = *(Addr*)p2;
-   if (a1 < a2) return -1;
-   if (a1 > a2) return 1;
-   return 0;
-}
-__attribute__((noinline))
-static void shmem__sanity_check ( Char* who )
-{
-#define BAD(_str) do { how = (_str); goto bad; } while (0)
-   Char*   how = "no error";
-   Word    smga;
-   SecMap* sm;
-   Word    i, j, ws_size, n_valid_tags;
-   UWord*  ws_words;
-   Addr*   valid_tags;
-   VG_(initIterFM)( map_shmem );
-   // for sm in SecMaps {
-   while (VG_(nextIterFM)( map_shmem,
-                           (Word*)&smga, (Word*)&sm )) {
-      SecMapIter itr;
-      SVal*      w32p = NULL;
-      Bool       mbHasShared = False;
-      Bool       allNoAccess = True;
-      if (!is_sane_SecMap(sm)) BAD("1");
-      // sm properly aligned
-      if (smga != shmem__round_to_SecMap_base(smga)) BAD("2");
-      // if any shadow word is ShR or ShM then .mbHasShared == True
-      initSecMapIter( &itr );
-      while (stepSecMapIter( &w32p, &itr, sm )) {
-         SVal w32 = *w32p;
-         if (is_SHVAL_Sh(w32)) 
-            mbHasShared = True;
-         if (!is_SHVAL_NoAccess(w32))
-            allNoAccess = False;
-         if (is_SHVAL_Excl(w32)) {
-            // for each Excl(segid) state
-            // map_segments_lookup maps to a sane Segment(seg)
-            Segment*  seg;
-            SegmentID segid = un_SHVAL_Excl(w32);
-            if (!is_sane_SegmentID(segid)) BAD("3");
-            seg = map_segments_maybe_lookup(segid);
-            if (!is_sane_Segment(seg)) BAD("4");
-         } 
-         else if (is_SHVAL_Sh(w32)) {
-            WordSetID tset = un_SHVAL_Sh_tset(w32);
-            WordSetID lset = un_SHVAL_Sh_lset(w32);
-            if (!HG_(plausibleWS)( univ_tsets, tset )) BAD("5");
-            if (!HG_(saneWS_SLOW)( univ_tsets, tset )) BAD("6");
-            if (HG_(cardinalityWS)( univ_tsets, tset ) < 2) BAD("7");
-            if (!HG_(plausibleWS)( univ_lsets, lset )) BAD("8");
-            if (!HG_(saneWS_SLOW)( univ_lsets, lset )) BAD("9");
-            HG_(getPayloadWS)( &ws_words, &ws_size, univ_lsets, lset );
-            for (j = 0; j < ws_size; j++) {
-               Lock* lk = (Lock*)ws_words[j];
-               // for each ShM/ShR(tsetid,lsetid) state
-               // each lk in lset is a valid Lock
-               if (!is_sane_LockN(lk)) BAD("10");
-            }
-            HG_(getPayloadWS)( &ws_words, &ws_size, univ_tsets, tset );
-            for (j = 0; j < ws_size; j++) {
-               Thread* thr = (Thread*)ws_words[j];
-               //for each ShM/ShR(tsetid,lsetid) state
-               // each thr in tset is a valid thread, which is non-dead
-               if (!is_sane_Thread(thr)) BAD("11");
-            }
-         }
-         else if (is_SHVAL_NoAccess(w32) || is_SHVAL_New(w32)) {
-            /* nothing to check */
-         }
-         else {
-            /* bogus shadow mem value */
-            BAD("12");
-         }
-      } /* iterating over a SecMap */
-      // Check essential safety property
-      if (mbHasShared && !sm->mbHasShared) BAD("13");
-      // This is optional - check that destroyed memory has its hint
-      // bits cleared.  NB won't work properly unless full, eager
-      // GCing of SecMaps is implemented
-      //if (allNoAccess && sm->mbHasLocks) BAD("13a");
-   }
-   VG_(doneIterFM)( map_shmem );
-
-   // check the cache
-   valid_tags   = hg_zalloc("hg", N_WAY_NENT * sizeof(Addr));
-   n_valid_tags = 0;
-   tl_assert(valid_tags);
-   for (i = 0; i < N_WAY_NENT; i++) {
-      CacheLine* cl;
-      Addr       tag; 
-      /* way0, dude */
-      cl  = &cache_shmem.lyns0[i];
-      tag =  cache_shmem.tags0[i];
-      if (tag != 1) {
-         if (!is_valid_scache_tag(tag)) BAD("14-0");
-         if (!is_sane_CacheLine(cl)) BAD("15-0");
-         /* A valid tag should be of the form 
-            X---X line_number:N_WAY_BITS 0:N_LINE_BITS */
-         if (tag & (N_LINE_ARANGE-1)) BAD("16-0");
-         if ( i != ((tag >> N_LINE_BITS) & (N_WAY_NENT-1)) ) BAD("16-1");
-         valid_tags[n_valid_tags++] = tag;
-      }
-   }
-   tl_assert(n_valid_tags <= N_WAY_NENT);
-   if (n_valid_tags > 1) {
-      /* Check that the valid tags are unique */
-      VG_(ssort)( valid_tags, n_valid_tags, sizeof(Addr), cmp_Addr_for_ssort );
-      for (i = 0; i < n_valid_tags-1; i++) {
-         if (valid_tags[i] >= valid_tags[i+1])
-            BAD("16-2");
-      }
-   }
-   hg_free(valid_tags);
-   return;
-  bad:
-   VG_(printf)("shmem__sanity_check: who=\"%s\", bad=\"%s\"\n", who, how);
-   tl_assert(0);
-#undef BAD
-}
-
  static void all_except_Locks__sanity_check ( Char* who ) {
     stats__sanity_checks++;
     if (0) VG_(printf)("all_except_Locks__sanity_check(%s)\n", who);
     threads__sanity_check(who);
-   segments__sanity_check(who);
-   shmem__sanity_check(who);
     laog__sanity_check(who);
  }
  static void all__sanity_check ( Char* who ) {
@@ -2794,40 +909,6 @@ static void all__sanity_check ( Char* who ) {
  /*--- the core memory state machine (msm__* functions)         ---*/
  /*----------------------------------------------------------------*/
  
-static UWord stats__msm_read_Excl_nochange = 0;
-static UWord stats__msm_read_Excl_transfer = 0;
-static UWord stats__msm_read_Excl_to_ShR   = 0;
-static UWord stats__msm_read_ShR_to_ShR    = 0;
-static UWord stats__msm_read_ShM_to_ShM    = 0;
-static UWord stats__msm_read_New_to_Excl   = 0;
-static UWord stats__msm_read_NoAccess      = 0;
-
-static UWord stats__msm_write_Excl_nochange = 0;
-static UWord stats__msm_write_Excl_transfer = 0;
-static UWord stats__msm_write_Excl_to_ShM   = 0;
-static UWord stats__msm_write_ShR_to_ShM    = 0;
-static UWord stats__msm_write_ShM_to_ShM    = 0;
-static UWord stats__msm_write_New_to_Excl   = 0;
-static UWord stats__msm_write_NoAccess      = 0;
-
-/* fwds */
-static void record_error_Race ( Thread* thr, 
-                                Addr data_addr, Bool isWrite, Int szB,
-                                SVal old_sv, SVal new_sv,
-                                ExeContext* mb_lastlock );
-
-static void record_error_FreeMemLock ( Thread* thr, Lock* lk );
-
-static void record_error_UnlockUnlocked ( Thread*, Lock* );
-static void record_error_UnlockForeign  ( Thread*, Thread*, Lock* );
-static void record_error_UnlockBogus    ( Thread*, Addr );
-static void record_error_PthAPIerror    ( Thread*, HChar*, Word, HChar* );
-static void record_error_LockOrder      ( Thread*, Addr, Addr,
-                                                   ExeContext*, ExeContext* );
-
-static void record_error_Misc ( Thread*, HChar* );
-static void announce_one_thread ( Thread* thr ); /* fwds */
-
  static WordSetID add_BHL ( WordSetID lockset ) {
     return HG_(addToWS)( univ_lsets, lockset, (Word)__bus_lock_Lock );
  }
@@ -2929,7 +1010,7 @@ void record_last_lock_lossage ( Addr ga_of_access,
                        HG_(cardinalityWS)( univ_lsets, lset_old), lk );
     if (lk->appeared_at) {
        if (ga_to_lastlock == NULL)
-         ga_to_lastlock = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL );
+         ga_to_lastlock = VG_(newFM)( HG_(zalloc), "hg.rlll.1", HG_(free), NULL );
        VG_(addToFM)( ga_to_lastlock, ga_of_access, (Word)lk->appeared_at );
        stats__ga_LL_adds++;
     }
@@ -2954,2113 +1035,44 @@ static ExeContext* maybe_get_lastlock_initpoint ( Addr ga )
  }
  
  
-static void msm__show_state_change ( Thread* thr_acc, Addr a, Int szB,
-                                     Char howC,
-                                     SVal sv_old, SVal sv_new )
-{
-   ThreadId tid;
-   UChar txt_old[100], txt_new[100];
-   Char* how = "";
-   tl_assert(is_sane_Thread(thr_acc));
-   tl_assert(clo_trace_level == 1 || clo_trace_level == 2);
-   switch (howC) {
-      case 'r': how = "rd"; break;
-      case 'w': how = "wr"; break;
-      case 'p': how = "pa"; break;
-      default: tl_assert(0);
-   }
-   show_shadow_w32_for_user(txt_old, sizeof(txt_old), sv_old);
-   show_shadow_w32_for_user(txt_new, sizeof(txt_new), sv_new);
-   txt_old[sizeof(txt_old)-1] = 0;
-   txt_new[sizeof(txt_new)-1] = 0;
-   if (clo_trace_level == 2) {
-      /* show everything */
-      VG_(message)(Vg_UserMsg, "");
-      announce_one_thread( thr_acc );
-      VG_(message)(Vg_UserMsg, 
-                   "TRACE: %#lx %s %d thr#%d :: %s --> %s",
-                   a, how, szB, thr_acc->errmsg_index, txt_old, txt_new );
-      tid = map_threads_maybe_reverse_lookup_SLOW(thr_acc);
-      if (tid != VG_INVALID_THREADID) {
-         VG_(get_and_pp_StackTrace)( tid, 8 );
-      }
-   } else {
-      /* Just print one line */
-      VG_(message)(Vg_UserMsg, 
-                   "TRACE: %#lx %s %d thr#%d :: %22s --> %22s",
-                   a, how, szB, thr_acc->errmsg_index, txt_old, txt_new );
-   }
-}
-
-
-/* Here are some MSM stats from startup/shutdown of OpenOffice.
-
-     msm:  489,734,723   80,278,862 rd/wr_Excl_nochange
-     msm:    3,171,542       93,738 rd/wr_Excl_transfer
-     msm:       45,036          167 rd/wr_Excl_to_ShR/ShM
-     msm:   13,352,594          285 rd/wr_ShR_to_ShR/ShM
-     msm:    1,125,879      815,779 rd/wr_ShM_to_ShM
-     msm:    7,561,842  250,629,935 rd/wr_New_to_Excl
-     msm:       17,778            0 rd/wr_NoAccess
-
-   This says how the clauses should be ordered for greatest speed:
+/*----------------------------------------------------------------*/
+/*--- Shadow value and address range handlers                  ---*/
+/*----------------------------------------------------------------*/
  
-   * the vast majority of memory reads (490 million out of a total of
-     515 million) are of memory in an exclusive state, and the state
-     is unchanged.  All other read accesses are insignificant by
-     comparison.
+static void laog__pre_thread_acquires_lock ( Thread*, Lock* ); /* fwds */
+static void laog__handle_lock_deletions    ( WordSetID ); /* fwds */
+static inline Thread* get_current_Thread ( void ); /* fwds */
  
-   * 75% (251 million out of a total of 332 million) writes are 'first
-     time' writes, which take New memory into exclusive ownership.
-     Almost all the rest (80 million) are accesses to exclusive state,
-     which remains unchanged.  All other write accesses are
-     insignificant. */
  
-/* The core MSM.  If 'wold' is the old 32-bit shadow word for a
-   location, return the new shadow word that would result for a read
-   of the location, and report any errors necessary on the way.  This
-   does not update shadow memory - it merely produces new shadow words
-   from old.  'thr_acc' and 'a' are supplied only so it can produce
-   coherent error messages if necessary. */
-static
-SVal msm__handle_read ( Thread* thr_acc, Addr a, SVal wold, Int szB )
+/* Block-copy states (needed for implementing realloc()). */
+static void shadow_mem_copy_range ( Addr src, Addr dst, SizeT len )
  {
-   SVal wnew = SHVAL_Invalid;
-
-   tl_assert(is_sane_Thread(thr_acc));
-
-   if (0) VG_(printf)("read thr=%p %#lx\n", thr_acc, a);
-
-   /* Exclusive */
-   if (LIKELY(is_SHVAL_Excl(wold))) {
-      /* read Excl(segid) 
-           |  segid_old == segid-of-thread
-           -> no change
-           |  segid_old `happens_before` segid-of-this-thread
-           -> Excl(segid-of-this-thread)
-           |  otherwise
-           -> ShR
-      */
-      SegmentID segid_old = un_SHVAL_Excl(wold);
-      tl_assert(is_sane_SegmentID(segid_old));
-      if (LIKELY(segid_old == thr_acc->csegid)) {
-         /* no change */
-         stats__msm_read_Excl_nochange++;
-         /*NOCHANGE*/return wold;
-      }
-      if (happens_before(segid_old, thr_acc->csegid)) {
-         /* -> Excl(segid-of-this-thread) */
-         wnew = mk_SHVAL_Excl(thr_acc->csegid);
-         stats__msm_read_Excl_transfer++;
-         goto changed;
-      }
-      /* else */ {
-         /* Enter the shared-readonly (ShR) state. */
-         WordSetID tset, lset;
-         /* This location has been accessed by precisely two threads.
-            Make an appropriate tset. */
-         // FIXME: performance: duplicate map_segments_lookup(segid_old)
-         // since must also be done in happens_before()
-         Segment* seg_old = map_segments_lookup( segid_old );
-         Thread*  thr_old = seg_old->thr;
-         tset = HG_(doubletonWS)( univ_tsets, (Word)thr_old, (Word)thr_acc );
-         lset = add_BHL( thr_acc->locksetA ); /* read ==> use all locks */
-         wnew = mk_SHVAL_ShR( tset, lset );
-         stats__msm_read_Excl_to_ShR++;
-         goto changed;
-      }
-      /*NOTREACHED*/
-   } 
-
-   /* Shared-Readonly */
-   if (is_SHVAL_ShR(wold)) {
-     /* read Shared-Readonly(threadset, lockset)
-        We remain in ShR state, but add this thread to the 
-        threadset and refine the lockset accordingly.  Do not
-        complain if the lockset becomes empty -- that's ok. */
-      WordSetID tset_old = un_SHVAL_ShR_tset(wold);
-      WordSetID lset_old = un_SHVAL_ShR_lset(wold);
-      WordSetID tset_new = HG_(addToWS)( univ_tsets, 
-                                         tset_old, (Word)thr_acc );
-      WordSetID lset_new = HG_(intersectWS)( univ_lsets,
-                                             lset_old, 
-                                             add_BHL(thr_acc->locksetA)
-                                             /* read ==> use all locks */ );
-      /*SVal*/  wnew     = mk_SHVAL_ShR( tset_new, lset_new );
-      if (lset_old != lset_new)
-         record_last_lock_lossage(a,lset_old,lset_new);
-      stats__msm_read_ShR_to_ShR++;
-      goto changed;
-   }
-
-   /* Shared-Modified */
-   if (is_SHVAL_ShM(wold)) {
-      /* read Shared-Modified(threadset, lockset)
-         We remain in ShM state, but add this thread to the 
-         threadset and refine the lockset accordingly.
-         If the lockset becomes empty, complain. */
-      WordSetID tset_old = un_SHVAL_ShM_tset(wold);
-      WordSetID lset_old = un_SHVAL_ShM_lset(wold);
-      WordSetID tset_new = HG_(addToWS)( univ_tsets,
-                                         tset_old, (Word)thr_acc );
-      WordSetID lset_new = HG_(intersectWS)( univ_lsets,
-                                             lset_old,
-                                             add_BHL(thr_acc->locksetA)
-                                             /* read ==> use all locks */ ); 
-      /*SVal*/  wnew     = mk_SHVAL_ShM( tset_new, lset_new );
-      if (lset_old != lset_new)
-         record_last_lock_lossage(a,lset_old,lset_new);
-      if (HG_(isEmptyWS)(univ_lsets, lset_new)
-          && !HG_(isEmptyWS)(univ_lsets, lset_old)) {
-         record_error_Race( thr_acc, a, 
-                            False/*isWrite*/, szB, wold, wnew,
-                            maybe_get_lastlock_initpoint(a) );
-      }
-      stats__msm_read_ShM_to_ShM++;
-      goto changed;
-   }
- 
-   /* New */
-   if (is_SHVAL_New(wold)) {
-      /* read New -> Excl(segid) */
-      wnew = mk_SHVAL_Excl( thr_acc->csegid );
-      stats__msm_read_New_to_Excl++;
-      goto changed;
-   } 
-
-   /* NoAccess */
-   if (is_SHVAL_NoAccess(wold)) {
-      // FIXME: complain if accessing here
-      // FIXME: transition to Excl?
-      if (0)
-      VG_(printf)(
-         "msm__handle_read_aligned_32(thr=%p, addr=%p): NoAccess\n",
-         thr_acc, (void*)a );
-      stats__msm_read_NoAccess++;
-      /*NOCHANGE*/return wold; /* no change */
-   }
-
-   /* hmm, bogus state */
-   tl_assert(0);
-
-  changed:
-   if (UNLIKELY(clo_trace_level > 0)) {
-      if (a <= clo_trace_addr && clo_trace_addr < a+szB
-          && wold != wnew) {
-         msm__show_state_change( thr_acc, a, szB, 'r', wold, wnew );
-      }
-   }
-   return wnew;
+   libhb_copy_shadow_state( src, dst, len );
  }
  
-/* Similar to msm__handle_read, compute a new 32-bit shadow word
-   resulting from a write to a location, and report any errors
-   necessary on the way. */
-static
-SVal msm__handle_write ( Thread* thr_acc, Addr a, SVal wold, Int szB )
+static void shadow_mem_read_range ( Thread* thr, Addr a, SizeT len )
  {
-   SVal wnew = SHVAL_Invalid;
-
-   tl_assert(is_sane_Thread(thr_acc));
-
-   if (0) VG_(printf)("write32 thr=%p %#lx\n", thr_acc, a);
-
-   /* New */
-   if (LIKELY(is_SHVAL_New(wold))) {
-      /* write New -> Excl(segid) */
-      wnew = mk_SHVAL_Excl( thr_acc->csegid );
-      stats__msm_write_New_to_Excl++;
-      goto changed;
-   }
-
-   /* Exclusive */
-   if (is_SHVAL_Excl(wold)) {
-      // I believe is identical to case for read Excl
-      // apart from enters ShM rather than ShR 
-      /* read Excl(segid) 
-           |  segid_old == segid-of-thread
-           -> no change
-           |  segid_old `happens_before` segid-of-this-thread
-           -> Excl(segid-of-this-thread)
-           |  otherwise
-           -> ShM
-      */
-      SegmentID segid_old = un_SHVAL_Excl(wold);
-      tl_assert(is_sane_SegmentID(segid_old));
-      if (segid_old == thr_acc->csegid) {
-         /* no change */
-         stats__msm_write_Excl_nochange++;
-         /*NOCHANGE*/return wold;
-      }
-      if (happens_before(segid_old, thr_acc->csegid)) {
-         /* -> Excl(segid-of-this-thread) */
-         wnew = mk_SHVAL_Excl(thr_acc->csegid);
-         stats__msm_write_Excl_transfer++;
-         goto changed;
-      }
-      /* else */ {
-         /* Enter the shared-modified (ShM) state. */
-         WordSetID tset, lset;
-         /* This location has been accessed by precisely two threads.
-            Make an appropriate tset. */
-         // FIXME: performance: duplicate map_segments_lookup(segid_old)
-         // since must also be done in happens_before()
-         Segment* seg_old = map_segments_lookup( segid_old );
-         Thread*  thr_old = seg_old->thr;
-         tset = HG_(doubletonWS)( univ_tsets, (Word)thr_old, (Word)thr_acc );
-         lset = thr_acc->locksetW; /* write ==> use only w-held locks */
-         wnew = mk_SHVAL_ShM( tset, lset );
-         if (HG_(isEmptyWS)(univ_lsets, lset)) {
-            record_error_Race( thr_acc, 
-                               a, True/*isWrite*/, szB, wold, wnew,
-                               maybe_get_lastlock_initpoint(a) );
-         }
-         stats__msm_write_Excl_to_ShM++;
-         goto changed;
-      }
-      /*NOTREACHED*/
-   } 
-
-   /* Shared-Readonly */
-   if (is_SHVAL_ShR(wold)) {
-      /* write Shared-Readonly(threadset, lockset)
-         We move to ShM state, add this thread to the 
-         threadset and refine the lockset accordingly.
-         If the lockset becomes empty, complain. */
-      WordSetID tset_old = un_SHVAL_ShR_tset(wold);
-      WordSetID lset_old = un_SHVAL_ShR_lset(wold);
-      WordSetID tset_new = HG_(addToWS)( univ_tsets, 
-                                         tset_old, (Word)thr_acc );
-      WordSetID lset_new = HG_(intersectWS)(
-                              univ_lsets, 
-                              lset_old, 
-                              thr_acc->locksetW
-                              /* write ==> use only w-held locks */
-                           );
-      /*SVal*/  wnew     = mk_SHVAL_ShM( tset_new, lset_new );
-      if (lset_old != lset_new)
-         record_last_lock_lossage(a,lset_old,lset_new);
-      if (HG_(isEmptyWS)(univ_lsets, lset_new)) {
-         record_error_Race( thr_acc, a, 
-                            True/*isWrite*/, szB, wold, wnew,
-                            maybe_get_lastlock_initpoint(a) );
-      }
-      stats__msm_write_ShR_to_ShM++;
-      goto changed;
-   }
-
-   /* Shared-Modified */
-   else if (is_SHVAL_ShM(wold)) {
-      /* write Shared-Modified(threadset, lockset)
-         We remain in ShM state, but add this thread to the 
-         threadset and refine the lockset accordingly.
-         If the lockset becomes empty, complain. */
-      WordSetID tset_old = un_SHVAL_ShM_tset(wold);
-      WordSetID lset_old = un_SHVAL_ShM_lset(wold);
-      WordSetID tset_new = HG_(addToWS)( univ_tsets,
-                                         tset_old, (Word)thr_acc );
-      WordSetID lset_new = HG_(intersectWS)( 
-                              univ_lsets,
-                              lset_old, 
-                              thr_acc->locksetW 
-                              /* write ==> use only w-held locks */
-                           ); 
-      /*SVal*/  wnew     = mk_SHVAL_ShM( tset_new, lset_new );
-      if (lset_old != lset_new)
-         record_last_lock_lossage(a,lset_old,lset_new);
-      if (HG_(isEmptyWS)(univ_lsets, lset_new)
-          && !HG_(isEmptyWS)(univ_lsets, lset_old)) {
-         record_error_Race( thr_acc, a, 
-                            True/*isWrite*/, szB, wold, wnew,
-                            maybe_get_lastlock_initpoint(a) );
-      }
-      stats__msm_write_ShM_to_ShM++;
-      goto changed;
-   }
-
-   /* NoAccess */
-   if (is_SHVAL_NoAccess(wold)) {
-      // FIXME: complain if accessing here
-      // FIXME: transition to Excl?
-      if (0)
-      VG_(printf)(
-         "msm__handle_write_aligned_32(thr=%p, addr=%p): NoAccess\n",
-         thr_acc, (void*)a );
-      stats__msm_write_NoAccess++;
-      /*NOCHANGE*/return wold;
-   } 
-
-   /* hmm, bogus state */
-   VG_(printf)("msm__handle_write_aligned_32: bogus old state 0x%x\n", 
-               wold);
-   tl_assert(0);
-
-  changed:
-   if (UNLIKELY(clo_trace_level > 0)) {
-      if (a <= clo_trace_addr && clo_trace_addr < a+szB
-          && wold != wnew) {
-         msm__show_state_change( thr_acc, a, szB, 'w', wold, wnew );
-      }
-   }
-   return wnew;
-}
-
-
-/*----------------------------------------------------------------*/
-/*--- Shadow value and address range handlers                  ---*/
-/*----------------------------------------------------------------*/
-
-static void laog__pre_thread_acquires_lock ( Thread*, Lock* ); /* fwds */
-static void laog__handle_lock_deletions    ( WordSetID ); /* fwds */
-static inline Thread* get_current_Thread ( void ); /* fwds */
-
-/* ------------ CacheLineF and CacheLineZ related ------------ */
-
-static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
-   Word bix, shft, mask, prep;
-   tl_assert((b2 & ~3) == 0);
-   tl_assert(ix >= 0);
-   bix  = ix >> 2;
-   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
-   mask = 3 << shft;
-   prep = b2 << shft;
-   arr[bix] = (arr[bix] & ~mask) | prep;
-}
-
-static UWord read_twobit_array ( UChar* arr, UWord ix ) {
-   Word bix, shft;
-   tl_assert(ix >= 0);
-   bix  = ix >> 2;
-   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
-   return (arr[bix] >> shft) & 3;
-}
-
-/* Given a lineZ index and a SecMap, return the CacheLineZ* and CacheLineF*
-   for that index. */
-static void get_ZF_by_index ( /*OUT*/CacheLineZ** zp,
-                              /*OUT*/CacheLineF** fp,
-                              SecMap* sm, Int zix ) {
-   CacheLineZ* lineZ;
-   tl_assert(zp);
-   tl_assert(fp);
-   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
-   tl_assert(is_sane_SecMap(sm));
-   lineZ = &sm->linesZ[zix];
-   if (lineZ->dict[0] == 0) {
-      Int fix = lineZ->dict[1];
-      tl_assert(sm->linesF);
-      tl_assert(sm->linesF_size > 0);
-      tl_assert(fix >= 0 && fix < sm->linesF_size);
-      *zp = NULL;
-      *fp = &sm->linesF[fix];
-      tl_assert(sm->linesF[fix].inUse);
-   } else {
-      *zp = lineZ;
-      *fp = NULL;
-   }
-}
-
-static void find_ZF_for_reading ( /*OUT*/CacheLineZ** zp,
-                                  /*OUT*/CacheLineF** fp, Addr tag ) {
-   CacheLineZ* lineZ;
-   CacheLineF* lineF;
-   UWord   zix;
-   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
-   UWord   smoff = shmem__get_SecMap_offset(tag);
-   /* since smoff is derived from a valid tag, it should be
-      cacheline-aligned. */
-   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
-   zix = smoff >> N_LINE_BITS;
-   tl_assert(zix < N_SECMAP_ZLINES);
-   lineZ = &sm->linesZ[zix];
-   lineF = NULL;
-   if (lineZ->dict[0] == 0) {
-      Word fix = lineZ->dict[1];
-      tl_assert(sm->linesF);
-      tl_assert(sm->linesF_size > 0);
-      tl_assert(fix >= 0 && fix < sm->linesF_size);
-      lineF = &sm->linesF[fix];
-      tl_assert(lineF->inUse);
-      lineZ = NULL;
-   }
-   *zp = lineZ;
-   *fp = lineF;
-}
-
-static void find_Z_for_writing ( /*OUT*/SecMap** smp,
-                                 /*OUT*/Word* zixp,
-                                 Addr tag ) {
-   CacheLineZ* lineZ;
-   CacheLineF* lineF;
-   UWord   zix;
-   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
-   UWord   smoff = shmem__get_SecMap_offset(tag);
-   /* since smoff is derived from a valid tag, it should be
-      cacheline-aligned. */
-   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
-   zix = smoff >> N_LINE_BITS;
-   tl_assert(zix < N_SECMAP_ZLINES);
-   lineZ = &sm->linesZ[zix];
-   lineF = NULL;
-   /* If lineZ has an associated lineF, free it up. */
-   if (lineZ->dict[0] == 0) {
-      Word fix = lineZ->dict[1];
-      tl_assert(sm->linesF);
-      tl_assert(sm->linesF_size > 0);
-      tl_assert(fix >= 0 && fix < sm->linesF_size);
-      lineF = &sm->linesF[fix];
-      tl_assert(lineF->inUse);
-      lineF->inUse = False;
-   }
-   *smp  = sm;
-   *zixp = zix;
-}
-
-static 
-void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
-   Word        i, new_size;
-   CacheLineF* nyu;
-
-   if (sm->linesF) {
-      tl_assert(sm->linesF_size > 0);
-   } else {
-      tl_assert(sm->linesF_size == 0);
-   }
-
-   if (sm->linesF) {
-      for (i = 0; i < sm->linesF_size; i++) {
-         if (!sm->linesF[i].inUse) {
-            *fixp = (Word)i;
-            return;
-         }
-      }
-   }
-
-   /* No free F line found.  Expand existing array and try again. */
-   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
-   nyu      = hg_zalloc( "hg", new_size * sizeof(CacheLineF) );
-   tl_assert(nyu);
-
-   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
-   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
-                                  * sizeof(CacheLineF);
-
-   if (0)
-   VG_(printf)("SM %p: expand F array from %d to %ld\n",
-               sm, (Int)sm->linesF_size, new_size);
-
-   for (i = 0; i < new_size; i++)
-      nyu[i].inUse = False;
-
-   if (sm->linesF) {
-      for (i = 0; i < sm->linesF_size; i++) {
-         tl_assert(sm->linesF[i].inUse);
-         nyu[i] = sm->linesF[i];
-      }
-      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(CacheLineF) );
-      hg_free(sm->linesF);
-   }
-
-   sm->linesF      = nyu;
-   sm->linesF_size = new_size;
-
-   for (i = 0; i < sm->linesF_size; i++) {
-      if (!sm->linesF[i].inUse) {
-         *fixp = (Word)i;
-         return;
-      }
-    }
-
-    /*NOTREACHED*/
-    tl_assert(0);
-}
-
-
-/* ------------ CacheLine and implicit-tree related ------------ */
-
-__attribute__((unused))
-static void pp_CacheLine ( CacheLine* cl ) {
-   Word i;
-   if (!cl) {
-      VG_(printf)("pp_CacheLine(NULL)\n");
-      return;
-   }
-   for (i = 0; i < N_LINE_TREES; i++) 
-      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
-   for (i = 0; i < N_LINE_ARANGE; i++) 
-      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
-}
-
-static UChar descr_to_validbits ( UShort descr )
-{
-   /* a.k.a Party Time for gcc's constant folder */
-#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
-                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
-             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
-                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
-                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
-                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
-                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
-                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
-                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
-                          ( (b16_0) << 0) ) )
-
-#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
-             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
-                         ( (bit5) << 5) | ( (bit4) << 4) | \
-                         ( (bit3) << 3) | ( (bit2) << 2) | \
-                         ( (bit1) << 1) | ( (bit0) << 0) ) )
-
-   /* these should all get folded out at compile time */
-   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
-   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
-   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
-   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
-   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
-   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
-   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
-   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
-   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
-
-   switch (descr) {
-   /*
-              +--------------------------------- TREE_DESCR_8_7
-              |             +------------------- TREE_DESCR_8_0
-              |             |  +---------------- TREE_DESCR_16_3
-              |             |  | +-------------- TREE_DESCR_32_1
-              |             |  | | +------------ TREE_DESCR_16_2
-              |             |  | | |  +--------- TREE_DESCR_64
-              |             |  | | |  |  +------ TREE_DESCR_16_1
-              |             |  | | |  |  | +---- TREE_DESCR_32_0
-              |             |  | | |  |  | | +-- TREE_DESCR_16_0
-              |             |  | | |  |  | | |
-              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
-   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
-                                                 return BYTE(1,1,1,1,1,1,1,1);
-   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
-                                                 return BYTE(1,1,0,1,1,1,1,1);
-   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */ 
-                                                 return BYTE(0,1,1,1,1,1,1,1);
-   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
-                                                 return BYTE(0,1,0,1,1,1,1,1);
-
-   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */ 
-                                                 return BYTE(1,1,1,1,1,1,0,1);
-   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
-                                                 return BYTE(1,1,0,1,1,1,0,1);
-   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
-                                                 return BYTE(0,1,1,1,1,1,0,1);
-   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
-                                                 return BYTE(0,1,0,1,1,1,0,1);
-
-   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
-                                                 return BYTE(1,1,1,1,0,1,1,1);
-   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
-                                                 return BYTE(1,1,0,1,0,1,1,1);
-   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
-                                                 return BYTE(0,1,1,1,0,1,1,1);
-   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
-                                                 return BYTE(0,1,0,1,0,1,1,1);
-
-   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
-                                                 return BYTE(1,1,1,1,0,1,0,1);
-   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
-                                                 return BYTE(1,1,0,1,0,1,0,1);
-   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
-                                                 return BYTE(0,1,1,1,0,1,0,1);
-   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
-                                                 return BYTE(0,1,0,1,0,1,0,1);
-
-   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
-                                                 return BYTE(0,0,0,1,1,1,1,1);
-   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
-                                                 return BYTE(0,0,0,1,1,1,0,1);
-   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
-                                                 return BYTE(0,0,0,1,0,1,1,1);
-   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
-                                                 return BYTE(0,0,0,1,0,1,0,1);
-
-   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
-                                                 return BYTE(1,1,1,1,0,0,0,1);
-   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
-                                                 return BYTE(1,1,0,1,0,0,0,1);
-   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
-                                                 return BYTE(0,1,1,1,0,0,0,1);
-   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
-                                                 return BYTE(0,1,0,1,0,0,0,1);
-
-   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
-                                                 return BYTE(0,0,0,1,0,0,0,1);
-
-   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
-                                                 return BYTE(0,0,0,0,0,0,0,1);
-
-   default: return BYTE(0,0,0,0,0,0,0,0); 
-                   /* INVALID - any valid descr produces at least one
-                      valid bit in tree[0..7]*/
-   }
-   /* NOTREACHED*/
-   tl_assert(0);
-
-#  undef DESCR
-#  undef BYTE
-}
-
-__attribute__((unused))
-static Bool is_sane_Descr ( UShort descr ) {
-   return descr_to_validbits(descr) != 0;
-}
-
-static void sprintf_Descr ( /*OUT*/UChar* dst, UShort descr ) {
-   VG_(sprintf)(dst, 
-                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
-                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
-                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
-                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
-   );
-}
-static void sprintf_Byte ( /*OUT*/UChar* dst, UChar byte ) {
-   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
-                     (Int)((byte & 128) ? 1 : 0),
-                     (Int)((byte &  64) ? 1 : 0),
-                     (Int)((byte &  32) ? 1 : 0),
-                     (Int)((byte &  16) ? 1 : 0),
-                     (Int)((byte &   8) ? 1 : 0),
-                     (Int)((byte &   4) ? 1 : 0),
-                     (Int)((byte &   2) ? 1 : 0),
-                     (Int)((byte &   1) ? 1 : 0)
-   );
-}
-
-static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
-   Word  i;
-   UChar validbits = descr_to_validbits(descr);
-   UChar buf[128], buf2[128];
-   if (validbits == 0)
-      goto bad;
-   for (i = 0; i < 8; i++) {
-      if (validbits & (1<<i)) {
-         if (!is_SHVAL_valid(tree[i]))
-            goto bad;
-      } else {
-         if (tree[i] != 0)
-            goto bad;
-      }
-   }
-   return True;
-  bad:
-   sprintf_Descr( buf, descr );
-   sprintf_Byte( buf2, validbits );
-   VG_(printf)("is_sane_Descr_and_Tree: bad tree {\n");
-   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
-   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
-   for (i = 0; i < 8; i++)
-      VG_(printf)("   [%ld] 0x%08x\n", i, tree[i]);
-   VG_(printf)("}\n");
-   return 0;
-}
-
-
-static Bool is_sane_CacheLine ( CacheLine* cl )
-{
-   Word tno, cloff;
-
-   if (!cl) goto bad;
-
-   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
-      UShort descr = cl->descrs[tno];
-      SVal*  tree  = &cl->svals[cloff];
-      if (!is_sane_Descr_and_Tree(descr, tree))
-         goto bad;
-   }
-   tl_assert(cloff == N_LINE_ARANGE);
-   return True;
-  bad:
-   pp_CacheLine(cl);
-   return False;
-}
-
-
-static UShort normalise_tree ( /*MOD*/SVal* tree ) {
-   Word   i;
-   UShort descr;
-   /* pre: incoming tree[0..7] does not have any invalid shvals, in
-      particular no zeroes. */
-   for (i = 0; i < 8; i++)
-      tl_assert(tree[i] != 0);
-   
-   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
-           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
-           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
-   /* build 16-bit layer */
-   if (tree[1] == tree[0]) {
-      tree[1] = 0/*INVALID*/;
-      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
-      descr |= TREE_DESCR_16_0;
-   }
-   if (tree[3] == tree[2]) {
-      tree[3] = 0/*INVALID*/;
-      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
-      descr |= TREE_DESCR_16_1;
-   }
-   if (tree[5] == tree[4]) {
-      tree[5] = 0/*INVALID*/;
-      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
-      descr |= TREE_DESCR_16_2;
-   }
-   if (tree[7] == tree[6]) {
-      tree[7] = 0/*INVALID*/;
-      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
-      descr |= TREE_DESCR_16_3;
-   }
-   /* build 32-bit layer */
-   if (tree[2] == tree[0]
-       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
-      tree[2] = 0; /* [3,1] must already be 0 */
-      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
-      descr |= TREE_DESCR_32_0;
-   }
-   if (tree[6] == tree[4]
-       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
-      tree[6] = 0; /* [7,5] must already be 0 */
-      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
-      descr |= TREE_DESCR_32_1;
-   }
-   /* build 64-bit layer */
-   if (tree[4] == tree[0]
-       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
-      tree[4] = 0; /* [7,6,5,3,2,1] must already be 0 */
-      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
-      descr |= TREE_DESCR_64;
-   }
-   return descr;
-}
-
-/* This takes a cacheline where all the data is at the leaves
-   (w8[..]) and builds a correctly normalised tree. */
-static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
-{
-   Word tno, cloff;
-   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
-      SVal* tree = &cl->svals[cloff];
-      cl->descrs[tno] = normalise_tree( tree );
-   }
-   tl_assert(cloff == N_LINE_ARANGE);
-   if (SCE_CACHELINE)
-      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   stats__cline_normalises++;
-}
-
-
-static 
-SVal* sequentialise_tree ( /*MOD*/SVal* dst, /*OUT*/Bool* anyShared,
-                           UShort descr, SVal* tree ) { /*
-   Flatten one 8-entry tree into 8 consecutive SVals starting at
-   'dst'.  'descr' says which nodes of 'tree' currently hold values;
-   a node covering N bytes is written out N times.  *anyShared is set
-   to True iff is_SHVAL_Sh holds for at least one node value written,
-   else False.  Returns 'dst' advanced past the values written, and
-   asserts that exactly 8 were written. */
-   SVal* dst0 = dst;
-   *anyShared = False;
-
-#  define PUT(_n,_v)                                \
-      do { Word i;                                  \
-           if (is_SHVAL_Sh(_v))                     \
-              *anyShared = True;                    \
-           for (i = 0; i < (_n); i++)               \
-                  *dst++ = (_v);                    \
-      } while (0)
-
-   /* byte 0: emit the first node found present, trying widest first */
-   if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
-   if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
-   if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
-   if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
-   /* byte 1 */
-   if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
-   /* byte 2 */
-   if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
-   if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
-   /* byte 3 */
-   if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
-   /* byte 4 */
-   if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
-   if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
-   if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
-   /* byte 5 */
-   if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
-   /* byte 6 */
-   if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
-   if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
-   /* byte 7 */
-   if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);
-
-#  undef PUT
-
-   tl_assert( (((Char*)dst) - ((Char*)dst0)) == 8 * sizeof(SVal) );
-   return dst;
-}
-
-/* Flatten the whole cacheline 'src' into the array starting at
-   'dst', one tree at a time, using sequentialise_tree.  Returns True
-   iff any tree reported a shared value.  Asserts that exactly nDst
-   SVals were written.
-
-   NOTE(review): this comment used to read "Write the cacheline 'wix'
-   to backing store.  Where it ends up is determined by its tag
-   field."  That describes cacheline_wback, not this function. */
-static
-Bool sequentialise_CacheLine ( /*OUT*/SVal* dst, Word nDst, CacheLine* src )
-{
-   Word  tno, cloff;
-   Bool  anyShared = False;
-   SVal* dst0      = dst;
-
-   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
-      UShort descr = src->descrs[tno];
-      SVal*  tree  = &src->svals[cloff];
-      Bool   bTmp  = False;
-      dst = sequentialise_tree ( dst, &bTmp, descr, tree );
-      anyShared |= bTmp;
-   }
-   tl_assert(cloff == N_LINE_ARANGE);
-
-   /* Assert that the number of shadow values written equals nDst. */
-   tl_assert( ((HChar*)dst) - ((HChar*)dst0) 
-              == nDst * sizeof(SVal) );
-
-   return anyShared;
-}
-
-
-static __attribute__((noinline)) void cacheline_wback ( UWord wix )
-{  /* Write cache entry 'wix' back to the SecMap selected by the
-      entry's tag.  Does nothing if the tag is not valid.
-
-      The line is flattened to N_LINE_ARANGE SVals.  If these take at
-      most 4 distinct values they are stored in the Z line: a 4-entry
-      dictionary plus a 2-bit dictionary index per SVal.  Otherwise an
-      F line is allocated to hold every SVal in full, and the Z line
-      is left with dict[1] holding the F line's index and dict[0],
-      dict[2], dict[3] zero.  Finally sm->mbHasShared is set if any
-      flattened value was shared. */
-   Word        i, j;
-   Bool        anyShared = False;
-   Addr        tag;
-   SecMap*     sm;
-   CacheLine*  cl;
-   CacheLineZ* lineZ;
-   CacheLineF* lineF;
-   Word        zix, fix;
-   SVal        shvals[N_LINE_ARANGE];
-   SVal        sv;
-
-   if (0)
-   VG_(printf)("scache wback line %d\n", (Int)wix);
-
-   tl_assert(wix >= 0 && wix < N_WAY_NENT); /* NOTE(review): wix is a
-      UWord (presumably unsigned), so the ">= 0" half looks vacuous */
-
-   tag =  cache_shmem.tags0[wix];
-   cl  = &cache_shmem.lyns0[wix];
-
-   /* The cache line may have been invalidated; if so, ignore it. */
-   if (!is_valid_scache_tag(tag))
-      return;
-
-   /* Where are we going to put it? */
-   sm         = NULL;
-   lineZ      = NULL;
-   lineF      = NULL;
-   zix = fix = -1;
-
-   find_Z_for_writing( &sm, &zix, tag );
-   tl_assert(sm);
-   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
-   lineZ = &sm->linesZ[zix];
-
-   /* Generate the data to be stored */
-   if (SCE_CACHELINE)
-      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   anyShared = sequentialise_CacheLine( shvals, N_LINE_ARANGE, cl );
-
-   lineZ->dict[0] = lineZ->dict[1] 
-                  = lineZ->dict[2] = lineZ->dict[3] = 0; /* 0 marks a
-      free dictionary slot (see the search for one below) */
-
-   for (i = 0; i < N_LINE_ARANGE; i++) {
-
-      sv = shvals[i];
-      for (j = 0; j < 4; j++) { /* already in the dictionary? */
-         if (sv == lineZ->dict[j])
-            goto dict_ok;
-      }
-      for (j = 0; j < 4; j++) { /* no: look for a free slot */
-         if (lineZ->dict[j] == 0)
-            break;
-      }
-      tl_assert(j >= 0 && j <= 4);
-      if (j == 4) break; /* we'll have to use the f rep */
-      tl_assert(is_SHVAL_valid(sv));
-      lineZ->dict[j] = sv;
-     dict_ok:
-      write_twobit_array( lineZ->ix2s, i, j ); /* SVal i uses slot j */
-
-   }
-
-   tl_assert(i >= 0 && i <= N_LINE_ARANGE);
-
-   if (i < N_LINE_ARANGE) {
-      /* cannot use the compressed rep.  Use f rep instead. */
-      alloc_F_for_writing( sm, &fix );
-      tl_assert(sm->linesF);
-      tl_assert(sm->linesF_size > 0);
-      tl_assert(fix >= 0 && fix < sm->linesF_size);
-      lineF = &sm->linesF[fix];
-      tl_assert(!lineF->inUse);
-      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = 0;
-      lineZ->dict[1] = (SVal)fix; /* Z line now just points at the F line */
-      lineF->inUse = True;
-      for (i = 0; i < N_LINE_ARANGE; i++) {
-         sv = shvals[i];
-         tl_assert(is_SHVAL_valid(sv));
-         lineF->w32s[i] = sv;
-      }
-      stats__cache_F_wbacks++;
-   } else {
-      stats__cache_Z_wbacks++;
-   }
-
-   if (anyShared)
-      sm->mbHasShared = True;
-
-   /* mb_tidy_one_cacheline(); */
-}
-
-/* Fetch the cacheline 'wix' from the backing store.  The tag
-   associated with 'wix' is assumed to have already been filled in;
-   hence that is used to determine where in the backing store to read
-   from.  The data comes from either an F line (copied as-is) or a Z
-   line (each 2-bit index is expanded through the line's dictionary);
-   find_ZF_for_reading must yield exactly one of the two (asserted).
-   The cache line is then normalised. */
-static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
-{
-   Word        i;
-   Addr        tag;
-   CacheLine*  cl;
-   CacheLineZ* lineZ;
-   CacheLineF* lineF;
-
-   if (0)
-   VG_(printf)("scache fetch line %d\n", (Int)wix);
-
-   tl_assert(wix >= 0 && wix < N_WAY_NENT);
-
-   tag =  cache_shmem.tags0[wix];
-   cl  = &cache_shmem.lyns0[wix];
-
-   /* reject nonsense requests */
-   tl_assert(is_valid_scache_tag(tag));
-
-   lineZ = NULL;
-   lineF = NULL;
-   find_ZF_for_reading( &lineZ, &lineF, tag );
-   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
-
-   /* expand the data into the bottom layer of the tree, then get
-      normalise_CacheLine to build the descriptor array. */
-   if (lineF) {
-      tl_assert(lineF->inUse);
-      for (i = 0; i < N_LINE_ARANGE; i++) {
-         cl->svals[i] = lineF->w32s[i];
-      }
-      stats__cache_F_fetches++;
-   } else {
-      for (i = 0; i < N_LINE_ARANGE; i++) {
-         SVal sv;
-         UWord ix = read_twobit_array( lineZ->ix2s, i );
-         tl_assert(ix >= 0 && ix <= 3);
-         sv = lineZ->dict[ix];
-         tl_assert(sv != 0); /* a referenced dictionary slot must not
-            be empty */
-         cl->svals[i] = sv;
-      }
-      stats__cache_Z_fetches++;
-   }
-   normalise_CacheLine( cl );
-}
-
-static void shmem__invalidate_scache ( void ) { /* Mark every cache
-   entry invalid by setting its tag to 1.  Nothing is written back
-   here.  Bumps stats__cache_invals. */
-   Word wix;
-   if (0) VG_(printf)("scache inval\n");
-   tl_assert(!is_valid_scache_tag(1)); /* 1 must be usable as "invalid" */
-   for (wix = 0; wix < N_WAY_NENT; wix++) {
-      cache_shmem.tags0[wix] = 1/*INVALID*/;
-   }
-   stats__cache_invals++;
-}
-
-static void shmem__flush_and_invalidate_scache ( void ) { /* Write
-   every valid cache entry back with cacheline_wback, then mark all
-   entries invalid (tag 1).  Bumps stats__cache_flushes and
-   stats__cache_invals. */
-   Word wix;
-   Addr tag;
-   if (0) VG_(printf)("scache flush and invalidate\n");
-   tl_assert(!is_valid_scache_tag(1));
-   for (wix = 0; wix < N_WAY_NENT; wix++) {
-      tag = cache_shmem.tags0[wix];
-      if (tag == 1/*INVALID*/) {
-         /* already invalid; nothing to do */
-      } else {
-         tl_assert(is_valid_scache_tag(tag));
-         cacheline_wback( wix );
-      }
-      cache_shmem.tags0[wix] = 1/*INVALID*/;
-   }
-   stats__cache_flushes++;
-   stats__cache_invals++;
-}
-
-
-/* ------------ Basic shadow memory read/write ops ------------ */
-
-static inline Bool aligned16 ( Addr a ) { /* is 'a' a multiple of 2? */
-   return 0 == (a & 1);
-}
-static inline Bool aligned32 ( Addr a ) { /* is 'a' a multiple of 4? */
-   return 0 == (a & 3);
-}
-static inline Bool aligned64 ( Addr a ) { /* is 'a' a multiple of 8? */
-   return 0 == (a & 7);
-}
-static inline UWord get_cacheline_offset ( Addr a ) { /* offset of 'a'
-   within its cache line; the mask assumes N_LINE_ARANGE is a power
-   of 2 -- TODO confirm */
-   return (UWord)(a & (N_LINE_ARANGE - 1));
-}
-static inline UWord get_treeno ( Addr a ) { /* in-line offset of 'a'
-   divided by 8, i.e. which 8-entry tree of the line covers 'a' */
-   return get_cacheline_offset(a) >> 3;
-}
-static inline UWord get_tree_offset ( Addr a ) { /* low 3 bits of 'a':
-   its position (0 .. 7) within its tree */
-   return a & 7;
-}
-
-static __attribute__((noinline))
-       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
-static inline CacheLine* get_cacheline ( Addr a )
-{  /* Return the cache line holding the shadow state for 'a'.  Each
-      address maps to exactly one cache entry ('wix'); if that
-      entry's tag matches we return it, otherwise get_cacheline_MISS
-      is called to do the refill.  Every call bumps
-      stats__cache_totrefs. */
-   /* tag is 'a' with the in-line offset masked out, 
-      eg a[31]..a[4] 0000 */
-   Addr       tag = a & ~(N_LINE_ARANGE - 1);
-   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
-   stats__cache_totrefs++;
-   if (LIKELY(tag == cache_shmem.tags0[wix])) {
-      return &cache_shmem.lyns0[wix];
-   } else {
-      return get_cacheline_MISS( a );
-   }
-}
-
-static __attribute__((noinline))
-       CacheLine* get_cacheline_MISS ( Addr a )
-{  /* Slow path of get_cacheline: the cache entry for 'a' does not
-      currently hold the line for 'a' (asserted).  If the entry holds
-      a valid line, that is written back first; then the new tag is
-      installed, the line is fetched from the backing store, and a
-      pointer to it is returned.  Bumps stats__cache_totmisses. */
-   /* tag is 'a' with the in-line offset masked out, 
-      eg a[31]..a[4] 0000 */
-
-   CacheLine* cl;
-   Addr*      tag_old_p;
-   Addr       tag = a & ~(N_LINE_ARANGE - 1);
-   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
-
-   tl_assert(tag != cache_shmem.tags0[wix]);
-
-   /* Dump the old line into the backing store. */
-   stats__cache_totmisses++;
-
-   cl        = &cache_shmem.lyns0[wix];
-   tag_old_p = &cache_shmem.tags0[wix];
-
-   if (is_valid_scache_tag( *tag_old_p )) {
-      /* EXPENSIVE and REDUNDANT: callee does it */
-      if (SCE_CACHELINE)
-         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-      cacheline_wback( wix );
-   }
-   /* and reload the new one */
-   *tag_old_p = tag; /* must be set before the fetch, which reads it */
-   cacheline_fetch( wix );
-   if (SCE_CACHELINE)
-      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   return cl;
-}
-
-static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { /*
-   Split the tree's 64-bit node into two 32-bit nodes: tree[0] is
-   copied into tree[4], and in 'descr' the 64 bit is replaced by the
-   32_1 and 32_0 bits.  'toff' must be 0 or 4 and the 64-bit node
-   must be present (both asserted).  Returns the new descriptor; it
-   is not stored anywhere by this function. */
-   stats__cline_64to32pulldown++;
-   switch (toff) {
-      case 0: case 4:
-         tl_assert(descr & TREE_DESCR_64);
-         tree[4] = tree[0];
-         descr &= ~TREE_DESCR_64;
-         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
-         break;
-      default:
-         tl_assert(0);
-   }
-   return descr;
-}
-
-static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { /*
-   Split the 32-bit node covering 'toff' into two 16-bit nodes
-   (toff 0 or 2: low half of the tree; 4 or 6: high half).  If that
-   32-bit node is not present, pulldown_to_32 is called first to
-   create it.  The 32-bit value is then copied into the half's
-   second 16-bit slot and the descriptor bits are swapped over.  Any
-   other 'toff' asserts.  Returns the new descriptor. */
-   stats__cline_32to16pulldown++;
-   switch (toff) {
-      case 0: case 2:
-         if (!(descr & TREE_DESCR_32_0)) {
-            descr = pulldown_to_32(tree, 0, descr);
-         }
-         tl_assert(descr & TREE_DESCR_32_0);
-         tree[2] = tree[0];
-         descr &= ~TREE_DESCR_32_0;
-         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
-         break;
-      case 4: case 6:
-         if (!(descr & TREE_DESCR_32_1)) {
-            descr = pulldown_to_32(tree, 4, descr);
-         }
-         tl_assert(descr & TREE_DESCR_32_1);
-         tree[6] = tree[4];
-         descr &= ~TREE_DESCR_32_1;
-         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
-         break;
-      default:
-         tl_assert(0);
-   }
-   return descr;
-}
-
-static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { /*
-   Split the 16-bit node covering byte 'toff' (0 .. 7) into two
-   8-bit nodes.  If that 16-bit node is not present, pulldown_to_16
-   is called first to create it.  The 16-bit value is then copied
-   into the pair's second byte slot and the descriptor bits are
-   swapped over.  Any other 'toff' asserts.  Returns the new
-   descriptor. */
-   stats__cline_16to8pulldown++;
-   switch (toff) {
-      case 0: case 1:
-         if (!(descr & TREE_DESCR_16_0)) {
-            descr = pulldown_to_16(tree, 0, descr);
-         }
-         tl_assert(descr & TREE_DESCR_16_0);
-         tree[1] = tree[0];
-         descr &= ~TREE_DESCR_16_0;
-         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
-         break;
-      case 2: case 3:
-         if (!(descr & TREE_DESCR_16_1)) {
-            descr = pulldown_to_16(tree, 2, descr);
-         }
-         tl_assert(descr & TREE_DESCR_16_1);
-         tree[3] = tree[2];
-         descr &= ~TREE_DESCR_16_1;
-         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
-         break;
-      case 4: case 5:
-         if (!(descr & TREE_DESCR_16_2)) {
-            descr = pulldown_to_16(tree, 4, descr);
-         }
-         tl_assert(descr & TREE_DESCR_16_2);
-         tree[5] = tree[4];
-         descr &= ~TREE_DESCR_16_2;
-         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
-         break;
-      case 6: case 7:
-         if (!(descr & TREE_DESCR_16_3)) {
-            descr = pulldown_to_16(tree, 6, descr);
-         }
-         tl_assert(descr & TREE_DESCR_16_3);
-         tree[7] = tree[6];
-         descr &= ~TREE_DESCR_16_3;
-         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
-         break;
-      default:
-         tl_assert(0);
-   }
-   return descr;
-}
-
-
-static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) { /*
-   Descriptor-only merge of the two 8-bit nodes of the pair starting
-   at 'toff' (0, 2, 4 or 6) into that pair's 16-bit node.  Both
-   8-bit bits must be set (asserted); they are cleared and the
-   16-bit bit is set.  No SVals are read or written here, so the
-   caller has to supply the merged value itself.  Any other 'toff'
-   asserts.  Returns the new descriptor. */
-   UShort mask;
-   switch (toff) {
-      case 0:
-         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
-         tl_assert( (descr & mask) == mask );
-         descr &= ~mask;
-         descr |= TREE_DESCR_16_0;
-         break;
-      case 2:
-         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
-         tl_assert( (descr & mask) == mask );
-         descr &= ~mask;
-         descr |= TREE_DESCR_16_1;
-         break;
-      case 4:
-         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
-         tl_assert( (descr & mask) == mask );
-         descr &= ~mask;
-         descr |= TREE_DESCR_16_2;
-         break;
-      case 6:
-         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
-         tl_assert( (descr & mask) == mask );
-         descr &= ~mask;
-         descr |= TREE_DESCR_16_3;
-         break;
-      default:
-         tl_assert(0);
-   }
-   return descr;
-}
-
-static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) { /*
-   Descriptor-only merge of the half of the tree starting at 'toff'
-   (0 or 4) into a single 32-bit node.  Each of the half's two pairs
-   that is not already a 16-bit node is first merged with
-   pullup_descr_to_16; then both 16-bit bits are cleared and the
-   32-bit bit is set.  No SVals are read or written here.  Any other
-   'toff' asserts.  Returns the new descriptor. */
-   UShort mask;
-   switch (toff) {
-      case 0:
-         if (!(descr & TREE_DESCR_16_0))
-            descr = pullup_descr_to_16(descr, 0);
-         if (!(descr & TREE_DESCR_16_1))
-            descr = pullup_descr_to_16(descr, 2);
-         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
-         tl_assert( (descr & mask) == mask );
-         descr &= ~mask;
-         descr |= TREE_DESCR_32_0;
-         break;
-      case 4:
-         if (!(descr & TREE_DESCR_16_2))
-            descr = pullup_descr_to_16(descr, 4);
-         if (!(descr & TREE_DESCR_16_3))
-            descr = pullup_descr_to_16(descr, 6);
-         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
-         tl_assert( (descr & mask) == mask );
-         descr &= ~mask;
-         descr |= TREE_DESCR_32_1;
-         break;
-      default:
-         tl_assert(0);
-   }
-   return descr;
-}
-
-static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) { /*
-   For a 32-bit access at 'toff' (0 or 4): True iff 'descr' has the
-   64-bit node, i.e. the value is held one level above.  Any other
-   'toff' asserts.
-   NOTE(review): the default arm has no return statement; this
-   presumably relies on tl_assert(0) never returning -- confirm. */
-   switch (toff) {
-      case 0: case 4:
-         return 0 != (descr & TREE_DESCR_64);
-      default:
-         tl_assert(0);
-   }
-}
-
-static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) { /*
-   For a 16-bit access at 'toff' (0, 2, 4 or 6): True iff 'descr' has
-   an 8-bit node for either byte of that pair, i.e. the value is
-   held one level below.  Any other 'toff' asserts.
-   NOTE(review): the default arm has no return statement; this
-   presumably relies on tl_assert(0) never returning -- confirm. */
-   switch (toff) {
-      case 0:
-         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
-      case 2:
-         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
-      case 4:
-         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
-      case 6:
-         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
-      default:
-         tl_assert(0);
-   }
-}
-
-static void shadow_mem_read8 ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Process a 1-byte read of 'a' by 'thr_acc'.  If the tree has no
-   8-bit node for this byte, the covering node is pulled down to 8
-   bits first.  The byte's SVal is then replaced by whatever
-   msm__handle_read returns for it.  'uuOpaque' is not used. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   SVal       svOld, svNew;
-   UShort     descr;
-   stats__cline_read8s++;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 .. 7 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
-      SVal* tree = &cl->svals[tno << 3];
-      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
-      if (SCE_CACHELINE)
-         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   }
-   svOld = cl->svals[cloff];
-   svNew = msm__handle_read( thr_acc, a, svOld, 1 );
-   cl->svals[cloff] = svNew;
-}
-static void shadow_mem_read16 ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Process a 2-byte read of 'a' by 'thr_acc'.  If 'a' is 2-aligned
-   and the tree has (or can be pulled down to) a 16-bit node here,
-   that node's SVal is replaced by msm__handle_read's result.  If
-   'a' is misaligned, or the pair is already held as 8-bit nodes,
-   the access is done as two 1-byte reads instead.  'uuOpaque' is
-   not used. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   SVal       svOld, svNew;
-   UShort     descr;
-   stats__cline_read16s++;
-   if (UNLIKELY(!aligned16(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
-      if (valid_value_is_below_me_16(descr, toff)) {
-         goto slowcase;
-      } else {
-         SVal* tree = &cl->svals[tno << 3];
-         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
-      }
-      if (SCE_CACHELINE)
-         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   }
-   svOld = cl->svals[cloff];
-   svNew = msm__handle_read( thr_acc, a, svOld, 2 );
-   cl->svals[cloff] = svNew;
-   return;
-  slowcase: /* misaligned, or must go further down the tree */
-   stats__cline_16to8splits++;
-   shadow_mem_read8( thr_acc, a + 0, 0/*unused*/ );
-   shadow_mem_read8( thr_acc, a + 1, 0/*unused*/ );
-}
-
-__attribute__((noinline))
-static void shadow_mem_read32_SLOW ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Out-of-line path for shadow_mem_read32.  If 'a' is 4-aligned and
-   the value is at the 32-bit node, or is at the 64-bit node (which
-   is then pulled down to 32 bits), the 32-bit node's SVal is
-   replaced by msm__handle_read's result.  Otherwise (misaligned, or
-   held below the 32-bit level) the access is done as two 2-byte
-   reads.  'uuOpaque' is not used. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   SVal       svOld, svNew;
-   UShort     descr;
-   if (UNLIKELY(!aligned32(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 or 4 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
-      if (valid_value_is_above_me_32(descr, toff)) {
-         SVal* tree = &cl->svals[tno << 3];
-         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
-      } else {
-         goto slowcase;
-      }
-      if (SCE_CACHELINE)
-         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   }
-   svOld = cl->svals[cloff];
-   svNew = msm__handle_read( thr_acc, a, svOld, 4 );
-   cl->svals[cloff] = svNew;
-   return;
-  slowcase: /* misaligned, or must go further down the tree */
-   stats__cline_32to16splits++;
-   shadow_mem_read16( thr_acc, a + 0, 0/*unused*/ );
-   shadow_mem_read16( thr_acc, a + 2, 0/*unused*/ );
-}
-inline
-static void shadow_mem_read32 ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Process a 4-byte read of 'a' by 'thr_acc'.  Fast path only: if
-   'a' is 4-aligned and the tree already has a 32-bit node here, its
-   SVal is replaced in place by msm__handle_read's result.  Every
-   other case is passed to shadow_mem_read32_SLOW. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   UShort     descr;
-   stats__cline_read32s++;
-   if (UNLIKELY(!aligned32(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 or 4 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) goto slowcase;
-   { SVal* p = &cl->svals[cloff];
-     *p = msm__handle_read( thr_acc, a, *p, 4 );
-   }
-   return;
-  slowcase: /* misaligned, or not at this level in the tree */
-   shadow_mem_read32_SLOW( thr_acc, a, uuOpaque );
-}
-
-inline
-static void shadow_mem_read64 ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Process an 8-byte read of 'a' by 'thr_acc'.  If 'a' is 8-aligned
-   and the tree is a single 64-bit node, its SVal is replaced by
-   msm__handle_read's result.  Otherwise the access is done as two
-   4-byte reads.  'uuOpaque' is not used. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   SVal       svOld, svNew;
-   UShort     descr;
-   stats__cline_read64s++;
-   if (UNLIKELY(!aligned64(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0, unused */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
-      goto slowcase;
-   }
-   svOld = cl->svals[cloff];
-   svNew = msm__handle_read( thr_acc, a, svOld, 8 );
-   cl->svals[cloff] = svNew;
-   return;
-  slowcase: /* misaligned, or must go further down the tree */
-   stats__cline_64to32splits++;
-   shadow_mem_read32( thr_acc, a + 0, 0/*unused*/ );
-   shadow_mem_read32( thr_acc, a + 4, 0/*unused*/ );
-}
-
-static void shadow_mem_write8 ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Process a 1-byte write of 'a' by 'thr_acc'.  If the tree has no
-   8-bit node for this byte, the covering node is pulled down to 8
-   bits first.  The byte's SVal is then replaced by whatever
-   msm__handle_write returns for it.  'uuOpaque' is not used. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   SVal       svOld, svNew;
-   UShort     descr;
-   stats__cline_write8s++;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 .. 7 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
-      SVal* tree = &cl->svals[tno << 3];
-      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
-      if (SCE_CACHELINE)
-         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   }
-   svOld = cl->svals[cloff];
-   svNew = msm__handle_write( thr_acc, a, svOld, 1 );
-   cl->svals[cloff] = svNew;
-}
-static void shadow_mem_write16 ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Process a 2-byte write of 'a' by 'thr_acc'.  If 'a' is 2-aligned
-   and the tree has (or can be pulled down to) a 16-bit node here,
-   that node's SVal is replaced by msm__handle_write's result.  If
-   'a' is misaligned, or the pair is already held as 8-bit nodes,
-   the access is done as two 1-byte writes instead.  'uuOpaque' is
-   not used. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   SVal       svOld, svNew;
-   UShort     descr;
-   stats__cline_write16s++;
-   if (UNLIKELY(!aligned16(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
-      if (valid_value_is_below_me_16(descr, toff)) {
-         goto slowcase;
-      } else {
-         SVal* tree = &cl->svals[tno << 3];
-         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
-      }
-      if (SCE_CACHELINE)
-         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   }
-   svOld = cl->svals[cloff];
-   svNew = msm__handle_write( thr_acc, a, svOld, 2 );
-   cl->svals[cloff] = svNew;
-   return;
-  slowcase: /* misaligned, or must go further down the tree */
-   stats__cline_16to8splits++;
-   shadow_mem_write8( thr_acc, a + 0, 0/*unused*/ );
-   shadow_mem_write8( thr_acc, a + 1, 0/*unused*/ );
-}
-
-__attribute__((noinline))
-static void shadow_mem_write32_SLOW ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Out-of-line path for shadow_mem_write32.  If 'a' is 4-aligned and
-   the value is at the 32-bit node, or is at the 64-bit node (which
-   is then pulled down to 32 bits), the 32-bit node's SVal is
-   replaced by msm__handle_write's result.  Otherwise (misaligned,
-   or held below the 32-bit level) the access is done as two 2-byte
-   writes.  'uuOpaque' is not used. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   SVal       svOld, svNew;
-   UShort     descr;
-   if (UNLIKELY(!aligned32(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 or 4 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
-      if (valid_value_is_above_me_32(descr, toff)) {
-         SVal* tree = &cl->svals[tno << 3];
-         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
-      } else {
-         goto slowcase;
-      }
-      if (SCE_CACHELINE)
-         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   }
-   svOld = cl->svals[cloff];
-   svNew = msm__handle_write( thr_acc, a, svOld, 4 );
-   cl->svals[cloff] = svNew;
-   return;
-  slowcase: /* misaligned, or must go further down the tree */
-   stats__cline_32to16splits++;
-   shadow_mem_write16( thr_acc, a + 0, 0/*unused*/ );
-   shadow_mem_write16( thr_acc, a + 2, 0/*unused*/ );
-}
-inline
-static void shadow_mem_write32 ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Process a 4-byte write of 'a' by 'thr_acc'.  Fast path only: if
-   'a' is 4-aligned and the tree already has a 32-bit node here, its
-   SVal is replaced in place by msm__handle_write's result.  Every
-   other case is passed to shadow_mem_write32_SLOW. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   UShort     descr;
-   stats__cline_write32s++;
-   if (UNLIKELY(!aligned32(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 or 4 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) goto slowcase;
-   { SVal* p = &cl->svals[cloff];
-     *p = msm__handle_write( thr_acc, a, *p, 4 );
-   }
-   return;
-  slowcase: /* misaligned, or not at this level in the tree */
-   shadow_mem_write32_SLOW( thr_acc, a, uuOpaque );
-}
-
-inline
-static void shadow_mem_write64 ( Thread* thr_acc, Addr a, SVal uuOpaque ) { /*
-   Process an 8-byte write of 'a' by 'thr_acc'.  If 'a' is 8-aligned
-   and the tree is a single 64-bit node, its SVal is replaced by
-   msm__handle_write's result.  Otherwise the access is done as two
-   4-byte writes.  'uuOpaque' is not used. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   SVal       svOld, svNew;
-   UShort     descr;
-   stats__cline_write64s++;
-   if (UNLIKELY(!aligned64(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0, unused */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
-      goto slowcase;
-   }
-   svOld = cl->svals[cloff];
-   svNew = msm__handle_write( thr_acc, a, svOld, 8 );
-   cl->svals[cloff] = svNew;
-   return;
-  slowcase: /* misaligned, or must go further down the tree */
-   stats__cline_64to32splits++;
-   shadow_mem_write32( thr_acc, a + 0, 0/*unused*/ );
-   shadow_mem_write32( thr_acc, a + 4, 0/*unused*/ );
-}
-
-static void shadow_mem_set8 ( Thread* uu_thr_acc, Addr a, SVal svNew ) { /*
-   Set the shadow value of the byte at 'a' to 'svNew', without
-   looking at the old value.  If the tree has no 8-bit node for
-   this byte, the covering node is pulled down to 8 bits first.
-   'uu_thr_acc' is not used. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   UShort     descr;
-   stats__cline_set8s++;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 .. 7 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
-      SVal* tree = &cl->svals[tno << 3];
-      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
-      if (SCE_CACHELINE)
-         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-   }
-   cl->svals[cloff] = svNew;
-}
-static void shadow_mem_set16 ( Thread* uu_thr_acc, Addr a, SVal svNew ) { /*
-   Set the shadow value of the 2 bytes at 'a' to 'svNew', without
-   looking at the old values.  A misaligned 'a' is handled as two
-   shadow_mem_set8 calls.  Otherwise the tree is first brought to
-   have a 16-bit node here -- by merging the pair's 8-bit nodes in
-   the descriptor, or by pulling down from a wider node -- and then
-   'svNew' is stored in the pair's first slot and 0 in its second.
-   'uu_thr_acc' is only passed through to shadow_mem_set8. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   UShort     descr;
-   stats__cline_set16s++;
-   if (UNLIKELY(!aligned16(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
-      if (valid_value_is_below_me_16(descr, toff)) {
-         /* Writing at this level.  Need to fix up 'descr'. */
-         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
-         /* At this point, the tree does not match cl->descr[tno] any
-            more.  The assignments below will fix it up. */
-      } else {
-         /* We can't indiscriminately write on the w16 node as in the
-            w64 case, as that might make the node inconsistent with
-            its parent.  So first, pull down to this level. */
-         SVal* tree = &cl->svals[tno << 3];
-         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
-      if (SCE_CACHELINE)
-         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-      }
-   }
-   cl->svals[cloff + 0] = svNew;
-   cl->svals[cloff + 1] = 0;
-   return;
-  slowcase: /* misaligned */
-   stats__cline_16to8splits++;
-   shadow_mem_set8( uu_thr_acc, a + 0, svNew );
-   shadow_mem_set8( uu_thr_acc, a + 1, svNew );
-}
-static void shadow_mem_set32 ( Thread* uu_thr_acc, Addr a, SVal svNew ) { /*
-   Set the shadow value of the 4 bytes at 'a' to 'svNew', without
-   looking at the old values.  A misaligned 'a' is handled as two
-   shadow_mem_set16 calls.  Otherwise the tree is first brought to
-   have a 32-bit node here -- by pulling down from the 64-bit node,
-   or by merging narrower nodes in the descriptor -- and then
-   'svNew' is stored in the half's first slot and 0 in its other
-   three.  'uu_thr_acc' is only passed through to shadow_mem_set16. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   UShort     descr;
-   stats__cline_set32s++;
-   if (UNLIKELY(!aligned32(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 or 4 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
-      if (valid_value_is_above_me_32(descr, toff)) {
-         /* We can't indiscriminately write on the w32 node as in the
-            w64 case, as that might make the node inconsistent with
-            its parent.  So first, pull down to this level. */
-         SVal* tree = &cl->svals[tno << 3];
-         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
-         if (SCE_CACHELINE)
-            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
-      } else {
-         /* Writing at this level.  Need to fix up 'descr'. */
-         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
-         /* At this point, the tree does not match cl->descr[tno] any
-            more.  The assignments below will fix it up. */
-      }
-   }
-   cl->svals[cloff + 0] = svNew;
-   cl->svals[cloff + 1] = 0;
-   cl->svals[cloff + 2] = 0;
-   cl->svals[cloff + 3] = 0;
-   return;
-  slowcase: /* misaligned */
-   stats__cline_32to16splits++;
-   shadow_mem_set16( uu_thr_acc, a + 0, svNew );
-   shadow_mem_set16( uu_thr_acc, a + 2, svNew );
-}
-inline
-static void shadow_mem_set64 ( Thread* uu_thr_acc, Addr a, SVal svNew ) { /*
-   Set the shadow value of the 8 bytes at 'a' to 'svNew', without
-   looking at the old values.  A misaligned 'a' is handled as two
-   shadow_mem_set32 calls.  Otherwise the whole tree is overwritten:
-   the descriptor becomes TREE_DESCR_64, slot 0 gets 'svNew' and
-   slots 1 .. 7 get 0.  'uu_thr_acc' is only passed through to
-   shadow_mem_set32. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   stats__cline_set64s++;
-   if (UNLIKELY(!aligned64(a))) goto slowcase;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 */
-   cl->descrs[tno] = TREE_DESCR_64;
-   cl->svals[cloff + 0] = svNew;
-   cl->svals[cloff + 1] = 0;
-   cl->svals[cloff + 2] = 0;
-   cl->svals[cloff + 3] = 0;
-   cl->svals[cloff + 4] = 0;
-   cl->svals[cloff + 5] = 0;
-   cl->svals[cloff + 6] = 0;
-   cl->svals[cloff + 7] = 0;
-   return;
-  slowcase: /* misaligned */
-   stats__cline_64to32splits++;
-   shadow_mem_set32( uu_thr_acc, a + 0, svNew );
-   shadow_mem_set32( uu_thr_acc, a + 4, svNew );
-}
-
-static SVal shadow_mem_get8 ( Addr a ) { /* Return the shadow value
-   of the byte at 'a'.  Note this is not a pure read: if the tree
-   has no 8-bit node for this byte, the covering node is pulled down
-   to 8 bits first, which changes the cache line's tree and
-   descriptor.  Unlike the read/write/set functions, no
-   SCE_CACHELINE sanity check follows the pulldown here. */
-   CacheLine* cl; 
-   UWord      cloff, tno, toff;
-   UShort     descr;
-   stats__cline_get8s++;
-   cl    = get_cacheline(a);
-   cloff = get_cacheline_offset(a);
-   tno   = get_treeno(a);
-   toff  = get_tree_offset(a); /* == 0 .. 7 */
-   descr = cl->descrs[tno];
-   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
-      SVal* tree = &cl->svals[tno << 3];
-      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
-   }
-   return cl->svals[cloff];
-}
-
-static void shadow_mem_copy8 ( Addr src, Addr dst, Bool normalise ) { /*
-   Copy the shadow value of the byte at 'src' to the byte at 'dst'.
-   If tracing is enabled and 'dst' is the traced address, the state
-   change is reported first, attributed to the current thread.
-   NOTE(review): 'normalise' is never read in this function. */
-   SVal       sv;
-   stats__cline_copy8s++;
-   sv = shadow_mem_get8( src );
-
-   if (UNLIKELY(clo_trace_level > 0)) {
-      if (dst == clo_trace_addr) {
-         Thread* thr    = get_current_Thread();
-         SVal    sv_old = shadow_mem_get8( dst );
-         msm__show_state_change( thr, dst, 1, 'w', sv_old, sv );
-      }
-   }
-
-   shadow_mem_set8( NULL/*unused*/, dst, sv );
-}
-
-
-/* ------------ Shadow memory range setting ops ------------ */
-
-static void shadow_mem_modify_range(
-               Thread* thr, 
-               Addr    a, 
-               SizeT   len,
-               void    (*fn8) (Thread*,Addr,SVal),
-               void    (*fn16)(Thread*,Addr,SVal),
-               void    (*fn32)(Thread*,Addr,SVal),
-               void    (*fn64)(Thread*,Addr,SVal),
-               SVal    opaque
-            )
-{  /* Apply the given 1/2/4/8-byte handlers over the range
-      [a, a+len), always calling them on naturally aligned addresses.
-      Leading pieces of 1, 2 and 4 bytes are issued as needed to reach
-      8-byte alignment, then as many 8-byte pieces as fit, then
-      trailing pieces of 4, 2 and 1 bytes.  'thr' and 'opaque' are
-      passed unchanged to every handler call.  Does nothing if len is
-      zero. */
-   /* fast track a couple of common cases */
-   if (len == 4 && aligned32(a)) {
-      fn32( thr, a, opaque );
-      return;
-   }
-   if (len == 8 && aligned64(a)) {
-      fn64( thr, a, opaque );
-      return;
-   }
-
-   /* be completely general (but as efficient as possible) */
-   if (len == 0) return;
-
-   if (!aligned16(a) && len >= 1) { /* leading odd byte */
-      fn8( thr, a, opaque );
-      a += 1;
-      len -= 1;
-      tl_assert(aligned16(a));
-   }
-   if (len == 0) return;
-
-   if (!aligned32(a) && len >= 2) { /* leading halfword */
-      fn16( thr, a, opaque );
-      a += 2;
-      len -= 2;
-      tl_assert(aligned32(a));
-   }
-   if (len == 0) return;
-
-   if (!aligned64(a) && len >= 4) { /* leading word */
-      fn32( thr, a, opaque );
-      a += 4;
-      len -= 4;
-      tl_assert(aligned64(a));
-   }
-   if (len == 0) return;
-
-   if (len >= 8) { /* main body, 8 bytes at a time */
-      tl_assert(aligned64(a));
-      while (len >= 8) {
-         fn64( thr, a, opaque );
-         a += 8;
-         len -= 8;
-      }
-      tl_assert(aligned64(a));
-   }
-   if (len == 0) return;
-
-   if (len >= 4)
-      tl_assert(aligned32(a));
-   if (len >= 4) { /* trailing word */
-      fn32( thr, a, opaque );
-      a += 4;
-      len -= 4;
-   }
-   if (len == 0) return;
-
-   if (len >= 2)
-      tl_assert(aligned16(a));
-   if (len >= 2) { /* trailing halfword */
-      fn16( thr, a, opaque );
-      a += 2;
-      len -= 2;
-   }
-   if (len == 0) return;
-
-   if (len >= 1) { /* trailing byte */
-      fn8( thr, a, opaque );
-      a += 1;
-      len -= 1;
-   }
-   tl_assert(len == 0);
-}
-
-/* Block-copy states (needed for implementing realloc()).  Copies the
-   shadow values of src[0 .. len-1] to dst[0 .. len-1], one byte at a
-   time in increasing address order, via shadow_mem_copy8.  Does
-   nothing if len is zero. */
-static void shadow_mem_copy_range ( Addr src, Addr dst, SizeT len )
-{
-   SizeT i;
-   if (len == 0)
-      return;
-   /* To be simple, just copy byte by byte.  But so as not to wreck
-      performance for later accesses to dst[0 .. len-1], normalise
-      destination lines as we finish with them, and also normalise the
-      line containing the first and last address.
-      NOTE(review): the 'normalise' flag is computed here and passed
-      on, but shadow_mem_copy8 never reads it, so no normalisation
-      appears to happen -- confirm. */
-   for (i = 0; i < len; i++) {
-      Bool normalise
-         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
-           || i == 0       /* first in range */
-           || i == len-1;  /* last in range */
-      shadow_mem_copy8( src+i, dst+i, normalise );
-   }
-}
-
-static void shadow_mem_read_range ( Thread* thr, Addr a, SizeT len ) { /*
-   Process a read of [a, a+len) by 'thr': shadow_mem_modify_range
-   splits the range into pieces and applies the shadow_mem_read*
-   handlers to them. */
-   shadow_mem_modify_range( thr, a, len, 
-                            shadow_mem_read8,
-                            shadow_mem_read16,
-                            shadow_mem_read32,
-                            shadow_mem_read64,
-                            0/*opaque,ignored*/ );
-}
-
-static void shadow_mem_write_range ( Thread* thr, Addr a, SizeT len ) { /*
-   Process a write of [a, a+len) by 'thr': shadow_mem_modify_range
-   splits the range into pieces and applies the shadow_mem_write*
-   handlers to them. */
-   shadow_mem_modify_range( thr, a, len, 
-                            shadow_mem_write8,
-                            shadow_mem_write16,
-                            shadow_mem_write32,
-                            shadow_mem_write64,
-                            0/*opaque,ignored*/ );
-}
-
-static void shadow_mem_make_New ( Thread* thr, Addr a, SizeT len )
-{
-   if (UNLIKELY(clo_trace_level > 0)) {
-      if (len > 0 && a <= clo_trace_addr && clo_trace_addr < a+len) {
-         SVal sv_old = shadow_mem_get8( clo_trace_addr );
-         msm__show_state_change( thr, a, (Int)len, 'p', sv_old, SHVAL_New );
-      }
-   }
-   shadow_mem_modify_range( thr, a, len, 
-                            shadow_mem_set8,
-                            shadow_mem_set16,
-                            shadow_mem_set32,
-                            shadow_mem_set64,
-                            SHVAL_New/*opaque*/ );
-}
-
-
-/* Putting memory into the NoAccess state.  This is hugely complicated
-   by the problem of memory that contains locks.
-
-   1. Examine the .mbHasLocks fields in all SecMaps in the range to be
-      deleted.  This quickly indicates if there are or might be any
-      locks in the range to be deleted.  Note that .mbHasLocks fields on
-      SecMaps are not subject to scaching, so it safe to look at them
-      without flushing the scache.
-
-   2. Set the range to NoAccess.  Clear the .mbHasShared and
-      .mbHasLocks hint bits for any completely vacated SecMaps.
-      Clearing the hint bits isn't necessary for correctness, but it
-      is important to avoid ending up with hint bits being permanently
-      set, which would render them pointless.
-
-   3. If (1) indicated "definitely no locks", we're done.  This is
-      the fast and hopefully common case.
-
-   Otherwise, the range contains some locks (or may do), so we have to
-   go to considerable effort to tidy up.
-
-   4. Make up a set containing the locks which are deleted:
-
-      ToDelete = NULL
-
-      for each lk in map_locks {
-         if lk's guest addr falls in the range to memory be deleted
-            add lk to ToDelete
-
-         if lk is held, issue an error message - freeing memory
-            containing a held lock
-      }
-
-   5. If ToDelete is empty, there were in fact no locks in the range,
-      despite what the .mbHasLocks hint bits indicated.  We're done.
-
-   6. Flush the scache.  This is necessary both to bring the SecMap
-      .mbHasShared fields up to date, and to bring the actual shadow
-      values up to date.  We will need to examine both of these.
-
-      Invalidate the scache.  This is necessary because we will be
-      modifying values in the backing store (SecMaps) and need
-      subsequent shmem accesses to get the new values.
-
-   7. Modify all shadow words, by removing ToDelete from the lockset
-      of all ShM and ShR states.  Note this involves a complete scan
-      over map_shmem, which is very expensive according to OProfile.
-      Hence it depends critically on the size of each entry in
-      map_shmem.  See comments on definition of N_SECMAP_BITS above.
-
-      Why is it safe to do (7) after (2) ?  Because we're not
-      interested in messing with ShR/M states which are going to be
-      set to NoAccess anyway.
-
-      Optimisation 1 (implemented): skip this step for SecMaps which
-      do not have .mbHasShared set
-
-      Optimisation 2 (not implemented): for each SecMap, have a
-      summary lock set which is the union of all locks mentioned in
-      locksets on this page (or any superset of it).  Then skip step
-      (2) if the summary lockset does not intersect with ToDelete.
-
-      That's potentially cheap, since the usual lockset refinement
-      only shrinks locksets; hence there is no point in updating the
-      summary lockset for ShM/R -> ShM/R transitions.  Therefore only
-      need to do this for Excl->ShM/R transitions.
-
-   8. Tell laog that these locks have disappeared.
-*/
-static void shadow_mem_make_NoAccess ( Thread* thr, Addr aIN, SizeT len )
-{
-   Lock*     lk;
-   Addr      gla, sma, firstSM, lastSM, firstA, lastA;
-   WordSetID locksToDelete;
-   Bool      mbHasLocks;
-
-   if (0 && len > 500)
-      VG_(printf)("make NoAccess ( %#lx, %ld )\n", aIN, len );
-
-   if (len == 0) 
-      return;
-
-   /* --- Step 1 --- */
-
-   firstA  = aIN;
-   lastA   = aIN + len - 1;
-
-   firstSM = shmem__round_to_SecMap_base( firstA );
-   lastSM  = shmem__round_to_SecMap_base( lastA );
-   tl_assert(firstSM <= lastSM);
-
-   mbHasLocks = False;
-   for (sma = firstSM; sma <= lastSM; sma += N_SECMAP_ARANGE) {
-      if (shmem__get_mbHasLocks(sma)) {
-         mbHasLocks = True;
-         break;
-      }
-   }
-
-   /* --- Step 2 --- */
-
-   if (UNLIKELY(clo_trace_level > 0)) {
-      if (len > 0 && firstA <= clo_trace_addr && clo_trace_addr <= lastA) {
-         SVal sv_old = shadow_mem_get8( clo_trace_addr );
-         msm__show_state_change( thr, firstA, (Int)len, 'p',
-                                      sv_old, SHVAL_NoAccess );
-      }
-   }
-   shadow_mem_modify_range( thr, firstA, len, 
-                            shadow_mem_set8,
-                            shadow_mem_set16,
-                            shadow_mem_set32,
-                            shadow_mem_set64,
-                            SHVAL_NoAccess/*opaque*/ );
-
-   for (sma = firstSM; sma <= lastSM; sma += N_SECMAP_ARANGE) {
-      /* Is this sm entirely within the deleted range? */
-      if (firstA <= sma && sma + N_SECMAP_ARANGE - 1 <= lastA) {
-         /* Yes.  Clear the hint bits. */
-         shmem__set_mbHasLocks( sma, False );
-         shmem__set_mbHasShared( sma, False );
-      }
-   }
-
-   /* --- Step 3 --- */
-
-   if (!mbHasLocks)
-      return;
-
-   /* --- Step 4 --- */
-
-   if (0) 
-   VG_(printf)("shadow_mem_make_NoAccess(%p, %lu, %p): maybe slow case\n",
-               (void*)firstA, (UWord)len, (void*)lastA);
-   locksToDelete = HG_(emptyWS)( univ_lsets );
-   
-   /* Iterate over all locks in the range firstA .. lastA inclusive. */
-   VG_(initIterAtFM)( map_locks, firstA );
-   while (VG_(nextIterFM)( map_locks, (Word*)&gla, (Word*)&lk )
-          && gla <= lastA) {
-      tl_assert(is_sane_LockN(lk));
-      tl_assert(gla >= firstA);
-      tl_assert(gla <= lastA);
-
-      locksToDelete = HG_(addToWS)( univ_lsets, locksToDelete, (Word)lk );
-      /* If the lock is held, we must remove it from the currlock sets
-         of all threads that hold it.  Also take the opportunity to
-         report an error.  To report an error we need to know at least
-         one of the threads that holds it; really we should mention
-         them all, but that's too much hassle.  So choose one
-         arbitrarily. */
-      if (lk->heldBy) {
-         tl_assert(!VG_(isEmptyBag)(lk->heldBy));
-         record_error_FreeMemLock( (Thread*)VG_(anyElementOfBag)(lk->heldBy),
-                                   lk );
-         /* remove lock from locksets of all owning threads */
-         remove_Lock_from_locksets_of_all_owning_Threads( lk );
-         /* Leave lk->heldBy in place; del_Lock below will free it up. */
-      }
-   }
-   VG_(doneIterFM)( map_locks );
-
-   /* --- Step 5 --- */
-
-   if (HG_(isEmptyWS)( univ_lsets, locksToDelete ))
-      return;
-
-   /* --- Step 6 --- */
-
-   shmem__flush_and_invalidate_scache();
-
-   /* --- Step 7 --- */
-
-   if (0) 
-   VG_(printf)("shadow_mem_make_NoAccess(%p, %lu, %p): definitely slow case\n",
-               (void*)firstA, (UWord)len, (void*)lastA);
-
-   /* Modify all shadow words, by removing locksToDelete from the lockset
-      of all ShM and ShR states.
-      Optimisation 1: skip SecMaps which do not have .mbHasShared set
-   */
-   { Int        stats_SMs = 0, stats_SMs_scanned = 0;
-     Addr       ga;
-     SecMap*    sm;
-     SecMapIter itr;
-     SVal*      w32p = NULL;
-
-     VG_(initIterFM)( map_shmem );
-     while (VG_(nextIterFM)( map_shmem,
-                             (Word*)&ga, (Word*)&sm )) {
-        tl_assert(sm);
-        stats_SMs++;
-        /* Skip this SecMap if the summary bit indicates it is safe to
-           do so. */
-        if (!sm->mbHasShared)
-           continue;
-        stats_SMs_scanned++;
-        initSecMapIter( &itr );
-        while (stepSecMapIter( &w32p, &itr, sm )) {
-           Bool isM;
-           SVal wold, wnew; 
-           UInt lset_old, tset_old, lset_new;
-           wold = *w32p;
-           if (LIKELY( !is_SHVAL_Sh(wold) ))
-              continue;
-           isM      = is_SHVAL_ShM(wold);
-           lset_old = un_SHVAL_Sh_lset(wold);
-           tset_old = un_SHVAL_Sh_tset(wold);
-           lset_new = HG_(minusWS)( univ_lsets, lset_old, locksToDelete );
-           wnew     = isM ? mk_SHVAL_ShM(tset_old, lset_new)
-                          : mk_SHVAL_ShR(tset_old, lset_new);
-           if (wnew != wold)
-              *w32p = wnew;
-        }
-     }
-     VG_(doneIterFM)( map_shmem );
-     if (SHOW_EXPENSIVE_STUFF)
-        VG_(printf)("shadow_mem_make_NoAccess: %d SMs, %d scanned\n", 
-                    stats_SMs, stats_SMs_scanned);
-   }
-
-   /* Now we have to free up the Locks in locksToDelete and remove
-      any mention of them from admin_locks and map_locks.  This is
-      inefficient. */
-   { Lock* lkprev = NULL;
-     lk = admin_locks;
-     while (True) {
-        if (lk == NULL) break;
-        if (lkprev) tl_assert(lkprev->admin == lk);
-
-        if (!HG_(elemWS)(univ_lsets, locksToDelete, (Word)lk)) {
-           lkprev = lk;
-           lk = lk->admin;
-           continue;
-        }
-        /* Need to delete 'lk' */
-        if (lkprev == NULL) {
-           admin_locks = lk->admin;
-        } else {
-           lkprev->admin = lk->admin;
-        }
-        /* and get it out of map_locks */
-        map_locks_delete(lk->guestaddr);
-        /* release storage (incl. associated .heldBy Bag) */
-        { Lock* tmp = lk->admin;
-          del_LockN(lk);
-          lk = tmp;
-        }
-     }
-   }
+   Thr*     hbthr = thr->hbthr;
+   tl_assert(hbthr);
+   LIBHB_READ_N(hbthr, a, len);
+}
  
-   /* --- Step 8 --- */
+static void shadow_mem_write_range ( Thread* thr, Addr a, SizeT len ) {
+   Thr*     hbthr = thr->hbthr;   /* thr's libhb-level entity */
+   tl_assert(hbthr);              /* must exist before it is used below */
+   LIBHB_WRITE_N(hbthr, a, len);  /* presumably a len-byte write at a; confirm vs macro */
+}
  
-   /* update lock order acquisition graph */
-   laog__handle_lock_deletions( locksToDelete );
+static void shadow_mem_make_New ( Thread* thr, Addr a, SizeT len )
+{  /* Thin forwarder: passes thr's hbthr, a and len to libhb_range_new. */
+   libhb_range_new( thr->hbthr, a, len );
+}
  
-   if (0) all__sanity_check("Make NoAccess");
+static void shadow_mem_make_NoAccess ( Thread* thr, Addr aIN, SizeT len )
+{  /* Thin forwarder: passes thr's hbthr, aIN and len to libhb_range_noaccess. */
+   if (0 && len > 500)  /* debug trace, disabled by the constant 0 */
+      VG_(printf)("make NoAccess ( %#lx, %lu )\n", aIN, len ); /* len is unsigned */
+   libhb_range_noaccess( thr->hbthr, aIN, len );
  }
  
  
@@ -5075,24 +1087,24 @@ static void shadow_mem_make_NoAccess ( Thread* thr, Addr aIN, SizeT len )
     existing segment, bind together the SegmentID and Segment, and
     return both of them.  Also update 'thr' so it references the new
     Segment. */
-static 
-void evhH__start_new_segment_for_thread ( /*OUT*/SegmentID* new_segidP,
-                                          /*OUT*/Segment** new_segP,
-                                          Thread* thr )
-{
-   Segment* cur_seg;
-   tl_assert(new_segP);
-   tl_assert(new_segidP);
-   tl_assert(is_sane_Thread(thr));
-   cur_seg = map_segments_lookup( thr->csegid );
-   tl_assert(cur_seg);
-   tl_assert(cur_seg->thr == thr); /* all sane segs should point back
-                                      at their owner thread. */
-   *new_segP = mk_Segment( thr, cur_seg, NULL/*other*/ );
-   *new_segidP = alloc_SegmentID();
-   map_segments_add( *new_segidP, *new_segP );
-   thr->csegid = *new_segidP;
-}
+//zz static 
+//zz void evhH__start_new_segment_for_thread ( /*OUT*/SegmentID* new_segidP,
+//zz                                           /*OUT*/Segment** new_segP,
+//zz                                           Thread* thr )
+//zz {
+//zz    Segment* cur_seg;
+//zz    tl_assert(new_segP);
+//zz    tl_assert(new_segidP);
+//zz    tl_assert(HG_(is_sane_Thread)(thr));
+//zz    cur_seg = map_segments_lookup( thr->csegid );
+//zz    tl_assert(cur_seg);
+//zz    tl_assert(cur_seg->thr == thr); /* all sane segs should point back
+//zz                                       at their owner thread. */
+//zz    *new_segP = mk_Segment( thr, cur_seg, NULL/*other*/ );
+//zz    *new_segidP = alloc_SegmentID();
+//zz    map_segments_add( *new_segidP, *new_segP );
+//zz    thr->csegid = *new_segidP;
+//zz }
  
  
  /* The lock at 'lock_ga' has acquired a writer.  Make all necessary
@@ -5106,7 +1118,7 @@ void evhH__post_thread_w_acquires_lock ( Thread* thr,
     /* Basically what we need to do is call lockN_acquire_writer.
        However, that will barf if any 'invalid' lock states would
        result.  Therefore check before calling.  Side effect is that
-      'is_sane_LockN(lk)' is both a pre- and post-condition of this
+      'HG_(is_sane_LockN)(lk)' is both a pre- and post-condition of this
        routine. 
  
        Because this routine is only called after successful lock
@@ -5114,22 +1126,23 @@ void evhH__post_thread_w_acquires_lock ( Thread* thr,
        invalid states.  Requests to do so are bugs in libpthread, since
        that should have rejected any such requests. */
  
-   /* be paranoid w.r.t hint bits, even if lock_ga is complete
-      nonsense */
-   shmem__set_mbHasLocks( lock_ga, True );
-
-   tl_assert(is_sane_Thread(thr));
+   tl_assert(HG_(is_sane_Thread)(thr));
     /* Try to find the lock.  If we can't, then create a new one with
        kind 'lkk'. */
     lk = map_locks_lookup_or_create( 
             lkk, lock_ga, map_threads_reverse_lookup_SLOW(thr) );
-   tl_assert( is_sane_LockN(lk) );
-   shmem__set_mbHasLocks( lock_ga, True );
+   tl_assert( HG_(is_sane_LockN)(lk) );
+
+   /* check libhb level entities exist */
+   tl_assert(thr->hbthr);
+   tl_assert(lk->hbso);
  
     if (lk->heldBy == NULL) {
        /* the lock isn't held.  Simple. */
        tl_assert(!lk->heldW);
        lockN_acquire_writer( lk, thr );
+      /* acquire a dependency from the lock's VCs */
+      libhb_so_recv( thr->hbthr, lk->hbso, True/*strong_recv*/ );
        goto noerror;
     }
  
@@ -5137,8 +1150,9 @@ void evhH__post_thread_w_acquires_lock ( Thread* thr,
        libpthread must be buggy. */
     tl_assert(lk->heldBy);
     if (!lk->heldW) {
-      record_error_Misc( thr, "Bug in libpthread: write lock "
-                              "granted on rwlock which is currently rd-held");
+      HG_(record_error_Misc)(
+         thr, "Bug in libpthread: write lock "
+              "granted on rwlock which is currently rd-held");
        goto error;
     }
  
@@ -5147,9 +1161,10 @@ void evhH__post_thread_w_acquires_lock ( Thread* thr,
     tl_assert(VG_(sizeUniqueBag)(lk->heldBy) == 1); /* from precondition */
  
     if (thr != (Thread*)VG_(anyElementOfBag)(lk->heldBy)) {
-      record_error_Misc( thr, "Bug in libpthread: write lock "
-                              "granted on mutex/rwlock which is currently "
-                              "wr-held by a different thread");
+      HG_(record_error_Misc)(
+         thr, "Bug in libpthread: write lock "
+              "granted on mutex/rwlock which is currently "
+              "wr-held by a different thread");
        goto error;
     }
  
@@ -5159,14 +1174,18 @@ void evhH__post_thread_w_acquires_lock ( Thread* thr,
        once the lock has been acquired, this must also be a libpthread
        bug. */
     if (lk->kind != LK_mbRec) {
-      record_error_Misc( thr, "Bug in libpthread: recursive write lock "
-                              "granted on mutex/wrlock which does not "
-                              "support recursion");
+      HG_(record_error_Misc)(
+         thr, "Bug in libpthread: recursive write lock "
+              "granted on mutex/wrlock which does not "
+              "support recursion");
        goto error;
     }
  
     /* So we are recursively re-locking a lock we already w-hold. */
     lockN_acquire_writer( lk, thr );
+   /* acquire a dependency from the lock's VC.  Probably pointless,
+      but also harmless. */
+   libhb_so_recv( thr->hbthr, lk->hbso, True/*strong_recv*/ );
     goto noerror;
  
    noerror:
@@ -5179,7 +1198,7 @@ void evhH__post_thread_w_acquires_lock ( Thread* thr,
     /* fall through */
  
    error:
-   tl_assert(is_sane_LockN(lk));
+   tl_assert(HG_(is_sane_LockN)(lk));
  }
  
  
@@ -5194,7 +1213,7 @@ void evhH__post_thread_r_acquires_lock ( Thread* thr,
     /* Basically what we need to do is call lockN_acquire_reader.
        However, that will barf if any 'invalid' lock states would
        result.  Therefore check before calling.  Side effect is that
-      'is_sane_LockN(lk)' is both a pre- and post-condition of this
+      'HG_(is_sane_LockN)(lk)' is both a pre- and post-condition of this
        routine. 
  
        Because this routine is only called after successful lock
@@ -5202,24 +1221,25 @@ void evhH__post_thread_r_acquires_lock ( Thread* thr,
        invalid states.  Requests to do so are bugs in libpthread, since
        that should have rejected any such requests. */
  
-   /* be paranoid w.r.t hint bits, even if lock_ga is complete
-      nonsense */
-   shmem__set_mbHasLocks( lock_ga, True );
-
-   tl_assert(is_sane_Thread(thr));
+   tl_assert(HG_(is_sane_Thread)(thr));
     /* Try to find the lock.  If we can't, then create a new one with
        kind 'lkk'.  Only a reader-writer lock can be read-locked,
        hence the first assertion. */
     tl_assert(lkk == LK_rdwr);
     lk = map_locks_lookup_or_create( 
             lkk, lock_ga, map_threads_reverse_lookup_SLOW(thr) );
-   tl_assert( is_sane_LockN(lk) );
-   shmem__set_mbHasLocks( lock_ga, True );
+   tl_assert( HG_(is_sane_LockN)(lk) );
+
+   /* check libhb level entities exist */
+   tl_assert(thr->hbthr);
+   tl_assert(lk->hbso);
  
     if (lk->heldBy == NULL) {
        /* the lock isn't held.  Simple. */
        tl_assert(!lk->heldW);
        lockN_acquire_reader( lk, thr );
+      /* acquire a dependency from the lock's VC */
+      libhb_so_recv( thr->hbthr, lk->hbso, False/*!strong_recv*/ );
        goto noerror;
     }
  
@@ -5227,15 +1247,18 @@ void evhH__post_thread_r_acquires_lock ( Thread* thr,
        libpthread must be buggy. */
     tl_assert(lk->heldBy);
     if (lk->heldW) {
-      record_error_Misc( thr, "Bug in libpthread: read lock "
-                              "granted on rwlock which is "
-                              "currently wr-held");
+      HG_(record_error_Misc)( thr, "Bug in libpthread: read lock "
+                                   "granted on rwlock which is "
+                                   "currently wr-held");
        goto error;
     }
  
     /* Easy enough.  In short anybody can get a read-lock on a rwlock
        provided it is either unlocked or already in rd-held. */
     lockN_acquire_reader( lk, thr );
+   /* acquire a dependency from the lock's VC.  Probably pointless,
+      but also harmless. */
+   libhb_so_recv( thr->hbthr, lk->hbso, False/*!strong_recv*/ );
     goto noerror;
  
    noerror:
@@ -5248,7 +1271,7 @@ void evhH__post_thread_r_acquires_lock ( Thread* thr,
     /* fall through */
  
    error:
-   tl_assert(is_sane_LockN(lk));
+   tl_assert(HG_(is_sane_LockN)(lk));
  }
  
  
@@ -5260,6 +1283,7 @@ void evhH__pre_thread_releases_lock ( Thread* thr,
  {
     Lock* lock;
     Word  n;
+   Bool  was_heldW;
  
     /* This routine is called prior to a lock release, before
        libpthread has had a chance to validate the call.  Hence we need
@@ -5270,43 +1294,43 @@ void evhH__pre_thread_releases_lock ( Thread* thr,
        should refer to a reader-writer lock, and is False if [ditto]
        lock_ga should refer to a standard mutex. */
  
-   /* be paranoid w.r.t hint bits, even if lock_ga is complete
-      nonsense */
-   shmem__set_mbHasLocks( lock_ga, True );
-
-   tl_assert(is_sane_Thread(thr));
+   tl_assert(HG_(is_sane_Thread)(thr));
     lock = map_locks_maybe_lookup( lock_ga );
  
     if (!lock) {
        /* We know nothing about a lock at 'lock_ga'.  Nevertheless
           the client is trying to unlock it.  So complain, then ignore
           the attempt. */
-      record_error_UnlockBogus( thr, lock_ga );
+      HG_(record_error_UnlockBogus)( thr, lock_ga );
        return;
     }
  
     tl_assert(lock->guestaddr == lock_ga);
-   tl_assert(is_sane_LockN(lock));
+   tl_assert(HG_(is_sane_LockN)(lock));
  
     if (isRDWR && lock->kind != LK_rdwr) {
-      record_error_Misc( thr, "pthread_rwlock_unlock with a "
-                              "pthread_mutex_t* argument " );
+      HG_(record_error_Misc)( thr, "pthread_rwlock_unlock with a "
+                                   "pthread_mutex_t* argument " );
     }
     if ((!isRDWR) && lock->kind == LK_rdwr) {
-      record_error_Misc( thr, "pthread_mutex_unlock with a "
-                              "pthread_rwlock_t* argument " );
+      HG_(record_error_Misc)( thr, "pthread_mutex_unlock with a "
+                                   "pthread_rwlock_t* argument " );
     }
  
     if (!lock->heldBy) {
        /* The lock is not held.  This indicates a serious bug in the
           client. */
        tl_assert(!lock->heldW);
-      record_error_UnlockUnlocked( thr, lock );
+      HG_(record_error_UnlockUnlocked)( thr, lock );
        tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetA, (Word)lock ));
        tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetW, (Word)lock ));
        goto error;
     }
  
+   /* test just above dominates */
+   tl_assert(lock->heldBy);
+   was_heldW = lock->heldW;
+
     /* The lock is held.  Is this thread one of the holders?  If not,
        report a bug in the client. */
     n = VG_(elemBag)( lock->heldBy, (Word)thr );
@@ -5317,11 +1341,11 @@ void evhH__pre_thread_releases_lock ( Thread* thr,
           attempt will fail.  So just complain and do nothing
           else. */
        Thread* realOwner = (Thread*)VG_(anyElementOfBag)( lock->heldBy );
-      tl_assert(is_sane_Thread(realOwner));
+      tl_assert(HG_(is_sane_Thread)(realOwner));
        tl_assert(realOwner != thr);
        tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetA, (Word)lock ));
        tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetW, (Word)lock ));
-      record_error_UnlockForeign( thr, realOwner, lock );
+      HG_(record_error_UnlockForeign)( thr, realOwner, lock );
        goto error;
     }
  
@@ -5347,19 +1371,27 @@ void evhH__pre_thread_releases_lock ( Thread* thr,
           tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetW, (Word)lock ));
     } else {
        /* We no longer hold the lock. */
-      if (lock->heldBy) {
-         tl_assert(0 == VG_(elemBag)( lock->heldBy, (Word)thr ));
-      }
+      tl_assert(!lock->heldBy);
+      tl_assert(lock->heldW == False);
+      //if (lock->heldBy) {
+      //   tl_assert(0 == VG_(elemBag)( lock->heldBy, (Word)thr ));
+      //}
        /* update this thread's lockset accordingly. */
        thr->locksetA
           = HG_(delFromWS)( univ_lsets, thr->locksetA, (Word)lock );
        thr->locksetW
           = HG_(delFromWS)( univ_lsets, thr->locksetW, (Word)lock );
+      /* push our VC into the lock */
+      tl_assert(thr->hbthr);
+      tl_assert(lock->hbso);
+      /* If the lock was previously W-held, then we want to do a
+         strong send, and if previously R-held, then a weak send. */
+      libhb_so_send( thr->hbthr, lock->hbso, was_heldW );
     }
     /* fall through */
  
    error:
-   tl_assert(is_sane_LockN(lock));
+   tl_assert(HG_(is_sane_LockN)(lock));
  }
  
  
@@ -5393,6 +1425,7 @@ static void evh__stop_client_code ( ThreadId tid, ULong nDisp ) {
     if (0) VG_(printf)(" stop %d %llu\n", (Int)tid, nDisp);
     tl_assert(current_Thread != NULL);
     current_Thread = NULL;
+   libhb_maybe_GC();
  }
  static inline Thread* get_current_Thread_in_C_C ( void ) {
     return current_Thread;
@@ -5406,7 +1439,7 @@ static inline Thread* get_current_Thread ( void ) {
     /* evidently not in client code.  Do it the slow way. */
     coretid = VG_(get_running_tid)();
     /* FIXME: get rid of the following kludge.  It exists because
-      evim__new_mem is called during initialisation (as notification
+      evh__new_mem is called during initialisation (as notification
        of initial memory layout) and VG_(get_running_tid)() returns
        VG_INVALID_THREADID at that point. */
     if (coretid == VG_INVALID_THREADID)
@@ -5420,7 +1453,7 @@ void evh__new_mem ( Addr a, SizeT len ) {
     if (SHOW_EVENTS >= 2)
        VG_(printf)("evh__new_mem(%p, %lu)\n", (void*)a, len );
     shadow_mem_make_New( get_current_Thread(), a, len );
-   if (len >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__new_mem-post");
  }
  
@@ -5429,7 +1462,7 @@ void evh__new_mem_w_tid ( Addr a, SizeT len, ThreadId tid ) {
     if (SHOW_EVENTS >= 2)
        VG_(printf)("evh__new_mem_w_tid(%p, %lu)\n", (void*)a, len );
     shadow_mem_make_New( get_current_Thread(), a, len );
-   if (len >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__new_mem_w_tid-post");
  }
  
@@ -5441,7 +1474,7 @@ void evh__new_mem_w_perms ( Addr a, SizeT len,
                    (void*)a, len, (Int)rr, (Int)ww, (Int)xx );
     if (rr || ww || xx)
        shadow_mem_make_New( get_current_Thread(), a, len );
-   if (len >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__new_mem_w_perms-post");
  }
  
@@ -5456,7 +1489,7 @@ void evh__set_perms ( Addr a, SizeT len,
        NoAccess, else leave it alone. */
     if (!(rr || ww))
        shadow_mem_make_NoAccess( get_current_Thread(), a, len );
-   if (len >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__set_perms-post");
  }
  
@@ -5465,7 +1498,7 @@ void evh__die_mem ( Addr a, SizeT len ) {
     if (SHOW_EVENTS >= 2)
        VG_(printf)("evh__die_mem(%p, %lu)\n", (void*)a, len );
     shadow_mem_make_NoAccess( get_current_Thread(), a, len );
-   if (len >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__die_mem-post");
  }
  
@@ -5477,13 +1510,13 @@ void evh__pre_thread_ll_create ( ThreadId parent, ThreadId child )
                    (Int)parent, (Int)child );
  
     if (parent != VG_INVALID_THREADID) {
-      Thread*   thr_p;
-      Thread*   thr_c;
-      SegmentID segid_c;
-      Segment*  seg_c;
+      Thread* thr_p;
+      Thread* thr_c;
+      Thr*    hbthr_p;
+      Thr*    hbthr_c;
  
-      tl_assert(is_sane_ThreadId(parent));
-      tl_assert(is_sane_ThreadId(child));
+      tl_assert(HG_(is_sane_ThreadId)(parent));
+      tl_assert(HG_(is_sane_ThreadId)(child));
        tl_assert(parent != child);
  
        thr_p = map_threads_maybe_lookup( parent );
@@ -5492,18 +1525,22 @@ void evh__pre_thread_ll_create ( ThreadId parent, ThreadId child )
        tl_assert(thr_p != NULL);
        tl_assert(thr_c == NULL);
  
-      /* Create a new thread record for the child. */
-      // FIXME: code duplication from init_data_structures
-      segid_c = alloc_SegmentID();
-      seg_c   = mk_Segment( NULL/*thr*/, NULL/*prev*/, NULL/*other*/ );
-      map_segments_add( segid_c, seg_c );
+      hbthr_p = thr_p->hbthr;
+      tl_assert(hbthr_p != NULL);
+      tl_assert( libhb_get_Thr_opaque(hbthr_p) == thr_p );
+
+      hbthr_c = libhb_create ( hbthr_p );
  
+      /* Create a new thread record for the child. */
        /* a Thread for the new thread ... */
-      thr_c = mk_Thread( segid_c );
-      seg_c->thr = thr_c;
+      thr_c = mk_Thread( hbthr_c );
+      tl_assert( libhb_get_Thr_opaque(hbthr_c) == NULL );
+      libhb_set_Thr_opaque(hbthr_c, thr_c);
  
        /* and bind it in the thread-map table */
        map_threads[child] = thr_c;
+      tl_assert(thr_c->coretid == VG_INVALID_THREADID);
+      thr_c->coretid = child;
  
        /* Record where the parent is so we can later refer to this in
           error messages.
@@ -5522,28 +1559,9 @@ void evh__pre_thread_ll_create ( ThreadId parent, ThreadId child )
  #       endif
          thr_c->created_at = VG_(record_ExeContext)(parent, first_ip_delta);
        }
-
-      /* Now, mess with segments. */ 
-      if (clo_happens_before >= 1) {
-         /* Make the child's new segment depend on the parent */
-         seg_c->other = map_segments_lookup( thr_p->csegid );
-         seg_c->other_hint = 'c';
-         seg_c->vts = tick_VTS( thr_c, seg_c->other->vts );
-         tl_assert(seg_c->prev == NULL);
-         /* and start a new segment for the parent. */
-         { SegmentID new_segid = 0; /* bogus */
-           Segment*  new_seg   = NULL;
-           evhH__start_new_segment_for_thread( &new_segid, &new_seg, 
-                                               thr_p );
-           tl_assert(is_sane_SegmentID(new_segid));
-           tl_assert(is_sane_Segment(new_seg));
-           new_seg->vts = tick_VTS( thr_p, new_seg->prev->vts );
-           tl_assert(new_seg->other == NULL);
-         }
-      }
     }
  
-   if (clo_sanity_flags & SCE_THREADS)
+   if (HG_(clo_sanity_flags) & SCE_THREADS)
        all__sanity_check("evh__pre_thread_create-post");
  }
  
@@ -5570,7 +1588,7 @@ void evh__pre_thread_ll_exit ( ThreadId quit_tid )
        how NPTL works).  In which case there has already been a prior
        sync event.  So in any case, just let the thread exit.  On NPTL,
        all thread exits go through here. */
-   tl_assert(is_sane_ThreadId(quit_tid));
+   tl_assert(HG_(is_sane_ThreadId)(quit_tid));
     thr_q = map_threads_maybe_lookup( quit_tid );
     tl_assert(thr_q != NULL);
  
@@ -5581,31 +1599,34 @@ void evh__pre_thread_ll_exit ( ThreadId quit_tid )
        HChar buf[80];
        VG_(sprintf)(buf, "Exiting thread still holds %d lock%s",
                          nHeld, nHeld > 1 ? "s" : "");
-      record_error_Misc( thr_q, buf );
+      HG_(record_error_Misc)( thr_q, buf );
     }
  
     /* About the only thing we do need to do is clear the map_threads
        entry, in order that the Valgrind core can re-use it. */
+   tl_assert(thr_q->coretid == quit_tid);
+   thr_q->coretid = VG_INVALID_THREADID;
     map_threads_delete( quit_tid );
  
-   if (clo_sanity_flags & SCE_THREADS)
+   if (HG_(clo_sanity_flags) & SCE_THREADS)
        all__sanity_check("evh__pre_thread_ll_exit-post");
  }
  
+
  static
  void evh__HG_PTHREAD_JOIN_POST ( ThreadId stay_tid, Thread* quit_thr )
  {
-   Int      stats_SMs, stats_SMs_scanned, stats_reExcls;
-   Addr     ga;
-   SecMap*  sm;
     Thread*  thr_s;
     Thread*  thr_q;
+   Thr*     hbthr_s;
+   Thr*     hbthr_q;
+   SO*      so;
  
     if (SHOW_EVENTS >= 1)
        VG_(printf)("evh__post_thread_join(stayer=%d, quitter=%p)\n",
                    (Int)stay_tid, quit_thr );
  
-   tl_assert(is_sane_ThreadId(stay_tid));
+   tl_assert(HG_(is_sane_ThreadId)(stay_tid));
  
     thr_s = map_threads_maybe_lookup( stay_tid );
     thr_q = quit_thr;
@@ -5613,122 +1634,24 @@ void evh__HG_PTHREAD_JOIN_POST ( ThreadId stay_tid, Thread* quit_thr )
     tl_assert(thr_q != NULL);
     tl_assert(thr_s != thr_q);
  
-   if (clo_happens_before >= 1) {
-      /* Start a new segment for the stayer */
-      SegmentID new_segid = 0; /* bogus */
-      Segment*  new_seg   = NULL;
-      evhH__start_new_segment_for_thread( &new_segid, &new_seg, thr_s );
-      tl_assert(is_sane_SegmentID(new_segid));
-      tl_assert(is_sane_Segment(new_seg));
-      /* and make it depend on the quitter's last segment */
-      tl_assert(new_seg->other == NULL);
-      new_seg->other = map_segments_lookup( thr_q->csegid );
-      new_seg->other_hint = 'j';
-      tl_assert(new_seg->thr == thr_s);
-      new_seg->vts = tickL_and_joinR_VTS( thr_s, new_seg->prev->vts,
-                                                 new_seg->other->vts );
-   }
-
-   // FIXME: error-if: exiting thread holds any locks
-   //        or should evh__pre_thread_ll_exit do that?
-
-   /* Delete thread from ShM/ShR thread sets and restore Excl states
-      where appropriate */
-
-   /* When Thread(t) joins to Thread(u):
-
-      scan all shadow memory.  For each ShM/ShR thread set, replace
-      't' in each set with 'u'.  If this results in a singleton 'u',
-      change the state to Excl(u->csegid).
-
-      Optimisation: tag each SecMap with a superset of the union of
-      the thread sets in the SecMap.  Then if the tag set does not
-      include 't' then the SecMap can be skipped, because there is no
-      't' to change to anything else.
-
-      Problem is that the tag set needs to be updated often, after
-      every ShR/ShM store.  (that increases the thread set of the
-      shadow value.)
-
-      --> Compromise.  Tag each SecMap with a .mbHasShared bit which
-          must be set true if any ShR/ShM on the page.  Set this for
-          any transitions into ShR/ShM on the page.  Then skip page if
-          not set.
-
-      .mbHasShared bits are (effectively) cached in cache_shmem.
-      Hence that must be flushed before we can safely consult them.
-
-      Since we're modifying the backing store, we also need to
-      invalidate cache_shmem, so that subsequent memory references get
-      up to date shadow values.
-   */
-   shmem__flush_and_invalidate_scache();
-
-   stats_SMs = stats_SMs_scanned = stats_reExcls = 0;
-   VG_(initIterFM)( map_shmem );
-   while (VG_(nextIterFM)( map_shmem,
-                           (Word*)&ga, (Word*)&sm )) {
-      SecMapIter itr;
-      SVal*      w32p = NULL;
-      tl_assert(sm);
-      stats_SMs++;
-      /* Skip this SecMap if the summary bit indicates it is safe to
-         do so. */
-      if (!sm->mbHasShared)
-         continue;
-      stats_SMs_scanned++;
-      initSecMapIter( &itr );
-      while (stepSecMapIter( &w32p, &itr, sm )) {
-         Bool isM;
-         SVal wnew, wold;
-         UInt lset_old, tset_old, tset_new;
-         wold = *w32p;
-         if (!is_SHVAL_Sh(wold))
-            continue;
-         isM = is_SHVAL_ShM(wold);
-         lset_old = un_SHVAL_Sh_lset(wold);
-         tset_old = un_SHVAL_Sh_tset(wold);
-         /* Subst thr_q -> thr_s in the thread set.  Longwindedly, if
-            thr_q is in the set, delete it and add thr_s; else leave
-            it alone.  FIXME: is inefficient - make a special
-            substInWS method for this. */
-         tset_new 
-            = HG_(elemWS)( univ_tsets, tset_old, (Word)thr_q )
-              ? HG_(addToWS)(
-                   univ_tsets, 
-                   HG_(delFromWS)( univ_tsets, tset_old, (Word)thr_q ),
-                   (Word)thr_s 
-                )
-              : tset_old;
-
-         tl_assert(HG_(cardinalityWS)(univ_tsets, tset_new) 
-                   <= HG_(cardinalityWS)(univ_tsets, tset_old));
-
-         if (0) {
-            VG_(printf)("smga %#lx: old 0x%x new 0x%x   ",
-                        ga, tset_old, tset_new);
-            HG_(ppWS)( univ_tsets, tset_old );
-            VG_(printf)("  -->  ");
-            HG_(ppWS)( univ_tsets, tset_new );
-            VG_(printf)("\n");
-         }
-         if (HG_(isSingletonWS)( univ_tsets, tset_new, (Word)thr_s )) {
-            /* This word returns to Excl state */
-            wnew = mk_SHVAL_Excl(thr_s->csegid);
-            stats_reExcls++;
-         } else {
-            wnew = isM ? mk_SHVAL_ShM(tset_new, lset_old)
-                       : mk_SHVAL_ShR(tset_new, lset_old);
-         }
-         *w32p = wnew;
-      }
-   }
-   VG_(doneIterFM)( map_shmem );
-
-   if (SHOW_EXPENSIVE_STUFF)
-      VG_(printf)("evh__post_thread_join: %d SMs, "
-                  "%d scanned, %d re-Excls\n", 
-                  stats_SMs, stats_SMs_scanned, stats_reExcls);
+   hbthr_s = thr_s->hbthr;
+   hbthr_q = thr_q->hbthr;
+   tl_assert(hbthr_s != hbthr_q);
+   tl_assert( libhb_get_Thr_opaque(hbthr_s) == thr_s );
+   tl_assert( libhb_get_Thr_opaque(hbthr_q) == thr_q );
+
+   /* Allocate a temporary synchronisation object and use it to send
+      an imaginary message from the quitter to the stayer, the purpose
+      being to generate a dependence from the quitter to the
+      stayer. */
+   so = libhb_so_alloc();
+   tl_assert(so);
+   libhb_so_send(hbthr_q, so, True/*strong_send*/);
+   libhb_so_recv(hbthr_s, so, True/*strong_recv*/);
+   libhb_so_dealloc(so);
+
+   /* evh__pre_thread_ll_exit issues an error message if the exiting
+      thread holds any locks.  No need to check here. */
  
     /* This holds because, at least when using NPTL as the thread
        library, we should be notified the low level thread exit before
@@ -5739,7 +1662,7 @@ void evh__HG_PTHREAD_JOIN_POST ( ThreadId stay_tid, Thread* quit_thr )
     tl_assert( map_threads_maybe_reverse_lookup_SLOW(thr_q)
                == VG_INVALID_THREADID);
  
-   if (clo_sanity_flags & SCE_THREADS)
+   if (HG_(clo_sanity_flags) & SCE_THREADS)
        all__sanity_check("evh__post_thread_join-post");
  }
  
@@ -5751,7 +1674,7 @@ void evh__pre_mem_read ( CorePart part, ThreadId tid, Char* s,
        VG_(printf)("evh__pre_mem_read(ctid=%d, \"%s\", %p, %lu)\n", 
                    (Int)tid, s, (void*)a, size );
     shadow_mem_read_range( map_threads_lookup(tid), a, size);
-   if (size >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (size >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__pre_mem_read-post");
  }
  
@@ -5765,7 +1688,7 @@ void evh__pre_mem_read_asciiz ( CorePart part, ThreadId tid,
     // FIXME: think of a less ugly hack
     len = VG_(strlen)( (Char*) a );
     shadow_mem_read_range( map_threads_lookup(tid), a, len+1 );
-   if (len >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__pre_mem_read_asciiz-post");
  }
  
@@ -5776,7 +1699,7 @@ void evh__pre_mem_write ( CorePart part, ThreadId tid, Char* s,
        VG_(printf)("evh__pre_mem_write(ctid=%d, \"%s\", %p, %lu)\n", 
                    (Int)tid, s, (void*)a, size );
     shadow_mem_write_range( map_threads_lookup(tid), a, size);
-   if (size >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (size >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__pre_mem_write-post");
  }
  
@@ -5791,7 +1714,7 @@ void evh__new_mem_heap ( Addr a, SizeT len, Bool is_inited ) {
     } else {
        shadow_mem_make_New(get_current_Thread(), a, len);
     }
-   if (len >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__pre_mem_read-post");
  }
  
@@ -5800,52 +1723,78 @@ void evh__die_mem_heap ( Addr a, SizeT len ) {
     if (SHOW_EVENTS >= 1)
        VG_(printf)("evh__die_mem_heap(%p, %lu)\n", (void*)a, len );
     shadow_mem_make_NoAccess( get_current_Thread(), a, len );
-   if (len >= SCE_BIGRANGE_T && (clo_sanity_flags & SCE_BIGRANGE))
+   if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE))
        all__sanity_check("evh__pre_mem_read-post");
  }
  
-// thread async exit?
-
  static VG_REGPARM(1)
  void evh__mem_help_read_1(Addr a) {
-   shadow_mem_read8( get_current_Thread_in_C_C(), a, 0/*unused*/ );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_READ_1(hbthr, a);
  }
+
  static VG_REGPARM(1)
  void evh__mem_help_read_2(Addr a) {
-   shadow_mem_read16( get_current_Thread_in_C_C(), a, 0/*unused*/ );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_READ_2(hbthr, a);
  }
+
  static VG_REGPARM(1)
  void evh__mem_help_read_4(Addr a) {
-   shadow_mem_read32( get_current_Thread_in_C_C(), a, 0/*unused*/ );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_READ_4(hbthr, a);
  }
+
  static VG_REGPARM(1)
  void evh__mem_help_read_8(Addr a) {
-   shadow_mem_read64( get_current_Thread_in_C_C(), a, 0/*unused*/ );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_READ_8(hbthr, a);
  }
+
  static VG_REGPARM(2)
  void evh__mem_help_read_N(Addr a, SizeT size) {
-   shadow_mem_read_range( get_current_Thread_in_C_C(), a, size );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_READ_N(hbthr, a, size);
  }
  
  static VG_REGPARM(1)
  void evh__mem_help_write_1(Addr a) {
-   shadow_mem_write8( get_current_Thread_in_C_C(), a, 0/*unused*/ );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_WRITE_1(hbthr, a);
  }
+
  static VG_REGPARM(1)
  void evh__mem_help_write_2(Addr a) {
-   shadow_mem_write16( get_current_Thread_in_C_C(), a, 0/*unused*/ );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_WRITE_2(hbthr, a);
  }
+
  static VG_REGPARM(1)
  void evh__mem_help_write_4(Addr a) {
-   shadow_mem_write32( get_current_Thread_in_C_C(), a, 0/*unused*/ );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_WRITE_4(hbthr, a);
  }
+
  static VG_REGPARM(1)
  void evh__mem_help_write_8(Addr a) {
-   shadow_mem_write64( get_current_Thread_in_C_C(), a, 0/*unused*/ );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_WRITE_8(hbthr, a);
  }
+
  static VG_REGPARM(2)
  void evh__mem_help_write_N(Addr a, SizeT size) {
-   shadow_mem_write_range( get_current_Thread_in_C_C(), a, size );
+   Thread*  thr = get_current_Thread_in_C_C();
+   Thr*     hbthr = thr->hbthr;
+   LIBHB_WRITE_N(hbthr, a, size);
  }
  
  static void evh__bus_lock(void) {
@@ -5880,7 +1829,7 @@ void evh__HG_PTHREAD_MUTEX_INIT_POST( ThreadId tid,
     tl_assert(mbRec == 0 || mbRec == 1);
     map_locks_lookup_or_create( mbRec ? LK_mbRec : LK_nonRec,
                                 (Addr)mutex, tid );
-   if (clo_sanity_flags & SCE_LOCKS)
+   if (HG_(clo_sanity_flags) & SCE_LOCKS)
        all__sanity_check("evh__hg_PTHREAD_MUTEX_INIT_POST");
  }
  
@@ -5895,21 +1844,22 @@ void evh__HG_PTHREAD_MUTEX_DESTROY_PRE( ThreadId tid, void* mutex )
  
     thr = map_threads_maybe_lookup( tid );
     /* cannot fail - Thread* must already exist */
-   tl_assert( is_sane_Thread(thr) );
+   tl_assert( HG_(is_sane_Thread)(thr) );
  
     lk = map_locks_maybe_lookup( (Addr)mutex );
  
     if (lk == NULL || (lk->kind != LK_nonRec && lk->kind != LK_mbRec)) {
-      record_error_Misc( thr,
-                         "pthread_mutex_destroy with invalid argument" );
+      HG_(record_error_Misc)(
+         thr, "pthread_mutex_destroy with invalid argument" );
     }
  
     if (lk) {
-      tl_assert( is_sane_LockN(lk) );
+      tl_assert( HG_(is_sane_LockN)(lk) );
        tl_assert( lk->guestaddr == (Addr)mutex );
        if (lk->heldBy) {
           /* Basically act like we unlocked the lock */
-         record_error_Misc( thr, "pthread_mutex_destroy of a locked mutex" );
+         HG_(record_error_Misc)(
+            thr, "pthread_mutex_destroy of a locked mutex" );
           /* remove lock from locksets of all owning threads */
           remove_Lock_from_locksets_of_all_owning_Threads( lk );
           VG_(deleteBag)( lk->heldBy );
@@ -5918,10 +1868,13 @@ void evh__HG_PTHREAD_MUTEX_DESTROY_PRE( ThreadId tid, void* mutex )
           lk->acquired_at = NULL;
        }
        tl_assert( !lk->heldBy );
-      tl_assert( is_sane_LockN(lk) );
+      tl_assert( HG_(is_sane_LockN)(lk) );
+
+      map_locks_delete( lk->guestaddr );
+      del_LockN( lk );
     }
  
-   if (clo_sanity_flags & SCE_LOCKS)
+   if (HG_(clo_sanity_flags) & SCE_LOCKS)
        all__sanity_check("evh__hg_PTHREAD_MUTEX_DESTROY_PRE");
  }
  
@@ -5943,8 +1896,8 @@ static void evh__HG_PTHREAD_MUTEX_LOCK_PRE ( ThreadId tid,
     lk = map_locks_maybe_lookup( (Addr)mutex );
  
     if (lk && (lk->kind == LK_rdwr)) {
-      record_error_Misc( thr, "pthread_mutex_lock with a "
-                              "pthread_rwlock_t* argument " );
+      HG_(record_error_Misc)( thr, "pthread_mutex_lock with a "
+                                   "pthread_rwlock_t* argument " );
     }
  
     if ( lk 
@@ -5957,8 +1910,8 @@ static void evh__HG_PTHREAD_MUTEX_LOCK_PRE ( ThreadId tid,
           this is a real lock operation (not a speculative "tryLock"
           kind of thing).  Duh.  Deadlock coming up; but at least
           produce an error message. */
-      record_error_Misc( thr, "Attempt to re-lock a "
-                              "non-recursive lock I already hold" );
+      HG_(record_error_Misc)( thr, "Attempt to re-lock a "
+                                   "non-recursive lock I already hold" );
     }
  }
  
@@ -6010,62 +1963,71 @@ static void evh__HG_PTHREAD_MUTEX_UNLOCK_POST ( ThreadId tid, void* mutex )
  
  /* --------------- events to do with CVs --------------- */
  
-/* A mapping from CV to the thread segment which has most recently
-   signalled/broadcasted on it.  This makes it possible to create
-   thread segments to model happens-before events arising from CV
+/* A mapping from CV to the SO associated with it.  When the CV is
+   signalled/broadcasted upon, we do a 'send' into the SO, and when a
+   wait on it completes, we do a 'recv' from the SO.  This is believed
+   to give the correct happens-before events arising from CV
     signallings/broadcasts.
  */
  
-/* pthread_mutex_cond* -> Segment* */
-static WordFM* map_cond_to_Segment = NULL;
+/* pthread_cond_t* -> SO* */
+static WordFM* map_cond_to_SO = NULL;
+
+static void map_cond_to_SO_INIT ( void ) {
+   if (UNLIKELY(map_cond_to_SO == NULL)) {
+      map_cond_to_SO = VG_(newFM)( HG_(zalloc), "hg.mctSI.1", HG_(free), NULL );
+      tl_assert(map_cond_to_SO != NULL);
+   }
+}
+
+static SO* map_cond_to_SO_lookup_or_alloc ( void* cond ) {
+   UWord key, val;
+   map_cond_to_SO_INIT();
+   if (VG_(lookupFM)( map_cond_to_SO, &key, &val, (UWord)cond )) {
+      tl_assert(key == (UWord)cond);
+      return (SO*)val;
+   } else {
+      SO* so = libhb_so_alloc();
+      VG_(addToFM)( map_cond_to_SO, (UWord)cond, (UWord)so );
+      return so;
+   }
+}
  
-static void map_cond_to_Segment_INIT ( void ) {
-   if (UNLIKELY(map_cond_to_Segment == NULL)) {
-      map_cond_to_Segment = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL );
-      tl_assert(map_cond_to_Segment != NULL);
+static void map_cond_to_SO_delete ( void* cond ) {
+   UWord keyW, valW;
+   map_cond_to_SO_INIT();
+   if (VG_(delFromFM)( map_cond_to_SO, &keyW, &valW, (UWord)cond )) {
+      SO* so = (SO*)valW;
+      tl_assert(keyW == (UWord)cond);
+      libhb_so_dealloc(so);
     }
  }
  
  static void evh__HG_PTHREAD_COND_SIGNAL_PRE ( ThreadId tid, void* cond )
  {
-   /* 'tid' has signalled on 'cond'.  Start a new segment for this
-      thread, and make a binding from 'cond' to our old segment in the
-      mapping.  This is later used by other thread(s) which
-      successfully exit from a pthread_cond_wait on the same cv; then
-      they know what the signalling segment was, so a dependency edge
-      back to it can be constructed. */
-
+   /* 'tid' has signalled on 'cond'.  As per the comment above, bind
+      cond to a SO if it is not already so bound, and 'send' on the
+      SO.  This is later used by other thread(s) which successfully
+      exit from a pthread_cond_wait on the same cv; then they 'recv'
+      from the SO, thereby acquiring a dependency on this signalling
+      event. */
     Thread*   thr;
-   SegmentID new_segid;
-   Segment*  new_seg;
+   SO*       so;
  
     if (SHOW_EVENTS >= 1)
        VG_(printf)("evh__HG_PTHREAD_COND_SIGNAL_PRE(ctid=%d, cond=%p)\n", 
                    (Int)tid, (void*)cond );
  
-   map_cond_to_Segment_INIT();
     thr = map_threads_maybe_lookup( tid );
     tl_assert(thr); /* cannot fail - Thread* must already exist */
  
     // error-if: mutex is bogus
     // error-if: mutex is not locked
  
-   if (clo_happens_before >= 2) {
-      /* create a new segment ... */
-      new_segid = 0; /* bogus */
-      new_seg   = NULL;
-      evhH__start_new_segment_for_thread( &new_segid, &new_seg, thr );
-      tl_assert( is_sane_SegmentID(new_segid) );
-      tl_assert( is_sane_Segment(new_seg) );
-      tl_assert( new_seg->thr == thr );
-      tl_assert( is_sane_Segment(new_seg->prev) );
-      tl_assert( new_seg->prev->vts );
-      new_seg->vts = tick_VTS( new_seg->thr, new_seg->prev->vts );
-
-      /* ... and add the binding. */
-      VG_(addToFM)( map_cond_to_Segment, (Word)cond,
-                                         (Word)(new_seg->prev) );
-   }
+   so = map_cond_to_SO_lookup_or_alloc( cond );
+   tl_assert(so);
+
+   libhb_so_send( thr->hbthr, so, True/*strong_send*/ );
  }
  
  /* returns True if it reckons 'mutex' is valid and held by this
@@ -6082,7 +2044,6 @@ static Bool evh__HG_PTHREAD_COND_WAIT_PRE ( ThreadId tid,
                    "(ctid=%d, cond=%p, mutex=%p)\n", 
                    (Int)tid, (void*)cond, (void*)mutex );
  
-   map_cond_to_Segment_INIT();
     thr = map_threads_maybe_lookup( tid );
     tl_assert(thr); /* cannot fail - Thread* must already exist */
  
@@ -6093,26 +2054,26 @@ static Bool evh__HG_PTHREAD_COND_WAIT_PRE ( ThreadId tid,
        is wrong. */
     if (lk == NULL) {
        lk_valid = False;
-      record_error_Misc( 
+      HG_(record_error_Misc)( 
           thr, 
           "pthread_cond_{timed}wait called with invalid mutex" );
     } else {
-      tl_assert( is_sane_LockN(lk) );
+      tl_assert( HG_(is_sane_LockN)(lk) );
        if (lk->kind == LK_rdwr) {
           lk_valid = False;
-         record_error_Misc( 
+         HG_(record_error_Misc)(
              thr, "pthread_cond_{timed}wait called with mutex "
                   "of type pthread_rwlock_t*" );
        } else
           if (lk->heldBy == NULL) {
           lk_valid = False;
-         record_error_Misc( 
+         HG_(record_error_Misc)( 
              thr, "pthread_cond_{timed}wait called with un-held mutex");
        } else
        if (lk->heldBy != NULL
            && VG_(elemBag)( lk->heldBy, (Word)thr ) == 0) {
           lk_valid = False;
-         record_error_Misc( 
+         HG_(record_error_Misc)(
              thr, "pthread_cond_{timed}wait called with mutex "
                   "held by a different thread" );
        }
@@ -6126,68 +2087,50 @@ static Bool evh__HG_PTHREAD_COND_WAIT_PRE ( ThreadId tid,
  static void evh__HG_PTHREAD_COND_WAIT_POST ( ThreadId tid,
                                               void* cond, void* mutex )
  {
-   /* A pthread_cond_wait(cond, mutex) completed successfully.  Start
-      a new segment for this thread.  Look up the signalling-segment
-      for the 'cond' in the mapping, and add a dependency edge from
-      the new segment back to it. */
-
-   Thread*   thr;
-   SegmentID new_segid;
-   Segment*  new_seg;
-   Segment*  signalling_seg;
-   Bool      found;
+   /* A pthread_cond_wait(cond, mutex) completed successfully.  Find
+      the SO for this cond, and 'recv' from it so as to acquire a
+      dependency edge back to the signaller/broadcaster. */
+   Thread* thr;
+   SO*     so;
  
     if (SHOW_EVENTS >= 1)
        VG_(printf)("evh__HG_PTHREAD_COND_WAIT_POST"
                    "(ctid=%d, cond=%p, mutex=%p)\n", 
                    (Int)tid, (void*)cond, (void*)mutex );
  
-   map_cond_to_Segment_INIT();
     thr = map_threads_maybe_lookup( tid );
     tl_assert(thr); /* cannot fail - Thread* must already exist */
  
     // error-if: cond is also associated with a different mutex
  
-   if (clo_happens_before >= 2) {
-      /* create a new segment ... */
-      new_segid = 0; /* bogus */
-      new_seg   = NULL;
-      evhH__start_new_segment_for_thread( &new_segid, &new_seg, thr );
-      tl_assert( is_sane_SegmentID(new_segid) );
-      tl_assert( is_sane_Segment(new_seg) );
-      tl_assert( new_seg->thr == thr );
-      tl_assert( is_sane_Segment(new_seg->prev) );
-      tl_assert( new_seg->other == NULL);
-
-      /* and find out which thread signalled us; then add a dependency
-         edge back to it. */
-      signalling_seg = NULL;
-      found = VG_(lookupFM)( map_cond_to_Segment, 
-                             NULL, (Word*)&signalling_seg,
-                                   (Word)cond );
-      if (found) {
-         tl_assert(is_sane_Segment(signalling_seg));
-         tl_assert(new_seg->prev);
-         tl_assert(new_seg->prev->vts);
-         new_seg->other      = signalling_seg;
-         new_seg->other_hint = 's';
-         tl_assert(new_seg->other->vts);
-         new_seg->vts = tickL_and_joinR_VTS( 
-                           new_seg->thr, 
-                           new_seg->prev->vts,
-                           new_seg->other->vts );
-      } else {
-         /* Hmm.  How can a wait on 'cond' succeed if nobody signalled
-            it?  If this happened it would surely be a bug in the
-            threads library.  Or one of those fabled "spurious
-            wakeups". */
-         record_error_Misc( thr, "Bug in libpthread: pthread_cond_wait "
-                                 "succeeded on"
-                                 " without prior pthread_cond_post");
-         tl_assert(new_seg->prev->vts);
-         new_seg->vts = tick_VTS( new_seg->thr, new_seg->prev->vts );
-      }
+   so = map_cond_to_SO_lookup_or_alloc( cond );
+   tl_assert(so);
+
+   if (!libhb_so_everSent(so)) {
+      /* Hmm.  How can a wait on 'cond' succeed if nobody signalled
+         it?  If this happened it would surely be a bug in the threads
+         library.  Or one of those fabled "spurious wakeups". */
+      HG_(record_error_Misc)( thr, "Bug in libpthread: pthread_cond_wait "
+                                   "succeeded on"
+                                   " without prior pthread_cond_post");
     }
+
+   /* anyway, acquire a dependency on it. */
+   libhb_so_recv( thr->hbthr, so, True/*strong_recv*/ );
+}
+
+static void evh__HG_PTHREAD_COND_DESTROY_PRE ( ThreadId tid,
+                                               void* cond )
+{
+   /* Deal with destroy events.  The only purpose is to free storage
+      associated with the CV, so as to avoid any possible resource
+      leaks. */
+   if (SHOW_EVENTS >= 1)
+      VG_(printf)("evh__HG_PTHREAD_COND_DESTROY_PRE"
+                  "(ctid=%d, cond=%p)\n", 
+                  (Int)tid, (void*)cond );
+
+   map_cond_to_SO_delete( cond );
  }
  
  
@@ -6201,7 +2144,7 @@ void evh__HG_PTHREAD_RWLOCK_INIT_POST( ThreadId tid, void* rwl )
        VG_(printf)("evh__hg_PTHREAD_RWLOCK_INIT_POST(ctid=%d, %p)\n", 
                    (Int)tid, (void*)rwl );
     map_locks_lookup_or_create( LK_rdwr, (Addr)rwl, tid );
-   if (clo_sanity_flags & SCE_LOCKS)
+   if (HG_(clo_sanity_flags) & SCE_LOCKS)
        all__sanity_check("evh__hg_PTHREAD_RWLOCK_INIT_POST");
  }
  
@@ -6216,21 +2159,22 @@ void evh__HG_PTHREAD_RWLOCK_DESTROY_PRE( ThreadId tid, void* rwl )
  
     thr = map_threads_maybe_lookup( tid );
     /* cannot fail - Thread* must already exist */
-   tl_assert( is_sane_Thread(thr) );
+   tl_assert( HG_(is_sane_Thread)(thr) );
  
     lk = map_locks_maybe_lookup( (Addr)rwl );
  
     if (lk == NULL || lk->kind != LK_rdwr) {
-      record_error_Misc( thr,
-                         "pthread_rwlock_destroy with invalid argument" );
+      HG_(record_error_Misc)(
+         thr, "pthread_rwlock_destroy with invalid argument" );
     }
  
     if (lk) {
-      tl_assert( is_sane_LockN(lk) );
+      tl_assert( HG_(is_sane_LockN)(lk) );
        tl_assert( lk->guestaddr == (Addr)rwl );
        if (lk->heldBy) {
           /* Basically act like we unlocked the lock */
-         record_error_Misc( thr, "pthread_rwlock_destroy of a locked mutex" );
+         HG_(record_error_Misc)(
+            thr, "pthread_rwlock_destroy of a locked mutex" );
           /* remove lock from locksets of all owning threads */
           remove_Lock_from_locksets_of_all_owning_Threads( lk );
           VG_(deleteBag)( lk->heldBy );
@@ -6239,10 +2183,13 @@ void evh__HG_PTHREAD_RWLOCK_DESTROY_PRE( ThreadId tid, void* rwl )
           lk->acquired_at = NULL;
        }
        tl_assert( !lk->heldBy );
-      tl_assert( is_sane_LockN(lk) );
+      tl_assert( HG_(is_sane_LockN)(lk) );
+
+      map_locks_delete( lk->guestaddr );
+      del_LockN( lk );
     }
  
-   if (clo_sanity_flags & SCE_LOCKS)
+   if (HG_(clo_sanity_flags) & SCE_LOCKS)
        all__sanity_check("evh__hg_PTHREAD_RWLOCK_DESTROY_PRE");
  }
  
@@ -6268,8 +2215,9 @@ void evh__HG_PTHREAD_RWLOCK_LOCK_PRE ( ThreadId tid,
     if ( lk 
          && (lk->kind == LK_nonRec || lk->kind == LK_mbRec) ) {
        /* Wrong kind of lock.  Duh.  */
-      record_error_Misc( thr, "pthread_rwlock_{rd,rw}lock with a "
-                              "pthread_mutex_t* argument " );
+      HG_(record_error_Misc)( 
+         thr, "pthread_rwlock_{rd,rw}lock with a "
+              "pthread_mutex_t* argument " );
     }
  }
  
@@ -6327,76 +2275,82 @@ static void evh__HG_PTHREAD_RWLOCK_UNLOCK_POST ( ThreadId tid, void* rwl )
  /* This is similar to but not identical to the handling for condition
     variables. */
  
-/* For each semaphore, we maintain a stack of Segments.  When a 'post'
-   operation is done on a semaphore (unlocking, essentially), a new
-   segment is created for the posting thread, and the old segment is
-   pushed on the semaphore's stack.
+/* For each semaphore, we maintain a stack of SOs.  When a 'post'
+   operation is done on a semaphore (unlocking, essentially), a new SO
+   is created for the posting thread, the posting thread does a strong
+   send to it (which merely installs the posting thread's VC in the
+   SO), and the SO is pushed on the semaphore's stack.
  
     Later, when a (probably different) thread completes 'wait' on the
-   semaphore, we pop a Segment off the semaphore's stack (which should
-   be nonempty).  We start a new segment for the thread and make it
-   also depend on the just-popped segment.  This mechanism creates
+   semaphore, we pop a SO off the semaphore's stack (which should be
+   nonempty), and do a strong recv from it.  This mechanism creates
     dependencies between posters and waiters of the semaphore.
  
-   It may not be necessary to use a stack - perhaps a bag of Segments
-   would do.  But we do need to keep track of how many unused-up posts
-   have happened for the semaphore.
+   It may not be necessary to use a stack - perhaps a bag of SOs would
+   do.  But we do need to keep track of how many unused-up posts have
+   happened for the semaphore.
  
-   Imagine T1 and T2 both post once on a semphore S, and T3 waits
+   Imagine T1 and T2 both post once on a semaphore S, and T3 waits
     twice on S.  T3 cannot complete its waits without both T1 and T2
     posting.  The above mechanism will ensure that T3 acquires
     dependencies on both T1 and T2.
  
-   When a semaphore is initialised with value N, the initialising
-   thread starts a new segment, the semaphore's stack is emptied out,
-   and the old segment is pushed on the stack N times.  This allows up
-   to N waits on the semaphore to acquire a dependency on the
-   initialisation point, which AFAICS is the correct behaviour.
+   When a semaphore is initialised with value N, we do as if we'd
+   posted N times on the semaphore: basically create N SOs and do a
+   strong send to all of them.  This allows up to N waits on the
+   semaphore to acquire a dependency on the initialisation point,
+   which AFAICS is the correct behaviour.
  
     We don't emit an error for DESTROY_PRE on a semaphore we don't know
     about.  We should.
  */
  
-/* sem_t* -> XArray* Segment* */
-static WordFM* map_sem_to_Segment_stack = NULL;
+/* sem_t* -> XArray* SO* */
+static WordFM* map_sem_to_SO_stack = NULL;
  
-static void map_sem_to_Segment_stack_INIT ( void ) {
-   if (map_sem_to_Segment_stack == NULL) {
-      map_sem_to_Segment_stack = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL );
-      tl_assert(map_sem_to_Segment_stack != NULL);
+static void map_sem_to_SO_stack_INIT ( void ) {
+   if (map_sem_to_SO_stack == NULL) {
+      map_sem_to_SO_stack = VG_(newFM)( HG_(zalloc), "hg.mstSs.1",
+                                        HG_(free), NULL );
+      tl_assert(map_sem_to_SO_stack != NULL);
     }
  }
  
-static void push_Segment_for_sem ( void* sem, Segment* seg ) {
+static void push_SO_for_sem ( void* sem, SO* so ) {
+   UWord   keyW;
     XArray* xa;
-   tl_assert(seg);
-   map_sem_to_Segment_stack_INIT();
-   if (VG_(lookupFM)( map_sem_to_Segment_stack, 
-                      NULL, (Word*)&xa, (Word)sem )) {
+   tl_assert(so);
+   map_sem_to_SO_stack_INIT();
+   if (VG_(lookupFM)( map_sem_to_SO_stack, 
+                      &keyW, (UWord*)&xa, (UWord)sem )) {
+      tl_assert(keyW == (UWord)sem);
        tl_assert(xa);
-      VG_(addToXA)( xa, &seg );
+      VG_(addToXA)( xa, &so );
     } else {
-      xa = VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(Segment*) );
-      VG_(addToXA)( xa, &seg );
-      VG_(addToFM)( map_sem_to_Segment_stack, (Word)sem, (Word)xa );
+     xa = VG_(newXA)( HG_(zalloc), "hg.pSfs.1", HG_(free), sizeof(SO*) );
+      VG_(addToXA)( xa, &so );
+      VG_(addToFM)( map_sem_to_SO_stack, (Word)sem, (Word)xa );
     }
  }
  
-static Segment* mb_pop_Segment_for_sem ( void* sem ) {
+static SO* mb_pop_SO_for_sem ( void* sem ) {
+   UWord    keyW;
     XArray*  xa;
-   Segment* seg;
-   map_sem_to_Segment_stack_INIT();
-   if (VG_(lookupFM)( map_sem_to_Segment_stack, 
-                      NULL, (Word*)&xa, (Word)sem )) {
+   SO* so;
+   map_sem_to_SO_stack_INIT();
+   if (VG_(lookupFM)( map_sem_to_SO_stack, 
+                      &keyW, (UWord*)&xa, (UWord)sem )) {
        /* xa is the stack for this semaphore. */
-      Word sz = VG_(sizeXA)( xa );
+      Word sz; 
+      tl_assert(keyW == (UWord)sem);
+      sz = VG_(sizeXA)( xa );
        tl_assert(sz >= 0);
        if (sz == 0)
           return NULL; /* odd, the stack is empty */
-      seg = *(Segment**)VG_(indexXA)( xa, sz-1 );
-      tl_assert(seg);
+      so = *(SO**)VG_(indexXA)( xa, sz-1 );
+      tl_assert(so);
        VG_(dropTailXA)( xa, 1 );
-      return seg;
+      return so;
     } else {
        /* hmm, that's odd.  No stack for this semaphore. */
        return NULL;
@@ -6405,82 +2359,86 @@ static Segment* mb_pop_Segment_for_sem ( void* sem ) {
  
  static void evh__HG_POSIX_SEM_DESTROY_PRE ( ThreadId tid, void* sem )
  {
-   Segment* seg;
+   UWord keyW, valW;
+   SO*   so;
  
     if (SHOW_EVENTS >= 1)
        VG_(printf)("evh__HG_POSIX_SEM_DESTROY_PRE(ctid=%d, sem=%p)\n", 
                    (Int)tid, (void*)sem );
  
-   /* Empty out the semaphore's segment stack.  This way of doing it
-      is stupid, but at least it's easy. */
-   do {
-     seg = mb_pop_Segment_for_sem( sem );
-   } while (seg);
+   map_sem_to_SO_stack_INIT();
+
+   /* Empty out the semaphore's SO stack.  This way of doing it is
+      stupid, but at least it's easy. */
+   while (1) {
+      so = mb_pop_SO_for_sem( sem );
+      if (!so) break;
+      libhb_so_dealloc(so);
+   }
  
-   tl_assert(!seg);
+   if (VG_(delFromFM)( map_sem_to_SO_stack, &keyW, &valW, (UWord)sem )) {
+      XArray* xa = (XArray*)valW;
+      tl_assert(keyW == (UWord)sem);
+      tl_assert(xa);
+      tl_assert(VG_(sizeXA)(xa) == 0); /* preceding loop just emptied it */
+      VG_(deleteXA)(xa);
+   }
  }
  
  static 
  void evh__HG_POSIX_SEM_INIT_POST ( ThreadId tid, void* sem, UWord value )
  {
-   Segment* seg;
+   SO*     so;
+   Thread* thr;
  
     if (SHOW_EVENTS >= 1)
        VG_(printf)("evh__HG_POSIX_SEM_INIT_POST(ctid=%d, sem=%p, value=%lu)\n", 
                    (Int)tid, (void*)sem, value );
  
-   /* Empty out the semaphore's segment stack.  This way of doing it
-      is stupid, but at least it's easy. */
-   do {
-     seg = mb_pop_Segment_for_sem( sem );
-   } while (seg);
-   tl_assert(!seg);
-
-   /* Now create a new segment for the thread, and push the old
-      segment on the stack 'value' times.  Skip this if the initial
-      value is zero -- no point in creating unnecessary segments. */
-   if (value > 0) {
-      /* create a new segment ... */
-      SegmentID new_segid = 0; /* bogus */
-      Segment*  new_seg   = NULL;
-      Thread*   thr       = map_threads_maybe_lookup( tid );
-      tl_assert(thr); /* cannot fail - Thread* must already exist */
-
-      evhH__start_new_segment_for_thread( &new_segid, &new_seg, thr );
-      tl_assert( is_sane_SegmentID(new_segid) );
-      tl_assert( is_sane_Segment(new_seg) );
-      tl_assert( new_seg->thr == thr );
-      tl_assert( is_sane_Segment(new_seg->prev) );
-      tl_assert( new_seg->prev->vts );
-      new_seg->vts = tick_VTS( new_seg->thr, new_seg->prev->vts );
-
-      if (value > 10000) {
-         /* If we don't do this, the following while loop runs us out
-            of memory for stupid initial values of 'sem'. */
-         record_error_Misc(
-            thr, "sem_init: initial value exceeds 10000; using 10000" );
-         value = 10000;
-      }
+   thr = map_threads_maybe_lookup( tid );
+   tl_assert(thr); /* cannot fail - Thread* must already exist */
  
-      while (value > 0) {
-         push_Segment_for_sem( sem, new_seg->prev );
-         value--;
-      }
+   /* Empty out the semaphore's SO stack.  This way of doing it is
+      stupid, but at least it's easy. */
+   while (1) {
+      so = mb_pop_SO_for_sem( sem );
+      if (!so) break;
+      libhb_so_dealloc(so);
+   }
+
+   /* If we don't do this check, the following while loop runs us out
+      of memory for stupid initial values of 'value'. */
+   if (value > 10000) {
+      HG_(record_error_Misc)(
+         thr, "sem_init: initial value exceeds 10000; using 10000" );
+      value = 10000;
+   }
+
+   /* Now create 'value' new SOs for the thread, do a strong send to
+      each of them, and push them all on the stack. */
+   for (; value > 0; value--) {
+      Thr* hbthr = thr->hbthr;
+      tl_assert(hbthr);
+
+      so = libhb_so_alloc();
+      libhb_so_send( hbthr, so, True/*strong send*/ );
+      push_SO_for_sem( sem, so );
     }
  }
  
  static void evh__HG_POSIX_SEM_POST_PRE ( ThreadId tid, void* sem )
  {
-   /* 'tid' has posted on 'sem'.  Start a new segment for this thread,
-      and push the old segment on a stack of segments associated with
-      'sem'.  This is later used by other thread(s) which successfully
-      exit from a sem_wait on the same sem; then they know what the
-      posting segment was, so a dependency edge back to it can be
-      constructed. */
+   /* 'tid' has posted on 'sem'.  Create a new SO, do a strong send to
+      it (iow, write our VC into it, then tick ours), and push the SO
+      on a stack of SOs associated with 'sem'.  This is later used
+      by other thread(s) which successfully exit from a sem_wait on
+      the same sem; by doing a strong recv from SOs popped off the
+      stack, they acquire dependencies on the posting thread
+      segment(s). */
  
-   Thread*   thr;
-   SegmentID new_segid;
-   Segment*  new_seg;
+   Thread* thr;
+   SO*     so;
+   Thr*    hbthr;
  
     if (SHOW_EVENTS >= 1)
        VG_(printf)("evh__HG_POSIX_SEM_POST_PRE(ctid=%d, sem=%p)\n", 
@@ -6491,34 +2449,24 @@ static void evh__HG_POSIX_SEM_POST_PRE ( ThreadId tid, void* sem )
  
     // error-if: sem is bogus
  
-   if (clo_happens_before >= 2) {
-      /* create a new segment ... */
-      new_segid = 0; /* bogus */
-      new_seg   = NULL;
-      evhH__start_new_segment_for_thread( &new_segid, &new_seg, thr );
-      tl_assert( is_sane_SegmentID(new_segid) );
-      tl_assert( is_sane_Segment(new_seg) );
-      tl_assert( new_seg->thr == thr );
-      tl_assert( is_sane_Segment(new_seg->prev) );
-      tl_assert( new_seg->prev->vts );
-      new_seg->vts = tick_VTS( new_seg->thr, new_seg->prev->vts );
-
-      /* ... and add the binding. */
-      push_Segment_for_sem( sem, new_seg->prev );
-   }
+   hbthr = thr->hbthr;
+   tl_assert(hbthr);
+
+   so = libhb_so_alloc();
+   libhb_so_send( hbthr, so, True/*strong send*/ );
+   push_SO_for_sem( sem, so );
  }
  
  static void evh__HG_POSIX_SEM_WAIT_POST ( ThreadId tid, void* sem )
  {
-   /* A sem_wait(sem) completed successfully.  Start a new segment for
-      this thread.  Pop the posting-segment for the 'sem' in the
-      mapping, and add a dependency edge from the new segment back to
-      it. */
+   /* A sem_wait(sem) completed successfully.  Pop the posting-SO for
+      the 'sem' from this semaphore's SO-stack, and do a strong recv
+      from it.  This creates a dependency back to one of the post-ers
+      for the semaphore. */
  
-   Thread*   thr;
-   SegmentID new_segid;
-   Segment*  new_seg;
-   Segment*  posting_seg;
+   Thread* thr;
+   SO*     so;
+   Thr*    hbthr;
  
     if (SHOW_EVENTS >= 1)
        VG_(printf)("evh__HG_POSIX_SEM_WAIT_POST(ctid=%d, sem=%p)\n", 
@@ -6529,40 +2477,21 @@ static void evh__HG_POSIX_SEM_WAIT_POST ( ThreadId tid, void* sem )
  
     // error-if: sem is bogus
  
-   if (clo_happens_before >= 2) {
-      /* create a new segment ... */
-      new_segid = 0; /* bogus */
-      new_seg   = NULL;
-      evhH__start_new_segment_for_thread( &new_segid, &new_seg, thr );
-      tl_assert( is_sane_SegmentID(new_segid) );
-      tl_assert( is_sane_Segment(new_seg) );
-      tl_assert( new_seg->thr == thr );
-      tl_assert( is_sane_Segment(new_seg->prev) );
-      tl_assert( new_seg->other == NULL);
-
-      /* and find out which thread posted last on sem; then add a
-         dependency edge back to it. */
-      posting_seg = mb_pop_Segment_for_sem( sem );
-      if (posting_seg) {
-         tl_assert(is_sane_Segment(posting_seg));
-         tl_assert(new_seg->prev);
-         tl_assert(new_seg->prev->vts);
-         new_seg->other      = posting_seg;
-         new_seg->other_hint = 'S';
-         tl_assert(new_seg->other->vts);
-         new_seg->vts = tickL_and_joinR_VTS( 
-                           new_seg->thr, 
-                           new_seg->prev->vts,
-                           new_seg->other->vts );
-      } else {
-         /* Hmm.  How can a wait on 'sem' succeed if nobody posted to
-            it?  If this happened it would surely be a bug in the
-            threads library. */
-         record_error_Misc( thr, "Bug in libpthread: sem_wait succeeded on"
-                                 " semaphore without prior sem_post");
-         tl_assert(new_seg->prev->vts);
-         new_seg->vts = tick_VTS( new_seg->thr, new_seg->prev->vts );
-      }
+   so = mb_pop_SO_for_sem( sem );
+
+   if (so) {
+      hbthr = thr->hbthr;
+      tl_assert(hbthr);
+
+      libhb_so_recv( hbthr, so, True/*strong recv*/ );
+      libhb_so_dealloc(so);
+   } else {
+      /* Hmm.  How can a wait on 'sem' succeed if nobody posted to it?
+         If this happened it would surely be a bug in the threads
+         library. */
+      HG_(record_error_Misc)(
+         thr, "Bug in libpthread: sem_wait succeeded on"
+              " semaphore without prior sem_post");
     }
  }
  
@@ -6689,7 +2618,7 @@ static void laog__add_edge ( Lock* src, Lock* dst ) {
        presentF = outs_new == links->outs;
        links->outs = outs_new;
     } else {
-      links = hg_zalloc("hg", sizeof(LAOGLinks));
+      links = HG_(zalloc)("hg.lae.1", sizeof(LAOGLinks));
        links->inns = HG_(emptyWS)( univ_laog );
        links->outs = HG_(singletonWS)( univ_laog, (Word)dst );
        VG_(addToFM)( laog, (Word)src, (Word)links );
@@ -6705,7 +2634,7 @@ static void laog__add_edge ( Lock* src, Lock* dst ) {
        presentR = inns_new == links->inns;
        links->inns = inns_new;
     } else {
-      links = hg_zalloc("hg", sizeof(LAOGLinks));
+      links = HG_(zalloc)("hg.lae.2", sizeof(LAOGLinks));
        links->inns = HG_(singletonWS)( univ_laog, (Word)src );
        links->outs = HG_(emptyWS)( univ_laog );
        VG_(addToFM)( laog, (Word)dst, (Word)links );
@@ -6730,7 +2659,8 @@ static void laog__add_edge ( Lock* src, Lock* dst ) {
        if (VG_(lookupFM)( laog_exposition, NULL, NULL, (Word)&expo )) {
           /* we already have it; do nothing */
        } else {
-         LAOGLinkExposition* expo2 = hg_zalloc("hg", sizeof(LAOGLinkExposition));
+         LAOGLinkExposition* expo2 = HG_(zalloc)("hg.lae.3", 
+                                               sizeof(LAOGLinkExposition));
           expo2->src_ga = src->guestaddr;
           expo2->dst_ga = dst->guestaddr;
           expo2->src_ec = src->acquired_at;
@@ -6859,8 +2789,8 @@ Lock* laog__do_dfs_from_to ( Lock* src, WordSetID dsts /* univ_lsets */ )
        return NULL;
  
     ret     = NULL;
-   stack   = VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(Lock*) );
-   visited = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxedcmp*/ );
+   stack   = VG_(newXA)( HG_(zalloc), "hg.lddft.1", HG_(free), sizeof(Lock*) );
+   visited = VG_(newFM)( HG_(zalloc), "hg.lddft.2", HG_(free), NULL/*unboxedcmp*/ );
  
     (void) VG_(addToXA)( stack, &src );
  
@@ -6913,9 +2843,10 @@ static void laog__pre_thread_acquires_lock (
        return;
  
     if (!laog)
-      laog = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxedcmp*/ );
+      laog = VG_(newFM)( HG_(zalloc), "hg.lptal.1", 
+                         HG_(free), NULL/*unboxedcmp*/ );
     if (!laog_exposition)
-      laog_exposition = VG_(newFM)( hg_zalloc, "hg", hg_free, 
+      laog_exposition = VG_(newFM)( HG_(zalloc), "hg.lptal.2", HG_(free), 
                                      cmp_LAOGLinkExposition );
  
     /* First, the check.  Complain if there is any path in laog from lk
@@ -6946,14 +2877,14 @@ static void laog__pre_thread_acquires_lock (
           tl_assert(found->dst_ga == key.dst_ga);
           tl_assert(found->src_ec);
           tl_assert(found->dst_ec);
-         record_error_LockOrder( thr, 
-                                 lk->guestaddr, other->guestaddr,
-                                 found->src_ec, found->dst_ec );
+         HG_(record_error_LockOrder)( 
+            thr, lk->guestaddr, other->guestaddr,
+                 found->src_ec, found->dst_ec );
        } else {
           /* Hmm.  This can't happen (can it?) */
-         record_error_LockOrder( thr, 
-                                 lk->guestaddr,        other->guestaddr,
-                                 NULL, NULL );
+         HG_(record_error_LockOrder)(
+            thr, lk->guestaddr, other->guestaddr,
+                 NULL, NULL );
        }
     }
  
@@ -6975,7 +2906,7 @@ static void laog__pre_thread_acquires_lock (
        See the call points in evhH__post_thread_{r,w}_acquires_lock.
        When called in this inconsistent state, locks__sanity_check duly
        barfs. */
-   if (clo_sanity_flags & SCE_LAOG)
+   if (HG_(clo_sanity_flags) & SCE_LAOG)
        all_except_Locks__sanity_check("laog__pre_thread_acquires_lock-post");
  }
  
@@ -7021,16 +2952,16 @@ static void laog__handle_lock_deletions (
     UWord* ws_words;
  
     if (!laog)
-      laog = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxedcmp*/ );
+      laog = VG_(newFM)( HG_(zalloc), "hg.lhld.1", HG_(free), NULL/*unboxedcmp*/ );
     if (!laog_exposition)
-      laog_exposition = VG_(newFM)( hg_zalloc, "hg", hg_free, 
+      laog_exposition = VG_(newFM)( HG_(zalloc), "hg.lhld.2", HG_(free), 
                                      cmp_LAOGLinkExposition );
  
     HG_(getPayloadWS)( &ws_words, &ws_size, univ_lsets, locksToDelete );
     for (i = 0; i < ws_size; i++)
        laog__handle_one_lock_deletion( (Lock*)ws_words[i] );
  
-   if (clo_sanity_flags & SCE_LAOG)
+   if (HG_(clo_sanity_flags) & SCE_LAOG)
        all__sanity_check("laog__handle_lock_deletions-post");
  }
  
@@ -7055,12 +2986,12 @@ static VgHashTable hg_mallocmeta_table = NULL;
  
  
  static MallocMeta* new_MallocMeta ( void ) {
-   MallocMeta* md = hg_zalloc( "hg", sizeof(MallocMeta) );
+   MallocMeta* md = HG_(zalloc)( "hg.new_MallocMeta.1", sizeof(MallocMeta) );
     tl_assert(md);
     return md;
  }
  static void delete_MallocMeta ( MallocMeta* md ) {
-   hg_free(md);
+   HG_(free)(md);
  }
  
  
@@ -7209,7 +3140,7 @@ static void* hg_cli__realloc ( ThreadId tid, void* payloadV, SizeT new_size )
        // memory state machine?
        shadow_mem_copy_range( payload, p_new, md->szB );
        evh__new_mem_heap ( p_new + md->szB, new_size - md->szB,
-                           /*inited*/False );
+                          /*inited*/False );
        /* FIXME: can anything funny happen here?  specifically, if the
           old range contained a lock, then die_mem_heap will complain.
           Is that the correct behaviour?  Not sure. */
@@ -7389,6 +3320,7 @@ IRSB* hg_instrument ( VgCallbackClosure* closure,
  {
     Int   i;
     IRSB* bbOut;
+   Bool x86busLocked = False;
  
     if (gWordTy != hWordTy) {
        /* We don't currently support this case. */
@@ -7423,18 +3355,33 @@ IRSB* hg_instrument ( VgCallbackClosure* closure,
              break;
  
           case Ist_MBE:
-            instrument_memory_bus_event( bbOut, st->Ist.MBE.event );
+            //instrument_memory_bus_event( bbOut, st->Ist.MBE.event );
+            switch (st->Ist.MBE.event) {
+               case Imbe_Fence:
+                  break; /* not interesting */
+               case Imbe_BusLock:
+                  tl_assert(x86busLocked == False);
+                  x86busLocked = True;
+                  break;
+               case Imbe_BusUnlock:
+                  tl_assert(x86busLocked == True);
+                  x86busLocked = False;
+                  break;
+               default:
+                  goto unhandled;
+            }
              break;
  
           case Ist_Store:
-            instrument_mem_access( 
-               bbOut, 
-               st->Ist.Store.addr, 
-               sizeofIRType(typeOfIRExpr(bbIn->tyenv, st->Ist.Store.data)),
-               True/*isStore*/,
-               sizeofIRType(hWordTy)
-            );
-            break;
+            if (!x86busLocked)
+               instrument_mem_access( 
+                  bbOut, 
+                  st->Ist.Store.addr, 
+                  sizeofIRType(typeOfIRExpr(bbIn->tyenv, st->Ist.Store.data)),
+                  True/*isStore*/,
+                  sizeofIRType(hWordTy)
+               );
+               break;
  
           case Ist_WrTmp: {
              IRExpr* data = st->Ist.WrTmp.data;
@@ -7479,6 +3426,8 @@ IRSB* hg_instrument ( VgCallbackClosure* closure,
           }
  
           default:
+         unhandled:
+            ppIRStmt(st);
              tl_assert(0);
  
        } /* switch (st->tag) */
@@ -7499,7 +3448,8 @@ static WordFM* map_pthread_t_to_Thread = NULL; /* pthread_t -> Thread* */
  
  static void map_pthread_t_to_Thread_INIT ( void ) {
     if (UNLIKELY(map_pthread_t_to_Thread == NULL)) {
-      map_pthread_t_to_Thread = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL );
+      map_pthread_t_to_Thread = VG_(newFM)( HG_(zalloc), "hg.mpttT.1", 
+                                            HG_(free), NULL );
        tl_assert(map_pthread_t_to_Thread != NULL);
     }
  }
@@ -7525,7 +3475,9 @@ Bool hg_handle_client_request ( ThreadId tid, UWord* args, UWord* ret)
           if (0) VG_(printf)("VG_USERREQ__HG_CLEAN_MEMORY(%#lx,%ld)\n",
                              args[1], args[2]);
           /* Call die_mem to (expensively) tidy up properly, if there
-            are any held locks etc in the area */
+            are any held locks etc in the area.  Calling evh__die_mem
+            and then evh__new_mem is a bit inefficient; probably just
+            the latter would do. */
           if (args[2] > 0) { /* length */
              evh__die_mem(args[1], args[2]);
              /* and then set it to New */
@@ -7568,8 +3520,8 @@ Bool hg_handle_client_request ( ThreadId tid, UWord* args, UWord* ret)
           map_pthread_t_to_Thread_INIT();
           my_thr = map_threads_maybe_lookup( tid );
           tl_assert(my_thr); /* See justification above in SET_MY_PTHREAD_T */
-         record_error_PthAPIerror( my_thr, (HChar*)args[1], 
-                                           (Word)args[2], (HChar*)args[3] );
+         HG_(record_error_PthAPIerror)(
+            my_thr, (HChar*)args[1], (Word)args[2], (HChar*)args[3] );
           break;
        }
  
@@ -7647,6 +3599,11 @@ Bool hg_handle_client_request ( ThreadId tid, UWord* args, UWord* ret)
           break;
        }
  
+      /* cond=arg[1] */
+      case _VG_USERREQ__HG_PTHREAD_COND_DESTROY_PRE:
+         evh__HG_PTHREAD_COND_DESTROY_PRE( tid, (void*)args[1] );
+         break;
+
        /* Thread successfully completed pthread_cond_wait, cond=arg[1],
           mutex=arg[2] */
        case _VG_USERREQ__HG_PTHREAD_COND_WAIT_POST:
@@ -7697,861 +3654,29 @@ Bool hg_handle_client_request ( ThreadId tid, UWord* args, UWord* ret)
           evh__HG_POSIX_SEM_WAIT_POST( tid, (void*)args[1] );
           break;
  
-      case _VG_USERREQ__HG_GET_MY_SEGMENT: { // -> Segment*
-         Thread*   thr;
-         SegmentID segid;
-         Segment*  seg;
-         thr = map_threads_maybe_lookup( tid );
-         tl_assert(thr); /* cannot fail */
-         segid = thr->csegid;
-         tl_assert(is_sane_SegmentID(segid));
-         seg = map_segments_lookup( segid );
-         tl_assert(seg);
-         *ret = (UWord)seg;
-         break;
-      }
+//zz       case _VG_USERREQ__HG_GET_MY_SEGMENT: { // -> Segment*
+//zz          Thread*   thr;
+//zz          SegmentID segid;
+//zz          Segment*  seg;
+//zz          thr = map_threads_maybe_lookup( tid );
+//zz          tl_assert(thr); /* cannot fail */
+//zz          segid = thr->csegid;
+//zz          tl_assert(is_sane_SegmentID(segid));
+//zz          seg = map_segments_lookup( segid );
+//zz          tl_assert(seg);
+//zz          *ret = (UWord)seg;
+//zz          break;
+//zz       }
  
        default:
           /* Unhandled Helgrind client request! */
-        tl_assert2(0, "unhandled Helgrind client request!");
-   }
-
-   return True;
-}
-
-
-/*----------------------------------------------------------------*/
-/*--- Error management                                         ---*/
-/*----------------------------------------------------------------*/
-
-/* maps (by value) strings to a copy of them in ARENA_TOOL */
-static UWord stats__string_table_queries = 0;
-static WordFM* string_table = NULL;
-static Word string_table_cmp ( UWord s1, UWord s2 ) {
-   return (Word)VG_(strcmp)( (HChar*)s1, (HChar*)s2 );
-}
-static HChar* string_table_strdup ( HChar* str ) {
-   HChar* copy = NULL;
-   stats__string_table_queries++;
-   if (!str)
-      str = "(null)";
-   if (!string_table) {
-      string_table = VG_(newFM)( hg_zalloc, "hg", hg_free, string_table_cmp );
-      tl_assert(string_table);
-   }
-   if (VG_(lookupFM)( string_table,
-                      NULL, (Word*)&copy, (Word)str )) {
-      tl_assert(copy);
-      if (0) VG_(printf)("string_table_strdup: %p -> %p\n", str, copy );
-      return copy;
-   } else {
-      copy = VG_(strdup)("hg", str);
-      tl_assert(copy);
-      VG_(addToFM)( string_table, (Word)copy, (Word)copy );
-      return copy;
-   }
-}
-
-/* maps from Lock .unique fields to LockP*s */
-static UWord stats__ga_LockN_to_P_queries = 0;
-static WordFM* yaWFM = NULL;
-static Word lock_unique_cmp ( UWord lk1W, UWord lk2W )
-{
-   Lock* lk1 = (Lock*)lk1W;
-   Lock* lk2 = (Lock*)lk2W;
-   tl_assert( is_sane_LockNorP(lk1) );
-   tl_assert( is_sane_LockNorP(lk2) );
-   if (lk1->unique < lk2->unique) return -1;
-   if (lk1->unique > lk2->unique) return 1;
-   return 0;
-}
-static Lock* mk_LockP_from_LockN ( Lock* lkn )
-{
-   Lock* lkp = NULL;
-   stats__ga_LockN_to_P_queries++;
-   tl_assert( is_sane_LockN(lkn) );
-   if (!yaWFM) {
-      yaWFM = VG_(newFM)( hg_zalloc, "hg", hg_free, lock_unique_cmp );
-      tl_assert(yaWFM);
-   }
-   if (!VG_(lookupFM)( yaWFM, NULL, (Word*)&lkp, (Word)lkn)) {
-      lkp = hg_zalloc( "hg", sizeof(Lock) );
-      *lkp = *lkn;
-      lkp->admin = NULL;
-      lkp->magic = LockP_MAGIC;
-      /* Forget about the bag of lock holders - don't copy that.
-         Also, acquired_at should be NULL whenever heldBy is, and vice
-         versa. */
-      lkp->heldW  = False;
-      lkp->heldBy = NULL;
-      lkp->acquired_at = NULL;
-      VG_(addToFM)( yaWFM, (Word)lkp, (Word)lkp );
-   }
-   tl_assert( is_sane_LockP(lkp) );
-   return lkp;
-}
-
-/* Errors:
-
-      race: program counter
-            read or write
-            data size
-            previous state
-            current state
-
-      FIXME: how does state printing interact with lockset gc?
-      Are the locksets in prev/curr state always valid?
-      Ditto question for the threadsets
-          ThreadSets - probably are always valid if Threads
-          are never thrown away.
-          LockSets - could at least print the lockset elements that
-          correspond to actual locks at the time of printing.  Hmm.
-*/
-
-/* Error kinds */
-typedef
-   enum {
-      XE_Race=1101,      // race
-      XE_FreeMemLock,    // freeing memory containing a locked lock
-      XE_UnlockUnlocked, // unlocking a not-locked lock
-      XE_UnlockForeign,  // unlocking a lock held by some other thread
-      XE_UnlockBogus,    // unlocking an address not known to be a lock
-      XE_PthAPIerror,    // error from the POSIX pthreads API
-      XE_LockOrder,      // lock order error
-      XE_Misc            // misc other error (w/ string to describe it)
-   }
-   XErrorTag;
-
-/* Extra contexts for kinds */
-typedef
-   struct  {
-      XErrorTag tag;
-      union {
-         struct {
-            Addr  data_addr;
-            Int   szB;
-            Bool  isWrite;
-            SVal  new_state;
-            SVal  old_state;
-            ExeContext* mb_lastlock;
-            Thread* thr;
-            Char  descr1[96];
-            Char  descr2[96];
-         } Race;
-         struct {
-            Thread* thr;  /* doing the freeing */
-            Lock*   lock; /* lock which is locked */
-         } FreeMemLock;
-         struct {
-            Thread* thr;  /* doing the unlocking */
-            Lock*   lock; /* lock (that is already unlocked) */
-         } UnlockUnlocked;
-         struct {
-            Thread* thr;    /* doing the unlocking */
-            Thread* owner;  /* thread that actually holds the lock */
-            Lock*   lock;   /* lock (that is held by 'owner') */
-         } UnlockForeign;
-         struct {
-            Thread* thr;     /* doing the unlocking */
-            Addr    lock_ga; /* purported address of the lock */
-         } UnlockBogus;
-         struct {
-            Thread* thr; 
-            HChar*  fnname; /* persistent, in tool-arena */
-            Word    err;    /* pth error code */
-            HChar*  errstr; /* persistent, in tool-arena */
-         } PthAPIerror;
-         struct {
-            Thread*     thr;
-            Addr        before_ga; /* always locked first in prog. history */
-            Addr        after_ga;
-            ExeContext* before_ec;
-            ExeContext* after_ec;
-         } LockOrder;
-         struct {
-            Thread* thr;
-            HChar*  errstr; /* persistent, in tool-arena */
-         } Misc;
-      } XE;
-   }
-   XError;
-
-static void init_XError ( XError* xe ) {
-   VG_(memset)(xe, 0, sizeof(*xe) );
-   xe->tag = XE_Race-1; /* bogus */
-}
-
-
-/* Extensions of suppressions */
-typedef
-   enum {
-      XS_Race=1201, /* race */
-      XS_FreeMemLock,
-      XS_UnlockUnlocked,
-      XS_UnlockForeign,
-      XS_UnlockBogus,
-      XS_PthAPIerror,
-      XS_LockOrder,
-      XS_Misc
-   }
-   XSuppTag;
-
-
-/* Updates the copy with address info if necessary. */
-static UInt hg_update_extra ( Error* err )
-{
-   XError* extra = (XError*)VG_(get_error_extra)(err);
-   tl_assert(extra);
-   //if (extra != NULL && Undescribed == extra->addrinfo.akind) {
-   //   describe_addr ( VG_(get_error_address)(err), &(extra->addrinfo) );
-   //}
-   return sizeof(XError);
-}
-
-static void record_error_Race ( Thread* thr, 
-                                Addr data_addr, Bool isWrite, Int szB,
-                                SVal old_sv, SVal new_sv,
-                                ExeContext* mb_lastlock ) {
-   XError xe;
-   tl_assert( is_sane_Thread(thr) );
-   init_XError(&xe);
-   xe.tag = XE_Race;
-   xe.XE.Race.data_addr   = data_addr;
-   xe.XE.Race.szB         = szB;
-   xe.XE.Race.isWrite     = isWrite;
-   xe.XE.Race.new_state   = new_sv;
-   xe.XE.Race.old_state   = old_sv;
-   xe.XE.Race.mb_lastlock = mb_lastlock;
-   xe.XE.Race.thr         = thr;
-   // FIXME: tid vs thr
-   tl_assert(isWrite == False || isWrite == True);
-   tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
-
-   tl_assert(sizeof(xe.XE.Race.descr1) == sizeof(xe.XE.Race.descr2));
-   xe.XE.Race.descr1[0] = xe.XE.Race.descr2[0] = 0;
-   if (VG_(get_data_description)(
-             &xe.XE.Race.descr1[0],
-             &xe.XE.Race.descr2[0],
-             sizeof(xe.XE.Race.descr1)-1,
-             data_addr )) {
-      tl_assert( xe.XE.Race.descr1
-                    [ sizeof(xe.XE.Race.descr1)-1 ] == 0);
-      tl_assert( xe.XE.Race.descr2
-                    [ sizeof(xe.XE.Race.descr2)-1 ] == 0);
-   }
-
-   VG_(maybe_record_error)( map_threads_reverse_lookup_SLOW(thr),
-                            XE_Race, data_addr, NULL, &xe );
-}
-
-static void record_error_FreeMemLock ( Thread* thr, Lock* lk ) {
-   XError xe;
-   tl_assert( is_sane_Thread(thr) );
-   tl_assert( is_sane_LockN(lk) );
-   init_XError(&xe);
-   xe.tag = XE_FreeMemLock;
-   xe.XE.FreeMemLock.thr  = thr;
-   xe.XE.FreeMemLock.lock = mk_LockP_from_LockN(lk);
-   // FIXME: tid vs thr
-   VG_(maybe_record_error)( map_threads_reverse_lookup_SLOW(thr),
-                            XE_FreeMemLock, 0, NULL, &xe );
-}
-
-static void record_error_UnlockUnlocked ( Thread* thr, Lock* lk ) {
-   XError xe;
-   tl_assert( is_sane_Thread(thr) );
-   tl_assert( is_sane_LockN(lk) );
-   init_XError(&xe);
-   xe.tag = XE_UnlockUnlocked;
-   xe.XE.UnlockUnlocked.thr  = thr;
-   xe.XE.UnlockUnlocked.lock = mk_LockP_from_LockN(lk);
-   // FIXME: tid vs thr
-   VG_(maybe_record_error)( map_threads_reverse_lookup_SLOW(thr),
-                            XE_UnlockUnlocked, 0, NULL, &xe );
-}
-
-static void record_error_UnlockForeign ( Thread* thr,
-                                         Thread* owner, Lock* lk ) {
-   XError xe;
-   tl_assert( is_sane_Thread(thr) );
-   tl_assert( is_sane_Thread(owner) );
-   tl_assert( is_sane_LockN(lk) );
-   init_XError(&xe);
-   xe.tag = XE_UnlockForeign;
-   xe.XE.UnlockForeign.thr   = thr;
-   xe.XE.UnlockForeign.owner = owner;
-   xe.XE.UnlockForeign.lock  = mk_LockP_from_LockN(lk);
-   // FIXME: tid vs thr
-   VG_(maybe_record_error)( map_threads_reverse_lookup_SLOW(thr),
-                            XE_UnlockForeign, 0, NULL, &xe );
-}
-
-static void record_error_UnlockBogus ( Thread* thr, Addr lock_ga ) {
-   XError xe;
-   tl_assert( is_sane_Thread(thr) );
-   init_XError(&xe);
-   xe.tag = XE_UnlockBogus;
-   xe.XE.UnlockBogus.thr     = thr;
-   xe.XE.UnlockBogus.lock_ga = lock_ga;
-   // FIXME: tid vs thr
-   VG_(maybe_record_error)( map_threads_reverse_lookup_SLOW(thr),
-                            XE_UnlockBogus, 0, NULL, &xe );
-}
-
-static 
-void record_error_LockOrder ( Thread* thr, Addr before_ga, Addr after_ga,
-                              ExeContext* before_ec, ExeContext* after_ec ) {
-   XError xe;
-   tl_assert( is_sane_Thread(thr) );
-   init_XError(&xe);
-   xe.tag = XE_LockOrder;
-   xe.XE.LockOrder.thr       = thr;
-   xe.XE.LockOrder.before_ga = before_ga;
-   xe.XE.LockOrder.before_ec = before_ec;
-   xe.XE.LockOrder.after_ga  = after_ga;
-   xe.XE.LockOrder.after_ec  = after_ec;
-   // FIXME: tid vs thr
-   VG_(maybe_record_error)( map_threads_reverse_lookup_SLOW(thr),
-                            XE_LockOrder, 0, NULL, &xe );
-}
-
-static 
-void record_error_PthAPIerror ( Thread* thr, HChar* fnname, 
-                                Word err, HChar* errstr ) {
-   XError xe;
-   tl_assert( is_sane_Thread(thr) );
-   tl_assert(fnname);
-   tl_assert(errstr);
-   init_XError(&xe);
-   xe.tag = XE_PthAPIerror;
-   xe.XE.PthAPIerror.thr    = thr;
-   xe.XE.PthAPIerror.fnname = string_table_strdup(fnname);
-   xe.XE.PthAPIerror.err    = err;
-   xe.XE.PthAPIerror.errstr = string_table_strdup(errstr);
-   // FIXME: tid vs thr
-   VG_(maybe_record_error)( map_threads_reverse_lookup_SLOW(thr),
-                            XE_PthAPIerror, 0, NULL, &xe );
-}
-
-static void record_error_Misc ( Thread* thr, HChar* errstr ) {
-   XError xe;
-   tl_assert( is_sane_Thread(thr) );
-   tl_assert(errstr);
-   init_XError(&xe);
-   xe.tag = XE_Misc;
-   xe.XE.Misc.thr    = thr;
-   xe.XE.Misc.errstr = string_table_strdup(errstr);
-   // FIXME: tid vs thr
-   VG_(maybe_record_error)( map_threads_reverse_lookup_SLOW(thr),
-                            XE_Misc, 0, NULL, &xe );
-}
-
-static Bool hg_eq_Error ( VgRes not_used, Error* e1, Error* e2 )
-{
-   XError *xe1, *xe2;
-
-   tl_assert(VG_(get_error_kind)(e1) == VG_(get_error_kind)(e2));
-
-   xe1 = (XError*)VG_(get_error_extra)(e1);
-   xe2 = (XError*)VG_(get_error_extra)(e2);
-   tl_assert(xe1);
-   tl_assert(xe2);
-
-   switch (VG_(get_error_kind)(e1)) {
-      case XE_Race:
-         return xe1->XE.Race.szB == xe2->XE.Race.szB
-                && xe1->XE.Race.isWrite == xe2->XE.Race.isWrite
-                && (clo_cmp_race_err_addrs 
-                       ? xe1->XE.Race.data_addr == xe2->XE.Race.data_addr
-                       : True);
-      case XE_FreeMemLock:
-         return xe1->XE.FreeMemLock.thr == xe2->XE.FreeMemLock.thr
-                && xe1->XE.FreeMemLock.lock == xe2->XE.FreeMemLock.lock;
-      case XE_UnlockUnlocked:
-         return xe1->XE.UnlockUnlocked.thr == xe2->XE.UnlockUnlocked.thr
-                && xe1->XE.UnlockUnlocked.lock == xe2->XE.UnlockUnlocked.lock;
-      case XE_UnlockForeign:
-         return xe1->XE.UnlockForeign.thr == xe2->XE.UnlockForeign.thr
-                && xe1->XE.UnlockForeign.owner == xe2->XE.UnlockForeign.owner
-                && xe1->XE.UnlockForeign.lock == xe2->XE.UnlockForeign.lock;
-      case XE_UnlockBogus:
-         return xe1->XE.UnlockBogus.thr == xe2->XE.UnlockBogus.thr
-                && xe1->XE.UnlockBogus.lock_ga == xe2->XE.UnlockBogus.lock_ga;
-      case XE_PthAPIerror:
-         return xe1->XE.PthAPIerror.thr == xe2->XE.PthAPIerror.thr
-                && 0==VG_(strcmp)(xe1->XE.PthAPIerror.fnname,
-                                  xe2->XE.PthAPIerror.fnname)
-                && xe1->XE.PthAPIerror.err == xe2->XE.PthAPIerror.err;
-      case XE_LockOrder:
-         return xe1->XE.LockOrder.thr == xe2->XE.LockOrder.thr;
-      case XE_Misc:
-         return xe1->XE.Misc.thr == xe2->XE.Misc.thr
-                && 0==VG_(strcmp)(xe1->XE.Misc.errstr, xe2->XE.Misc.errstr);
-      default:
-         tl_assert(0);
-   }
-
-   /*NOTREACHED*/
-   tl_assert(0);
-}
-
-/* Given a WordSetID in univ_tsets (that is, a Thread set ID), produce
-   an XArray* with the corresponding Thread*'s sorted by their
-   errmsg_index fields.  This is for printing out thread sets in
-   repeatable orders, which is important for for repeatable regression
-   testing.  The returned XArray* is dynamically allocated (of course)
-   and so must be hg_freed by the caller. */
-static Int cmp_Thread_by_errmsg_index ( void* thr1V, void* thr2V ) {
-   Thread* thr1 = *(Thread**)thr1V;
-   Thread* thr2 = *(Thread**)thr2V;
-   if (thr1->errmsg_index < thr2->errmsg_index) return -1;
-   if (thr1->errmsg_index > thr2->errmsg_index) return  1;
-   return 0;
-}
-static XArray* /* of Thread* */ get_sorted_thread_set ( WordSetID tset )
-{
-   XArray* xa;
-   UWord*  ts_words;
-   UWord   ts_size, i;
-   xa = VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(Thread*) );
-   tl_assert(xa);
-   HG_(getPayloadWS)( &ts_words, &ts_size, univ_tsets, tset );
-   tl_assert(ts_words);
-   tl_assert(ts_size >= 0);
-   /* This isn't a very clever scheme, but we don't expect this to be
-      called very often. */
-   for (i = 0; i < ts_size; i++) {
-      Thread* thr = (Thread*)ts_words[i];
-      tl_assert(is_sane_Thread(thr));
-      VG_(addToXA)( xa, (void*)&thr );
-   }
-   tl_assert(ts_size == VG_(sizeXA)( xa ));
-   VG_(setCmpFnXA)( xa, cmp_Thread_by_errmsg_index );
-   VG_(sortXA)( xa );
-   return xa;
-}
-
-
-/* Announce (that is, print the point-of-creation) of the threads in
-   'tset'.  Only do this once, as we only want to see these
-   announcements once each.  Also, first sort the threads by their
-   errmsg_index fields, and show only the first N_THREADS_TO_ANNOUNCE.
-   That's because we only want to bother to announce threads
-   enumerated by summarise_threadset() below, and that in turn does
-   the same: it sorts them and then only shows the first
-   N_THREADS_TO_ANNOUNCE. */
-
-static void announce_threadset ( WordSetID tset )
-{
-   const Word limit = N_THREADS_TO_ANNOUNCE;
-   Thread* thr;
-   XArray* sorted;
-   Word    ts_size, i, loopmax;
-   sorted = get_sorted_thread_set( tset );
-   ts_size = VG_(sizeXA)( sorted );
-   tl_assert(ts_size >= 0);
-   loopmax = limit < ts_size  ? limit  : ts_size; /* min(limit, ts_size) */
-   tl_assert(loopmax >= 0 && loopmax <= limit);
-   for (i = 0; i < loopmax; i++) {
-      thr = *(Thread**)VG_(indexXA)( sorted, i );
-      tl_assert(is_sane_Thread(thr));
-      tl_assert(thr->errmsg_index >= 1);
-      if (thr->announced)
-         continue;
-      if (thr->errmsg_index == 1/*FIXME: this hardwires an assumption
-                                  about the identity of the root
-                                  thread*/) {
-         tl_assert(thr->created_at == NULL);
-         VG_(message)(Vg_UserMsg, "Thread #%d is the program's root thread",
-                                  thr->errmsg_index);
-      } else {
-         tl_assert(thr->created_at != NULL);
-         VG_(message)(Vg_UserMsg, "Thread #%d was created",
-                                  thr->errmsg_index);
-         VG_(pp_ExeContext)( thr->created_at );
-      }
-      VG_(message)(Vg_UserMsg, "");
-      thr->announced = True;
-   }
-   VG_(deleteXA)( sorted );
-}
-static void announce_one_thread ( Thread* thr ) {
-   announce_threadset( HG_(singletonWS)(univ_tsets, (Word)thr ));
-}
-
-/* Generate into buf[0 .. nBuf-1] a 1-line summary of a thread set, of
-   the form "#1, #3, #77, #78, #79 and 42 others".  The first
-   N_THREADS_TO_ANNOUNCE are listed explicitly (as '#n') and the
-   leftovers lumped into the 'and n others' bit. */
-
-static void summarise_threadset ( WordSetID tset, Char* buf, UInt nBuf )
-{
-   const Word limit = N_THREADS_TO_ANNOUNCE;
-   Thread* thr;
-   XArray* sorted;
-   Word    ts_size, i, loopmax;
-   UInt    off = 0;
-   tl_assert(nBuf > 0);
-   tl_assert(nBuf >= 40 + 20*limit);
-   tl_assert(buf);
-   sorted = get_sorted_thread_set( tset );
-   ts_size = VG_(sizeXA)( sorted );
-   tl_assert(ts_size >= 0);
-   loopmax = limit < ts_size  ? limit  : ts_size; /* min(limit, ts_size) */
-   tl_assert(loopmax >= 0 && loopmax <= limit);
-   VG_(memset)(buf, 0, nBuf);
-   for (i = 0; i < loopmax; i++) {
-      thr = *(Thread**)VG_(indexXA)( sorted, i );
-      tl_assert(is_sane_Thread(thr));
-      tl_assert(thr->errmsg_index >= 1);
-      off += VG_(sprintf)(&buf[off], "#%d", (Int)thr->errmsg_index);
-      if (i < loopmax-1)
-         off += VG_(sprintf)(&buf[off], ", ");
-   }
-   if (limit < ts_size) {
-      Word others = ts_size - limit;
-      off += VG_(sprintf)(&buf[off], " and %d other%s", 
-                                     (Int)others, others > 1 ? "s" : "");
-   }
-   tl_assert(off < nBuf);
-   tl_assert(buf[nBuf-1] == 0);
-   VG_(deleteXA)( sorted );
-}
-
-static void hg_pp_Error ( Error* err )
-{
-   const Bool show_raw_states = False;
-   XError *xe = (XError*)VG_(get_error_extra)(err);
-
-   switch (VG_(get_error_kind)(err)) {
-
-   case XE_Misc: {
-      tl_assert(xe);
-      tl_assert( is_sane_Thread( xe->XE.Misc.thr ) );
-      announce_one_thread( xe->XE.Misc.thr );
-      VG_(message)(Vg_UserMsg,
-                  "Thread #%d: %s",
-                  (Int)xe->XE.Misc.thr->errmsg_index,
-                  xe->XE.Misc.errstr);
-      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-      break;
-   }
-
-   case XE_LockOrder: {
-      tl_assert(xe);
-      tl_assert( is_sane_Thread( xe->XE.LockOrder.thr ) );
-      announce_one_thread( xe->XE.LockOrder.thr );
-      VG_(message)(Vg_UserMsg,
-                  "Thread #%d: lock order \"%p before %p\" violated",
-                  (Int)xe->XE.LockOrder.thr->errmsg_index,
-                  (void*)xe->XE.LockOrder.before_ga,
-                  (void*)xe->XE.LockOrder.after_ga);
-      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-      if (xe->XE.LockOrder.before_ec && xe->XE.LockOrder.after_ec) {
-         VG_(message)(Vg_UserMsg,
-            "  Required order was established by acquisition of lock at %p",
-            (void*)xe->XE.LockOrder.before_ga);
-         VG_(pp_ExeContext)( xe->XE.LockOrder.before_ec );
-         VG_(message)(Vg_UserMsg,
-            "  followed by a later acquisition of lock at %p", 
-            (void*)xe->XE.LockOrder.after_ga);
-         VG_(pp_ExeContext)( xe->XE.LockOrder.after_ec );
-      }
-      break;
-   }
-
-   case XE_PthAPIerror: {
-      tl_assert(xe);
-      tl_assert( is_sane_Thread( xe->XE.PthAPIerror.thr ) );
-      announce_one_thread( xe->XE.PthAPIerror.thr );
-      VG_(message)(Vg_UserMsg,
-                  "Thread #%d's call to %s failed",
-                  (Int)xe->XE.PthAPIerror.thr->errmsg_index,
-                  xe->XE.PthAPIerror.fnname);
-      VG_(message)(Vg_UserMsg,
-                  "   with error code %ld (%s)",
-                  xe->XE.PthAPIerror.err,
-                  xe->XE.PthAPIerror.errstr);
-      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-      break;
-   }
-
-   case XE_UnlockBogus: {
-      tl_assert(xe);
-      tl_assert( is_sane_Thread( xe->XE.UnlockBogus.thr ) );
-      announce_one_thread( xe->XE.UnlockBogus.thr );
-      VG_(message)(Vg_UserMsg,
-                   "Thread #%d unlocked an invalid lock at %p ",
-                   (Int)xe->XE.UnlockBogus.thr->errmsg_index,
-                   (void*)xe->XE.UnlockBogus.lock_ga);
-      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-      break;
-   }
-
-   case XE_UnlockForeign: {
-      tl_assert(xe);
-      tl_assert( is_sane_LockP( xe->XE.UnlockForeign.lock ) );
-      tl_assert( is_sane_Thread( xe->XE.UnlockForeign.owner ) );
-      tl_assert( is_sane_Thread( xe->XE.UnlockForeign.thr ) );
-      announce_one_thread( xe->XE.UnlockForeign.thr );
-      announce_one_thread( xe->XE.UnlockForeign.owner );
-      VG_(message)(Vg_UserMsg,
-                   "Thread #%d unlocked lock at %p "
-                   "currently held by thread #%d",
-                   (Int)xe->XE.UnlockForeign.thr->errmsg_index,
-                   (void*)xe->XE.UnlockForeign.lock->guestaddr,
-                   (Int)xe->XE.UnlockForeign.owner->errmsg_index );
-      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-      if (xe->XE.UnlockForeign.lock->appeared_at) {
-         VG_(message)(Vg_UserMsg,
-                      "  Lock at %p was first observed",
-                      (void*)xe->XE.UnlockForeign.lock->guestaddr);
-         VG_(pp_ExeContext)( xe->XE.UnlockForeign.lock->appeared_at );
-      }
-      break;
-   }
-
-   case XE_UnlockUnlocked: {
-      tl_assert(xe);
-      tl_assert( is_sane_LockP( xe->XE.UnlockUnlocked.lock ) );
-      tl_assert( is_sane_Thread( xe->XE.UnlockUnlocked.thr ) );
-      announce_one_thread( xe->XE.UnlockUnlocked.thr );
-      VG_(message)(Vg_UserMsg,
-                   "Thread #%d unlocked a not-locked lock at %p ",
-                   (Int)xe->XE.UnlockUnlocked.thr->errmsg_index,
-                   (void*)xe->XE.UnlockUnlocked.lock->guestaddr);
-      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-      if (xe->XE.UnlockUnlocked.lock->appeared_at) {
-         VG_(message)(Vg_UserMsg,
-                      "  Lock at %p was first observed",
-                      (void*)xe->XE.UnlockUnlocked.lock->guestaddr);
-         VG_(pp_ExeContext)( xe->XE.UnlockUnlocked.lock->appeared_at );
-      }
-      break;
+         tl_assert2(0, "unhandled Helgrind client request 0x%lx",
+                       args[0]);
     }
  
-   case XE_FreeMemLock: {
-      tl_assert(xe);
-      tl_assert( is_sane_LockP( xe->XE.FreeMemLock.lock ) );
-      tl_assert( is_sane_Thread( xe->XE.FreeMemLock.thr ) );
-      announce_one_thread( xe->XE.FreeMemLock.thr );
-      VG_(message)(Vg_UserMsg,
-                   "Thread #%d deallocated location %p "
-                   "containing a locked lock",
-                   (Int)xe->XE.FreeMemLock.thr->errmsg_index,
-                   (void*)xe->XE.FreeMemLock.lock->guestaddr);
-      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-      if (xe->XE.FreeMemLock.lock->appeared_at) {
-         VG_(message)(Vg_UserMsg,
-                      "  Lock at %p was first observed",
-                      (void*)xe->XE.FreeMemLock.lock->guestaddr);
-         VG_(pp_ExeContext)( xe->XE.FreeMemLock.lock->appeared_at );
-      }
-      break;
-   }
-
-   case XE_Race: {
-      Addr      err_ga;
-      Char      old_buf[100], new_buf[100];
-      Char      old_tset_buf[140], new_tset_buf[140];
-      SVal      old_state, new_state;
-      Thread*   thr_acc;
-      HChar*    what;
-      Int       szB;
-      WordSetID tset_to_announce = HG_(emptyWS)( univ_tsets );
-
-      /* First extract some essential info */
-      tl_assert(xe);
-      old_state = xe->XE.Race.old_state;
-      new_state = xe->XE.Race.new_state;
-      thr_acc   = xe->XE.Race.thr;
-      what      = xe->XE.Race.isWrite ? "write" : "read";
-      szB       = xe->XE.Race.szB;
-      tl_assert(is_sane_Thread(thr_acc));
-      err_ga = VG_(get_error_address)(err);
-
-      /* Format the low level state print descriptions */
-      show_shadow_w32(old_buf, sizeof(old_buf), old_state);
-      show_shadow_w32(new_buf, sizeof(new_buf), new_state);
-
-      /* Now we have to 'announce' the threadset mentioned in the
-         error message, if it hasn't already been announced.
-         Unfortunately the precise threadset and error message text
-         depends on the nature of the transition involved.  So now
-         fall into a case analysis of the error state transitions. */
-
-      /* CASE of Excl -> ShM */
-      if (is_SHVAL_Excl(old_state) && is_SHVAL_ShM(new_state)) {
-         SegmentID old_segid;
-         Segment*  old_seg;
-         Thread*   old_thr; 
-         WordSetID new_tset;
-         old_segid = un_SHVAL_Excl( old_state );
-         tl_assert(is_sane_SegmentID(old_segid));
-         old_seg = map_segments_lookup( old_segid );
-         tl_assert(is_sane_Segment(old_seg));
-         tl_assert(old_seg->thr);
-         old_thr = old_seg->thr;
-         tl_assert(is_sane_Thread(old_thr));
-
-         new_tset = un_SHVAL_ShM_tset(new_state);
-         tset_to_announce = HG_(addToWS)( univ_tsets,
-                                          new_tset, (Word)old_thr );
-         announce_threadset( tset_to_announce );
-
-         VG_(message)(Vg_UserMsg,
-                      "Possible data race during %s of size %d at %#lx",
-                      what, szB, err_ga);
-         VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-         /* pp_AddrInfo(err_addr, &extra->addrinfo); */
-         if (show_raw_states)
-         VG_(message)(Vg_UserMsg,
-                      "  Old state 0x%08x=%s, new state 0x%08x=%s",
-                      old_state, old_buf, new_state, new_buf);
-         VG_(message)(Vg_UserMsg,
-                      "  Old state: owned exclusively by thread #%d",
-                      old_thr->errmsg_index);
-         // This should always show exactly 2 threads
-         summarise_threadset( new_tset, new_tset_buf, sizeof(new_tset_buf) );
-         VG_(message)(Vg_UserMsg,
-                      "  New state: shared-modified by threads %s",
-                      new_tset_buf );
-         VG_(message)(Vg_UserMsg,
-                      "  Reason:    this thread, #%d, holds no locks at all",
-                      thr_acc->errmsg_index);
-      }
-      else 
-      /* Case of ShR/M -> ShM */
-      if (is_SHVAL_Sh(old_state) && is_SHVAL_ShM(new_state)) {
-         WordSetID old_tset = un_SHVAL_Sh_tset(old_state);
-         WordSetID new_tset = un_SHVAL_Sh_tset(new_state);
-
-         tset_to_announce = HG_(unionWS)( univ_tsets, old_tset, new_tset );
-         announce_threadset( tset_to_announce );
-
-         VG_(message)(Vg_UserMsg,
-                      "Possible data race during %s of size %d at %#lx",
-                      what, szB, err_ga);
-         VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-         /* pp_AddrInfo(err_addr, &extra->addrinfo); */
-         if (show_raw_states)
-         VG_(message)(Vg_UserMsg,
-                      "  Old state 0x%08x=%s, new state 0x%08x=%s",
-                      old_state, old_buf, new_state, new_buf);
-
-         summarise_threadset( old_tset, old_tset_buf, sizeof(old_tset_buf) );
-         summarise_threadset( new_tset, new_tset_buf, sizeof(new_tset_buf) );
-
-         VG_(message)(Vg_UserMsg,
-                      "  Old state: shared-%s by threads %s", 
-                      is_SHVAL_ShM(old_state) ? "modified" : "readonly", 
-                      old_tset_buf);
-         VG_(message)(Vg_UserMsg,
-                      "  New state: shared-modified by threads %s", 
-                      new_tset_buf);
-         VG_(message)(Vg_UserMsg,
-                      "  Reason:    this thread, #%d, holds no "
-                      "consistent locks",
-                      thr_acc->errmsg_index);
-         if (xe->XE.Race.mb_lastlock) {
-            VG_(message)(Vg_UserMsg, "  Last consistently used lock for %#lx was "
-                                     "first observed", err_ga);
-            VG_(pp_ExeContext)(xe->XE.Race.mb_lastlock);
-         } else {
-            VG_(message)(Vg_UserMsg, "  Location %#lx has never been protected "
-                                     "by any lock", err_ga);
-         }
-      }
-      /* Hmm, unknown transition.  Just print what we do know. */
-      else {
-         VG_(message)(Vg_UserMsg,
-                      "Possible data race during %s of size %d at %#lx",
-                      what, szB, err_ga);
-         VG_(pp_ExeContext)( VG_(get_error_where)(err) );
-
-         //pp_AddrInfo(err_addr, &extra->addrinfo);
-         VG_(message)(Vg_UserMsg,
-                      "  Old state 0x%08x=%s, new state 0x%08x=%s",
-                      old_state, old_buf, new_state, new_buf);
-      }
-
-      /* If we have a better description of the address, show it. */
-      if (xe->XE.Race.descr1[0] != 0)
-         VG_(message)(Vg_UserMsg, "  %s", &xe->XE.Race.descr1[0]);
-      if (xe->XE.Race.descr2[0] != 0)
-         VG_(message)(Vg_UserMsg, "  %s", &xe->XE.Race.descr2[0]);
-
-      break; /* case XE_Race */
-   } /* case XE_Race */
-
-   default:
-      tl_assert(0);
-   } /* switch (VG_(get_error_kind)(err)) */
-}
-
-static Char* hg_get_error_name ( Error* err )
-{
-   switch (VG_(get_error_kind)(err)) {
-      case XE_Race:           return "Race";
-      case XE_FreeMemLock:    return "FreeMemLock";
-      case XE_UnlockUnlocked: return "UnlockUnlocked";
-      case XE_UnlockForeign:  return "UnlockForeign";
-      case XE_UnlockBogus:    return "UnlockBogus";
-      case XE_PthAPIerror:    return "PthAPIerror";
-      case XE_LockOrder:      return "LockOrder";
-      case XE_Misc:           return "Misc";
-      default: tl_assert(0); /* fill in missing case */
-   }
-}
-
-static Bool hg_recognised_suppression ( Char* name, Supp *su )
-{
-#  define TRY(_name,_xskind)                   \
-      if (0 == VG_(strcmp)(name, (_name))) {   \
-         VG_(set_supp_kind)(su, (_xskind));    \
-         return True;                          \
-      }
-   TRY("Race",           XS_Race);
-   TRY("FreeMemLock",    XS_FreeMemLock);
-   TRY("UnlockUnlocked", XS_UnlockUnlocked);
-   TRY("UnlockForeign",  XS_UnlockForeign);
-   TRY("UnlockBogus",    XS_UnlockBogus);
-   TRY("PthAPIerror",    XS_PthAPIerror);
-   TRY("LockOrder",      XS_LockOrder);
-   TRY("Misc",           XS_Misc);
-   return False;
-#  undef TRY
-}
-
-static Bool hg_read_extra_suppression_info ( Int fd, Char* buf, Int nBuf,
-                                             Supp* su )
-{
-   /* do nothing -- no extra suppression info present.  Return True to
-      indicate nothing bad happened. */
     return True;
  }
  
-static Bool hg_error_matches_suppression ( Error* err, Supp* su )
-{
-   switch (VG_(get_supp_kind)(su)) {
-   case XS_Race:           return VG_(get_error_kind)(err) == XE_Race;
-   case XS_FreeMemLock:    return VG_(get_error_kind)(err) == XE_FreeMemLock;
-   case XS_UnlockUnlocked: return VG_(get_error_kind)(err) == XE_UnlockUnlocked;
-   case XS_UnlockForeign:  return VG_(get_error_kind)(err) == XE_UnlockForeign;
-   case XS_UnlockBogus:    return VG_(get_error_kind)(err) == XE_UnlockBogus;
-   case XS_PthAPIerror:    return VG_(get_error_kind)(err) == XE_PthAPIerror;
-   case XS_LockOrder:      return VG_(get_error_kind)(err) == XE_LockOrder;
-   case XS_Misc:           return VG_(get_error_kind)(err) == XE_Misc;
-   //case XS_: return VG_(get_error_kind)(err) == XE_;
-   default: tl_assert(0); /* fill in missing cases */
-   }
-}
-
-static void hg_print_extra_suppression_info ( Error* err )
-{
-   /* Do nothing */
-}
-
  
  /*----------------------------------------------------------------*/
  /*--- Setup                                                    ---*/
@@ -8559,31 +3684,22 @@ static void hg_print_extra_suppression_info ( Error* err )
  
  static Bool hg_process_cmd_line_option ( Char* arg )
  {
-   if      (VG_CLO_STREQ(arg, "--happens-before=none"))
-      clo_happens_before = 0;
-   else if (VG_CLO_STREQ(arg, "--happens-before=threads"))
-      clo_happens_before = 1;
-   else if (VG_CLO_STREQ(arg, "--happens-before=all"))
-      clo_happens_before = 2;
-
-   else if (VG_CLO_STREQ(arg, "--gen-vcg=no"))
-      clo_gen_vcg = 0;
-   else if (VG_CLO_STREQ(arg, "--gen-vcg=yes"))
-      clo_gen_vcg = 1;
-   else if (VG_CLO_STREQ(arg, "--gen-vcg=yes-w-vts"))
-      clo_gen_vcg = 2;
+   if      (VG_CLO_STREQ(arg, "--track-lockorders=no"))
+      HG_(clo_track_lockorders) = False;
+   else if (VG_CLO_STREQ(arg, "--track-lockorders=yes"))
+      HG_(clo_track_lockorders) = True;
  
     else if (VG_CLO_STREQ(arg, "--cmp-race-err-addrs=no"))
-      clo_cmp_race_err_addrs = False;
+      HG_(clo_cmp_race_err_addrs) = False;
     else if (VG_CLO_STREQ(arg, "--cmp-race-err-addrs=yes"))
-      clo_cmp_race_err_addrs = True;
+      HG_(clo_cmp_race_err_addrs) = True;
  
     else if (VG_CLO_STREQN(13, arg, "--trace-addr=")) {
-      clo_trace_addr = VG_(atoll16)(&arg[13]);
-      if (clo_trace_level == 0)
-         clo_trace_level = 1;
+      HG_(clo_trace_addr) = VG_(atoll16)(&arg[13]);
+      if (HG_(clo_trace_level) == 0)
+         HG_(clo_trace_level) = 1;
     }
-   else VG_BNUM_CLO(arg, "--trace-level", clo_trace_level, 0, 2)
+   else VG_BNUM_CLO(arg, "--trace-level", HG_(clo_trace_level), 0, 2)
  
     /* "stuvwx" --> stuvwx (binary) */
     else if (VG_CLO_STREQN(18, arg, "--hg-sanity-flags=")) {
@@ -8597,14 +3713,14 @@ static Bool hg_process_cmd_line_option ( Char* arg )
        }
        for (j = 0; j < 6; j++) {
           if      ('0' == opt[j]) { /* do nothing */ }
-         else if ('1' == opt[j]) clo_sanity_flags |= (1 << (6-1-j));
+         else if ('1' == opt[j]) HG_(clo_sanity_flags) |= (1 << (6-1-j));
           else {
              VG_(message)(Vg_UserMsg, "--hg-sanity-flags argument can "
                                       "only contain 0s and 1s");
              return False;
           }
        }
-      if (0) VG_(printf)("XXX sanity flags: 0x%x\n", clo_sanity_flags);
+      if (0) VG_(printf)("XXX sanity flags: 0x%lx\n", HG_(clo_sanity_flags));
     }
  
     else 
@@ -8616,8 +3732,7 @@ static Bool hg_process_cmd_line_option ( Char* arg )
  static void hg_print_usage ( void )
  {
     VG_(printf)(
-"    --happens-before=none|threads|all   [all] consider no events, thread\n"
-"      create/join, create/join/cvsignal/cvwait/semwait/post as sync points\n"
+"    --track-lockorders=no|yes  show lock ordering errors? [yes]\n"
  "    --trace-addr=0xXXYYZZ     show all state changes for address 0xXXYYZZ\n"
  "    --trace-level=0|1|2       verbosity level of --trace-addr [1]\n"
     );
@@ -8627,8 +3742,6 @@ static void hg_print_usage ( void )
  static void hg_print_debug_usage ( void )
  {
     VG_(replacement_malloc_print_debug_usage)();
-   VG_(printf)("    --gen-vcg=no|yes|yes-w-vts   show happens-before graph "
-               "in .vcg format [no]\n");
     VG_(printf)("    --cmp-race-err-addrs=no|yes  are data addresses in "
                 "race errors significant? [no]\n");
     VG_(printf)("    --hg-sanity-flags=<XXXXXX> sanity check "
@@ -8652,12 +3765,9 @@ static void hg_fini ( Int exitcode )
  {
     if (SHOW_DATA_STRUCTURES)
        pp_everything( PP_ALL, "SK_(fini)" );
-   if (clo_sanity_flags)
+   if (HG_(clo_sanity_flags))
        all__sanity_check("SK_(fini)");
  
-   if (clo_gen_vcg > 0)
-      segments__generate_vcg();
-
     if (VG_(clo_verbosity) >= 2) {
  
        if (1) {
@@ -8669,21 +3779,19 @@ static void hg_fini ( Int exitcode )
           HG_(ppWSUstats)( univ_laog,  "univ_laog" );
        }
  
-      VG_(printf)("\n");
-      VG_(printf)(" hbefore: %'10lu queries\n",        stats__hbefore_queries);
-      VG_(printf)(" hbefore: %'10lu cache 0 hits\n",   stats__hbefore_cache0s);
-      VG_(printf)(" hbefore: %'10lu cache > 0 hits\n", stats__hbefore_cacheNs);
-      VG_(printf)(" hbefore: %'10lu graph searches\n", stats__hbefore_gsearches);
-      VG_(printf)(" hbefore: %'10lu   of which slow\n",
-                  stats__hbefore_gsearches - stats__hbefore_gsearchFs);
-      VG_(printf)(" hbefore: %'10lu stack high water mark\n",
-                  stats__hbefore_stk_hwm);
-      VG_(printf)(" hbefore: %'10lu cache invals\n",   stats__hbefore_invals);
-      VG_(printf)(" hbefore: %'10lu probes\n",         stats__hbefore_probes);
+      //zz       VG_(printf)("\n");
+      //zz       VG_(printf)(" hbefore: %'10lu queries\n",        stats__hbefore_queries);
+      //zz       VG_(printf)(" hbefore: %'10lu cache 0 hits\n",   stats__hbefore_cache0s);
+      //zz       VG_(printf)(" hbefore: %'10lu cache > 0 hits\n", stats__hbefore_cacheNs);
+      //zz       VG_(printf)(" hbefore: %'10lu graph searches\n", stats__hbefore_gsearches);
+      //zz       VG_(printf)(" hbefore: %'10lu   of which slow\n",
+      //zz                   stats__hbefore_gsearches - stats__hbefore_gsearchFs);
+      //zz       VG_(printf)(" hbefore: %'10lu stack high water mark\n",
+      //zz                   stats__hbefore_stk_hwm);
+      //zz       VG_(printf)(" hbefore: %'10lu cache invals\n",   stats__hbefore_invals);
+      //zz       VG_(printf)(" hbefore: %'10lu probes\n",         stats__hbefore_probes);
  
        VG_(printf)("\n");
-      VG_(printf)("        segments: %'8lu Segment objects allocated\n",
-                  stats__mk_Segment);
        VG_(printf)("        locksets: %'8d unique lock sets\n",
                    (Int)HG_(cardinalityWSU)( univ_lsets ));
        VG_(printf)("      threadsets: %'8d unique thread sets\n",
@@ -8695,13 +3803,13 @@ static void hg_fini ( Int exitcode )
                    stats__ga_LL_adds,
                    (Int)(ga_to_lastlock ? VG_(sizeFM)( ga_to_lastlock ) : 0) );
  
-      VG_(printf)("  LockN-to-P map: %'8lu queries (%d map size)\n",
-                  stats__ga_LockN_to_P_queries,
-                  (Int)(yaWFM ? VG_(sizeFM)( yaWFM ) : 0) );
+      VG_(printf)("  LockN-to-P map: %'8llu queries (%llu map size)\n",
+                  HG_(stats__LockN_to_P_queries),
+                  HG_(stats__LockN_to_P_get_map_size)() );
  
-      VG_(printf)("string table map: %'8lu queries (%d map size)\n",
-                  stats__string_table_queries,
-                  (Int)(string_table ? VG_(sizeFM)( string_table ) : 0) );
+      VG_(printf)("string table map: %'8llu queries (%llu map size)\n",
+                  HG_(stats__string_table_queries),
+                  HG_(stats__string_table_get_map_size)() );
        VG_(printf)("            LAOG: %'8d map size\n",
                    (Int)(laog ? VG_(sizeFM)( laog ) : 0));
        VG_(printf)(" LAOG exposition: %'8d map size\n",
@@ -8714,79 +3822,53 @@ static void hg_fini ( Int exitcode )
        VG_(printf)("   sanity checks: %'8lu\n", stats__sanity_checks);
  
        VG_(printf)("\n");
-      VG_(printf)("     msm: %'12lu %'12lu rd/wr_Excl_nochange\n",
-                  stats__msm_read_Excl_nochange, stats__msm_write_Excl_nochange);
-      VG_(printf)("     msm: %'12lu %'12lu rd/wr_Excl_transfer\n",
-                  stats__msm_read_Excl_transfer, stats__msm_write_Excl_transfer);
-      VG_(printf)("     msm: %'12lu %'12lu rd/wr_Excl_to_ShR/ShM\n",
-                  stats__msm_read_Excl_to_ShR,   stats__msm_write_Excl_to_ShM);
-      VG_(printf)("     msm: %'12lu %'12lu rd/wr_ShR_to_ShR/ShM\n",
-                  stats__msm_read_ShR_to_ShR,    stats__msm_write_ShR_to_ShM);
-      VG_(printf)("     msm: %'12lu %'12lu rd/wr_ShM_to_ShM\n",
-                  stats__msm_read_ShM_to_ShM,    stats__msm_write_ShM_to_ShM);
-      VG_(printf)("     msm: %'12lu %'12lu rd/wr_New_to_Excl\n",
-                  stats__msm_read_New_to_Excl,   stats__msm_write_New_to_Excl);
-      VG_(printf)("     msm: %'12lu %'12lu rd/wr_NoAccess\n",
-                  stats__msm_read_NoAccess,      stats__msm_write_NoAccess);
+      libhb_shutdown(True);
+   }
+}
  
-      VG_(printf)("\n");
-      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
-                  stats__secmaps_allocd,
-                  stats__secmap_ga_space_covered);
-      VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
-                  stats__secmap_linesZ_allocd,
-                  stats__secmap_linesZ_bytes);
-      VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)\n",
-                  stats__secmap_linesF_allocd,
-                  stats__secmap_linesF_bytes);
-      VG_(printf)(" secmaps: %'10lu iterator steppings\n",
-                  stats__secmap_iterator_steppings);
+/* FIXME: move these somewhere sane */
  
-      VG_(printf)("\n");
-      VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
-                  stats__cache_totrefs, stats__cache_totmisses );
-      VG_(printf)("   cache: %'12lu Z-fetch, %'12lu F-fetch\n",
-                  stats__cache_Z_fetches, stats__cache_F_fetches );
-      VG_(printf)("   cache: %'12lu Z-wback, %'12lu F-wback\n",
-                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
-      VG_(printf)("   cache: %'12lu invals,  %'12lu flushes\n",
-                  stats__cache_invals, stats__cache_flushes );
+static
+void for_libhb__get_stacktrace ( Thr* hbt, Addr* frames, UWord nRequest )
+{
+   Thread*     thr;
+   ThreadId    tid;
+   UWord       nActual;
+   tl_assert(hbt);
+   thr = libhb_get_Thr_opaque( hbt );
+   tl_assert(thr);
+   tid = map_threads_maybe_reverse_lookup_SLOW(thr);
+   nActual = (UWord)VG_(get_StackTrace)( tid, frames, (UInt)nRequest,
+                                         NULL, NULL, 0 );
+   tl_assert(nActual <= nRequest);
+   for (; nActual < nRequest; nActual++)
+      frames[nActual] = 0;
+}
  
-      VG_(printf)("\n");
-      VG_(printf)("   cline: %'10lu normalises\n",
-                  stats__cline_normalises );
-      VG_(printf)("   cline:  reads 8/4/2/1: %'12lu %'12lu %'12lu %'12lu\n",
-                  stats__cline_read64s,
-                  stats__cline_read32s,
-                  stats__cline_read16s,
-                  stats__cline_read8s );
-      VG_(printf)("   cline: writes 8/4/2/1: %'12lu %'12lu %'12lu %'12lu\n",
-                  stats__cline_write64s,
-                  stats__cline_write32s,
-                  stats__cline_write16s,
-                  stats__cline_write8s );
-      VG_(printf)("   cline:   sets 8/4/2/1: %'12lu %'12lu %'12lu %'12lu\n",
-                  stats__cline_set64s,
-                  stats__cline_set32s,
-                  stats__cline_set16s,
-                  stats__cline_set8s );
-      VG_(printf)("   cline: get1s %'lu, copy1s %'lu\n",
-                  stats__cline_get8s, stats__cline_copy8s );
-      VG_(printf)("   cline:    splits: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
-                 stats__cline_64to32splits,
-                 stats__cline_32to16splits,
-                 stats__cline_16to8splits );
-      VG_(printf)("   cline: pulldowns: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
-                 stats__cline_64to32pulldown,
-                 stats__cline_32to16pulldown,
-                 stats__cline_16to8pulldown );
+static
+struct EC_*  for_libhb__stacktrace_to_EC ( Addr* frames, UWord nFrames )
+{
+   return VG_(make_ExeContext_from_StackTrace)( frames, (UInt)nFrames );
+}
  
-      VG_(printf)("\n");
-   }
+static
+struct EC_*  for_libhb__get_EC ( Thr* hbt )
+{
+   Thread*     thr;
+   ThreadId    tid;
+   ExeContext* ec;
+   tl_assert(hbt);
+   thr = libhb_get_Thr_opaque( hbt );
+   tl_assert(thr);
+   tid = map_threads_maybe_reverse_lookup_SLOW(thr);
+   ec = VG_(record_ExeContext)( tid, 0 );
+   return (struct EC_*) ec;
  }
  
+
  static void hg_pre_clo_init ( void )
  {
+   Thr* hbthr_root;
     VG_(details_name)            ("Helgrind");
     VG_(details_version)         (NULL);
     VG_(details_description)     ("a thread error detector");
@@ -8800,15 +3882,15 @@ static void hg_pre_clo_init ( void )
                                     hg_fini);
  
     VG_(needs_core_errors)         ();
-   VG_(needs_tool_errors)         (hg_eq_Error,
-                                   hg_pp_Error,
+   VG_(needs_tool_errors)         (HG_(eq_Error),
+                                   HG_(pp_Error),
                                     False,/*show TIDs for errors*/
-                                   hg_update_extra,
-                                   hg_recognised_suppression,
-                                   hg_read_extra_suppression_info,
-                                   hg_error_matches_suppression,
-                                   hg_get_error_name,
-                                   hg_print_extra_suppression_info);
+                                   HG_(update_extra),
+                                   HG_(recognised_suppression),
+                                   HG_(read_extra_suppression_info),
+                                   HG_(error_matches_suppression),
+                                   HG_(get_error_name),
+                                   HG_(print_extra_suppression_info));
  
     VG_(needs_command_line_options)(hg_process_cmd_line_option,
                                     hg_print_usage,
@@ -8830,9 +3912,7 @@ static void hg_pre_clo_init ( void )
                                     hg_cli__realloc,
                                     HG_CLI__MALLOC_REDZONE_SZB );
  
-   VG_(needs_var_info)();
-
-   //VG_(needs_xml_output)          ();
+   VG_(needs_var_info)(); /* optional */
  
     VG_(track_new_mem_startup)     ( evh__new_mem_w_perms );
     VG_(track_new_mem_stack_signal)( evh__new_mem_w_tid );
@@ -8866,7 +3946,13 @@ static void hg_pre_clo_init ( void )
     VG_(track_start_client_code)( evh__start_client_code );
     VG_(track_stop_client_code)( evh__stop_client_code );
  
-   initialise_data_structures();
+   /////////////////////////////////////////////
+   hbthr_root = libhb_init( for_libhb__get_stacktrace, 
+                            for_libhb__stacktrace_to_EC,
+                            for_libhb__get_EC );
+   /////////////////////////////////////////////
+
+   initialise_data_structures(hbthr_root);
  
     /* Ensure that requirements for "dodgy C-as-C++ style inheritance"
        as described in comments at the top of pub_tool_hashtable.h, are
@@ -8876,10 +3962,6 @@ static void hg_pre_clo_init ( void )
     hg_mallocmeta_table
        = VG_(HT_construct)( "hg_malloc_metadata_table" );
  
-   /* a SecMap must contain an integral number of CacheLines */
-   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
-   /* also ... a CacheLine holds an integral number of trees */
-   tl_assert(0 == (N_LINE_ARANGE % 8));
  }
  
  VG_DETERMINE_INTERFACE_VERSION(hg_pre_clo_init)
diff --git a/helgrind/hg_wordset.c b/helgrind/hg_wordset.c

index 041af2e8f8be373d76b592ab6956cf4a134156bc..d5c7fbacc13fa9b742f27e4b03d49e4c5112d934 100644 (file)
--- a/helgrind/hg_wordset.c
+++ b/helgrind/hg_wordset.c
@@ -38,10 +38,11 @@
  #include "pub_tool_libcassert.h"
  #include "pub_tool_libcbase.h"
  #include "pub_tool_libcprint.h"
+#include "pub_tool_threadstate.h"
  #include "pub_tool_wordfm.h"
  
-#define HG_(str) VGAPPEND(vgHelgrind_,str)
-#include "hg_wordset.h"
+#include "hg_basics.h"
+#include "hg_wordset.h"     /* self */
  
  //------------------------------------------------------------------//
  //--- Word Cache                                                 ---//
@@ -140,7 +141,8 @@ typedef
     corresponding ix2vec entry number.  The two mappings are mutually
     redundant. */
  struct _WordSetU {
-      void*     (*alloc)(HChar*, SizeT);
+      void*     (*alloc)(HChar*,SizeT);
+      HChar*    cc;
        void      (*dealloc)(void*);
        WordFM*   vec2ix; /* WordVec-to-WordSet mapping tree */
        WordVec** ix2vec; /* WordSet-to-WordVec mapping array */
@@ -176,12 +178,12 @@ static WordVec* new_WV_of_size ( WordSetU* wsu, UWord sz )
  {
     WordVec* wv;
     tl_assert(sz >= 0);
-   wv = wsu->alloc( "hg", sizeof(WordVec) );
+   wv = wsu->alloc( wsu->cc, sizeof(WordVec) );
     wv->owner = wsu;
     wv->words = NULL;
     wv->size = sz;
     if (sz > 0) {
-     wv->words = wsu->alloc( "hg", (SizeT)sz * sizeof(UWord) );
+     wv->words = wsu->alloc( wsu->cc, (SizeT)sz * sizeof(UWord) );
     }
     return wv;
  }
@@ -238,7 +240,7 @@ static void ensure_ix2vec_space ( WordSetU* wsu )
        return;
     new_sz = 2 * wsu->ix2vec_size;
     if (new_sz == 0) new_sz = 2;
-   new_vec = wsu->alloc( "hg", new_sz * sizeof(WordVec*) );
+   new_vec = wsu->alloc( wsu->cc, new_sz * sizeof(WordVec*) );
     tl_assert(new_vec);
     for (i = 0; i < wsu->ix2vec_size; i++)
        new_vec[i] = wsu->ix2vec[i];
@@ -306,17 +308,19 @@ static WordSet add_or_dealloc_WordVec( WordSetU* wsu, WordVec* wv_new )
  
  
  WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( HChar*, SizeT ),
+                             HChar* cc,
                               void  (*dealloc)(void*),
                               Word  cacheSize )
  {
     WordSetU* wsu;
     WordVec*  empty;
  
-   wsu          = alloc_nofail( "hg", sizeof(WordSetU) );
+   wsu          = alloc_nofail( cc, sizeof(WordSetU) );
     VG_(memset)( wsu, 0, sizeof(WordSetU) );
     wsu->alloc   = alloc_nofail;
+   wsu->cc      = cc;
     wsu->dealloc = dealloc;
-   wsu->vec2ix  = VG_(newFM)( alloc_nofail, "hg", 
+   wsu->vec2ix  = VG_(newFM)( alloc_nofail, cc,
                                dealloc, cmp_WordVecs_for_FM );
     wsu->ix2vec_used = 0;
     wsu->ix2vec_size = 0;
diff --git a/helgrind/hg_wordset.h b/helgrind/hg_wordset.h

index 871ab7828522ee8c423849bc9e151eb8f4d149f4..3a8511fb4d26860d8f915e31550f01fc5841af8d 100644 (file)
--- a/helgrind/hg_wordset.h
+++ b/helgrind/hg_wordset.h
@@ -48,6 +48,7 @@ typedef  UInt              WordSet;   /* opaque, small int index */
  
  /* Allocate and initialise a WordSetU */
  WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( HChar*, SizeT ),
+                             HChar* cc,
                               void  (*dealloc)(void*),
                               Word  cacheSize );
  
diff --git a/helgrind/libhb.h b/helgrind/libhb.h

new file mode 100644 (file)

index 0000000..6031332
--- /dev/null
+++ b/helgrind/libhb.h
@@ -0,0 +1,154 @@
+
+/*--------------------------------------------------------------------*/
+/*--- LibHB: a library for implementing and checking               ---*/
+/*--- the happens-before relationship in concurrent programs.      ---*/
+/*---                                                      libhb.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of LibHB, a library for implementing and checking
+   the happens-before relationship in concurrent programs.
+
+   Copyright (C) 2008-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __LIBHB_H
+#define __LIBHB_H
+
+/* Abstract to user: thread identifiers */
+/* typedef  struct _Thr  Thr; */ /* now in hg_lock_n_thread.h */
+
+/* Abstract to user: synchronisation objects */
+/* typedef  struct _SO  SO; */ /* now in hg_lock_n_thread.h */
+
+/* Abstract to the lib: execution contexts */
+/* struct _EC will be defined by user at some point. */
+typedef  struct _EC  EC;
+
+/* Initialise library; returns Thr* for root thread.  'shadow_alloc'
+   should never return NULL, instead it should simply not return if
+   it encounters an out-of-memory condition. */
+Thr* libhb_init (
+        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
+        struct _EC* (*stacktrace_to_EC)( Addr*, UWord ),
+        struct _EC* (*get_EC)( Thr* )
+     );
+
+/* Shut down the library, and print stats (in fact that's _all_
+   this is for.) */
+void libhb_shutdown ( Bool show_stats );
+
+/* Thread creation: returns Thr* for new thread */
+Thr* libhb_create ( Thr* parent );
+
+/* Thread async exit */
+void libhb_async_exit ( Thr* exitter );
+
+/* Synchronisation objects (abstract to caller) */
+
+/* Allocate a new one (alloc'd by library) */
+SO* libhb_so_alloc ( void );
+
+/* Dealloc one */
+void libhb_so_dealloc ( SO* so );
+
+/* Send a message via a sync object.  If strong_send is true, the
+   resulting inter-thread dependency seen by a future receiver of this
+   message will be a dependency on this thread only.  That is, in a
+   strong send, the VC inside the SO is replaced by the clock of the
+   sending thread.  For a weak send, the sender's VC is joined into
+   that already in the SO, if any.  This subtlety is needed to model
+   rwlocks: a strong send corresponds to releasing a rwlock that had
+   been w-held (or releasing a standard mutex).  A weak send
+   corresponds to releasing a rwlock that has been r-held.
+
+   (rationale): Since in general many threads may hold a rwlock in
+   r-mode, a weak send facility is necessary in order that the final
+   SO reflects the join of the VCs of all the threads releasing the
+   rwlock, rather than merely holding the VC of the most recent thread
+   to release it. */
+void libhb_so_send ( Thr* thr, SO* so, Bool strong_send );
+
+/* Recv a message from a sync object.  If strong_recv is True, the
+   resulting inter-thread dependency is considered adequate to induce
+   a h-b ordering on both reads and writes.  If it is False, the
+   implied h-b ordering exists only for reads, not writes.  This
+   subtlety is required in order to support reader-writer locks: a
+   thread doing a write-acquire of a rwlock (or acquiring a normal
+   mutex) models this by doing a strong receive.  A thread doing a
+   read-acquire of a rwlock models this by doing a !strong_recv. */
+void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv );
+
+/* Has this SO ever been sent on? */
+Bool libhb_so_everSent ( SO* so );
+
+/* Memory accesses (1/2/4/8 byte size, or an arbitrary range for the
+   _N variant).  They report a race if one is found. */
+#define LIBHB_WRITE_1(_thr,_a)    zsm_apply8___msm_write((_thr),(_a))
+#define LIBHB_WRITE_2(_thr,_a)    zsm_apply16___msm_write((_thr),(_a))
+#define LIBHB_WRITE_4(_thr,_a)    zsm_apply32___msm_write((_thr),(_a))
+#define LIBHB_WRITE_8(_thr,_a)    zsm_apply64___msm_write((_thr),(_a))
+#define LIBHB_WRITE_N(_thr,_a,_n) zsm_apply_range___msm_write((_thr),(_a),(_n))
+
+#define LIBHB_READ_1(_thr,_a)    zsm_apply8___msm_read((_thr),(_a))
+#define LIBHB_READ_2(_thr,_a)    zsm_apply16___msm_read((_thr),(_a))
+#define LIBHB_READ_4(_thr,_a)    zsm_apply32___msm_read((_thr),(_a))
+#define LIBHB_READ_8(_thr,_a)    zsm_apply64___msm_read((_thr),(_a))
+#define LIBHB_READ_N(_thr,_a,_n) zsm_apply_range___msm_read((_thr),(_a),(_n))
+
+void zsm_apply8___msm_write ( Thr* thr, Addr a );
+void zsm_apply16___msm_write ( Thr* thr, Addr a );
+void zsm_apply32___msm_write ( Thr* thr, Addr a );
+void zsm_apply64___msm_write ( Thr* thr, Addr a );
+void zsm_apply_range___msm_write ( Thr* thr,
+                                   Addr a, SizeT len );
+
+void zsm_apply8___msm_read ( Thr* thr, Addr a );
+void zsm_apply16___msm_read ( Thr* thr, Addr a );
+void zsm_apply32___msm_read ( Thr* thr, Addr a );
+void zsm_apply64___msm_read ( Thr* thr, Addr a );
+void zsm_apply_range___msm_read ( Thr* thr,
+                                  Addr a, SizeT len );
+
+
+/* Set memory address ranges to new (freshly allocated), or noaccess
+   (no longer accessible). */
+void libhb_range_new      ( Thr*, Addr, SizeT );
+void libhb_range_noaccess ( Thr*, Addr, SizeT );
+
+/* For the convenience of callers, we offer to store one void* item in
+   a Thr, which we ignore, but the caller can get or set any time. */
+void* libhb_get_Thr_opaque ( Thr* );
+void  libhb_set_Thr_opaque ( Thr*, void* );
+
+/* Low level copy of shadow state from [src,src+len) to [dst,dst+len).
+   Overlapping moves are checked for and asserted against. */
+void libhb_copy_shadow_state ( Addr src, Addr dst, SizeT len );
+
+/* Call this periodically to give libhb the opportunity to
+   garbage-collect its internal data structures. */
+void libhb_maybe_GC ( void );
+
+#endif /* __LIBHB_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                  libhb.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/helgrind/libhb_core.c b/helgrind/libhb_core.c

new file mode 100644 (file)

index 0000000..fc232f7
--- /dev/null
+++ b/helgrind/libhb_core.c
@@ -0,0 +1,4562 @@
+
+/*--------------------------------------------------------------------*/
+/*--- LibHB: a library for implementing and checking               ---*/
+/*--- the happens-before relationship in concurrent programs.      ---*/
+/*---                                                 libhb_core.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of LibHB, a library for implementing and checking
+   the happens-before relationship in concurrent programs.
+
+   Copyright (C) 2008-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_wordfm.h"
+#include "pub_tool_xarray.h"
+#include "pub_tool_oset.h"
+#include "pub_tool_threadstate.h"
+#include "pub_tool_aspacemgr.h"
+#include "pub_tool_execontext.h"
+#include "pub_tool_errormgr.h"
+
+#include "hg_basics.h"
+#include "hg_wordset.h"
+#include "hg_lock_n_thread.h"
+#include "hg_errors.h"
+
+#include "libhb.h"
+
+
+/* fwds for
+   Globals needed by other parts of the library.  These are set
+   once at startup and then never changed. */
+static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
+static struct _EC* (*main_stacktrace_to_EC)( Addr*, UWord ) = NULL;
+static struct _EC* (*main_get_EC)( Thr* ) = NULL;
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+//                                                             //
+//                                                             //
+//                                                             //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+//                                                             //
+// SECTION BEGIN compressed shadow memory                      //
+//                                                             //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+#ifndef __HB_ZSM_H
+#define __HB_ZSM_H
+
+typedef  ULong  SVal;
+
+/* This value has special significance to the implementation, and callers
+   may not store it in the shadow memory. */
+#define SVal_INVALID (3ULL << 62)
+
+/* This is the default value for shadow memory.  Initially the shadow
+   memory contains no accessible areas and so all reads produce this
+   value.  TODO: make this caller-defineable. */
+#define SVal_NOACCESS (2ULL << 62)
+
+/* Initialise the library.  Once initialised, it will (or may) call
+   rcinc and rcdec in response to all the calls below, in order to
+   allow the user to do reference counting on the SVals stored herein.
+   It is important to understand, however, that due to internal
+   caching, the reference counts are in general inaccurate, and can be
+   both above or below the true reference count for an item.  In
+   particular, the library may indicate that the reference count for
+   an item is zero, when in fact it is not.
+
+   To make the reference counting exact and therefore non-pointless,
+   call zsm_flush_cache.  Immediately after it returns, the reference
+   counts for all items, as deduced by the caller by observing calls
+   to rcinc and rcdec, will be correct, and so any items with a zero
+   reference count may be freed (or at least considered to be
+   unreferenced by this library).
+*/
+static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
+
+static void zsm_set_range   ( Addr, SizeT, SVal );
+static SVal zsm_read8       ( Addr );
+static void zsm_copy_range  ( Addr, Addr, SizeT );
+static void zsm_flush_cache ( void );
+
+#endif /* ! __HB_ZSM_H */
+
+
+/* For the shadow mem cache stuff we may want more intrusive
+   checks.  Unfortunately there's no almost-zero-cost way to make them
+   selectable at run time.  Hence set the #if 0 to #if 1 and
+   rebuild if you want them. */
+#if 0
+#  define SCE_CACHELINE 1  /* do sanity-check CacheLine stuff */
+#  define inline __attribute__((noinline))
+   /* probably want to ditch -fomit-frame-pointer too */
+#else
+#  define SCE_CACHELINE 0   /* don't sanity-check CacheLine stuff */
+#endif
+
+/* For the SegmentID, SegmentSet and SVal stuff we may want more
+   intrusive checks.  Again there's no zero cost way to do this.  Set
+   the #if 0 to #if 1 and rebuild if you want them. */
+#if 0
+#  define SCE_SVALS 1 /* sanity-check shadow value stuff */
+#else
+#  define SCE_SVALS 0
+#endif
+
+
+/* Round a up to the next multiple of N.  N must be a power of 2.
+   Arguments are parenthesised so that expressions may be passed. */
+#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N) - 1))
+/* Round a down to the previous multiple of N.  N must be a power of 2 */
+#define ROUNDDN(a, N)   ((a) & ~((N) - 1))
+
+
+
+/* ------ User-supplied RC functions ------ */
+static void(*rcinc)(SVal) = NULL;
+static void(*rcdec)(SVal) = NULL;
+
+
+/* ------ CacheLine ------ */
+
+#define N_LINE_BITS      6 /* must be >= 3 */
+#define N_LINE_ARANGE    (1 << N_LINE_BITS)
+#define N_LINE_TREES     (N_LINE_ARANGE >> 3)
+
+typedef
+   struct {
+      UShort descrs[N_LINE_TREES];
+      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
+   }
+   CacheLine;
+
+#define TREE_DESCR_16_0 (1<<0)
+#define TREE_DESCR_32_0 (1<<1)
+#define TREE_DESCR_16_1 (1<<2)
+#define TREE_DESCR_64   (1<<3)
+#define TREE_DESCR_16_2 (1<<4)
+#define TREE_DESCR_32_1 (1<<5)
+#define TREE_DESCR_16_3 (1<<6)
+#define TREE_DESCR_8_0  (1<<7)
+#define TREE_DESCR_8_1  (1<<8)
+#define TREE_DESCR_8_2  (1<<9)
+#define TREE_DESCR_8_3  (1<<10)
+#define TREE_DESCR_8_4  (1<<11)
+#define TREE_DESCR_8_5  (1<<12)
+#define TREE_DESCR_8_6  (1<<13)
+#define TREE_DESCR_8_7  (1<<14)
+#define TREE_DESCR_DTY  (1<<15)
+
+typedef
+   struct {
+      SVal  dict[4]; /* can represent up to 4 diff values in the line */
+      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
+                                      dict indexes */
+      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
+         LineF to use, and dict[2..] are also SVal_INVALID. */
+   }
+   LineZ; /* compressed rep for a cache line */
+
+typedef
+   struct {
+      Bool inUse;
+      SVal w64s[N_LINE_ARANGE];
+   }
+   LineF; /* full rep for a cache line */
+
+/* Shadow memory.
+   Primary map is a WordFM Addr SecMap*.  
+   SecMaps cover some page-size-ish section of address space and hold
+     a compressed representation.
+   CacheLine-sized chunks of SecMaps are copied into a Cache, being
+   decompressed when moved into the cache and recompressed on the
+   way out.  Because of this, the cache must operate as a writeback
+   cache, not a writethrough one.
+
+   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
+   N_SECMAP_BITS must be >= N_LINE_BITS.
+*/
+#define N_SECMAP_BITS   13
+#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
+
+// # CacheLines held by a SecMap
+#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
+
+/* The data in the SecMap is held in the array of LineZs.  Each LineZ
+   either carries the required data directly, in a compressed
+   representation, or it holds (in .dict[1]) an index to the LineF in
+   .linesF that holds the full representation.
+
+   Currently-unused LineF's have their .inUse bit set to zero.
+   Since each in-use LineF is referred to by exactly one LineZ,
+   the number of .linesZ[] that refer to .linesF should equal
+   the number of .linesF[] that have .inUse == True.
+
+   RC obligations: the RCs presented to the user include exactly
+   the values in:
+   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
+   * F reps that are in use (.inUse == True)
+
+   Hence the following actions at the following transitions are required:
+
+   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
+   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
+   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
+   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
+*/
+typedef
+   struct {
+      UInt   magic;
+      LineZ  linesZ[N_SECMAP_ZLINES];
+      LineF* linesF;
+      UInt   linesF_size;
+   }
+   SecMap;
+
+#define SecMap_MAGIC   0x571e58cbU
+
+/* True iff sm is non-NULL and carries the SecMap magic number. */
+static inline Bool is_sane_SecMap ( SecMap* sm ) {
+   return sm != NULL && sm->magic == SecMap_MAGIC;
+}
+
+/* ------ Cache ------ */
+
+#define N_WAY_BITS 16
+#define N_WAY_NENT (1 << N_WAY_BITS)
+
+/* Each tag is the address of the associated CacheLine, rounded down
+   to a CacheLine address boundary.  A CacheLine size must be a power
+   of 2 and must be 8 or more.  Hence an easy way to initialise the
+   cache so it is empty is to set all the tag values to any value % 8
+   != 0, eg 1.  This means all queries in the cache initially miss.
+   It does however require us to detect and not writeback, any line
+   with a bogus tag. */
+typedef
+   struct {
+      CacheLine lyns0[N_WAY_NENT];
+      Addr      tags0[N_WAY_NENT];
+   }
+   Cache;
+
+static inline Bool is_valid_scache_tag ( Addr tag ) {
+   /* a valid tag should be naturally aligned to the start of
+      a CacheLine. */
+   return 0 == (tag & (N_LINE_ARANGE - 1));
+}
+
+
+/* --------- Primary data structures --------- */
+
+/* Shadow memory primary map */
+static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
+static Cache   cache_shmem;
+
+
+static UWord stats__secmaps_search       = 0; // # SM finds
+static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
+static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
+static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
+static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
+static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
+static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
+static UWord stats__secmap_linesF_bytes  = 0; //  .. using this much storage
+static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
+static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
+static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
+static UWord stats__cache_F_fetches      = 0; // # F lines fetched
+static UWord stats__cache_F_wbacks       = 0; // # F lines written back
+static UWord stats__cache_invals         = 0; // # cache invals
+static UWord stats__cache_flushes        = 0; // # cache flushes
+static UWord stats__cache_totrefs        = 0; // # total accesses
+static UWord stats__cache_totmisses      = 0; // # misses
+static ULong stats__cache_make_New_arange = 0; // total arange made New
+static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
+static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
+static UWord stats__cline_read64s        = 0; // # calls to s_m_read64
+static UWord stats__cline_read32s        = 0; // # calls to s_m_read32
+static UWord stats__cline_read16s        = 0; // # calls to s_m_read16
+static UWord stats__cline_read8s         = 0; // # calls to s_m_read8
+static UWord stats__cline_write64s       = 0; // # calls to s_m_write64
+static UWord stats__cline_write32s       = 0; // # calls to s_m_write32
+static UWord stats__cline_write16s       = 0; // # calls to s_m_write16
+static UWord stats__cline_write8s        = 0; // # calls to s_m_write8
+static UWord stats__cline_set64s         = 0; // # calls to s_m_set64
+static UWord stats__cline_set32s         = 0; // # calls to s_m_set32
+static UWord stats__cline_set16s         = 0; // # calls to s_m_set16
+static UWord stats__cline_set8s          = 0; // # calls to s_m_set8
+static UWord stats__cline_get8s          = 0; // # calls to s_m_get8
+static UWord stats__cline_copy8s         = 0; // # calls to s_m_copy8
+static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
+static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
+static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
+static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
+static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
+static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
+
+/* Clear the low N_SECMAP_BITS bits of a, giving the base address of
+   the N_SECMAP_ARANGE-sized region that contains it. */
+static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
+   return a & ~(N_SECMAP_ARANGE - 1);
+}
+/* Return the low N_SECMAP_BITS bits of a, that is, its byte offset
+   within the N_SECMAP_ARANGE-sized region that contains it. */
+static inline UWord shmem__get_SecMap_offset ( Addr a ) {
+   return a & (N_SECMAP_ARANGE - 1);
+}
+
+
+/*----------------------------------------------------------------*/
+/*--- map_shmem :: WordFM Addr SecMap                          ---*/
+/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
+/*----------------------------------------------------------------*/
+
+/*--------------- SecMap allocation --------------- */
+
+static HChar* shmem__bigchunk_next = NULL;
+static HChar* shmem__bigchunk_end1 = NULL;
+
+/* Bump allocator.  Returns a 16-aligned block of n bytes (n is rounded
+   up to a multiple of 16) carved from the current big chunk, which
+   spans [shmem__bigchunk_next, shmem__bigchunk_end1).  When the
+   current chunk cannot satisfy the request, whatever is left of it is
+   abandoned and a fresh chunk of sHMEM__BIGCHUNK_SIZE bytes is
+   obtained from VG_(am_shadow_alloc).  Never returns NULL: if a fresh
+   chunk cannot be obtained, VG_(out_of_memory_NORETURN) is called.
+   n must be nonzero; a request that does not fit even in a fresh
+   chunk fails the final size assertion. */
+static void* shmem__bigchunk_alloc ( SizeT n )
+{
+   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
+   tl_assert(n > 0);
+   n = VG_ROUNDUP(n, 16);
+   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
+   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
+             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
+   /* not enough room left (also true on the very first call, when
+      both pointers are still NULL): start a new chunk */
+   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
+      if (0)
+      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
+                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
+      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
+      if (shmem__bigchunk_next == NULL)
+         VG_(out_of_memory_NORETURN)(
+            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
+      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
+   }
+   tl_assert(shmem__bigchunk_next);
+   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
+   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
+   /* advance the bump pointer and hand back the old position */
+   shmem__bigchunk_next += n;
+   return shmem__bigchunk_next - n;
+}
+
+/* Allocate a new SecMap from the big-chunk allocator and initialise
+   it: the magic number is set, every LineZ is given a dictionary
+   whose only valid entry is SVal_NOACCESS with all 2-bit indexes
+   pointing at it, and no LineF array is attached.  Also updates the
+   SecMap-related statistics counters. */
+static SecMap* shmem__alloc_SecMap ( void )
+{
+   Word    i, j;
+   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
+   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
+   tl_assert(sm);
+   sm->magic = SecMap_MAGIC;
+   for (i = 0; i < N_SECMAP_ZLINES; i++) {
+      sm->linesZ[i].dict[0] = SVal_NOACCESS;
+      sm->linesZ[i].dict[1] = SVal_INVALID;
+      sm->linesZ[i].dict[2] = SVal_INVALID;
+      sm->linesZ[i].dict[3] = SVal_INVALID;
+      for (j = 0; j < N_LINE_ARANGE/4; j++)
+         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
+   }
+   sm->linesF      = NULL;
+   sm->linesF_size = 0;
+   stats__secmaps_allocd++;
+   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
+   stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
+   stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
+   return sm;
+}
+
+typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
+static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
+
+/* Return the SecMap covering address ga, or NULL if map_shmem holds
+   none.  A three-entry cache (smCache) sits in front of map_shmem:
+   a hit in slot 1 or slot 2 is swapped one slot towards the front,
+   and a successful map_shmem lookup is installed in slot 0, pushing
+   the other entries back and dropping the old slot 2.  A failed
+   lookup leaves the cache unchanged. */
+static SecMap* shmem__find_SecMap ( Addr ga ) 
+{
+   SecMap* sm    = NULL;
+   Addr    gaKey = shmem__round_to_SecMap_base(ga);
+   // Cache
+   stats__secmaps_search++;
+   if (LIKELY(gaKey == smCache[0].gaKey))
+      return smCache[0].sm;
+   if (LIKELY(gaKey == smCache[1].gaKey)) {
+      // hit in slot 1: exchange slots 0 and 1
+      SMCacheEnt tmp = smCache[0];
+      smCache[0] = smCache[1];
+      smCache[1] = tmp;
+      return smCache[0].sm;
+   }
+   if (gaKey == smCache[2].gaKey) {
+      // hit in slot 2: exchange slots 1 and 2 (promoted by one only)
+      SMCacheEnt tmp = smCache[1];
+      smCache[1] = smCache[2];
+      smCache[2] = tmp;
+      return smCache[1].sm;
+   }
+   // end Cache
+   stats__secmaps_search_slow++;
+   if (VG_(lookupFM)( map_shmem,
+                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
+      tl_assert(sm != NULL);
+      // found: shift entries back one slot and install at the front
+      smCache[2] = smCache[1];
+      smCache[1] = smCache[0];
+      smCache[0].gaKey = gaKey;
+      smCache[0].sm    = sm;
+   } else {
+      tl_assert(sm == NULL);
+   }
+   return sm;
+}
+
+/* Return the SecMap covering address ga.  If none exists yet, a new
+   one is allocated and added to map_shmem under the SecMap base
+   address of ga.  The new SecMap is not entered into smCache here. */
+static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
+{
+   SecMap* sm = shmem__find_SecMap ( ga );
+   if (LIKELY(sm)) {
+      return sm;
+   } else {
+      /* create a new one */
+      Addr gaKey = shmem__round_to_SecMap_base(ga);
+      sm = shmem__alloc_SecMap();
+      tl_assert(sm);
+      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
+      return sm;
+   }
+}
+
+
+/* ------------ LineF and LineZ related ------------ */
+
+/* Call the user-supplied rcinc on each of the N_LINE_ARANGE values
+   held in lineF, which must be in use. */
+static void rcinc_LineF ( LineF* lineF ) {
+   UWord i;
+   tl_assert(lineF->inUse);
+   for (i = 0; i < N_LINE_ARANGE; i++)
+      rcinc(lineF->w64s[i]);
+}
+
+/* Call the user-supplied rcdec on each of the N_LINE_ARANGE values
+   held in lineF, which must be in use. */
+static void rcdec_LineF ( LineF* lineF ) {
+   UWord i;
+   tl_assert(lineF->inUse);
+   for (i = 0; i < N_LINE_ARANGE; i++)
+      rcdec(lineF->w64s[i]);
+}
+
+/* Call the user-supplied rcinc on every valid dictionary entry of
+   lineZ.  dict[0] must be valid (that is, lineZ must be a direct Z
+   rep); dict[1..3] are skipped where they are SVal_INVALID. */
+static void rcinc_LineZ ( LineZ* lineZ ) {
+   tl_assert(lineZ->dict[0] != SVal_INVALID);
+   rcinc(lineZ->dict[0]);
+   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
+   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
+   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
+}
+
+/* Call the user-supplied rcdec on every valid dictionary entry of
+   lineZ.  dict[0] must be valid (that is, lineZ must be a direct Z
+   rep); dict[1..3] are skipped where they are SVal_INVALID. */
+static void rcdec_LineZ ( LineZ* lineZ ) {
+   tl_assert(lineZ->dict[0] != SVal_INVALID);
+   rcdec(lineZ->dict[0]);
+   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
+   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
+   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
+}
+
+/* Store the 2-bit value b2 at position ix of the packed array arr,
+   which holds four 2-bit entries per byte (entry ix lives in byte
+   ix/4, at bit offset 2*(ix%4)).  b2 must be in the range 0 .. 3. */
+inline
+static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
+   Word bix, shft, mask, prep;
+   /* A wider value would spill into the neighbouring entries of the
+      same byte.  (ix is unsigned, so it needs no lower-bound check.) */
+   tl_assert(b2 <= 3);
+   bix  = ix >> 2;
+   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
+   mask = 3 << shft;
+   prep = b2 << shft;
+   arr[bix] = (arr[bix] & ~mask) | prep;
+}
+
+/* Return the 2-bit value held at position ix of the packed array arr,
+   which holds four 2-bit entries per byte (entry ix lives in byte
+   ix/4, at bit offset 2*(ix%4)).  ix is unsigned, so there is no
+   lower bound to check. */
+inline
+static UWord read_twobit_array ( UChar* arr, UWord ix ) {
+   Word bix, shft;
+   bix  = ix >> 2;
+   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
+   return (arr[bix] >> shft) & 3;
+}
+
+/* Given address 'tag', find either the Z or F line containing relevant
+   data, so it can be read into the cache.  On return exactly one of
+   *zp and *fp is non-NULL: *zp when the line is held in Z form, *fp
+   when the Z line merely refers to an F line.  The SecMap for 'tag'
+   is obtained with shmem__find_or_alloc_SecMap.
+*/
+static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
+                                  /*OUT*/LineF** fp, Addr tag ) {
+   LineZ* lineZ;
+   LineF* lineF;
+   UWord   zix;
+   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
+   UWord   smoff = shmem__get_SecMap_offset(tag);
+   /* since smoff is derived from a valid tag, it should be
+      cacheline-aligned. */
+   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
+   zix = smoff >> N_LINE_BITS;
+   tl_assert(zix < N_SECMAP_ZLINES);
+   lineZ = &sm->linesZ[zix];
+   lineF = NULL;
+   if (lineZ->dict[0] == SVal_INVALID) {
+      /* An invalid dict[0] marks "stored as an F line"; dict[1] then
+         carries the index of that line within sm->linesF. */
+      UInt fix = (UInt)lineZ->dict[1];
+      tl_assert(sm->linesF);
+      tl_assert(sm->linesF_size > 0);
+      /* 'fix' is unsigned, so only the upper bound needs checking. */
+      tl_assert(fix < sm->linesF_size);
+      lineF = &sm->linesF[fix];
+      tl_assert(lineF->inUse);
+      lineZ = NULL;
+   }
+   *zp = lineZ;
+   *fp = lineF;
+}
+
+/* Given address 'tag', return the relevant SecMap and the index of
+   the LineZ within it, in the expectation that the line is to be
+   overwritten.  Regardless of whether 'tag' is currently associated
+   with a Z or F representation, do an rcdec on the current
+   representation, in recognition of the fact that the contents are
+   just about to be overwritten.  If the current representation is an
+   F line, that F line is additionally marked as no longer in use. */
+static __attribute__((noinline))
+void find_Z_for_writing ( /*OUT*/SecMap** smp,
+                          /*OUT*/Word* zixp,
+                          Addr tag ) {
+   LineZ* lineZ;
+   UWord   zix;
+   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
+   UWord   smoff = shmem__get_SecMap_offset(tag);
+   /* since smoff is derived from a valid tag, it should be
+      cacheline-aligned. */
+   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
+   zix = smoff >> N_LINE_BITS;
+   tl_assert(zix < N_SECMAP_ZLINES);
+   lineZ = &sm->linesZ[zix];
+   /* re RCs, we are freeing up this LineZ/LineF so that new data can
+      be parked in it.  Hence have to rcdec it accordingly. */
+   /* If lineZ has an associated lineF, free it up. */
+   if (lineZ->dict[0] == SVal_INVALID) {
+      /* An invalid dict[0] marks "stored as an F line"; dict[1] then
+         carries the index of that line within sm->linesF. */
+      LineF* lineF;
+      UInt   fix = (UInt)lineZ->dict[1];
+      tl_assert(sm->linesF);
+      tl_assert(sm->linesF_size > 0);
+      /* 'fix' is unsigned, so only the upper bound needs checking. */
+      tl_assert(fix < sm->linesF_size);
+      lineF = &sm->linesF[fix];
+      tl_assert(lineF->inUse);
+      rcdec_LineF(lineF);
+      lineF->inUse = False;
+   } else {
+      rcdec_LineZ(lineZ);
+   }
+   *smp  = sm;
+   *zixp = zix;
+}
+
+/* Hand back, via *fixp, the index of an unused entry in sm->linesF.
+   When every existing entry is in use the array is replaced by one
+   twice as large (or by a 1-entry array if there was none).  The
+   chosen entry is NOT marked in use here. */
+static __attribute__((noinline))
+void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
+   UInt   k, new_size;
+   LineF* bigger;
+
+   /* linesF and linesF_size must agree on whether an array exists. */
+   if (!sm->linesF) {
+      tl_assert(sm->linesF_size == 0);
+   } else {
+      tl_assert(sm->linesF_size > 0);
+      /* First choice: recycle a free slot in the existing array. */
+      for (k = 0; k < sm->linesF_size; k++) {
+         if (sm->linesF[k].inUse)
+            continue;
+         *fixp = (Word)k;
+         return;
+      }
+   }
+
+   /* No free F line found.  Build a larger array. */
+   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
+   bigger   = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
+                           new_size * sizeof(LineF) );
+   tl_assert(bigger);
+
+   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
+   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
+                                  * sizeof(LineF);
+
+   if (0)
+   VG_(printf)("SM %p: expand F array from %d to %d\n", 
+               sm, (Int)sm->linesF_size, new_size);
+
+   for (k = 0; k < new_size; k++)
+      bigger[k].inUse = False;
+
+   if (sm->linesF) {
+      /* Carry the old entries across (all of them must be in use,
+         otherwise the scan above would have returned), then zero and
+         release the old storage. */
+      for (k = 0; k < sm->linesF_size; k++) {
+         tl_assert(sm->linesF[k].inUse);
+         bigger[k] = sm->linesF[k];
+      }
+      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
+      HG_(free)(sm->linesF);
+   }
+
+   sm->linesF      = bigger;
+   sm->linesF_size = new_size;
+
+   /* The enlarged array must now contain a free slot. */
+   for (k = 0; k < sm->linesF_size; k++) {
+      if (sm->linesF[k].inUse)
+         continue;
+      *fixp = (Word)k;
+      return;
+   }
+
+   /*NOTREACHED*/
+   tl_assert(0);
+}
+
+
+/* ------------ CacheLine and implicit-tree related ------------ */
+
+/* Debug aid: print every tree descriptor and every shadow value of
+   'cl'.  A NULL 'cl' just produces a one-line notice. */
+__attribute__((unused))
+static void pp_CacheLine ( CacheLine* cl ) {
+   Word i;
+   if (!cl) {
+      VG_(printf)("%s","pp_CacheLine(NULL)\n");
+      return;
+   }
+   for (i = 0; i < N_LINE_TREES; i++) 
+      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
+   /* Print shadow values at full width, the same way
+      is_sane_Descr_and_Tree does.  Going through a UWord cast with
+      "%08lx" would drop the upper half of each value on hosts where
+      UWord is 32 bits wide. */
+   for (i = 0; i < N_LINE_ARANGE; i++) 
+      VG_(printf)("    sval: %016llx\n", cl->svals[i]);
+}
+
+/* Map a tree descriptor to a bitmask saying which of the tree's eight
+   slots are expected to hold a value: bit i of the result corresponds
+   to tree[i].  Only the descriptor combinations listed in the switch
+   are recognised; any other value yields 0, which the sanity checkers
+   in this file treat as "invalid descriptor". */
+static UChar descr_to_validbits ( UShort descr )
+{
+   /* a.k.a Party Time for gcc's constant folder */
+   /* DESCR assembles a descriptor from one 0/1 flag per TREE_DESCR_*
+      bit, most significant bit first. */
+#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
+                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
+             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
+                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
+                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
+                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
+                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
+                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
+                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
+                          ( (b16_0) << 0) ) )
+
+   /* BYTE assembles the result mask from eight 0/1 flags, bit 7
+      first. */
+#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
+             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
+                         ( (bit5) << 5) | ( (bit4) << 4) | \
+                         ( (bit3) << 3) | ( (bit2) << 2) | \
+                         ( (bit1) << 1) | ( (bit0) << 0) ) )
+
+   /* Cross-check DESCR's bit positions against the TREE_DESCR_*
+      constants defined elsewhere. */
+   /* these should all get folded out at compile time */
+   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
+   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
+   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
+   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
+   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
+   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
+   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
+   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
+   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
+
+   /* Each case below is one legal way of tiling the eight bytes with
+      8-, 16-, 32- and 64-bit nodes; the trailing comment on the case
+      line spells the tiling out from byte 7 down to byte 0. */
+   switch (descr) {
+   /*
+              +--------------------------------- TREE_DESCR_8_7
+              |             +------------------- TREE_DESCR_8_0
+              |             |  +---------------- TREE_DESCR_16_3
+              |             |  | +-------------- TREE_DESCR_32_1
+              |             |  | | +------------ TREE_DESCR_16_2
+              |             |  | | |  +--------- TREE_DESCR_64
+              |             |  | | |  |  +------ TREE_DESCR_16_1
+              |             |  | | |  |  | +---- TREE_DESCR_32_0
+              |             |  | | |  |  | | +-- TREE_DESCR_16_0
+              |             |  | | |  |  | | |
+              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
+   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
+                                                 return BYTE(1,1,1,1,1,1,1,1);
+   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
+                                                 return BYTE(1,1,0,1,1,1,1,1);
+   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */ 
+                                                 return BYTE(0,1,1,1,1,1,1,1);
+   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
+                                                 return BYTE(0,1,0,1,1,1,1,1);
+
+   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */ 
+                                                 return BYTE(1,1,1,1,1,1,0,1);
+   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
+                                                 return BYTE(1,1,0,1,1,1,0,1);
+   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
+                                                 return BYTE(0,1,1,1,1,1,0,1);
+   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
+                                                 return BYTE(0,1,0,1,1,1,0,1);
+
+   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
+                                                 return BYTE(1,1,1,1,0,1,1,1);
+   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
+                                                 return BYTE(1,1,0,1,0,1,1,1);
+   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
+                                                 return BYTE(0,1,1,1,0,1,1,1);
+   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
+                                                 return BYTE(0,1,0,1,0,1,1,1);
+
+   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
+                                                 return BYTE(1,1,1,1,0,1,0,1);
+   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
+                                                 return BYTE(1,1,0,1,0,1,0,1);
+   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
+                                                 return BYTE(0,1,1,1,0,1,0,1);
+   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
+                                                 return BYTE(0,1,0,1,0,1,0,1);
+
+   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
+                                                 return BYTE(0,0,0,1,1,1,1,1);
+   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
+                                                 return BYTE(0,0,0,1,1,1,0,1);
+   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
+                                                 return BYTE(0,0,0,1,0,1,1,1);
+   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
+                                                 return BYTE(0,0,0,1,0,1,0,1);
+
+   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
+                                                 return BYTE(1,1,1,1,0,0,0,1);
+   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
+                                                 return BYTE(1,1,0,1,0,0,0,1);
+   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
+                                                 return BYTE(0,1,1,1,0,0,0,1);
+   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
+                                                 return BYTE(0,1,0,1,0,0,0,1);
+
+   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
+                                                 return BYTE(0,0,0,1,0,0,0,1);
+
+   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
+                                                 return BYTE(0,0,0,0,0,0,0,1);
+
+   default: return BYTE(0,0,0,0,0,0,0,0); 
+                   /* INVALID - any valid descr produces at least one
+                      valid bit in tree[0..7]*/
+   }
+   /* NOTREACHED*/
+   tl_assert(0);
+
+#  undef DESCR
+#  undef BYTE
+}
+
+/* A descriptor is sane iff descr_to_validbits recognises it, i.e.
+   maps it to a non-zero mask. */
+__attribute__((unused))
+static Bool is_sane_Descr ( UShort descr ) {
+   UChar validbits = descr_to_validbits(descr);
+   return validbits != 0;
+}
+
+/* Render 'descr' into the caller-supplied buffer 'dst' as fifteen
+   0/1 digits grouped 8 + 3 + 1 + 3, in the order 8_7..8_0, then
+   16_3 32_1 16_2, then 64, then 16_1 32_0 16_0. */
+static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
+   /* 1 if the given descriptor bit is set in 'descr', else 0. */
+#  define FLAG(_bit)  ((Int)((descr & (_bit)) ? 1 : 0))
+   VG_(sprintf)(dst, 
+                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
+                FLAG(TREE_DESCR_8_7),  FLAG(TREE_DESCR_8_6),
+                FLAG(TREE_DESCR_8_5),  FLAG(TREE_DESCR_8_4),
+                FLAG(TREE_DESCR_8_3),  FLAG(TREE_DESCR_8_2),
+                FLAG(TREE_DESCR_8_1),  FLAG(TREE_DESCR_8_0),
+                FLAG(TREE_DESCR_16_3), FLAG(TREE_DESCR_32_1),
+                FLAG(TREE_DESCR_16_2),
+                FLAG(TREE_DESCR_64),
+                FLAG(TREE_DESCR_16_1), FLAG(TREE_DESCR_32_0),
+                FLAG(TREE_DESCR_16_0)
+   );
+#  undef FLAG
+}
+/* Render 'byte' into the caller-supplied buffer 'dst' as eight 0/1
+   digits, most significant bit first. */
+static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
+   /* Value (0 or 1) of bit number _n of 'byte'. */
+#  define BITNO(_n)  ((Int)((byte >> (_n)) & 1))
+   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
+                     BITNO(7), BITNO(6), BITNO(5), BITNO(4),
+                     BITNO(3), BITNO(2), BITNO(1), BITNO(0)
+   );
+#  undef BITNO
+}
+
+/* Check that 'descr' is a recognised descriptor and that tree[0..7]
+   agrees with it: a slot must hold something other than SVal_INVALID
+   exactly when its bit is set in descr_to_validbits(descr).  On any
+   mismatch the descriptor, mask and tree are printed and False is
+   returned. */
+static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
+   Word  k;
+   UChar validbits = descr_to_validbits(descr);
+   HChar buf[128], buf2[128];
+   Bool  ok = (validbits != 0);
+
+   for (k = 0; ok && k < 8; k++) {
+      Bool wantValue = (validbits & (1<<k)) != 0;
+      Bool hasValue  = tree[k] != SVal_INVALID;
+      if (wantValue != hasValue)
+         ok = False;
+   }
+   if (ok)
+      return True;
+
+   /* Something is inconsistent: dump everything for diagnosis. */
+   sprintf_Descr( buf, descr );
+   sprintf_Byte( buf2, validbits );
+   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
+   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
+   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
+   for (k = 0; k < 8; k++)
+      VG_(printf)("   [%ld] 0x%016llx\n", k, tree[k]);
+   VG_(printf)("%s","}\n");
+   return False;
+}
+
+/* Check every (descriptor, tree) pair of 'cl' with
+   is_sane_Descr_and_Tree.  A NULL 'cl' or the first failing tree
+   causes the line to be printed with pp_CacheLine and False to be
+   returned. */
+static Bool is_sane_CacheLine ( CacheLine* cl )
+{
+   Word tno   = 0;
+   Word cloff = 0;
+   Bool sane  = (cl != NULL);
+
+   while (sane && tno < N_LINE_TREES) {
+      sane = is_sane_Descr_and_Tree( cl->descrs[tno], &cl->svals[cloff] );
+      if (sane) {
+         tno++;
+         cloff += 8;
+      }
+   }
+   if (!sane) {
+      pp_CacheLine(cl);
+      return False;
+   }
+   /* Eight values per tree must account for the whole line. */
+   tl_assert(cloff == N_LINE_ARANGE);
+   return True;
+}
+
+/* Collapse one 8-slot tree in place and return its descriptor.  On
+   entry all eight slots must hold real values.  Working upwards,
+   equal neighbours are merged into 16-bit, then 32-bit, then one
+   64-bit node: the merged value stays in the lower-numbered slot, the
+   other slot is set to SVal_INVALID, and the descriptor bits for the
+   merged pair are replaced by the bit for the wider node.  A wider
+   merge is attempted only when both halves were merged at the level
+   below. */
+static UShort normalise_tree ( /*MOD*/SVal* tree )
+{
+   UShort descr;
+   /* pre: incoming tree[0..7] does not have any invalid shvals, in
+      particular no zeroes. */
+   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
+                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
+                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
+                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
+      tl_assert(0);
+   
+   /* Start from "eight independent 8-bit nodes". */
+   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
+           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
+           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
+   /* build 16-bit layer */
+   if (tree[1] == tree[0]) {
+      tree[1] = SVal_INVALID;
+      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
+      descr |= TREE_DESCR_16_0;
+   }
+   if (tree[3] == tree[2]) {
+      tree[3] = SVal_INVALID;
+      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
+      descr |= TREE_DESCR_16_1;
+   }
+   if (tree[5] == tree[4]) {
+      tree[5] = SVal_INVALID;
+      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
+      descr |= TREE_DESCR_16_2;
+   }
+   if (tree[7] == tree[6]) {
+      tree[7] = SVal_INVALID;
+      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
+      descr |= TREE_DESCR_16_3;
+   }
+   /* build 32-bit layer */
+   /* The descriptor tests ensure both 16-bit halves really were
+      merged above; comparing tree[2] with tree[0] alone would not
+      show that. */
+   if (tree[2] == tree[0]
+       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
+      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
+      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
+      descr |= TREE_DESCR_32_0;
+   }
+   if (tree[6] == tree[4]
+       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
+      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
+      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
+      descr |= TREE_DESCR_32_1;
+   }
+   /* build 64-bit layer */
+   if (tree[4] == tree[0]
+       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
+      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
+      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
+      descr |= TREE_DESCR_64;
+   }
+   return descr;
+}
+
+/* This takes a cacheline where all the data is at the leaves (every
+   entry of cl->svals holds a value) and builds a correctly normalised
+   tree for each group of eight values, storing the resulting
+   descriptors in cl->descrs. */
+static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
+{
+   Word tno   = 0;
+   Word cloff = 0;
+   while (tno < N_LINE_TREES) {
+      cl->descrs[tno] = normalise_tree( &cl->svals[cloff] );
+      tno++;
+      cloff += 8;
+   }
+   tl_assert(cloff == N_LINE_ARANGE);
+   if (SCE_CACHELINE)
+      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+   stats__cline_normalises++;
+}
+
+
+/* One run produced by sequentialise_CacheLine: the shadow value
+   'sval' covering 'count' consecutive bytes. */
+typedef
+   struct {
+      UChar count;
+      SVal  sval;
+   }
+   CountedSVal;
+
+/* Flatten the trees of 'src' into a sequence of (count, sval) runs,
+   written to dst[0 ..] in increasing byte order.  Each valid node of
+   each tree becomes one run whose count is the node's width in bytes
+   (8, 4, 2 or 1).  The number of runs written is stored in
+   *dstUsedP.  'nDst' is the capacity of 'dst' and must equal
+   N_LINE_ARANGE. */
+static
+void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
+                               /*OUT*/Word* dstUsedP,
+                               Word nDst, CacheLine* src )
+{
+   Word  tno, cloff, dstUsed;
+
+   tl_assert(nDst == N_LINE_ARANGE);
+   dstUsed = 0;
+
+   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
+      UShort descr = src->descrs[tno];
+      SVal*  tree  = &src->svals[cloff];
+
+      /* sequentialise the tree described by (descr,tree). */
+      /* PUT appends one run of _n bytes carrying value _v. */
+#     define PUT(_n,_v)                                \
+         do { dst[dstUsed  ].count = (_n);             \
+              dst[dstUsed++].sval  = (_v);             \
+         } while (0)
+
+      /* For each slot, the widest node rooted there wins; a slot
+         covered by a wider node rooted at a lower slot has none of
+         its own descriptor bits set and so emits nothing. */
+      /* byte 0 */
+      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
+      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
+      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
+      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
+      /* byte 1 */
+      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
+      /* byte 2 */
+      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
+      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
+      /* byte 3 */
+      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
+      /* byte 4 */
+      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
+      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
+      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
+      /* byte 5 */
+      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
+      /* byte 6 */
+      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
+      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
+      /* byte 7 */
+      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);
+
+#     undef PUT
+      /* END sequentialise the tree described by (descr,tree). */
+
+   }
+   tl_assert(cloff == N_LINE_ARANGE);
+   /* NOTE(review): this capacity check runs only after all the
+      writes have been made. */
+   tl_assert(dstUsed <= nDst);
+
+   *dstUsedP = dstUsed;
+}
+
+/* Write the cacheline 'wix' to backing store.  Where it ends up
+   is determined by its tag field.
+
+   A line whose tag is not valid is silently ignored.  Otherwise the
+   old backing representation is rcdec'd (by find_Z_for_writing), the
+   line is flattened into runs, and an attempt is made to encode it in
+   the Z line using at most four distinct values in lineZ->dict plus a
+   2-bit index per byte in lineZ->ix2s.  If more than four distinct
+   values occur, the line is stored verbatim in an F line instead and
+   the Z line is turned into a reference to it.  Whichever
+   representation is written is rcinc'd at the end. */
+static __attribute__((noinline)) void cacheline_wback ( UWord wix )
+{
+   Word        i, j, k, m;
+   Addr        tag;
+   SecMap*     sm;
+   CacheLine*  cl;
+   LineZ* lineZ;
+   LineF* lineF;
+   Word        zix, fix, csvalsUsed;
+   CountedSVal csvals[N_LINE_ARANGE];
+   SVal        sv;
+
+   if (0)
+   VG_(printf)("scache wback line %d\n", (Int)wix);
+
+   tl_assert(wix >= 0 && wix < N_WAY_NENT);
+
+   tag =  cache_shmem.tags0[wix];
+   cl  = &cache_shmem.lyns0[wix];
+
+   /* The cache line may have been invalidated; if so, ignore it. */
+   if (!is_valid_scache_tag(tag))
+      return;
+
+   /* Where are we going to put it? */
+   sm         = NULL;
+   lineZ      = NULL;
+   lineF      = NULL;
+   zix = fix = -1;
+
+   /* find the Z line to write in and rcdec it or the associated F
+      line. */
+   find_Z_for_writing( &sm, &zix, tag );
+
+   tl_assert(sm);
+   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
+   lineZ = &sm->linesZ[zix];
+
+   /* Generate the data to be stored */
+   if (SCE_CACHELINE)
+      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+
+   csvalsUsed = -1;
+   sequentialise_CacheLine( csvals, &csvalsUsed, 
+                            N_LINE_ARANGE, cl );
+   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
+   if (0) VG_(printf)("%lu ", csvalsUsed);
+
+   /* Start with an empty dictionary; slots are claimed in order as
+      new values are met. */
+   lineZ->dict[0] = lineZ->dict[1] 
+                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
+
+   /* i indexes actual shadow values, k is cursor in csvals */
+   i = 0;
+   for (k = 0; k < csvalsUsed; k++) {
+
+      sv = csvals[k].sval;
+      if (SCE_SVALS)
+         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
+      /* do we already have it? */
+      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
+      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
+      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
+      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
+      /* no.  look for a free slot. */
+      if (SCE_SVALS)
+         tl_assert(sv != SVal_INVALID);
+      if (lineZ->dict[0] 
+          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
+      if (lineZ->dict[1]
+          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
+      if (lineZ->dict[2]
+          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
+      if (lineZ->dict[3]
+          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
+      break; /* we'll have to use the f rep */
+     dict_ok:
+      /* j is the dictionary slot holding sv; record it for each of
+         the m bytes this run covers. */
+      m = csvals[k].count;
+      if (m == 8) {
+         write_twobit_array( lineZ->ix2s, i+0, j );
+         write_twobit_array( lineZ->ix2s, i+1, j );
+         write_twobit_array( lineZ->ix2s, i+2, j );
+         write_twobit_array( lineZ->ix2s, i+3, j );
+         write_twobit_array( lineZ->ix2s, i+4, j );
+         write_twobit_array( lineZ->ix2s, i+5, j );
+         write_twobit_array( lineZ->ix2s, i+6, j );
+         write_twobit_array( lineZ->ix2s, i+7, j );
+         i += 8;
+      }
+      else if (m == 4) {
+         write_twobit_array( lineZ->ix2s, i+0, j );
+         write_twobit_array( lineZ->ix2s, i+1, j );
+         write_twobit_array( lineZ->ix2s, i+2, j );
+         write_twobit_array( lineZ->ix2s, i+3, j );
+         i += 4;
+      }
+      else if (m == 1) {
+         write_twobit_array( lineZ->ix2s, i+0, j );
+         i += 1;
+      }
+      else if (m == 2) {
+         write_twobit_array( lineZ->ix2s, i+0, j );
+         write_twobit_array( lineZ->ix2s, i+1, j );
+         i += 2;
+      }
+      else {
+         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
+      }
+
+   }
+
+   /* i reaches N_LINE_ARANGE only if the loop above ran to completion
+      without hitting the 'break'. */
+   if (LIKELY(i == N_LINE_ARANGE)) {
+      /* Construction of the compressed representation was
+         successful. */
+      rcinc_LineZ(lineZ);
+      stats__cache_Z_wbacks++;
+   } else {
+      /* Cannot use the compressed(z) representation.  Use the full(f)
+         rep instead. */
+      tl_assert(i >= 0 && i < N_LINE_ARANGE);
+      alloc_F_for_writing( sm, &fix );
+      tl_assert(sm->linesF);
+      tl_assert(sm->linesF_size > 0);
+      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
+      lineF = &sm->linesF[fix];
+      tl_assert(!lineF->inUse);
+      /* Turn the Z line into a pointer to the F line: an invalid
+         dict[0] is the marker, dict[1] carries the F-line index.
+         This also discards the partially built dictionary. */
+      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
+      lineZ->dict[1] = (SVal)fix;
+      lineF->inUse = True;
+      /* Expand every run back into one value per byte. */
+      i = 0;
+      for (k = 0; k < csvalsUsed; k++) {
+         if (SCE_SVALS)
+            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
+         sv = csvals[k].sval;
+         if (SCE_SVALS)
+            tl_assert(sv != SVal_INVALID);
+         for (m = csvals[k].count; m > 0; m--) {
+            lineF->w64s[i] = sv;
+            i++;
+         }
+      }
+      tl_assert(i == N_LINE_ARANGE);
+      rcinc_LineF(lineF);
+      stats__cache_F_wbacks++;
+   }
+
+   //if (anyShared)
+   //   sm->mbHasShared = True;
+
+   /* mb_tidy_one_cacheline(); */
+}
+
+/* Fetch the cacheline 'wix' from the backing store.  The tag
+   associated with 'wix' is assumed to have already been filled in;
+   hence that is used to determine where in the backing store to read
+   from.  The tag must be a valid one.  On return cl->svals holds the
+   fetched values and cl->descrs has been rebuilt by
+   normalise_CacheLine. */
+static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
+{
+   Word       i;
+   Addr       tag;
+   CacheLine* cl;
+   LineZ*     lineZ;
+   LineF*     lineF;
+
+   if (0)
+   VG_(printf)("scache fetch line %d\n", (Int)wix);
+
+   /* 'wix' is unsigned, so only the upper bound can be violated. */
+   tl_assert(wix < N_WAY_NENT);
+
+   tag =  cache_shmem.tags0[wix];
+   cl  = &cache_shmem.lyns0[wix];
+
+   /* reject nonsense requests */
+   tl_assert(is_valid_scache_tag(tag));
+
+   lineZ = NULL;
+   lineF = NULL;
+   find_ZF_for_reading( &lineZ, &lineF, tag );
+   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
+
+   /* expand the data into the bottom layer of the tree, then get
+      normalise_CacheLine to build the descriptor array. */
+   if (lineF) {
+      /* Full representation: one stored value per byte. */
+      tl_assert(lineF->inUse);
+      for (i = 0; i < N_LINE_ARANGE; i++) {
+         cl->svals[i] = lineF->w64s[i];
+      }
+      stats__cache_F_fetches++;
+   } else {
+      /* Compressed representation: each byte has a 2-bit index into
+         the line's 4-entry dictionary. */
+      for (i = 0; i < N_LINE_ARANGE; i++) {
+         SVal sv;
+         UWord ix = read_twobit_array( lineZ->ix2s, i );
+         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
+         sv = lineZ->dict[ix];
+         tl_assert(sv != SVal_INVALID);
+         cl->svals[i] = sv;
+      }
+      stats__cache_Z_fetches++;
+   }
+   normalise_CacheLine( cl );
+}
+
+/* Mark every cache entry invalid WITHOUT writing anything back, by
+   setting each tag to 1, a value is_valid_scache_tag is asserted to
+   reject. */
+static void shmem__invalidate_scache ( void ) {
+   Word wix = 0;
+   if (0) VG_(printf)("%s","scache inval\n");
+   tl_assert(!is_valid_scache_tag(1));
+   while (wix < N_WAY_NENT) {
+      cache_shmem.tags0[wix] = 1/*INVALID*/;
+      wix++;
+   }
+   stats__cache_invals++;
+}
+
+/* Write every live cache entry back to the backing store with
+   cacheline_wback, then mark all entries invalid (tag 1). */
+static void shmem__flush_and_invalidate_scache ( void ) {
+   Word wix;
+   if (0) VG_(printf)("%s","scache flush and invalidate\n");
+   tl_assert(!is_valid_scache_tag(1));
+   for (wix = 0; wix < N_WAY_NENT; wix++) {
+      Addr tag = cache_shmem.tags0[wix];
+      /* Entries already carrying the invalid tag need no write-back. */
+      if (tag != 1/*INVALID*/) {
+         tl_assert(is_valid_scache_tag(tag));
+         cacheline_wback( wix );
+      }
+      cache_shmem.tags0[wix] = 1/*INVALID*/;
+   }
+   stats__cache_flushes++;
+   stats__cache_invals++;
+}
+
+
+/* True iff 'a' is a multiple of 2. */
+static inline Bool aligned16 ( Addr a ) {
+   return (a & 1) == 0;
+}
+/* True iff 'a' is a multiple of 4. */
+static inline Bool aligned32 ( Addr a ) {
+   return (a & 3) == 0;
+}
+/* True iff 'a' is a multiple of 8. */
+static inline Bool aligned64 ( Addr a ) {
+   return (a & 7) == 0;
+}
+/* Offset of 'a' within its cache line: a modulo N_LINE_ARANGE, taken
+   by masking. */
+static inline UWord get_cacheline_offset ( Addr a ) {
+   Addr in_line = a & (N_LINE_ARANGE - 1);
+   return (UWord)in_line;
+}
+/* 'a' passed through ROUNDUP with a granularity of N_LINE_ARANGE. */
+static inline Addr cacheline_ROUNDUP ( Addr a ) {
+   Addr rounded = ROUNDUP(a, N_LINE_ARANGE);
+   return rounded;
+}
+/* 'a' passed through ROUNDDN with a granularity of N_LINE_ARANGE. */
+static inline Addr cacheline_ROUNDDN ( Addr a ) {
+   Addr rounded = ROUNDDN(a, N_LINE_ARANGE);
+   return rounded;
+}
+/* Number of the 8-byte tree, within its cache line, that covers 'a'. */
+static inline UWord get_treeno ( Addr a ) {
+   UWord in_line = get_cacheline_offset(a);
+   return in_line >> 3;
+}
+/* Offset (0..7) of 'a' within its 8-byte tree. */
+static inline UWord get_tree_offset ( Addr a ) {
+   Addr in_tree = a & 7;
+   return in_tree;
+}
+
+static __attribute__((noinline))
+       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
+
+/* Return the cache line holding the shadow values for address 'a'.
+   The slot index comes from the address bits just above the in-line
+   offset.  If that slot's tag already matches, the slot is returned
+   directly; otherwise the work is handed to get_cacheline_MISS. */
+static inline CacheLine* get_cacheline ( Addr a )
+{
+   /* tag is 'a' with the in-line offset masked out, 
+      eg a[31]..a[4] 0000 */
+   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+   Addr       tag = a & ~(N_LINE_ARANGE - 1);
+   stats__cache_totrefs++;
+   if (UNLIKELY(tag != cache_shmem.tags0[wix]))
+      return get_cacheline_MISS( a );
+   return &cache_shmem.lyns0[wix];
+}
+
+/* Slow path of get_cacheline, for when the slot selected by 'a' holds
+   a different tag.  If the slot's current tag is valid its contents
+   are written back first; then the slot is retagged for 'a' and
+   refilled from the backing store.  Returns the refilled line. */
+static __attribute__((noinline))
+       CacheLine* get_cacheline_MISS ( Addr a )
+{
+   /* tag is 'a' with the in-line offset masked out, 
+      eg a[31]..a[4] 0000 */
+   Addr       tag      = a & ~(N_LINE_ARANGE - 1);
+   UWord      wix      = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+   CacheLine* line     = &cache_shmem.lyns0[wix];
+   Addr*      slot_tag = &cache_shmem.tags0[wix];
+
+   tl_assert(tag != cache_shmem.tags0[wix]);
+
+   stats__cache_totmisses++;
+
+   /* Dump the old line into the backing store. */
+   if (is_valid_scache_tag( *slot_tag )) {
+      /* EXPENSIVE and REDUNDANT: callee does it */
+      if (SCE_CACHELINE)
+         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
+      cacheline_wback( wix );
+   }
+
+   /* and reload the new one */
+   *slot_tag = tag;
+   cacheline_fetch( wix );
+   if (SCE_CACHELINE)
+      tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
+   return line;
+}
+
+/* Split the tree's single 64-bit node into two 32-bit nodes by
+   copying tree[0] into tree[4], and return the updated descriptor.
+   'toff' must be 0 or 4 and 'descr' must have TREE_DESCR_64 set. */
+static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
+   stats__cline_64to32pulldown++;
+   if (toff != 0 && toff != 4)
+      tl_assert(0);
+   tl_assert(descr & TREE_DESCR_64);
+   tree[4] = tree[0];
+   descr &= ~TREE_DESCR_64;
+   descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
+   return descr;
+}
+
+/* Ensure the 32-bit half of the tree containing offset 'toff' (which
+   must be 0, 2, 4 or 6) is represented as two 16-bit nodes, first
+   splitting a 64-bit node via pulldown_to_32 if the 32-bit node is
+   not yet present.  Returns the updated descriptor. */
+static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
+   UWord  base = 0;  /* slot of the 32-bit node being split */
+   UShort d32  = 0;  /* its descriptor bit */
+   UShort d16s = 0;  /* descriptor bits of the two resulting nodes */
+
+   stats__cline_32to16pulldown++;
+   switch (toff) {
+      case 0: case 2:
+         base = 0;
+         d32  = TREE_DESCR_32_0;
+         d16s = TREE_DESCR_16_1 | TREE_DESCR_16_0;
+         break;
+      case 4: case 6:
+         base = 4;
+         d32  = TREE_DESCR_32_1;
+         d16s = TREE_DESCR_16_3 | TREE_DESCR_16_2;
+         break;
+      default:
+         tl_assert(0);
+   }
+   if (!(descr & d32))
+      descr = pulldown_to_32(tree, base, descr);
+   tl_assert(descr & d32);
+   /* The upper 16-bit node inherits the value of the 32-bit node. */
+   tree[base + 2] = tree[base];
+   descr &= ~d32;
+   descr |= d16s;
+   return descr;
+}
+
+/* Ensure the 16-bit pair of the tree containing offset 'toff' (0..7)
+   is represented as two 8-bit nodes, first splitting wider nodes via
+   pulldown_to_16 if the 16-bit node is not yet present.  Returns the
+   updated descriptor. */
+static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
+   UWord  base = 0;  /* slot of the 16-bit node being split */
+   UShort d16  = 0;  /* its descriptor bit */
+   UShort d8s  = 0;  /* descriptor bits of the two resulting nodes */
+
+   stats__cline_16to8pulldown++;
+   switch (toff) {
+      case 0: case 1:
+         base = 0;
+         d16  = TREE_DESCR_16_0;
+         d8s  = TREE_DESCR_8_1 | TREE_DESCR_8_0;
+         break;
+      case 2: case 3:
+         base = 2;
+         d16  = TREE_DESCR_16_1;
+         d8s  = TREE_DESCR_8_3 | TREE_DESCR_8_2;
+         break;
+      case 4: case 5:
+         base = 4;
+         d16  = TREE_DESCR_16_2;
+         d8s  = TREE_DESCR_8_5 | TREE_DESCR_8_4;
+         break;
+      case 6: case 7:
+         base = 6;
+         d16  = TREE_DESCR_16_3;
+         d8s  = TREE_DESCR_8_7 | TREE_DESCR_8_6;
+         break;
+      default:
+         tl_assert(0);
+   }
+   if (!(descr & d16))
+      descr = pulldown_to_16(tree, base, descr);
+   tl_assert(descr & d16);
+   /* The upper byte inherits the value of the 16-bit node. */
+   tree[base + 1] = tree[base];
+   descr &= ~d16;
+   descr |= d8s;
+   return descr;
+}
+
+
+/* Descriptor-only merge: replace the two 8-bit flags of the pair
+   starting at offset 'toff' (0, 2, 4 or 6) by the matching 16-bit
+   flag.  Both 8-bit flags must be set on entry.  The tree values
+   themselves are not touched. */
+static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
+   UShort mask = 0;  /* the two 8-bit flags being retired */
+   UShort d16  = 0;  /* the 16-bit flag replacing them */
+
+   switch (toff) {
+      case 0:
+         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
+         d16  = TREE_DESCR_16_0;
+         break;
+      case 2:
+         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
+         d16  = TREE_DESCR_16_1;
+         break;
+      case 4:
+         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
+         d16  = TREE_DESCR_16_2;
+         break;
+      case 6:
+         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
+         d16  = TREE_DESCR_16_3;
+         break;
+      default:
+         tl_assert(0);
+   }
+   tl_assert( (descr & mask) == mask );
+   descr &= ~mask;
+   descr |= d16;
+   return descr;
+}
+
+/* Descriptor-only "pull up" to 32 bits: for the 32-bit slot at tree
+   offset 'toff' (0 or 4), first pull each 16-bit half up from 8 bits
+   if its 16-bit descriptor bit is not yet set, then replace the two
+   16-bit bits by the single 32-bit bit.  Returns the new descriptor. */
+static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
+   UShort mask;
+   UShort lo16, hi16, bit32;
+   switch (toff) {
+      case 0:
+         lo16  = TREE_DESCR_16_0;
+         hi16  = TREE_DESCR_16_1;
+         bit32 = TREE_DESCR_32_0;
+         break;
+      case 4:
+         lo16  = TREE_DESCR_16_2;
+         hi16  = TREE_DESCR_16_3;
+         bit32 = TREE_DESCR_32_1;
+         break;
+      default:
+         tl_assert(0);
+         return descr;
+   }
+   /* lower half first, then upper half */
+   if (!(descr & lo16))
+      descr = pullup_descr_to_16(descr, toff);
+   if (!(descr & hi16))
+      descr = pullup_descr_to_16(descr, toff + 2);
+   mask = hi16 | lo16;
+   tl_assert( (descr & mask) == mask );
+   descr &= ~mask;
+   descr |= bit32;
+   return descr;
+}
+
+/* For a 32-bit slot at tree offset 'toff' (0 or 4): True iff the
+   descriptor has the 64-bit bit set.  Any other offset asserts. */
+static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
+   if (toff == 0 || toff == 4)
+      return 0 != (descr & TREE_DESCR_64);
+   tl_assert(0);
+}
+
+/* For a 16-bit slot at tree offset 'toff' (0, 2, 4 or 6): True iff
+   the descriptor has either of the two 8-bit bits for that slot set.
+   Any other offset asserts. */
+static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
+   UShort pair = 0;  /* the two 8-bit descriptor bits under this slot */
+   switch (toff) {
+      case 0:
+         pair = TREE_DESCR_8_1 | TREE_DESCR_8_0;
+         break;
+      case 2:
+         pair = TREE_DESCR_8_3 | TREE_DESCR_8_2;
+         break;
+      case 4:
+         pair = TREE_DESCR_8_5 | TREE_DESCR_8_4;
+         break;
+      case 6:
+         pair = TREE_DESCR_8_7 | TREE_DESCR_8_6;
+         break;
+      default:
+         tl_assert(0);
+   }
+   return 0 != (descr & pair);
+}
+
+/* ------------ Cache management ------------ */
+
+/* Flush and invalidate the shadow-memory cache.  This is a thin
+   wrapper around shmem__flush_and_invalidate_scache; vts_tab__do_GC
+   calls it to bring the reference counts up to date before scanning
+   the VTS table. */
+static void zsm_flush_cache ( void )
+{
+   shmem__flush_and_invalidate_scache();
+}
+
+
+/* One-time initialisation of the compressed shadow memory.  Records
+   the caller's reference-count increment/decrement callbacks, creates
+   the (initially empty) map_shmem and invalidates the cache.  Must
+   not be called if map_shmem already exists. */
+static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
+{
+   /* Layout sanity checks, none of which depend on run-time state. */
+   tl_assert( sizeof(UWord) == sizeof(Addr) );
+   /* a SecMap must contain an integral number of CacheLines */
+   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
+   /* also ... a CacheLine holds an integral number of trees */
+   tl_assert(0 == (N_LINE_ARANGE % 8));
+
+   rcinc = p_rcinc;
+   rcdec = p_rcdec;
+
+   tl_assert(map_shmem == NULL);
+   map_shmem = VG_(newFM)( HG_(zalloc),
+                           "libhb.zsm_init.1 (map_shmem)",
+                           HG_(free),
+                           NULL/*unboxed UWord cmp*/ );
+   tl_assert(map_shmem != NULL);
+
+   shmem__invalidate_scache();
+}
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+//                                                             //
+// SECTION END compressed shadow memory                        //
+//                                                             //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+//                                                             //
+// SECTION BEGIN vts primitives                                //
+//                                                             //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+#ifndef __HB_VTS_H
+#define __HB_VTS_H
+
+/* A VtsID is an index identifying a VTS.  VtsIDs can't exceed 30
+   bits, since they have to be packed into the lowest 30 bits of an
+   SVal.  VtsID_INVALID is the "no VTS" marker. */
+typedef  UInt  VtsID;
+#define VtsID_INVALID 0xFFFFFFFF
+
+/* A VTS contains .ts, its vector clock, and also .id, a field to hold
+   a backlink for the caller's convenience.  Since we have no idea
+   what to set that to in the library, it always gets set to
+   VtsID_INVALID when a VTS is created. */
+typedef
+   struct {
+      VtsID   id; /* caller's backlink; VtsID_INVALID until assigned */
+      XArray* ts; /* XArray* ScalarTS(abstract) */
+   }
+   VTS;
+
+
+/* Create a new, empty VTS. */
+VTS* VTS__new ( void );
+
+/* Delete this VTS in its entirety, including its .ts array. */
+void VTS__delete ( VTS* vts );
+
+/* Create a new singleton VTS holding just (thr, tym).  'thr' must be
+   non-NULL and 'tym' must be at least 1. */
+VTS* VTS__singleton ( Thr* thr, ULong tym );
+
+/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
+   not modified.  If 'me' is not mentioned in 'vts', it appears in the
+   result with timestamp 1. */
+VTS* VTS__tick ( Thr* me, VTS* vts );
+
+/* Return a new VTS constructed as the join (max) of the 2 args.
+   Neither arg is modified. */
+VTS* VTS__join ( VTS* a, VTS* b );
+
+/* Compute the partial ordering relation of the two args: equal,
+   less-than, greater-than or unordered.  The enumerators start at 4,
+   so zero is never a valid POrd value. */
+typedef
+   enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
+   POrd;
+
+POrd VTS__cmp ( VTS* a, VTS* b );
+
+/* Compute an arbitrary structural (total) ordering on the two args,
+   based on their VCs, so they can be looked up in a table, tree, etc.
+   Returns -1, 0 or 1. */
+Word VTS__cmp_structural ( VTS* a, VTS* b );
+
+/* Debugging only.  Display the given VTS in the buffer 'buf', which
+   is 'nBuf' bytes long; nBuf must be greater than 16. */
+void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
+
+/* Debugging only.  Return vts[index], so to speak, or zero if 'index'
+   is not mentioned in 'vts'.  Does a linear search. */
+ULong VTS__indexAt_SLOW ( VTS* vts, Thr* index );
+
+#endif /* ! __HB_VTS_H */
+
+
+/*--------------- to do with Vector Timestamps ---------------*/
+
+/* Scalar Timestamp: one (thread, time) element of a vector clock.
+   A sane VTS (see is_sane_VTS) holds these in strictly increasing
+   order of .thr, with nonzero .tym. */
+typedef
+   struct {
+      Thr*    thr;  /* the thread this entry belongs to */
+      ULong   tym;  /* that thread's scalar timestamp */
+   }
+   ScalarTS;
+
+
+/* Sanity-check a VTS.  Returns True iff 'vts' and its .ts array are
+   non-NULL, every entry has a nonzero timestamp, and the entries are
+   in strictly increasing order of Thr*.  An empty VTS is sane.
+
+   The timestamp check is applied to every entry individually, so a
+   single-entry VTS with a zero timestamp is rejected too (a purely
+   pairwise check would never look at it). */
+static Bool is_sane_VTS ( VTS* vts )
+{
+   UWord     i, n;
+   ScalarTS  *st1, *st2;
+   if (!vts) return False;
+   if (!vts->ts) return False;
+   n = VG_(sizeXA)( vts->ts );
+   for (i = 0; i < n; i++) {
+      st1 = VG_(indexXA)( vts->ts, i );
+      /* implicit zeroes only: an explicit zero entry is malformed */
+      if (st1->tym == 0)
+         return False;
+      if (i+1 < n) {
+         /* adjacent entries must be strictly ordered by thread */
+         st2 = VG_(indexXA)( vts->ts, i+1 );
+         if (st1->thr >= st2->thr)
+            return False;
+      }
+   }
+   return True;
+}
+
+
+/* Allocate and return a fresh VTS whose vector clock is empty and
+   whose .id is VtsID_INVALID.
+*/
+VTS* VTS__new ( void )
+{
+   VTS* vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
+   tl_assert(vts);
+
+   /* the vector clock: an initially empty array of ScalarTS */
+   vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
+                         HG_(free), sizeof(ScalarTS) );
+   tl_assert(vts->ts);
+
+   vts->id = VtsID_INVALID;
+   return vts;
+}
+
+
+/* Delete this VTS in its entirety: first its .ts array, then the VTS
+   structure itself.  Both 'vts' and 'vts->ts' must be non-NULL.
+*/
+void VTS__delete ( VTS* vts )
+{
+   tl_assert(vts);
+   tl_assert(vts->ts);
+   VG_(deleteXA)( vts->ts );
+   HG_(free)(vts);
+}
+
+
+/* Build a new VTS containing exactly one entry, (thr, tym).  'thr'
+   must be non-NULL and 'tym' must be at least 1.
+*/
+VTS* VTS__singleton ( Thr* thr, ULong tym ) {
+   ScalarTS st;
+   VTS*     vts;
+   tl_assert(thr);
+   tl_assert(tym >= 1);
+   /* prepare the sole entry, then append it to a fresh empty VTS */
+   st.thr = thr;
+   st.tym = tym;
+   vts = VTS__new();
+   VG_(addToXA)( vts->ts, &st );
+   return vts;
+}
+
+
+/* Return a new VTS which is a copy of 'vts' except that the entry for
+   'me' has its timestamp incremented; if 'vts' has no entry for 'me',
+   one is inserted, in Thr* order, with timestamp 1.  'vts' itself is
+   not modified.
+*/
+VTS* VTS__tick ( Thr* me, VTS* vts )
+{
+   ScalarTS* cur;
+   ScalarTS  ent;
+   VTS*      res;
+   Word      k, nEnts;
+   tl_assert(me);
+   tl_assert(is_sane_VTS(vts));
+   res   = VTS__new();
+   nEnts = VG_(sizeXA)( vts->ts );
+
+   /* Phase 1: copy across all entries for threads ordered before
+      'me'. */
+   k = 0;
+   while (k < nEnts) {
+      cur = VG_(indexXA)( vts->ts, k );
+      if (!(cur->thr < me))
+         break;
+      ent = *cur;
+      VG_(addToXA)( res->ts, &ent );
+      k++;
+   }
+
+   /* Phase 2: emit the entry for 'me'.  If the next source entry is
+      for 'me', consume it and bump its timestamp; otherwise 'me' was
+      absent, so it starts at 1. */
+   ent.thr = me;
+   ent.tym = 1;
+   if (k < nEnts) {
+      cur = VG_(indexXA)( vts->ts, k );
+      if (cur->thr == me) {
+         ent = *cur;
+         ent.tym++;
+         k++;
+      }
+   }
+   VG_(addToXA)( res->ts, &ent );
+
+   /* Phase 3: copy across whatever remains unchanged. */
+   for (/*keepgoing*/; k < nEnts; k++) {
+      cur = VG_(indexXA)( vts->ts, k );
+      ent = *cur;
+      VG_(addToXA)( res->ts, &ent );
+   }
+
+   tl_assert(is_sane_VTS(res));
+   return res;
+}
+
+
+/* Return a new VTS which is the pointwise maximum (join) of 'a' and
+   'b'.  A thread missing from one argument is treated as having
+   timestamp zero there.  Neither argument is modified.
+*/
+VTS* VTS__join ( VTS* a, VTS* b )
+{
+   Word      ia = 0, ib = 0, useda, usedb;
+   VTS*      res;
+   ScalarTS  *tmpa, *tmpb;
+   ScalarTS  st;
+
+   tl_assert(a && a->ts);
+   tl_assert(b && b->ts);
+   useda = VG_(sizeXA)( a->ts );
+   usedb = VG_(sizeXA)( b->ts );
+
+   res = VTS__new();
+
+   /* Merge the two Thr*-ordered arrays.  On each round, tmpa/tmpb are
+      the next unconsidered entries, or NULL once that side is used
+      up; we take whichever mentions the lowest Thr*, or both if they
+      mention the same one. */
+   while (ia < useda || ib < usedb) {
+      tmpa = ia < useda ? VG_(indexXA)( a->ts, ia ) : NULL;
+      tmpb = ib < usedb ? VG_(indexXA)( b->ts, ib ) : NULL;
+
+      if (tmpb == NULL || (tmpa != NULL && tmpa->thr < tmpb->thr)) {
+         /* only 'a' mentions this Thr*; b's value is an implicit 0 */
+         st = *tmpa;
+         ia++;
+      }
+      else
+      if (tmpa == NULL || tmpb->thr < tmpa->thr) {
+         /* only 'b' mentions this Thr*; a's value is an implicit 0 */
+         st = *tmpb;
+         ib++;
+      }
+      else {
+         /* they both next mention the same Thr* */
+         st.thr = tmpa->thr;
+         st.tym = tmpa->tym > tmpb->tym ? tmpa->tym : tmpb->tym;
+         ia++;
+         ib++;
+      }
+
+      /* zero timestamps stay implicit: never store one */
+      if (st.tym > 0)
+         VG_(addToXA)( res->ts, &st );
+   }
+
+   tl_assert(is_sane_VTS( res ));
+
+   return res;
+}
+
+
+/* Compute the partial ordering relation of the two args.  A thread
+   missing from one argument is treated as having timestamp zero
+   there.  Returns:
+      POrd_EQ  if a[t] == b[t] for every thread t
+      POrd_LT  if a[t] <= b[t] for every t, and they are not equal
+      POrd_GT  if a[t] >= b[t] for every t, and they are not equal
+      POrd_UN  otherwise (unordered)
+   Neither argument is modified.
+*/
+POrd VTS__cmp ( VTS* a, VTS* b )
+{
+   Word     ia = 0, ib = 0, useda, usedb;
+   ULong    tyma, tymb;
+   ScalarTS *tmpa, *tmpb;
+
+   Bool all_leq = True;
+   Bool all_geq = True;
+
+   tl_assert(a && a->ts);
+   tl_assert(b && b->ts);
+   useda = VG_(sizeXA)( a->ts );
+   usedb = VG_(sizeXA)( b->ts );
+
+   /* Enumerate pairs (tyma, tymb) drawn from a and b in Thr* order,
+      taking into account implicit zeroes.  tmpa/tmpb are the next
+      unconsidered entries, or NULL once that side is used up. */
+   while (ia < useda || ib < usedb) {
+      tmpa = ia < useda ? VG_(indexXA)( a->ts, ia ) : NULL;
+      tmpb = ib < usedb ? VG_(indexXA)( b->ts, ib ) : NULL;
+
+      if (tmpb == NULL || (tmpa != NULL && tmpa->thr < tmpb->thr)) {
+         /* a has the lowest unconsidered Thr* */
+         tyma = tmpa->tym;
+         tymb = 0;
+         ia++;
+      }
+      else
+      if (tmpa == NULL || tmpb->thr < tmpa->thr) {
+         /* b has the lowest unconsidered Thr* */
+         tyma = 0;
+         tymb = tmpb->tym;
+         ib++;
+      }
+      else {
+         /* they both next mention the same Thr* */
+         tyma = tmpa->tym;
+         tymb = tmpb->tym;
+         ia++;
+         ib++;
+      }
+
+      if (tyma < tymb)
+         all_geq = False;
+      if (tyma > tymb)
+         all_leq = False;
+
+      /* Once both have been falsified the answer cannot change, so
+         there is no point in looking at the remaining entries. */
+      if (!all_leq && !all_geq)
+         return POrd_UN;
+   }
+
+   if (all_leq && all_geq)
+      return POrd_EQ;
+   /* now we know they aren't equal, and the unordered case has
+      already returned, so exactly one of all_leq/all_geq holds. */
+   if (all_leq)
+      return POrd_LT;
+   tl_assert(all_geq);
+   return POrd_GT;
+}
+
+
+/* Compute an arbitrary structural (total) ordering on the two args,
+   based on their VCs, so they can be looked up in a table, tree, etc.
+   Returns -1, 0 or 1.  Shorter vectors order first; equal-length
+   vectors are compared entry by entry, timestamp before thread.
+*/
+Word VTS__cmp_structural ( VTS* a, VTS* b )
+{
+   Word     i, useda, usedb;
+   ScalarTS *tmpa, *tmpb;
+
+   tl_assert(a && a->ts);
+   tl_assert(b && b->ts);
+   useda = VG_(sizeXA)( a->ts );
+   usedb = VG_(sizeXA)( b->ts );
+
+   /* Differing lengths settle it immediately. */
+   if (useda != usedb)
+      return useda < usedb ? -1 : 1;
+
+   /* Same length vectors, so step through them together and stop at
+      the first difference. */
+   for (i = 0; i < useda; i++) {
+      tmpa = VG_(indexXA)( a->ts, i );
+      tmpb = VG_(indexXA)( b->ts, i );
+      if (tmpa->tym != tmpb->tym)
+         return tmpa->tym < tmpb->tym ? -1 : 1;
+      if (tmpa->thr != tmpb->thr)
+         return tmpa->thr < tmpb->thr ? -1 : 1;
+   }
+
+   /* They're identical. */
+   return 0;
+}
+
+
+/* Debugging only.  Display the given VTS in the buffer, as
+   "[thr:tym thr:tym ...]".  'buf' is 'nBuf' bytes long and nBuf must
+   be greater than 16.  If the remaining space gets tight, the output
+   is cut short and ends in " ...]".  The result is always
+   zero-terminated.
+*/
+void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
+   ScalarTS* st;
+   HChar     unit[64];
+   Word      i, n;
+   Int       len;
+   Int       avail = nBuf;
+   tl_assert(vts && vts->ts);
+   tl_assert(nBuf > 16);
+   buf[0] = '[';
+   buf[1] = 0;
+   n = VG_(sizeXA)( vts->ts );
+   for (i = 0; i < n; i++) {
+      st = VG_(indexXA)( vts->ts, i );
+      VG_(memset)(unit, 0, sizeof(unit));
+      /* .tym is a ULong, so it is printed as unsigned */
+      VG_(sprintf)(unit, i < n-1 ? "%p:%llu " : "%p:%llu",
+                         st->thr, st->tym);
+      len = (Int)VG_(strlen)(unit);
+      /* Keep 40 bytes of slack for the brackets, the " ...]" marker
+         and the terminator.  This also covers callers whose buffer
+         is smaller than 40 bytes: they just get "[ ...]" rather than
+         an assertion failure. */
+      if (avail < len + 40/*let's say*/) {
+         VG_(strcat)(buf, " ...]");
+         buf[nBuf-1] = 0;
+         return;
+      }
+      VG_(strcat)(buf, unit);
+      avail -= len;
+   }
+   VG_(strcat)(buf, "]");
+   buf[nBuf-1] = 0;
+}
+
+
+/* Debugging only.  Linear search of 'vts' for thread 'idx'; returns
+   its timestamp, or zero if 'idx' is not mentioned.
+*/
+ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
+   UWord     i, n;
+   ScalarTS* st;
+   ULong     tym = 0;  /* result if 'idx' is absent */
+   tl_assert(vts && vts->ts);
+   n = VG_(sizeXA)( vts->ts );
+   for (i = 0; i < n; i++) {
+      st = VG_(indexXA)( vts->ts, i );
+      if (st->thr == idx) {
+         tym = st->tym;
+         break;
+      }
+   }
+   return tym;
+}
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+//                                                             //
+// SECTION END vts primitives                                  //
+//                                                             //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+//                                                             //
+// SECTION BEGIN main library                                  //
+//                                                             //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// VTS set                                             //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+/* The set of all VTSs known to the library.  Keys are VTS*, compared
+   by value using VTS__cmp_structural; the value field is unused. */
+static WordFM* /* VTS* void void */ vts_set = NULL;
+
+/* Create the (empty) vts_set.  Must be called at most once. */
+static void vts_set_init ( void )
+{
+   tl_assert(!vts_set);
+   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
+                         HG_(free),
+                         (Word(*)(UWord,UWord))VTS__cmp_structural );
+   tl_assert(vts_set);
+}
+
+/* Intern a newly made VTS.  If vts_set already holds a VTS equal (by
+   value) to 'cand', 'cand' is freed and the existing one returned.
+   Otherwise 'cand' is added to the set and returned.  The caller
+   tells the two cases apart by comparing the returned pointer with
+   the one supplied, which requires that 'cand' itself is not already
+   in the set.
+*/
+static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
+{
+   UWord keyW, valW;
+
+   /* lookup cand (by value) */
+   if (!VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
+      /* not present.  Add and return pointer to same. */
+      VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
+      return cand;
+   }
+
+   /* found an equal one already in the set */
+   tl_assert(valW == 0);
+   /* if this fails, cand (by ref) was already present (!) */
+   tl_assert(keyW != (UWord)cand);
+   VTS__delete(cand);
+   return (VTS*)keyW;
+}
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// VTS table                                           //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+static void VtsID__invalidate_caches ( void ); /* fwds */
+
+/* A type to hold VTS table entries.  Invariants:
+   If .vts == NULL, then this entry is not in use, so:
+   - .rc == 0
+   - this entry is on the freelist (unfortunately, does not imply
+     any constraints on value for .freelink, since the last entry on
+     the list has .freelink == VtsID_INVALID)
+   If .vts != NULL, then this entry is in use:
+   - .vts is findable in vts_set
+   - .vts->id == this entry number
+   - no specific value for .rc (even 0 is OK)
+   - this entry is not on freelist, so .freelink == VtsID_INVALID
+*/
+typedef
+   struct {
+      VTS*  vts;      /* vts, in vts_set */
+      UWord rc;       /* reference count - enough for entire aspace */
+      VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
+   }
+   VtsTE;
+
+/* The VTS table.  It is indexed by VtsID and only ever grows (see
+   get_new_VtsID); unused slots are recycled via the free list. */
+static XArray* /* of VtsTE */ vts_tab = NULL;
+
+/* An index into the VTS table, indicating the start of the list of
+   free (available for use) entries.  If the list is empty, this is
+   VtsID_INVALID. */
+static VtsID vts_tab_freelist = VtsID_INVALID;
+
+/* Do a GC of vts_tab when the freelist becomes empty AND the size of
+   vts_tab equals or exceeds this size.  After GC, vts_tab__do_GC sets
+   the value here to twice the number of live entries, but never less
+   than the current table size, so as to check for the next GC
+   point. */
+static Word vts_next_GC_at = 1000;
+
+/* Create the (empty) VTS table and mark the free list as empty. */
+static void vts_tab_init ( void )
+{
+   vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
+                         HG_(free), sizeof(VtsTE) );
+   tl_assert(vts_tab);
+   vts_tab_freelist = VtsID_INVALID;
+}
+
+/* Add ii to the free list, checking that it looks out-of-use: it must
+   have no VTS, a zero reference count and must not already be linked
+   anywhere.  The entry is pushed on the front of the list. */
+static void add_to_free_list ( VtsID ii )
+{
+   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
+   tl_assert(ie->vts == NULL);
+   tl_assert(ie->rc == 0);
+   tl_assert(ie->freelink == VtsID_INVALID);
+   /* old head becomes our successor; we become the new head */
+   ie->freelink = vts_tab_freelist;
+   vts_tab_freelist = ii;
+}
+
+/* Pop the entry at the head of the free list and return its index,
+   or VtsID_INVALID if the free list is empty.  Note the popped
+   entry's .freelink is left as it was; it is up to the caller to
+   reset it. */
+static VtsID get_from_free_list ( void )
+{
+   VtsID  ii = vts_tab_freelist;
+   VtsTE* ie;
+   if (ii != VtsID_INVALID) {
+      ie = VG_(indexXA)( vts_tab, ii );
+      tl_assert(ie->vts == NULL);
+      tl_assert(ie->rc == 0);
+      /* unlink: the successor becomes the new head */
+      vts_tab_freelist = ie->freelink;
+   }
+   return ii;
+}
+
+/* Return a VtsID that is available for use: preferably one recycled
+   from the free list, or, if that is empty, the index of a blank
+   entry newly appended to vts_tab. */
+static VtsID get_new_VtsID ( void )
+{
+   VtsTE te;
+   VtsID ii = get_from_free_list();
+   if (ii == VtsID_INVALID) {
+      /* nothing to recycle, so grow the table by one blank entry */
+      te.vts      = NULL;
+      te.rc       = 0;
+      te.freelink = VtsID_INVALID;
+      ii = (VtsID)VG_(addToXA)( vts_tab, &te );
+   }
+   return ii;
+}
+
+
+/* Indirect callback from lib_zsm.  Increment the reference count of
+   the in-use table entry 'ii'.  Asserts if the entry is not in use,
+   if its backlink does not match, or if the count would overflow. */
+static void VtsID__rcinc ( VtsID ii )
+{
+   VtsTE* ie;
+   /* VG_(indexXA) does a range check for us */
+   ie = VG_(indexXA)( vts_tab, ii );
+   tl_assert(ie->vts); /* else it's not in use */
+   tl_assert(ie->rc < ~0UL); /* else we can't continue */
+   tl_assert(ie->vts->id == ii);
+   ie->rc++;
+}
+
+/* Indirect callback from lib_zsm.  Decrement the reference count of
+   the in-use table entry 'ii'.  Asserts if the entry is not in use,
+   if its backlink does not match, or if the count is already zero.
+   An entry whose count reaches zero is not freed here; that is left
+   to vts_tab__do_GC. */
+static void VtsID__rcdec ( VtsID ii )
+{
+   VtsTE* ie;
+   /* VG_(indexXA) does a range check for us */
+   ie = VG_(indexXA)( vts_tab, ii );
+   tl_assert(ie->vts); /* else it's not in use */
+   tl_assert(ie->rc > 0); /* else RC snafu */
+   tl_assert(ie->vts->id == ii);
+   ie->rc--;
+}
+
+
+/* Intern 'cand' and return its VtsID.  If an equal VTS is already
+   known, 'cand' is deallocated and the pre-existing VTS's id is
+   returned.  Otherwise 'cand' is added to both vts_set and vts_tab
+   under a fresh VtsID, with reference count zero, and that id is
+   returned.  'cand' must not have an id yet. */
+static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
+{
+   VTS*   auld;
+   VtsTE* ie;
+   VtsID  ii;
+   tl_assert(cand->id == VtsID_INVALID);
+   auld = vts_set__find_and_dealloc__or_add(cand);
+
+   if (auld == cand) {
+      /* It's a new one.  Give it a table slot and an id. */
+      ii = get_new_VtsID();
+      ie = VG_(indexXA)( vts_tab, ii );
+      ie->vts      = cand;
+      ie->rc       = 0;
+      ie->freelink = VtsID_INVALID;
+      cand->id = ii;
+      return ii;
+   }
+
+   /* We already have an Aulde one.  Use that. */
+   tl_assert(auld->id != VtsID_INVALID);
+   ie = VG_(indexXA)( vts_tab, auld->id );
+   tl_assert(ie->vts == auld);
+   return auld->id;
+}
+
+
+/* Print a summary of the VTS table and set: table size, number of
+   in-use entries, set size, and the sum of all reference counts.
+   'caller' is printed as a label.  Also checks that every unused
+   entry has a zero reference count. */
+static void show_vts_stats ( HChar* caller )
+{
+   UWord  i;
+   UWord  nSet  = VG_(sizeFM)( vts_set );
+   UWord  nTab  = VG_(sizeXA)( vts_tab );
+   UWord  nLive = 0;
+   ULong  totrc = 0;
+   VtsTE* ie;
+   for (i = 0; i < nTab; i++) {
+      ie = VG_(indexXA)( vts_tab, i );
+      if (ie->vts == NULL) {
+         /* unused slot */
+         tl_assert(ie->rc == 0);
+         continue;
+      }
+      nLive++;
+      totrc += (ULong)ie->rc;
+   }
+   VG_(printf)("  show_vts_stats %s\n", caller);
+   VG_(printf)("    vts_tab size %4lu\n", nTab);
+   VG_(printf)("    vts_tab live %4lu\n", nLive);
+   VG_(printf)("    vts_set size %4lu\n", nSet);
+   VG_(printf)("        total rc %4llu\n", totrc);
+}
+
+/* Garbage-collect vts_tab: free every in-use entry whose reference
+   count is zero, and compute the threshold for the next GC.  May
+   only be called when the free list is empty.  If 'show_stats' is
+   True, statistics are printed before and after; a one-line summary
+   is printed regardless.
+
+   NOT TO BE CALLED FROM WITHIN libzsm. */
+static void vts_tab__do_GC ( Bool show_stats )
+{
+   UWord i, nTab, nLive, nFreed;
+
+   /* check this is actually necessary. */
+   tl_assert(vts_tab_freelist == VtsID_INVALID);
+
+   /* empty the caches for partial order checks and binary joins.  We
+      could do better and prune out the entries to be deleted, but it
+      ain't worth the hassle. */
+   VtsID__invalidate_caches();
+
+   /* First, make the reference counts up to date. */
+   zsm_flush_cache();
+
+   nTab = VG_(sizeXA)( vts_tab );
+
+   if (show_stats) {
+      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
+      show_vts_stats("before GC");
+   }
+
+   /* Now we can inspect the entire vts_tab.  Any entries
+      with zero .rc fields are now no longer in use and can be
+      put on the free list, removed from vts_set, and deleted. */
+   nFreed = 0;
+   for (i = 0; i < nTab; i++) {
+      Bool present;
+      UWord oldK = 0, oldV = 0;
+      VtsTE* te = VG_(indexXA)( vts_tab, i );
+      if (te->vts == NULL) {
+         tl_assert(te->rc == 0);
+         continue; /* already on the free list (presumably) */
+      }
+      if (te->rc > 0)
+         continue; /* in use */
+      /* Ok, we got one we can free. */
+      tl_assert(te->vts->id == i);
+      /* first, remove it from vts_set. */
+      present = VG_(delFromFM)( vts_set,
+                                &oldK, &oldV, (UWord)te->vts );
+      tl_assert(present); /* else it isn't in vts_set ?! */
+      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
+      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
+      /* now free the VTS itself */
+      VTS__delete(te->vts);
+      te->vts = NULL;
+      /* and finally put this entry on the free list */
+      tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
+      add_to_free_list( i );
+      nFreed++;
+   }
+
+   /* Now figure out when the next GC should be.  We'll allow the
+      number of VTSs to double before GCing again.  Except of course
+      that since we can't (or, at least, don't) shrink vts_tab, we
+      can't set the threshold value smaller than it. */
+   tl_assert(nFreed <= nTab);
+   nLive = nTab - nFreed;
+   tl_assert(nLive >= 0 && nLive <= nTab);
+   vts_next_GC_at = 2 * nLive;
+   if (vts_next_GC_at < nTab)
+      vts_next_GC_at = nTab;
+
+   if (show_stats) {
+      show_vts_stats("after GC");
+      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
+   }
+
+   /* Unconditional one-line summary; 'ctr' numbers the GCs done so
+      far.  nTab must be nonzero since it is used as a divisor. */
+   if (1) {
+      static UInt ctr = 0;
+      tl_assert(nTab > 0);
+      VG_(printf)("libhb: VTS GC: #%u  old size %lu  live %lu  (%2llu%%)\n",
+                  ctr++, nTab, nLive, (100ULL * nLive) / nTab);
+   }
+}
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// Vts IDs                                             //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+//////////////////////////
+/* Cache statistics.  The _queries counters count calls to the
+   corresponding _WRK function; the _misses counters count those
+   calls that were not satisfied from the cache. */
+static ULong stats__getOrdering_queries = 0;
+static ULong stats__getOrdering_misses  = 0;
+static ULong stats__join2_queries       = 0;
+static ULong stats__join2_misses        = 0;
+
+/* Rotate 'w' left by 'n' bit positions, treating it as a 32-bit
+   quantity (assumes UInt is 32 bits wide -- TODO confirm).  The
+   count is reduced modulo 32, and a zero count is handled
+   separately, since shifting by the full width is undefined
+   behaviour in C. */
+static inline UInt ROL32 ( UInt w, Int n ) {
+   n &= 31;
+   if (n == 0)
+      return w;
+   w = (w << n) | (w >> (32-n));
+   return w;
+}
+/* Hash a pair of VtsIDs to a slot number in the range 0 .. nTab-1.
+   The two ids are rotated by different amounts before mixing, so
+   (vi1,vi2) and (vi2,vi1) generally hash differently. */
+static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
+   UInt h1 = ROL32(vi1,19);
+   UInt h2 = ROL32(vi2,13);
+   return (h1 ^ h2) % nTab;
+}
+
+/* Cache of recent VtsID__getOrdering results.  Each slot, selected
+   by hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE), remembers one
+   (vi1, vi2) -> ord mapping; a miss overwrites the slot. */
+#define N_GETORDERING_CACHE 1023
+static
+   struct { VtsID vi1; VtsID vi2; POrd ord; }
+   getOrdering_cache[N_GETORDERING_CACHE];
+
+/* Likewise for VtsID__join2: each slot remembers one
+   (vi1, vi2) -> res mapping. */
+#define N_JOIN2_CACHE 1023
+static
+   struct { VtsID vi1; VtsID vi2; VtsID res; }
+   join2_cache[N_JOIN2_CACHE];
+
+/* Empty both the getOrdering and join2 caches, by overwriting every
+   slot's keys with VtsID_INVALID, which no valid query will match. */
+static void VtsID__invalidate_caches ( void ) {
+   Int i;
+
+   i = 0;
+   while (i < N_GETORDERING_CACHE) {
+      getOrdering_cache[i].ord = 0; /* an invalid POrd value */
+      getOrdering_cache[i].vi2 = VtsID_INVALID;
+      getOrdering_cache[i].vi1 = VtsID_INVALID;
+      i++;
+   }
+
+   i = 0;
+   while (i < N_JOIN2_CACHE) {
+      join2_cache[i].res = VtsID_INVALID;
+      join2_cache[i].vi2 = VtsID_INVALID;
+      join2_cache[i].vi1 = VtsID_INVALID;
+      i++;
+   }
+}
+//////////////////////////
+
+/* True iff 'vi' is within vts_tab and denotes an in-use entry.  For
+   an in-use entry, also checks that the VTS's backlink matches. */
+static Bool VtsID__is_valid ( VtsID vi ) {
+   VtsTE* ve;
+   if (vi < (VtsID)VG_(sizeXA)( vts_tab )) {
+      ve = VG_(indexXA)( vts_tab, vi );
+      if (ve->vts) {
+         tl_assert(ve->vts->id == vi);
+         return True;
+      }
+   }
+   return False;
+}
+
+/* Map a VtsID to the VTS it denotes.  Asserts if the table entry is
+   not in use. */
+static VTS* VtsID__to_VTS ( VtsID vi ) {
+   VtsTE* te = VG_(indexXA)( vts_tab, vi );
+   tl_assert(te->vts);
+   return te->vts;
+}
+
+/* Print the VTS denoted by 'vi', using VTS__show's format.  The text
+   is limited by the 100-byte local buffer. */
+static void VtsID__pp ( VtsID vi ) {
+   HChar buf[100];
+   VTS* vts = VtsID__to_VTS(vi);
+   /* tell VTS__show the buffer is one byte shorter than it really is,
+      then terminate the final byte ourselves */
+   VTS__show( buf, sizeof(buf)-1, vts );
+   buf[sizeof(buf)-1] = 0;
+   VG_(printf)("%s", buf);
+}
+
+/* Compute the partial ordering relation of vi1 and vi2, which must
+   be different.  Results are remembered in getOrdering_cache; on a
+   miss the relation is computed with VTS__cmp and the slot is
+   overwritten. */
+__attribute__((noinline))
+static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
+   UInt hash;
+   VTS  *v1, *v2;
+   tl_assert(vi1 != vi2);
+
+   stats__getOrdering_queries++;
+   hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
+
+   if (getOrdering_cache[hash].vi1 != vi1
+       || getOrdering_cache[hash].vi2 != vi2) {
+      /* cache miss: do it the slow way and refill the slot */
+      stats__getOrdering_misses++;
+      v1 = VtsID__to_VTS(vi1);
+      v2 = VtsID__to_VTS(vi2);
+      getOrdering_cache[hash].ord = VTS__cmp( v1, v2 );
+      getOrdering_cache[hash].vi1 = vi1;
+      getOrdering_cache[hash].vi2 = vi2;
+   }
+
+   return getOrdering_cache[hash].ord;
+}
+/* Partial ordering of vi1 and vi2.  Identical ids are trivially
+   equal; anything else goes to the cached worker. */
+static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
+   if (vi1 == vi2)
+      return POrd_EQ;
+   return VtsID__getOrdering_WRK(vi1, vi2);
+}
+
+/* Compute the binary join of vi1 and vi2, which must be different,
+   and return the VtsID of the result.  Results are remembered in
+   join2_cache; on a miss the join is computed with VTS__join,
+   interned, and the slot is overwritten. */
+__attribute__((noinline))
+static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
+   UInt  hash;
+   VTS   *vts1, *vts2, *nyu;
+   tl_assert(vi1 != vi2);
+
+   stats__join2_queries++;
+   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
+
+   if (join2_cache[hash].vi1 != vi1
+       || join2_cache[hash].vi2 != vi2) {
+      /* cache miss: do it the slow way and refill the slot */
+      stats__join2_misses++;
+      vts1 = VtsID__to_VTS(vi1);
+      vts2 = VtsID__to_VTS(vi2);
+      nyu  = VTS__join(vts1,vts2);
+      join2_cache[hash].res = vts_tab__find_and_dealloc__or_add(nyu);
+      join2_cache[hash].vi1 = vi1;
+      join2_cache[hash].vi2 = vi2;
+   }
+
+   return join2_cache[hash].res;
+}
+/* Binary join of vi1 and vi2.  Joining an id with itself gives the
+   same id back; anything else goes to the cached worker. */
+static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
+   if (vi1 == vi2)
+      return vi1;
+   return VtsID__join2_WRK(vi1, vi2);
+}
+
+/* create a singleton VTS, namely [thr:tym], intern it, and return
+   its VtsID */
+static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
+   VTS* nyu = VTS__singleton(thr,tym);
+   return vts_tab__find_and_dealloc__or_add(nyu);
+}
+
+/* tick operation, creates value 1 if specified index is absent.
+   The VTS denoted by 'vi' is not modified; the ticked copy is
+   interned and its VtsID returned. */
+static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
+   VTS* vts = VtsID__to_VTS(vi);
+   VTS* nyu = VTS__tick(idx,vts);
+   return vts_tab__find_and_dealloc__or_add(nyu);
+}
+
+/* index into a VTS (only for assertions): returns the timestamp for
+   'idx' in the VTS denoted by 'vi', or zero if 'idx' is absent.
+   Uses a linear search. */
+static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
+   VTS* vts = VtsID__to_VTS(vi);
+   return VTS__indexAt_SLOW( vts, idx );
+}
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// Threads                                             //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+struct _Thr {
+   /* Current VTSs for this thread.  They change as we go along.  viR
+      is the VTS to be used for reads, viW for writes.  Usually they
+      are the same, but can differ when we deal with reader-writer
+      locks.  It is always the case that VtsID__getOrdering(viW,viR)
+      == POrd_LT or POrd_EQ -- that is, viW must be the same, or
+      lagging behind, viR. */
+   VtsID viR;
+   VtsID viW;
+   /* opaque (to us) data we hold on behalf of the library's user. */
+   void* opaque;
+};
+
+/* Allocate a zeroed Thr and mark both of its timestamps as
+   VtsID_INVALID; the caller is expected to install real ones. */
+static Thr* Thr__new ( void ) {
+   Thr* fresh = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
+   fresh->viW = VtsID_INVALID;
+   fresh->viR = VtsID_INVALID;
+   return fresh;
+}
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// Shadow Values                                       //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
+// hb_zsm.h.  We have to do everything else here.
+
+/* SVal is 64 bit unsigned int.
+
+      <---------30--------->    <---------30--------->
+   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
+   01 X--------------------X XX X--------------------X   E(rror)
+   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
+   11 X--------------------X XX X--------------------X   I: SVal_INVALID
+*/
+#define SVAL_TAGMASK (3ULL << 62)
+
+/* True iff the top two (tag) bits of 's' are 00, i.e. 's' is a
+   C(Rmin,Wmin) constraint value. */
+static inline Bool SVal__isC ( SVal s ) {
+   const SVal tag = s & SVAL_TAGMASK;
+   return tag == (0ULL << 62);
+}
+/* Pack a C value: 'rmini' goes in the upper 32 bits and 'wmini' in
+   the lower 32 bits.  The validity assertions are deliberately left
+   disabled:
+      tl_assert(VtsID__is_valid(rmini));
+      tl_assert(VtsID__is_valid(wmini));  */
+static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
+   const ULong upper = ((ULong)rmini) << 32;
+   const ULong lower = (ULong)wmini;
+   return upper | lower;
+}
+/* Extract the Rmin VtsID (upper 32 bits) from a C value.  Asserts
+   that 's' really is a C value. */
+static inline VtsID SVal__unC_Rmin ( SVal s ) {
+   VtsID rmin;
+   tl_assert(SVal__isC(s));
+   rmin = (VtsID)(s >> 32);
+   return rmin;
+}
+/* Extract the Wmin VtsID (lower 32 bits) from a C value.  Asserts
+   that 's' really is a C value. */
+static inline VtsID SVal__unC_Wmin ( SVal s ) {
+   VtsID wmin;
+   tl_assert(SVal__isC(s));
+   wmin = (VtsID)(s & 0xFFFFFFFFULL);
+   return wmin;
+}
+
+/* True iff the tag bits of 's' are 01, i.e. 's' is an E(rror) value. */
+static Bool SVal__isE ( SVal s ) {
+   const SVal tag = s & SVAL_TAGMASK;
+   return tag == (1ULL << 62);
+}
+/* Make the E(rror) value: tag bits 01, all other bits zero. */
+static SVal SVal__mkE ( void ) {
+   const SVal tagE = 1ULL << 62;
+   return tagE;
+}
+
+/* True iff the tag bits of 's' are 10, i.e. 's' is in the
+   A (no-access) class. */
+static Bool SVal__isA ( SVal s ) {
+   const SVal tag = s & SVAL_TAGMASK;
+   return tag == (2ULL << 62);
+}
+/* Make the A (no-access) value: tag bits 10, all other bits zero. */
+static SVal SVal__mkA ( void ) {
+   const SVal tagA = 2ULL << 62;
+   return tagA;
+}
+
+/* Direct callback from lib_zsm.  For a C value, bump the reference
+   count of both VtsIDs it carries; any other kind of value holds no
+   references and is ignored. */
+static void SVal__rcinc ( SVal s ) {
+   if (!SVal__isC(s))
+      return;
+   VtsID__rcinc( SVal__unC_Rmin(s) );
+   VtsID__rcinc( SVal__unC_Wmin(s) );
+}
+
+/* Direct callback from lib_zsm.  For a C value, drop one reference
+   on each of the two VtsIDs it carries; any other kind of value holds
+   no references and is ignored. */
+static void SVal__rcdec ( SVal s ) {
+   if (!SVal__isC(s))
+      return;
+   VtsID__rcdec( SVal__unC_Rmin(s) );
+   VtsID__rcdec( SVal__unC_Wmin(s) );
+}
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// Change-event map2                                   //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+/* event_map_maybe_GC does nothing until oldrefTree holds at least
+   this many entries. */
+#define EVENT_MAP_GC_AT                (1 * 1000 * 1000)
+/* During a GC, whole generations are discarded (oldest first) for as
+   long as more than EVENT_MAP_GC_AT * this fraction of entries would
+   still remain afterwards. */
+#define EVENT_MAP_GC_DISCARD_FRACTION  0.5
+
+/* This is in two parts:
+
+   1. A set of RCECs (implemented below as a chained hash table,
+      contextTab).  This is a set of reference-counted stack traces.
+      Entries whose reference count has fallen to zero are removed
+      from the set and freed at the next GC.  The intent is to have
+      a set of stack traces which can be referred to from (2), but to
+      only represent each one once.  The set is searched by hashing
+      and then comparing the stack trace vectors.
+
+   2. An OSet of OldRefs.  These store information about each old ref
+      that we need to record.  It is indexed by address of the
+      location for which the information is recorded.  For LRU
+      purposes, each OldRef also contains a generation number,
+      indicating when it was most recently accessed.
+
+      The important part of an OldRef is, however, its accs[] array.
+      This is an array of N_OLDREF_ACCS pairs of Thr and a RCEC.  This
+      allows us to collect the last access-traceback by up to
+      N_OLDREF_ACCS different threads for this location.  The accs[]
+      array is a MTF-array.  If a pair falls off the end, that's too
+      bad -- we will lose info about that thread's access to this
+      location.
+
+      When this OSet becomes too big, we can throw away the entries
+      whose generation numbers are below some threshold; hence doing
+      approximate LRU discarding.  For each discarded OldRef we must
+      of course decrement the reference count on all the RCECs it
+      refers to, in order that entries from (1) eventually get
+      discarded too.
+*/
+
+
+/* Statistics for the RCEC ("context") table. */
+
+/* ctxt__rcdec call sites: (1) event_map_bind replacing the context in
+   a thread's existing slot, (2) event_map_bind evicting the last slot
+   of a full accs[] array, (3) event_map_maybe_GC discarding an
+   OldRef. */
+static UWord stats__ctxt_rcdec1 = 0;
+static UWord stats__ctxt_rcdec2 = 0;
+static UWord stats__ctxt_rcdec3 = 0;
+/* total number of calls to ctxt__rcdec */
+static UWord stats__ctxt_rcdec_calls = 0;
+/* NOTE(review): not updated by any of the code in this section --
+   confirm whether it is still used elsewhere. */
+static UWord stats__ctxt_rcdec_discards = 0;
+/* case (1) above where the new context is the very same RCEC as the
+   one it replaces */
+static UWord stats__ctxt_rcdec1_eq = 0;
+
+/* current number of RCECs in contextTab, and its high-water mark */
+static UWord stats__ctxt_tab_curr = 0;
+static UWord stats__ctxt_tab_max  = 0;
+
+/* number of ctxt__find_or_add queries, and the number of RCEC
+   comparisons performed while serving them */
+static UWord stats__ctxt_tab_qs   = 0;
+static UWord stats__ctxt_tab_cmps = 0;
+
+
+///////////////////////////////////////////////////////
+//// Part (1): An OSet of RCECs
+///
+
+/* Number of stack frames recorded in each RCEC. */
+#define N_FRAMES 8
+
+// (UInt) `echo "Reference Counted Execution Context" | md5sum`
+#define RCEC_MAGIC 0xab88abb2UL
+
+/* Number of buckets in contextTab; a bucket is selected by
+   frames[0] % N_RCEC_TAB. */
+//#define N_RCEC_TAB 98317 /* prime */
+#define N_RCEC_TAB 196613 /* prime */
+
+/* A reference-counted stack trace, chained into one bucket of
+   contextTab. */
+typedef
+   struct _RCEC {
+      struct _RCEC* next;  /* next entry in the same hash bucket */
+      UWord magic;         /* always RCEC_MAGIC */
+      UWord rc;            /* number of OldRef slots pointing here */
+      UWord rcX; /* used for crosschecking */
+      UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
+   }
+   RCEC;
+
+static RCEC** contextTab = NULL; /* hash table of RCEC*s */
+
+
+/* Arbitrary total order on the .frames fields of two RCECs: compare
+   word by word, starting with the hash in frames[0], and report the
+   first difference as -1 or 1.  Returns 0 iff all words agree. */
+static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
+   Word k;
+   tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
+   tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
+   for (k = 0; k < 1 + N_FRAMES; k++) {
+      const UWord w1 = ec1->frames[k];
+      const UWord w2 = ec2->frames[k];
+      if (w1 < w2) return -1;
+      if (w1 > w2) return 1;
+   }
+   return 0;
+}
+
+
+/* Drop one reference from 'ec'.  The count must be non-zero on
+   entry.  The RCEC is not freed here even if the count reaches
+   zero. */
+static void ctxt__rcdec ( RCEC* ec )
+{
+   stats__ctxt_rcdec_calls++;
+   tl_assert(ec && ec->magic == RCEC_MAGIC);
+   tl_assert(ec->rc > 0);
+   ec->rc -= 1;
+}
+
+/* Add one reference to 'ec'. */
+static void ctxt__rcinc ( RCEC* ec )
+{
+   tl_assert(ec && ec->magic == RCEC_MAGIC);
+   ec->rc += 1;
+}
+
+
+/* Locate 'ec' in the singly-linked RCEC list whose head pointer is
+   stored at 'headp', and swap it with its predecessor so that it sits
+   one step closer to the front of the list, making later searches
+   for it cheaper.  'ec' must be in the list and must not already be
+   the head. */
+static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
+{
+   RCEC *cur, *prev, *prevprev;
+   if (ec == *headp)
+      tl_assert(0); /* already at head of list */
+   tl_assert(ec != NULL);
+
+   /* Walk to 'ec', remembering the two nodes immediately before it. */
+   prevprev = NULL;
+   prev     = NULL;
+   for (cur = *headp; cur != NULL && cur != ec; cur = cur->next) {
+      prevprev = prev;
+      prev     = cur;
+   }
+   tl_assert(cur == ec);
+   if (cur == NULL || prev == NULL)
+      return;
+
+   if (prevprev != NULL) {
+      /* General case: prevprev -> prev -> cur -> after becomes
+         prevprev -> cur -> prev -> after. */
+      RCEC* after = cur->next;
+      tl_assert(prevprev->next == prev);
+      tl_assert(prev->next == cur);
+      prevprev->next = cur;
+      cur->next      = prev;
+      prev->next     = after;
+   } else {
+      /* 'ec' is second in the list, so it becomes the new head. */
+      tl_assert(*headp == prev);
+      tl_assert(prev->next == cur);
+      prev->next = cur->next;
+      cur->next  = prev;
+      *headp     = cur;
+   }
+}
+
+
+/* Find an RCEC in contextTab whose frames equal those of 'example'
+   and return a pointer to it.  If there is none, insert a heap copy
+   of 'example' at the head of its bucket and return the copy.  The
+   result is never 'example' itself, so the caller may keep 'example'
+   on its stack.  A freshly inserted node has .rc of zero, so the
+   caller must immediately increment it. */
+__attribute__((noinline))
+static RCEC* ctxt__find_or_add ( RCEC* example )
+{
+   UWord hent;
+   RCEC* copy;
+   tl_assert(example && example->magic == RCEC_MAGIC);
+   tl_assert(example->rc == 0);
+
+   /* Scan the bucket chain for an entry with identical frames. */
+   stats__ctxt_tab_qs++;
+   hent = example->frames[0] % N_RCEC_TAB;
+   for (copy = contextTab[hent]; copy; copy = copy->next) {
+      tl_assert(copy->magic == RCEC_MAGIC);
+      stats__ctxt_tab_cmps++;
+      if (0 == RCEC__cmp_by_frames(copy, example))
+         break;
+   }
+
+   if (!copy) {
+      /* Not present: clone it and push the clone on the bucket. */
+      copy = HG_(zalloc)( "libhb.cfoa.1", sizeof(RCEC) );
+      tl_assert(copy != example);
+      *copy = *example;
+      copy->next = contextTab[hent];
+      contextTab[hent] = copy;
+      stats__ctxt_tab_curr++;
+      if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
+         stats__ctxt_tab_max = stats__ctxt_tab_curr;
+      return copy;
+   }
+
+   tl_assert(copy != example);
+   /* optimisation: if the hit is not at the head of its chain, move
+      it one step forwards to make future searches cheaper */
+   if (copy != contextTab[hent])
+      move_RCEC_one_step_forward( &contextTab[hent], copy );
+   return copy;
+}
+
+/* Rotate the word 'w' left by 'n' bit positions and return the
+   result.  The count is reduced modulo the word width and both shift
+   amounts are kept strictly below that width, so n == 0 (or any
+   multiple of the width) no longer causes a shift by the full word
+   size, which C leaves undefined.  Assumes the word width is a power
+   of two. */
+static inline UWord ROLW ( UWord w, Int n )
+{
+   const UWord bpw = 8 * sizeof(UWord);
+   const UWord sh  = ((UWord)n) & (bpw - 1);
+   return (w << sh) | (w >> ((bpw - sh) & (bpw - 1)));
+}
+
+/* Capture the current stack trace of 'thr' (N_FRAMES words, obtained
+   from main_get_stacktrace), hash it, and return the matching RCEC
+   from contextTab, adding one if necessary.  The returned RCEC may
+   have a reference count of zero; the caller must increment it. */
+__attribute__((noinline))
+static RCEC* get_RCEC ( Thr* thr )
+{
+   UWord hash, i;
+   RCEC  example;
+   /* Initialise every field: ctxt__find_or_add copies the whole
+      struct, so no member may be left indeterminate. */
+   example.next  = NULL;
+   example.magic = RCEC_MAGIC;
+   example.rc = 0;
+   example.rcX = 0;
+   main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
+   /* frames[0] holds a hash of frames[1 .. N_FRAMES]: xor each frame
+      in, rotating the accumulator between frames. */
+   hash = 0;
+   for (i = 1; i < 1 + N_FRAMES; i++) {
+      hash ^= example.frames[i];
+      hash = ROLW(hash, 19);
+   }
+   example.frames[0] = hash;
+   return ctxt__find_or_add( &example );
+}
+
+///////////////////////////////////////////////////////
+//// Part (2): An OSet of OldRefs, that refer to (1)
+///
+
+// (UInt) `echo "Old Reference Information" | md5sum`
+#define OldRef_MAGIC 0x30b1f075UL
+
+/* One recorded access: the thread that made it and the stack trace
+   at the time. */
+typedef  struct { Thr* thr; RCEC* rcec; }  Thr_n_RCEC;
+
+/* Maximum number of distinct threads remembered per address. */
+#define N_OLDREF_ACCS 3
+
+/* Record of previous accesses to one address.  'ea' must remain the
+   first field: oldrefTree is created to compare nodes directly on
+   the key found at offset 0. */
+typedef
+   struct {
+      Addr  ea;     /* the address this record describes (tree key) */
+      UWord magic;  /* always OldRef_MAGIC */
+      UWord gen;    /* when most recently accessed */
+      /* unused slots in this array have .thr == NULL */
+      Thr_n_RCEC accs[N_OLDREF_ACCS];
+   }
+   OldRef;
+
+/* Order two OldRefs by their .ea field: -1, 0 or 1 as r1's address
+   is below, equal to or above r2's. */
+static Word OldRef__cmp_by_EA ( OldRef* r1, OldRef* r2 ) {
+   tl_assert(r1 && r1->magic == OldRef_MAGIC);
+   tl_assert(r2 && r2->magic == OldRef_MAGIC);
+   if (r1->ea == r2->ea)
+      return 0;
+   return r1->ea < r2->ea ? -1 : 1;
+}
+
+static OSet* oldrefTree     = NULL; /* OSet* of OldRef */
+static UWord oldrefGen      = 0;    /* current LRU generation # */
+static UWord oldrefTreeN    = 0;    /* # elems in oldrefTree */
+static UWord oldrefGenIncAt = 0;    /* inc gen # when size hits this */
+
+/* Record that thread 'thr' has just accessed address 'a', together
+   with thr's current stack trace.  If an OldRef for 'a' exists it is
+   updated in place and stamped with the current generation;
+   otherwise a new OldRef is created and inserted into oldrefTree.
+   'thr' must be non-NULL, since a NULL .thr marks an empty slot. */
+static void event_map_bind ( Addr a, Thr* thr )
+{
+   OldRef key, *ref;
+   RCEC*  here;
+   Word   i, j;
+
+   /* Only .ea and .magic of the lookup key are initialised. */
+   key.ea    = a;
+   key.magic = OldRef_MAGIC;
+
+   ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
+
+   if (ref) {
+
+      /* We already have a record for this address.  We now need to
+         see if we have a stack trace pertaining to this thread's
+         access. */
+      tl_assert(ref->magic == OldRef_MAGIC);
+
+      tl_assert(thr);
+      for (i = 0; i < N_OLDREF_ACCS; i++) {
+         if (ref->accs[i].thr == thr)
+            break;
+      }
+
+      if (i < N_OLDREF_ACCS) {
+         /* thread 'thr' has an entry at index 'i'.  Update it. */
+         if (i > 0) {
+            /* Swap it with its predecessor so it moves one slot
+               towards the front of accs[]. */
+            Thr_n_RCEC tmp = ref->accs[i-1];
+            ref->accs[i-1] = ref->accs[i];
+            ref->accs[i] = tmp;
+            i--;
+         }
+         here = get_RCEC( thr );
+         if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
+         /* Increment the new context before decrementing the old
+            one: they may be the same RCEC, and ctxt__rcdec asserts
+            that the count is non-zero. */
+         ctxt__rcinc( here );
+         stats__ctxt_rcdec1++;
+         ctxt__rcdec( ref->accs[i].rcec );
+         ref->accs[i].rcec = here;
+         tl_assert(ref->accs[i].thr == thr);
+      } else {
+         here = get_RCEC( thr );
+         ctxt__rcinc( here );
+         /* No entry for this thread.  Shuffle all of them down one
+            slot, and put the new entry at the start of the array. */
+         if (ref->accs[N_OLDREF_ACCS-1].thr) {
+            /* the last slot is in use.  We must dec the rc on the
+               associated rcec. */
+            tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
+            stats__ctxt_rcdec2++;
+            ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
+         } else {
+            tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
+         }
+         for (j = N_OLDREF_ACCS-1; j >= 1; j--)
+            ref->accs[j] = ref->accs[j-1];
+         ref->accs[0].thr = thr;
+         ref->accs[0].rcec = here;
+         tl_assert(thr); /* thr==NULL is used to signify an empty slot,
+                            so we can't add a NULL thr. */
+      }
+
+      /* Mark the record as touched in the current generation. */
+      ref->gen = oldrefGen;
+      tl_assert(ref->ea == a);
+
+   } else {
+
+      /* We don't have a record for this address.  Create a new one. */
+      if (oldrefTreeN >= oldrefGenIncAt) {
+         /* Start a new generation; the next one begins after a
+            further 50000 records have been added. */
+         oldrefGen++;
+         oldrefGenIncAt = oldrefTreeN + 50000;
+         if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
+                            oldrefGen, oldrefTreeN );
+      }
+      here = get_RCEC( thr );
+      ctxt__rcinc(here);
+      ref = VG_(OSetGen_AllocNode)( oldrefTree, sizeof(OldRef) );
+      ref->magic = OldRef_MAGIC;
+      ref->gen = oldrefGen;
+      ref->ea = a;
+      ref->accs[0].rcec = here;
+      ref->accs[0].thr = thr;
+      tl_assert(thr); /* thr==NULL is used to signify an empty slot,
+                         so we can't add a NULL thr. */
+      /* All remaining slots start out empty. */
+      for (j = 1; j < N_OLDREF_ACCS; j++) {
+         ref->accs[j].thr = NULL;
+         ref->accs[j].rcec = NULL;
+      }
+      VG_(OSetGen_Insert)( oldrefTree, ref );
+      oldrefTreeN++;
+
+   }
+}
+
+
+/* Look for a recorded earlier access to address 'a', preferably one
+   made by a thread other than 'thr_acc'.  On success, store that
+   access's stack trace (converted by main_stacktrace_to_EC) in
+   *resEC and its thread in *resThr, and return True.  Return False,
+   leaving the outputs untouched, if nothing is recorded for 'a'. */
+static
+Bool event_map_lookup ( /*OUT*/struct _EC** resEC,
+                        /*OUT*/Thr** resThr,
+                        Thr* thr_acc, Addr a )
+{
+   Word    slot;
+   OldRef  probe;
+   OldRef* hit;
+
+   tl_assert(thr_acc);
+
+   probe.magic = OldRef_MAGIC;
+   probe.ea    = a;
+
+   hit = VG_(OSetGen_Lookup)( oldrefTree, &probe );
+   if (!hit)
+      return False;
+
+   tl_assert(hit->magic == OldRef_MAGIC);
+   tl_assert(hit->accs[0].thr); /* first slot must always be used */
+
+   /* Skip empty slots and slots belonging to thr_acc itself. */
+   slot = 0;
+   while (slot < N_OLDREF_ACCS
+          && (hit->accs[slot].thr == NULL
+              || hit->accs[slot].thr == thr_acc))
+      slot++;
+
+   /* If there is no entry for some thread other than thr_acc, just
+      fall back to slot 0.  It'll look pretty stupid to the user
+      though. */
+   if (slot == N_OLDREF_ACCS)
+      slot = 0;
+
+   tl_assert(slot >= 0 && slot < N_OLDREF_ACCS);
+   tl_assert(hit->accs[slot].thr);
+   tl_assert(hit->accs[slot].rcec);
+   tl_assert(hit->accs[slot].rcec->magic == RCEC_MAGIC);
+
+   *resEC  = main_stacktrace_to_EC(&hit->accs[slot].rcec->frames[1], N_FRAMES);
+   *resThr = hit->accs[slot].thr;
+   return True;
+}
+
+/* One-time set-up of the event map: allocate the (empty) RCEC hash
+   table, create the (empty) OldRef tree and reset the generation
+   bookkeeping.  Must not be called when either structure already
+   exists. */
+static void event_map_init ( void )
+{
+   Word bucket;
+
+   tl_assert(!contextTab);
+   contextTab = HG_(zalloc)( "libhb.event_map_init.1 (context table)",
+                             N_RCEC_TAB * sizeof(RCEC*) );
+   tl_assert(contextTab);
+   bucket = 0;
+   while (bucket < N_RCEC_TAB) {
+      contextTab[bucket] = NULL;
+      bucket++;
+   }
+
+   tl_assert(!oldrefTree);
+   tl_assert(offsetof(OldRef,ea) == 0); /* prereq for unboxed cmps */
+   oldrefTree = VG_(OSetGen_Create)(
+                   offsetof(OldRef,ea), /* == 0 */
+                   NULL, /* use unboxed cmp on OldRefs */
+                   HG_(zalloc), "libhb.event_map_init.2 (oldref tree)",
+                   HG_(free)
+                );
+   tl_assert(oldrefTree);
+
+   oldrefTreeN    = 0;
+   oldrefGenIncAt = 0;
+   oldrefGen      = 0;
+}
+
+/* Cross-check the RCEC reference counts against the OldRefs that
+   actually point at them.  Every RCEC's rcX is zeroed, then
+   recomputed by walking all OldRefs, and finally required to equal
+   its rc.  When 'before' is False (i.e. just after a GC) every RCEC
+   must additionally have a non-zero rc; zero counts are tolerated
+   only before a GC. */
+static void event_map__check_reference_counts ( Bool before )
+{
+   RCEC*   ec;
+   OldRef* ref;
+   Word    k;
+   UWord   nSeen = 0;
+
+   /* Pass 1: reset the check counts, and count the entries. */
+   for (k = 0; k < N_RCEC_TAB; k++) {
+      ec = contextTab[k];
+      while (ec) {
+         nSeen++;
+         tl_assert(ec);
+         tl_assert(ec->magic == RCEC_MAGIC);
+         if (!before)
+            tl_assert(ec->rc > 0);
+         ec->rcX = 0;
+         ec = ec->next;
+      }
+   }
+
+   /* check that the stats are sane */
+   tl_assert(nSeen == stats__ctxt_tab_curr);
+   tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
+
+   /* Pass 2: visit every referencing slot and bump the check count
+      of the RCEC it points at. */
+   VG_(OSetGen_ResetIter)( oldrefTree );
+   while ( (ref = VG_(OSetGen_Next)( oldrefTree )) ) {
+      tl_assert(ref->magic == OldRef_MAGIC);
+      for (k = 0; k < N_OLDREF_ACCS; k++) {
+         if (!ref->accs[k].thr) {
+            tl_assert(!ref->accs[k].rcec);
+            continue;
+         }
+         tl_assert(ref->accs[k].rcec);
+         tl_assert(ref->accs[k].rcec->magic == RCEC_MAGIC);
+         ref->accs[k].rcec->rcX++;
+      }
+   }
+
+   /* Pass 3: the check counts must agree with the real ones. */
+   for (k = 0; k < N_RCEC_TAB; k++) {
+      for (ec = contextTab[k]; ec; ec = ec->next)
+         tl_assert(ec->rc == ec->rcX);
+   }
+}
+
+/* Garbage-collect the event map once oldrefTree holds at least
+   EVENT_MAP_GC_AT entries; otherwise do nothing.  The collection
+     - builds a histogram of OldRef generation numbers,
+     - chooses the highest generation 'maxGen' such that discarding
+       all generations up to and including it still leaves more than
+       EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION entries,
+     - removes every OldRef with gen <= maxGen, dropping its RCEC
+       references, and
+     - frees every RCEC whose reference count is now zero.
+   Reference counts are cross-checked before and after.  The function
+   asserts if no entry at all can be discarded. */
+static void event_map_maybe_GC ( void )
+{
+   OldRef* oldref;
+   UWord   keyW, valW, retained, maxGen;
+   WordFM* genMap;
+   XArray* refs2del;
+   Word    i, j, n2del;
+
+   if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
+      return;
+
+   if (0)
+      VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
+
+   /* Check our counting is sane */
+   tl_assert(oldrefTreeN == (UWord) VG_(OSetGen_Size)( oldrefTree ));
+
+   /* Check the reference counts */
+   event_map__check_reference_counts( True/*before*/ );
+
+   /* Compute the distribution of generation values in the ref tree */
+   /* genMap :: generation-number -> count-of-nodes-with-that-number */
+   genMap = VG_(newFM)( HG_(zalloc), "libhb.emmG.1",
+                                      HG_(free), NULL );
+
+   VG_(OSetGen_ResetIter)( oldrefTree );
+   while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
+      UWord key = oldref->gen;
+      keyW = valW = 0;
+      if (VG_(lookupFM)(genMap, &keyW, &valW, key )) {
+         tl_assert(keyW == key);
+         tl_assert(valW > 0);
+      }
+      /* now valW is the old count for generation 'key' */
+      VG_(addToFM)(genMap, key, valW+1);
+   }
+
+   tl_assert(VG_(sizeFM)(genMap) > 0);
+
+   /* Visit the generations in the order the map yields them (asserted
+      below to be non-decreasing) and keep discarding whole
+      generations while enough entries would remain. */
+   retained = oldrefTreeN;
+   maxGen = 0;
+   VG_(initIterFM)( genMap );
+   while (VG_(nextIterFM)( genMap, &keyW, &valW )) {
+      tl_assert(keyW > 0); /* can't allow a generation # 0 */
+      if (0) VG_(printf)("  XXX: gen %lu has %lu\n", keyW, valW );
+      tl_assert(keyW >= maxGen);
+      tl_assert(retained >= valW);
+      if (retained - valW
+          > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
+         retained -= valW;
+         maxGen = keyW;
+      } else {
+         break;
+      }
+   }
+   VG_(doneIterFM)( genMap );
+
+   /* NOTE(review): this message is printed unconditionally on every
+      GC -- confirm that is intended. */
+   VG_(printf)(
+      "libhb: EvM GC: delete generations %lu and below, "
+      "retaining %lu entries\n",
+      maxGen, retained );
+
+   VG_(deleteFM)( genMap, NULL, NULL );
+
+   /* If this fails, it means there's only one generation in the
+      entire tree.  So we're kind of in a bad situation, and need to
+      do some stop-gap measure, such as randomly deleting half the
+      entries. */
+   tl_assert(retained < oldrefTreeN);
+
+   /* Now make up a big list of the oldrefTree entries we want to
+      delete.  We can't simultaneously traverse the tree and delete
+      stuff from it, so first we need to copy them off somewhere
+      else. (sigh) */
+   /* This array gets its own allocation tag, distinct from the one
+      used for genMap above, so the two allocation sites are not
+      lumped together. */
+   refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
+                          HG_(free), sizeof(OldRef*) );
+
+   VG_(OSetGen_ResetIter)( oldrefTree );
+   while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
+      tl_assert(oldref->magic == OldRef_MAGIC);
+      if (oldref->gen <= maxGen) {
+         VG_(addToXA)( refs2del, &oldref );
+      }
+   }
+
+   n2del = VG_(sizeXA)( refs2del );
+   tl_assert(n2del == (Word)(oldrefTreeN - retained));
+
+   if (0) VG_(printf)("%s","deleting entries\n");
+   for (i = 0; i < n2del; i++) {
+      void* nd;
+      OldRef* ref = *(OldRef**)VG_(indexXA)( refs2del, i );
+      tl_assert(ref);
+      tl_assert(ref->magic == OldRef_MAGIC);
+      /* Release this OldRef's hold on each RCEC it refers to. */
+      for (j = 0; j < N_OLDREF_ACCS; j++) {
+         if (ref->accs[j].rcec) {
+            tl_assert(ref->accs[j].thr);
+            stats__ctxt_rcdec3++;
+            ctxt__rcdec( ref->accs[j].rcec );
+         } else {
+            tl_assert(!ref->accs[j].thr);
+         }
+      }
+      nd = VG_(OSetGen_Remove)( oldrefTree, ref );
+      VG_(OSetGen_FreeNode)( oldrefTree, nd );
+   }
+
+   VG_(deleteXA)( refs2del );
+
+   tl_assert( VG_(OSetGen_Size)( oldrefTree ) == retained );
+
+   oldrefTreeN = retained;
+   oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
+
+   /* Throw away all RCECs with zero reference counts.  'pp' always
+      points at the link that leads to 'p', so unlinking is a single
+      store. */
+   for (i = 0; i < N_RCEC_TAB; i++) {
+      RCEC** pp = &contextTab[i];
+      RCEC*  p  = *pp;
+      while (p) {
+         if (p->rc == 0) {
+            *pp = p->next;
+            HG_(free)(p);
+            p = *pp;
+            tl_assert(stats__ctxt_tab_curr > 0);
+            stats__ctxt_tab_curr--;
+         } else {
+            pp = &p->next;
+            p = p->next;
+         }
+      }
+   }
+
+   /* Check the reference counts */
+   event_map__check_reference_counts( False/*after*/ );
+
+}
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// Core MSM                                            //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+/* When nonzero, msm_read/msm_write record the access in the event
+   map (event_map_bind) whenever a C state changes to a different C
+   state. */
+#define MSM_CONFACC 1
+
+/* When nonzero, a location on which a race is detected moves to the
+   E(rror) state; when zero it stays in a (joined) C state. */
+#define MSM_RACE2ERR 1
+
+/* When nonzero, msm_read/msm_write sanity-check the old and new
+   shadow values with is_sane_SVal_C. */
+#define MSM_CHECK 0
+
+/* Number of msm_read / msm_write calls, and how many of them were
+   recorded in the event map (see MSM_CONFACC). */
+static ULong stats__msm_read         = 0;
+static ULong stats__msm_read_change  = 0;
+static ULong stats__msm_write        = 0;
+static ULong stats__msm_write_change = 0;
+
+/* Report a race on 'acc_addr' (access of 'szB' bytes by 'acc_thr',
+   a write iff 'isWrite').  If the event map has a record of an
+   earlier access to the address, that access's context and thread
+   are passed along with the report; otherwise both are passed as
+   NULL.  'svOld' and 'svNew' are not used. */
+__attribute__((noinline))
+static void record_race_info ( Thr* acc_thr, 
+                               Addr acc_addr, SizeT szB, Bool isWrite,
+                               SVal svOld, SVal svNew )
+{
+   Thr*        confThr    = NULL;
+   void*       confOpaque = NULL;
+   struct _EC* accEC      = NULL;
+   struct _EC* confEC     = NULL;
+
+   /* The current context is fetched but not otherwise used here. */
+   accEC = main_get_EC( acc_thr );
+
+   if (event_map_lookup( &confEC, &confThr, acc_thr, acc_addr )) {
+      tl_assert(confEC);
+      tl_assert(confThr);
+      tl_assert(confThr->opaque);
+      confOpaque = confThr->opaque;
+   } else {
+      tl_assert(!confEC);
+      tl_assert(!confThr);
+   }
+
+   tl_assert(acc_thr->opaque);
+   HG_(record_error_Race)( acc_thr->opaque, acc_addr,
+                           isWrite, szB, NULL/*mb_lastlock*/,
+                           confEC, confOpaque );
+}
+
+/* Sanity check for shadow values.  Non-C values are always accepted;
+   a C value is accepted iff its Rmin is equal to, or ordered before,
+   its Wmin. */
+static Bool is_sane_SVal_C ( SVal sv ) {
+   POrd ord;
+   if (!SVal__isC(sv))
+      return True;
+   ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
+   return (ord == POrd_EQ || ord == POrd_LT) ? True : False;
+}
+
+
+/* Compute new state following a read.
+
+   svOld is the current shadow value of the location; the return
+   value is its new shadow value (never SVal_INVALID):
+     - C state: if the location's Rmin is equal to or ordered before
+       the reading thread's viR there is no race and Wmin is joined
+       with the thread's viW.  Otherwise a race is reported and the
+       location moves to E (or to a joined C state if MSM_RACE2ERR
+       is 0).
+     - A state (no access): left unchanged.
+     - E state: left in error.
+   Any other svOld is fatal.  acc_thr, acc_addr and szB describe the
+   access; apart from acc_thr's timestamps they are used only for
+   error reporting and event-map recording. */
+static inline SVal msm_read ( SVal svOld,
+                              /* The following are only needed for 
+                                 creating error reports. */
+                              Thr* acc_thr,
+                              Addr acc_addr, SizeT szB )
+{
+   SVal svNew = SVal_INVALID;
+   stats__msm_read++;
+
+   /* Redundant sanity check on the constraints */
+   if (MSM_CHECK) {
+      tl_assert(is_sane_SVal_C(svOld));
+   }
+
+   if (SVal__isC(svOld)) {
+      POrd  ord;
+      VtsID tviR  = acc_thr->viR;
+      VtsID tviW  = acc_thr->viW;
+      VtsID rmini = SVal__unC_Rmin(svOld);
+      VtsID wmini = SVal__unC_Wmin(svOld);
+
+      ord = VtsID__getOrdering(rmini,tviR);
+      if (ord == POrd_EQ || ord == POrd_LT) {
+         /* no race */
+         /* Note: RWLOCK subtlety: use tviW, not tviR */
+         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
+         goto out;
+      } else {
+         /* NOTE(review): this arm joins with tviR whereas the no-race
+            arm above uses tviW -- confirm the difference is
+            intentional.  It is only reachable when MSM_RACE2ERR is
+            0. */
+         svNew = MSM_RACE2ERR
+                    ? SVal__mkE()
+                    : SVal__mkC( rmini, VtsID__join2(wmini,tviR) );
+         record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
+                           svOld, svNew );
+         goto out;
+      }
+   }
+   if (SVal__isA(svOld)) {
+      /* reading no-access memory (sigh); leave unchanged */
+      /* check for no pollution */
+      tl_assert(svOld == SVal_NOACCESS);
+      svNew = SVal_NOACCESS;
+      goto out;
+   }
+   if (SVal__isE(svOld)) {
+      /* no race, location is already "in error" */
+      svNew = SVal__mkE();
+      goto out;
+   }
+   VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
+   tl_assert(0);
+
+  out:
+   if (MSM_CHECK) {
+      tl_assert(is_sane_SVal_C(svNew));
+   }
+   tl_assert(svNew != SVal_INVALID);
+   /* Record the access in the event map only when one C state has
+      been replaced by a different C state. */
+   if (svNew != svOld) {
+      if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
+         event_map_bind( acc_addr, acc_thr );
+         stats__msm_read_change++;
+      }
+   }
+   return svNew;
+}
+
+
+/* Compute new state following a write.
+
+   svOld is the current shadow value of the location; the return
+   value is its new shadow value (never SVal_INVALID):
+     - C state: if the location's Wmin is equal to or ordered before
+       the writing thread's viW there is no race and both Rmin and
+       Wmin become the thread's viW.  Otherwise a race is reported
+       and the location moves to E (or, if MSM_RACE2ERR is 0, to a C
+       state with both fields joined with viW).
+     - A state (no access): left unchanged.
+     - E state: left in error.
+   Any other svOld is fatal.  acc_thr, acc_addr and szB describe the
+   access; apart from acc_thr's timestamp they are used only for
+   error reporting and event-map recording. */
+static inline SVal msm_write ( SVal svOld,
+                              /* The following are only needed for 
+                                 creating error reports. */
+                              Thr* acc_thr,
+                              Addr acc_addr, SizeT szB )
+{
+   SVal svNew = SVal_INVALID;
+   stats__msm_write++;
+
+   /* Redundant sanity check on the constraints */
+   if (MSM_CHECK) {
+      tl_assert(is_sane_SVal_C(svOld));
+   }
+
+   if (SVal__isC(svOld)) {
+      POrd  ord;
+      VtsID tviW  = acc_thr->viW;
+      VtsID wmini = SVal__unC_Wmin(svOld);
+
+      ord = VtsID__getOrdering(wmini,tviW);
+      if (ord == POrd_EQ || ord == POrd_LT) {
+         /* no race */
+         svNew = SVal__mkC( tviW, tviW );
+         goto out;
+      } else {
+         VtsID rmini = SVal__unC_Rmin(svOld);
+         svNew = MSM_RACE2ERR
+                    ? SVal__mkE()
+                    : SVal__mkC( VtsID__join2(rmini,tviW),
+                                 VtsID__join2(wmini,tviW) );
+         record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
+                           svOld, svNew );
+         goto out;
+      }
+   }
+   if (SVal__isA(svOld)) {
+      /* writing no-access memory (sigh); leave unchanged */
+      /* check for no pollution */
+      tl_assert(svOld == SVal_NOACCESS);
+      svNew = SVal_NOACCESS;
+      goto out;
+   }
+   if (SVal__isE(svOld)) {
+      /* no race, location is already "in error" */
+      svNew = SVal__mkE();
+      goto out;
+   }
+   VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
+   tl_assert(0);
+
+  out:
+   if (MSM_CHECK) {
+      tl_assert(is_sane_SVal_C(svNew));
+   }
+   tl_assert(svNew != SVal_INVALID);
+   /* Record the access in the event map only when one C state has
+      been replaced by a different C state. */
+   if (svNew != svOld) {
+      if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
+         event_map_bind( acc_addr, acc_thr );
+         stats__msm_write_change++;
+      }
+   }
+   return svNew;
+}
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// Apply core MSM to specific memory locations         //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+/*------------- ZSM accesses: 8 bit apply ------------- */
+
+/* Apply the read state machine to the single byte at 'a' on behalf
+   of 'thr'.  If the cache line's tree does not currently hold a
+   separate value for this byte, the tree is first pulled down to
+   8-bit granularity. */
+void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
+   CacheLine* line;
+   UWord      lineOff, treeNo, treeOff;
+   SVal       before, after;
+   UShort     descr;
+   stats__cline_read8s++;
+   line    = get_cacheline(a);
+   lineOff = get_cacheline_offset(a);
+   treeNo  = get_treeno(a);
+   treeOff = get_tree_offset(a); /* == 0 .. 7 */
+   descr   = line->descrs[treeNo];
+   if (UNLIKELY( 0 == (descr & (TREE_DESCR_8_0 << treeOff)) )) {
+      line->descrs[treeNo]
+         = pulldown_to_8( &line->svals[treeNo << 3], treeOff, descr );
+      if (SCE_CACHELINE)
+         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
+   }
+   before = line->svals[lineOff];
+   after  = msm_read( before, thr,a,1 );
+   tl_assert(after != SVal_INVALID);
+   line->svals[lineOff] = after;
+}
+
+/* Apply the write state machine to the single byte at 'a' on behalf
+   of 'thr'.  If the cache line's tree does not currently hold a
+   separate value for this byte, the tree is first pulled down to
+   8-bit granularity. */
+void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
+   CacheLine* cl; 
+   UWord      cloff, tno, toff;
+   SVal       svOld, svNew;
+   UShort     descr;
+   /* Count this as a write; it was previously charged to the 8-bit
+      read counter. */
+   stats__cline_write8s++;
+   cl    = get_cacheline(a);
+   cloff = get_cacheline_offset(a);
+   tno   = get_treeno(a);
+   toff  = get_tree_offset(a); /* == 0 .. 7 */
+   descr = cl->descrs[tno];
+   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
+      SVal* tree = &cl->svals[tno << 3];
+      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
+      if (SCE_CACHELINE)
+         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+   }
+   svOld = cl->svals[cloff];
+   svNew = msm_write( svOld, thr,a,1 );
+   tl_assert(svNew != SVal_INVALID);
+   cl->svals[cloff] = svNew;
+}
+
+/*------------- ZSM accesses: 16 bit apply ------------- */
+
+/* Apply the read state machine to the 16-bit quantity at 'a' on
+   behalf of 'thr'.  A misaligned access, or one whose tree already
+   holds finer-grained values below this position, is split into two
+   8-bit reads; otherwise the tree is pulled down to 16-bit
+   granularity if necessary and handled as a single unit. */
+void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
+   CacheLine* line;
+   UWord      lineOff, treeNo, treeOff;
+   SVal       before, after;
+   UShort     descr;
+   stats__cline_read16s++;
+   if (UNLIKELY(!aligned16(a))) goto slowcase;
+   line    = get_cacheline(a);
+   lineOff = get_cacheline_offset(a);
+   treeNo  = get_treeno(a);
+   treeOff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
+   descr   = line->descrs[treeNo];
+   if (UNLIKELY( 0 == (descr & (TREE_DESCR_16_0 << treeOff)) )) {
+      if (valid_value_is_below_me_16(descr, treeOff))
+         goto slowcase;
+      line->descrs[treeNo]
+         = pulldown_to_16( &line->svals[treeNo << 3], treeOff, descr );
+      if (SCE_CACHELINE)
+         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
+   }
+   before = line->svals[lineOff];
+   after  = msm_read( before, thr,a,2 );
+   tl_assert(after != SVal_INVALID);
+   line->svals[lineOff] = after;
+   return;
+  slowcase: /* misaligned, or must go further down the tree */
+   stats__cline_16to8splits++;
+   zsm_apply8___msm_read( thr, a + 0 );
+   zsm_apply8___msm_read( thr, a + 1 );
+}
+
+/* Apply the write state machine to the 16-bit quantity at 'a' on
+   behalf of 'thr'.  A misaligned access, or one whose tree already
+   holds finer-grained values below this position, is split into two
+   8-bit writes; otherwise the tree is pulled down to 16-bit
+   granularity if necessary and handled as a single unit. */
+void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
+   CacheLine* cl; 
+   UWord      cloff, tno, toff;
+   SVal       svOld, svNew;
+   UShort     descr;
+   /* Count this as a write; it was previously charged to the 16-bit
+      read counter. */
+   stats__cline_write16s++;
+   if (UNLIKELY(!aligned16(a))) goto slowcase;
+   cl    = get_cacheline(a);
+   cloff = get_cacheline_offset(a);
+   tno   = get_treeno(a);
+   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
+   descr = cl->descrs[tno];
+   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
+      if (valid_value_is_below_me_16(descr, toff)) {
+         goto slowcase;
+      } else {
+         SVal* tree = &cl->svals[tno << 3];
+         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
+      }
+      if (SCE_CACHELINE)
+         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+   }
+   svOld = cl->svals[cloff];
+   svNew = msm_write( svOld, thr,a,2 );
+   tl_assert(svNew != SVal_INVALID);
+   cl->svals[cloff] = svNew;
+   return;
+  slowcase: /* misaligned, or must go further down the tree */
+   stats__cline_16to8splits++;
+   zsm_apply8___msm_write( thr, a + 0 );
+   zsm_apply8___msm_write( thr, a + 1 );
+}
+
+/*------------- ZSM accesses: 32 bit apply ------------- */
+
+/* Apply the read state machine to the 32-bit quantity at 'a' on
+   behalf of 'thr'.  If the access is aligned and the tree holds a
+   value at or above 32-bit granularity for this position, the tree
+   is pulled down to 32 bits if necessary and the access is handled
+   as a single unit; otherwise it is split into two 16-bit reads. */
+void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
+   CacheLine* cl; 
+   UWord      cloff, tno, toff;
+   SVal       svOld, svNew;
+   UShort     descr;
+   /* Count the access, as the 8- and 16-bit variants do. */
+   stats__cline_read32s++;
+   if (UNLIKELY(!aligned32(a))) goto slowcase;
+   cl    = get_cacheline(a);
+   cloff = get_cacheline_offset(a);
+   tno   = get_treeno(a);
+   toff  = get_tree_offset(a); /* == 0 or 4 */
+   descr = cl->descrs[tno];
+   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
+      if (valid_value_is_above_me_32(descr, toff)) {
+         SVal* tree = &cl->svals[tno << 3];
+         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
+      } else {
+         goto slowcase;
+      }
+      if (SCE_CACHELINE)
+         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+   }
+   svOld = cl->svals[cloff];
+   svNew = msm_read( svOld, thr,a,4 );
+   tl_assert(svNew != SVal_INVALID);
+   cl->svals[cloff] = svNew;
+   return;
+  slowcase: /* misaligned, or must go further down the tree */
+   stats__cline_32to16splits++;
+   zsm_apply16___msm_read( thr, a + 0 );
+   zsm_apply16___msm_read( thr, a + 2 );
+}
+
+/* Apply the write-side state machine (msm_write) to the shadow value
+   for the 4 bytes at 'a', on behalf of 'thr'.  A misaligned address,
+   or a tree with no valid value at or above the 32-bit level, is
+   handled by two 16-bit applications instead. */
+void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
+   CacheLine* cl;
+   UWord      cloff, tno, toff;
+   SVal       svOld, svNew;
+   UShort     descr;
+   /* Count the access, as the 16- and 64-bit variants do; without
+      this the write32 column of the stats table never moves. */
+   stats__cline_write32s++;
+   if (UNLIKELY(!aligned32(a))) goto slowcase;
+   cl    = get_cacheline(a);
+   cloff = get_cacheline_offset(a);
+   tno   = get_treeno(a);
+   toff  = get_tree_offset(a); /* == 0 or 4 */
+   descr = cl->descrs[tno];
+   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
+      if (valid_value_is_above_me_32(descr, toff)) {
+         /* The valid value sits higher in the tree: pull it down to
+            the 32-bit level and store back the new descriptor. */
+         SVal* tree = &cl->svals[tno << 3];
+         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
+      } else {
+         goto slowcase;
+      }
+      if (SCE_CACHELINE)
+         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+   }
+   svOld = cl->svals[cloff];
+   svNew = msm_write( svOld, thr,a,4 );
+   tl_assert(svNew != SVal_INVALID);
+   cl->svals[cloff] = svNew;
+   return;
+  slowcase: /* misaligned, or must go further down the tree */
+   stats__cline_32to16splits++;
+   zsm_apply16___msm_write( thr, a + 0 );
+   zsm_apply16___msm_write( thr, a + 2 );
+}
+
+/*------------- ZSM accesses: 64 bit apply ------------- */
+
+/* Run msm_read, for 'thr', over the shadow value of the 8 bytes at
+   'a'.  The in-place path is taken only when 'a' is 8-aligned and
+   the tree descriptor has TREE_DESCR_64 set; every other case is
+   passed on as two 32-bit halves. */
+void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
+   stats__cline_read64s++;
+   if (LIKELY(aligned64(a))) {
+      CacheLine* line    = get_cacheline(a);
+      UWord      lineix  = get_cacheline_offset(a);
+      UWord      treeno  = get_treeno(a);
+      UWord      treeoff = get_tree_offset(a); /* == 0, unused */
+      UShort     d       = line->descrs[treeno];
+      if (LIKELY( d & TREE_DESCR_64 )) {
+         SVal before = line->svals[lineix];
+         SVal after  = msm_read( before, thr,a,8 );
+         tl_assert(after != SVal_INVALID);
+         line->svals[lineix] = after;
+         return;
+      }
+   }
+   /* misaligned, or must go further down the tree */
+   stats__cline_64to32splits++;
+   zsm_apply32___msm_read( thr, a + 0 );
+   zsm_apply32___msm_read( thr, a + 4 );
+}
+
+/* Apply the write-side state machine (msm_write) to the shadow value
+   for the 8 bytes at 'a', on behalf of 'thr'.  Unless 'a' is
+   8-aligned and the tree descriptor has TREE_DESCR_64 set, the
+   access is handled by two 32-bit applications instead. */
+void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
+   CacheLine* cl;
+   UWord      cloff, tno, toff;
+   SVal       svOld, svNew;
+   UShort     descr;
+   /* This is a write, so account for it in the write64 counter (the
+      "wrs" row of the stats table), not the read64 one. */
+   stats__cline_write64s++;
+   if (UNLIKELY(!aligned64(a))) goto slowcase;
+   cl    = get_cacheline(a);
+   cloff = get_cacheline_offset(a);
+   tno   = get_treeno(a);
+   toff  = get_tree_offset(a); /* == 0, unused */
+   descr = cl->descrs[tno];
+   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
+      goto slowcase;
+   }
+   svOld = cl->svals[cloff];
+   svNew = msm_write( svOld, thr,a,8 );
+   tl_assert(svNew != SVal_INVALID);
+   cl->svals[cloff] = svNew;
+   return;
+  slowcase: /* misaligned, or must go further down the tree */
+   stats__cline_64to32splits++;
+   zsm_apply32___msm_write( thr, a + 0 );
+   zsm_apply32___msm_write( thr, a + 4 );
+}
+
+/*--------------- ZSM accesses: 8 bit write --------------- */
+
+/* Overwrite the shadow value of the single byte at 'a' with svNew
+   (which must not be SVal_INVALID).  If the tree has no 8-bit node
+   marked at this offset, it is first pulled down to byte level. */
+static
+void zsm_write8 ( Addr a, SVal svNew ) {
+   CacheLine* line;
+   UWord      lineix, treeno, treeoff;
+   UShort     d;
+   stats__cline_set8s++;
+   line    = get_cacheline(a);
+   lineix  = get_cacheline_offset(a);
+   treeno  = get_treeno(a);
+   treeoff = get_tree_offset(a); /* in 0 .. 7 */
+   d       = line->descrs[treeno];
+   if (UNLIKELY( 0 == (d & (TREE_DESCR_8_0 << treeoff)) )) {
+      line->descrs[treeno]
+         = pulldown_to_8( &line->svals[treeno << 3], treeoff, d );
+      if (SCE_CACHELINE)
+         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
+   }
+   tl_assert(svNew != SVal_INVALID);
+   line->svals[lineix] = svNew;
+}
+
+/*--------------- ZSM accesses: 16 bit write --------------- */
+
+/* Overwrite the shadow value of the 2 bytes at 'a' with svNew (which
+   must not be SVal_INVALID).  A misaligned 'a' is handled as two
+   byte writes.  Otherwise the 16-bit node receives svNew and the
+   slot after it is set to SVal_INVALID. */
+static
+void zsm_write16 ( Addr a, SVal svNew ) {
+   CacheLine* line;
+   UWord      lineix, treeno, treeoff;
+   UShort     d;
+   stats__cline_set16s++;
+   if (UNLIKELY(!aligned16(a))) {
+      /* misaligned */
+      stats__cline_16to8splits++;
+      zsm_write8( a + 0, svNew );
+      zsm_write8( a + 1, svNew );
+      return;
+   }
+   line    = get_cacheline(a);
+   lineix  = get_cacheline_offset(a);
+   treeno  = get_treeno(a);
+   treeoff = get_tree_offset(a); /* one of 0, 2, 4, 6 */
+   d       = line->descrs[treeno];
+   if (UNLIKELY( !(d & (TREE_DESCR_16_0 << treeoff)) )) {
+      if (!valid_value_is_below_me_16(d, treeoff)) {
+         /* Writing straight onto the w16 node could leave it
+            inconsistent with its parent, so pull down to this level
+            before storing. */
+         SVal* tree = &line->svals[treeno << 3];
+         line->descrs[treeno] = pulldown_to_16(tree, treeoff, d);
+         if (SCE_CACHELINE)
+            tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
+      } else {
+         /* The write lands at this level, so only the descriptor
+            needs adjusting.  The tree and descriptor disagree until
+            the two stores below have been done. */
+         line->descrs[treeno] = pullup_descr_to_16(d, treeoff);
+      }
+   }
+   tl_assert(svNew != SVal_INVALID);
+   line->svals[lineix + 0] = svNew;
+   line->svals[lineix + 1] = SVal_INVALID;
+}
+
+/*--------------- ZSM accesses: 32 bit write --------------- */
+
+/* Overwrite the shadow value of the 4 bytes at 'a' with svNew (which
+   must not be SVal_INVALID).  A misaligned 'a' is handled as two
+   16-bit writes.  Otherwise the 32-bit node receives svNew and the
+   three slots after it are set to SVal_INVALID. */
+static
+void zsm_write32 ( Addr a, SVal svNew ) {
+   CacheLine* line;
+   UWord      lineix, treeno, treeoff, k;
+   UShort     d;
+   stats__cline_set32s++;
+   if (UNLIKELY(!aligned32(a))) {
+      /* misaligned */
+      stats__cline_32to16splits++;
+      zsm_write16( a + 0, svNew );
+      zsm_write16( a + 2, svNew );
+      return;
+   }
+   line    = get_cacheline(a);
+   lineix  = get_cacheline_offset(a);
+   treeno  = get_treeno(a);
+   treeoff = get_tree_offset(a); /* either 0 or 4 */
+   d       = line->descrs[treeno];
+   if (UNLIKELY( !(d & (TREE_DESCR_32_0 << treeoff)) )) {
+      if (!valid_value_is_above_me_32(d, treeoff)) {
+         /* The write lands at this level, so only the descriptor
+            needs adjusting.  The tree and descriptor disagree until
+            the stores below have been done. */
+         line->descrs[treeno] = pullup_descr_to_32(d, treeoff);
+      } else {
+         /* Writing straight onto the w32 node could leave it
+            inconsistent with its parent, so pull down to this level
+            before storing. */
+         SVal* tree = &line->svals[treeno << 3];
+         line->descrs[treeno] = pulldown_to_32(tree, treeoff, d);
+         if (SCE_CACHELINE)
+            tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
+      }
+   }
+   tl_assert(svNew != SVal_INVALID);
+   line->svals[lineix] = svNew;
+   for (k = 1; k < 4; k++)
+      line->svals[lineix + k] = SVal_INVALID;
+}
+
+/*--------------- ZSM accesses: 64 bit write --------------- */
+
+/* Overwrite the shadow value of the 8 bytes at 'a' with svNew (which
+   must not be SVal_INVALID).  A misaligned 'a' is handled as two
+   32-bit writes.  Otherwise the tree descriptor is set to
+   TREE_DESCR_64, the first slot receives svNew and the remaining
+   seven are set to SVal_INVALID. */
+static
+void zsm_write64 ( Addr a, SVal svNew ) {
+   CacheLine* line;
+   UWord      lineix, treeno, treeoff, k;
+   stats__cline_set64s++;
+   if (UNLIKELY(!aligned64(a))) {
+      /* misaligned */
+      stats__cline_64to32splits++;
+      zsm_write32( a + 0, svNew );
+      zsm_write32( a + 4, svNew );
+      return;
+   }
+   line    = get_cacheline(a);
+   lineix  = get_cacheline_offset(a);
+   treeno  = get_treeno(a);
+   treeoff = get_tree_offset(a); /* == 0 */
+   line->descrs[treeno] = TREE_DESCR_64;
+   tl_assert(svNew != SVal_INVALID);
+   line->svals[lineix] = svNew;
+   for (k = 1; k < 8; k++)
+      line->svals[lineix + k] = SVal_INVALID;
+}
+
+/*------------- ZSM accesses: 8 bit read/copy ------------- */
+
+/* Fetch the shadow value of the single byte at 'a'.  If the tree has
+   no 8-bit node marked at this offset, it is pulled down to byte
+   level first, so this "read" can modify the cache line. */
+static
+SVal zsm_read8 ( Addr a ) {
+   CacheLine* line;
+   UWord      lineix, treeno, treeoff;
+   UShort     d;
+   stats__cline_get8s++;
+   line    = get_cacheline(a);
+   lineix  = get_cacheline_offset(a);
+   treeno  = get_treeno(a);
+   treeoff = get_tree_offset(a); /* in 0 .. 7 */
+   d       = line->descrs[treeno];
+   if (UNLIKELY( 0 == (d & (TREE_DESCR_8_0 << treeoff)) )) {
+      line->descrs[treeno]
+         = pulldown_to_8( &line->svals[treeno << 3], treeoff, d );
+   }
+   return line->svals[lineix];
+}
+
+/* Copy one byte's shadow value from 'src' to 'dst'.  'uu_normalise'
+   is accepted but not consulted. */
+static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
+   stats__cline_copy8s++;
+   zsm_write8( dst, zsm_read8( src ) );
+}
+
+/* ------------ Shadow memory range setting ops ------------ */
+
+/* Apply msm_read, for 'thr', across [a, a+len).  The range is carved
+   into naturally aligned pieces: 1/2/4-byte steps until 'a' is
+   8-aligned, then 8-byte steps, then a 4/2/1-byte tail. */
+void zsm_apply_range___msm_read ( Thr* thr, 
+                                  Addr a, SizeT len )
+{
+   /* the two commonest shapes are dispatched directly */
+   if (len == 4 && aligned32(a)) {
+      zsm_apply32___msm_read( thr, a );
+      return;
+   }
+   if (len == 8 && aligned64(a)) {
+      zsm_apply64___msm_read( thr, a );
+      return;
+   }
+
+   /* General case.  Each step needs at least its own size in 'len',
+      so once the range is used up all later steps fall through. */
+
+   /* head: climb to 8-byte alignment */
+   if (len >= 1 && !aligned16(a)) {
+      zsm_apply8___msm_read( thr, a );
+      a += 1;  len -= 1;
+      tl_assert(aligned16(a));
+   }
+   if (len >= 2 && !aligned32(a)) {
+      zsm_apply16___msm_read( thr, a );
+      a += 2;  len -= 2;
+      tl_assert(aligned32(a));
+   }
+   if (len >= 4 && !aligned64(a)) {
+      zsm_apply32___msm_read( thr, a );
+      a += 4;  len -= 4;
+      tl_assert(aligned64(a));
+   }
+
+   /* body: whole 8-byte units */
+   if (len >= 8) {
+      tl_assert(aligned64(a));
+      do {
+         zsm_apply64___msm_read( thr, a );
+         a += 8;  len -= 8;
+      } while (len >= 8);
+      tl_assert(aligned64(a));
+   }
+
+   /* tail: at most one each of 4, 2 and 1 bytes */
+   if (len >= 4) {
+      tl_assert(aligned32(a));
+      zsm_apply32___msm_read( thr, a );
+      a += 4;  len -= 4;
+   }
+   if (len >= 2) {
+      tl_assert(aligned16(a));
+      zsm_apply16___msm_read( thr, a );
+      a += 2;  len -= 2;
+   }
+   if (len >= 1) {
+      zsm_apply8___msm_read( thr, a );
+      a += 1;  len -= 1;
+   }
+   tl_assert(len == 0);
+}
+
+
+
+/* Apply msm_write, for 'thr', across [a, a+len).  The range is carved
+   into naturally aligned pieces: 1/2/4-byte steps until 'a' is
+   8-aligned, then 8-byte steps, then a 4/2/1-byte tail. */
+void zsm_apply_range___msm_write ( Thr* thr,
+                                   Addr a, SizeT len )
+{
+   /* the two commonest shapes are dispatched directly */
+   if (len == 4 && aligned32(a)) {
+      zsm_apply32___msm_write( thr, a );
+      return;
+   }
+   if (len == 8 && aligned64(a)) {
+      zsm_apply64___msm_write( thr, a );
+      return;
+   }
+
+   /* General case.  Each step needs at least its own size in 'len',
+      so once the range is used up all later steps fall through. */
+
+   /* head: climb to 8-byte alignment */
+   if (len >= 1 && !aligned16(a)) {
+      zsm_apply8___msm_write( thr, a );
+      a += 1;  len -= 1;
+      tl_assert(aligned16(a));
+   }
+   if (len >= 2 && !aligned32(a)) {
+      zsm_apply16___msm_write( thr, a );
+      a += 2;  len -= 2;
+      tl_assert(aligned32(a));
+   }
+   if (len >= 4 && !aligned64(a)) {
+      zsm_apply32___msm_write( thr, a );
+      a += 4;  len -= 4;
+      tl_assert(aligned64(a));
+   }
+
+   /* body: whole 8-byte units */
+   if (len >= 8) {
+      tl_assert(aligned64(a));
+      do {
+         zsm_apply64___msm_write( thr, a );
+         a += 8;  len -= 8;
+      } while (len >= 8);
+      tl_assert(aligned64(a));
+   }
+
+   /* tail: at most one each of 4, 2 and 1 bytes */
+   if (len >= 4) {
+      tl_assert(aligned32(a));
+      zsm_apply32___msm_write( thr, a );
+      a += 4;  len -= 4;
+   }
+   if (len >= 2) {
+      tl_assert(aligned16(a));
+      zsm_apply16___msm_write( thr, a );
+      a += 2;  len -= 2;
+   }
+   if (len >= 1) {
+      zsm_apply8___msm_write( thr, a );
+      a += 1;  len -= 1;
+   }
+   tl_assert(len == 0);
+}
+
+
+
+
+/* Block-copy shadow state: copy the shadow values of [src, src+len)
+   onto [dst, dst+len).  Needed for implementing realloc().  The two
+   ranges must not overlap (asserted). */
+
+static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
+{
+   SizeT done;
+   if (len == 0)
+      return;
+
+   /* the ranges have to be disjoint */
+   tl_assert(src+len <= dst || dst+len <= src);
+
+   /* Keep it simple: one byte at a time.  A "normalise" hint is
+      computed for the final byte of each destination line and for
+      the first and last bytes of the range, and handed on to
+      zsm_copy8. */
+   for (done = 0; done < len; done++) {
+      Bool hint;
+      if (get_cacheline_offset( dst+done+1 ) == 0)
+         hint = True;                 /* last byte in its line */
+      else if (done == 0)
+         hint = True;                 /* first byte of the range */
+      else
+         hint = done == len-1;        /* last byte of the range */
+      zsm_copy8( src+done, dst+done, hint );
+   }
+}
+
+
+/* For setting address ranges to a given value.  Has considerable
+   sophistication so as to avoid generating large numbers of pointless
+   cache loads/writebacks for large ranges. */
+
+/* Set the shadow value of every byte in [a, a+len) to svNew, working
+   through the cache with the zsm_write{8,16,32,64} helpers.  Meant
+   for small ranges. */
+static
+void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
+{
+   /* the two commonest shapes are dispatched directly */
+   if (len == 4 && aligned32(a)) {
+      zsm_write32( a, svNew );
+      return;
+   }
+   if (len == 8 && aligned64(a)) {
+      zsm_write64( a, svNew );
+      return;
+   }
+
+   /* General case.  Each step needs at least its own size in 'len',
+      so once the range is used up all later steps fall through. */
+
+   /* head: climb to 8-byte alignment */
+   if (len >= 1 && !aligned16(a)) {
+      zsm_write8( a, svNew );
+      a += 1;  len -= 1;
+      tl_assert(aligned16(a));
+   }
+   if (len >= 2 && !aligned32(a)) {
+      zsm_write16( a, svNew );
+      a += 2;  len -= 2;
+      tl_assert(aligned32(a));
+   }
+   if (len >= 4 && !aligned64(a)) {
+      zsm_write32( a, svNew );
+      a += 4;  len -= 4;
+      tl_assert(aligned64(a));
+   }
+
+   /* body: whole 8-byte units */
+   if (len >= 8) {
+      tl_assert(aligned64(a));
+      do {
+         zsm_write64( a, svNew );
+         a += 8;  len -= 8;
+      } while (len >= 8);
+      tl_assert(aligned64(a));
+   }
+
+   /* tail: at most one each of 4, 2 and 1 bytes */
+   if (len >= 4) {
+      tl_assert(aligned32(a));
+      zsm_write32( a, svNew );
+      a += 4;  len -= 4;
+   }
+   if (len >= 2) {
+      tl_assert(aligned16(a));
+      zsm_write16( a, svNew );
+      a += 2;  len -= 2;
+   }
+   if (len >= 1) {
+      zsm_write8( a, svNew );
+      a += 1;  len -= 1;
+   }
+   tl_assert(len == 0);
+}
+
+
+/* Set the shadow value of every byte in [a, a+len) to svNew (which
+   must not be SVal_INVALID).
+
+   Ranges shorter than two lines go straight to zsm_set_range_SMALL.
+   Longer ones are split into a partial leading line, a run of whole
+   lines, and a partial trailing line.  The partial pieces use
+   zsm_set_range_SMALL.  Each whole line is written through the cache
+   if its tag is already there; otherwise its out-of-cache Z
+   representation is rewritten in place, so the line is never dragged
+   into the cache just to be overwritten and pushed out again.  This
+   is an important performance optimisation. */
+
+static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
+{
+   tl_assert(svNew != SVal_INVALID);
+   stats__cache_make_New_arange += (ULong)len;
+
+   if (0 && len > 500)
+      VG_(printf)("make New      ( %#lx, %ld )\n", a, len );
+
+   if (0) {
+      /* disabled: tally how often the first line is already cached */
+      static UWord n_New_in_cache = 0;
+      static UWord n_New_not_in_cache = 0;
+      /* tag is 'a' with the in-line offset masked out, 
+         eg a[31]..a[4] 0000 */
+      Addr       tag = a & ~(N_LINE_ARANGE - 1);
+      UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+      if (LIKELY(tag == cache_shmem.tags0[wix])) {
+         n_New_in_cache++;
+      } else {
+         n_New_not_in_cache++;
+      }
+      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
+         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
+                     n_New_in_cache, n_New_not_in_cache );
+   }
+
+   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
+      zsm_set_range_SMALL( a, len, svNew );
+      return;
+   }
+
+   {
+      /* [head_start, mid_start)  partial first line
+         [mid_start, tail_start)  whole lines
+         [tail_start, a+len)      partial last line */
+      Addr  head_start = a;
+      Addr  mid_start  = cacheline_ROUNDUP(a);
+      Addr  tail_start = cacheline_ROUNDDN(a + len);
+      UWord head_len   = mid_start - head_start;
+      UWord mid_len    = tail_start - mid_start;
+      UWord tail_len   = a + len - tail_start;
+      tl_assert(head_start <= mid_start);
+      tl_assert(mid_start <= tail_start);
+      tl_assert(head_len < N_LINE_ARANGE);
+      tl_assert(tail_len < N_LINE_ARANGE);
+      tl_assert(get_cacheline_offset(mid_start) == 0);
+      if (get_cacheline_offset(a) == 0) {
+         tl_assert(head_len == 0);
+         tl_assert(a == mid_start);
+      }
+      if (get_cacheline_offset(a+len) == 0) {
+         tl_assert(tail_len == 0);
+         tl_assert(tail_start == a+len);
+      }
+      if (head_len > 0)
+         zsm_set_range_SMALL( head_start, head_len, svNew );
+      if (tail_len > 0)
+         zsm_set_range_SMALL( tail_start, tail_len, svNew );
+      stats__cache_make_New_inZrep += (ULong)mid_len;
+
+      for (; mid_start < tail_start;
+             mid_start += N_LINE_ARANGE, mid_len -= N_LINE_ARANGE) {
+         Addr  tag = mid_start & ~(N_LINE_ARANGE - 1);
+         UWord wix = (mid_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
+         UWord k;
+         tl_assert(get_cacheline_offset(mid_start) == 0);
+         if (tag != cache_shmem.tags0[wix]) {
+            /* Not cached: don't force the line in, rewrite its Z
+               representation where it lives.  find_Z_for_writing
+               locates the Z line and rcdecs it or the associated F
+               line. */
+            SecMap* sm;
+            Word    zix;
+            LineZ*  lineZ;
+            find_Z_for_writing( &sm, &zix, tag );
+            tl_assert(sm);
+            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
+            lineZ = &sm->linesZ[zix];
+            lineZ->dict[0] = svNew;
+            lineZ->dict[3] = SVal_INVALID;
+            lineZ->dict[2] = SVal_INVALID;
+            lineZ->dict[1] = SVal_INVALID;
+            for (k = 0; k < N_LINE_ARANGE/4; k++)
+               lineZ->ix2s[k] = 0; /* every entry selects dict[0] */
+            rcinc_LineZ(lineZ);
+         } else {
+            /* Cached: overwrite it 8 bytes at a time. */
+            for (k = 0; k < N_LINE_ARANGE / 8; k++)
+               zsm_write64( mid_start + k * 8, svNew );
+         }
+      }
+      tl_assert(mid_start == tail_start);
+      tl_assert(mid_len == 0);
+   }
+}
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// Synchronisation objects                             //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+// (UInt) `echo "Synchronisation object" | md5sum`
+#define SO_MAGIC 0x56b3c5b0U
+
+struct _SO {
+   VtsID viR; /* r-clock of sender; VtsID_INVALID as allocated by SO__Alloc */
+   VtsID viW; /* w-clock of sender; VtsID_INVALID as allocated by SO__Alloc */
+   UInt  magic; /* SO_MAGIC once allocated; set to 0 by SO__Dealloc */
+};
+
+/* Allocate a synchronisation object with both clocks set to
+   VtsID_INVALID and the magic number in place. */
+static SO* SO__Alloc ( void ) {
+   SO* fresh = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
+   fresh->viR = fresh->viW = VtsID_INVALID;
+   fresh->magic = SO_MAGIC;
+   return fresh;
+}
+/* Free 'so'.  If it holds clocks, the reference on each is dropped
+   first.  The two clocks must be either both valid or both
+   VtsID_INVALID (asserted).  The magic number is cleared before the
+   memory is released. */
+static void SO__Dealloc ( SO* so ) {
+   tl_assert(so);
+   tl_assert(so->magic == SO_MAGIC);
+   if (so->viR != VtsID_INVALID) {
+      tl_assert(so->viW != VtsID_INVALID);
+      VtsID__rcdec(so->viR);
+      VtsID__rcdec(so->viW);
+   } else {
+      tl_assert(so->viW == VtsID_INVALID);
+   }
+   so->magic = 0;
+   HG_(free)( so );
+}
+
+
+/////////////////////////////////////////////////////////
+//                                                     //
+// Top Level API                                       //
+//                                                     //
+/////////////////////////////////////////////////////////
+
+/* Debug printer for a thread's read and write clocks, labelled with
+   'str'.  Currently switched off by the unconditional return at the
+   top. */
+static void show_thread_state ( HChar* str, Thr* t ) 
+{
+   if (1) return;
+   if (t->viR != t->viW) {
+      /* clocks differ: show both */
+      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
+      VtsID__pp( t->viR );
+      VG_(printf)(" viW %u==", t->viW);
+      VtsID__pp( t->viW );
+      VG_(printf)("%s","\n");
+   } else {
+      /* clocks identical: show just one */
+      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
+      VtsID__pp( t->viR );
+      VG_(printf)("%s","\n");
+   }
+}
+
+
+/* Initialise the library.  The three callbacks (all must be non-NULL)
+   are stored for later use.  The VTS set, VTS table, event map and
+   shadow memory are then initialised.  Returns a new root thread
+   whose read and write clocks are both the singleton VTS made for it
+   with value 1. */
+Thr* libhb_init (
+        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
+        struct _EC* (*stacktrace_to_EC)( Addr*, UWord ),
+        struct _EC* (*get_EC)( Thr* )
+     )
+{
+   Thr*  root;
+   VtsID rootClock;
+
+   tl_assert(get_stacktrace);
+   main_get_stacktrace = get_stacktrace;
+   tl_assert(stacktrace_to_EC);
+   main_stacktrace_to_EC = stacktrace_to_EC;
+   tl_assert(get_EC);
+   main_get_EC = get_EC;
+
+   // hg_wordfm and hg_wordset need no initialisation.
+
+   vts_set_init();
+   vts_tab_init();
+   event_map_init();
+   VtsID__invalidate_caches();
+
+   // set up shadow memory
+   zsm_init( SVal__rcinc, SVal__rcdec );
+
+   root      = Thr__new();
+   rootClock = VtsID__mk_Singleton( root, 1 );
+   root->viR = rootClock;
+   root->viW = rootClock;
+   /* one reference for each of the two fields */
+   VtsID__rcinc(root->viR);
+   VtsID__rcinc(root->viW);
+
+   show_thread_state("  root", root);
+   return root;
+}
+
+/* Create and return a new thread that is a child of 'parent'.  Both
+   the child's and the parent's clocks are advanced. */
+Thr* libhb_create ( Thr* parent )
+{
+   Thr* kid = Thr__new();
+
+   /* The child starts from the parent's VTSs, ticked at the child's
+      own index.  That index is guaranteed unique and so has never
+      been seen before: its implicit value is zero before the tick
+      and one after, which the assertions below check. */
+   kid->viR = VtsID__tick( parent->viR, kid );
+   kid->viW = VtsID__tick( parent->viW, kid );
+   VtsID__rcinc(kid->viR);
+   VtsID__rcinc(kid->viW);
+
+   tl_assert(1 == VtsID__indexAt( kid->viR, kid ));
+   tl_assert(1 == VtsID__indexAt( kid->viW, kid ));
+
+   /* The parent moves along too: drop the references on its old
+      clocks, tick each at the parent's index, reference the new. */
+   VtsID__rcdec(parent->viR);
+   VtsID__rcdec(parent->viW);
+   parent->viR = VtsID__tick( parent->viR, parent );
+   parent->viW = VtsID__tick( parent->viW, parent );
+   VtsID__rcinc(parent->viR);
+   VtsID__rcinc(parent->viW);
+
+   show_thread_state(" child", kid);
+   show_thread_state("parent", parent);
+
+   return kid;
+}
+
+/* Shut down the library.  All this actually does is print the
+   accumulated statistics, and only when 'show_stats' is set. */
+void libhb_shutdown ( Bool show_stats )
+{
+   if (!show_stats)
+      return;
+
+   VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
+
+   /* secondary maps */
+   VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
+               stats__secmaps_allocd,
+               stats__secmap_ga_space_covered);
+   VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
+               stats__secmap_linesZ_allocd,
+               stats__secmap_linesZ_bytes);
+   VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)\n",
+               stats__secmap_linesF_allocd,
+               stats__secmap_linesF_bytes);
+   VG_(printf)(" secmaps: %'10lu iterator steppings\n",
+               stats__secmap_iterator_steppings);
+   VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
+               stats__secmaps_search, stats__secmaps_search_slow);
+
+   /* cache traffic */
+   VG_(printf)("%s","\n");
+   VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
+               stats__cache_totrefs, stats__cache_totmisses );
+   VG_(printf)("   cache: %'14lu Z-fetch,    %'14lu F-fetch\n",
+               stats__cache_Z_fetches, stats__cache_F_fetches );
+   VG_(printf)("   cache: %'14lu Z-wback,    %'14lu F-wback\n",
+               stats__cache_Z_wbacks, stats__cache_F_wbacks );
+   VG_(printf)("   cache: %'14lu invals,     %'14lu flushes\n",
+               stats__cache_invals, stats__cache_flushes );
+   VG_(printf)("   cache: %'14llu arange_New  %'14llu direct-to-Zreps\n",
+               stats__cache_make_New_arange,
+               stats__cache_make_New_inZrep);
+
+   /* per-cache-line operations */
+   VG_(printf)("%s","\n");
+   VG_(printf)("   cline: %'10lu normalises\n",
+               stats__cline_normalises );
+   VG_(printf)("   cline:  rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
+               stats__cline_read64s,
+               stats__cline_read32s,
+               stats__cline_read16s,
+               stats__cline_read8s );
+   VG_(printf)("   cline:  wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
+               stats__cline_write64s,
+               stats__cline_write32s,
+               stats__cline_write16s,
+               stats__cline_write8s );
+   VG_(printf)("   cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
+               stats__cline_set64s,
+               stats__cline_set32s,
+               stats__cline_set16s,
+               stats__cline_set8s );
+   VG_(printf)("   cline: get1s %'lu, copy1s %'lu\n",
+               stats__cline_get8s, stats__cline_copy8s );
+   VG_(printf)("   cline:    splits: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
+               stats__cline_64to32splits,
+               stats__cline_32to16splits,
+               stats__cline_16to8splits );
+   VG_(printf)("   cline: pulldowns: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
+               stats__cline_64to32pulldown,
+               stats__cline_32to16pulldown,
+               stats__cline_16to8pulldown );
+   if (0)
+   VG_(printf)("   cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
+               (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
+
+   /* state machine and vector-timestamp queries */
+   VG_(printf)("%s","\n");
+
+   VG_(printf)("   libhb: %'13llu msm_read  (%'llu changed)\n",
+               stats__msm_read, stats__msm_read_change);
+   VG_(printf)("   libhb: %'13llu msm_write (%'llu changed)\n",
+               stats__msm_write, stats__msm_write_change);
+   VG_(printf)("   libhb: %'13llu getOrd queries (%'llu misses)\n",
+               stats__getOrdering_queries, stats__getOrdering_misses);
+   VG_(printf)("   libhb: %'13llu join2  queries (%'llu misses)\n",
+               stats__join2_queries, stats__join2_misses);
+
+   /* VTS table and set sizes */
+   VG_(printf)("%s","\n");
+   VG_(printf)(
+      "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
+      VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
+   );
+   VG_(printf)( "   libhb: %lu entries in vts_set\n",
+                VG_(sizeFM)( vts_set ) );
+
+   /* context table */
+   VG_(printf)("%s","\n");
+   VG_(printf)( "   libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
+                stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
+                stats__ctxt_rcdec2,
+                stats__ctxt_rcdec3 );
+   VG_(printf)( "   libhb: ctxt__rcdec: calls %lu, discards %lu\n",
+                stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
+   VG_(printf)( "   libhb: contextTab: %lu slots, %lu max ents\n",
+                (UWord)N_RCEC_TAB,
+                stats__ctxt_tab_curr );
+   VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
+                stats__ctxt_tab_qs,
+                stats__ctxt_tab_cmps );
+#if 0
+   VG_(printf)("sizeof(AvlNode)     = %lu\n", sizeof(AvlNode));
+   VG_(printf)("sizeof(WordBag)     = %lu\n", sizeof(WordBag));
+   VG_(printf)("sizeof(MaybeWord)   = %lu\n", sizeof(MaybeWord));
+   VG_(printf)("sizeof(CacheLine)   = %lu\n", sizeof(CacheLine));
+   VG_(printf)("sizeof(LineZ)       = %lu\n", sizeof(LineZ));
+   VG_(printf)("sizeof(LineF)       = %lu\n", sizeof(LineF));
+   VG_(printf)("sizeof(SecMap)      = %lu\n", sizeof(SecMap));
+   VG_(printf)("sizeof(Cache)       = %lu\n", sizeof(Cache));
+   VG_(printf)("sizeof(SMCacheEnt)  = %lu\n", sizeof(SMCacheEnt));
+   VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
+   VG_(printf)("sizeof(VTS)         = %lu\n", sizeof(VTS));
+   VG_(printf)("sizeof(ScalarTS)    = %lu\n", sizeof(ScalarTS));
+   VG_(printf)("sizeof(VtsTE)       = %lu\n", sizeof(VtsTE));
+   VG_(printf)("sizeof(MSMInfo)     = %lu\n", sizeof(MSMInfo));
+
+   VG_(printf)("sizeof(struct _XArray)     = %lu\n", sizeof(struct _XArray));
+   VG_(printf)("sizeof(struct _WordFM)     = %lu\n", sizeof(struct _WordFM));
+   VG_(printf)("sizeof(struct _Thr)     = %lu\n", sizeof(struct _Thr));
+   VG_(printf)("sizeof(struct _SO)     = %lu\n", sizeof(struct _SO));
+#endif
+
+   VG_(printf)("%s","<<< END libhb stats >>>\n");
+   VG_(printf)("%s","\n");
+}
+
+void libhb_async_exit ( Thr* thr )
+{
+   /* Currently a no-op: 'thr' is ignored and no state is touched.
+      Open question from the original author: is there anything we
+      need to do? */
+}
+
+/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
+   a Seg that points at a VTS is its one-and-only owner, and ditto for
+   a SO that points at a VTS. */
+
+/* Hand back a freshly allocated synchronisation object, as produced
+   by SO__Alloc. */
+SO* libhb_so_alloc ( void )
+{
+   SO* fresh = SO__Alloc();
+   return fresh;
+}
+
+void libhb_so_dealloc ( SO* so )
+{  /* Destroy a synchronisation object: check that 'so' is non-NULL
+      and carries SO_MAGIC, then hand it to SO__Dealloc. */
+   tl_assert(so);
+   tl_assert(so->magic == SO_MAGIC);
+   SO__Dealloc(so);
+}
+
+/* Post 'thr's clocks to 'so', then advance 'thr' by one step.
+   libhb.h explains what strong and weak sends/receives mean.  In
+   terms of what happens here: a strong send replaces whatever 'so'
+   held with the sender's clocks, whereas a weak send joins the
+   sender's clocks with the ones already present.  The first send to
+   an SO installs the sender's clocks either way. */
+void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
+{
+   POrd ord;
+
+   tl_assert(so);
+   tl_assert(so->magic == SO_MAGIC);
+
+   /* sanity: a thread's read-clock must always lead, or equal, its
+      write-clock */
+   ord = VtsID__getOrdering(thr->viW, thr->viR);
+   tl_assert(ord == POrd_EQ || ord == POrd_LT);
+
+   if (so->viR != VtsID_INVALID) {
+      /* 'so' already holds clocks.  They are about to be overwritten,
+         so give up the references they represent first. */
+      tl_assert(so->viW != VtsID_INVALID);
+      VtsID__rcdec(so->viR);
+      VtsID__rcdec(so->viW);
+      if (!strong_send) {
+         /* weak: merge with what is already there */
+         so->viR = VtsID__join2( so->viR, thr->viR );
+         so->viW = VtsID__join2( so->viW, thr->viW );
+      } else {
+         /* strong: previous contents are discarded */
+         so->viR = thr->viR;
+         so->viW = thr->viW;
+      }
+   } else {
+      /* nothing has been sent on 'so' before */
+      tl_assert(so->viW == VtsID_INVALID);
+      so->viR = thr->viR;
+      so->viW = thr->viW;
+   }
+   VtsID__rcinc(so->viR);
+   VtsID__rcinc(so->viW);
+
+   /* now tick both of the sender's clocks */
+   VtsID__rcdec(thr->viR);
+   VtsID__rcdec(thr->viW);
+   thr->viR = VtsID__tick( thr->viR, thr );
+   thr->viW = VtsID__tick( thr->viW, thr );
+   VtsID__rcinc(thr->viR);
+   VtsID__rcinc(thr->viW);
+
+   if (!strong_send)
+      show_thread_state("w-send", thr);
+   else
+      show_thread_state("s-send", thr);
+}
+
+/* Receive from 'so' into 'thr'.  If nothing has ever been sent on
+   'so', the thread's clocks are left as they are.  Otherwise the
+   thread's read-clock is joined with the SO's, and for a strong
+   receive its write-clock is joined too. */
+void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
+{
+   tl_assert(so);
+   tl_assert(so->magic == SO_MAGIC);
+
+   if (so->viR == VtsID_INVALID) {
+      /* Degenerate case: 'so' carries no vts because no message was
+         ever posted to it.  Nothing to acquire. */
+      tl_assert(so->viW == VtsID_INVALID);
+      show_thread_state("d-recv", thr);
+      return;
+   }
+
+   tl_assert(so->viW != VtsID_INVALID);
+
+   /* Every receive advances the receiver's read-clock.  Doing only
+      this is a weak receive -- basically an R-acquisition of an R-W
+      lock. */
+   VtsID__rcdec(thr->viR);
+   thr->viR = VtsID__join2( thr->viR, so->viR );
+   VtsID__rcinc(thr->viR);
+
+   if (strong_recv) {
+      /* A strong receive advances the write-clock as well, making
+         the whole thing essentially a W-acquisition of an R-W
+         lock. */
+      VtsID__rcdec(thr->viW);
+      thr->viW = VtsID__join2( thr->viW, so->viW );
+      VtsID__rcinc(thr->viW);
+      show_thread_state("s-recv", thr);
+   } else {
+      show_thread_state("w-recv", thr);
+   }
+}
+
+/* True iff 'so' currently holds clocks, i.e. its viR is not
+   VtsID_INVALID.  Also asserts that viW agrees with viR on that. */
+Bool libhb_so_everSent ( SO* so )
+{
+   if (so->viR != VtsID_INVALID) {
+      tl_assert(so->viW != VtsID_INVALID);
+      return True;
+   }
+   tl_assert(so->viW == VtsID_INVALID);
+   return False;
+}
+
+/* Debug aid: up to two addresses to watch.  A value of 0 switches the
+   corresponding watch off. */
+#define XXX1 0 // 0x67a106c
+#define XXX2 0
+
+/* True iff an enabled watch address lies in [a, a+szB] (both ends
+   inclusive, as the comparisons are written). */
+static Bool TRACEME(Addr a, SizeT szB) {
+   Bool hit1 = XXX1 && a <= XXX1 && XXX1 <= a+szB;
+   Bool hit2 = XXX2 && a <= XXX2 && XXX2 <= a+szB;
+   if (hit1 || hit2)
+      return True;
+   return False;
+}
+/* Debug aid: print 'thr', the range (a, szB), the label 's' and the
+   shadow value of the byte at 'a', followed by the thread's state. */
+static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
+   SVal first = zsm_read8(a);
+   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ",
+               thr, a, szB, s, first);
+   show_thread_state("", thr);
+   VG_(printf)("%s","\n");
+}
+
+/* Set the shadow state of [a, a+szB) to a C value built from 'thr's
+   write-clock, which is passed for both arguments of SVal__mkC. */
+void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
+{
+   Bool tracing = TRACEME(a,szB);
+   SVal fresh   = SVal__mkC(thr->viW, thr->viW);
+   tl_assert(is_sane_SVal_C(fresh));
+   if (tracing)
+      trace(thr,a,szB,"nw-before");
+   zsm_set_range( a, szB, fresh );
+   if (tracing)
+      trace(thr,a,szB,"nw-after ");
+}
+
+/* Set the shadow state of [a, a+szB) to the value produced by
+   SVal__mkA(). */
+void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
+{
+   Bool tracing = TRACEME(a,szB);
+   if (tracing)
+      trace(thr,a,szB,"NA-before");
+   zsm_set_range( a, szB, SVal__mkA() );
+   if (tracing)
+      trace(thr,a,szB,"NA-after ");
+}
+
+void* libhb_get_Thr_opaque ( Thr* thr ) { /* returns thr->opaque; 'thr' must be non-NULL */
+   tl_assert(thr);
+   return thr->opaque;
+}
+
+void libhb_set_Thr_opaque ( Thr* thr, void* v ) { /* stores 'v' in thr->opaque; 'thr' must be non-NULL */
+   tl_assert(thr);
+   thr->opaque = v;
+}
+
+void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
+{  /* Forwards to zsm_copy_range, which requires the two ranges not
+      to overlap.
+      NOTE(review): zsm_copy_range is declared (src, dst, len), but
+      the arguments here go in positionally as (dst, src, len).  So
+      the first argument of this function is what gets read and the
+      second is what gets written, whatever the parameter names say.
+      Check the callers and libhb.h before renaming or reordering
+      anything. */
+   zsm_copy_range(dst, src, len);
+}
+
+/* Give the event map a chance to GC, then GC the VTS table if
+   warranted.  The table is collected only when its freelist is empty
+   (so the table is full and would otherwise have to grow) and its
+   size has reached the threshold vts_next_GC_at. */
+void libhb_maybe_GC ( void )
+{
+   event_map_maybe_GC();
+   if (vts_tab_freelist == VtsID_INVALID
+       && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at) {
+      vts_tab__do_GC( False/*don't show stats*/ );
+   }
+}
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+//                                                             //
+// SECTION END main library                                    //
+//                                                             //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+/*--------------------------------------------------------------------*/
+/*--- end                                             libhb_main.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/include/pub_tool_execontext.h b/include/pub_tool_execontext.h

index 164615ce2b3b524747adc714a6046988deb60a1e..84dda74c04c8a5a03d5281280ccdfd7919ad2fa0 100644 (file)
--- a/include/pub_tool_execontext.h
+++ b/include/pub_tool_execontext.h
@@ -104,6 +104,8 @@ static inline Bool VG_(is_plausible_ECU)( UInt ecu ) {
     return (ecu > 0) && ((ecu & 3) == 0);
  }
  
+// Make an ExeContext containing exactly the stack frames passed in (ips, n_ips).
+ExeContext* VG_(make_ExeContext_from_StackTrace)( Addr* ips, UInt n_ips );
  
  #endif   // __PUB_TOOL_EXECONTEXT_H
author	Julian Seward <jseward@acm.org>
	Sat, 25 Oct 2008 16:22:41 +0000 (16:22 +0000)
committer	Julian Seward <jseward@acm.org>
	Sat, 25 Oct 2008 16:22:41 +0000 (16:22 +0000)
coregrind/m_debuginfo/debuginfo.c		patch \| blob \| blame \| history
coregrind/m_debuginfo/priv_storage.h		patch \| blob \| blame \| history
coregrind/m_debuginfo/storage.c		patch \| blob \| blame \| history
coregrind/m_execontext.c		patch \| blob \| blame \| history
coregrind/m_main.c		patch \| blob \| blame \| history
coregrind/m_stacktrace.c		patch \| blob \| blame \| history
coregrind/m_xarray.c		patch \| blob \| blame \| history
coregrind/pub_core_debuginfo.h		patch \| blob \| blame \| history
glibc-2.34567-NPTL-helgrind.supp		patch \| blob \| blame \| history
helgrind/Makefile.am		patch \| blob \| blame \| history
helgrind/README_MSMProp2.txt	[new file with mode: 0644]	patch \| blob
helgrind/README_YARD.txt	[new file with mode: 0644]	patch \| blob
helgrind/helgrind.h		patch \| blob \| blame \| history
helgrind/hg_basics.c	[new file with mode: 0644]	patch \| blob
helgrind/hg_basics.h	[new file with mode: 0644]	patch \| blob
helgrind/hg_errors.c	[new file with mode: 0644]	patch \| blob
helgrind/hg_errors.h	[new file with mode: 0644]	patch \| blob
helgrind/hg_intercepts.c		patch \| blob \| blame \| history
helgrind/hg_lock_n_thread.c	[new file with mode: 0644]	patch \| blob
helgrind/hg_lock_n_thread.h	[new file with mode: 0644]	patch \| blob
helgrind/hg_main.c		patch \| blob \| blame \| history
helgrind/hg_wordset.c		patch \| blob \| blame \| history
helgrind/hg_wordset.h		patch \| blob \| blame \| history
helgrind/libhb.h	[new file with mode: 0644]	patch \| blob
helgrind/libhb_core.c	[new file with mode: 0644]	patch \| blob
include/pub_tool_execontext.h		patch \| blob \| blame \| history