From: Julian Seward <jseward@acm.org>
Date: Thu, 16 May 2002 11:06:21 +0000 (+0000)
Subject: Remove existing non-working support for self-modifying code, and instead
X-Git-Tag: svn/VALGRIND_1_0_3~190
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6610ca19b3820e27c72ab97154e2b3acc52aa6b4;p=thirdparty%2Fvalgrind.git

Remove existing non-working support for self-modifying code, and instead
add a simple compromise, in which the client can notify valgrind
that certain code address ranges are invalid and should be retranslated.
This is done using the VALGRIND_DISCARD_TRANSLATIONS macro in valgrind.h.

At the same time take the opportunity to close the potentially fatal
loophole that translations for executable segments were not being
discarded when those segments were munmapped.  They are now.

Documentation updated.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@274
---

diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 8081e0ae8f..b794e6125c 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -1,3 +1,4 @@
+
 /*--------------------------------------------------------------------*/
 /*--- The cache simulation framework: instrumentation, recording   ---*/
 /*--- and results printing.                                        ---*/
@@ -10,7 +11,6 @@
 
    Copyright (C) 2000-2002 Julian Seward 
       jseward@acm.org
-      Julian_Seward@muraroa.demon.co.uk
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -30,8 +30,6 @@
    The GNU General Public License is contained in the file LICENSE.
 */
 
-#include <string.h>
-
 #include "vg_include.h"
 
 #include "vg_cachesim_L2.c"
@@ -311,7 +309,7 @@ static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
    filename_hash = hash(filename, N_FILE_ENTRIES);
    curr_file_node = BBCC_table[filename_hash];
    while (NULL != curr_file_node && 
-          strcmp(filename, curr_file_node->filename) != 0) {
+          VG_(strcmp)(filename, curr_file_node->filename) != 0) {
       curr_file_node = curr_file_node->next;
    }
    if (NULL == curr_file_node) {
@@ -323,7 +321,7 @@ static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
    fnname_hash = hash(fn_name, N_FN_ENTRIES);
    curr_fn_node = curr_file_node->fns[fnname_hash];
    while (NULL != curr_fn_node && 
-          strcmp(fn_name, curr_fn_node->fn_name) != 0) {
+          VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
       curr_fn_node = curr_fn_node->next;
    }
    if (NULL == curr_fn_node) {
@@ -790,7 +788,7 @@ static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
 
       /* Allow for filename switching in the middle of a BB;  if this happens,
        * must print the new filename with the function name. */
-      if (0 != strcmp(fl_buf, curr_file)) {
+      if (0 != VG_(strcmp)(fl_buf, curr_file)) {
          VG_(strcpy)(curr_file, fl_buf);
          VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
          VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
@@ -798,7 +796,7 @@ static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
 
       /* If the function name for this instruction doesn't match that of the
        * first instruction in the BB, print warning. */
-      if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
+      if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
          VG_(printf)("Mismatched function names\n");
          VG_(printf)("  filenames: BB:%s, instr:%s;"
                      "  fn_names:  BB:%s, instr:%s;"
@@ -1071,3 +1069,13 @@ void VG_(show_cachesim_results)(Int client_argc, Char** client_argv)
    VGP_POPCC;
 }
 
+/* Discard hook for the cache simulator (going by its name); it only prints tte->orig_addr and tte->orig_size. */
+void VG_(cachesim_notify_discard) ( TTEntry* tte )
+{
+  VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
+               tte->orig_addr, (Int)tte->orig_size);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_cachesim.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/docs/manual.html b/cachegrind/docs/manual.html
index dc66721359..20fbb36b59 100644
--- a/cachegrind/docs/manual.html
+++ b/cachegrind/docs/manual.html
@@ -24,8 +24,9 @@
 <body bgcolor="#ffffff">
 
 <a name="title">&nbsp;</a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
 <center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
 <p>
 
 <center>
@@ -102,7 +103,9 @@ detect problems such as:
   <li>Reading/writing memory after it has been free'd</li>
   <li>Reading/writing off the end of malloc'd blocks</li>
   <li>Reading/writing inappropriate areas on the stack</li>
-  <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+  <li>Memory leaks -- where pointers to malloc'd blocks are lost
+  forever</li>
+  <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
 </ul>
 
 Problems like these can be difficult to find by other means, often
@@ -677,25 +680,6 @@ shouldn't need to use them in the normal run of things.  Nevertheless:
       all fairly dodgy and doesn't work at all if threads are
       involved.</li><br>
       <p>
-
-  <li><code>--smc-check=none</code><br>
-      <code>--smc-check=some</code> [default]<br>
-      <code>--smc-check=all</code>
-      <p>How carefully should Valgrind check for self-modifying code
-      writes, so that translations can be discarded?&nbsp; When
-      "none", no writes are checked.  When "some", only writes
-      resulting from moves from integer registers to memory are
-      checked.  When "all", all memory writes are checked, even those
-      with which are no sane program would generate code -- for
-      example, floating-point writes.
-      <p>
-      NOTE that this is all a bit bogus.  This mechanism has never
-      been enabled in any snapshot of Valgrind which was made
-      available to the general public, because the extra checks reduce
-      performance, increase complexity, and I have yet to come across
-      any programs which actually use self-modifying code.  I think
-      the flag is ignored.
-      </li>
 </ul>
 
 
@@ -1185,6 +1169,24 @@ A brief description of the available macros:
     right now.  Returns no value.  I guess this could be used to
     incrementally check for leaks between arbitrary places in the
     program's execution.  Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+    of code in the specified address range.  Useful if you are
+    debugging a JITter or some other dynamic code generation system.
+    After this call, attempts to execute code in the invalidated
+    address range will cause valgrind to make new translations of that
+    code, which is probably the semantics you want.  Note that this is
+    implemented naively, and involves checking all 200191 entries in
+    the translation table to see if any of them overlap the specified
+    address range.  So try not to call it often, or performance will
+    nosedive.  Note that you can be clever about this: you only need
+    to call it when an area which previously contained code is
+    overwritten with new code.  You can choose to write code into
+    fresh memory, and just call this occasionally to discard large
+    chunks of old code all at once.
+    <p>
+    Warning: minimally tested.  Also, doesn't interact well with the
+    cache simulator.
 </ul>
 <p>
 
@@ -1255,7 +1257,7 @@ bug in Mozilla which assumes that memory returned from
 <code>malloc</code> is 8-aligned.  Valgrind's allocator only
 guarantees 4-alignment, so without the patch Mozilla makes an illegal
 memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
 
 
 <a name="install"></a>
@@ -1730,10 +1732,8 @@ a kernel 2.2.X or 2.4.X system, subject to the following constraints:
       running under Valgrind.  This is due to the large amount of
       adminstrative information maintained behind the scenes.  Another
       cause is that Valgrind dynamically translates the original
-      executable and never throws any translation away, except in
-      those rare cases where self-modifying code is detected.
-      Translated, instrumented code is 12-14 times larger than the
-      original (!) so you can easily end up with 15+ MB of
+      executable.  Translated, instrumented code is 14-16 times larger
+      than the original (!) so you can easily end up with 30+ MB of
       translations when running (eg) a web browser.
       </li>
 </ul>
@@ -1809,14 +1809,14 @@ instrumented translation, which is added to the collection of
 translations.  Subsequent jumps to that address will use this
 translation.
 
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated.  Such a write invalidates translations of code
-bracketing the written address.  Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there.  In this way,
-self modifying code is supported.  In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code.  Such checking is expensive, and in practice (fortunately)
+almost no applications need it.  However, to help people who are
+debugging dynamic code generation systems, there is a Client Request 
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range.  So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
 
 <p>The JITter translates basic blocks -- blocks of straight-line-code
 -- as single entities.  To minimise the considerable difficulties of
diff --git a/coregrind/docs/manual.html b/coregrind/docs/manual.html
index dc66721359..20fbb36b59 100644
--- a/coregrind/docs/manual.html
+++ b/coregrind/docs/manual.html
@@ -24,8 +24,9 @@
 <body bgcolor="#ffffff">
 
 <a name="title">&nbsp;</a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
 <center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
 <p>
 
 <center>
@@ -102,7 +103,9 @@ detect problems such as:
   <li>Reading/writing memory after it has been free'd</li>
   <li>Reading/writing off the end of malloc'd blocks</li>
   <li>Reading/writing inappropriate areas on the stack</li>
-  <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+  <li>Memory leaks -- where pointers to malloc'd blocks are lost
+  forever</li>
+  <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
 </ul>
 
 Problems like these can be difficult to find by other means, often
@@ -677,25 +680,6 @@ shouldn't need to use them in the normal run of things.  Nevertheless:
       all fairly dodgy and doesn't work at all if threads are
       involved.</li><br>
       <p>
-
-  <li><code>--smc-check=none</code><br>
-      <code>--smc-check=some</code> [default]<br>
-      <code>--smc-check=all</code>
-      <p>How carefully should Valgrind check for self-modifying code
-      writes, so that translations can be discarded?&nbsp; When
-      "none", no writes are checked.  When "some", only writes
-      resulting from moves from integer registers to memory are
-      checked.  When "all", all memory writes are checked, even those
-      with which are no sane program would generate code -- for
-      example, floating-point writes.
-      <p>
-      NOTE that this is all a bit bogus.  This mechanism has never
-      been enabled in any snapshot of Valgrind which was made
-      available to the general public, because the extra checks reduce
-      performance, increase complexity, and I have yet to come across
-      any programs which actually use self-modifying code.  I think
-      the flag is ignored.
-      </li>
 </ul>
 
 
@@ -1185,6 +1169,24 @@ A brief description of the available macros:
     right now.  Returns no value.  I guess this could be used to
     incrementally check for leaks between arbitrary places in the
     program's execution.  Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+    of code in the specified address range.  Useful if you are
+    debugging a JITter or some other dynamic code generation system.
+    After this call, attempts to execute code in the invalidated
+    address range will cause valgrind to make new translations of that
+    code, which is probably the semantics you want.  Note that this is
+    implemented naively, and involves checking all 200191 entries in
+    the translation table to see if any of them overlap the specified
+    address range.  So try not to call it often, or performance will
+    nosedive.  Note that you can be clever about this: you only need
+    to call it when an area which previously contained code is
+    overwritten with new code.  You can choose to write code into
+    fresh memory, and just call this occasionally to discard large
+    chunks of old code all at once.
+    <p>
+    Warning: minimally tested.  Also, doesn't interact well with the
+    cache simulator.
 </ul>
 <p>
 
@@ -1255,7 +1257,7 @@ bug in Mozilla which assumes that memory returned from
 <code>malloc</code> is 8-aligned.  Valgrind's allocator only
 guarantees 4-alignment, so without the patch Mozilla makes an illegal
 memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
 
 
 <a name="install"></a>
@@ -1730,10 +1732,8 @@ a kernel 2.2.X or 2.4.X system, subject to the following constraints:
       running under Valgrind.  This is due to the large amount of
       adminstrative information maintained behind the scenes.  Another
       cause is that Valgrind dynamically translates the original
-      executable and never throws any translation away, except in
-      those rare cases where self-modifying code is detected.
-      Translated, instrumented code is 12-14 times larger than the
-      original (!) so you can easily end up with 15+ MB of
+      executable.  Translated, instrumented code is 14-16 times larger
+      than the original (!) so you can easily end up with 30+ MB of
       translations when running (eg) a web browser.
       </li>
 </ul>
@@ -1809,14 +1809,14 @@ instrumented translation, which is added to the collection of
 translations.  Subsequent jumps to that address will use this
 translation.
 
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated.  Such a write invalidates translations of code
-bracketing the written address.  Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there.  In this way,
-self modifying code is supported.  In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code.  Such checking is expensive, and in practice (fortunately)
+almost no applications need it.  However, to help people who are
+debugging dynamic code generation systems, there is a Client Request 
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range.  So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
 
 <p>The JITter translates basic blocks -- blocks of straight-line-code
 -- as single entities.  To minimise the considerable difficulties of
diff --git a/coregrind/vg_constants.h b/coregrind/vg_constants.h
index 710b12cb90..252353c468 100644
--- a/coregrind/vg_constants.h
+++ b/coregrind/vg_constants.h
@@ -90,16 +90,6 @@
 /* Constants for the fast original-code-write check cache. */
 
 
-/* Usually you want this to be zero. */
-#define VG_SMC_FASTCHECK_IN_C 0
-
-#define VG_SMC_CACHE_BITS  19
-#define VG_SMC_CACHE_SIZE  (1 << VG_SMC_CACHE_BITS)
-#define VG_SMC_CACHE_MASK  ((VG_SMC_CACHE_SIZE) - 1)
-
-#define VG_SMC_CACHE_SHIFT 6
-
-
 /* Assembly code stubs make these requests ... */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
 #define VG_USERREQ__PTHREAD_RETURNS         0x4002
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 214d2ca100..573ee93271 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1524,56 +1524,6 @@ static void synth_cmovl_reg_reg ( Condcode cond, Int src, Int dst )
 }
 
 
-/* A word in memory containing a pointer to vg_helper_smc_check4.
-   Never changes. 
-*/
-static const Addr vg_helper_smc_check4_ADDR
-   = (Addr)&VG_(helper_smc_check4);
-
-static void synth_orig_code_write_check ( Int sz, Int reg )
-{
-   UInt offset;
-
-   /*
-     In this example, reg is %eax and sz == 8:
-
-     -- check the first four bytes
-     0087 89C5                  movl    %eax, %ebp
-     0089 FF1544332211          call    * 0x11223344
-                  
-     -- check the second four
-     008f 89C5                  movl    %eax, %ebp
-     0091 83C504                addl    $4, %ebp
-     0094 FF1544332211          call    * 0x11223344
-
-     Because we can't call an absolute address (alas), the
-     address called is stored in memory at 0x11223344 in this
-     example, and it just contains the address of 
-     vg_helper_smc_check4 -- which is where we really want
-     to get to.
-   */
-   vg_assert(0);
-
-   if (sz < 4) sz = 4;
-
-   for (offset = 0; offset < sz; offset += 4) {
-
-      emit_movl_reg_reg ( reg, R_EBP );
-
-      if (offset > 0) {
-         newEmit();
-         emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset );
-         if (dis) VG_(printf)("\n");
-      }
-
-      newEmit();
-      emitB ( 0xFF ); emitB ( 0x15 ); 
-      emitL ( (Addr)&vg_helper_smc_check4_ADDR );
-      if (dis) VG_(printf)("\n");
-   }
-}
-
-
 /* Synthesise a minimal test (and which discards result) of reg32
    against lit.  It's always safe do simply
       emit_testv_lit_reg ( 4, lit, reg32 )
@@ -2264,8 +2214,10 @@ static void emitUInstr ( Int i, UInstr* u )
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == RealReg);
          synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
+	 /* No longer possible, but retained for illustrative purposes.
          if (u->smc_check) 
             synth_orig_code_write_check ( u->size, u->val2 );
+	 */
          break;
       }
 
@@ -2598,8 +2550,10 @@ static void emitUInstr ( Int i, UInstr* u )
          synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
                             u->val1 & 0xFF,
                             u->val2 );
+         /* No longer possible, but retained for illustrative purposes.
          if (u->opcode == FPU_W && u->smc_check) 
             synth_orig_code_write_check ( u->size, u->val2 );
+         */
          break;
 
       case FPU:
diff --git a/coregrind/vg_helpers.S b/coregrind/vg_helpers.S
index 62db9ec1d7..29689225d3 100644
--- a/coregrind/vg_helpers.S
+++ b/coregrind/vg_helpers.S
@@ -146,51 +146,6 @@ VG_(helper_value_check4_fail):
 	ret
 
 
-/* Do a original-code-write check for the address in %ebp. */
-.global VG_(helper_smc_check4)
-VG_(helper_smc_check4):
-#if VG_SMC_FASTCHECK_IN_C
-
-	# save the live regs
-	pushl	%eax
-	pushl	%ebx
-	pushl	%ecx
-	pushl	%edx
-	pushl	%esi
-	pushl	%edi
-	
-	pushl	%ebp
-	call	VG_(smc_check4)
-	addl	$4, %esp
-
-	popl	%edi
-	popl	%esi
-	popl	%edx
-	popl	%ecx
-	popl	%ebx
-	popl	%eax
-	
-	ret
-#else	
-	incl	VG_(smc_total_check4s)
-	pushl	%ebp
-	shrl	$VG_SMC_CACHE_SHIFT, %ebp
-	andl	$VG_SMC_CACHE_MASK, %ebp
-	cmpb	$0, VG_(smc_cache)(%ebp)
-	jnz	vg_smc_cache_failure
-	addl	$4, %esp
-	ret
-      vg_smc_cache_failure:
-	popl	%ebp
-	pushal
-	pushl	%ebp
-	call	VG_(smc_check4)
-	addl	$4, %esp
-	popal
-	ret
-#endif
-
-	
 /* Fetch the time-stamp-ctr reg.
    On entry:
 	dummy, replaced by %EAX value
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 22e4f48830..7f44dde7d4 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -1301,7 +1301,7 @@ extern Bool VG_(what_line_is_this) ( Addr a,
 extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
                                      Char* fn_name, Int n_fn_name);
 
-extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
 
 
 /* ---------------------------------------------------------------------
@@ -1459,21 +1459,6 @@ extern UInt VG_(translations_needing_spill);
 /* total of register ranks over all translations */
 extern UInt VG_(total_reg_rank);
 
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-//extern UInt VG_(smc_total_check4s);
-
-/* Number of writes which the fast smc check couldn't show were
-   harmless. */
-extern UInt VG_(smc_cache_passed);
-
-/* Numnber of writes which really did write on original code. */
-extern UInt VG_(smc_fancy_passed);
-
-/* Number of translations discarded as a result. */
-//extern UInt VG_(smc_discard_count);
-
 /* Counts pertaining to internal sanity checking. */
 extern UInt VG_(sanity_fast_count);
 extern UInt VG_(sanity_slow_count);
@@ -1590,11 +1575,9 @@ extern void VG_(maybe_do_lru_pass) ( void );
 extern void VG_(flush_transtab) ( void );
 extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size );
 extern void VG_(add_to_trans_tab) ( TTEntry* tte );
+extern void VG_(invalidate_translations) ( Addr start, UInt range );
 
-extern void VG_(smc_mark_original) ( Addr original_addr, 
-                                     Int original_len );
-
-extern void VG_(init_transtab_and_SMC) ( void );
+extern void VG_(init_tt_tc) ( void );
 
 extern void VG_(sanity_check_tc_tt) ( void );
 extern Addr VG_(search_transtab) ( Addr original_addr );
@@ -1667,9 +1650,6 @@ extern UInt VG_(run_innerloop) ( void );
    Exports of vg_helpers.S
    ------------------------------------------------------------------ */
 
-/* SMC fast checks. */
-extern void VG_(helper_smc_check4);
-
 /* Mul, div, etc, -- we don't codegen these directly. */
 extern void VG_(helper_idiv_64_32);
 extern void VG_(helper_div_64_32);
@@ -1729,6 +1709,9 @@ extern void VG_(show_cachesim_results)( Int client_argc, Char** client_argv );
 extern void VG_(cachesim_log_non_mem_instr)(  iCC* cc );
 extern void VG_(cachesim_log_mem_instr)    ( idCC* cc, Addr data_addr );
 
+extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+
+
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
    ------------------------------------------------------------------ */
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index a7e41b2dda..94e175c70c 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -381,22 +381,6 @@ UInt VG_(translations_needing_spill) = 0;
 UInt VG_(total_reg_rank) = 0;
 
 
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-UInt VG_(smc_total_check4s) = 0;
-
-/* Number of writes which the fast smc check couldn't show were
-   harmless. */
-UInt VG_(smc_cache_passed) = 0;
-
-/* Numnber of writes which really did write on original code. */
-UInt VG_(smc_fancy_passed) = 0;
-
-/* Number of translations discarded as a result. */
-UInt VG_(smc_discard_count) = 0;
-
-
 /* Counts pertaining to internal sanity checking. */
 UInt VG_(sanity_fast_count) = 0;
 UInt VG_(sanity_slow_count) = 0;
@@ -954,13 +938,6 @@ static void vg_show_counts ( void )
                 VG_(uinstrs_prealloc),
                 VG_(uinstrs_spill),
                 VG_(total_reg_rank) );
-   VG_(message)(Vg_DebugMsg, 
-                "smc-check: %d checks, %d fast pass, "
-                "%d slow pass, %d discards.",
-		VG_(smc_total_check4s),
-		VG_(smc_cache_passed),
-		VG_(smc_fancy_passed),
-		VG_(smc_discard_count) );
    VG_(message)(Vg_DebugMsg, 
                 "   sanity: %d cheap, %d expensive checks.",
                 VG_(sanity_fast_count), 
@@ -1020,11 +997,12 @@ void VG_(main) ( void )
       VGP_PUSHCC(VgpInitAudit);
       VGM_(init_memory_audit)();
       VGP_POPCC;
-      VGP_PUSHCC(VgpReadSyms);
-      VG_(read_symbols)();
-      VGP_POPCC;
    }
 
+   VGP_PUSHCC(VgpReadSyms);
+   VG_(read_symbols)();
+   VGP_POPCC;
+
    /* End calibration of our RDTSC-based clock, leaving it as long as
       we can. */
    VG_(end_rdtsc_calibration)();
@@ -1033,7 +1011,7 @@ void VG_(main) ( void )
       carefully sets up the permissions maps to cover the anonymous
       mmaps for the translation table and translation cache, which
       wastes > 20M of virtual address space. */
-   VG_(init_transtab_and_SMC)();
+   VG_(init_tt_tc)();
 
    if (VG_(clo_verbosity) == 1) {
       VG_(message)(Vg_UserMsg, 
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index d1d792a000..57d687d5f8 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -330,8 +330,6 @@ void create_translation_for ( ThreadId tid, Addr orig_addr )
    VG_(overall_in_count) ++;
    VG_(overall_in_osize) += orig_size;
    VG_(overall_in_tsize) += trans_size;
-   /* Record translated area for SMC detection. */
-   VG_(smc_mark_original) ( orig_addr, orig_size );
 }
 
 
@@ -2684,6 +2682,7 @@ void do_nontrivial_clientreq ( ThreadId tid )
       case VG_USERREQ__MAKE_NOACCESS_STACK:
       case VG_USERREQ__RUNNING_ON_VALGRIND:
       case VG_USERREQ__DO_LEAK_CHECK:
+      case VG_USERREQ__DISCARD_TRANSLATIONS:
          SET_EDX(
             tid, 
             VG_(handle_client_request) ( &VG_(threads)[tid], arg )
diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c
index c781751985..eb3b39428d 100644
--- a/coregrind/vg_symtab2.c
+++ b/coregrind/vg_symtab2.c
@@ -36,13 +36,16 @@
 
 /* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
    dlopen()ed libraries, which is something that KDE3 does a lot.
-   Still kludgey, though less than before:
 
-   * we don't check whether we should throw away some symbol tables 
-     when munmap() happens
+   Stabs reader greatly improved by Nick Nethercote, Apr 02.
 
-   * symbol table reading code for ELF binaries is a shambles.  
-     Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
+   16 May 02: when notified about munmap, return a Bool indicating
+   whether or not the area being munmapped had executable permissions.
+   This is then used to determine whether or not
+   VG_(invalidate_translations) should be called for that area.  In order
+   that this work even if --instrument=no, in this case we still keep
+   track of the mapped executable segments, but do not load any debug
+   info or symbols.
 */
 
 /*------------------------------------------------------------*/
@@ -1181,9 +1184,11 @@ void read_symtab_callback (
       = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
 
    /* And actually fill it up. */
-   vg_read_lib_symbols ( si );
-   canonicaliseSymtab ( si );
-   canonicaliseLoctab ( si );
+   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
+      vg_read_lib_symbols ( si );
+      canonicaliseSymtab ( si );
+      canonicaliseLoctab ( si );
+   }
 }
 
 
@@ -1197,9 +1202,6 @@ void read_symtab_callback (
    which happen to correspond to the munmap()d area.  */
 void VG_(read_symbols) ( void )
 {
-   if (! VG_(clo_instrument) && ! VG_(clo_cachesim)) 
-      return;
-
    VG_(read_procselfmaps) ( read_symtab_callback );
 
    /* Do a sanity check on the symbol tables: ensure that the address
@@ -1222,7 +1224,6 @@ void VG_(read_symbols) ( void )
            /* the main assertion */
            overlap = (lo <= lo2 && lo2 <= hi)
                       || (lo <= hi2 && hi2 <= hi);
-           //vg_assert(!overlap);
 	   if (overlap) {
               VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
               ppSegInfo ( si );
@@ -1240,15 +1241,16 @@ void VG_(read_symbols) ( void )
    to a segment for a .so, and if so discard the relevant SegInfo.
    This might not be a very clever idea from the point of view of
    accuracy of error messages, but we need to do it in order to
-   maintain the no-overlapping invariant.  
+   maintain the no-overlapping invariant.
+
+   16 May 02: Returns a Bool indicating whether or not the discarded
+   range falls inside a known executable segment.  See comment at top
+   of file for why.
 */
-void VG_(symtab_notify_munmap) ( Addr start, UInt length )
+Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
 {
    SegInfo *prev, *curr;
 
-   if (! VG_(clo_instrument)) 
-     return;
-
    prev = NULL;
    curr = segInfo;
    while (True) {
@@ -1257,7 +1259,8 @@ void VG_(symtab_notify_munmap) ( Addr start, UInt length )
       prev = curr;
       curr = curr->next;
    }
-   if (curr == NULL) return;
+   if (curr == NULL) 
+      return False;
 
    VG_(message)(Vg_UserMsg, 
                 "discard syms in %s due to munmap()", 
@@ -1272,6 +1275,7 @@ void VG_(symtab_notify_munmap) ( Addr start, UInt length )
    }
 
    freeSegInfo(curr);
+   return True;
 }
 
 
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index 1e4bff28d8..0a806944ec 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -297,7 +297,7 @@ Bool VG_(anyFlagUse) ( UInstr* u )
 
    Important!  If you change the set of allocatable registers from
    %eax, %ebx, %ecx, %edx, %esi you must change the
-   save/restore sequences in vg_helper_smc_check4 to match!  
+   save/restore sequences in various places to match!  
 */
 __inline__ Int VG_(rankToRealRegNo) ( Int rank )
 {
diff --git a/coregrind/vg_transtab.c b/coregrind/vg_transtab.c
index d0f0eb1e2f..a364df0b86 100644
--- a/coregrind/vg_transtab.c
+++ b/coregrind/vg_transtab.c
@@ -32,6 +32,8 @@
 #include "vg_include.h"
 #include "vg_constants.h"
 
+/* #define DEBUG_TRANSTAB */
+
 
 /*------------------------------------------------------------*/
 /*--- Management of the LRU-based translation table+cache. ---*/
@@ -42,7 +44,7 @@
    of code retranslation.  */
 
 /* Size of the translation cache, in bytes. */
-#define VG_TC_SIZE /*16000000*/ 32000000 /*40000000*/
+#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/
 
 /* Do a LRU pass when the translation cache becomes this full. */
 #define VG_TC_LIMIT_PERCENT 98
@@ -52,7 +54,7 @@
 
 /* Number of entries in the translation table.  This must be a prime
    number in order to make the hashing work properly. */
-#define VG_TT_SIZE /*100129*/ 200191 /*250829*/
+#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/
 
 /* Do an LRU pass when the translation table becomes this full. */
 #define VG_TT_LIMIT_PERCENT /*67*/ 80
@@ -64,9 +66,12 @@
    N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago.  */
 #define VG_N_EPOCHS /*2000*/ /*4000*/ 20000
 
-/* This TT entry is empty. */
+/* This TT entry is empty.  There is no associated TC storage. */
 #define VG_TTE_EMPTY   ((Addr)1)
-/* This TT entry has been deleted. */
+/* This TT entry has been deleted, in the sense that it does not
+   contribute to the orig->trans mapping.  However, the ex-translation
+   it points at still occupies space in TC.  This slot cannot be
+   re-used without doing an LRU pass. */
 #define VG_TTE_DELETED ((Addr)3)
 
 /* The TC.  This used to be statically allocated, but that forces many
@@ -77,7 +82,8 @@
 */
 static UChar* vg_tc = NULL;
 
-/* Count of bytes used in the TC. */
+/* Count of bytes used in the TC.  This includes those pointed to from
+   VG_TTE_DELETED entries. */
 static Int vg_tc_used = 0;
 
 /* The TT.  Like TC, for the same reason, is dynamically allocated at
@@ -86,7 +92,7 @@ static Int vg_tc_used = 0;
 */
 static TTEntry* vg_tt = NULL;
 
-/* Count of non-empty, non-deleted TT entries. */
+/* Count of non-empty TT entries.  This includes deleted ones. */
 static Int vg_tt_used = 0;
 
 /* Fast helper for the TT.  A direct-mapped cache which holds a
@@ -135,6 +141,10 @@ void VG_(maybe_do_lru_pass) ( void )
    if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
       return;
 
+#  ifdef DEBUG_TRANSTAB
+   VG_(sanity_check_tc_tt)();
+#  endif
+
    VGP_PUSHCC(VgpDoLRU);
    /*   
    VG_(printf)(
@@ -157,8 +167,9 @@ void VG_(maybe_do_lru_pass) ( void )
       vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
 
    for (i = 0; i < VG_TT_SIZE; i++) {
-      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
-          vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY 
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) 
+            continue;
       j = vg_tt[i].mru_epoch;
       vg_assert(j <= VG_(current_epoch));
       j = VG_(current_epoch) - j;
@@ -200,11 +211,11 @@ void VG_(maybe_do_lru_pass) ( void )
       recently used at most thresh epochs ago.  Traverse the TT and
       mark such entries as deleted. */
    for (i = 0; i < VG_TT_SIZE; i++) {
-      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
-         vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY 
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) 
+         continue;
       if (vg_tt[i].mru_epoch <= thresh) {
          vg_tt[i].orig_addr = VG_TTE_DELETED;
-         vg_tt_used--;
 	 VG_(this_epoch_out_count) ++;
 	 VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
 	 VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
@@ -214,9 +225,6 @@ void VG_(maybe_do_lru_pass) ( void )
       }
    }
 
-   vg_assert(vg_tt_used >= 0);
-   vg_assert(vg_tt_used <= tt_target);
-
    /* Now compact the TC, sliding live entries downwards to fill spaces
       left by deleted entries.  In this loop, r is the offset in TC of
       the current translation under consideration, and w is the next
@@ -241,6 +249,9 @@ void VG_(maybe_do_lru_pass) ( void )
             vg_tc[w+i] = vg_tc[r+i];
          tte->trans_addr = (Addr)&vg_tc[w+4];
          w += 4+tte->trans_size;
+      } else {
+         tte->orig_addr = VG_TTE_EMPTY;
+         vg_tt_used--;
       }
       r += 4+tte->trans_size;
    }
@@ -252,6 +263,9 @@ void VG_(maybe_do_lru_pass) ( void )
    vg_assert(w <= tc_target);
    vg_tc_used = w;
 
+   vg_assert(vg_tt_used >= 0);
+   vg_assert(vg_tt_used <= tt_target);
+
    /* Invalidate the fast cache, since it is now out of date.  It will get
       reconstructed incrementally when the client resumes. */
    VG_(invalidate_tt_fast)();
@@ -274,6 +288,11 @@ void VG_(maybe_do_lru_pass) ( void )
       );
 
    /* Reconstruct the SMC detection structures. */
+#  ifdef DEBUG_TRANSTAB
+   for (i = 0; i < VG_TT_SIZE; i++)
+      vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
+#  endif
+   VG_(sanity_check_tc_tt)();
 
    VGP_POPCC;
 }
@@ -290,7 +309,6 @@ void VG_(sanity_check_tc_tt) ( void )
    for (i = 0; i < VG_TT_SIZE; i++) {
       tte = &vg_tt[i];
       if (tte->orig_addr == VG_TTE_EMPTY) continue;
-      if (tte->orig_addr == VG_TTE_DELETED) continue;
       vg_assert(tte->mru_epoch >= 0);
       vg_assert(tte->mru_epoch <= VG_(current_epoch));
       counted_entries++;
@@ -323,8 +341,7 @@ extern void VG_(add_to_trans_tab) ( TTEntry* tte )
    while (True) {
       if (vg_tt[i].orig_addr == tte->orig_addr)
          VG_(panic)("add_to_trans_tab: duplicate");
-      if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
-          vg_tt[i].orig_addr == VG_TTE_EMPTY) {
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY) {
          /* Put it here, and set the back pointer. */
          vg_tt[i] = *tte;
          VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
@@ -377,8 +394,8 @@ void VG_(invalidate_tt_fast)( void )
 */
 static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
 {
-  //static Int queries = 0;
-  //static Int probes = 0;
+   //static Int queries = 0;
+   //static Int probes = 0;
    Int i;
    /* Hash to get initial probe point. */
    //   if (queries == 10000) {
@@ -388,7 +405,7 @@ static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
    //queries++;
    i = ((UInt)orig_addr) % VG_TT_SIZE;
    while (True) {
-     //probes++;
+      //probes++;
       if (vg_tt[i].orig_addr == orig_addr)
          return &vg_tt[i];
       if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
@@ -426,228 +443,58 @@ Addr VG_(search_transtab) ( Addr original_addr )
 }
 
 
-/*------------------------------------------------------------*/
-/*--- Detecting and handling self-modifying code.          ---*/
-/*------------------------------------------------------------*/
-
-/* This mechanism uses two data structures:
-
-   vg_oldmap -- array[64k] of Bool, which approximately records
-   parts of the address space corresponding to code for which
-   a translation exists in the translation table.  vg_oldmap is
-   consulted at each write, to determine whether that write might
-   be writing a code address; if so, the program is stopped at 
-   the next jump, and the corresponding translations are invalidated.
-
-   Precise semantics: vg_oldmap[(a >> 8) & 0xFFFF] is true for all
-   addresses a containing a code byte which has been translated.  So
-   it acts kind-of like a direct-mapped cache with 64k entries.
-
-   The second structure is vg_CAW, a small array of addresses at which
-   vg_oldmap indicates a code write may have happened.  This is
-   (effectively) checked at each control transfer (jump), so that
-   translations can be discarded before going on.  An array is
-   somewhat overkill, since it strikes me as very unlikely that a
-   single basic block will do more than one code write.  Nevertheless
-   ...  
-
-   ToDo: make this comment up-to-date.
+/* Invalidate translations of original code [start .. start + range - 1].
+   This is slow, so you *really* don't want to call it very often. 
 */
-
-
-/* Definitions for the self-modifying-code detection cache, intended
-   as a fast check which clears the vast majority of writes.  */
-
-#define VG_SMC_CACHE_HASH(aaa) \
-   ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK)
-
-Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE];
-
-
-/* Definitions for the fallback mechanism, which, more slowly,
-   provides a precise record of which words in the address space
-   belong to original code. */
-
-typedef struct { UChar chars[2048]; } VgSmcSecondary;
-
-static VgSmcSecondary* vg_smc_primary[65536];
-
-static VgSmcSecondary* vg_smc_new_secondary ( void )
-{
-   Int i;
-   VgSmcSecondary* sec 
-      = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) );
-   for (i = 0; i < 2048; i++)
-      sec->chars[i] = 0;
-   return sec;
-}
-
-#define GET_BIT_ARRAY(arr,indx)                      \
-   (1 & (  ((UChar*)arr)[((UInt)indx) / 8]           \
-           >> ( ((UInt)indx) % 8) ) )
-
-#define SET_BIT_ARRAY(arr,indx)                      \
-   ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8)
-
-
-/* Finally, a place to record the original-code-write addresses
-   detected in a basic block. */
-
-#define VG_ORIGWRITES_SIZE 10
-
-static Addr vg_origwrites[VG_ORIGWRITES_SIZE];
-static Int  vg_origwrites_used;
-
-
-/* Call here to check a written address. */
-
-void VG_(smc_check4) ( Addr a )
+void VG_(invalidate_translations) ( Addr start, UInt range )
 {
-   UInt bit_index;
-   VgSmcSecondary* smc_secondary;
+   Addr  i_start, i_end, o_start, o_end;
+   UInt  out_count, out_osize, out_tsize;
+   Int   i;
 
-#  if VG_SMC_FASTCHECK_IN_C
-   VG_(smc_total_check4s)++;
-
-   /* Try the fast check first. */
-   if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return;
+#  ifdef DEBUG_TRANSTAB
+   VG_(sanity_check_tc_tt)();
 #  endif
+   i_start = start;
+   i_end   = start + range - 1;
+   out_count = out_osize = out_tsize = 0;
 
-   VG_(smc_cache_passed)++;
-
-   /* Need to do a slow check. */
-   smc_secondary = vg_smc_primary[a >> 16];
-   if (smc_secondary == NULL) return;
-
-   bit_index = (a & 0xFFFF) >> 2;
-   if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return;
-
-   VG_(smc_fancy_passed)++;
-
-   /* Detected a Real Live write to code which has been translated.
-      Note it. */
-   if (vg_origwrites_used == VG_ORIGWRITES_SIZE)
-      VG_(panic)("VG_ORIGWRITES_SIZE is too small; "
-                 "increase and recompile.");
-   vg_origwrites[vg_origwrites_used] = a;
-   vg_origwrites_used++;
-
-   VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a);
-
-   /* Force an exit before the next basic block, so the translation
-      cache can be flushed appropriately. */
-   //   VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr);
-   //VG_(dispatch_ctr)       = 1;
-   //VG_(interrupt_reason)   = VG_Y_SMC;
-}
-
-
-/* Mark an address range as containing an original translation,
-   updating both the fast-check cache and the slow-but-correct data
-   structure.  
-*/
-void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size )
-{
-   Addr a;
-   VgSmcSecondary* smc_secondary;
-   UInt bit_index;
-
-   for (a = orig_addr; a < orig_addr+orig_size; a++) {
-
-      VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True;
-
-      smc_secondary = vg_smc_primary[a >> 16];
-      if (smc_secondary == NULL)
-         smc_secondary = 
-         vg_smc_primary[a >> 16] = vg_smc_new_secondary();
-
-      bit_index = (a & 0xFFFF) >> 2;
-      SET_BIT_ARRAY(smc_secondary->chars, bit_index);      
+   for (i = 0; range > 0 && i < VG_TT_SIZE; i++) { /* empty range: no scan */
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      o_start = vg_tt[i].orig_addr;
+      o_end = o_start + vg_tt[i].orig_size - 1;  /* inclusive last byte */
+      if (o_end < i_start || o_start > i_end)     /* disjoint from request */
+         continue;
+      if (VG_(clo_cachesim))
+         VG_(cachesim_notify_discard)( & vg_tt[i] );
+      vg_tt[i].orig_addr = VG_TTE_DELETED;
+      VG_(this_epoch_out_count) ++;
+      VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
+      VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
+      VG_(overall_out_count) ++;
+      VG_(overall_out_osize) += vg_tt[i].orig_size;
+      VG_(overall_out_tsize) += vg_tt[i].trans_size;
+      out_count ++;
+      out_osize += vg_tt[i].orig_size;
+      out_tsize += vg_tt[i].trans_size;
    }
-}
-
 
-/* Discard any translations whose original code overlaps with the
-   range w_addr .. w_addr+3 inclusive. 
-*/
-__attribute__ ((unused))
-static void discard_translations_bracketing ( Addr w_addr )
-{
-#  if 0
-   Int      i, rd, wr;
-   Addr     o_start, o_end;
-   TTEntry* tt;
-
-   for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) {
-      tt = vg_transtab[i];
-      wr = 0;
-      for (rd = 0; rd < vg_transtab_used[i]; rd++) {
-         o_start = tt[rd].orig_addr;
-         o_end   = o_start + tt[rd].orig_size;
-         if (w_addr > o_end || (w_addr+3) < o_start) {
-            /* No collision possible; keep this translation */
-            VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size );
-            if (wr < rd) vg_transtab[wr] = vg_transtab[rd];
-            wr++;
-	 } else {
-            /* Possible collision; discard. */
-            vg_smc_discards++;
-            VG_(message) (Vg_DebugMsg, 
-                             "discarding translation of %p .. %p",
-                             tt[rd].orig_addr, 
-                             tt[rd].orig_addr + tt[rd].orig_size - 1);
-            VG_(free)((void*)tt[rd].trans_addr);
-         }         
+   if (out_count > 0) {
+      VG_(invalidate_tt_fast)();
+      VG_(sanity_check_tc_tt)();
+#     ifdef DEBUG_TRANSTAB
+      { Addr aa;
+        for (aa = i_start; aa <= i_end; aa++)
+           vg_assert(search_trans_table ( aa ) == NULL);
       }
-      vg_transtab_used[i] = wr;
-   }
-#  endif   
-}
-
-
-/* Top-level function in charge of discarding out-of-date translations
-   following the discovery of a (potential) original-code-write. 
-*/
-void VG_(flush_transtab) ( void )
-{
-#  if 0
-   Addr w_addr;
-   Int  i, j;
-
-   /* We shouldn't be here unless a code write was detected. */
-   vg_assert(vg_origwrites_used > 0);
-
-   /* Instead of incrementally fixing up the translation table cache,
-      just invalidate the whole darn thing.  Pray this doesn't happen
-      very often :) */
-   for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++)
-      VG_(transtab_cache_orig)[i] = 
-      VG_(transtab_cache_trans)[i] = (Addr)0;
-
-   /* Clear out the fast cache; discard_translations_bracketing
-      reconstructs it. */
-   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
-      VG_(smc_cache)[i] = False;
-
-   /* And also clear the slow-but-correct table. */
-   for (i = 0; i < 65536; i++) {
-      VgSmcSecondary* sec = vg_smc_primary[i];
-      if (sec)
-         for (j = 0; j < 2048; j++)
-            sec->chars[j] = 0;         
+#     endif
    }
 
-   /* This doesn't need to be particularly fast, since we (presumably)
-      don't have to handle particularly frequent writes to code
-      addresses. */
-   while (vg_origwrites_used > 0) {
-      vg_origwrites_used--;
-      w_addr = vg_origwrites[vg_origwrites_used];
-      discard_translations_bracketing ( w_addr );
-   }
-
-   vg_assert(vg_origwrites_used == 0);
-#  endif
+   if (VG_(clo_verbosity) > 1)   /* report only when verbose */
+      VG_(message)(Vg_UserMsg,
+         "discard %d (%d -> %d) translations in range %p .. %p",
+         out_count, out_osize, out_tsize, i_start, i_end );
 }
 
 
@@ -655,7 +502,7 @@ void VG_(flush_transtab) ( void )
 /*--- Initialisation.                                      ---*/
 /*------------------------------------------------------------*/
 
-void VG_(init_transtab_and_SMC) ( void )
+void VG_(init_tt_tc) ( void )
 {
    Int i;
 
@@ -678,17 +525,6 @@ void VG_(init_transtab_and_SMC) ( void )
       at the first TT entry, which is, of course, empty. */
    for (i = 0; i < VG_TT_FAST_SIZE; i++)
       VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
-
-   /* No part of the address space has any translations. */
-   for (i = 0; i < 65536; i++)
-      vg_smc_primary[i] = NULL;
-
-   /* ... and the associated fast-check cache reflects this. */
-   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
-      VG_(smc_cache)[i] = False;
-
-   /* Finally, no original-code-writes have been recorded. */
-   vg_origwrites_used = 0;
 }
 
 /*--------------------------------------------------------------------*/
diff --git a/docs/manual.html b/docs/manual.html
index dc66721359..20fbb36b59 100644
--- a/docs/manual.html
+++ b/docs/manual.html
@@ -24,8 +24,9 @@
 <body bgcolor="#ffffff">
 
 <a name="title">&nbsp;</a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
 <center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
 <p>
 
 <center>
@@ -102,7 +103,9 @@ detect problems such as:
   <li>Reading/writing memory after it has been free'd</li>
   <li>Reading/writing off the end of malloc'd blocks</li>
   <li>Reading/writing inappropriate areas on the stack</li>
-  <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+  <li>Memory leaks -- where pointers to malloc'd blocks are lost
+  forever</li>
+  <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
 </ul>
 
 Problems like these can be difficult to find by other means, often
@@ -677,25 +680,6 @@ shouldn't need to use them in the normal run of things.  Nevertheless:
       all fairly dodgy and doesn't work at all if threads are
       involved.</li><br>
       <p>
-
-  <li><code>--smc-check=none</code><br>
-      <code>--smc-check=some</code> [default]<br>
-      <code>--smc-check=all</code>
-      <p>How carefully should Valgrind check for self-modifying code
-      writes, so that translations can be discarded?&nbsp; When
-      "none", no writes are checked.  When "some", only writes
-      resulting from moves from integer registers to memory are
-      checked.  When "all", all memory writes are checked, even those
-      with which are no sane program would generate code -- for
-      example, floating-point writes.
-      <p>
-      NOTE that this is all a bit bogus.  This mechanism has never
-      been enabled in any snapshot of Valgrind which was made
-      available to the general public, because the extra checks reduce
-      performance, increase complexity, and I have yet to come across
-      any programs which actually use self-modifying code.  I think
-      the flag is ignored.
-      </li>
 </ul>
 
 
@@ -1185,6 +1169,24 @@ A brief description of the available macros:
     right now.  Returns no value.  I guess this could be used to
     incrementally check for leaks between arbitrary places in the
     program's execution.  Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+    of code in the specified address range.  Useful if you are
+    debugging a JITter or some other dynamic code generation system.
+    After this call, attempts to execute code in the invalidated
+    address range will cause valgrind to make new translations of that
+    code, which is probably the semantics you want.  Note that this is
+    implemented naively, and involves checking all 200191 entries in
+    the translation table to see if any of them overlap the specified
+    address range.  So try not to call it often, or performance will
+    nosedive.  Note that you can be clever about this: you only need
+    to call it when an area which previously contained code is
+    overwritten with new code.  You can choose to write code into
+    fresh memory, and just call this occasionally to discard large
+    chunks of old code all at once.
+    <p>
+    Warning: minimally tested.  Also, doesn't interact well with the
+    cache simulator.
 </ul>
 <p>
 
@@ -1255,7 +1257,7 @@ bug in Mozilla which assumes that memory returned from
 <code>malloc</code> is 8-aligned.  Valgrind's allocator only
 guarantees 4-alignment, so without the patch Mozilla makes an illegal
 memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
 
 
 <a name="install"></a>
@@ -1730,10 +1732,8 @@ a kernel 2.2.X or 2.4.X system, subject to the following constraints:
       running under Valgrind.  This is due to the large amount of
       adminstrative information maintained behind the scenes.  Another
       cause is that Valgrind dynamically translates the original
-      executable and never throws any translation away, except in
-      those rare cases where self-modifying code is detected.
-      Translated, instrumented code is 12-14 times larger than the
-      original (!) so you can easily end up with 15+ MB of
+      executable.  Translated, instrumented code is 14-16 times larger
+      than the original (!) so you can easily end up with 30+ MB of
       translations when running (eg) a web browser.
       </li>
 </ul>
@@ -1809,14 +1809,14 @@ instrumented translation, which is added to the collection of
 translations.  Subsequent jumps to that address will use this
 translation.
 
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated.  Such a write invalidates translations of code
-bracketing the written address.  Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there.  In this way,
-self modifying code is supported.  In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code.  Such checking is expensive, and in practice (fortunately)
+almost no applications need it.  However, to help people who are
+debugging dynamic code generation systems, there is a Client Request 
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range.  So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
 
 <p>The JITter translates basic blocks -- blocks of straight-line-code
 -- as single entities.  To minimise the considerable difficulties of
diff --git a/include/valgrind.h b/include/valgrind.h
index 43efffb928..478426da34 100644
--- a/include/valgrind.h
+++ b/include/valgrind.h
@@ -64,11 +64,11 @@
         _zzq_arg4     /* request fourth param */ )                      \
                                                                         \
   { volatile unsigned int _zzq_args[5];                                 \
-    _zzq_args[0] = (volatile unsigned int)_zzq_request;                 \
-    _zzq_args[1] = (volatile unsigned int)_zzq_arg1;                    \
-    _zzq_args[2] = (volatile unsigned int)_zzq_arg2;                    \
-    _zzq_args[3] = (volatile unsigned int)_zzq_arg3;                    \
-    _zzq_args[4] = (volatile unsigned int)_zzq_arg4;                    \
+    _zzq_args[0] = (volatile unsigned int)(_zzq_request);               \
+    _zzq_args[1] = (volatile unsigned int)(_zzq_arg1);                  \
+    _zzq_args[2] = (volatile unsigned int)(_zzq_arg2);                  \
+    _zzq_args[3] = (volatile unsigned int)(_zzq_arg3);                  \
+    _zzq_args[4] = (volatile unsigned int)(_zzq_arg4);                  \
     asm volatile("movl %1, %%eax\n\t"                                   \
                  "movl %2, %%edx\n\t"                                   \
                  "roll $29, %%eax ; roll $3, %%eax\n\t"                 \
@@ -95,8 +95,8 @@
 #define VG_USERREQ__CHECK_READABLE       0x1006
 #define VG_USERREQ__MAKE_NOACCESS_STACK  0x1007
 #define VG_USERREQ__RUNNING_ON_VALGRIND  0x1008
-#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* unimplemented */
-
+#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* untested */
+#define VG_USERREQ__DISCARD_TRANSLATIONS 0x100A
 
 
 /* Client-code macros to manipulate the state of memory. */
@@ -227,4 +227,17 @@
                             0, 0, 0, 0);                           \
    }
 
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
+   since it provides a way to make sure valgrind will retranslate the
+   invalidated area.  Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)          \
+   {unsigned int _qzz_res;                                         \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__DISCARD_TRANSLATIONS,      \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+   }
+
+
 #endif
diff --git a/memcheck/docs/manual.html b/memcheck/docs/manual.html
index dc66721359..20fbb36b59 100644
--- a/memcheck/docs/manual.html
+++ b/memcheck/docs/manual.html
@@ -24,8 +24,9 @@
 <body bgcolor="#ffffff">
 
 <a name="title">&nbsp;</a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
 <center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
 <p>
 
 <center>
@@ -102,7 +103,9 @@ detect problems such as:
   <li>Reading/writing memory after it has been free'd</li>
   <li>Reading/writing off the end of malloc'd blocks</li>
   <li>Reading/writing inappropriate areas on the stack</li>
-  <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+  <li>Memory leaks -- where pointers to malloc'd blocks are lost
+  forever</li>
+  <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
 </ul>
 
 Problems like these can be difficult to find by other means, often
@@ -677,25 +680,6 @@ shouldn't need to use them in the normal run of things.  Nevertheless:
       all fairly dodgy and doesn't work at all if threads are
       involved.</li><br>
       <p>
-
-  <li><code>--smc-check=none</code><br>
-      <code>--smc-check=some</code> [default]<br>
-      <code>--smc-check=all</code>
-      <p>How carefully should Valgrind check for self-modifying code
-      writes, so that translations can be discarded?&nbsp; When
-      "none", no writes are checked.  When "some", only writes
-      resulting from moves from integer registers to memory are
-      checked.  When "all", all memory writes are checked, even those
-      with which are no sane program would generate code -- for
-      example, floating-point writes.
-      <p>
-      NOTE that this is all a bit bogus.  This mechanism has never
-      been enabled in any snapshot of Valgrind which was made
-      available to the general public, because the extra checks reduce
-      performance, increase complexity, and I have yet to come across
-      any programs which actually use self-modifying code.  I think
-      the flag is ignored.
-      </li>
 </ul>
 
 
@@ -1185,6 +1169,24 @@ A brief description of the available macros:
     right now.  Returns no value.  I guess this could be used to
     incrementally check for leaks between arbitrary places in the
     program's execution.  Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+    of code in the specified address range.  Useful if you are
+    debugging a JITter or some other dynamic code generation system.
+    After this call, attempts to execute code in the invalidated
+    address range will cause valgrind to make new translations of that
+    code, which is probably the semantics you want.  Note that this is
+    implemented naively, and involves checking all 200191 entries in
+    the translation table to see if any of them overlap the specified
+    address range.  So try not to call it often, or performance will
+    nosedive.  Note that you can be clever about this: you only need
+    to call it when an area which previously contained code is
+    overwritten with new code.  You can choose to write code into
+    fresh memory, and just call this occasionally to discard large
+    chunks of old code all at once.
+    <p>
+    Warning: minimally tested.  Also, doesn't interact well with the
+    cache simulator.
 </ul>
 <p>
 
@@ -1255,7 +1257,7 @@ bug in Mozilla which assumes that memory returned from
 <code>malloc</code> is 8-aligned.  Valgrind's allocator only
 guarantees 4-alignment, so without the patch Mozilla makes an illegal
 memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
 
 
 <a name="install"></a>
@@ -1730,10 +1732,8 @@ a kernel 2.2.X or 2.4.X system, subject to the following constraints:
       running under Valgrind.  This is due to the large amount of
       adminstrative information maintained behind the scenes.  Another
       cause is that Valgrind dynamically translates the original
-      executable and never throws any translation away, except in
-      those rare cases where self-modifying code is detected.
-      Translated, instrumented code is 12-14 times larger than the
-      original (!) so you can easily end up with 15+ MB of
+      executable.  Translated, instrumented code is 14-16 times larger
+      than the original (!) so you can easily end up with 30+ MB of
       translations when running (eg) a web browser.
       </li>
 </ul>
@@ -1809,14 +1809,14 @@ instrumented translation, which is added to the collection of
 translations.  Subsequent jumps to that address will use this
 translation.
 
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated.  Such a write invalidates translations of code
-bracketing the written address.  Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there.  In this way,
-self modifying code is supported.  In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code.  Such checking is expensive, and in practice (fortunately)
+almost no applications need it.  However, to help people who are
+debugging dynamic code generation systems, there is a Client Request 
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range.  So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
 
 <p>The JITter translates basic blocks -- blocks of straight-line-code
 -- as single entities.  To minimise the considerable difficulties of
diff --git a/tests/discard.c b/tests/discard.c
new file mode 100644
index 0000000000..0c14e9f14c
--- /dev/null
+++ b/tests/discard.c
@@ -0,0 +1,27 @@
+
+#include <stdio.h>
+#include <valgrind.h>
+
+int fooble ( void )
+{
+  int x, y;
+  y = 0;
+  for (x = 0; x < 100; x++) {
+    if ((x % 3) == 0) y += x; else y++;
+  }
+  return y;
+}
+
+void someother ( void )
+{
+}
+
+int main ( void )
+{
+  printf("fooble-1() = %d\n", fooble() );
+  VALGRIND_DISCARD_TRANSLATIONS( (char*)(&fooble), 
+          ((char*)(&someother)) - ((char*)(&fooble)) );
+  printf("fooble-2() = %d\n", fooble() );
+  return 0;
+}
+
diff --git a/valgrind.h b/valgrind.h
index 43efffb928..478426da34 100644
--- a/valgrind.h
+++ b/valgrind.h
@@ -64,11 +64,11 @@
         _zzq_arg4     /* request fourth param */ )                      \
                                                                         \
   { volatile unsigned int _zzq_args[5];                                 \
-    _zzq_args[0] = (volatile unsigned int)_zzq_request;                 \
-    _zzq_args[1] = (volatile unsigned int)_zzq_arg1;                    \
-    _zzq_args[2] = (volatile unsigned int)_zzq_arg2;                    \
-    _zzq_args[3] = (volatile unsigned int)_zzq_arg3;                    \
-    _zzq_args[4] = (volatile unsigned int)_zzq_arg4;                    \
+    _zzq_args[0] = (volatile unsigned int)(_zzq_request);               \
+    _zzq_args[1] = (volatile unsigned int)(_zzq_arg1);                  \
+    _zzq_args[2] = (volatile unsigned int)(_zzq_arg2);                  \
+    _zzq_args[3] = (volatile unsigned int)(_zzq_arg3);                  \
+    _zzq_args[4] = (volatile unsigned int)(_zzq_arg4);                  \
     asm volatile("movl %1, %%eax\n\t"                                   \
                  "movl %2, %%edx\n\t"                                   \
                  "roll $29, %%eax ; roll $3, %%eax\n\t"                 \
@@ -95,8 +95,8 @@
 #define VG_USERREQ__CHECK_READABLE       0x1006
 #define VG_USERREQ__MAKE_NOACCESS_STACK  0x1007
 #define VG_USERREQ__RUNNING_ON_VALGRIND  0x1008
-#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* unimplemented */
-
+#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* untested */
+#define VG_USERREQ__DISCARD_TRANSLATIONS 0x100A
 
 
 /* Client-code macros to manipulate the state of memory. */
@@ -227,4 +227,17 @@
                             0, 0, 0, 0);                           \
    }
 
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
+   since it provides a way to make sure valgrind will retranslate the
+   invalidated area.  Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)          \
+   {unsigned int _qzz_res;                                         \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__DISCARD_TRANSLATIONS,      \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+   }
+
+
 #endif
diff --git a/vg_cachesim.c b/vg_cachesim.c
index 8081e0ae8f..b794e6125c 100644
--- a/vg_cachesim.c
+++ b/vg_cachesim.c
@@ -1,3 +1,4 @@
+
 /*--------------------------------------------------------------------*/
 /*--- The cache simulation framework: instrumentation, recording   ---*/
 /*--- and results printing.                                        ---*/
@@ -10,7 +11,6 @@
 
    Copyright (C) 2000-2002 Julian Seward 
       jseward@acm.org
-      Julian_Seward@muraroa.demon.co.uk
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -30,8 +30,6 @@
    The GNU General Public License is contained in the file LICENSE.
 */
 
-#include <string.h>
-
 #include "vg_include.h"
 
 #include "vg_cachesim_L2.c"
@@ -311,7 +309,7 @@ static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
    filename_hash = hash(filename, N_FILE_ENTRIES);
    curr_file_node = BBCC_table[filename_hash];
    while (NULL != curr_file_node && 
-          strcmp(filename, curr_file_node->filename) != 0) {
+          VG_(strcmp)(filename, curr_file_node->filename) != 0) {
       curr_file_node = curr_file_node->next;
    }
    if (NULL == curr_file_node) {
@@ -323,7 +321,7 @@ static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
    fnname_hash = hash(fn_name, N_FN_ENTRIES);
    curr_fn_node = curr_file_node->fns[fnname_hash];
    while (NULL != curr_fn_node && 
-          strcmp(fn_name, curr_fn_node->fn_name) != 0) {
+          VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
       curr_fn_node = curr_fn_node->next;
    }
    if (NULL == curr_fn_node) {
@@ -790,7 +788,7 @@ static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
 
       /* Allow for filename switching in the middle of a BB;  if this happens,
        * must print the new filename with the function name. */
-      if (0 != strcmp(fl_buf, curr_file)) {
+      if (0 != VG_(strcmp)(fl_buf, curr_file)) {
          VG_(strcpy)(curr_file, fl_buf);
          VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
          VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
@@ -798,7 +796,7 @@ static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
 
       /* If the function name for this instruction doesn't match that of the
        * first instruction in the BB, print warning. */
-      if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
+      if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
          VG_(printf)("Mismatched function names\n");
          VG_(printf)("  filenames: BB:%s, instr:%s;"
                      "  fn_names:  BB:%s, instr:%s;"
@@ -1071,3 +1069,13 @@ void VG_(show_cachesim_results)(Int client_argc, Char** client_argv)
    VGP_POPCC;
 }
 
+/* Hook run for each discarded translation; at present it only logs it. */
+void VG_(cachesim_notify_discard) ( TTEntry* tte )
+{
+  VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
+               tte->orig_addr, (Int)tte->orig_size);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_cachesim.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_clientperms.c b/vg_clientperms.c
index 02d0b7bf89..e9ecbc420c 100644
--- a/vg_clientperms.c
+++ b/vg_clientperms.c
@@ -385,6 +385,10 @@ UInt VG_(handle_client_request) ( ThreadState* tst, UInt* arg_block )
          VG_(detect_memory_leaks)();
          return 0; /* return value is meaningless */
 
+      case VG_USERREQ__DISCARD_TRANSLATIONS:
+         VG_(invalidate_translations)( arg[1], arg[2] );
+         return 0;  /* return value is meaningless */
+
       default:
          VG_(message)(Vg_UserMsg, 
                       "Warning: unknown client request code %d", arg[0]);
diff --git a/vg_constants.h b/vg_constants.h
index 710b12cb90..252353c468 100644
--- a/vg_constants.h
+++ b/vg_constants.h
@@ -90,16 +90,6 @@
 /* Constants for the fast original-code-write check cache. */
 
 
-/* Usually you want this to be zero. */
-#define VG_SMC_FASTCHECK_IN_C 0
-
-#define VG_SMC_CACHE_BITS  19
-#define VG_SMC_CACHE_SIZE  (1 << VG_SMC_CACHE_BITS)
-#define VG_SMC_CACHE_MASK  ((VG_SMC_CACHE_SIZE) - 1)
-
-#define VG_SMC_CACHE_SHIFT 6
-
-
 /* Assembly code stubs make these requests ... */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
 #define VG_USERREQ__PTHREAD_RETURNS         0x4002
diff --git a/vg_from_ucode.c b/vg_from_ucode.c
index 214d2ca100..573ee93271 100644
--- a/vg_from_ucode.c
+++ b/vg_from_ucode.c
@@ -1524,56 +1524,6 @@ static void synth_cmovl_reg_reg ( Condcode cond, Int src, Int dst )
 }
 
 
-/* A word in memory containing a pointer to vg_helper_smc_check4.
-   Never changes. 
-*/
-static const Addr vg_helper_smc_check4_ADDR
-   = (Addr)&VG_(helper_smc_check4);
-
-static void synth_orig_code_write_check ( Int sz, Int reg )
-{
-   UInt offset;
-
-   /*
-     In this example, reg is %eax and sz == 8:
-
-     -- check the first four bytes
-     0087 89C5                  movl    %eax, %ebp
-     0089 FF1544332211          call    * 0x11223344
-                  
-     -- check the second four
-     008f 89C5                  movl    %eax, %ebp
-     0091 83C504                addl    $4, %ebp
-     0094 FF1544332211          call    * 0x11223344
-
-     Because we can't call an absolute address (alas), the
-     address called is stored in memory at 0x11223344 in this
-     example, and it just contains the address of 
-     vg_helper_smc_check4 -- which is where we really want
-     to get to.
-   */
-   vg_assert(0);
-
-   if (sz < 4) sz = 4;
-
-   for (offset = 0; offset < sz; offset += 4) {
-
-      emit_movl_reg_reg ( reg, R_EBP );
-
-      if (offset > 0) {
-         newEmit();
-         emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset );
-         if (dis) VG_(printf)("\n");
-      }
-
-      newEmit();
-      emitB ( 0xFF ); emitB ( 0x15 ); 
-      emitL ( (Addr)&vg_helper_smc_check4_ADDR );
-      if (dis) VG_(printf)("\n");
-   }
-}
-
-
 /* Synthesise a minimal test (and which discards result) of reg32
    against lit.  It's always safe do simply
       emit_testv_lit_reg ( 4, lit, reg32 )
@@ -2264,8 +2214,10 @@ static void emitUInstr ( Int i, UInstr* u )
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == RealReg);
          synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
+	 /* No longer possible, but retained for illustrative purposes.
          if (u->smc_check) 
             synth_orig_code_write_check ( u->size, u->val2 );
+	 */
          break;
       }
 
@@ -2598,8 +2550,10 @@ static void emitUInstr ( Int i, UInstr* u )
          synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
                             u->val1 & 0xFF,
                             u->val2 );
+         /* No longer possible, but retained for illustrative purposes.
          if (u->opcode == FPU_W && u->smc_check) 
             synth_orig_code_write_check ( u->size, u->val2 );
+         */
          break;
 
       case FPU:
diff --git a/vg_helpers.S b/vg_helpers.S
index 62db9ec1d7..29689225d3 100644
--- a/vg_helpers.S
+++ b/vg_helpers.S
@@ -146,51 +146,6 @@ VG_(helper_value_check4_fail):
 	ret
 
 
-/* Do a original-code-write check for the address in %ebp. */
-.global VG_(helper_smc_check4)
-VG_(helper_smc_check4):
-#if VG_SMC_FASTCHECK_IN_C
-
-	# save the live regs
-	pushl	%eax
-	pushl	%ebx
-	pushl	%ecx
-	pushl	%edx
-	pushl	%esi
-	pushl	%edi
-	
-	pushl	%ebp
-	call	VG_(smc_check4)
-	addl	$4, %esp
-
-	popl	%edi
-	popl	%esi
-	popl	%edx
-	popl	%ecx
-	popl	%ebx
-	popl	%eax
-	
-	ret
-#else	
-	incl	VG_(smc_total_check4s)
-	pushl	%ebp
-	shrl	$VG_SMC_CACHE_SHIFT, %ebp
-	andl	$VG_SMC_CACHE_MASK, %ebp
-	cmpb	$0, VG_(smc_cache)(%ebp)
-	jnz	vg_smc_cache_failure
-	addl	$4, %esp
-	ret
-      vg_smc_cache_failure:
-	popl	%ebp
-	pushal
-	pushl	%ebp
-	call	VG_(smc_check4)
-	addl	$4, %esp
-	popal
-	ret
-#endif
-
-	
 /* Fetch the time-stamp-ctr reg.
    On entry:
 	dummy, replaced by %EAX value
diff --git a/vg_include.h b/vg_include.h
index 22e4f48830..7f44dde7d4 100644
--- a/vg_include.h
+++ b/vg_include.h
@@ -1301,7 +1301,7 @@ extern Bool VG_(what_line_is_this) ( Addr a,
 extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
                                      Char* fn_name, Int n_fn_name);
 
-extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
 
 
 /* ---------------------------------------------------------------------
@@ -1459,21 +1459,6 @@ extern UInt VG_(translations_needing_spill);
 /* total of register ranks over all translations */
 extern UInt VG_(total_reg_rank);
 
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-//extern UInt VG_(smc_total_check4s);
-
-/* Number of writes which the fast smc check couldn't show were
-   harmless. */
-extern UInt VG_(smc_cache_passed);
-
-/* Numnber of writes which really did write on original code. */
-extern UInt VG_(smc_fancy_passed);
-
-/* Number of translations discarded as a result. */
-//extern UInt VG_(smc_discard_count);
-
 /* Counts pertaining to internal sanity checking. */
 extern UInt VG_(sanity_fast_count);
 extern UInt VG_(sanity_slow_count);
@@ -1590,11 +1575,9 @@ extern void VG_(maybe_do_lru_pass) ( void );
 extern void VG_(flush_transtab) ( void );
 extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size );
 extern void VG_(add_to_trans_tab) ( TTEntry* tte );
+extern void VG_(invalidate_translations) ( Addr start, UInt range );
 
-extern void VG_(smc_mark_original) ( Addr original_addr, 
-                                     Int original_len );
-
-extern void VG_(init_transtab_and_SMC) ( void );
+extern void VG_(init_tt_tc) ( void );
 
 extern void VG_(sanity_check_tc_tt) ( void );
 extern Addr VG_(search_transtab) ( Addr original_addr );
@@ -1667,9 +1650,6 @@ extern UInt VG_(run_innerloop) ( void );
    Exports of vg_helpers.S
    ------------------------------------------------------------------ */
 
-/* SMC fast checks. */
-extern void VG_(helper_smc_check4);
-
 /* Mul, div, etc, -- we don't codegen these directly. */
 extern void VG_(helper_idiv_64_32);
 extern void VG_(helper_div_64_32);
@@ -1729,6 +1709,9 @@ extern void VG_(show_cachesim_results)( Int client_argc, Char** client_argv );
 extern void VG_(cachesim_log_non_mem_instr)(  iCC* cc );
 extern void VG_(cachesim_log_mem_instr)    ( idCC* cc, Addr data_addr );
 
+extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+
+
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
    ------------------------------------------------------------------ */
diff --git a/vg_main.c b/vg_main.c
index a7e41b2dda..94e175c70c 100644
--- a/vg_main.c
+++ b/vg_main.c
@@ -381,22 +381,6 @@ UInt VG_(translations_needing_spill) = 0;
 UInt VG_(total_reg_rank) = 0;
 
 
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-UInt VG_(smc_total_check4s) = 0;
-
-/* Number of writes which the fast smc check couldn't show were
-   harmless. */
-UInt VG_(smc_cache_passed) = 0;
-
-/* Numnber of writes which really did write on original code. */
-UInt VG_(smc_fancy_passed) = 0;
-
-/* Number of translations discarded as a result. */
-UInt VG_(smc_discard_count) = 0;
-
-
 /* Counts pertaining to internal sanity checking. */
 UInt VG_(sanity_fast_count) = 0;
 UInt VG_(sanity_slow_count) = 0;
@@ -954,13 +938,6 @@ static void vg_show_counts ( void )
                 VG_(uinstrs_prealloc),
                 VG_(uinstrs_spill),
                 VG_(total_reg_rank) );
-   VG_(message)(Vg_DebugMsg, 
-                "smc-check: %d checks, %d fast pass, "
-                "%d slow pass, %d discards.",
-		VG_(smc_total_check4s),
-		VG_(smc_cache_passed),
-		VG_(smc_fancy_passed),
-		VG_(smc_discard_count) );
    VG_(message)(Vg_DebugMsg, 
                 "   sanity: %d cheap, %d expensive checks.",
                 VG_(sanity_fast_count), 
@@ -1020,11 +997,12 @@ void VG_(main) ( void )
       VGP_PUSHCC(VgpInitAudit);
       VGM_(init_memory_audit)();
       VGP_POPCC;
-      VGP_PUSHCC(VgpReadSyms);
-      VG_(read_symbols)();
-      VGP_POPCC;
    }
 
+   VGP_PUSHCC(VgpReadSyms);
+   VG_(read_symbols)();
+   VGP_POPCC;
+
    /* End calibration of our RDTSC-based clock, leaving it as long as
       we can. */
    VG_(end_rdtsc_calibration)();
@@ -1033,7 +1011,7 @@ void VG_(main) ( void )
       carefully sets up the permissions maps to cover the anonymous
       mmaps for the translation table and translation cache, which
       wastes > 20M of virtual address space. */
-   VG_(init_transtab_and_SMC)();
+   VG_(init_tt_tc)();
 
    if (VG_(clo_verbosity) == 1) {
       VG_(message)(Vg_UserMsg, 
diff --git a/vg_scheduler.c b/vg_scheduler.c
index d1d792a000..57d687d5f8 100644
--- a/vg_scheduler.c
+++ b/vg_scheduler.c
@@ -330,8 +330,6 @@ void create_translation_for ( ThreadId tid, Addr orig_addr )
    VG_(overall_in_count) ++;
    VG_(overall_in_osize) += orig_size;
    VG_(overall_in_tsize) += trans_size;
-   /* Record translated area for SMC detection. */
-   VG_(smc_mark_original) ( orig_addr, orig_size );
 }
 
 
@@ -2684,6 +2682,7 @@ void do_nontrivial_clientreq ( ThreadId tid )
       case VG_USERREQ__MAKE_NOACCESS_STACK:
       case VG_USERREQ__RUNNING_ON_VALGRIND:
       case VG_USERREQ__DO_LEAK_CHECK:
+      case VG_USERREQ__DISCARD_TRANSLATIONS:
          SET_EDX(
             tid, 
             VG_(handle_client_request) ( &VG_(threads)[tid], arg )
diff --git a/vg_symtab2.c b/vg_symtab2.c
index c781751985..eb3b39428d 100644
--- a/vg_symtab2.c
+++ b/vg_symtab2.c
@@ -36,13 +36,16 @@
 
 /* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
    dlopen()ed libraries, which is something that KDE3 does a lot.
-   Still kludgey, though less than before:
 
-   * we don't check whether we should throw away some symbol tables 
-     when munmap() happens
+   Stabs reader greatly improved by Nick Nethercote, Apr 02.
 
-   * symbol table reading code for ELF binaries is a shambles.  
-     Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
+   16 May 02: when notified about munmap, return a Bool indicating
+   whether or not the area being munmapped had executable permissions.
+   This is then used to determine whether or not
+   VG_(invalidate_translations) should be called for that area.  In order
+   that this work even if --instrument=no, in this case we still keep
+   track of the mapped executable segments, but do not load any debug
+   info or symbols.
 */
 
 /*------------------------------------------------------------*/
@@ -1181,9 +1184,11 @@ void read_symtab_callback (
       = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
 
    /* And actually fill it up. */
-   vg_read_lib_symbols ( si );
-   canonicaliseSymtab ( si );
-   canonicaliseLoctab ( si );
+   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
+      vg_read_lib_symbols ( si );
+      canonicaliseSymtab ( si );
+      canonicaliseLoctab ( si );
+   }
 }
 
 
@@ -1197,9 +1202,6 @@ void read_symtab_callback (
    which happen to correspond to the munmap()d area.  */
 void VG_(read_symbols) ( void )
 {
-   if (! VG_(clo_instrument) && ! VG_(clo_cachesim)) 
-      return;
-
    VG_(read_procselfmaps) ( read_symtab_callback );
 
    /* Do a sanity check on the symbol tables: ensure that the address
@@ -1222,7 +1224,6 @@ void VG_(read_symbols) ( void )
            /* the main assertion */
            overlap = (lo <= lo2 && lo2 <= hi)
                       || (lo <= hi2 && hi2 <= hi);
-           //vg_assert(!overlap);
 	   if (overlap) {
               VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
               ppSegInfo ( si );
@@ -1240,15 +1241,16 @@ void VG_(read_symbols) ( void )
    to a segment for a .so, and if so discard the relevant SegInfo.
    This might not be a very clever idea from the point of view of
    accuracy of error messages, but we need to do it in order to
-   maintain the no-overlapping invariant.  
+   maintain the no-overlapping invariant.
+
+   16 May 02: Returns a Bool indicating whether or not the discarded
+   range falls inside a known executable segment.  See comment at top
+   of file for why.
 */
-void VG_(symtab_notify_munmap) ( Addr start, UInt length )
+Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
 {
    SegInfo *prev, *curr;
 
-   if (! VG_(clo_instrument)) 
-     return;
-
    prev = NULL;
    curr = segInfo;
    while (True) {
@@ -1257,7 +1259,8 @@ void VG_(symtab_notify_munmap) ( Addr start, UInt length )
       prev = curr;
       curr = curr->next;
    }
-   if (curr == NULL) return;
+   if (curr == NULL) 
+      return False;
 
    VG_(message)(Vg_UserMsg, 
                 "discard syms in %s due to munmap()", 
@@ -1272,6 +1275,7 @@ void VG_(symtab_notify_munmap) ( Addr start, UInt length )
    }
 
    freeSegInfo(curr);
+   return True;
 }
 
 
diff --git a/vg_syscall_mem.c b/vg_syscall_mem.c
index ac63267ae1..6d4e4975a4 100644
--- a/vg_syscall_mem.c
+++ b/vg_syscall_mem.c
@@ -487,12 +487,15 @@ void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid )
          KERNEL_DO_SYSCALL(tid,res);
          if (!VG_(is_kerror)(res)) {
             /* Copied from munmap() wrapper. */
+            Bool munmap_exe;
             Addr start  = arg1;
             Addr length = arg2;
             while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
             while (((start+length) % VKI_BYTES_PER_PAGE) > 0) { length++; }
             make_noaccess( start, length );
-            VG_(symtab_notify_munmap) ( start, length );
+            munmap_exe = VG_(symtab_notify_munmap) ( start, length );
+            if (munmap_exe)
+               VG_(invalidate_translations) ( start, length );
             approximate_mmap_permissions( (Addr)res, arg3, arg4 );
          }
          break;         
@@ -2070,6 +2073,7 @@ void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid )
                pages.  If we don't do that, our idea of addressible
                memory diverges from that of the kernel's, which causes
                the leak detector to crash. */
+            Bool munmap_exe;
             Addr start = arg1;
             Addr length = arg2;
             while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
@@ -2083,7 +2087,9 @@ void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid )
             /* Tell our symbol table machinery about this, so that if
                this happens to be a .so being unloaded, the relevant
                symbols are removed too. */
-            VG_(symtab_notify_munmap) ( start, length );
+            munmap_exe = VG_(symtab_notify_munmap) ( start, length );
+            if (munmap_exe)
+               VG_(invalidate_translations) ( start, length );
          }
          break;
 
diff --git a/vg_translate.c b/vg_translate.c
index 1e4bff28d8..0a806944ec 100644
--- a/vg_translate.c
+++ b/vg_translate.c
@@ -297,7 +297,7 @@ Bool VG_(anyFlagUse) ( UInstr* u )
 
    Important!  If you change the set of allocatable registers from
    %eax, %ebx, %ecx, %edx, %esi you must change the
-   save/restore sequences in vg_helper_smc_check4 to match!  
+   save/restore sequences in various places to match!  
 */
 __inline__ Int VG_(rankToRealRegNo) ( Int rank )
 {
diff --git a/vg_transtab.c b/vg_transtab.c
index d0f0eb1e2f..a364df0b86 100644
--- a/vg_transtab.c
+++ b/vg_transtab.c
@@ -32,6 +32,8 @@
 #include "vg_include.h"
 #include "vg_constants.h"
 
+/* #define DEBUG_TRANSTAB */
+
 
 /*------------------------------------------------------------*/
 /*--- Management of the LRU-based translation table+cache. ---*/
@@ -42,7 +44,7 @@
    of code retranslation.  */
 
 /* Size of the translation cache, in bytes. */
-#define VG_TC_SIZE /*16000000*/ 32000000 /*40000000*/
+#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/
 
 /* Do a LRU pass when the translation cache becomes this full. */
 #define VG_TC_LIMIT_PERCENT 98
@@ -52,7 +54,7 @@
 
 /* Number of entries in the translation table.  This must be a prime
    number in order to make the hashing work properly. */
-#define VG_TT_SIZE /*100129*/ 200191 /*250829*/
+#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/
 
 /* Do an LRU pass when the translation table becomes this full. */
 #define VG_TT_LIMIT_PERCENT /*67*/ 80
@@ -64,9 +66,12 @@
    N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago.  */
 #define VG_N_EPOCHS /*2000*/ /*4000*/ 20000
 
-/* This TT entry is empty. */
+/* This TT entry is empty.  There is no associated TC storage. */
 #define VG_TTE_EMPTY   ((Addr)1)
-/* This TT entry has been deleted. */
+/* This TT entry has been deleted, in the sense that it does not
+   contribute to the orig->trans mapping.  However, the ex-translation
+   it points at still occupies space in TC.  This slot cannot be
+   re-used without doing an LRU pass. */
 #define VG_TTE_DELETED ((Addr)3)
 
 /* The TC.  This used to be statically allocated, but that forces many
@@ -77,7 +82,8 @@
 */
 static UChar* vg_tc = NULL;
 
-/* Count of bytes used in the TC. */
+/* Count of bytes used in the TC.  This includes those pointed to from
+   VG_TTE_DELETED entries. */
 static Int vg_tc_used = 0;
 
 /* The TT.  Like TC, for the same reason, is dynamically allocated at
@@ -86,7 +92,7 @@ static Int vg_tc_used = 0;
 */
 static TTEntry* vg_tt = NULL;
 
-/* Count of non-empty, non-deleted TT entries. */
+/* Count of non-empty TT entries.  This includes deleted ones. */
 static Int vg_tt_used = 0;
 
 /* Fast helper for the TT.  A direct-mapped cache which holds a
@@ -135,6 +141,10 @@ void VG_(maybe_do_lru_pass) ( void )
    if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
       return;
 
+#  ifdef DEBUG_TRANSTAB
+   VG_(sanity_check_tc_tt)();
+#  endif
+
    VGP_PUSHCC(VgpDoLRU);
    /*   
    VG_(printf)(
@@ -157,8 +167,9 @@ void VG_(maybe_do_lru_pass) ( void )
       vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
 
    for (i = 0; i < VG_TT_SIZE; i++) {
-      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
-          vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY 
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) 
+            continue;
       j = vg_tt[i].mru_epoch;
       vg_assert(j <= VG_(current_epoch));
       j = VG_(current_epoch) - j;
@@ -200,11 +211,11 @@ void VG_(maybe_do_lru_pass) ( void )
       recently used at most thresh epochs ago.  Traverse the TT and
       mark such entries as deleted. */
    for (i = 0; i < VG_TT_SIZE; i++) {
-      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
-         vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY 
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) 
+         continue;
       if (vg_tt[i].mru_epoch <= thresh) {
          vg_tt[i].orig_addr = VG_TTE_DELETED;
-         vg_tt_used--;
 	 VG_(this_epoch_out_count) ++;
 	 VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
 	 VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
@@ -214,9 +225,6 @@ void VG_(maybe_do_lru_pass) ( void )
       }
    }
 
-   vg_assert(vg_tt_used >= 0);
-   vg_assert(vg_tt_used <= tt_target);
-
    /* Now compact the TC, sliding live entries downwards to fill spaces
       left by deleted entries.  In this loop, r is the offset in TC of
       the current translation under consideration, and w is the next
@@ -241,6 +249,9 @@ void VG_(maybe_do_lru_pass) ( void )
             vg_tc[w+i] = vg_tc[r+i];
          tte->trans_addr = (Addr)&vg_tc[w+4];
          w += 4+tte->trans_size;
+      } else {
+         tte->orig_addr = VG_TTE_EMPTY;
+         vg_tt_used--;
       }
       r += 4+tte->trans_size;
    }
@@ -252,6 +263,9 @@ void VG_(maybe_do_lru_pass) ( void )
    vg_assert(w <= tc_target);
    vg_tc_used = w;
 
+   vg_assert(vg_tt_used >= 0);
+   vg_assert(vg_tt_used <= tt_target);
+
    /* Invalidate the fast cache, since it is now out of date.  It will get
       reconstructed incrementally when the client resumes. */
    VG_(invalidate_tt_fast)();
@@ -274,6 +288,11 @@ void VG_(maybe_do_lru_pass) ( void )
       );
 
    /* Reconstruct the SMC detection structures. */
+#  ifdef DEBUG_TRANSTAB
+   for (i = 0; i < VG_TT_SIZE; i++)
+      vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
+#  endif
+   VG_(sanity_check_tc_tt)();
 
    VGP_POPCC;
 }
@@ -290,7 +309,6 @@ void VG_(sanity_check_tc_tt) ( void )
    for (i = 0; i < VG_TT_SIZE; i++) {
       tte = &vg_tt[i];
       if (tte->orig_addr == VG_TTE_EMPTY) continue;
-      if (tte->orig_addr == VG_TTE_DELETED) continue;
       vg_assert(tte->mru_epoch >= 0);
       vg_assert(tte->mru_epoch <= VG_(current_epoch));
       counted_entries++;
@@ -323,8 +341,7 @@ extern void VG_(add_to_trans_tab) ( TTEntry* tte )
    while (True) {
       if (vg_tt[i].orig_addr == tte->orig_addr)
          VG_(panic)("add_to_trans_tab: duplicate");
-      if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
-          vg_tt[i].orig_addr == VG_TTE_EMPTY) {
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY) {
          /* Put it here, and set the back pointer. */
          vg_tt[i] = *tte;
          VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
@@ -377,8 +394,8 @@ void VG_(invalidate_tt_fast)( void )
 */
 static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
 {
-  //static Int queries = 0;
-  //static Int probes = 0;
+   //static Int queries = 0;
+   //static Int probes = 0;
    Int i;
    /* Hash to get initial probe point. */
    //   if (queries == 10000) {
@@ -388,7 +405,7 @@ static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
    //queries++;
    i = ((UInt)orig_addr) % VG_TT_SIZE;
    while (True) {
-     //probes++;
+      //probes++;
       if (vg_tt[i].orig_addr == orig_addr)
          return &vg_tt[i];
       if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
@@ -426,228 +443,58 @@ Addr VG_(search_transtab) ( Addr original_addr )
 }
 
 
-/*------------------------------------------------------------*/
-/*--- Detecting and handling self-modifying code.          ---*/
-/*------------------------------------------------------------*/
-
-/* This mechanism uses two data structures:
-
-   vg_oldmap -- array[64k] of Bool, which approximately records
-   parts of the address space corresponding to code for which
-   a translation exists in the translation table.  vg_oldmap is
-   consulted at each write, to determine whether that write might
-   be writing a code address; if so, the program is stopped at 
-   the next jump, and the corresponding translations are invalidated.
-
-   Precise semantics: vg_oldmap[(a >> 8) & 0xFFFF] is true for all
-   addresses a containing a code byte which has been translated.  So
-   it acts kind-of like a direct-mapped cache with 64k entries.
-
-   The second structure is vg_CAW, a small array of addresses at which
-   vg_oldmap indicates a code write may have happened.  This is
-   (effectively) checked at each control transfer (jump), so that
-   translations can be discarded before going on.  An array is
-   somewhat overkill, since it strikes me as very unlikely that a
-   single basic block will do more than one code write.  Nevertheless
-   ...  
-
-   ToDo: make this comment up-to-date.
+/* Invalidate translations of original code [start .. start + range - 1].
+   This is slow, so you *really* don't want to call it very often. 
 */
-
-
-/* Definitions for the self-modifying-code detection cache, intended
-   as a fast check which clears the vast majority of writes.  */
-
-#define VG_SMC_CACHE_HASH(aaa) \
-   ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK)
-
-Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE];
-
-
-/* Definitions for the fallback mechanism, which, more slowly,
-   provides a precise record of which words in the address space
-   belong to original code. */
-
-typedef struct { UChar chars[2048]; } VgSmcSecondary;
-
-static VgSmcSecondary* vg_smc_primary[65536];
-
-static VgSmcSecondary* vg_smc_new_secondary ( void )
-{
-   Int i;
-   VgSmcSecondary* sec 
-      = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) );
-   for (i = 0; i < 2048; i++)
-      sec->chars[i] = 0;
-   return sec;
-}
-
-#define GET_BIT_ARRAY(arr,indx)                      \
-   (1 & (  ((UChar*)arr)[((UInt)indx) / 8]           \
-           >> ( ((UInt)indx) % 8) ) )
-
-#define SET_BIT_ARRAY(arr,indx)                      \
-   ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8)
-
-
-/* Finally, a place to record the original-code-write addresses
-   detected in a basic block. */
-
-#define VG_ORIGWRITES_SIZE 10
-
-static Addr vg_origwrites[VG_ORIGWRITES_SIZE];
-static Int  vg_origwrites_used;
-
-
-/* Call here to check a written address. */
-
-void VG_(smc_check4) ( Addr a )
+void VG_(invalidate_translations) ( Addr start, UInt range )
 {
-   UInt bit_index;
-   VgSmcSecondary* smc_secondary;
+   Addr  i_start, i_end, o_start, o_end;
+   UInt  out_count, out_osize, out_tsize;
+   Int   i;
 
-#  if VG_SMC_FASTCHECK_IN_C
-   VG_(smc_total_check4s)++;
-
-   /* Try the fast check first. */
-   if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return;
+#  ifdef DEBUG_TRANSTAB
+   VG_(sanity_check_tc_tt)();
 #  endif
+   i_start = start;
+   i_end   = start + range - 1;
+   out_count = out_osize = out_tsize = 0;
 
-   VG_(smc_cache_passed)++;
-
-   /* Need to do a slow check. */
-   smc_secondary = vg_smc_primary[a >> 16];
-   if (smc_secondary == NULL) return;
-
-   bit_index = (a & 0xFFFF) >> 2;
-   if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return;
-
-   VG_(smc_fancy_passed)++;
-
-   /* Detected a Real Live write to code which has been translated.
-      Note it. */
-   if (vg_origwrites_used == VG_ORIGWRITES_SIZE)
-      VG_(panic)("VG_ORIGWRITES_SIZE is too small; "
-                 "increase and recompile.");
-   vg_origwrites[vg_origwrites_used] = a;
-   vg_origwrites_used++;
-
-   VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a);
-
-   /* Force an exit before the next basic block, so the translation
-      cache can be flushed appropriately. */
-   //   VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr);
-   //VG_(dispatch_ctr)       = 1;
-   //VG_(interrupt_reason)   = VG_Y_SMC;
-}
-
-
-/* Mark an address range as containing an original translation,
-   updating both the fast-check cache and the slow-but-correct data
-   structure.  
-*/
-void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size )
-{
-   Addr a;
-   VgSmcSecondary* smc_secondary;
-   UInt bit_index;
-
-   for (a = orig_addr; a < orig_addr+orig_size; a++) {
-
-      VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True;
-
-      smc_secondary = vg_smc_primary[a >> 16];
-      if (smc_secondary == NULL)
-         smc_secondary = 
-         vg_smc_primary[a >> 16] = vg_smc_new_secondary();
-
-      bit_index = (a & 0xFFFF) >> 2;
-      SET_BIT_ARRAY(smc_secondary->chars, bit_index);      
+   for (i = 0; i < VG_TT_SIZE; i++) {
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      o_start = vg_tt[i].orig_addr;
+      o_end = o_start + vg_tt[i].orig_size - 1;
+      if (o_end < i_start || o_start > i_end)
+         continue;
+      if (VG_(clo_cachesim))
+         VG_(cachesim_notify_discard)( & vg_tt[i] );
+      vg_tt[i].orig_addr = VG_TTE_DELETED;
+      VG_(this_epoch_out_count) ++;
+      VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
+      VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
+      VG_(overall_out_count) ++;
+      VG_(overall_out_osize) += vg_tt[i].orig_size;
+      VG_(overall_out_tsize) += vg_tt[i].trans_size;
+      out_count ++;
+      out_osize += vg_tt[i].orig_size;
+      out_tsize += vg_tt[i].trans_size;
    }
-}
-
 
-/* Discard any translations whose original code overlaps with the
-   range w_addr .. w_addr+3 inclusive. 
-*/
-__attribute__ ((unused))
-static void discard_translations_bracketing ( Addr w_addr )
-{
-#  if 0
-   Int      i, rd, wr;
-   Addr     o_start, o_end;
-   TTEntry* tt;
-
-   for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) {
-      tt = vg_transtab[i];
-      wr = 0;
-      for (rd = 0; rd < vg_transtab_used[i]; rd++) {
-         o_start = tt[rd].orig_addr;
-         o_end   = o_start + tt[rd].orig_size;
-         if (w_addr > o_end || (w_addr+3) < o_start) {
-            /* No collision possible; keep this translation */
-            VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size );
-            if (wr < rd) vg_transtab[wr] = vg_transtab[rd];
-            wr++;
-	 } else {
-            /* Possible collision; discard. */
-            vg_smc_discards++;
-            VG_(message) (Vg_DebugMsg, 
-                             "discarding translation of %p .. %p",
-                             tt[rd].orig_addr, 
-                             tt[rd].orig_addr + tt[rd].orig_size - 1);
-            VG_(free)((void*)tt[rd].trans_addr);
-         }         
+   if (out_count > 0) {
+      VG_(invalidate_tt_fast)();
+      VG_(sanity_check_tc_tt)();
+#     ifdef DEBUG_TRANSTAB
+      { Addr aa;
+        for (aa = i_start; aa <= i_end; aa++)
+           vg_assert(search_trans_table ( aa ) == NULL);
       }
-      vg_transtab_used[i] = wr;
-   }
-#  endif   
-}
-
-
-/* Top-level function in charge of discarding out-of-date translations
-   following the discovery of a (potential) original-code-write. 
-*/
-void VG_(flush_transtab) ( void )
-{
-#  if 0
-   Addr w_addr;
-   Int  i, j;
-
-   /* We shouldn't be here unless a code write was detected. */
-   vg_assert(vg_origwrites_used > 0);
-
-   /* Instead of incrementally fixing up the translation table cache,
-      just invalidate the whole darn thing.  Pray this doesn't happen
-      very often :) */
-   for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++)
-      VG_(transtab_cache_orig)[i] = 
-      VG_(transtab_cache_trans)[i] = (Addr)0;
-
-   /* Clear out the fast cache; discard_translations_bracketing
-      reconstructs it. */
-   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
-      VG_(smc_cache)[i] = False;
-
-   /* And also clear the slow-but-correct table. */
-   for (i = 0; i < 65536; i++) {
-      VgSmcSecondary* sec = vg_smc_primary[i];
-      if (sec)
-         for (j = 0; j < 2048; j++)
-            sec->chars[j] = 0;         
+#     endif
    }
 
-   /* This doesn't need to be particularly fast, since we (presumably)
-      don't have to handle particularly frequent writes to code
-      addresses. */
-   while (vg_origwrites_used > 0) {
-      vg_origwrites_used--;
-      w_addr = vg_origwrites[vg_origwrites_used];
-      discard_translations_bracketing ( w_addr );
-   }
-
-   vg_assert(vg_origwrites_used == 0);
-#  endif
+   if (VG_(clo_verbosity) > 1)   /* summary only when running verbosely */
+      VG_(message)(Vg_UserMsg,
+         "discard %d (%d -> %d) translations in range %p .. %p",
+         out_count, out_osize, out_tsize, i_start, i_end );
 }
 
 
@@ -655,7 +502,7 @@ void VG_(flush_transtab) ( void )
 /*--- Initialisation.                                      ---*/
 /*------------------------------------------------------------*/
 
-void VG_(init_transtab_and_SMC) ( void )
+void VG_(init_tt_tc) ( void )
 {
    Int i;
 
@@ -678,17 +525,6 @@ void VG_(init_transtab_and_SMC) ( void )
       at the first TT entry, which is, of course, empty. */
    for (i = 0; i < VG_TT_FAST_SIZE; i++)
       VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
-
-   /* No part of the address space has any translations. */
-   for (i = 0; i < 65536; i++)
-      vg_smc_primary[i] = NULL;
-
-   /* ... and the associated fast-check cache reflects this. */
-   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
-      VG_(smc_cache)[i] = False;
-
-   /* Finally, no original-code-writes have been recorded. */
-   vg_origwrites_used = 0;
 }
 
 /*--------------------------------------------------------------------*/