+
/*--------------------------------------------------------------------*/
/*--- The cache simulation framework: instrumentation, recording ---*/
/*--- and results printing. ---*/
Copyright (C) 2000-2002 Julian Seward
jseward@acm.org
- Julian_Seward@muraroa.demon.co.uk
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
The GNU General Public License is contained in the file LICENSE.
*/
-#include <string.h>
-
#include "vg_include.h"
#include "vg_cachesim_L2.c"
filename_hash = hash(filename, N_FILE_ENTRIES);
curr_file_node = BBCC_table[filename_hash];
while (NULL != curr_file_node &&
- strcmp(filename, curr_file_node->filename) != 0) {
+ VG_(strcmp)(filename, curr_file_node->filename) != 0) {
curr_file_node = curr_file_node->next;
}
if (NULL == curr_file_node) {
fnname_hash = hash(fn_name, N_FN_ENTRIES);
curr_fn_node = curr_file_node->fns[fnname_hash];
while (NULL != curr_fn_node &&
- strcmp(fn_name, curr_fn_node->fn_name) != 0) {
+ VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
curr_fn_node = curr_fn_node->next;
}
if (NULL == curr_fn_node) {
/* Allow for filename switching in the middle of a BB; if this happens,
* must print the new filename with the function name. */
- if (0 != strcmp(fl_buf, curr_file)) {
+ if (0 != VG_(strcmp)(fl_buf, curr_file)) {
VG_(strcpy)(curr_file, fl_buf);
VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
/* If the function name for this instruction doesn't match that of the
* first instruction in the BB, print warning. */
- if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
+ if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
VG_(printf)("Mismatched function names\n");
VG_(printf)(" filenames: BB:%s, instr:%s;"
" fn_names: BB:%s, instr:%s;"
VGP_POPCC;
}
+
+void VG_(cachesim_notify_discard) ( TTEntry* tte )
+{
+ VG_(printf)( "cachesim_notify_discard: %p for %d\n",
+ tte->orig_addr, (Int)tte->orig_size);
+}
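+/* Note: VG_(invalidate_translations) in vg_transtab.c calls this for
+   each discarded translation when --cachesim=yes is in effect.  For
+   now it only reports the event, which is presumably why the manual
+   warns that VALGRIND_DISCARD_TRANSLATIONS does not yet interact well
+   with the cache simulator. */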
+
+/*--------------------------------------------------------------------*/
+/*--- end vg_cachesim.c ---*/
+/*--------------------------------------------------------------------*/
<body bgcolor="#ffffff">
<a name="title"> </a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
<center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
<p>
<center>
<li>Reading/writing memory after it has been free'd</li>
<li>Reading/writing off the end of malloc'd blocks</li>
<li>Reading/writing inappropriate areas on the stack</li>
- <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+ <li>Memory leaks -- where pointers to malloc'd blocks are lost
+ forever</li>
+ <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
</ul>
Problems like these can be difficult to find by other means, often
all fairly dodgy and doesn't work at all if threads are
involved.</li><br>
<p>
-
- <li><code>--smc-check=none</code><br>
- <code>--smc-check=some</code> [default]<br>
- <code>--smc-check=all</code>
- <p>How carefully should Valgrind check for self-modifying code
- writes, so that translations can be discarded? When
- "none", no writes are checked. When "some", only writes
- resulting from moves from integer registers to memory are
- checked. When "all", all memory writes are checked, even those
- with which are no sane program would generate code -- for
- example, floating-point writes.
- <p>
- NOTE that this is all a bit bogus. This mechanism has never
- been enabled in any snapshot of Valgrind which was made
- available to the general public, because the extra checks reduce
- performance, increase complexity, and I have yet to come across
- any programs which actually use self-modifying code. I think
- the flag is ignored.
- </li>
</ul>
right now. Returns no value. I guess this could be used to
incrementally check for leaks between arbitrary places in the
program's execution. Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+ of code in the specified address range. Useful if you are
+ debugging a JITter or some other dynamic code generation system.
+ After this call, attempts to execute code in the invalidated
+ address range will cause valgrind to make new translations of that
+ code, which is probably the semantics you want. Note that this is
+ implemented naively, and involves checking all 200191 entries in
+ the translation table to see if any of them overlap the specified
+ address range. So try not to call it often, or performance will
+ nosedive. Note that you can be clever about this: you only need
+ to call it when an area which previously contained code is
+ overwritten with new code. You can choose to write code into
+ fresh memory, and just call this occasionally to discard large
+ chunks of old code all at once.
+ <p>
+  Warning: minimally tested.  Also, doesn't interact well with the
+  cache simulator.  A short usage sketch follows this list.
</ul>
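<p>
As a sketch of how this request might be used (the buffer names here
are illustrative, not part of Valgrind's API): suppose your JITter has
just overwritten a region of generated code.
<pre>
    #include "valgrind.h"

    /* codebuf/codelen: hypothetical JIT output buffer */
    regenerate_code(codebuf, codelen);
    VALGRIND_DISCARD_TRANSLATIONS(codebuf, codelen);
</pre>
A complete, runnable example is the small test program added in this
snapshot, which runs a function, discards its translations, and then
runs it again.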
<p>
<code>malloc</code> is 8-aligned. Valgrind's allocator only
guarantees 4-alignment, so without the patch Mozilla makes an illegal
memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
<a name="install"></a>
running under Valgrind. This is due to the large amount of
administrative information maintained behind the scenes. Another
cause is that Valgrind dynamically translates the original
- executable and never throws any translation away, except in
- those rare cases where self-modifying code is detected.
- Translated, instrumented code is 12-14 times larger than the
- original (!) so you can easily end up with 15+ MB of
+ executable. Translated, instrumented code is 14-16 times larger
+ than the original (!) so you can easily end up with 30+ MB of
translations when running (eg) a web browser.
</li>
</ul>
translations. Subsequent jumps to that address will use this
translation.
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated. Such a write invalidates translations of code
-bracketing the written address. Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there. In this way,
-self modifying code is supported. In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code. Such checking is expensive, and in practice (fortunately)
+almost no applications need it. However, to help people who are
+debugging dynamic code generation systems, there is a Client Request
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range. So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
<p>The JITter translates basic blocks -- blocks of straight-line-code
-- as single entities. To minimise the considerable difficulties of
/* Constants for the fast original-code-write check cache. */
-/* Usually you want this to be zero. */
-#define VG_SMC_FASTCHECK_IN_C 0
-
-#define VG_SMC_CACHE_BITS 19
-#define VG_SMC_CACHE_SIZE (1 << VG_SMC_CACHE_BITS)
-#define VG_SMC_CACHE_MASK ((VG_SMC_CACHE_SIZE) - 1)
-
-#define VG_SMC_CACHE_SHIFT 6
-
-
/* Assembly code stubs make these requests ... */
#define VG_USERREQ__SIGNAL_RETURNS 0x4001
#define VG_USERREQ__PTHREAD_RETURNS 0x4002
}
-/* A word in memory containing a pointer to vg_helper_smc_check4.
- Never changes.
-*/
-static const Addr vg_helper_smc_check4_ADDR
- = (Addr)&VG_(helper_smc_check4);
-
-static void synth_orig_code_write_check ( Int sz, Int reg )
-{
- UInt offset;
-
- /*
- In this example, reg is %eax and sz == 8:
-
- -- check the first four bytes
- 0087 89C5 movl %eax, %ebp
- 0089 FF1544332211 call * 0x11223344
-
- -- check the second four
- 008f 89C5 movl %eax, %ebp
- 0091 83C504 addl $4, %ebp
- 0094 FF1544332211 call * 0x11223344
-
- Because we can't call an absolute address (alas), the
- address called is stored in memory at 0x11223344 in this
- example, and it just contains the address of
- vg_helper_smc_check4 -- which is where we really want
- to get to.
- */
- vg_assert(0);
-
- if (sz < 4) sz = 4;
-
- for (offset = 0; offset < sz; offset += 4) {
-
- emit_movl_reg_reg ( reg, R_EBP );
-
- if (offset > 0) {
- newEmit();
- emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset );
- if (dis) VG_(printf)("\n");
- }
-
- newEmit();
- emitB ( 0xFF ); emitB ( 0x15 );
- emitL ( (Addr)&vg_helper_smc_check4_ADDR );
- if (dis) VG_(printf)("\n");
- }
-}
-
-
/* Synthesise a minimal test (and which discards result) of reg32
   against lit.  It's always safe to simply
emit_testv_lit_reg ( 4, lit, reg32 )
vg_assert(u->tag1 == RealReg);
vg_assert(u->tag2 == RealReg);
synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
+ /* No longer possible, but retained for illustrative purposes.
if (u->smc_check)
synth_orig_code_write_check ( u->size, u->val2 );
+ */
break;
}
synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
u->val1 & 0xFF,
u->val2 );
+ /* No longer possible, but retained for illustrative purposes.
if (u->opcode == FPU_W && u->smc_check)
synth_orig_code_write_check ( u->size, u->val2 );
+ */
break;
case FPU:
ret
-/* Do a original-code-write check for the address in %ebp. */
-.global VG_(helper_smc_check4)
-VG_(helper_smc_check4):
-#if VG_SMC_FASTCHECK_IN_C
-
- # save the live regs
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- pushl %esi
- pushl %edi
-
- pushl %ebp
- call VG_(smc_check4)
- addl $4, %esp
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
-
- ret
-#else
- incl VG_(smc_total_check4s)
- pushl %ebp
- shrl $VG_SMC_CACHE_SHIFT, %ebp
- andl $VG_SMC_CACHE_MASK, %ebp
- cmpb $0, VG_(smc_cache)(%ebp)
- jnz vg_smc_cache_failure
- addl $4, %esp
- ret
- vg_smc_cache_failure:
- popl %ebp
- pushal
- pushl %ebp
- call VG_(smc_check4)
- addl $4, %esp
- popal
- ret
-#endif
-
-
/* Fetch the time-stamp-ctr reg.
On entry:
dummy, replaced by %EAX value
extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
Char* fn_name, Int n_fn_name);
-extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
/* ---------------------------------------------------------------------
/* total of register ranks over all translations */
extern UInt VG_(total_reg_rank);
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-//extern UInt VG_(smc_total_check4s);
-
-/* Number of writes which the fast smc check couldn't show were
- harmless. */
-extern UInt VG_(smc_cache_passed);
-
-/* Numnber of writes which really did write on original code. */
-extern UInt VG_(smc_fancy_passed);
-
-/* Number of translations discarded as a result. */
-//extern UInt VG_(smc_discard_count);
-
/* Counts pertaining to internal sanity checking. */
extern UInt VG_(sanity_fast_count);
extern UInt VG_(sanity_slow_count);
extern void VG_(flush_transtab) ( void );
extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size );
extern void VG_(add_to_trans_tab) ( TTEntry* tte );
+extern void VG_(invalidate_translations) ( Addr start, UInt range );
-extern void VG_(smc_mark_original) ( Addr original_addr,
- Int original_len );
-
-extern void VG_(init_transtab_and_SMC) ( void );
+extern void VG_(init_tt_tc) ( void );
extern void VG_(sanity_check_tc_tt) ( void );
extern Addr VG_(search_transtab) ( Addr original_addr );
Exports of vg_helpers.S
------------------------------------------------------------------ */
-/* SMC fast checks. */
-extern void VG_(helper_smc_check4);
-
/* Mul, div, etc, -- we don't codegen these directly. */
extern void VG_(helper_idiv_64_32);
extern void VG_(helper_div_64_32);
extern void VG_(cachesim_log_non_mem_instr)( iCC* cc );
extern void VG_(cachesim_log_mem_instr) ( idCC* cc, Addr data_addr );
+extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+
+
/* ---------------------------------------------------------------------
The state of the simulated CPU.
------------------------------------------------------------------ */
UInt VG_(total_reg_rank) = 0;
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-UInt VG_(smc_total_check4s) = 0;
-
-/* Number of writes which the fast smc check couldn't show were
- harmless. */
-UInt VG_(smc_cache_passed) = 0;
-
-/* Numnber of writes which really did write on original code. */
-UInt VG_(smc_fancy_passed) = 0;
-
-/* Number of translations discarded as a result. */
-UInt VG_(smc_discard_count) = 0;
-
-
/* Counts pertaining to internal sanity checking. */
UInt VG_(sanity_fast_count) = 0;
UInt VG_(sanity_slow_count) = 0;
VG_(uinstrs_prealloc),
VG_(uinstrs_spill),
VG_(total_reg_rank) );
- VG_(message)(Vg_DebugMsg,
- "smc-check: %d checks, %d fast pass, "
- "%d slow pass, %d discards.",
- VG_(smc_total_check4s),
- VG_(smc_cache_passed),
- VG_(smc_fancy_passed),
- VG_(smc_discard_count) );
VG_(message)(Vg_DebugMsg,
" sanity: %d cheap, %d expensive checks.",
VG_(sanity_fast_count),
VGP_PUSHCC(VgpInitAudit);
VGM_(init_memory_audit)();
VGP_POPCC;
- VGP_PUSHCC(VgpReadSyms);
- VG_(read_symbols)();
- VGP_POPCC;
}
+ VGP_PUSHCC(VgpReadSyms);
+ VG_(read_symbols)();
+ VGP_POPCC;
+
/* End calibration of our RDTSC-based clock, leaving it as long as
we can. */
VG_(end_rdtsc_calibration)();
carefully sets up the permissions maps to cover the anonymous
mmaps for the translation table and translation cache, which
wastes > 20M of virtual address space. */
- VG_(init_transtab_and_SMC)();
+ VG_(init_tt_tc)();
if (VG_(clo_verbosity) == 1) {
VG_(message)(Vg_UserMsg,
VG_(overall_in_count) ++;
VG_(overall_in_osize) += orig_size;
VG_(overall_in_tsize) += trans_size;
- /* Record translated area for SMC detection. */
- VG_(smc_mark_original) ( orig_addr, orig_size );
}
case VG_USERREQ__MAKE_NOACCESS_STACK:
case VG_USERREQ__RUNNING_ON_VALGRIND:
case VG_USERREQ__DO_LEAK_CHECK:
+ case VG_USERREQ__DISCARD_TRANSLATIONS:
SET_EDX(
tid,
VG_(handle_client_request) ( &VG_(threads)[tid], arg )
/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
dlopen()ed libraries, which is something that KDE3 does a lot.
- Still kludgey, though less than before:
- * we don't check whether we should throw away some symbol tables
- when munmap() happens
+ Stabs reader greatly improved by Nick Nethercote, Apr 02.
- * symbol table reading code for ELF binaries is a shambles.
- Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
+ 16 May 02: when notified about munmap, return a Bool indicating
+ whether or not the area being munmapped had executable permissions.
+ This is then used to determine whether or not
+ VG_(invalidate_translations) should be called for that area.  So that
+ this works even if --instrument=no, we still keep track of the mapped
+ executable segments in that case, but do not load any debug info or
+ symbols.
*/
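
/* A minimal sketch of the intended call pattern at the munmap
   notification site (the caller shown is illustrative; this file only
   establishes the return-a-Bool contract):

      Bool was_exe = VG_(symtab_notify_munmap) ( start, length );
      if (was_exe)
         VG_(invalidate_translations) ( start, length );
*/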
/*------------------------------------------------------------*/
= si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
/* And actually fill it up. */
- vg_read_lib_symbols ( si );
- canonicaliseSymtab ( si );
- canonicaliseLoctab ( si );
+ if (VG_(clo_instrument) || VG_(clo_cachesim)) {
+ vg_read_lib_symbols ( si );
+ canonicaliseSymtab ( si );
+ canonicaliseLoctab ( si );
+ }
}
which happen to correspond to the munmap()d area. */
void VG_(read_symbols) ( void )
{
- if (! VG_(clo_instrument) && ! VG_(clo_cachesim))
- return;
-
VG_(read_procselfmaps) ( read_symtab_callback );
/* Do a sanity check on the symbol tables: ensure that the address
/* the main assertion */
overlap = (lo <= lo2 && lo2 <= hi)
|| (lo <= hi2 && hi2 <= hi);
- //vg_assert(!overlap);
if (overlap) {
VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
ppSegInfo ( si );
to a segment for a .so, and if so discard the relevant SegInfo.
This might not be a very clever idea from the point of view of
accuracy of error messages, but we need to do it in order to
- maintain the no-overlapping invariant.
+ maintain the no-overlapping invariant.
+
+ 16 May 02: Returns a Bool indicating whether or not the discarded
+ range falls inside a known executable segment. See comment at top
+ of file for why.
*/
-void VG_(symtab_notify_munmap) ( Addr start, UInt length )
+Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
{
SegInfo *prev, *curr;
- if (! VG_(clo_instrument))
- return;
-
prev = NULL;
curr = segInfo;
while (True) {
prev = curr;
curr = curr->next;
}
- if (curr == NULL) return;
+ if (curr == NULL)
+ return False;
VG_(message)(Vg_UserMsg,
"discard syms in %s due to munmap()",
}
freeSegInfo(curr);
+ return True;
}
Important! If you change the set of allocatable registers from
%eax, %ebx, %ecx, %edx, %esi you must change the
- save/restore sequences in vg_helper_smc_check4 to match!
+ save/restore sequences in various places to match!
*/
__inline__ Int VG_(rankToRealRegNo) ( Int rank )
{
#include "vg_include.h"
#include "vg_constants.h"
+/* #define DEBUG_TRANSTAB */
+
/*------------------------------------------------------------*/
/*--- Management of the LRU-based translation table+cache. ---*/
of code retranslation. */
/* Size of the translation cache, in bytes. */
-#define VG_TC_SIZE /*16000000*/ 32000000 /*40000000*/
+#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/
/* Do a LRU pass when the translation cache becomes this full. */
#define VG_TC_LIMIT_PERCENT 98
/* Number of entries in the translation table. This must be a prime
number in order to make the hashing work properly. */
-#define VG_TT_SIZE /*100129*/ 200191 /*250829*/
+#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/
/* Do an LRU pass when the translation table becomes this full. */
#define VG_TT_LIMIT_PERCENT /*67*/ 80
N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago. */
#define VG_N_EPOCHS /*2000*/ /*4000*/ 20000
-/* This TT entry is empty. */
+/* This TT entry is empty. There is no associated TC storage. */
#define VG_TTE_EMPTY ((Addr)1)
-/* This TT entry has been deleted. */
+/* This TT entry has been deleted, in the sense that it does not
+ contribute to the orig->trans mapping. However, the ex-translation
+ it points at still occupies space in TC. This slot cannot be
+ re-used without doing an LRU pass. */
#define VG_TTE_DELETED ((Addr)3)
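
/* Summarising the two cases above (a sketch, not in the original
   source): a slot goes EMPTY -> in-use when a translation is added;
   in-use -> DELETED when an LRU pass or VG_(invalidate_translations)
   retires it; and DELETED -> EMPTY only when the next LRU pass
   compacts the TC and reclaims the storage.  This is why vg_tt_used
   counts deleted entries too. */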
/* The TC. This used to be statically allocated, but that forces many
*/
static UChar* vg_tc = NULL;
-/* Count of bytes used in the TC. */
+/* Count of bytes used in the TC. This includes those pointed to from
+ VG_TTE_DELETED entries. */
static Int vg_tc_used = 0;
/* The TT. Like TC, for the same reason, is dynamically allocated at
*/
static TTEntry* vg_tt = NULL;
-/* Count of non-empty, non-deleted TT entries. */
+/* Count of non-empty TT entries. This includes deleted ones. */
static Int vg_tt_used = 0;
/* Fast helper for the TT. A direct-mapped cache which holds a
if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
return;
+# ifdef DEBUG_TRANSTAB
+ VG_(sanity_check_tc_tt)();
+# endif
+
VGP_PUSHCC(VgpDoLRU);
/*
VG_(printf)(
vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
for (i = 0; i < VG_TT_SIZE; i++) {
- if (vg_tt[i].orig_addr == VG_TTE_EMPTY ||
- vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED)
+ continue;
j = vg_tt[i].mru_epoch;
vg_assert(j <= VG_(current_epoch));
j = VG_(current_epoch) - j;
recently used at most thresh epochs ago. Traverse the TT and
mark such entries as deleted. */
for (i = 0; i < VG_TT_SIZE; i++) {
- if (vg_tt[i].orig_addr == VG_TTE_EMPTY ||
- vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED)
+ continue;
if (vg_tt[i].mru_epoch <= thresh) {
vg_tt[i].orig_addr = VG_TTE_DELETED;
- vg_tt_used--;
VG_(this_epoch_out_count) ++;
VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
}
}
- vg_assert(vg_tt_used >= 0);
- vg_assert(vg_tt_used <= tt_target);
-
/* Now compact the TC, sliding live entries downwards to fill spaces
left by deleted entries. In this loop, r is the offset in TC of
the current translation under consideration, and w is the next
vg_tc[w+i] = vg_tc[r+i];
tte->trans_addr = (Addr)&vg_tc[w+4];
w += 4+tte->trans_size;
+ } else {
+ tte->orig_addr = VG_TTE_EMPTY;
+ vg_tt_used--;
}
r += 4+tte->trans_size;
}
vg_assert(w <= tc_target);
vg_tc_used = w;
+ vg_assert(vg_tt_used >= 0);
+ vg_assert(vg_tt_used <= tt_target);
+
/* Invalidate the fast cache, since it is now out of date. It will get
reconstructed incrementally when the client resumes. */
VG_(invalidate_tt_fast)();
);
/* Reconstruct the SMC detection structures. */
+# ifdef DEBUG_TRANSTAB
+ for (i = 0; i < VG_TT_SIZE; i++)
+ vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
+# endif
+ VG_(sanity_check_tc_tt)();
VGP_POPCC;
}
for (i = 0; i < VG_TT_SIZE; i++) {
tte = &vg_tt[i];
if (tte->orig_addr == VG_TTE_EMPTY) continue;
- if (tte->orig_addr == VG_TTE_DELETED) continue;
vg_assert(tte->mru_epoch >= 0);
vg_assert(tte->mru_epoch <= VG_(current_epoch));
counted_entries++;
while (True) {
if (vg_tt[i].orig_addr == tte->orig_addr)
VG_(panic)("add_to_trans_tab: duplicate");
- if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
- vg_tt[i].orig_addr == VG_TTE_EMPTY) {
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY) {
/* Put it here, and set the back pointer. */
vg_tt[i] = *tte;
VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
*/
static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
{
- //static Int queries = 0;
- //static Int probes = 0;
+ //static Int queries = 0;
+ //static Int probes = 0;
Int i;
/* Hash to get initial probe point. */
// if (queries == 10000) {
//queries++;
i = ((UInt)orig_addr) % VG_TT_SIZE;
while (True) {
- //probes++;
+ //probes++;
if (vg_tt[i].orig_addr == orig_addr)
return &vg_tt[i];
if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
}
-/*------------------------------------------------------------*/
-/*--- Detecting and handling self-modifying code. ---*/
-/*------------------------------------------------------------*/
-
-/* This mechanism uses two data structures:
-
- vg_oldmap -- array[64k] of Bool, which approximately records
- parts of the address space corresponding to code for which
- a translation exists in the translation table. vg_oldmap is
- consulted at each write, to determine whether that write might
- be writing a code address; if so, the program is stopped at
- the next jump, and the corresponding translations are invalidated.
-
- Precise semantics: vg_oldmap[(a >> 8) & 0xFFFF] is true for all
- addresses a containing a code byte which has been translated. So
- it acts kind-of like a direct-mapped cache with 64k entries.
-
- The second structure is vg_CAW, a small array of addresses at which
- vg_oldmap indicates a code write may have happened. This is
- (effectively) checked at each control transfer (jump), so that
- translations can be discarded before going on. An array is
- somewhat overkill, since it strikes me as very unlikely that a
- single basic block will do more than one code write. Nevertheless
- ...
-
- ToDo: make this comment up-to-date.
+/* Invalidate translations of original code [start .. start + range - 1].
+ This is slow, so you *really* don't want to call it very often.
*/
-
-
-/* Definitions for the self-modifying-code detection cache, intended
- as a fast check which clears the vast majority of writes. */
-
-#define VG_SMC_CACHE_HASH(aaa) \
- ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK)
-
-Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE];
-
-
-/* Definitions for the fallback mechanism, which, more slowly,
- provides a precise record of which words in the address space
- belong to original code. */
-
-typedef struct { UChar chars[2048]; } VgSmcSecondary;
-
-static VgSmcSecondary* vg_smc_primary[65536];
-
-static VgSmcSecondary* vg_smc_new_secondary ( void )
-{
- Int i;
- VgSmcSecondary* sec
- = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) );
- for (i = 0; i < 2048; i++)
- sec->chars[i] = 0;
- return sec;
-}
-
-#define GET_BIT_ARRAY(arr,indx) \
- (1 & ( ((UChar*)arr)[((UInt)indx) / 8] \
- >> ( ((UInt)indx) % 8) ) )
-
-#define SET_BIT_ARRAY(arr,indx) \
- ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8)
-
-
-/* Finally, a place to record the original-code-write addresses
- detected in a basic block. */
-
-#define VG_ORIGWRITES_SIZE 10
-
-static Addr vg_origwrites[VG_ORIGWRITES_SIZE];
-static Int vg_origwrites_used;
-
-
-/* Call here to check a written address. */
-
-void VG_(smc_check4) ( Addr a )
+void VG_(invalidate_translations) ( Addr start, UInt range )
{
- UInt bit_index;
- VgSmcSecondary* smc_secondary;
+ Addr i_start, i_end, o_start, o_end;
+ UInt out_count, out_osize, out_tsize;
+ Int i;
-# if VG_SMC_FASTCHECK_IN_C
- VG_(smc_total_check4s)++;
-
- /* Try the fast check first. */
- if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return;
+# ifdef DEBUG_TRANSTAB
+ VG_(sanity_check_tc_tt)();
# endif
+ i_start = start;
+ i_end = start + range - 1;
+ out_count = out_osize = out_tsize = 0;
- VG_(smc_cache_passed)++;
-
- /* Need to do a slow check. */
- smc_secondary = vg_smc_primary[a >> 16];
- if (smc_secondary == NULL) return;
-
- bit_index = (a & 0xFFFF) >> 2;
- if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return;
-
- VG_(smc_fancy_passed)++;
-
- /* Detected a Real Live write to code which has been translated.
- Note it. */
- if (vg_origwrites_used == VG_ORIGWRITES_SIZE)
- VG_(panic)("VG_ORIGWRITES_SIZE is too small; "
- "increase and recompile.");
- vg_origwrites[vg_origwrites_used] = a;
- vg_origwrites_used++;
-
- VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a);
-
- /* Force an exit before the next basic block, so the translation
- cache can be flushed appropriately. */
- // VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr);
- //VG_(dispatch_ctr) = 1;
- //VG_(interrupt_reason) = VG_Y_SMC;
-}
-
-
-/* Mark an address range as containing an original translation,
- updating both the fast-check cache and the slow-but-correct data
- structure.
-*/
-void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size )
-{
- Addr a;
- VgSmcSecondary* smc_secondary;
- UInt bit_index;
-
- for (a = orig_addr; a < orig_addr+orig_size; a++) {
-
- VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True;
-
- smc_secondary = vg_smc_primary[a >> 16];
- if (smc_secondary == NULL)
- smc_secondary =
- vg_smc_primary[a >> 16] = vg_smc_new_secondary();
-
- bit_index = (a & 0xFFFF) >> 2;
- SET_BIT_ARRAY(smc_secondary->chars, bit_index);
+ for (i = 0; i < VG_TT_SIZE; i++) {
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ o_start = vg_tt[i].orig_addr;
+ o_end = o_start + vg_tt[i].orig_size - 1;
+ if (o_end < i_start || o_start > i_end)
+ continue;
+ if (VG_(clo_cachesim))
+ VG_(cachesim_notify_discard)( & vg_tt[i] );
+ vg_tt[i].orig_addr = VG_TTE_DELETED;
+ VG_(this_epoch_out_count) ++;
+ VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
+ VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
+ VG_(overall_out_count) ++;
+ VG_(overall_out_osize) += vg_tt[i].orig_size;
+ VG_(overall_out_tsize) += vg_tt[i].trans_size;
+ out_count ++;
+ out_osize += vg_tt[i].orig_size;
+ out_tsize += vg_tt[i].trans_size;
}
-}
-
-/* Discard any translations whose original code overlaps with the
- range w_addr .. w_addr+3 inclusive.
-*/
-__attribute__ ((unused))
-static void discard_translations_bracketing ( Addr w_addr )
-{
-# if 0
- Int i, rd, wr;
- Addr o_start, o_end;
- TTEntry* tt;
-
- for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) {
- tt = vg_transtab[i];
- wr = 0;
- for (rd = 0; rd < vg_transtab_used[i]; rd++) {
- o_start = tt[rd].orig_addr;
- o_end = o_start + tt[rd].orig_size;
- if (w_addr > o_end || (w_addr+3) < o_start) {
- /* No collision possible; keep this translation */
- VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size );
- if (wr < rd) vg_transtab[wr] = vg_transtab[rd];
- wr++;
- } else {
- /* Possible collision; discard. */
- vg_smc_discards++;
- VG_(message) (Vg_DebugMsg,
- "discarding translation of %p .. %p",
- tt[rd].orig_addr,
- tt[rd].orig_addr + tt[rd].orig_size - 1);
- VG_(free)((void*)tt[rd].trans_addr);
- }
+ if (out_count > 0) {
+ VG_(invalidate_tt_fast)();
+ VG_(sanity_check_tc_tt)();
+# ifdef DEBUG_TRANSTAB
+ { Addr aa;
+ for (aa = i_start; aa <= i_end; aa++)
+ vg_assert(search_trans_table ( aa ) == NULL);
}
- vg_transtab_used[i] = wr;
- }
-# endif
-}
-
-
-/* Top-level function in charge of discarding out-of-date translations
- following the discovery of a (potential) original-code-write.
-*/
-void VG_(flush_transtab) ( void )
-{
-# if 0
- Addr w_addr;
- Int i, j;
-
- /* We shouldn't be here unless a code write was detected. */
- vg_assert(vg_origwrites_used > 0);
-
- /* Instead of incrementally fixing up the translation table cache,
- just invalidate the whole darn thing. Pray this doesn't happen
- very often :) */
- for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++)
- VG_(transtab_cache_orig)[i] =
- VG_(transtab_cache_trans)[i] = (Addr)0;
-
- /* Clear out the fast cache; discard_translations_bracketing
- reconstructs it. */
- for (i = 0; i < VG_SMC_CACHE_SIZE; i++)
- VG_(smc_cache)[i] = False;
-
- /* And also clear the slow-but-correct table. */
- for (i = 0; i < 65536; i++) {
- VgSmcSecondary* sec = vg_smc_primary[i];
- if (sec)
- for (j = 0; j < 2048; j++)
- sec->chars[j] = 0;
+# endif
}
- /* This doesn't need to be particularly fast, since we (presumably)
- don't have to handle particularly frequent writes to code
- addresses. */
- while (vg_origwrites_used > 0) {
- vg_origwrites_used--;
- w_addr = vg_origwrites[vg_origwrites_used];
- discard_translations_bracketing ( w_addr );
- }
-
- vg_assert(vg_origwrites_used == 0);
-# endif
+   if (1 || VG_(clo_verbosity) > 1)
+ VG_(message)(Vg_UserMsg,
+ "discard %d (%d -> %d) translations in range %p .. %p",
+ out_count, out_osize, out_tsize, i_start, i_end );
}
/*--- Initialisation. ---*/
/*------------------------------------------------------------*/
-void VG_(init_transtab_and_SMC) ( void )
+void VG_(init_tt_tc) ( void )
{
Int i;
at the first TT entry, which is, of course, empty. */
for (i = 0; i < VG_TT_FAST_SIZE; i++)
VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
-
- /* No part of the address space has any translations. */
- for (i = 0; i < 65536; i++)
- vg_smc_primary[i] = NULL;
-
- /* ... and the associated fast-check cache reflects this. */
- for (i = 0; i < VG_SMC_CACHE_SIZE; i++)
- VG_(smc_cache)[i] = False;
-
- /* Finally, no original-code-writes have been recorded. */
- vg_origwrites_used = 0;
}
/*--------------------------------------------------------------------*/
_zzq_arg4 /* request fourth param */ ) \
\
{ volatile unsigned int _zzq_args[5]; \
- _zzq_args[0] = (volatile unsigned int)_zzq_request; \
- _zzq_args[1] = (volatile unsigned int)_zzq_arg1; \
- _zzq_args[2] = (volatile unsigned int)_zzq_arg2; \
- _zzq_args[3] = (volatile unsigned int)_zzq_arg3; \
- _zzq_args[4] = (volatile unsigned int)_zzq_arg4; \
+ _zzq_args[0] = (volatile unsigned int)(_zzq_request); \
+ _zzq_args[1] = (volatile unsigned int)(_zzq_arg1); \
+ _zzq_args[2] = (volatile unsigned int)(_zzq_arg2); \
+ _zzq_args[3] = (volatile unsigned int)(_zzq_arg3); \
+ _zzq_args[4] = (volatile unsigned int)(_zzq_arg4); \
asm volatile("movl %1, %%eax\n\t" \
"movl %2, %%edx\n\t" \
"roll $29, %%eax ; roll $3, %%eax\n\t" \
#define VG_USERREQ__CHECK_READABLE 0x1006
#define VG_USERREQ__MAKE_NOACCESS_STACK 0x1007
#define VG_USERREQ__RUNNING_ON_VALGRIND 0x1008
-#define VG_USERREQ__DO_LEAK_CHECK 0x1009 /* unimplemented */
-
+#define VG_USERREQ__DO_LEAK_CHECK 0x1009 /* untested */
+#define VG_USERREQ__DISCARD_TRANSLATIONS 0x100A
/* Client-code macros to manipulate the state of memory. */
0, 0, 0, 0); \
}
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+ _qzz_len - 1]. Useful if you are debugging a JITter or some such,
+ since it provides a way to make sure valgrind will retranslate the
+ invalidated area. Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \
+ {unsigned int _qzz_res; \
+ VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, \
+ VG_USERREQ__DISCARD_TRANSLATIONS, \
+ _qzz_addr, _qzz_len, 0, 0); \
+ }
+
+
#endif
--- /dev/null
+
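+/* Regression test for VALGRIND_DISCARD_TRANSLATIONS: run fooble() so
+   it gets translated, discard the translations covering it, then run
+   it again, forcing a retranslation.  The length passed to the request
+   assumes the compiler places someother() directly after fooble(). */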
+#include <stdio.h>
+#include <valgrind.h>
+
+int fooble ( void )
+{
+ int x, y;
+ y = 0;
+ for (x = 0; x < 100; x++) {
+ if ((x % 3) == 0) y += x; else y++;
+ }
+ return y;
+}
+
+void someother ( void )
+{
+}
+
+int main ( void )
+{
+ printf("fooble-1() = %d\n", fooble() );
+ VALGRIND_DISCARD_TRANSLATIONS( (char*)(&fooble),
+ ((char*)(&someother)) - ((char*)(&fooble)) );
+ printf("fooble-2() = %d\n", fooble() );
+ return 0;
+}
+
VG_(detect_memory_leaks)();
return 0; /* return value is meaningless */
+ case VG_USERREQ__DISCARD_TRANSLATIONS:
+ VG_(invalidate_translations)( arg[1], arg[2] );
+ return 0; /* return value is meaningless */
+
default:
VG_(message)(Vg_UserMsg,
"Warning: unknown client request code %d", arg[0]);
/* Constants for the fast original-code-write check cache. */
-/* Usually you want this to be zero. */
-#define VG_SMC_FASTCHECK_IN_C 0
-
-#define VG_SMC_CACHE_BITS 19
-#define VG_SMC_CACHE_SIZE (1 << VG_SMC_CACHE_BITS)
-#define VG_SMC_CACHE_MASK ((VG_SMC_CACHE_SIZE) - 1)
-
-#define VG_SMC_CACHE_SHIFT 6
-
-
/* Assembly code stubs make these requests ... */
#define VG_USERREQ__SIGNAL_RETURNS 0x4001
#define VG_USERREQ__PTHREAD_RETURNS 0x4002
}
-/* A word in memory containing a pointer to vg_helper_smc_check4.
- Never changes.
-*/
-static const Addr vg_helper_smc_check4_ADDR
- = (Addr)&VG_(helper_smc_check4);
-
-static void synth_orig_code_write_check ( Int sz, Int reg )
-{
- UInt offset;
-
- /*
- In this example, reg is %eax and sz == 8:
-
- -- check the first four bytes
- 0087 89C5 movl %eax, %ebp
- 0089 FF1544332211 call * 0x11223344
-
- -- check the second four
- 008f 89C5 movl %eax, %ebp
- 0091 83C504 addl $4, %ebp
- 0094 FF1544332211 call * 0x11223344
-
- Because we can't call an absolute address (alas), the
- address called is stored in memory at 0x11223344 in this
- example, and it just contains the address of
- vg_helper_smc_check4 -- which is where we really want
- to get to.
- */
- vg_assert(0);
-
- if (sz < 4) sz = 4;
-
- for (offset = 0; offset < sz; offset += 4) {
-
- emit_movl_reg_reg ( reg, R_EBP );
-
- if (offset > 0) {
- newEmit();
- emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset );
- if (dis) VG_(printf)("\n");
- }
-
- newEmit();
- emitB ( 0xFF ); emitB ( 0x15 );
- emitL ( (Addr)&vg_helper_smc_check4_ADDR );
- if (dis) VG_(printf)("\n");
- }
-}
-
-
/* Synthesise a minimal test (and which discards result) of reg32
against lit. It's always safe do simply
emit_testv_lit_reg ( 4, lit, reg32 )
vg_assert(u->tag1 == RealReg);
vg_assert(u->tag2 == RealReg);
synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
+ /* No longer possible, but retained for illustrative purposes.
if (u->smc_check)
synth_orig_code_write_check ( u->size, u->val2 );
+ */
break;
}
synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
u->val1 & 0xFF,
u->val2 );
+ /* No longer possible, but retained for illustrative purposes.
if (u->opcode == FPU_W && u->smc_check)
synth_orig_code_write_check ( u->size, u->val2 );
+ */
break;
case FPU:
ret
-/* Do a original-code-write check for the address in %ebp. */
-.global VG_(helper_smc_check4)
-VG_(helper_smc_check4):
-#if VG_SMC_FASTCHECK_IN_C
-
- # save the live regs
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- pushl %esi
- pushl %edi
-
- pushl %ebp
- call VG_(smc_check4)
- addl $4, %esp
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
-
- ret
-#else
- incl VG_(smc_total_check4s)
- pushl %ebp
- shrl $VG_SMC_CACHE_SHIFT, %ebp
- andl $VG_SMC_CACHE_MASK, %ebp
- cmpb $0, VG_(smc_cache)(%ebp)
- jnz vg_smc_cache_failure
- addl $4, %esp
- ret
- vg_smc_cache_failure:
- popl %ebp
- pushal
- pushl %ebp
- call VG_(smc_check4)
- addl $4, %esp
- popal
- ret
-#endif
-
-
/* Fetch the time-stamp-ctr reg.
On entry:
dummy, replaced by %EAX value
extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
Char* fn_name, Int n_fn_name);
-extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
/* ---------------------------------------------------------------------
/* total of register ranks over all translations */
extern UInt VG_(total_reg_rank);
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-//extern UInt VG_(smc_total_check4s);
-
-/* Number of writes which the fast smc check couldn't show were
- harmless. */
-extern UInt VG_(smc_cache_passed);
-
-/* Numnber of writes which really did write on original code. */
-extern UInt VG_(smc_fancy_passed);
-
-/* Number of translations discarded as a result. */
-//extern UInt VG_(smc_discard_count);
-
/* Counts pertaining to internal sanity checking. */
extern UInt VG_(sanity_fast_count);
extern UInt VG_(sanity_slow_count);
extern void VG_(flush_transtab) ( void );
extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size );
extern void VG_(add_to_trans_tab) ( TTEntry* tte );
+extern void VG_(invalidate_translations) ( Addr start, UInt range );
-extern void VG_(smc_mark_original) ( Addr original_addr,
- Int original_len );
-
-extern void VG_(init_transtab_and_SMC) ( void );
+extern void VG_(init_tt_tc) ( void );
extern void VG_(sanity_check_tc_tt) ( void );
extern Addr VG_(search_transtab) ( Addr original_addr );
Exports of vg_helpers.S
------------------------------------------------------------------ */
-/* SMC fast checks. */
-extern void VG_(helper_smc_check4);
-
/* Mul, div, etc, -- we don't codegen these directly. */
extern void VG_(helper_idiv_64_32);
extern void VG_(helper_div_64_32);
extern void VG_(cachesim_log_non_mem_instr)( iCC* cc );
extern void VG_(cachesim_log_mem_instr) ( idCC* cc, Addr data_addr );
+extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+
+
/* ---------------------------------------------------------------------
The state of the simulated CPU.
------------------------------------------------------------------ */
UInt VG_(total_reg_rank) = 0;
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-UInt VG_(smc_total_check4s) = 0;
-
-/* Number of writes which the fast smc check couldn't show were
- harmless. */
-UInt VG_(smc_cache_passed) = 0;
-
-/* Numnber of writes which really did write on original code. */
-UInt VG_(smc_fancy_passed) = 0;
-
-/* Number of translations discarded as a result. */
-UInt VG_(smc_discard_count) = 0;
-
-
/* Counts pertaining to internal sanity checking. */
UInt VG_(sanity_fast_count) = 0;
UInt VG_(sanity_slow_count) = 0;
VG_(uinstrs_prealloc),
VG_(uinstrs_spill),
VG_(total_reg_rank) );
- VG_(message)(Vg_DebugMsg,
- "smc-check: %d checks, %d fast pass, "
- "%d slow pass, %d discards.",
- VG_(smc_total_check4s),
- VG_(smc_cache_passed),
- VG_(smc_fancy_passed),
- VG_(smc_discard_count) );
VG_(message)(Vg_DebugMsg,
" sanity: %d cheap, %d expensive checks.",
VG_(sanity_fast_count),
VGP_PUSHCC(VgpInitAudit);
VGM_(init_memory_audit)();
VGP_POPCC;
- VGP_PUSHCC(VgpReadSyms);
- VG_(read_symbols)();
- VGP_POPCC;
}
+ VGP_PUSHCC(VgpReadSyms);
+ VG_(read_symbols)();
+ VGP_POPCC;
+
/* End calibration of our RDTSC-based clock, leaving it as long as
we can. */
VG_(end_rdtsc_calibration)();
carefully sets up the permissions maps to cover the anonymous
mmaps for the translation table and translation cache, which
wastes > 20M of virtual address space. */
- VG_(init_transtab_and_SMC)();
+ VG_(init_tt_tc)();
if (VG_(clo_verbosity) == 1) {
VG_(message)(Vg_UserMsg,
VG_(overall_in_count) ++;
VG_(overall_in_osize) += orig_size;
VG_(overall_in_tsize) += trans_size;
- /* Record translated area for SMC detection. */
- VG_(smc_mark_original) ( orig_addr, orig_size );
}
case VG_USERREQ__MAKE_NOACCESS_STACK:
case VG_USERREQ__RUNNING_ON_VALGRIND:
case VG_USERREQ__DO_LEAK_CHECK:
+ case VG_USERREQ__DISCARD_TRANSLATIONS:
SET_EDX(
tid,
VG_(handle_client_request) ( &VG_(threads)[tid], arg )
/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
dlopen()ed libraries, which is something that KDE3 does a lot.
- Still kludgey, though less than before:
- * we don't check whether we should throw away some symbol tables
- when munmap() happens
+ Stabs reader greatly improved by Nick Nethercote, Apr 02.
- * symbol table reading code for ELF binaries is a shambles.
- Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
+ 16 May 02: when notified about munmap, return a Bool indicating
+ whether or not the area being munmapped had executable permissions.
+ This is then used to determine whether or not
+ VG_(invalidate_translations) should be called for that area. So
+ that this works even with --instrument=no, we still keep track of
+ the mapped executable segments in that case, but do not load any
+ debug info or symbols.
*/
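
(Aside: to make the above concrete, here is a minimal, self-contained
sketch of the executable-segment bookkeeping the comment describes.
All names and types are hypothetical illustrations, not Valgrind's
actual code.)

#include <stdlib.h>

typedef struct ExeSeg_ {
   unsigned int    start;
   unsigned int    size;
   struct ExeSeg_* next;
} ExeSeg;

static ExeSeg* exeSegs = NULL;

/* Record every executable mapping, even when --instrument=no, so a
   later munmap can be classified. */
static void record_exe_segment ( unsigned int start, unsigned int size )
{
   ExeSeg* seg = malloc(sizeof(ExeSeg));
   if (seg == NULL) return;
   seg->start = start;
   seg->size  = size;
   seg->next  = exeSegs;
   exeSegs    = seg;
}

/* On munmap: return 1 (and unlink) if the unmapped range covers a
   known executable segment, else 0 -- the Bool the caller uses to
   decide whether to discard translations. */
static int unmapped_range_was_exe ( unsigned int start, unsigned int len )
{
   ExeSeg** prevp = &exeSegs;
   ExeSeg*  curr  = exeSegs;
   while (curr != NULL) {
      if (start <= curr->start
          && curr->start + curr->size <= start + len) {
         *prevp = curr->next;   /* segment entirely unmapped */
         free(curr);
         return 1;
      }
      prevp = &curr->next;
      curr  = curr->next;
   }
   return 0;
}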
/*------------------------------------------------------------*/
= si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
/* And actually fill it up. */
- vg_read_lib_symbols ( si );
- canonicaliseSymtab ( si );
- canonicaliseLoctab ( si );
+ if (VG_(clo_instrument) || VG_(clo_cachesim)) {
+ vg_read_lib_symbols ( si );
+ canonicaliseSymtab ( si );
+ canonicaliseLoctab ( si );
+ }
}
which happen to correspond to the munmap()d area. */
void VG_(read_symbols) ( void )
{
- if (! VG_(clo_instrument) && ! VG_(clo_cachesim))
- return;
-
VG_(read_procselfmaps) ( read_symtab_callback );
/* Do a sanity check on the symbol tables: ensure that the address
/* the main assertion */
overlap = (lo <= lo2 && lo2 <= hi)
|| (lo <= hi2 && hi2 <= hi);
- //vg_assert(!overlap);
if (overlap) {
VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
ppSegInfo ( si );
to a segment for a .so, and if so discard the relevant SegInfo.
This might not be a very clever idea from the point of view of
accuracy of error messages, but we need to do it in order to
- maintain the no-overlapping invariant.
+ maintain the no-overlapping invariant.
+
+ 16 May 02: Returns a Bool indicating whether or not the discarded
+ range falls inside a known executable segment. See comment at top
+ of file for why.
*/
-void VG_(symtab_notify_munmap) ( Addr start, UInt length )
+Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
{
SegInfo *prev, *curr;
- if (! VG_(clo_instrument))
- return;
-
prev = NULL;
curr = segInfo;
while (True) {
prev = curr;
curr = curr->next;
}
- if (curr == NULL) return;
+ if (curr == NULL)
+ return False;
VG_(message)(Vg_UserMsg,
"discard syms in %s due to munmap()",
}
freeSegInfo(curr);
+ return True;
}
KERNEL_DO_SYSCALL(tid,res);
if (!VG_(is_kerror)(res)) {
/* Copied from munmap() wrapper. */
+ Bool munmap_exe;
Addr start = arg1;
Addr length = arg2;
while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
while (((start+length) % VKI_BYTES_PER_PAGE) > 0) { length++; }
make_noaccess( start, length );
- VG_(symtab_notify_munmap) ( start, length );
+ munmap_exe = VG_(symtab_notify_munmap) ( start, length );
+ if (munmap_exe)
+ VG_(invalidate_translations) ( start, length );
approximate_mmap_permissions( (Addr)res, arg3, arg4 );
}
break;
pages. If we don't do that, our idea of addressable
memory diverges from that of the kernel's, which causes
the leak detector to crash. */
+ Bool munmap_exe;
Addr start = arg1;
Addr length = arg2;
while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
/* Tell our symbol table machinery about this, so that if
this happens to be a .so being unloaded, the relevant
symbols are removed too. */
- VG_(symtab_notify_munmap) ( start, length );
+ munmap_exe = VG_(symtab_notify_munmap) ( start, length );
+ if (munmap_exe)
+ VG_(invalidate_translations) ( start, length );
}
break;
Important! If you change the set of allocatable registers from
%eax, %ebx, %ecx, %edx, %esi you must change the
- save/restore sequences in vg_helper_smc_check4 to match!
+ save/restore sequences in various places to match!
*/
__inline__ Int VG_(rankToRealRegNo) ( Int rank )
{
#include "vg_include.h"
#include "vg_constants.h"
+/* #define DEBUG_TRANSTAB */
+
/*------------------------------------------------------------*/
/*--- Management of the LRU-based translation table+cache. ---*/
of code retranslation. */
/* Size of the translation cache, in bytes. */
-#define VG_TC_SIZE /*16000000*/ 32000000 /*40000000*/
+#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/
/* Do a LRU pass when the translation cache becomes this full. */
#define VG_TC_LIMIT_PERCENT 98
/* Number of entries in the translation table. This must be a prime
number in order to make the hashing work properly. */
-#define VG_TT_SIZE /*100129*/ 200191 /*250829*/
+#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/
/* Do an LRU pass when the translation table becomes this full. */
#define VG_TT_LIMIT_PERCENT /*67*/ 80
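
(Aside: a sketch, with hypothetical names, of how such percentage
limits turn into trigger thresholds.  The one subtlety is to divide
before multiplying: 32000000 * 98 overflows a signed 32-bit int.)

#include <assert.h>

static int limit_from_percent ( int total, int percent )
{
   assert(percent > 0 && percent <= 100);
   return (total / 100) * percent;   /* avoids 32-bit overflow */
}

/* e.g. limit_from_percent(32000000, 98) == 31360000 TC bytes, and
   limit_from_percent(200191, 80) == 160080 TT entries (the /100
   truncation loses a little precision, which is harmless here). */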
N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago. */
#define VG_N_EPOCHS /*2000*/ /*4000*/ 20000
-/* This TT entry is empty. */
+/* This TT entry is empty. There is no associated TC storage. */
#define VG_TTE_EMPTY ((Addr)1)
-/* This TT entry has been deleted. */
+/* This TT entry has been deleted, in the sense that it does not
+ contribute to the orig->trans mapping. However, the ex-translation
+ it points at still occupies space in TC. This slot cannot be
+ re-used without doing an LRU pass. */
#define VG_TTE_DELETED ((Addr)3)
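
(Aside: EMPTY vs DELETED is the standard "tombstone" scheme for
open-addressed hash tables: a lookup may stop at an EMPTY slot, but
must probe past a DELETED one, since the probe chain may continue
beyond it.  A self-contained sketch, with hypothetical names:)

#define T_SIZE    211            /* prime, like VG_TT_SIZE */
#define T_EMPTY   ((unsigned int)1)
#define T_DELETED ((unsigned int)3)

typedef struct { unsigned int orig_addr; } Entry;
static Entry table[T_SIZE];

static void init_table ( void )
{
   int i;
   for (i = 0; i < T_SIZE; i++)
      table[i].orig_addr = T_EMPTY;
}

static Entry* lookup ( unsigned int orig_addr )
{
   int i = orig_addr % T_SIZE;
   while (1) {
      if (table[i].orig_addr == orig_addr)
         return &table[i];
      if (table[i].orig_addr == T_EMPTY)
         return 0;                /* chain ends only at EMPTY */
      i++;                        /* DELETED: keep probing */
      if (i == T_SIZE) i = 0;
   }
}

This is also why the insertion code further down now reuses only
VG_TTE_EMPTY slots: a DELETED entry still owns storage in the TC, so
overwriting it before an LRU pass would break the TC accounting.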
/* The TC. This used to be statically allocated, but that forces many
*/
static UChar* vg_tc = NULL;
-/* Count of bytes used in the TC. */
+/* Count of bytes used in the TC. This includes those pointed to from
+ VG_TTE_DELETED entries. */
static Int vg_tc_used = 0;
/* The TT. Like TC, for the same reason, is dynamically allocated at
*/
static TTEntry* vg_tt = NULL;
-/* Count of non-empty, non-deleted TT entries. */
+/* Count of non-empty TT entries. This includes deleted ones. */
static Int vg_tt_used = 0;
/* Fast helper for the TT. A direct-mapped cache which holds a
if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
return;
+# ifdef DEBUG_TRANSTAB
+ VG_(sanity_check_tc_tt)();
+# endif
+
VGP_PUSHCC(VgpDoLRU);
/*
VG_(printf)(
vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
for (i = 0; i < VG_TT_SIZE; i++) {
- if (vg_tt[i].orig_addr == VG_TTE_EMPTY ||
- vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED)
+ continue;
j = vg_tt[i].mru_epoch;
vg_assert(j <= VG_(current_epoch));
j = VG_(current_epoch) - j;
recently used at most thresh epochs ago. Traverse the TT and
mark such entries as deleted. */
for (i = 0; i < VG_TT_SIZE; i++) {
- if (vg_tt[i].orig_addr == VG_TTE_EMPTY ||
- vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED)
+ continue;
if (vg_tt[i].mru_epoch <= thresh) {
vg_tt[i].orig_addr = VG_TTE_DELETED;
- vg_tt_used--;
VG_(this_epoch_out_count) ++;
VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
}
}
- vg_assert(vg_tt_used >= 0);
- vg_assert(vg_tt_used <= tt_target);
-
/* Now compact the TC, sliding live entries downwards to fill spaces
left by deleted entries. In this loop, r is the offset in TC of
the current translation under consideration, and w is the next
vg_tc[w+i] = vg_tc[r+i];
tte->trans_addr = (Addr)&vg_tc[w+4];
w += 4+tte->trans_size;
+ } else {
+ tte->orig_addr = VG_TTE_EMPTY;
+ vg_tt_used--;
}
r += 4+tte->trans_size;
}
vg_assert(w <= tc_target);
vg_tc_used = w;
+ vg_assert(vg_tt_used >= 0);
+ vg_assert(vg_tt_used <= tt_target);
+
/* Invalidate the fast cache, since it is now out of date. It will get
reconstructed incrementally when the client resumes. */
VG_(invalidate_tt_fast)();
);
- /* Reconstruct the SMC detection structures. */
+# ifdef DEBUG_TRANSTAB
+ for (i = 0; i < VG_TT_SIZE; i++)
+ vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
+# endif
+ VG_(sanity_check_tc_tt)();
VGP_POPCC;
}
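
(Aside: the compaction above is a classic two-cursor slide: a read
cursor r walks every translation in the TC while a write cursor w
tracks the next free byte, and live translations are copied down over
the gaps left by dead ones.  A simplified, self-contained sketch,
using a hypothetical record layout of one word of size, one word of
liveness, then the payload:)

#include <string.h>

typedef unsigned int UInt;

static UInt compact ( unsigned char* tc, UInt used )
{
   UInt r = 0, w = 0;
   while (r < used) {
      UInt size, live;
      memcpy(&size, tc + r,                sizeof(UInt));
      memcpy(&live, tc + r + sizeof(UInt), sizeof(UInt));
      if (live) {
         /* memmove: source and destination may overlap */
         if (w != r)
            memmove(tc + w, tc + r, 2*sizeof(UInt) + size);
         w += 2*sizeof(UInt) + size;
      }
      r += 2*sizeof(UInt) + size;
   }
   return w;   /* the compacted size; vg_tc_used in the real code */
}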
for (i = 0; i < VG_TT_SIZE; i++) {
tte = &vg_tt[i];
if (tte->orig_addr == VG_TTE_EMPTY) continue;
- if (tte->orig_addr == VG_TTE_DELETED) continue;
vg_assert(tte->mru_epoch >= 0);
vg_assert(tte->mru_epoch <= VG_(current_epoch));
counted_entries++;
while (True) {
if (vg_tt[i].orig_addr == tte->orig_addr)
VG_(panic)("add_to_trans_tab: duplicate");
- if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
- vg_tt[i].orig_addr == VG_TTE_EMPTY) {
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY) {
/* Put it here, and set the back pointer. */
vg_tt[i] = *tte;
VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
*/
static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
{
- //static Int queries = 0;
- //static Int probes = 0;
+ //static Int queries = 0;
+ //static Int probes = 0;
Int i;
/* Hash to get initial probe point. */
// if (queries == 10000) {
//queries++;
i = ((UInt)orig_addr) % VG_TT_SIZE;
while (True) {
- //probes++;
+ //probes++;
if (vg_tt[i].orig_addr == orig_addr)
return &vg_tt[i];
if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
}
-/*------------------------------------------------------------*/
-/*--- Detecting and handling self-modifying code. ---*/
-/*------------------------------------------------------------*/
-
-/* This mechanism uses two data structures:
-
- vg_oldmap -- array[64k] of Bool, which approximately records
- parts of the address space corresponding to code for which
- a translation exists in the translation table. vg_oldmap is
- consulted at each write, to determine whether that write might
- be writing a code address; if so, the program is stopped at
- the next jump, and the corresponding translations are invalidated.
-
- Precise semantics: vg_oldmap[(a >> 8) & 0xFFFF] is true for all
- addresses a containing a code byte which has been translated. So
- it acts kind-of like a direct-mapped cache with 64k entries.
-
- The second structure is vg_CAW, a small array of addresses at which
- vg_oldmap indicates a code write may have happened. This is
- (effectively) checked at each control transfer (jump), so that
- translations can be discarded before going on. An array is
- somewhat overkill, since it strikes me as very unlikely that a
- single basic block will do more than one code write. Nevertheless
- ...
-
- ToDo: make this comment up-to-date.
+/* Invalidate translations of original code [start .. start + range - 1].
+ This is slow, so you *really* don't want to call it very often.
*/
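
(Aside: the discard test used in the loop below is the usual
closed-interval disjointness check -- two inclusive ranges overlap
exactly when neither ends before the other starts.  Unlike the
endpoint-membership test in the segment sanity check earlier, this
form also catches the case where one range strictly contains the
other.  A sketch:)

/* [i_start,i_end] and [o_start,o_end] are inclusive ranges.
   E.g. overlap(10,19, 19,30) == 1;  overlap(10,19, 20,30) == 0;
        overlap(10,19, 0,100) == 1  (containment is caught too). */
static int ranges_overlap ( unsigned int i_start, unsigned int i_end,
                            unsigned int o_start, unsigned int o_end )
{
   return !(o_end < i_start || o_start > i_end);
}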
-
-
-/* Definitions for the self-modifying-code detection cache, intended
- as a fast check which clears the vast majority of writes. */
-
-#define VG_SMC_CACHE_HASH(aaa) \
- ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK)
-
-Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE];
-
-
-/* Definitions for the fallback mechanism, which, more slowly,
- provides a precise record of which words in the address space
- belong to original code. */
-
-typedef struct { UChar chars[2048]; } VgSmcSecondary;
-
-static VgSmcSecondary* vg_smc_primary[65536];
-
-static VgSmcSecondary* vg_smc_new_secondary ( void )
-{
- Int i;
- VgSmcSecondary* sec
- = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) );
- for (i = 0; i < 2048; i++)
- sec->chars[i] = 0;
- return sec;
-}
-
-#define GET_BIT_ARRAY(arr,indx) \
- (1 & ( ((UChar*)arr)[((UInt)indx) / 8] \
- >> ( ((UInt)indx) % 8) ) )
-
-#define SET_BIT_ARRAY(arr,indx) \
- ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8)
-
-
-/* Finally, a place to record the original-code-write addresses
- detected in a basic block. */
-
-#define VG_ORIGWRITES_SIZE 10
-
-static Addr vg_origwrites[VG_ORIGWRITES_SIZE];
-static Int vg_origwrites_used;
-
-
-/* Call here to check a written address. */
-
-void VG_(smc_check4) ( Addr a )
+void VG_(invalidate_translations) ( Addr start, UInt range )
{
- UInt bit_index;
- VgSmcSecondary* smc_secondary;
+ Addr i_start, i_end, o_start, o_end;
+ UInt out_count, out_osize, out_tsize;
+ Int i;
-# if VG_SMC_FASTCHECK_IN_C
- VG_(smc_total_check4s)++;
-
- /* Try the fast check first. */
- if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return;
+# ifdef DEBUG_TRANSTAB
+ VG_(sanity_check_tc_tt)();
# endif
+ i_start = start;
+ i_end = start + range - 1;
+ out_count = out_osize = out_tsize = 0;
- VG_(smc_cache_passed)++;
-
- /* Need to do a slow check. */
- smc_secondary = vg_smc_primary[a >> 16];
- if (smc_secondary == NULL) return;
-
- bit_index = (a & 0xFFFF) >> 2;
- if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return;
-
- VG_(smc_fancy_passed)++;
-
- /* Detected a Real Live write to code which has been translated.
- Note it. */
- if (vg_origwrites_used == VG_ORIGWRITES_SIZE)
- VG_(panic)("VG_ORIGWRITES_SIZE is too small; "
- "increase and recompile.");
- vg_origwrites[vg_origwrites_used] = a;
- vg_origwrites_used++;
-
- VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a);
-
- /* Force an exit before the next basic block, so the translation
- cache can be flushed appropriately. */
- // VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr);
- //VG_(dispatch_ctr) = 1;
- //VG_(interrupt_reason) = VG_Y_SMC;
-}
-
-
-/* Mark an address range as containing an original translation,
- updating both the fast-check cache and the slow-but-correct data
- structure.
-*/
-void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size )
-{
- Addr a;
- VgSmcSecondary* smc_secondary;
- UInt bit_index;
-
- for (a = orig_addr; a < orig_addr+orig_size; a++) {
-
- VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True;
-
- smc_secondary = vg_smc_primary[a >> 16];
- if (smc_secondary == NULL)
- smc_secondary =
- vg_smc_primary[a >> 16] = vg_smc_new_secondary();
-
- bit_index = (a & 0xFFFF) >> 2;
- SET_BIT_ARRAY(smc_secondary->chars, bit_index);
+ for (i = 0; i < VG_TT_SIZE; i++) {
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ o_start = vg_tt[i].orig_addr;
+ o_end = o_start + vg_tt[i].orig_size - 1;
+ if (o_end < i_start || o_start > i_end)
+ continue;
+ if (VG_(clo_cachesim))
+ VG_(cachesim_notify_discard)( & vg_tt[i] );
+ vg_tt[i].orig_addr = VG_TTE_DELETED;
+ VG_(this_epoch_out_count) ++;
+ VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
+ VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
+ VG_(overall_out_count) ++;
+ VG_(overall_out_osize) += vg_tt[i].orig_size;
+ VG_(overall_out_tsize) += vg_tt[i].trans_size;
+ out_count ++;
+ out_osize += vg_tt[i].orig_size;
+ out_tsize += vg_tt[i].trans_size;
}
-}
-
-/* Discard any translations whose original code overlaps with the
- range w_addr .. w_addr+3 inclusive.
-*/
-__attribute__ ((unused))
-static void discard_translations_bracketing ( Addr w_addr )
-{
-# if 0
- Int i, rd, wr;
- Addr o_start, o_end;
- TTEntry* tt;
-
- for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) {
- tt = vg_transtab[i];
- wr = 0;
- for (rd = 0; rd < vg_transtab_used[i]; rd++) {
- o_start = tt[rd].orig_addr;
- o_end = o_start + tt[rd].orig_size;
- if (w_addr > o_end || (w_addr+3) < o_start) {
- /* No collision possible; keep this translation */
- VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size );
- if (wr < rd) vg_transtab[wr] = vg_transtab[rd];
- wr++;
- } else {
- /* Possible collision; discard. */
- vg_smc_discards++;
- VG_(message) (Vg_DebugMsg,
- "discarding translation of %p .. %p",
- tt[rd].orig_addr,
- tt[rd].orig_addr + tt[rd].orig_size - 1);
- VG_(free)((void*)tt[rd].trans_addr);
- }
+ if (out_count > 0) {
+ VG_(invalidate_tt_fast)();
+ VG_(sanity_check_tc_tt)();
+# ifdef DEBUG_TRANSTAB
+ { Addr aa;
+ for (aa = i_start; aa <= i_end; aa++)
+ vg_assert(search_trans_table ( aa ) == NULL);
}
- vg_transtab_used[i] = wr;
- }
-# endif
-}
-
-
-/* Top-level function in charge of discarding out-of-date translations
- following the discovery of a (potential) original-code-write.
-*/
-void VG_(flush_transtab) ( void )
-{
-# if 0
- Addr w_addr;
- Int i, j;
-
- /* We shouldn't be here unless a code write was detected. */
- vg_assert(vg_origwrites_used > 0);
-
- /* Instead of incrementally fixing up the translation table cache,
- just invalidate the whole darn thing. Pray this doesn't happen
- very often :) */
- for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++)
- VG_(transtab_cache_orig)[i] =
- VG_(transtab_cache_trans)[i] = (Addr)0;
-
- /* Clear out the fast cache; discard_translations_bracketing
- reconstructs it. */
- for (i = 0; i < VG_SMC_CACHE_SIZE; i++)
- VG_(smc_cache)[i] = False;
-
- /* And also clear the slow-but-correct table. */
- for (i = 0; i < 65536; i++) {
- VgSmcSecondary* sec = vg_smc_primary[i];
- if (sec)
- for (j = 0; j < 2048; j++)
- sec->chars[j] = 0;
+# endif
}
- /* This doesn't need to be particularly fast, since we (presumably)
- don't have to handle particularly frequent writes to code
- addresses. */
- while (vg_origwrites_used > 0) {
- vg_origwrites_used--;
- w_addr = vg_origwrites[vg_origwrites_used];
- discard_translations_bracketing ( w_addr );
- }
-
- vg_assert(vg_origwrites_used == 0);
-# endif
+ if (VG_(clo_verbosity) > 1)
+ VG_(message)(Vg_UserMsg,
+ "discard %d (%d -> %d) translations in range %p .. %p",
+ out_count, out_osize, out_tsize, i_start, i_end );
}
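
(Aside: besides the munmap path shown earlier, the new
VG_USERREQ__DISCARD_TRANSLATIONS client request added to the
dispatcher above presumably ends up here too.  The handler body is
not part of this patch, so the fragment below -- including the
argument layout -- is purely an assumed illustration, with a stub
standing in for the real function:)

typedef unsigned int Addr;
typedef unsigned int UInt;

/* Stub standing in for VG_(invalidate_translations). */
static void invalidate_translations ( Addr start, UInt range ) { }

/* Assumed request layout: arg[1] = start address, arg[2] = length. */
static void handle_discard_request ( UInt* arg )
{
   invalidate_translations ( (Addr)arg[1], (UInt)arg[2] );
}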
/*--- Initialisation. ---*/
/*------------------------------------------------------------*/
-void VG_(init_transtab_and_SMC) ( void )
+void VG_(init_tt_tc) ( void )
{
Int i;
at the first TT entry, which is, of course, empty. */
for (i = 0; i < VG_TT_FAST_SIZE; i++)
VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
-
- /* No part of the address space has any translations. */
- for (i = 0; i < 65536; i++)
- vg_smc_primary[i] = NULL;
-
- /* ... and the associated fast-check cache reflects this. */
- for (i = 0; i < VG_SMC_CACHE_SIZE; i++)
- VG_(smc_cache)[i] = False;
-
- /* Finally, no original-code-writes have been recorded. */
- vg_origwrites_used = 0;
}
/*--------------------------------------------------------------------*/