+
/*--------------------------------------------------------------------*/
/*--- The cache simulation framework: instrumentation, recording ---*/
/*--- and results printing. ---*/
Copyright (C) 2000-2002 Julian Seward
jseward@acm.org
- Julian_Seward@muraroa.demon.co.uk
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
The GNU General Public License is contained in the file LICENSE.
*/
-#include <string.h>
-
#include "vg_include.h"
#include "vg_cachesim_L2.c"
filename_hash = hash(filename, N_FILE_ENTRIES);
curr_file_node = BBCC_table[filename_hash];
while (NULL != curr_file_node &&
- strcmp(filename, curr_file_node->filename) != 0) {
+ VG_(strcmp)(filename, curr_file_node->filename) != 0) {
curr_file_node = curr_file_node->next;
}
if (NULL == curr_file_node) {
fnname_hash = hash(fn_name, N_FN_ENTRIES);
curr_fn_node = curr_file_node->fns[fnname_hash];
while (NULL != curr_fn_node &&
- strcmp(fn_name, curr_fn_node->fn_name) != 0) {
+ VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
curr_fn_node = curr_fn_node->next;
}
if (NULL == curr_fn_node) {
/* Allow for filename switching in the middle of a BB; if this happens,
* must print the new filename with the function name. */
- if (0 != strcmp(fl_buf, curr_file)) {
+ if (0 != VG_(strcmp)(fl_buf, curr_file)) {
VG_(strcpy)(curr_file, fl_buf);
VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
/* If the function name for this instruction doesn't match that of the
* first instruction in the BB, print warning. */
- if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
+ if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
VG_(printf)("Mismatched function names\n");
VG_(printf)(" filenames: BB:%s, instr:%s;"
" fn_names: BB:%s, instr:%s;"
VGP_POPCC;
}
+
+void VG_(cachesim_notify_discard) ( TTEntry* tte )
+{
+ VG_(printf)( "cachesim_notify_discard: %p for %d\n",
+ tte->orig_addr, (Int)tte->orig_size);
+}
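+/* Note: VG_(invalidate_translations) in vg_transtab.c calls this for
+   each discarded translation when --cachesim=yes is in effect.  For
+   now it only reports the event, which is presumably why the manual
+   warns that VALGRIND_DISCARD_TRANSLATIONS does not yet interact well
+   with the cache simulator. */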
+
+/*--------------------------------------------------------------------*/
+/*--- end vg_cachesim.c ---*/
+/*--------------------------------------------------------------------*/
<body bgcolor="#ffffff">
<a name="title"> </a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
<center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
<p>
<center>
<li>Reading/writing memory after it has been free'd</li>
<li>Reading/writing off the end of malloc'd blocks</li>
<li>Reading/writing inappropriate areas on the stack</li>
- <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+ <li>Memory leaks -- where pointers to malloc'd blocks are lost
+ forever</li>
+ <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
</ul>
Problems like these can be difficult to find by other means, often
all fairly dodgy and doesn't work at all if threads are
involved.</li><br>
<p>
-
- <li><code>--smc-check=none</code><br>
- <code>--smc-check=some</code> [default]<br>
- <code>--smc-check=all</code>
- <p>How carefully should Valgrind check for self-modifying code
- writes, so that translations can be discarded? When
- "none", no writes are checked. When "some", only writes
- resulting from moves from integer registers to memory are
- checked. When "all", all memory writes are checked, even those
- with which are no sane program would generate code -- for
- example, floating-point writes.
- <p>
- NOTE that this is all a bit bogus. This mechanism has never
- been enabled in any snapshot of Valgrind which was made
- available to the general public, because the extra checks reduce
- performance, increase complexity, and I have yet to come across
- any programs which actually use self-modifying code. I think
- the flag is ignored.
- </li>
</ul>
right now. Returns no value. I guess this could be used to
incrementally check for leaks between arbitrary places in the
program's execution. Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+ of code in the specified address range. Useful if you are
+ debugging a JITter or some other dynamic code generation system.
+ After this call, attempts to execute code in the invalidated
+ address range will cause valgrind to make new translations of that
+ code, which is probably the semantics you want. Note that this is
+ implemented naively, and involves checking all 200191 entries in
+ the translation table to see if any of them overlap the specified
+ address range. So try not to call it often, or performance will
+ nosedive. Note that you can be clever about this: you only need
+ to call it when an area which previously contained code is
+ overwritten with new code. You can choose to write code into
+ fresh memory, and just call this occasionally to discard large
+ chunks of old code all at once.
+ <p>
+  Warning: minimally tested.  Also, doesn't interact well with the
+  cache simulator.  A short usage sketch follows this list.
</ul>
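<p>
As a sketch of how this request might be used (the buffer names here
are illustrative, not part of Valgrind's API): suppose your JITter has
just overwritten a region of generated code.
<pre>
    #include "valgrind.h"

    /* codebuf/codelen: hypothetical JIT output buffer */
    regenerate_code(codebuf, codelen);
    VALGRIND_DISCARD_TRANSLATIONS(codebuf, codelen);
</pre>
A complete, runnable example is the small test program added in this
snapshot, which runs a function, discards its translations, and then
runs it again.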
<p>
<code>malloc</code> is 8-aligned. Valgrind's allocator only
guarantees 4-alignment, so without the patch Mozilla makes an illegal
memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
<a name="install"></a>
running under Valgrind. This is due to the large amount of
administrative information maintained behind the scenes. Another
cause is that Valgrind dynamically translates the original
- executable and never throws any translation away, except in
- those rare cases where self-modifying code is detected.
- Translated, instrumented code is 12-14 times larger than the
- original (!) so you can easily end up with 15+ MB of
+ executable. Translated, instrumented code is 14-16 times larger
+ than the original (!) so you can easily end up with 30+ MB of
translations when running (eg) a web browser.
</li>
</ul>
translations. Subsequent jumps to that address will use this
translation.
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated. Such a write invalidates translations of code
-bracketing the written address. Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there. In this way,
-self modifying code is supported. In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code. Such checking is expensive, and in practice (fortunately)
+almost no applications need it. However, to help people who are
+debugging dynamic code generation systems, there is a Client Request
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range. So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
<p>The JITter translates basic blocks -- blocks of straight-line-code
-- as single entities. To minimise the considerable difficulties of
/* Constants for the fast original-code-write check cache. */
-/* Usually you want this to be zero. */
-#define VG_SMC_FASTCHECK_IN_C 0
-
-#define VG_SMC_CACHE_BITS 19
-#define VG_SMC_CACHE_SIZE (1 << VG_SMC_CACHE_BITS)
-#define VG_SMC_CACHE_MASK ((VG_SMC_CACHE_SIZE) - 1)
-
-#define VG_SMC_CACHE_SHIFT 6
-
-
/* Assembly code stubs make these requests ... */
#define VG_USERREQ__SIGNAL_RETURNS 0x4001
#define VG_USERREQ__PTHREAD_RETURNS 0x4002
}
-/* A word in memory containing a pointer to vg_helper_smc_check4.
- Never changes.
-*/
-static const Addr vg_helper_smc_check4_ADDR
- = (Addr)&VG_(helper_smc_check4);
-
-static void synth_orig_code_write_check ( Int sz, Int reg )
-{
- UInt offset;
-
- /*
- In this example, reg is %eax and sz == 8:
-
- -- check the first four bytes
- 0087 89C5 movl %eax, %ebp
- 0089 FF1544332211 call * 0x11223344
-
- -- check the second four
- 008f 89C5 movl %eax, %ebp
- 0091 83C504 addl $4, %ebp
- 0094 FF1544332211 call * 0x11223344
-
- Because we can't call an absolute address (alas), the
- address called is stored in memory at 0x11223344 in this
- example, and it just contains the address of
- vg_helper_smc_check4 -- which is where we really want
- to get to.
- */
- vg_assert(0);
-
- if (sz < 4) sz = 4;
-
- for (offset = 0; offset < sz; offset += 4) {
-
- emit_movl_reg_reg ( reg, R_EBP );
-
- if (offset > 0) {
- newEmit();
- emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset );
- if (dis) VG_(printf)("\n");
- }
-
- newEmit();
- emitB ( 0xFF ); emitB ( 0x15 );
- emitL ( (Addr)&vg_helper_smc_check4_ADDR );
- if (dis) VG_(printf)("\n");
- }
-}
-
-
/* Synthesise a minimal test (and which discards result) of reg32
   against lit.  It's always safe to simply
emit_testv_lit_reg ( 4, lit, reg32 )
vg_assert(u->tag1 == RealReg);
vg_assert(u->tag2 == RealReg);
synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
+ /* No longer possible, but retained for illustrative purposes.
if (u->smc_check)
synth_orig_code_write_check ( u->size, u->val2 );
+ */
break;
}
synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
u->val1 & 0xFF,
u->val2 );
+ /* No longer possible, but retained for illustrative purposes.
if (u->opcode == FPU_W && u->smc_check)
synth_orig_code_write_check ( u->size, u->val2 );
+ */
break;
case FPU:
ret
-/* Do a original-code-write check for the address in %ebp. */
-.global VG_(helper_smc_check4)
-VG_(helper_smc_check4):
-#if VG_SMC_FASTCHECK_IN_C
-
- # save the live regs
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- pushl %esi
- pushl %edi
-
- pushl %ebp
- call VG_(smc_check4)
- addl $4, %esp
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
-
- ret
-#else
- incl VG_(smc_total_check4s)
- pushl %ebp
- shrl $VG_SMC_CACHE_SHIFT, %ebp
- andl $VG_SMC_CACHE_MASK, %ebp
- cmpb $0, VG_(smc_cache)(%ebp)
- jnz vg_smc_cache_failure
- addl $4, %esp
- ret
- vg_smc_cache_failure:
- popl %ebp
- pushal
- pushl %ebp
- call VG_(smc_check4)
- addl $4, %esp
- popal
- ret
-#endif
-
-
/* Fetch the time-stamp-ctr reg.
On entry:
dummy, replaced by %EAX value
extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
Char* fn_name, Int n_fn_name);
-extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
/* ---------------------------------------------------------------------
/* total of register ranks over all translations */
extern UInt VG_(total_reg_rank);
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-//extern UInt VG_(smc_total_check4s);
-
-/* Number of writes which the fast smc check couldn't show were
- harmless. */
-extern UInt VG_(smc_cache_passed);
-
-/* Numnber of writes which really did write on original code. */
-extern UInt VG_(smc_fancy_passed);
-
-/* Number of translations discarded as a result. */
-//extern UInt VG_(smc_discard_count);
-
/* Counts pertaining to internal sanity checking. */
extern UInt VG_(sanity_fast_count);
extern UInt VG_(sanity_slow_count);
extern void VG_(flush_transtab) ( void );
extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size );
extern void VG_(add_to_trans_tab) ( TTEntry* tte );
+extern void VG_(invalidate_translations) ( Addr start, UInt range );
-extern void VG_(smc_mark_original) ( Addr original_addr,
- Int original_len );
-
-extern void VG_(init_transtab_and_SMC) ( void );
+extern void VG_(init_tt_tc) ( void );
extern void VG_(sanity_check_tc_tt) ( void );
extern Addr VG_(search_transtab) ( Addr original_addr );
Exports of vg_helpers.S
------------------------------------------------------------------ */
-/* SMC fast checks. */
-extern void VG_(helper_smc_check4);
-
/* Mul, div, etc, -- we don't codegen these directly. */
extern void VG_(helper_idiv_64_32);
extern void VG_(helper_div_64_32);
extern void VG_(cachesim_log_non_mem_instr)( iCC* cc );
extern void VG_(cachesim_log_mem_instr) ( idCC* cc, Addr data_addr );
+extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+
+
/* ---------------------------------------------------------------------
The state of the simulated CPU.
------------------------------------------------------------------ */
UInt VG_(total_reg_rank) = 0;
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-UInt VG_(smc_total_check4s) = 0;
-
-/* Number of writes which the fast smc check couldn't show were
- harmless. */
-UInt VG_(smc_cache_passed) = 0;
-
-/* Numnber of writes which really did write on original code. */
-UInt VG_(smc_fancy_passed) = 0;
-
-/* Number of translations discarded as a result. */
-UInt VG_(smc_discard_count) = 0;
-
-
/* Counts pertaining to internal sanity checking. */
UInt VG_(sanity_fast_count) = 0;
UInt VG_(sanity_slow_count) = 0;
VG_(uinstrs_prealloc),
VG_(uinstrs_spill),
VG_(total_reg_rank) );
- VG_(message)(Vg_DebugMsg,
- "smc-check: %d checks, %d fast pass, "
- "%d slow pass, %d discards.",
- VG_(smc_total_check4s),
- VG_(smc_cache_passed),
- VG_(smc_fancy_passed),
- VG_(smc_discard_count) );
VG_(message)(Vg_DebugMsg,
" sanity: %d cheap, %d expensive checks.",
VG_(sanity_fast_count),
VGP_PUSHCC(VgpInitAudit);
VGM_(init_memory_audit)();
VGP_POPCC;
- VGP_PUSHCC(VgpReadSyms);
- VG_(read_symbols)();
- VGP_POPCC;
}
+ VGP_PUSHCC(VgpReadSyms);
+ VG_(read_symbols)();
+ VGP_POPCC;
+
/* End calibration of our RDTSC-based clock, leaving it as long as
we can. */
VG_(end_rdtsc_calibration)();
carefully sets up the permissions maps to cover the anonymous
mmaps for the translation table and translation cache, which
wastes > 20M of virtual address space. */
- VG_(init_transtab_and_SMC)();
+ VG_(init_tt_tc)();
if (VG_(clo_verbosity) == 1) {
VG_(message)(Vg_UserMsg,
VG_(overall_in_count) ++;
VG_(overall_in_osize) += orig_size;
VG_(overall_in_tsize) += trans_size;
- /* Record translated area for SMC detection. */
- VG_(smc_mark_original) ( orig_addr, orig_size );
}
case VG_USERREQ__MAKE_NOACCESS_STACK:
case VG_USERREQ__RUNNING_ON_VALGRIND:
case VG_USERREQ__DO_LEAK_CHECK:
+ case VG_USERREQ__DISCARD_TRANSLATIONS:
SET_EDX(
tid,
VG_(handle_client_request) ( &VG_(threads)[tid], arg )
/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
dlopen()ed libraries, which is something that KDE3 does a lot.
- Still kludgey, though less than before:
- * we don't check whether we should throw away some symbol tables
- when munmap() happens
+ Stabs reader greatly improved by Nick Nethercote, Apr 02.
- * symbol table reading code for ELF binaries is a shambles.
- Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
+ 16 May 02: when notified about munmap, return a Bool indicating
+ whether or not the area being munmapped had executable permissions.
+ This is then used to determine whether or not
+ VG_(invalidate_translations) should be called for that area.  So that
+ this works even if --instrument=no, we still keep track of the mapped
+ executable segments in that case, but do not load any debug info or
+ symbols.
*/
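
/* A minimal sketch of the intended call pattern at the munmap
   notification site (the caller shown is illustrative; this file only
   establishes the return-a-Bool contract):

      Bool was_exe = VG_(symtab_notify_munmap) ( start, length );
      if (was_exe)
         VG_(invalidate_translations) ( start, length );
*/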
/*------------------------------------------------------------*/
= si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
/* And actually fill it up. */
- vg_read_lib_symbols ( si );
- canonicaliseSymtab ( si );
- canonicaliseLoctab ( si );
+ if (VG_(clo_instrument) || VG_(clo_cachesim)) {
+ vg_read_lib_symbols ( si );
+ canonicaliseSymtab ( si );
+ canonicaliseLoctab ( si );
+ }
}
which happen to correspond to the munmap()d area. */
void VG_(read_symbols) ( void )
{
- if (! VG_(clo_instrument) && ! VG_(clo_cachesim))
- return;
-
VG_(read_procselfmaps) ( read_symtab_callback );
/* Do a sanity check on the symbol tables: ensure that the address
/* the main assertion */
overlap = (lo <= lo2 && lo2 <= hi)
|| (lo <= hi2 && hi2 <= hi);
- //vg_assert(!overlap);
if (overlap) {
VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
ppSegInfo ( si );
to a segment for a .so, and if so discard the relevant SegInfo.
This might not be a very clever idea from the point of view of
accuracy of error messages, but we need to do it in order to
- maintain the no-overlapping invariant.
+ maintain the no-overlapping invariant.
+
+ 16 May 02: Returns a Bool indicating whether or not the discarded
+ range falls inside a known executable segment. See comment at top
+ of file for why.
*/
-void VG_(symtab_notify_munmap) ( Addr start, UInt length )
+Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
{
SegInfo *prev, *curr;
- if (! VG_(clo_instrument))
- return;
-
prev = NULL;
curr = segInfo;
while (True) {
prev = curr;
curr = curr->next;
}
- if (curr == NULL) return;
+ if (curr == NULL)
+ return False;
VG_(message)(Vg_UserMsg,
"discard syms in %s due to munmap()",
}
freeSegInfo(curr);
+ return True;
}
Important! If you change the set of allocatable registers from
%eax, %ebx, %ecx, %edx, %esi you must change the
- save/restore sequences in vg_helper_smc_check4 to match!
+ save/restore sequences in various places to match!
*/
__inline__ Int VG_(rankToRealRegNo) ( Int rank )
{
#include "vg_include.h"
#include "vg_constants.h"
+/* #define DEBUG_TRANSTAB */
+
/*------------------------------------------------------------*/
/*--- Management of the LRU-based translation table+cache. ---*/
of code retranslation. */
/* Size of the translation cache, in bytes. */
-#define VG_TC_SIZE /*16000000*/ 32000000 /*40000000*/
+#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/
/* Do a LRU pass when the translation cache becomes this full. */
#define VG_TC_LIMIT_PERCENT 98
/* Number of entries in the translation table. This must be a prime
number in order to make the hashing work properly. */
-#define VG_TT_SIZE /*100129*/ 200191 /*250829*/
+#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/
/* Do an LRU pass when the translation table becomes this full. */
#define VG_TT_LIMIT_PERCENT /*67*/ 80
N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago. */
#define VG_N_EPOCHS /*2000*/ /*4000*/ 20000
-/* This TT entry is empty. */
+/* This TT entry is empty. There is no associated TC storage. */
#define VG_TTE_EMPTY ((Addr)1)
-/* This TT entry has been deleted. */
+/* This TT entry has been deleted, in the sense that it does not
+ contribute to the orig->trans mapping. However, the ex-translation
+ it points at still occupies space in TC. This slot cannot be
+ re-used without doing an LRU pass. */
#define VG_TTE_DELETED ((Addr)3)
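
/* Summarising the two cases above (a sketch, not in the original
   source): a slot goes EMPTY -> in-use when a translation is added;
   in-use -> DELETED when an LRU pass or VG_(invalidate_translations)
   retires it; and DELETED -> EMPTY only when the next LRU pass
   compacts the TC and reclaims the storage.  This is why vg_tt_used
   counts deleted entries too. */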
/* The TC. This used to be statically allocated, but that forces many
*/
static UChar* vg_tc = NULL;
-/* Count of bytes used in the TC. */
+/* Count of bytes used in the TC. This includes those pointed to from
+ VG_TTE_DELETED entries. */
static Int vg_tc_used = 0;
/* The TT. Like TC, for the same reason, is dynamically allocated at
*/
static TTEntry* vg_tt = NULL;
-/* Count of non-empty, non-deleted TT entries. */
+/* Count of non-empty TT entries. This includes deleted ones. */
static Int vg_tt_used = 0;
/* Fast helper for the TT. A direct-mapped cache which holds a
if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
return;
+# ifdef DEBUG_TRANSTAB
+ VG_(sanity_check_tc_tt)();
+# endif
+
VGP_PUSHCC(VgpDoLRU);
/*
VG_(printf)(
vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
for (i = 0; i < VG_TT_SIZE; i++) {
- if (vg_tt[i].orig_addr == VG_TTE_EMPTY ||
- vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED)
+ continue;
j = vg_tt[i].mru_epoch;
vg_assert(j <= VG_(current_epoch));
j = VG_(current_epoch) - j;
recently used at most thresh epochs ago. Traverse the TT and
mark such entries as deleted. */
for (i = 0; i < VG_TT_SIZE; i++) {
- if (vg_tt[i].orig_addr == VG_TTE_EMPTY ||
- vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED)
+ continue;
if (vg_tt[i].mru_epoch <= thresh) {
vg_tt[i].orig_addr = VG_TTE_DELETED;
- vg_tt_used--;
VG_(this_epoch_out_count) ++;
VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
}
}
- vg_assert(vg_tt_used >= 0);
- vg_assert(vg_tt_used <= tt_target);
-
/* Now compact the TC, sliding live entries downwards to fill spaces
left by deleted entries. In this loop, r is the offset in TC of
the current translation under consideration, and w is the next
vg_tc[w+i] = vg_tc[r+i];
tte->trans_addr = (Addr)&vg_tc[w+4];
w += 4+tte->trans_size;
+ } else {
+ tte->orig_addr = VG_TTE_EMPTY;
+ vg_tt_used--;
}
r += 4+tte->trans_size;
}
vg_assert(w <= tc_target);
vg_tc_used = w;
+ vg_assert(vg_tt_used >= 0);
+ vg_assert(vg_tt_used <= tt_target);
+
/* Invalidate the fast cache, since it is now out of date. It will get
reconstructed incrementally when the client resumes. */
VG_(invalidate_tt_fast)();
);
/* Reconstruct the SMC detection structures. */
+# ifdef DEBUG_TRANSTAB
+ for (i = 0; i < VG_TT_SIZE; i++)
+ vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
+# endif
+ VG_(sanity_check_tc_tt)();
VGP_POPCC;
}
for (i = 0; i < VG_TT_SIZE; i++) {
tte = &vg_tt[i];
if (tte->orig_addr == VG_TTE_EMPTY) continue;
- if (tte->orig_addr == VG_TTE_DELETED) continue;
vg_assert(tte->mru_epoch >= 0);
vg_assert(tte->mru_epoch <= VG_(current_epoch));
counted_entries++;
while (True) {
if (vg_tt[i].orig_addr == tte->orig_addr)
VG_(panic)("add_to_trans_tab: duplicate");
- if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
- vg_tt[i].orig_addr == VG_TTE_EMPTY) {
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY) {
/* Put it here, and set the back pointer. */
vg_tt[i] = *tte;
VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
*/
static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
{
- //static Int queries = 0;
- //static Int probes = 0;
+ //static Int queries = 0;
+ //static Int probes = 0;
Int i;
/* Hash to get initial probe point. */
// if (queries == 10000) {
//queries++;
i = ((UInt)orig_addr) % VG_TT_SIZE;
while (True) {
- //probes++;
+ //probes++;
if (vg_tt[i].orig_addr == orig_addr)
return &vg_tt[i];
if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
}
-/*------------------------------------------------------------*/
-/*--- Detecting and handling self-modifying code. ---*/
-/*------------------------------------------------------------*/
-
-/* This mechanism uses two data structures:
-
- vg_oldmap -- array[64k] of Bool, which approximately records
- parts of the address space corresponding to code for which
- a translation exists in the translation table. vg_oldmap is
- consulted at each write, to determine whether that write might
- be writing a code address; if so, the program is stopped at
- the next jump, and the corresponding translations are invalidated.
-
- Precise semantics: vg_oldmap[(a >> 8) & 0xFFFF] is true for all
- addresses a containing a code byte which has been translated. So
- it acts kind-of like a direct-mapped cache with 64k entries.
-
- The second structure is vg_CAW, a small array of addresses at which
- vg_oldmap indicates a code write may have happened. This is
- (effectively) checked at each control transfer (jump), so that
- translations can be discarded before going on. An array is
- somewhat overkill, since it strikes me as very unlikely that a
- single basic block will do more than one code write. Nevertheless
- ...
-
- ToDo: make this comment up-to-date.
+/* Invalidate translations of original code [start .. start + range - 1].
+ This is slow, so you *really* don't want to call it very often.
*/
-
-
-/* Definitions for the self-modifying-code detection cache, intended
- as a fast check which clears the vast majority of writes. */
-
-#define VG_SMC_CACHE_HASH(aaa) \
- ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK)
-
-Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE];
-
-
-/* Definitions for the fallback mechanism, which, more slowly,
- provides a precise record of which words in the address space
- belong to original code. */
-
-typedef struct { UChar chars[2048]; } VgSmcSecondary;
-
-static VgSmcSecondary* vg_smc_primary[65536];
-
-static VgSmcSecondary* vg_smc_new_secondary ( void )
-{
- Int i;
- VgSmcSecondary* sec
- = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) );
- for (i = 0; i < 2048; i++)
- sec->chars[i] = 0;
- return sec;
-}
-
-#define GET_BIT_ARRAY(arr,indx) \
- (1 & ( ((UChar*)arr)[((UInt)indx) / 8] \
- >> ( ((UInt)indx) % 8) ) )
-
-#define SET_BIT_ARRAY(arr,indx) \
- ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8)
-
-
-/* Finally, a place to record the original-code-write addresses
- detected in a basic block. */
-
-#define VG_ORIGWRITES_SIZE 10
-
-static Addr vg_origwrites[VG_ORIGWRITES_SIZE];
-static Int vg_origwrites_used;
-
-
-/* Call here to check a written address. */
-
-void VG_(smc_check4) ( Addr a )
+void VG_(invalidate_translations) ( Addr start, UInt range )
{
- UInt bit_index;
- VgSmcSecondary* smc_secondary;
+ Addr i_start, i_end, o_start, o_end;
+ UInt out_count, out_osize, out_tsize;
+ Int i;
-# if VG_SMC_FASTCHECK_IN_C
- VG_(smc_total_check4s)++;
-
- /* Try the fast check first. */
- if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return;
+# ifdef DEBUG_TRANSTAB
+ VG_(sanity_check_tc_tt)();
# endif
+ i_start = start;
+ i_end = start + range - 1;
+ out_count = out_osize = out_tsize = 0;
- VG_(smc_cache_passed)++;
-
- /* Need to do a slow check. */
- smc_secondary = vg_smc_primary[a >> 16];
- if (smc_secondary == NULL) return;
-
- bit_index = (a & 0xFFFF) >> 2;
- if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return;
-
- VG_(smc_fancy_passed)++;
-
- /* Detected a Real Live write to code which has been translated.
- Note it. */
- if (vg_origwrites_used == VG_ORIGWRITES_SIZE)
- VG_(panic)("VG_ORIGWRITES_SIZE is too small; "
- "increase and recompile.");
- vg_origwrites[vg_origwrites_used] = a;
- vg_origwrites_used++;
-
- VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a);
-
- /* Force an exit before the next basic block, so the translation
- cache can be flushed appropriately. */
- // VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr);
- //VG_(dispatch_ctr) = 1;
- //VG_(interrupt_reason) = VG_Y_SMC;
-}
-
-
-/* Mark an address range as containing an original translation,
- updating both the fast-check cache and the slow-but-correct data
- structure.
-*/
-void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size )
-{
- Addr a;
- VgSmcSecondary* smc_secondary;
- UInt bit_index;
-
- for (a = orig_addr; a < orig_addr+orig_size; a++) {
-
- VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True;
-
- smc_secondary = vg_smc_primary[a >> 16];
- if (smc_secondary == NULL)
- smc_secondary =
- vg_smc_primary[a >> 16] = vg_smc_new_secondary();
-
- bit_index = (a & 0xFFFF) >> 2;
- SET_BIT_ARRAY(smc_secondary->chars, bit_index);
+ for (i = 0; i < VG_TT_SIZE; i++) {
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ o_start = vg_tt[i].orig_addr;
+ o_end = o_start + vg_tt[i].orig_size - 1;
+ if (o_end < i_start || o_start > i_end)
+ continue;
+ if (VG_(clo_cachesim))
+ VG_(cachesim_notify_discard)( & vg_tt[i] );
+ vg_tt[i].orig_addr = VG_TTE_DELETED;
+ VG_(this_epoch_out_count) ++;
+ VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
+ VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
+ VG_(overall_out_count) ++;
+ VG_(overall_out_osize) += vg_tt[i].orig_size;
+ VG_(overall_out_tsize) += vg_tt[i].trans_size;
+ out_count ++;
+ out_osize += vg_tt[i].orig_size;
+ out_tsize += vg_tt[i].trans_size;
}
-}
-
-/* Discard any translations whose original code overlaps with the
- range w_addr .. w_addr+3 inclusive.
-*/
-__attribute__ ((unused))
-static void discard_translations_bracketing ( Addr w_addr )
-{
-# if 0
- Int i, rd, wr;
- Addr o_start, o_end;
- TTEntry* tt;
-
- for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) {
- tt = vg_transtab[i];
- wr = 0;
- for (rd = 0; rd < vg_transtab_used[i]; rd++) {
- o_start = tt[rd].orig_addr;
- o_end = o_start + tt[rd].orig_size;
- if (w_addr > o_end || (w_addr+3) < o_start) {
- /* No collision possible; keep this translation */
- VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size );
- if (wr < rd) vg_transtab[wr] = vg_transtab[rd];
- wr++;
- } else {
- /* Possible collision; discard. */
- vg_smc_discards++;
- VG_(message) (Vg_DebugMsg,
- "discarding translation of %p .. %p",
- tt[rd].orig_addr,
- tt[rd].orig_addr + tt[rd].orig_size - 1);
- VG_(free)((void*)tt[rd].trans_addr);
- }
+ if (out_count > 0) {
+ VG_(invalidate_tt_fast)();
+ VG_(sanity_check_tc_tt)();
+# ifdef DEBUG_TRANSTAB
+ { Addr aa;
+ for (aa = i_start; aa <= i_end; aa++)
+ vg_assert(search_trans_table ( aa ) == NULL);
}
- vg_transtab_used[i] = wr;
- }
-# endif
-}
-
-
-/* Top-level function in charge of discarding out-of-date translations
- following the discovery of a (potential) original-code-write.
-*/
-void VG_(flush_transtab) ( void )
-{
-# if 0
- Addr w_addr;
- Int i, j;
-
- /* We shouldn't be here unless a code write was detected. */
- vg_assert(vg_origwrites_used > 0);
-
- /* Instead of incrementally fixing up the translation table cache,
- just invalidate the whole darn thing. Pray this doesn't happen
- very often :) */
- for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++)
- VG_(transtab_cache_orig)[i] =
- VG_(transtab_cache_trans)[i] = (Addr)0;
-
- /* Clear out the fast cache; discard_translations_bracketing
- reconstructs it. */
- for (i = 0; i < VG_SMC_CACHE_SIZE; i++)
- VG_(smc_cache)[i] = False;
-
- /* And also clear the slow-but-correct table. */
- for (i = 0; i < 65536; i++) {
- VgSmcSecondary* sec = vg_smc_primary[i];
- if (sec)
- for (j = 0; j < 2048; j++)
- sec->chars[j] = 0;
+# endif
}
- /* This doesn't need to be particularly fast, since we (presumably)
- don't have to handle particularly frequent writes to code
- addresses. */
- while (vg_origwrites_used > 0) {
- vg_origwrites_used--;
- w_addr = vg_origwrites[vg_origwrites_used];
- discard_translations_bracketing ( w_addr );
- }
-
- vg_assert(vg_origwrites_used == 0);
-# endif
+   if (1 || VG_(clo_verbosity) > 1)
+ VG_(message)(Vg_UserMsg,
+ "discard %d (%d -> %d) translations in range %p .. %p",
+ out_count, out_osize, out_tsize, i_start, i_end );
}
/*--- Initialisation. ---*/
/*------------------------------------------------------------*/
-void VG_(init_transtab_and_SMC) ( void )
+void VG_(init_tt_tc) ( void )
{
Int i;
at the first TT entry, which is, of course, empty. */
for (i = 0; i < VG_TT_FAST_SIZE; i++)
VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
-
- /* No part of the address space has any translations. */
- for (i = 0; i < 65536; i++)
- vg_smc_primary[i] = NULL;
-
- /* ... and the associated fast-check cache reflects this. */
- for (i = 0; i < VG_SMC_CACHE_SIZE; i++)
- VG_(smc_cache)[i] = False;
-
- /* Finally, no original-code-writes have been recorded. */
- vg_origwrites_used = 0;
}
/*--------------------------------------------------------------------*/
_zzq_arg4 /* request fourth param */ ) \
\
{ volatile unsigned int _zzq_args[5]; \
- _zzq_args[0] = (volatile unsigned int)_zzq_request; \
- _zzq_args[1] = (volatile unsigned int)_zzq_arg1; \
- _zzq_args[2] = (volatile unsigned int)_zzq_arg2; \
- _zzq_args[3] = (volatile unsigned int)_zzq_arg3; \
- _zzq_args[4] = (volatile unsigned int)_zzq_arg4; \
+ _zzq_args[0] = (volatile unsigned int)(_zzq_request); \
+ _zzq_args[1] = (volatile unsigned int)(_zzq_arg1); \
+ _zzq_args[2] = (volatile unsigned int)(_zzq_arg2); \
+ _zzq_args[3] = (volatile unsigned int)(_zzq_arg3); \
+ _zzq_args[4] = (volatile unsigned int)(_zzq_arg4); \
asm volatile("movl %1, %%eax\n\t" \
"movl %2, %%edx\n\t" \
"roll $29, %%eax ; roll $3, %%eax\n\t" \
#define VG_USERREQ__CHECK_READABLE 0x1006
#define VG_USERREQ__MAKE_NOACCESS_STACK 0x1007
#define VG_USERREQ__RUNNING_ON_VALGRIND 0x1008
-#define VG_USERREQ__DO_LEAK_CHECK 0x1009 /* unimplemented */
-
+#define VG_USERREQ__DO_LEAK_CHECK 0x1009 /* untested */
+#define VG_USERREQ__DISCARD_TRANSLATIONS 0x100A
/* Client-code macros to manipulate the state of memory. */
0, 0, 0, 0); \
}
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+ _qzz_len - 1]. Useful if you are debugging a JITter or some such,
+ since it provides a way to make sure valgrind will retranslate the
+ invalidated area. Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \
+ {unsigned int _qzz_res; \
+ VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, \
+ VG_USERREQ__DISCARD_TRANSLATIONS, \
+ _qzz_addr, _qzz_len, 0, 0); \
+ }
+
+
#endif
--- /dev/null
+
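+/* Regression test for VALGRIND_DISCARD_TRANSLATIONS: run fooble() so
+   it gets translated, discard the translations covering it, then run
+   it again, forcing a retranslation.  The length passed to the request
+   assumes the compiler places someother() directly after fooble(). */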
+#include <stdio.h>
+#include <valgrind.h>
+
+int fooble ( void )
+{
+ int x, y;
+ y = 0;
+ for (x = 0; x < 100; x++) {
+ if ((x % 3) == 0) y += x; else y++;
+ }
+ return y;
+}
+
+void someother ( void )
+{
+}
+
+int main ( void )
+{
+ printf("fooble-1() = %d\n", fooble() );
+ VALGRIND_DISCARD_TRANSLATIONS( (char*)(&fooble),
+ ((char*)(&someother)) - ((char*)(&fooble)) );
+ printf("fooble-2() = %d\n", fooble() );
+ return 0;
+}
+
VG_(detect_memory_leaks)();
return 0; /* return value is meaningless */
+ case VG_USERREQ__DISCARD_TRANSLATIONS:
+ VG_(invalidate_translations)( arg[1], arg[2] );
+ return 0; /* return value is meaningless */
+
default:
VG_(message)(Vg_UserMsg,
"Warning: unknown client request code %d", arg[0]);
/* Constants for the fast original-code-write check cache. */
-/* Usually you want this to be zero. */
-#define VG_SMC_FASTCHECK_IN_C 0
-
-#define VG_SMC_CACHE_BITS 19
-#define VG_SMC_CACHE_SIZE (1 << VG_SMC_CACHE_BITS)
-#define VG_SMC_CACHE_MASK ((VG_SMC_CACHE_SIZE) - 1)
-
-#define VG_SMC_CACHE_SHIFT 6
-
-
/* Assembly code stubs make these requests ... */
#define VG_USERREQ__SIGNAL_RETURNS 0x4001
#define VG_USERREQ__PTHREAD_RETURNS 0x4002
}
-/* A word in memory containing a pointer to vg_helper_smc_check4.
- Never changes.
-*/
-static const Addr vg_helper_smc_check4_ADDR
- = (Addr)&VG_(helper_smc_check4);
-
-static void synth_orig_code_write_check ( Int sz, Int reg )
-{
- UInt offset;
-
- /*
- In this example, reg is %eax and sz == 8:
-
- -- check the first four bytes
- 0087 89C5 movl %eax, %ebp
- 0089 FF1544332211 call * 0x11223344
-
- -- check the second four
- 008f 89C5 movl %eax, %ebp
- 0091 83C504 addl $4, %ebp
- 0094 FF1544332211 call * 0x11223344
-
- Because we can't call an absolute address (alas), the
- address called is stored in memory at 0x11223344 in this
- example, and it just contains the address of
- vg_helper_smc_check4 -- which is where we really want
- to get to.
- */
- vg_assert(0);
-
- if (sz < 4) sz = 4;
-
- for (offset = 0; offset < sz; offset += 4) {
-
- emit_movl_reg_reg ( reg, R_EBP );
-
- if (offset > 0) {
- newEmit();
- emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset );
- if (dis) VG_(printf)("\n");
- }
-
- newEmit();
- emitB ( 0xFF ); emitB ( 0x15 );
- emitL ( (Addr)&vg_helper_smc_check4_ADDR );
- if (dis) VG_(printf)("\n");
- }
-}
-
-
/* Synthesise a minimal test (and which discards result) of reg32
against lit. It's always safe do simply
emit_testv_lit_reg ( 4, lit, reg32 )
vg_assert(u->tag1 == RealReg);
vg_assert(u->tag2 == RealReg);
synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
+ /* No longer possible, but retained for illustrative purposes.
if (u->smc_check)
synth_orig_code_write_check ( u->size, u->val2 );
+ */
break;
}
synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
u->val1 & 0xFF,
u->val2 );
+ /* No longer possible, but retained for illustrative purposes.
if (u->opcode == FPU_W && u->smc_check)
synth_orig_code_write_check ( u->size, u->val2 );
+ */
break;
case FPU:
ret
-/* Do a original-code-write check for the address in %ebp. */
-.global VG_(helper_smc_check4)
-VG_(helper_smc_check4):
-#if VG_SMC_FASTCHECK_IN_C
-
- # save the live regs
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- pushl %esi
- pushl %edi
-
- pushl %ebp
- call VG_(smc_check4)
- addl $4, %esp
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
-
- ret
-#else
- incl VG_(smc_total_check4s)
- pushl %ebp
- shrl $VG_SMC_CACHE_SHIFT, %ebp
- andl $VG_SMC_CACHE_MASK, %ebp
- cmpb $0, VG_(smc_cache)(%ebp)
- jnz vg_smc_cache_failure
- addl $4, %esp
- ret
- vg_smc_cache_failure:
- popl %ebp
- pushal
- pushl %ebp
- call VG_(smc_check4)
- addl $4, %esp
- popal
- ret
-#endif
-
-
/* Fetch the time-stamp-ctr reg.
On entry:
dummy, replaced by %EAX value
extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
Char* fn_name, Int n_fn_name);
-extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
/* ---------------------------------------------------------------------
/* total of register ranks over all translations */
extern UInt VG_(total_reg_rank);
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-//extern UInt VG_(smc_total_check4s);
-
-/* Number of writes which the fast smc check couldn't show were
- harmless. */
-extern UInt VG_(smc_cache_passed);
-
-/* Numnber of writes which really did write on original code. */
-extern UInt VG_(smc_fancy_passed);
-
-/* Number of translations discarded as a result. */
-//extern UInt VG_(smc_discard_count);
-
/* Counts pertaining to internal sanity checking. */
extern UInt VG_(sanity_fast_count);
extern UInt VG_(sanity_slow_count);
extern void VG_(flush_transtab) ( void );
extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size );
extern void VG_(add_to_trans_tab) ( TTEntry* tte );
+extern void VG_(invalidate_translations) ( Addr start, UInt range );
-extern void VG_(smc_mark_original) ( Addr original_addr,
- Int original_len );
-
-extern void VG_(init_transtab_and_SMC) ( void );
+extern void VG_(init_tt_tc) ( void );
extern void VG_(sanity_check_tc_tt) ( void );
extern Addr VG_(search_transtab) ( Addr original_addr );
Exports of vg_helpers.S
------------------------------------------------------------------ */
-/* SMC fast checks. */
-extern void VG_(helper_smc_check4);
-
/* Mul, div, etc, -- we don't codegen these directly. */
extern void VG_(helper_idiv_64_32);
extern void VG_(helper_div_64_32);
extern void VG_(cachesim_log_non_mem_instr)( iCC* cc );
extern void VG_(cachesim_log_mem_instr) ( idCC* cc, Addr data_addr );
+extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+
+
/* ---------------------------------------------------------------------
The state of the simulated CPU.
------------------------------------------------------------------ */
UInt VG_(total_reg_rank) = 0;
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-UInt VG_(smc_total_check4s) = 0;
-
-/* Number of writes which the fast smc check couldn't show were
- harmless. */
-UInt VG_(smc_cache_passed) = 0;
-
-/* Numnber of writes which really did write on original code. */
-UInt VG_(smc_fancy_passed) = 0;
-
-/* Number of translations discarded as a result. */
-UInt VG_(smc_discard_count) = 0;
-
-
/* Counts pertaining to internal sanity checking. */
UInt VG_(sanity_fast_count) = 0;
UInt VG_(sanity_slow_count) = 0;
VG_(uinstrs_prealloc),
VG_(uinstrs_spill),
VG_(total_reg_rank) );
- VG_(message)(Vg_DebugMsg,
- "smc-check: %d checks, %d fast pass, "
- "%d slow pass, %d discards.",
- VG_(smc_total_check4s),
- VG_(smc_cache_passed),
- VG_(smc_fancy_passed),
- VG_(smc_discard_count) );
VG_(message)(Vg_DebugMsg,
" sanity: %d cheap, %d expensive checks.",
VG_(sanity_fast_count),
VGP_PUSHCC(VgpInitAudit);
VGM_(init_memory_audit)();
VGP_POPCC;
- VGP_PUSHCC(VgpReadSyms);
- VG_(read_symbols)();
- VGP_POPCC;
}
+ VGP_PUSHCC(VgpReadSyms);
+ VG_(read_symbols)();
+ VGP_POPCC;
+
/* End calibration of our RDTSC-based clock, leaving it as long as
we can. */
VG_(end_rdtsc_calibration)();
carefully sets up the permissions maps to cover the anonymous
mmaps for the translation table and translation cache, which
wastes > 20M of virtual address space. */
- VG_(init_transtab_and_SMC)();
+ VG_(init_tt_tc)();
if (VG_(clo_verbosity) == 1) {
VG_(message)(Vg_UserMsg,
VG_(overall_in_count) ++;
VG_(overall_in_osize) += orig_size;
VG_(overall_in_tsize) += trans_size;
- /* Record translated area for SMC detection. */
- VG_(smc_mark_original) ( orig_addr, orig_size );
}
case VG_USERREQ__MAKE_NOACCESS_STACK:
case VG_USERREQ__RUNNING_ON_VALGRIND:
case VG_USERREQ__DO_LEAK_CHECK:
+ case VG_USERREQ__DISCARD_TRANSLATIONS:
SET_EDX(
tid,
VG_(handle_client_request) ( &VG_(threads)[tid], arg )
/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
dlopen()ed libraries, which is something that KDE3 does a lot.
- Still kludgey, though less than before:
- * we don't check whether we should throw away some symbol tables
- when munmap() happens
+ Stabs reader greatly improved by Nick Nethercote, Apr 02.
- * symbol table reading code for ELF binaries is a shambles.
- Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
+ 16 May 02: when notified about munmap, return a Bool indicating
+ whether or not the area being munmapped had executable permissions.
+ This is then used to determine whether or not
+ VG_(invalidate_translations) should be called for that area. So
+ that this works even with --instrument=no, we still keep track of
+ the mapped executable segments in that case, but do not load any
+ debug info or symbols.
*/
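
(Aside: to make the above concrete, here is a minimal, self-contained
sketch of the executable-segment bookkeeping the comment describes.
All names and types are hypothetical illustrations, not Valgrind's
actual code.)

#include <stdlib.h>

typedef struct ExeSeg_ {
   unsigned int    start;
   unsigned int    size;
   struct ExeSeg_* next;
} ExeSeg;

static ExeSeg* exeSegs = NULL;

/* Record every executable mapping, even when --instrument=no, so a
   later munmap can be classified. */
static void record_exe_segment ( unsigned int start, unsigned int size )
{
   ExeSeg* seg = malloc(sizeof(ExeSeg));
   if (seg == NULL) return;
   seg->start = start;
   seg->size  = size;
   seg->next  = exeSegs;
   exeSegs    = seg;
}

/* On munmap: return 1 (and unlink) if the unmapped range covers a
   known executable segment, else 0 -- the Bool the caller uses to
   decide whether to discard translations. */
static int unmapped_range_was_exe ( unsigned int start, unsigned int len )
{
   ExeSeg** prevp = &exeSegs;
   ExeSeg*  curr  = exeSegs;
   while (curr != NULL) {
      if (start <= curr->start
          && curr->start + curr->size <= start + len) {
         *prevp = curr->next;   /* segment entirely unmapped */
         free(curr);
         return 1;
      }
      prevp = &curr->next;
      curr  = curr->next;
   }
   return 0;
}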
/*------------------------------------------------------------*/
= si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
/* And actually fill it up. */
- vg_read_lib_symbols ( si );
- canonicaliseSymtab ( si );
- canonicaliseLoctab ( si );
+ if (VG_(clo_instrument) || VG_(clo_cachesim)) {
+ vg_read_lib_symbols ( si );
+ canonicaliseSymtab ( si );
+ canonicaliseLoctab ( si );
+ }
}
which happen to correspond to the munmap()d area. */
void VG_(read_symbols) ( void )
{
- if (! VG_(clo_instrument) && ! VG_(clo_cachesim))
- return;
-
VG_(read_procselfmaps) ( read_symtab_callback );
/* Do a sanity check on the symbol tables: ensure that the address
/* the main assertion */
overlap = (lo <= lo2 && lo2 <= hi)
|| (lo <= hi2 && hi2 <= hi);
- //vg_assert(!overlap);
if (overlap) {
VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
ppSegInfo ( si );
to a segment for a .so, and if so discard the relevant SegInfo.
This might not be a very clever idea from the point of view of
accuracy of error messages, but we need to do it in order to
- maintain the no-overlapping invariant.
+ maintain the no-overlapping invariant.
+
+ 16 May 02: Returns a Bool indicating whether or not the discarded
+ range falls inside a known executable segment. See comment at top
+ of file for why.
*/
-void VG_(symtab_notify_munmap) ( Addr start, UInt length )
+Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
{
SegInfo *prev, *curr;
- if (! VG_(clo_instrument))
- return;
-
prev = NULL;
curr = segInfo;
while (True) {
prev = curr;
curr = curr->next;
}
- if (curr == NULL) return;
+ if (curr == NULL)
+ return False;
VG_(message)(Vg_UserMsg,
"discard syms in %s due to munmap()",
}
freeSegInfo(curr);
+ return True;
}
KERNEL_DO_SYSCALL(tid,res);
if (!VG_(is_kerror)(res)) {
/* Copied from munmap() wrapper. */
+ Bool munmap_exe;
Addr start = arg1;
Addr length = arg2;
while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
while (((start+length) % VKI_BYTES_PER_PAGE) > 0) { length++; }
make_noaccess( start, length );
- VG_(symtab_notify_munmap) ( start, length );
+ munmap_exe = VG_(symtab_notify_munmap) ( start, length );
+ if (munmap_exe)
+ VG_(invalidate_translations) ( start, length );
approximate_mmap_permissions( (Addr)res, arg3, arg4 );
}
break;
pages. If we don't do that, our idea of addressable
memory diverges from that of the kernel's, which causes
the leak detector to crash. */
+ Bool munmap_exe;
Addr start = arg1;
Addr length = arg2;
while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
/* Tell our symbol table machinery about this, so that if
this happens to be a .so being unloaded, the relevant
symbols are removed too. */
- VG_(symtab_notify_munmap) ( start, length );
+ munmap_exe = VG_(symtab_notify_munmap) ( start, length );
+ if (munmap_exe)
+ VG_(invalidate_translations) ( start, length );
}
break;
Important! If you change the set of allocatable registers from
%eax, %ebx, %ecx, %edx, %esi you must change the
- save/restore sequences in vg_helper_smc_check4 to match!
+ save/restore sequences in various places to match!
*/
__inline__ Int VG_(rankToRealRegNo) ( Int rank )
{
#include "vg_include.h"
#include "vg_constants.h"
+/* #define DEBUG_TRANSTAB */
+
/*------------------------------------------------------------*/
/*--- Management of the LRU-based translation table+cache. ---*/
of code retranslation. */
/* Size of the translation cache, in bytes. */
-#define VG_TC_SIZE /*16000000*/ 32000000 /*40000000*/
+#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/
/* Do a LRU pass when the translation cache becomes this full. */
#define VG_TC_LIMIT_PERCENT 98
/* Number of entries in the translation table. This must be a prime
number in order to make the hashing work properly. */
-#define VG_TT_SIZE /*100129*/ 200191 /*250829*/
+#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/
/* Do an LRU pass when the translation table becomes this full. */
#define VG_TT_LIMIT_PERCENT /*67*/ 80
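
(Aside: a sketch, with hypothetical names, of how such percentage
limits turn into trigger thresholds.  The one subtlety is to divide
before multiplying: 32000000 * 98 overflows a signed 32-bit int.)

#include <assert.h>

static int limit_from_percent ( int total, int percent )
{
   assert(percent > 0 && percent <= 100);
   return (total / 100) * percent;   /* avoids 32-bit overflow */
}

/* e.g. limit_from_percent(32000000, 98) == 31360000 TC bytes, and
   limit_from_percent(200191, 80) == 160080 TT entries (the /100
   truncation loses a little precision, which is harmless here). */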
N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago. */
#define VG_N_EPOCHS /*2000*/ /*4000*/ 20000
-/* This TT entry is empty. */
+/* This TT entry is empty. There is no associated TC storage. */
#define VG_TTE_EMPTY ((Addr)1)
-/* This TT entry has been deleted. */
+/* This TT entry has been deleted, in the sense that it does not
+ contribute to the orig->trans mapping. However, the ex-translation
+ it points at still occupies space in TC. This slot cannot be
+ re-used without doing an LRU pass. */
#define VG_TTE_DELETED ((Addr)3)
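
(Aside: EMPTY vs DELETED is the standard "tombstone" scheme for
open-addressed hash tables: a lookup may stop at an EMPTY slot, but
must probe past a DELETED one, since the probe chain may continue
beyond it.  A self-contained sketch, with hypothetical names:)

#define T_SIZE    211            /* prime, like VG_TT_SIZE */
#define T_EMPTY   ((unsigned int)1)
#define T_DELETED ((unsigned int)3)

typedef struct { unsigned int orig_addr; } Entry;
static Entry table[T_SIZE];

static void init_table ( void )
{
   int i;
   for (i = 0; i < T_SIZE; i++)
      table[i].orig_addr = T_EMPTY;
}

static Entry* lookup ( unsigned int orig_addr )
{
   int i = orig_addr % T_SIZE;
   while (1) {
      if (table[i].orig_addr == orig_addr)
         return &table[i];
      if (table[i].orig_addr == T_EMPTY)
         return 0;                /* chain ends only at EMPTY */
      i++;                        /* DELETED: keep probing */
      if (i == T_SIZE) i = 0;
   }
}

This is also why the insertion code further down now reuses only
VG_TTE_EMPTY slots: a DELETED entry still owns storage in the TC, so
overwriting it before an LRU pass would break the TC accounting.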
/* The TC. This used to be statically allocated, but that forces many
*/
static UChar* vg_tc = NULL;
-/* Count of bytes used in the TC. */
+/* Count of bytes used in the TC. This includes those pointed to from
+ VG_TTE_DELETED entries. */
static Int vg_tc_used = 0;
/* The TT. Like TC, for the same reason, is dynamically allocated at
*/
static TTEntry* vg_tt = NULL;
-/* Count of non-empty, non-deleted TT entries. */
+/* Count of non-empty TT entries. This includes deleted ones. */
static Int vg_tt_used = 0;
/* Fast helper for the TT. A direct-mapped cache which holds a
if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
return;
+# ifdef DEBUG_TRANSTAB
+ VG_(sanity_check_tc_tt)();
+# endif
+
VGP_PUSHCC(VgpDoLRU);
/*
VG_(printf)(
vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
for (i = 0; i < VG_TT_SIZE; i++) {
- if (vg_tt[i].orig_addr == VG_TTE_EMPTY ||
- vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED)
+ continue;
j = vg_tt[i].mru_epoch;
vg_assert(j <= VG_(current_epoch));
j = VG_(current_epoch) - j;
recently used at most thresh epochs ago. Traverse the TT and
mark such entries as deleted. */
for (i = 0; i < VG_TT_SIZE; i++) {
- if (vg_tt[i].orig_addr == VG_TTE_EMPTY ||
- vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED)
+ continue;
if (vg_tt[i].mru_epoch <= thresh) {
vg_tt[i].orig_addr = VG_TTE_DELETED;
- vg_tt_used--;
VG_(this_epoch_out_count) ++;
VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
}
}
- vg_assert(vg_tt_used >= 0);
- vg_assert(vg_tt_used <= tt_target);
-
/* Now compact the TC, sliding live entries downwards to fill spaces
left by deleted entries. In this loop, r is the offset in TC of
the current translation under consideration, and w is the next
vg_tc[w+i] = vg_tc[r+i];
tte->trans_addr = (Addr)&vg_tc[w+4];
w += 4+tte->trans_size;
+ } else {
+ tte->orig_addr = VG_TTE_EMPTY;
+ vg_tt_used--;
}
r += 4+tte->trans_size;
}
vg_assert(w <= tc_target);
vg_tc_used = w;
+ vg_assert(vg_tt_used >= 0);
+ vg_assert(vg_tt_used <= tt_target);
+
/* Invalidate the fast cache, since it is now out of date. It will get
reconstructed incrementally when the client resumes. */
VG_(invalidate_tt_fast)();
);
- /* Reconstruct the SMC detection structures. */
+# ifdef DEBUG_TRANSTAB
+ for (i = 0; i < VG_TT_SIZE; i++)
+ vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
+# endif
+ VG_(sanity_check_tc_tt)();
VGP_POPCC;
}
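
(Aside: the compaction above is a classic two-cursor slide: a read
cursor r walks every translation in the TC while a write cursor w
tracks the next free byte, and live translations are copied down over
the gaps left by dead ones.  A simplified, self-contained sketch,
using a hypothetical record layout of one word of size, one word of
liveness, then the payload:)

#include <string.h>

typedef unsigned int UInt;

static UInt compact ( unsigned char* tc, UInt used )
{
   UInt r = 0, w = 0;
   while (r < used) {
      UInt size, live;
      memcpy(&size, tc + r,                sizeof(UInt));
      memcpy(&live, tc + r + sizeof(UInt), sizeof(UInt));
      if (live) {
         /* memmove: source and destination may overlap */
         if (w != r)
            memmove(tc + w, tc + r, 2*sizeof(UInt) + size);
         w += 2*sizeof(UInt) + size;
      }
      r += 2*sizeof(UInt) + size;
   }
   return w;   /* the compacted size; vg_tc_used in the real code */
}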
for (i = 0; i < VG_TT_SIZE; i++) {
tte = &vg_tt[i];
if (tte->orig_addr == VG_TTE_EMPTY) continue;
- if (tte->orig_addr == VG_TTE_DELETED) continue;
vg_assert(tte->mru_epoch >= 0);
vg_assert(tte->mru_epoch <= VG_(current_epoch));
counted_entries++;
while (True) {
if (vg_tt[i].orig_addr == tte->orig_addr)
VG_(panic)("add_to_trans_tab: duplicate");
- if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
- vg_tt[i].orig_addr == VG_TTE_EMPTY) {
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY) {
/* Put it here, and set the back pointer. */
vg_tt[i] = *tte;
VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
*/
static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
{
- //static Int queries = 0;
- //static Int probes = 0;
+ //static Int queries = 0;
+ //static Int probes = 0;
Int i;
/* Hash to get initial probe point. */
// if (queries == 10000) {
//queries++;
i = ((UInt)orig_addr) % VG_TT_SIZE;
while (True) {
- //probes++;
+ //probes++;
if (vg_tt[i].orig_addr == orig_addr)
return &vg_tt[i];
if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
}
-/*------------------------------------------------------------*/
-/*--- Detecting and handling self-modifying code. ---*/
-/*------------------------------------------------------------*/
-
-/* This mechanism uses two data structures:
-
- vg_oldmap -- array[64k] of Bool, which approximately records
- parts of the address space corresponding to code for which
- a translation exists in the translation table. vg_oldmap is
- consulted at each write, to determine whether that write might
- be writing a code address; if so, the program is stopped at
- the next jump, and the corresponding translations are invalidated.
-
- Precise semantics: vg_oldmap[(a >> 8) & 0xFFFF] is true for all
- addresses a containing a code byte which has been translated. So
- it acts kind-of like a direct-mapped cache with 64k entries.
-
- The second structure is vg_CAW, a small array of addresses at which
- vg_oldmap indicates a code write may have happened. This is
- (effectively) checked at each control transfer (jump), so that
- translations can be discarded before going on. An array is
- somewhat overkill, since it strikes me as very unlikely that a
- single basic block will do more than one code write. Nevertheless
- ...
-
- ToDo: make this comment up-to-date.
+/* Invalidate translations of original code [start .. start + range - 1].
+ This is slow, so you *really* don't want to call it very often.
*/
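
(Aside: the discard test used in the loop below is the usual
closed-interval disjointness check -- two inclusive ranges overlap
exactly when neither ends before the other starts.  Unlike the
endpoint-membership test in the segment sanity check earlier, this
form also catches the case where one range strictly contains the
other.  A sketch:)

/* [i_start,i_end] and [o_start,o_end] are inclusive ranges.
   E.g. overlap(10,19, 19,30) == 1;  overlap(10,19, 20,30) == 0;
        overlap(10,19, 0,100) == 1  (containment is caught too). */
static int ranges_overlap ( unsigned int i_start, unsigned int i_end,
                            unsigned int o_start, unsigned int o_end )
{
   return !(o_end < i_start || o_start > i_end);
}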
-
-
-/* Definitions for the self-modifying-code detection cache, intended
- as a fast check which clears the vast majority of writes. */
-
-#define VG_SMC_CACHE_HASH(aaa) \
- ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK)
-
-Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE];
-
-
-/* Definitions for the fallback mechanism, which, more slowly,
- provides a precise record of which words in the address space
- belong to original code. */
-
-typedef struct { UChar chars[2048]; } VgSmcSecondary;
-
-static VgSmcSecondary* vg_smc_primary[65536];
-
-static VgSmcSecondary* vg_smc_new_secondary ( void )
-{
- Int i;
- VgSmcSecondary* sec
- = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) );
- for (i = 0; i < 2048; i++)
- sec->chars[i] = 0;
- return sec;
-}
-
-#define GET_BIT_ARRAY(arr,indx) \
- (1 & ( ((UChar*)arr)[((UInt)indx) / 8] \
- >> ( ((UInt)indx) % 8) ) )
-
-#define SET_BIT_ARRAY(arr,indx) \
- ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8)
-
-
-/* Finally, a place to record the original-code-write addresses
- detected in a basic block. */
-
-#define VG_ORIGWRITES_SIZE 10
-
-static Addr vg_origwrites[VG_ORIGWRITES_SIZE];
-static Int vg_origwrites_used;
-
-
-/* Call here to check a written address. */
-
-void VG_(smc_check4) ( Addr a )
+void VG_(invalidate_translations) ( Addr start, UInt range )
{
- UInt bit_index;
- VgSmcSecondary* smc_secondary;
+ Addr i_start, i_end, o_start, o_end;
+ UInt out_count, out_osize, out_tsize;
+ Int i;
-# if VG_SMC_FASTCHECK_IN_C
- VG_(smc_total_check4s)++;
-
- /* Try the fast check first. */
- if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return;
+# ifdef DEBUG_TRANSTAB
+ VG_(sanity_check_tc_tt)();
# endif
+ i_start = start;
+ i_end = start + range - 1;
+ out_count = out_osize = out_tsize = 0;
- VG_(smc_cache_passed)++;
-
- /* Need to do a slow check. */
- smc_secondary = vg_smc_primary[a >> 16];
- if (smc_secondary == NULL) return;
-
- bit_index = (a & 0xFFFF) >> 2;
- if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return;
-
- VG_(smc_fancy_passed)++;
-
- /* Detected a Real Live write to code which has been translated.
- Note it. */
- if (vg_origwrites_used == VG_ORIGWRITES_SIZE)
- VG_(panic)("VG_ORIGWRITES_SIZE is too small; "
- "increase and recompile.");
- vg_origwrites[vg_origwrites_used] = a;
- vg_origwrites_used++;
-
- VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a);
-
- /* Force an exit before the next basic block, so the translation
- cache can be flushed appropriately. */
- // VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr);
- //VG_(dispatch_ctr) = 1;
- //VG_(interrupt_reason) = VG_Y_SMC;
-}
-
-
-/* Mark an address range as containing an original translation,
- updating both the fast-check cache and the slow-but-correct data
- structure.
-*/
-void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size )
-{
- Addr a;
- VgSmcSecondary* smc_secondary;
- UInt bit_index;
-
- for (a = orig_addr; a < orig_addr+orig_size; a++) {
-
- VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True;
-
- smc_secondary = vg_smc_primary[a >> 16];
- if (smc_secondary == NULL)
- smc_secondary =
- vg_smc_primary[a >> 16] = vg_smc_new_secondary();
-
- bit_index = (a & 0xFFFF) >> 2;
- SET_BIT_ARRAY(smc_secondary->chars, bit_index);
+ for (i = 0; i < VG_TT_SIZE; i++) {
+ if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+ || vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+ o_start = vg_tt[i].orig_addr;
+ o_end = o_start + vg_tt[i].orig_size - 1;
+ if (o_end < i_start || o_start > i_end)
+ continue;
+ if (VG_(clo_cachesim))
+ VG_(cachesim_notify_discard)( & vg_tt[i] );
+ vg_tt[i].orig_addr = VG_TTE_DELETED;
+ VG_(this_epoch_out_count) ++;
+ VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
+ VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
+ VG_(overall_out_count) ++;
+ VG_(overall_out_osize) += vg_tt[i].orig_size;
+ VG_(overall_out_tsize) += vg_tt[i].trans_size;
+ out_count ++;
+ out_osize += vg_tt[i].orig_size;
+ out_tsize += vg_tt[i].trans_size;
}
-}
-
-/* Discard any translations whose original code overlaps with the
- range w_addr .. w_addr+3 inclusive.
-*/
-__attribute__ ((unused))
-static void discard_translations_bracketing ( Addr w_addr )
-{
-# if 0
- Int i, rd, wr;
- Addr o_start, o_end;
- TTEntry* tt;
-
- for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) {
- tt = vg_transtab[i];
- wr = 0;
- for (rd = 0; rd < vg_transtab_used[i]; rd++) {
- o_start = tt[rd].orig_addr;
- o_end = o_start + tt[rd].orig_size;
- if (w_addr > o_end || (w_addr+3) < o_start) {
- /* No collision possible; keep this translation */
- VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size );
- if (wr < rd) vg_transtab[wr] = vg_transtab[rd];
- wr++;
- } else {
- /* Possible collision; discard. */
- vg_smc_discards++;
- VG_(message) (Vg_DebugMsg,
- "discarding translation of %p .. %p",
- tt[rd].orig_addr,
- tt[rd].orig_addr + tt[rd].orig_size - 1);
- VG_(free)((void*)tt[rd].trans_addr);
- }
+ if (out_count > 0) {
+ VG_(invalidate_tt_fast)();
+ VG_(sanity_check_tc_tt)();
+# ifdef DEBUG_TRANSTAB
+ { Addr aa;
+ for (aa = i_start; aa <= i_end; aa++)
+ vg_assert(search_trans_table ( aa ) == NULL);
}
- vg_transtab_used[i] = wr;
- }
-# endif
-}
-
-
-/* Top-level function in charge of discarding out-of-date translations
- following the discovery of a (potential) original-code-write.
-*/
-void VG_(flush_transtab) ( void )
-{
-# if 0
- Addr w_addr;
- Int i, j;
-
- /* We shouldn't be here unless a code write was detected. */
- vg_assert(vg_origwrites_used > 0);
-
- /* Instead of incrementally fixing up the translation table cache,
- just invalidate the whole darn thing. Pray this doesn't happen
- very often :) */
- for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++)
- VG_(transtab_cache_orig)[i] =
- VG_(transtab_cache_trans)[i] = (Addr)0;
-
- /* Clear out the fast cache; discard_translations_bracketing
- reconstructs it. */
- for (i = 0; i < VG_SMC_CACHE_SIZE; i++)
- VG_(smc_cache)[i] = False;
-
- /* And also clear the slow-but-correct table. */
- for (i = 0; i < 65536; i++) {
- VgSmcSecondary* sec = vg_smc_primary[i];
- if (sec)
- for (j = 0; j < 2048; j++)
- sec->chars[j] = 0;
+# endif
}
- /* This doesn't need to be particularly fast, since we (presumably)
- don't have to handle particularly frequent writes to code
- addresses. */
- while (vg_origwrites_used > 0) {
- vg_origwrites_used--;
- w_addr = vg_origwrites[vg_origwrites_used];
- discard_translations_bracketing ( w_addr );
- }
-
- vg_assert(vg_origwrites_used == 0);
-# endif
+ if (VG_(clo_verbosity) > 1)
+ VG_(message)(Vg_UserMsg,
+ "discard %d (%d -> %d) translations in range %p .. %p",
+ out_count, out_osize, out_tsize, i_start, i_end );
}
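
(Aside: besides the munmap path shown earlier, the new
VG_USERREQ__DISCARD_TRANSLATIONS client request added to the
dispatcher above presumably ends up here too.  The handler body is
not part of this patch, so the fragment below -- including the
argument layout -- is purely an assumed illustration, with a stub
standing in for the real function:)

typedef unsigned int Addr;
typedef unsigned int UInt;

/* Stub standing in for VG_(invalidate_translations). */
static void invalidate_translations ( Addr start, UInt range ) { }

/* Assumed request layout: arg[1] = start address, arg[2] = length. */
static void handle_discard_request ( UInt* arg )
{
   invalidate_translations ( (Addr)arg[1], (UInt)arg[2] );
}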
/*--- Initialisation. ---*/
/*------------------------------------------------------------*/
-void VG_(init_transtab_and_SMC) ( void )
+void VG_(init_tt_tc) ( void )
{
Int i;
at the first TT entry, which is, of course, empty. */
for (i = 0; i < VG_TT_FAST_SIZE; i++)
VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
-
- /* No part of the address space has any translations. */
- for (i = 0; i < 65536; i++)
- vg_smc_primary[i] = NULL;
-
- /* ... and the associated fast-check cache reflects this. */
- for (i = 0; i < VG_SMC_CACHE_SIZE; i++)
- VG_(smc_cache)[i] = False;
-
- /* Finally, no original-code-writes have been recorded. */
- vg_origwrites_used = 0;
}
/*--------------------------------------------------------------------*/