automatic cache configuration detection using the CPUID instruction.
This can be overridden from the command-line if necessary.
vg_include.h:
- added the cache_t type and UNDEFINED_CACHE macro
- added command line args (of type cache_t) allowing manual override of
I1/D1/L2 configuration
- added log2(), which is generally useful
vg_main.c, valgrind.in, cachegrind.in:
- added handling of the new --{I1,D1,L2}=<size>,<assoc>,<line_size>
options
vg_cachesim.c:
- lots of stuff for auto-detecting cache configuration with CPUID.
Only handles Intel and AMD chips at the moment, and possibly not all of
them. Falls back onto defaults if anything goes wrong, and the configs
can be manually overridden from the command line anyway.
- now not printing cache summary stats if verbosity == 0. Still writing
cachegrind.out, though.
vg_cachesim_gen.c:
- new file containing stuff shared by the I1/D1/L2 simulations
vg_cachesim_{I1,D1,L2}.c:
- removed most of it; each now just calls a macro defined in
vg_cachesim_gen.c
vg_cachegen:
- has been cvs removed as it is no longer needed.
Makefile.am:
- added vg_cachesim_gen.c
- removed vg_cachegen
configure.in:
- removed vg_cachegen
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@400
INCLUDES = -I$(srcdir)/demangle
-bin_SCRIPTS = valgrind cachegrind vg_annotate vg_cachegen
+bin_SCRIPTS = valgrind cachegrind vg_annotate
SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
include_HEADERS = valgrind.h
noinst_HEADERS = \
+ vg_cachesim_gen.c \
vg_cachesim_I1.c \
vg_cachesim_D1.c \
vg_cachesim_L2.c \
INCLUDES = -I$(srcdir)/demangle
-bin_SCRIPTS = valgrind cachegrind vg_annotate vg_cachegen
+bin_SCRIPTS = valgrind cachegrind vg_annotate
SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
include_HEADERS = valgrind.h
noinst_HEADERS = \
+ vg_cachesim_gen.c \
vg_cachesim_I1.c \
vg_cachesim_D1.c \
vg_cachesim_L2.c \
--trace-children=no) vgopts="$vgopts $arg"; shift;;
--trace-children=yes) vgopts="$vgopts $arg"; shift;;
--suppressions=*) vgopts="$vgopts $arg"; shift;;
+ --I1=*,*,*) vgopts="$vgopts $arg"; shift;;
+ --D1=*,*,*) vgopts="$vgopts $arg"; shift;;
+ --L2=*,*,*) vgopts="$vgopts $arg"; shift;;
# options for debugging Cachegrind
--sanity-level=*) vgopts="$vgopts $arg"; shift;;
--single-step=yes) vgopts="$vgopts $arg"; shift;;
echo " --trace-children=no|yes Cachegrind-ise child processes? [no]"
echo " --logfile-fd=<number> file descriptor for messages [2=stderr]"
echo " --suppressions=<filename> is ignored"
+ echo " --I1=<size>,<assoc>,<line_size> set I1 cache manually"
+ echo " --D1=<size>,<assoc>,<line_size> set D1 cache manually"
+ echo " --L2=<size>,<assoc>,<line_size> set L2 cache manually"
echo
echo " options for debugging Cachegrind itself are:"
echo " --sanity-level=<number> level of sanity checking to do [1]"
INCLUDES = -I$(srcdir)/demangle
-bin_SCRIPTS = valgrind cachegrind vg_annotate vg_cachegen
+bin_SCRIPTS = valgrind cachegrind vg_annotate
SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
include_HEADERS = valgrind.h
noinst_HEADERS = \
+ vg_cachesim_gen.c \
vg_cachesim_I1.c \
vg_cachesim_D1.c \
vg_cachesim_L2.c \
my $threshold_files = print_summary_and_fn_totals();
annotate_ann_files($threshold_files);
+##--------------------------------------------------------------------##
+##--- end vg_annotate.in ---##
+##--------------------------------------------------------------------##
+
+
-
/*--------------------------------------------------------------------*/
/*--- The cache simulation framework: instrumentation, recording ---*/
/*--- and results printing. ---*/
The GNU General Public License is contained in the file LICENSE.
*/
+#include <signal.h>
+
#include "vg_include.h"
#include "vg_cachesim_L2.c"
/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
-#define MAX_x86_INSTR_SIZE 16
+#define MAX_x86_INSTR_SIZE 16
/* Size of various buffers used for storing strings */
-#define FILENAME_LEN 256
-#define FN_NAME_LEN 256
-#define BUF_LEN 512
-#define COMMIFY_BUF_LEN 128
-#define RESULTS_BUF_LEN 128
-#define LINE_BUF_LEN 64
+#define FILENAME_LEN 256
+#define FN_NAME_LEN 256
+#define BUF_LEN 512
+#define COMMIFY_BUF_LEN 128
+#define RESULTS_BUF_LEN 128
+#define LINE_BUF_LEN 64
+
+/*------------------------------------------------------------*/
+/*--- Generic utility stuff ---*/
+/*------------------------------------------------------------*/
+
+/* Returns the base-2 logarithm of x when exactly one bit of x is set (i.e.
+ * x is a power of two), otherwise -1.  Zero therefore yields -1.
+ *
+ * The comparison is done on unsigned values: shifting a signed 1 into bit
+ * 31 would overflow a signed int.  As a consequence a value with only the
+ * top bit set is reported as 31, every other negative value as -1.
+ */
+int log2(int x)
+{
+   unsigned int i;
+
+   /* Any more than 32 and we overflow anyway... */
+   for (i = 0; i < 32; i++) {
+      if ((1U << i) == (unsigned int)x) return (int)i;
+   }
+   return -1;
+}
/*------------------------------------------------------------*/
/*--- Output file related stuff ---*/
static void file_err()
{
VG_(message)(Vg_UserMsg,
- "FATAL: can't open cache simulation output file `%s'",
+ "error: can't open cache simulation output file `%s'",
OUT_FILE );
VG_(exit)(1);
}
cc->m2 = 0;
}
-
typedef enum { INSTR_CC, READ_CC, WRITE_CC, MOD_CC } CC_type;
/* Instruction-level cost-centres. The typedefs for these structs are in
/*--- Cache simulation stuff ---*/
/*------------------------------------------------------------*/
+#define MIN_LINE_SIZE 16
+
/* Total reads/writes/misses. Calculated during CC traversal at the end. */
static CC Ir_total;
static CC Dr_total;
static CC Dw_total;
+/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
+/* Probably only works for Intel and AMD chips, and probably only for some of
+ * them.
+ *
+ * cpuid(): executes the CPUID instruction with n as the requested leaf and
+ * stores the four result registers through the pointers a, b, c and d.
+ * The "a"/"b"/"c"/"d" constraints are GCC's x86 names for %eax, %ebx, %ecx
+ * and %edx.  No check is made here that the CPU supports CPUID at all.
+ */
+
+static __inline__ void cpuid(int n, int *a, int *b, int *c, int *d)
+{
+ __asm__ __volatile__ (
+ "cpuid"
+ : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output: the four result registers */
+ : "0" (n) /* input: n, placed in the same register as operand 0 (*a) */
+ );
+}
+
+/* Emits a two-line debug message saying that a micro-op based instruction
+ * trace cache of actual_size K micro-ops was found, and that a conventional
+ * cache of used_size KB with line_size byte lines is simulated instead. */
+static void micro_ops_warn(int actual_size, int used_size, int line_size)
+{
+   /* What was detected. */
+   VG_(message)(
+      Vg_DebugMsg,
+      "warning: Pentium with %d K micro_op instruction trace cache",
+      actual_size
+   );
+
+   /* What is simulated in its place. */
+   VG_(message)(
+      Vg_DebugMsg,
+      " Simulating a %d KB cache with %d B lines",
+      used_size,
+      line_size
+   );
+}
+
+/* Intel method is truly wretched.  CPUID leaf 2 returns, packed into the
+ * bytes of the four result registers, a list of one-byte descriptors; each
+ * has to be looked up in a table of pre-defined configurations for various
+ * parts of the memory hierarchy.
+ *
+ * level is the highest basic CPUID leaf the processor reports.  For every
+ * recognised descriptor the matching cache (*I1c, *D1c or *L2c) is
+ * overwritten with { size in KB, associativity, line size in bytes }; a
+ * cache for which no descriptor is seen is left untouched.
+ *
+ * Returns 0 on success, -1 if leaf 2 is not available or if the processor
+ * asks for leaf 2 to be queried more than once.
+ */
+static
+int Intel_cache_info(int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   int           regs[4];   /* %eax, %ebx, %ecx, %edx, in that order */
+   unsigned char info[16];
+   int           i;
+   int           trials;
+
+   if (level < 2) {
+      VG_(message)(Vg_DebugMsg,
+                   "warning: CPUID level < 2 for Intel processor (%d)",
+                   level);
+      return -1;
+   }
+
+   /* Read into real ints rather than through int* casts into the byte
+    * array, which would access char storage through an int lvalue. */
+   cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+   trials = (regs[0] & 0xff) - 1;   /* AL register - bits 0..7 of %eax */
+
+   /* Unpack the registers into descriptor bytes, lowest byte first (the
+    * layout the registers would have in memory on x86).  A register with
+    * bit 31 set is reserved and holds no valid descriptors, so its bytes
+    * are zeroed; zero descriptors are ignored below. */
+   for (i = 0; i < 16; i++) {
+      unsigned int reg = (unsigned int)regs[i / 4];
+
+      if (reg & 0x80000000U)
+         info[i] = 0x0;
+      else
+         info[i] = (unsigned char)((reg >> (8 * (i % 4))) & 0xff);
+   }
+   info[0] = 0x0;                   /* reset AL: it is a count, not a descriptor */
+
+   if (0 != trials) {
+      VG_(message)(Vg_DebugMsg,
+                   "warning: non-zero CPUID trials for Intel processor (%d)",
+                   trials);
+      return -1;
+   }
+
+   for (i = 0; i < 16; i++) {
+
+      switch (info[i]) {
+
+      case 0x0:       /* ignore zeros */
+          break;
+
+      case 0x01: case 0x02: case 0x03: case 0x04:     /* TLB info, ignore */
+      case 0x90: case 0x96: case 0x9b:
+          break;
+
+      case 0x06: *I1c = (cache_t) {  8, 4, 32 }; break;
+      case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
+
+      case 0x0a: *D1c = (cache_t) {  8, 2, 32 }; break;
+      case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
+
+      case 0x22: case 0x23: case 0x25: case 0x29:
+      case 0x88: case 0x89: case 0x8a:
+          VG_(message)(Vg_DebugMsg, "warning: L3 cache detected but ignored\n");
+          break;
+
+      case 0x40:
+          VG_(message)(Vg_DebugMsg,
+                       "warning: L2 cache not installed, ignore L2 results.");
+          break;
+
+      case 0x41: *L2c = (cache_t) {  128, 4, 32 }; break;
+      case 0x42: *L2c = (cache_t) {  256, 4, 32 }; break;
+      case 0x43: *L2c = (cache_t) {  512, 4, 32 }; break;
+      case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; break;
+      case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; break;
+
+      /* These are sectored, whatever that means */
+      case 0x66: *D1c = (cache_t) {  8, 4, 64 };  break;      /* sectored */
+      case 0x67: *D1c = (cache_t) { 16, 4, 64 };  break;      /* sectored */
+      case 0x68: *D1c = (cache_t) { 32, 4, 64 };  break;      /* sectored */
+
+      /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
+       * conversion to byte size is a total guess;  treat the 12K and 16K
+       * cases the same since the cache byte size must be a power of two for
+       * everything to work!.  Also guessing 32 bytes for the line size...
+       */
+      case 0x70:    /* 12K micro-ops, 8-way */
+         *I1c = (cache_t) { 16, 8, 32 };
+         micro_ops_warn(12, 16, 32);
+         break;
+      case 0x71:    /* 16K micro-ops, 8-way */
+         *I1c = (cache_t) { 16, 8, 32 };
+         micro_ops_warn(16, 16, 32);
+         break;
+      case 0x72:    /* 32K micro-ops, 8-way */
+         *I1c = (cache_t) { 32, 8, 32 };
+         micro_ops_warn(32, 32, 32);
+         break;
+
+      case 0x79: *L2c = (cache_t) {  128, 8, 64 };  break;    /* sectored */
+      case 0x7a: *L2c = (cache_t) {  256, 8, 64 };  break;    /* sectored */
+      case 0x7b: *L2c = (cache_t) {  512, 8, 64 };  break;    /* sectored */
+      case 0x7c: *L2c = (cache_t) { 1024, 8, 64 };  break;    /* sectored */
+
+      case 0x81: *L2c = (cache_t) {  128, 8, 32 };  break;
+      case 0x82: *L2c = (cache_t) {  256, 8, 32 };  break;
+      case 0x83: *L2c = (cache_t) {  512, 8, 32 };  break;
+      case 0x84: *L2c = (cache_t) { 1024, 8, 32 };  break;
+      case 0x85: *L2c = (cache_t) { 2048, 8, 32 };  break;
+
+      default:
+          VG_(message)(Vg_DebugMsg,
+                       "warning: Unknown Intel cache config value "
+                       "(0x%x), ignoring\n", info[i]);
+          break;
+      }
+   }
+   return 0;
+}
+
+/* AMD method is straightforward, just extract appropriate bits from the
+ * result registers.
+ *
+ * Bits, for D1 and I1:
+ *  31..24  data L1 cache size in KBs
+ *  23..16  data L1 cache associativity (FFh=full)
+ *  15.. 8  data L1 cache lines per tag
+ *   7.. 0  data L1 cache line size in bytes
+ *
+ * Bits, for L2:
+ *  31..16  unified L2 cache size in KBs
+ *  15..12  unified L2 cache associativity, ENCODED (0=off, 1=direct mapped,
+ *          2=2-way, 4=4-way, 6=8-way, 8=16-way, Fh=full)
+ *  11.. 8  unified L2 cache lines per tag
+ *   7.. 0  unified L2 cache line size in bytes
+ *
+ * #3  The AMD K7 processor's L2 cache must be configured prior to relying
+ *     upon this information. (Whatever that means -- njn)
+ *
+ * Sizes are stored in KB, exactly as reported.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+
+/* Converts the 4-bit L2 associativity encoding into a number of ways.
+ * Only the two encodings that differ from the way count are translated;
+ * every other value (including "off" and "full") is passed through
+ * unchanged, as before. */
+static int AMD_decode_L2_assoc(int field)
+{
+   switch (field) {
+      case 0x6: return 8;
+      case 0x8: return 16;
+      default:  return field;
+   }
+}
+
+static int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   int dummy;
+   int ext_level;
+   int I1i, D1i, L2i;
+
+   /* %eax of leaf 0x80000000 is the highest extended leaf supported. */
+   cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
+
+   /* 0x80000006 does not fit in an int, so the second comparison is
+    * carried out on unsigned values. */
+   if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
+      VG_(message)(Vg_UserMsg,
+                   "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
+                   ext_level);
+      return -1;
+   }
+
+   cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
+   cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
+
+   D1c->size      = (D1i >> 24) & 0xff;
+   D1c->assoc     = (D1i >> 16) & 0xff;
+   D1c->line_size = (D1i >>  0) & 0xff;
+
+   I1c->size      = (I1i >> 24) & 0xff;
+   I1c->assoc     = (I1i >> 16) & 0xff;
+   I1c->line_size = (I1i >>  0) & 0xff;
+
+   L2c->size      = (L2i >> 16) & 0xffff;  /* Nb: different bits used for L2 */
+   L2c->assoc     = AMD_decode_L2_assoc((L2i >> 12) & 0xf);
+   L2c->line_size = (L2i >>  0) & 0xff;
+
+   return 0;
+}
+
+/* Jump buffer shared between the CPUID probe and the SIGILL handler. */
+static jmp_buf cpuid_jmpbuf;
+
+/* Signal handler: jumps back to wherever cpuid_jmpbuf was last set up with
+ * __builtin_setjmp(), making that call appear to return non-zero. */
+static void cpuid_SIGILL_handler(int signum)
+{
+   (void)signum;   /* the signal number is not needed */
+   __builtin_longjmp(cpuid_jmpbuf, 1);
+}
+
+/* Tries to fill *I1c, *D1c and *L2c from the CPUID instruction.
+ *
+ * A private SIGILL handler is installed around the first CPUID so that a
+ * processor lacking the instruction is reported as a failure rather than
+ * killing the process; the previous handler is restored on every path.
+ *
+ * Returns 0 on success, in which case the sizes have been converted from
+ * KB to bytes.  Returns non-zero on failure (no CPUID, CPUID level 0,
+ * unrecognised vendor, or the vendor-specific query failed); the caches
+ * must then be treated as undefined, as they are not converted and may not
+ * have been written at all.
+ */
+static
+int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   int  level;
+   int  ebx, ecx, edx;
+   char vendor_id[13];
+   vki_ksigaction sigill_new, sigill_saved;
+   int  res, ret;
+   int  have_cpuid;
+   int  i;
+
+   /* Install own SIGILL handler */
+   sigill_new.ksa_handler  = cpuid_SIGILL_handler;
+   sigill_new.ksa_flags    = 0;
+   sigill_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
+   vg_assert(res == 0);
+
+   res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
+   vg_assert(res == 0);
+
+   /* Trap for illegal instruction, in case it's a really old processor that
+    * doesn't support CPUID. */
+   if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
+      cpuid(0, &level, &ebx, &ecx, &edx);
+      have_cpuid = 1;
+   } else {
+      have_cpuid = 0;
+   }
+
+   /* Restore old SIGILL handler */
+   res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
+   vg_assert(res == 0);
+
+   if (!have_cpuid) {
+      VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
+      return -1;
+   }
+
+   /* The vendor string is spread over %ebx, %edx, %ecx (in that order),
+    * lowest byte first.  Copy it out byte by byte instead of storing the
+    * registers through int* casts into the char array. */
+   for (i = 0; i < 4; i++) {
+      vendor_id[0 + i] = (char)(((unsigned int)ebx >> (8 * i)) & 0xff);
+      vendor_id[4 + i] = (char)(((unsigned int)edx >> (8 * i)) & 0xff);
+      vendor_id[8 + i] = (char)(((unsigned int)ecx >> (8 * i)) & 0xff);
+   }
+   vendor_id[12] = '\0';
+
+   if (0 == level) {
+      VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
+      return -1;
+   }
+
+   /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
+   if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
+      ret = Intel_cache_info(level, I1c, D1c, L2c);
+
+   } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
+      ret = AMD_cache_info(I1c, D1c, L2c);
+
+   } else {
+      VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
+                   vendor_id);
+      return -1;
+   }
+
+   /* On failure the caches may never have been written; don't do
+    * arithmetic on them. */
+   if (0 != ret)
+      return ret;
+
+   /* Successful!  Convert sizes from KB to bytes */
+   I1c->size *= 1024;
+   D1c->size *= 1024;
+   L2c->size *= 1024;
+
+   return 0;
+}
+
+/* Validates the configuration in *cache and repairs it in place, printing
+ * a user-visible warning for every repair made.  dflt supplies replacement
+ * values; name ("I1", "D1", ...) is only used in the messages.
+ *
+ * Checks, in order: size, associativity and line size are powers of two;
+ * line size is at least MIN_LINE_SIZE; size exceeds line size; and the
+ * associativity does not exceed the number of lines. */
+static void check_cache(cache_t* cache, cache_t* dflt, char *name)
+{
+   /* Anything that isn't a power of two is replaced by its default. */
+   if (log2(cache->size) == -1) {
+      VG_(message)(Vg_UserMsg,
+                   "warning: %s size of %dB not a power of two; "
+                   "defaulting to %dB", name, cache->size, dflt->size);
+      cache->size = dflt->size;
+   }
+
+   if (log2(cache->assoc) == -1) {
+      VG_(message)(Vg_UserMsg,
+                   "warning: %s associativity of %d not a power of two; "
+                   "defaulting to %d-way", name, cache->assoc, dflt->assoc);
+      cache->assoc = dflt->assoc;
+   }
+
+   if (log2(cache->line_size) == -1) {
+      VG_(message)(Vg_UserMsg,
+                   "warning: %s line size of %dB not a power of two; "
+                   "defaulting to %dB",
+                   name, cache->line_size, dflt->line_size);
+      cache->line_size = dflt->line_size;
+   }
+
+   /* Lines shorter than MIN_LINE_SIZE would let a single instruction
+    * straddle three cache lines, which breaks a simulation assertion and
+    * is stupid anyway. */
+   if (MIN_LINE_SIZE > cache->line_size) {
+      VG_(message)(Vg_UserMsg,
+                   "warning: %s line size of %dB too small; "
+                   "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
+      cache->line_size = MIN_LINE_SIZE;
+   }
+
+   /* The cache must hold more than one line (causes seg faults if not). */
+   if (!(cache->size > cache->line_size)) {
+      int two_lines = cache->line_size * 2;
+
+      VG_(message)(Vg_UserMsg,
+                   "warning: %s cache size of %dB <= line size of %dB; "
+                   "increasing to %dB", name, cache->size, cache->line_size,
+                   two_lines);
+      cache->size = two_lines;
+   }
+
+   /* There must be at least `assoc' lines (seg faults otherwise). */
+   if ((cache->size / cache->line_size) < cache->assoc) {
+      int grown = cache->assoc * cache->line_size;
+
+      VG_(message)(Vg_UserMsg,
+                   "warning: %s associativity > (size / line size); "
+                   "increasing size to %dB",
+                   name, grown);
+      cache->size = grown;
+   }
+}
+
+/* On entry, args are undefined. Fill them with any info from the
+ * command-line, then fill in any remaining with CPUID instruction if possible,
+ * otherwise use defaults. Then check them and fix if not ok. */
+static void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+ /* Defaults are for a model 3 or 4 Athlon */
+ cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
+ cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
+ cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
+
+#define CMD_LINE_DEFINED(L) \
+ (-1 != VG_(clo_##L##_cache).size || \
+ -1 != VG_(clo_##L##_cache).assoc || \
+ -1 != VG_(clo_##L##_cache).line_size)
+
+ /* If any undefined on command-line, try CPUID */
+ if (! CMD_LINE_DEFINED(I1) ||
+ ! CMD_LINE_DEFINED(D1) ||
+ ! CMD_LINE_DEFINED(L2)) {
+
+ /* Overwrite CPUID result for any cache defined on command-line */
+ if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
+
+ if (CMD_LINE_DEFINED(I1)) *I1c = VG_(clo_I1_cache);
+ if (CMD_LINE_DEFINED(D1)) *D1c = VG_(clo_D1_cache);
+ if (CMD_LINE_DEFINED(L2)) *L2c = VG_(clo_L2_cache);
+
+ /* CPUID failed, use defaults for each undefined by command-line */
+ } else {
+ VG_(message)(Vg_DebugMsg,
+ "Couldn't detect cache configuration, using one "
+ "or more defaults ");
+
+ *I1c = (CMD_LINE_DEFINED(I1) ? VG_(clo_I1_cache) : I1_dflt);
+ *D1c = (CMD_LINE_DEFINED(D1) ? VG_(clo_D1_cache) : D1_dflt);
+ *L2c = (CMD_LINE_DEFINED(L2) ? VG_(clo_L2_cache) : L2_dflt);
+ }
+ }
+#undef CMD_LINE_DEFINED
+
+ check_cache(I1c, &I1_dflt, "I1");
+ check_cache(D1c, &D1_dflt, "D1");
+ check_cache(L2c, &L2_dflt, "L2");
+
+ if (VG_(clo_verbosity) > 1) {
+ VG_(message)(Vg_UserMsg, "Cache configuration used:");
+ VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
+ I1c->size, I1c->assoc, I1c->line_size);
+ VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
+ D1c->size, D1c->assoc, D1c->line_size);
+ VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
+ L2c->size, L2c->assoc, L2c->line_size);
+ }
+}
+
void VG_(init_cachesim)(void)
{
+ cache_t I1c, D1c, L2c;
+
/* Make sure the output file can be written. */
Int fd = VG_(open_write)(OUT_FILE);
if (-1 == fd) {
initCC(&Dr_discards);
initCC(&Dw_discards);
- cachesim_I1_initcache();
- cachesim_D1_initcache();
- cachesim_L2_initcache();
+ get_caches(&I1c, &D1c, &L2c);
+
+ cachesim_I1_initcache(I1c);
+ //cachesim_I1_initcache();
+ cachesim_D1_initcache(D1c);
+ //cachesim_D1_initcache();
+ cachesim_L2_initcache(L2c);
+ //cachesim_L2_initcache();
init_BBCC_table();
}
if (-1 == fd) { file_err(); }
/* "desc:" lines (giving I1/D1/L2 cache configuration) */
- VG_(write)(fd, (void*)I1_desc_line, VG_(strlen)(I1_desc_line));
- VG_(write)(fd, (void*)D1_desc_line, VG_(strlen)(D1_desc_line));
- VG_(write)(fd, (void*)L2_desc_line, VG_(strlen)(L2_desc_line));
+ VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
+ VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+ VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
+ VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+ VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
+ VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
/* "cmd:" line */
VG_(strcpy)(buf, "cmd:");
for (i = 0; i < space; i++) buf[i] = ' ';
}
-void VG_(show_cachesim_results)(Int client_argc, Char** client_argv)
+void VG_(do_cachesim_results)(Int client_argc, Char** client_argv)
{
CC D_total;
ULong L2_total_m, L2_total_mr, L2_total_mw,
fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
+ if (VG_(clo_verbosity) == 0)
+ return;
+
/* I cache results. Use the I_refs value to determine the first column
* width. */
l1 = commify(Ir_total.a, 0, buf1);
-/* D1 cache simulator, generated by vg_cachegen.
- * total size = 65536 bytes
- * line size = 64 bytes
- * associativity = 2-way associative
- *
- * This file should be #include-d into vg_cachesim.c
- */
+/*--------------------------------------------------------------------*/
+/*--- D1 cache simulation. ---*/
+/*--- vg_cachesim_D1.c ---*/
+/*--------------------------------------------------------------------*/
-static char D1_desc_line[] =
- "desc: D1 cache: 65536 B, 64 B, 2-way associative\n";
+/*
+ This file is part of Valgrind, an x86 protected-mode emulator
+ designed for debugging and profiling binaries on x86-Unixes.
-static UInt D1_tags[512][2];
+ Copyright (C) 2002 Nicholas Nethercote
+ njn25@cam.ac.uk
-static void cachesim_D1_initcache(void)
-{
- UInt set, way;
- for (set = 0; set < 512; set++)
- for (way = 0; way < 2; way++)
- D1_tags[set][way] = 0;
-}
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
-static __inline__
-void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *m2)
-{
- register UInt set1 = ( a >> 6) & (512-1);
- register UInt set2 = ((a + size - 1) >> 6) & (512-1);
- register UInt tag = a >> (6 + 9);
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
- if (set1 == set2) {
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
- if (tag == D1_tags[set1][0]) {
- return;
- }
- else if (tag == D1_tags[set1][1]) {
- D1_tags[set1][1] = D1_tags[set1][0];
- D1_tags[set1][0] = tag;
- return;
- }
- else {
- /* A miss */
- D1_tags[set1][1] = D1_tags[set1][0];
- D1_tags[set1][0] = tag;
+ The GNU General Public License is contained in the file LICENSE.
+*/
- (*m1)++;
- cachesim_L2_doref(a, size, m2);
- }
+#include "vg_cachesim_gen.c"
- } else if ((set1 + 1) % 512 == set2) {
+CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );
- Bool is_D1_miss = False;
+/*--------------------------------------------------------------------*/
+/*--- end vg_cachesim_D1.c ---*/
+/*--------------------------------------------------------------------*/
- /* Block one */
- if (tag == D1_tags[set1][0]) {
- }
- else if (tag == D1_tags[set1][1]) {
- D1_tags[set1][1] = D1_tags[set1][0];
- D1_tags[set1][0] = tag;
- }
- else {
- /* A miss */
- D1_tags[set1][1] = D1_tags[set1][0];
- D1_tags[set1][0] = tag;
-
- is_D1_miss = True;
- }
-
- /* Block two */
- if (tag == D1_tags[set2][0]) {
- }
- else if (tag == D1_tags[set2][1]) {
- D1_tags[set2][1] = D1_tags[set2][0];
- D1_tags[set2][0] = tag;
- }
- else {
- /* A miss */
- D1_tags[set2][1] = D1_tags[set2][0];
- D1_tags[set2][0] = tag;
-
- is_D1_miss = True;
- }
-
- /* Miss treatment */
- if (is_D1_miss) {
- (*m1)++;
- cachesim_L2_doref(a, size, m2);
- }
-
- } else {
- VG_(printf)("\nERROR: Data item 0x%x of size %u bytes is in two non-adjacent\n", a, size);
- VG_(printf)("sets %d and %d.\n", set1, set2);
- VG_(panic)("D1 cache set mismatch");
- }
-}
-/* I1 cache simulator, generated by vg_cachegen.
- * total size = 65536 bytes
- * line size = 64 bytes
- * associativity = 2-way associative
- *
- * This file should be #include-d into vg_cachesim.c
- */
+/*--------------------------------------------------------------------*/
+/*--- I1 cache simulation. ---*/
+/*--- vg_cachesim_I1.c ---*/
+/*--------------------------------------------------------------------*/
-static char I1_desc_line[] =
- "desc: I1 cache: 65536 B, 64 B, 2-way associative\n";
+/*
+ This file is part of Valgrind, an x86 protected-mode emulator
+ designed for debugging and profiling binaries on x86-Unixes.
-static UInt I1_tags[512][2];
+ Copyright (C) 2002 Nicholas Nethercote
+ njn25@cam.ac.uk
-static void cachesim_I1_initcache(void)
-{
- UInt set, way;
- for (set = 0; set < 512; set++)
- for (way = 0; way < 2; way++)
- I1_tags[set][way] = 0;
-}
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
-static __inline__
-void cachesim_I1_doref(Addr a, UChar size, ULong* m1, ULong *m2)
-{
- register UInt set1 = ( a >> 6) & (512-1);
- register UInt set2 = ((a + size - 1) >> 6) & (512-1);
- register UInt tag = a >> (6 + 9);
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
- if (set1 == set2) {
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
- if (tag == I1_tags[set1][0]) {
- return;
- }
- else if (tag == I1_tags[set1][1]) {
- I1_tags[set1][1] = I1_tags[set1][0];
- I1_tags[set1][0] = tag;
- return;
- }
- else {
- /* A miss */
- I1_tags[set1][1] = I1_tags[set1][0];
- I1_tags[set1][0] = tag;
+ The GNU General Public License is contained in the file LICENSE.
+*/
- (*m1)++;
- cachesim_L2_doref(a, size, m2);
- }
+#include "vg_cachesim_gen.c"
- } else if ((set1 + 1) % 512 == set2) {
+CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );
- Bool is_I1_miss = False;
+/*--------------------------------------------------------------------*/
+/*--- end vg_cachesim_I1.c ---*/
+/*--------------------------------------------------------------------*/
- /* Block one */
- if (tag == I1_tags[set1][0]) {
- }
- else if (tag == I1_tags[set1][1]) {
- I1_tags[set1][1] = I1_tags[set1][0];
- I1_tags[set1][0] = tag;
- }
- else {
- /* A miss */
- I1_tags[set1][1] = I1_tags[set1][0];
- I1_tags[set1][0] = tag;
-
- is_I1_miss = True;
- }
-
- /* Block two */
- if (tag == I1_tags[set2][0]) {
- }
- else if (tag == I1_tags[set2][1]) {
- I1_tags[set2][1] = I1_tags[set2][0];
- I1_tags[set2][0] = tag;
- }
- else {
- /* A miss */
- I1_tags[set2][1] = I1_tags[set2][0];
- I1_tags[set2][0] = tag;
-
- is_I1_miss = True;
- }
-
- /* Miss treatment */
- if (is_I1_miss) {
- (*m1)++;
- cachesim_L2_doref(a, size, m2);
- }
-
- } else {
- VG_(printf)("\nERROR: Data item 0x%x of size %u bytes is in two non-adjacent\n", a, size);
- VG_(printf)("sets %d and %d.\n", set1, set2);
- VG_(panic)("I1 cache set mismatch");
- }
-}
-/* L2 cache simulator, generated by vg_cachegen.
- * total size = 262144 bytes
- * line size = 64 bytes
- * associativity = 8-way associative
- *
- * This file should be #include-d into vg_cachesim.c
- */
+/*--------------------------------------------------------------------*/
+/*--- L2 cache simulation. ---*/
+/*--- vg_cachesim_L2.c ---*/
+/*--------------------------------------------------------------------*/
-static char L2_desc_line[] =
- "desc: L2 cache: 262144 B, 64 B, 8-way associative\n";
+/*
+ This file is part of Valgrind, an x86 protected-mode emulator
+ designed for debugging and profiling binaries on x86-Unixes.
-static UInt L2_tags[512][8];
+ Copyright (C) 2002 Nicholas Nethercote
+ njn25@cam.ac.uk
-static void cachesim_L2_initcache(void)
-{
- UInt set, way;
- for (set = 0; set < 512; set++)
- for (way = 0; way < 8; way++)
- L2_tags[set][way] = 0;
-}
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
-static __inline__
-void cachesim_L2_doref(Addr a, UChar size, ULong *m2)
-{
- register UInt set1 = ( a >> 6) & (512-1);
- register UInt set2 = ((a + size - 1) >> 6) & (512-1);
- register UInt tag = a >> (6 + 9);
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
- if (set1 == set2) {
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
- if (tag == L2_tags[set1][0]) {
- return;
- }
- else if (tag == L2_tags[set1][1]) {
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][2]) {
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][3]) {
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][4]) {
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][5]) {
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][6]) {
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][7]) {
- L2_tags[set1][7] = L2_tags[set1][6];
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else {
- /* A miss */
- L2_tags[set1][7] = L2_tags[set1][6];
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
+ The GNU General Public License is contained in the file LICENSE.
+*/
- (*m2)++;
- }
+#include "vg_cachesim_gen.c"
- } else if ((set1 + 1) % 512 == set2) {
+CACHESIM(L2, (*m2)++ );
- Bool is_L2_miss = False;
+/*--------------------------------------------------------------------*/
+/*--- end vg_cachesim_L2.c ---*/
+/*--------------------------------------------------------------------*/
- /* Block one */
- if (tag == L2_tags[set1][0]) {
- }
- else if (tag == L2_tags[set1][1]) {
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][2]) {
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][3]) {
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][4]) {
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][5]) {
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][6]) {
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][7]) {
- L2_tags[set1][7] = L2_tags[set1][6];
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else {
- /* A miss */
- L2_tags[set1][7] = L2_tags[set1][6];
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
-
- is_L2_miss = True;
- }
-
- /* Block two */
- if (tag == L2_tags[set2][0]) {
- }
- else if (tag == L2_tags[set2][1]) {
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][2]) {
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][3]) {
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][4]) {
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][5]) {
- L2_tags[set2][5] = L2_tags[set2][4];
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][6]) {
- L2_tags[set2][6] = L2_tags[set2][5];
- L2_tags[set2][5] = L2_tags[set2][4];
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][7]) {
- L2_tags[set2][7] = L2_tags[set2][6];
- L2_tags[set2][6] = L2_tags[set2][5];
- L2_tags[set2][5] = L2_tags[set2][4];
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else {
- /* A miss */
- L2_tags[set2][7] = L2_tags[set2][6];
- L2_tags[set2][6] = L2_tags[set2][5];
- L2_tags[set2][5] = L2_tags[set2][4];
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
-
- is_L2_miss = True;
- }
-
- /* Miss treatment */
- if (is_L2_miss) {
- (*m2)++;
- }
-
- } else {
- VG_(printf)("\nERROR: Data item 0x%x of size %u bytes is in two non-adjacent\n", a, size);
- VG_(printf)("sets %d and %d.\n", set1, set2);
- VG_(panic)("L2 cache set mismatch");
- }
-}
AC_OUTPUT(
vg_annotate
- vg_cachegen
valgrind
valgrind.spec
cachegrind
INCLUDES = -I$(srcdir)/demangle
-bin_SCRIPTS = valgrind cachegrind vg_annotate vg_cachegen
+bin_SCRIPTS = valgrind cachegrind vg_annotate
SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
include_HEADERS = valgrind.h
noinst_HEADERS = \
+ vg_cachesim_gen.c \
vg_cachesim_I1.c \
vg_cachesim_D1.c \
vg_cachesim_L2.c \
INCLUDES = -I$(srcdir)/demangle
-bin_SCRIPTS = valgrind cachegrind vg_annotate vg_cachegen
+bin_SCRIPTS = valgrind cachegrind vg_annotate
SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
include_HEADERS = valgrind.h
noinst_HEADERS = \
+ vg_cachesim_gen.c \
vg_cachesim_I1.c \
vg_cachesim_D1.c \
vg_cachesim_L2.c \
--suppressions=*) vgopts="$vgopts $arg"; shift;;
--cachesim=yes) vgopts="$vgopts $arg"; shift;;
--cachesim=no) vgopts="$vgopts $arg"; shift;;
+ --I1=*,*,*) vgopts="$vgopts $arg"; shift;;
+ --D1=*,*,*) vgopts="$vgopts $arg"; shift;;
+ --L2=*,*,*) vgopts="$vgopts $arg"; shift;;
--weird-hacks=*) vgopts="$vgopts $arg"; shift;;
# options for debugging Valgrind
--sanity-level=*) vgopts="$vgopts $arg"; shift;;
echo " --check-addrVs=no|yes experimental lighterweight checking? [yes]"
echo " yes == Valgrind's original behaviour"
echo " --cachesim=no|yes do cache profiling? [no]"
+ echo " --I1=<size>,<assoc>,<line_size> set I1 cache manually"
+ echo " --D1=<size>,<assoc>,<line_size> set D1 cache manually"
+ echo " --L2=<size>,<assoc>,<line_size> set L2 cache manually"
echo " --weird-hacks=hack1,hack2,... [no hacks selected]"
echo " recognised hacks are: ioctl-VTIME"
echo ""
( ((bit7) << 7) | ((bit6) << 6) | ((bit5) << 5) | ((bit4) << 4) \
| ((bit3) << 3) | ((bit2) << 2) | ((bit1) << 1) | (bit0))
+/* For cache simulation: the configuration of one simulated cache.
+ * UNDEFINED_CACHE, below, is the value with every field set to -1; the
+ * command-line cache options are documented as defaulting to "undefined". */
+typedef struct {
+ int size; /* bytes */
+ int assoc; /* associativity: number of ways */
+ int line_size; /* bytes */
+} cache_t;
+
+#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
/* ---------------------------------------------------------------------
Now the basic types are set up, we can haul in the kernel-interface
extern Bool VG_(clo_cleanup);
/* Cache simulation instrumentation? default: NO */
extern Bool VG_(clo_cachesim);
+/* I1 cache configuration. default: undefined */
+extern cache_t VG_(clo_I1_cache);
+/* D1 cache configuration. default: undefined */
+extern cache_t VG_(clo_D1_cache);
+/* L2 cache configuration. default: undefined */
+extern cache_t VG_(clo_L2_cache);
/* SMC write checks? default: SOME (1,2,4 byte movs to mem) */
extern Int VG_(clo_smc_check);
/* DEBUG: print system calls? default: NO */
const Char *format, va_list vargs );
extern Bool VG_(isspace) ( Char c );
+extern Bool VG_(isdigit) ( Char c );
extern Int VG_(strlen) ( const Char* str );
Exports of vg_cachesim.c
------------------------------------------------------------------ */
+extern int log2( int x );
+
extern UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr);
typedef struct _iCC iCC;
typedef struct _idCC idCC;
-extern void VG_(init_cachesim) ( void );
-extern void VG_(show_cachesim_results)( Int client_argc, Char** client_argv );
+extern void VG_(init_cachesim) ( void );
+extern void VG_(do_cachesim_results)( Int client_argc, Char** client_argv );
extern void VG_(cachesim_log_non_mem_instr)( iCC* cc );
extern void VG_(cachesim_log_mem_instr) ( idCC* cc, Addr data_addr );
Bool VG_(clo_instrument);
Bool VG_(clo_cleanup);
Bool VG_(clo_cachesim);
+cache_t VG_(clo_I1_cache);
+cache_t VG_(clo_D1_cache);
+cache_t VG_(clo_L2_cache);
Int VG_(clo_smc_check);
Bool VG_(clo_trace_syscalls);
Bool VG_(clo_trace_signals);
config_error("couldn't find client's argc/argc/envp");
}
+/* Parse a cache-geometry option of the form "--XX=SIZE,ASSOC,LINE_SIZE"
+ * (for example "--I1=65536,2,64") and store the three numbers in *cache.
+ *
+ *   cache     receives size, assoc and line_size when the option is valid
+ *   orig_opt  the complete option string, including the "--XX=" prefix
+ *   opt_len   length of that prefix, i.e. the index of the first digit
+ *
+ * Each of the three fields must consist of at least one decimal digit and
+ * nothing else.  Anything malformed, including an empty field such as
+ * "--I1=,,", is handed to bad_option() instead of being read as zero.
+ */
+static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
+{
+   int i1, i2, i3;
+   int i;
+   /* Work on a private copy so the commas can be overwritten with NULs,
+    * giving three independent strings for VG_(atoll).
+    * NOTE(review): the copy is never released here -- confirm that is
+    * acceptable for the VG_AR_PRIVATE arena. */
+   char *opt = VG_(strdup)(VG_AR_PRIVATE, orig_opt);
+
+   i = i1 = opt_len;
+
+   /* SIZE: one or more digits followed by a comma */
+   while (VG_(isdigit)(opt[i])) i++;
+   if (i == i1 || ',' != opt[i]) goto bad;
+   opt[i++] = '\0';
+   i2 = i;
+
+   /* ASSOC: one or more digits followed by a comma */
+   while (VG_(isdigit)(opt[i])) i++;
+   if (i == i2 || ',' != opt[i]) goto bad;
+   opt[i++] = '\0';
+   i3 = i;
+
+   /* LINE_SIZE: one or more digits running to the end of the string */
+   while (VG_(isdigit)(opt[i])) i++;
+   if (i == i3 || '\0' != opt[i]) goto bad;
+
+   cache->size      = (Int)VG_(atoll)(opt + i1);
+   cache->assoc     = (Int)VG_(atoll)(opt + i2);
+   cache->line_size = (Int)VG_(atoll)(opt + i3);
+
+   return;
+
+bad:
+   bad_option(orig_opt);
+}
static void process_cmd_line_options ( void )
{
VG_(clo_single_step) = False;
VG_(clo_optimise) = True;
VG_(clo_instrument) = True;
+ VG_(clo_cachesim) = False;
+ VG_(clo_I1_cache) = UNDEFINED_CACHE;
+ VG_(clo_D1_cache) = UNDEFINED_CACHE;
+ VG_(clo_L2_cache) = UNDEFINED_CACHE;
VG_(clo_cleanup) = True;
VG_(clo_smc_check) = /* VG_CLO_SMC_SOME */ VG_CLO_SMC_NONE;
VG_(clo_trace_syscalls) = False;
/* (Suggested by Fabrice Bellard ... )
We look for the Linux ELF table and go down until we find the
- envc & envp. It is not full proof, but these structures should
+ envc & envp. It is not fool-proof, but these structures should
change less often than the libc ones. */
{
UInt* sp = 0; /* bogus init to keep gcc -O happy */
else if (STREQ(argv[i], "--cachesim=no"))
VG_(clo_cachesim) = False;
+ /* 5 is length of "--I1=" */
+ else if (0 == VG_(strncmp)(argv[i], "--I1=", 5))
+ parse_cache_opt(&VG_(clo_I1_cache), argv[i], 5);
+ else if (0 == VG_(strncmp)(argv[i], "--D1=", 5))
+ parse_cache_opt(&VG_(clo_D1_cache), argv[i], 5);
+ else if (0 == VG_(strncmp)(argv[i], "--L2=", 5))
+ parse_cache_opt(&VG_(clo_L2_cache), argv[i], 5);
+
else if (STREQ(argv[i], "--smc-check=none"))
VG_(clo_smc_check) = VG_CLO_SMC_NONE;
else if (STREQ(argv[i], "--smc-check=some"))
VG_(running_on_simd_CPU) = False;
if (VG_(clo_cachesim))
- VG_(show_cachesim_results)(VG_(client_argc), VG_(client_argv));
+ VG_(do_cachesim_results)(VG_(client_argc), VG_(client_argv));
VG_(do_sanity_checks)( True /*include expensive checks*/ );
return (c == ' ' || c == '\n' || c == '\t' || c == 0);
}
+/* True iff c is one of the ASCII decimal digits '0'..'9'. */
+Bool VG_(isdigit) ( Char c )
+{
+   return !(c < '0' || c > '9');
+}
Int VG_(strlen) ( const Char* str )
{
INCLUDES = -I$(srcdir)/demangle
-bin_SCRIPTS = valgrind cachegrind vg_annotate vg_cachegen
+bin_SCRIPTS = valgrind cachegrind vg_annotate
SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
include_HEADERS = valgrind.h
noinst_HEADERS = \
+ vg_cachesim_gen.c \
vg_cachesim_I1.c \
vg_cachesim_D1.c \
vg_cachesim_L2.c \
INCLUDES = -I$(srcdir)/demangle
-bin_SCRIPTS = valgrind cachegrind vg_annotate vg_cachegen
+bin_SCRIPTS = valgrind cachegrind vg_annotate
SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
include_HEADERS = valgrind.h
noinst_HEADERS = \
+ vg_cachesim_gen.c \
vg_cachesim_I1.c \
vg_cachesim_D1.c \
vg_cachesim_L2.c \
INCLUDES = -I$(srcdir)/demangle
-bin_SCRIPTS = valgrind cachegrind vg_annotate vg_cachegen
+bin_SCRIPTS = valgrind cachegrind vg_annotate
SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
include_HEADERS = valgrind.h
noinst_HEADERS = \
+ vg_cachesim_gen.c \
vg_cachesim_I1.c \
vg_cachesim_D1.c \
vg_cachesim_L2.c \
INCLUDES = -I$(srcdir)/demangle
-bin_SCRIPTS = valgrind cachegrind vg_annotate vg_cachegen
+bin_SCRIPTS = valgrind cachegrind vg_annotate
SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
include_HEADERS = valgrind.h
noinst_HEADERS = \
+ vg_cachesim_gen.c \
vg_cachesim_I1.c \
vg_cachesim_D1.c \
vg_cachesim_L2.c \
--suppressions=*) vgopts="$vgopts $arg"; shift;;
--cachesim=yes) vgopts="$vgopts $arg"; shift;;
--cachesim=no) vgopts="$vgopts $arg"; shift;;
+ --I1=*,*,*) vgopts="$vgopts $arg"; shift;;
+ --D1=*,*,*) vgopts="$vgopts $arg"; shift;;
+ --L2=*,*,*) vgopts="$vgopts $arg"; shift;;
--weird-hacks=*) vgopts="$vgopts $arg"; shift;;
# options for debugging Valgrind
--sanity-level=*) vgopts="$vgopts $arg"; shift;;
echo " --check-addrVs=no|yes experimental lighterweight checking? [yes]"
echo " yes == Valgrind's original behaviour"
echo " --cachesim=no|yes do cache profiling? [no]"
+ echo " --I1=<size>,<assoc>,<line_size> set I1 cache manually"
+ echo " --D1=<size>,<assoc>,<line_size> set D1 cache manually"
+ echo " --L2=<size>,<assoc>,<line_size> set L2 cache manually"
echo " --weird-hacks=hack1,hack2,... [no hacks selected]"
echo " recognised hacks are: ioctl-VTIME"
echo ""
my $threshold_files = print_summary_and_fn_totals();
annotate_ann_files($threshold_files);
+##--------------------------------------------------------------------##
+##--- end vg_annotate.in ---##
+##--------------------------------------------------------------------##
+
+
+++ /dev/null
-#! /usr/bin/perl -w
-##--------------------------------------------------------------------##
-##--- The cache simulator generator ---##
-##--- vg_cachegen ---##
-##--------------------------------------------------------------------##
-
-# This file is part of Valgrind, an x86 protected-mode emulator
-# designed for debugging and profiling binaries on x86-Unixes.
-#
-# Copyright (C) 2002 Nicholas Nethercote
-# njn25@cam.ac.uk
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 2 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-# 02111-1307, USA.
-#
-# The GNU General Public License is contained in the file LICENSE.
-
-#----------------------------------------------------------------------------
-# Cache simulator generator, creates files vg_cachesim_{I1,D1,L2}.c to be
-# #included in vg_cachesim.c.
-#
-# Notes:
-# - simulates a write-allocate cache
-# - (block --> set) hash function uses simple bit selection
-# - handling references straddling two cache blocks:
-# - counts as only one cache access (not two)
-# - both blocks hit --> one hit
-# - one block hits, the other misses --> one miss
-# - both blocks miss --> one miss (not two)
-
-use strict;
-
-#----------------------------------------------------------------------------
-# Global variables
-#----------------------------------------------------------------------------
-my %log2 =
- ( 1 => 0,
- 2 => 1,
- 4 => 2,
- 8 => 3,
- 16 => 4,
- 32 => 5,
- 64 => 6,
- 128 => 7,
- 256 => 8,
- 512 => 9,
- 1024 => 10,
- 2048 => 11,
- 4096 => 12,
- 8192 => 13,
- 16384 => 14,
- 32768 => 15,
- 65536 => 16,
- 131072 => 17,
- 262144 => 18,
- 524288 => 19,
- 1048576 => 20,
- 2097152 => 21,
- 4194304 => 22,
- 8388608 => 23,
- 16777216 => 24,
- 33554432 => 25,
- 67108864 => 26,
- 134217728 => 27,
- 268435456 => 28,
- 536870912 => 29,
- 1073741824 => 30);
-
-my @valid_types =
- ("I1", "D1", "L2");
-
-my @valid_sizes =
- (1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288,
- 1048576, 2097152, 4194304, 8388608, 16777216, 67108864, 134217728);
-
-my @valid_linesizes =
- (8, 16, 32, 64, 128);
-
-my @valid_ways =
- (1, 2, 4, 8, 256);
-
-my %valid_types; for my $i (@valid_types) { $valid_types{$i} = 1; }
-my %valid_sizes; for my $i (@valid_sizes) { $valid_sizes{$i} = 1; }
-my %valid_linesizes; for my $i (@valid_linesizes) { $valid_linesizes{$i} = 1; }
-my %valid_ways; for my $i (@valid_ways) { $valid_ways{$i} = 1; }
-
-my ($L, $size, $line_size, $n_ways);
-
-my @caches;
-
-#----------------------------------------------------------------------------
-# Argument and option handling
-#----------------------------------------------------------------------------
-sub process_cmd_line ()
-{
-#----
- my $usage = <<END
-usage: cachegen [options]
-
- options are:
- --I1=size,line_size,number_of_ways
- --D1=size,line_size,number_of_ways
- --L2=size,line_size,number_of_ways
-
- where
- size is one of @valid_sizes
- line_size is one of @valid_linesizes
- number_of_ways is one of @valid_ways
-
- Valgrind is Copyright (C) 2000-2002 Julian Seward
- and licensed under the GNU General Public License, version 2.
- Bug reports, feedback, admiration, abuse, etc, to: jseward\@acm.org.
-
-END
-;
-#----
- (@ARGV > 0) or die($usage);
- for my $arg (@ARGV) {
-
- if ($arg =~ /--(I1|D1|L2)=(\d+),(\d+),(\d+)/) {
- my ($L, $size, $line_size, $n_ways) = ($1, $2, $3, $4);
-
- (defined $valid_sizes{$size} &&
- defined $valid_linesizes{$line_size} &&
- defined $valid_ways{$n_ways}) or die($usage);
-
- # Remember cache type too, and save
- push(@caches, [ $L, $size, $line_size, $n_ways ]);
-
- }
-
- else { # -h and --help fall under this case
- die($usage);
- }
- }
-}
-
-#----------------------------------------------------------------------------
-# Printing machinery (I)
-#----------------------------------------------------------------------------
-sub shiftSequence ($$$)
-{
- my $s = "";
- my ($i, $n, $L) = @_;
-
- while ($i > 0) {
- $s .= " ${L}_tags[set$n][$i] = ";
- $i--;
- $s .= "${L}_tags[set$n][$i];\n"
- }
- return $s;
-}
-
-sub trySet ($$$$$)
-{
- my $s = "";
- my ($k, $n, $L, $hit_ending, $miss_ending) = @_;
-
- for (my $i = 0; $i < $k; $i++) {
- $s .= " "
- . (0 == $i ? "if" : "else if")
- . " (tag == ${L}_tags[set$n][$i]) {\n"
- . shiftSequence($i, $n, $L)
- . (0 == $i ? "" : " ${L}_tags[set$n][0] = tag;\n")
- . ("" eq $hit_ending ? "" : " $hit_ending\n")
- . " }\n";
- }
-
- $s .= " else {\n"
- . " /* A miss */\n"
- . shiftSequence($k - 1, $n, $L) # not if exclusive
- . " ${L}_tags[set$n][0] = tag;\n" # not if exclusive
- . "\n"
- . " $miss_ending\n"
- . " }\n";
- return $s;
-}
-
-sub print_cache_simulator (@)
-{
- my ($L, $size, $line_size, $n_ways) = @_;
-
- my $n_lines = $size / $line_size;
- my $n_sets = $n_lines / $n_ways;
- my $n_line_bits = $log2{$line_size};
- my $n_set_bits = $log2{$n_sets};
-
- my $assoc = (1 == $n_ways ? "direct-mapped" : "$n_ways-way associative");
- my $L1_args = "Addr a, UChar size, ULong* m1, ULong *m2";
- my $L2_args = "Addr a, UChar size, ULong *m2";
- my $L_args = ($L ne "L2" ? $L1_args : $L2_args);
-
-#----
- my $comments_cache_init_desc_and_doref_start = <<END
-/* $L cache simulator, generated by vg_cachegen.
- * total size = $size bytes
- * line size = $line_size bytes
- * associativity = $assoc
- *
- * This file should be #include-d into vg_cachesim.c
- */
-
-static char ${L}_desc_line[] =
- "desc: ${L} cache: $size B, $line_size B, $assoc\\n";
-
-static UInt ${L}_tags[$n_sets][$n_ways];
-
-static void cachesim_${L}_initcache(void)
-{
- UInt set, way;
- for (set = 0; set < $n_sets; set++)
- for (way = 0; way < $n_ways; way++)
- ${L}_tags[set][way] = 0;
-}
-
-static __inline__
-void cachesim_${L}_doref($L_args)
-{
- register UInt set1 = ( a >> $n_line_bits) & ($n_sets-1);
- register UInt set2 = ((a + size - 1) >> $n_line_bits) & ($n_sets-1);
- register UInt tag = a >> ($n_line_bits + $n_set_bits);
-
- if (set1 == set2) {
-
-END
-;
-#----
- my $doref_middle = <<END
-
- } else if ((set1 + 1) % $n_sets == set2) {
-
- Bool is_${L}_miss = False;
-
-END
-;
-#----
- my $L1_miss_treatment = "(*m1)++;\n" .
- " cachesim_L2_doref(a, size, m2);";
- my $L2_miss_treatment = "(*m2)++;";
- my $L_miss_treatment =
- ( $L ne "L2" ? $L1_miss_treatment : $L2_miss_treatment );
-
- my $straddle_case_miss_treatment = <<END
- /* Miss treatment */
- if (is_${L}_miss) {
- $L_miss_treatment
- }
-END
-;
-#----
- my $doref_end = <<END
-
- } else {
- VG_(printf)("\\nERROR: Data item 0x%x of size %u bytes is in two non-adjacent\\n", a, size);
- VG_(printf)("sets %d and %d.\\n", set1, set2);
- VG_(panic)("$L cache set mismatch");
- }
-}
-END
-;
-#----
- my $outfile = "vg_cachesim_${L}.c";
- open(OUTFILE, "> $outfile") or die("Couldn't open $outfile for writing\n");
-
- print(OUTFILE $comments_cache_init_desc_and_doref_start);
- print(OUTFILE trySet($n_ways, 1, $L, "return;", $L_miss_treatment));
- print(OUTFILE $doref_middle);
- print(OUTFILE " /* Block one */\n");
- print(OUTFILE trySet($n_ways, 1, $L, "", "is_${L}_miss = True;"));
- print(OUTFILE "\n");
- print(OUTFILE " /* Block two */\n");
- print(OUTFILE trySet($n_ways, 2, $L, "", "is_${L}_miss = True;"));
- print(OUTFILE "\n");
- print(OUTFILE $straddle_case_miss_treatment);
- print(OUTFILE $doref_end);
-
- close(OUTFILE);
-}
-
-#----------------------------------------------------------------------------
-# main()
-#----------------------------------------------------------------------------
-process_cmd_line();
-foreach my $cache (@caches) {
- print_cache_simulator(@$cache);
-}
-
-
/*--------------------------------------------------------------------*/
/*--- The cache simulation framework: instrumentation, recording ---*/
/*--- and results printing. ---*/
The GNU General Public License is contained in the file LICENSE.
*/
+#include <signal.h>
+
#include "vg_include.h"
#include "vg_cachesim_L2.c"
/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
-#define MAX_x86_INSTR_SIZE 16
+#define MAX_x86_INSTR_SIZE 16
/* Size of various buffers used for storing strings */
-#define FILENAME_LEN 256
-#define FN_NAME_LEN 256
-#define BUF_LEN 512
-#define COMMIFY_BUF_LEN 128
-#define RESULTS_BUF_LEN 128
-#define LINE_BUF_LEN 64
+#define FILENAME_LEN 256
+#define FN_NAME_LEN 256
+#define BUF_LEN 512
+#define COMMIFY_BUF_LEN 128
+#define RESULTS_BUF_LEN 128
+#define LINE_BUF_LEN 64
+
+/*------------------------------------------------------------*/
+/*--- Generic utility stuff ---*/
+/*------------------------------------------------------------*/
+
+/* Integer base-2 logarithm for exact powers of two.
+ *
+ * Returns i when x == 2^i for some 0 <= i <= 30, and -1 for every other
+ * value (zero, negative numbers and anything that is not a power of two).
+ */
+int log2(int x)
+{
+   int i;
+
+   /* Zero and negative values are never powers of two. */
+   if (x <= 0) return -1;
+
+   /* Stop at bit 30: 1 << 31 would shift into the sign bit of a 32-bit
+    * int, which is undefined behaviour, and no positive int equals it
+    * anyway. */
+   for (i = 0; i < 31; i++) {
+      if ((1 << i) == x) return i;
+   }
+   return -1;
+}
/*------------------------------------------------------------*/
/*--- Output file related stuff ---*/
static void file_err()
{
VG_(message)(Vg_UserMsg,
- "FATAL: can't open cache simulation output file `%s'",
+ "error: can't open cache simulation output file `%s'",
OUT_FILE );
VG_(exit)(1);
}
cc->m2 = 0;
}
-
typedef enum { INSTR_CC, READ_CC, WRITE_CC, MOD_CC } CC_type;
/* Instruction-level cost-centres. The typedefs for these structs are in
/*--- Cache simulation stuff ---*/
/*------------------------------------------------------------*/
+#define MIN_LINE_SIZE 16
+
/* Total reads/writes/misses. Calculated during CC traversal at the end. */
static CC Ir_total;
static CC Dr_total;
static CC Dw_total;
+/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
+/* Probably only works for Intel and AMD chips, and probably only for some of
+ * them.
+ */
+/* Execute the CPUID instruction.  n is the request number; it is passed in
+ * the same register as the first output (%eax).  The resulting %eax, %ebx,
+ * %ecx and %edx values are stored through a, b, c and d respectively. */
+static __inline__ void cpuid(int n, int *a, int *b, int *c, int *d)
+{
+ __asm__ __volatile__ (
+ "cpuid"
+ : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
+ : "0" (n) /* input */
+ );
+}
+
+/* Emit the two-line debug warning used when an Intel instruction trace
+ * cache (whose capacity is given in micro-ops) is approximated by an
+ * ordinary byte-sized cache.
+ *
+ *   actual_size  reported trace cache capacity, in K micro-ops
+ *   used_size    size of the cache simulated instead, in KB
+ *   line_size    line size of the simulated cache, in bytes
+ */
+static void micro_ops_warn(int actual_size, int used_size, int line_size)
+{
+   /* First line: what the processor really has. */
+   VG_(message)(Vg_DebugMsg, "warning: Pentium with %d K micro_op instruction trace cache", actual_size);
+
+   /* Second line: what is simulated in its place. */
+   VG_(message)(Vg_DebugMsg, " Simulating a %d KB cache with %d B lines", used_size, line_size);
+}
+
+/* Intel method is truly wretched. We have to do an insane indexing into an
+ * array of pre-defined configurations for various parts of the memory
+ * hierarchy.
+ *
+ * level is the value CPUID request 0 returned in %eax; it must be at least
+ * 2, otherwise request 2 is not issued and -1 is returned.  The sixteen
+ * result bytes of request 2 are treated as descriptors: each one is looked
+ * up in the switch below and, when it names a cache, the matching
+ * configuration is written through I1c, D1c or L2c.  The size values in
+ * the table are in KB; get_caches_from_CPUID() multiplies them by 1024.
+ *
+ * Returns 0 on success, -1 if level < 2 or if the low byte of %eax (the
+ * "trials" count) is anything other than 1.
+ *
+ * NOTE(review): a cache for which no descriptor is seen is not written at
+ * all, so it keeps whatever the caller passed in.
+ */
+static
+int Intel_cache_info(int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+ unsigned char info[16]; /* the four result registers of request 2, as bytes */
+ int i;
+ int trials;
+
+ if (level < 2) {
+ VG_(message)(Vg_DebugMsg,
+ "warning: CPUID level < 2 for Intel processor (%d)",
+ level);
+ return -1;
+ }
+
+ cpuid(2, (int*)&info[0], (int*)&info[4], (int*)&info[8], (int*)&info[12]);
+ trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
+ info[0] = 0x0; /* reset AL */
+
+ if (0 != trials) {
+ VG_(message)(Vg_DebugMsg,
+ "warning: non-zero CPUID trials for Intel processor (%d)",
+ trials);
+ return -1;
+ }
+
+ for (i = 0; i < 16; i++) {
+
+ switch (info[i]) {
+
+ case 0x0: /* ignore zeros */
+ break;
+
+ case 0x01: case 0x02: case 0x03: case 0x04: /* TLB info, ignore */
+ case 0x90: case 0x96: case 0x9b:
+ break;
+
+ case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
+ case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
+
+ case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
+ case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
+
+ case 0x22: case 0x23: case 0x25: case 0x29:
+ case 0x88: case 0x89: case 0x8a:
+ VG_(message)(Vg_DebugMsg, "warning: L3 cache detected but ignored\n");
+ break;
+
+ case 0x40: /* only warns; *L2c is not written for this descriptor */
+ VG_(message)(Vg_DebugMsg,
+ "warning: L2 cache not installed, ignore L2 results.");
+ break;
+
+ case 0x41: *L2c = (cache_t) { 128, 4, 32 }; break;
+ case 0x42: *L2c = (cache_t) { 256, 4, 32 }; break;
+ case 0x43: *L2c = (cache_t) { 512, 4, 32 }; break;
+ case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; break;
+ case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; break;
+
+ /* These are sectored, whatever that means */
+ case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
+ case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
+ case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
+
+ /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
+ * conversion to byte size is a total guess; treat the 12K and 16K
+ * cases the same since the cache byte size must be a power of two for
+ * everything to work!. Also guessing 32 bytes for the line size...
+ */
+ case 0x70: /* 12K micro-ops, 8-way */
+ *I1c = (cache_t) { 16, 8, 32 };
+ micro_ops_warn(12, 16, 32);
+ break;
+ case 0x71: /* 16K micro-ops, 8-way */
+ *I1c = (cache_t) { 16, 8, 32 };
+ micro_ops_warn(16, 16, 32);
+ break;
+ case 0x72: /* 32K micro-ops, 8-way */
+ *I1c = (cache_t) { 32, 8, 32 };
+ micro_ops_warn(32, 32, 32);
+ break;
+
+ case 0x79: *L2c = (cache_t) { 128, 8, 64 }; break; /* sectored */
+ case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; break; /* sectored */
+ case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; break; /* sectored */
+ case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; break; /* sectored */
+
+ case 0x81: *L2c = (cache_t) { 128, 8, 32 }; break;
+ case 0x82: *L2c = (cache_t) { 256, 8, 32 }; break;
+ case 0x83: *L2c = (cache_t) { 512, 8, 32 }; break;
+ case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; break;
+ case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; break;
+
+ default: /* unrecognised descriptor: report it and carry on */
+ VG_(message)(Vg_DebugMsg,
+ "warning: Unknown Intel cache config value "
+ "(0x%x), ignoring\n", info[i]);
+ break;
+ }
+ }
+ return 0;
+}
+
+/* AMD processors report their cache geometry directly in the registers
+ * returned by the extended CPUID requests, so the fields only need to be
+ * shifted and masked out.
+ *
+ * Request 0x80000005 -- D1 in %ecx, I1 in %edx, both laid out as:
+ *   31..24  L1 cache size in KBs
+ *   23..16  L1 cache associativity (FFh=full)
+ *   15.. 8  L1 cache lines per tag
+ *    7.. 0  L1 cache line size in bytes
+ *
+ * Request 0x80000006 -- unified L2 in %ecx:
+ *   31..16  unified L2 cache size in KBs
+ *   15..12  unified L2 cache associativity (0=off, FFh=full)
+ *   11.. 8  unified L2 cache lines per tag
+ *    7.. 0  unified L2 cache line size in bytes
+ *
+ * #3 The AMD K7 processor's L2 cache must be configured prior to relying
+ *    upon this information. (Whatever that means -- njn)
+ *
+ * Sizes are stored in KB, exactly as reported.
+ *
+ * Returns 0 on success, non-zero on failure (extended request 0x80000006
+ * not available).
+ */
+static int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   int unused;
+   int max_ext;
+   int l1d_reg, l1i_reg, l2_reg;
+
+   /* Highest extended request number supported comes back in %eax. */
+   cpuid(0x80000000, &max_ext, &unused, &unused, &unused);
+
+   if (0 == (max_ext & 0x80000000) || max_ext < 0x80000006) {
+      VG_(message)(Vg_UserMsg, "warning: ext_level < 0x80000006 for AMD processor (0x%x)", max_ext);
+      return -1;
+   }
+
+   cpuid(0x80000005, &unused, &unused, &l1d_reg, &l1i_reg);
+   cpuid(0x80000006, &unused, &unused, &l2_reg, &unused);
+
+   /* Fields are listed in cache_t order: size, assoc, line_size. */
+   *D1c = (cache_t) { (l1d_reg >> 24) & 0xff,
+                      (l1d_reg >> 16) & 0xff,
+                      (l1d_reg >>  0) & 0xff };
+
+   *I1c = (cache_t) { (l1i_reg >> 24) & 0xff,
+                      (l1i_reg >> 16) & 0xff,
+                      (l1i_reg >>  0) & 0xff };
+
+   /* Nb: the L2 register uses a wider size field and a narrower
+    * associativity field than the L1 registers. */
+   *L2c = (cache_t) { (l2_reg >> 16) & 0xffff,
+                      (l2_reg >> 12) & 0xf,
+                      (l2_reg >>  0) & 0xff };
+
+   return 0;
+}
+
+static jmp_buf cpuid_jmpbuf; /* filled in by __builtin_setjmp in get_caches_from_CPUID() */
+/* SIGILL handler installed while CPUID is attempted: jumps back to the
+ * __builtin_setjmp point with value 1, which selects the "CPUID instruction
+ * not supported" branch there. */
+static
+void cpuid_SIGILL_handler(int signum) /* signum is not used */
+{
+ __builtin_longjmp(cpuid_jmpbuf, 1);
+}
+
+/* Try to discover the I1, D1 and L2 configurations with the CPUID
+ * instruction.
+ *
+ * A temporary SIGILL handler is installed around the first CPUID so that a
+ * processor without the instruction is detected rather than crashing; the
+ * previous handler is restored on both paths.  Only "GenuineIntel" and
+ * "AuthenticAMD" vendor strings are handled.
+ *
+ * On success (return 0) sizes have been converted from KB to bytes.  A
+ * cache the vendor routine did not report is left equal to
+ * UNDEFINED_CACHE (all fields -1) instead of holding uninitialised data,
+ * so the caller's validation can recognise it and substitute defaults.
+ *
+ * Returns non-zero on any failure; the contents of *I1c, *D1c and *L2c
+ * must not be relied upon in that case.
+ */
+static
+int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   int level;
+   char vendor_id[13];
+   vki_ksigaction sigill_new, sigill_saved;
+   int res, ret;
+
+   /* Install own SIGILL handler */
+   sigill_new.ksa_handler  = cpuid_SIGILL_handler;
+   sigill_new.ksa_flags    = 0;
+   sigill_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
+   vg_assert(res == 0);
+
+   res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
+   vg_assert(res == 0);
+
+   /* Trap for illegal instruction, in case it's a really old processor that
+    * doesn't support CPUID. */
+   if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
+      /* The vendor string comes back in %ebx, %edx, %ecx -- in that order. */
+      cpuid(0, &level, (int*)&vendor_id[0],
+               (int*)&vendor_id[8], (int*)&vendor_id[4]);
+      vendor_id[12] = '\0';
+
+      /* Restore old SIGILL handler */
+      res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
+      vg_assert(res == 0);
+
+   } else {
+      VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
+
+      /* Restore old SIGILL handler */
+      res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
+      vg_assert(res == 0);
+      return -1;
+   }
+
+   if (0 == level) {
+      VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
+      return -1;
+   }
+
+   /* Mark every cache as unknown first, so that anything the vendor
+    * routine does not report can be told apart from a detected value. */
+   *I1c = UNDEFINED_CACHE;
+   *D1c = UNDEFINED_CACHE;
+   *L2c = UNDEFINED_CACHE;
+
+   /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
+   if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
+      ret = Intel_cache_info(level, I1c, D1c, L2c);
+
+   } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
+      ret = AMD_cache_info(I1c, D1c, L2c);
+
+   } else {
+      VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
+                   vendor_id);
+      return -1;
+   }
+
+   /* Nothing worth scaling if the vendor routine gave up. */
+   if (0 != ret) return ret;
+
+   /* Successful!  Convert sizes from KB to bytes, leaving the -1 marker of
+    * any unreported cache untouched. */
+   if (-1 != I1c->size) I1c->size *= 1024;
+   if (-1 != D1c->size) D1c->size *= 1024;
+   if (-1 != L2c->size) L2c->size *= 1024;
+
+   return 0;
+}
+
+/* Validate one cache configuration and repair it in place, warning the
+ * user about every adjustment made.
+ *
+ *   cache  configuration to check; modified if anything is wrong
+ *   dflt   values substituted for fields that are not powers of two
+ *   name   cache name used in the warnings ("I1", "D1" or "L2")
+ *
+ * The checks run in a fixed order because later ones rely on the fields
+ * fixed up by earlier ones.
+ */
+static void check_cache(cache_t* cache, cache_t* dflt, char *name)
+{
+   /* 1. Every field has to be a power of two; fall back to the default
+    *    for any field that is not. */
+   if (log2(cache->size) == -1) {
+      VG_(message)(Vg_UserMsg, "warning: %s size of %dB not a power of two; defaulting to %dB",
+                   name, cache->size, dflt->size);
+      cache->size = dflt->size;
+   }
+
+   if (log2(cache->assoc) == -1) {
+      VG_(message)(Vg_UserMsg, "warning: %s associativity of %d not a power of two; defaulting to %d-way",
+                   name, cache->assoc, dflt->assoc);
+      cache->assoc = dflt->assoc;
+   }
+
+   if (log2(cache->line_size) == -1) {
+      VG_(message)(Vg_UserMsg, "warning: %s line size of %dB not a power of two; defaulting to %dB",
+                   name, cache->line_size, dflt->line_size);
+      cache->line_size = dflt->line_size;
+   }
+
+   /* 2. Lines shorter than MIN_LINE_SIZE would let a single instruction
+    *    straddle three cache lines, which breaks a simulation assertion
+    *    and is stupid anyway. */
+   if (MIN_LINE_SIZE > cache->line_size) {
+      VG_(message)(Vg_UserMsg, "warning: %s line size of %dB too small; increasing to %dB",
+                   name, cache->line_size, MIN_LINE_SIZE);
+      cache->line_size = MIN_LINE_SIZE;
+   }
+
+   /* 3. The cache must be bigger than one line (seg faults if not). */
+   if (cache->line_size >= cache->size) {
+      int doubled = cache->line_size * 2;
+
+      VG_(message)(Vg_UserMsg, "warning: %s cache size of %dB <= line size of %dB; increasing to %dB",
+                   name, cache->size, cache->line_size, doubled);
+      cache->size = doubled;
+   }
+
+   /* 4. There cannot be more ways than lines (seg faults otherwise). */
+   if ((cache->size / cache->line_size) < cache->assoc) {
+      int needed = cache->assoc * cache->line_size;
+
+      VG_(message)(Vg_UserMsg, "warning: %s associativity > (size / line size); increasing size to %dB",
+                   name, needed);
+      cache->size = needed;
+   }
+}
+
+/* Decide which I1, D1 and L2 configurations to simulate.
+ *
+ * On entry the three arguments are undefined.  On return each holds, in
+ * order of preference: the configuration given on the command line, the
+ * configuration detected with CPUID, or the built-in default.  The result
+ * is then validated (and fixed up if necessary) by check_cache().
+ *
+ * If all three caches are given on the command line CPUID is not consulted
+ * at all and the command-line values are used as they are.
+ */
+static void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   /* Defaults are for a model 3 or 4 Athlon */
+   cache_t I1_dflt = (cache_t) {  65536, 2, 64 };
+   cache_t D1_dflt = (cache_t) {  65536, 2, 64 };
+   cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
+
+#define CMD_LINE_DEFINED(L) \
+   (-1 != VG_(clo_##L##_cache).size || \
+    -1 != VG_(clo_##L##_cache).assoc || \
+    -1 != VG_(clo_##L##_cache).line_size)
+
+   if (CMD_LINE_DEFINED(I1) &&
+       CMD_LINE_DEFINED(D1) &&
+       CMD_LINE_DEFINED(L2)) {
+
+      /* Everything was specified by the user: nothing to detect, but the
+       * outputs still have to be filled in. */
+      *I1c = VG_(clo_I1_cache);
+      *D1c = VG_(clo_D1_cache);
+      *L2c = VG_(clo_L2_cache);
+
+   /* At least one undefined on command-line, try CPUID */
+   } else if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
+
+      /* Overwrite CPUID result for any cache defined on command-line */
+      if (CMD_LINE_DEFINED(I1)) *I1c = VG_(clo_I1_cache);
+      if (CMD_LINE_DEFINED(D1)) *D1c = VG_(clo_D1_cache);
+      if (CMD_LINE_DEFINED(L2)) *L2c = VG_(clo_L2_cache);
+
+   /* CPUID failed, use defaults for each undefined by command-line */
+   } else {
+      VG_(message)(Vg_DebugMsg,
+                   "Couldn't detect cache configuration, using one "
+                   "or more defaults ");
+
+      *I1c = (CMD_LINE_DEFINED(I1) ? VG_(clo_I1_cache) : I1_dflt);
+      *D1c = (CMD_LINE_DEFINED(D1) ? VG_(clo_D1_cache) : D1_dflt);
+      *L2c = (CMD_LINE_DEFINED(L2) ? VG_(clo_L2_cache) : L2_dflt);
+   }
+#undef CMD_LINE_DEFINED
+
+   check_cache(I1c, &I1_dflt, "I1");
+   check_cache(D1c, &D1_dflt, "D1");
+   check_cache(L2c, &L2_dflt, "L2");
+
+   if (VG_(clo_verbosity) > 1) {
+      VG_(message)(Vg_UserMsg, "Cache configuration used:");
+      VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
+                   I1c->size, I1c->assoc, I1c->line_size);
+      VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
+                   D1c->size, D1c->assoc, D1c->line_size);
+      VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
+                   L2c->size, L2c->assoc, L2c->line_size);
+   }
+}
+
void VG_(init_cachesim)(void)
{
+ cache_t I1c, D1c, L2c;
+
/* Make sure the output file can be written. */
Int fd = VG_(open_write)(OUT_FILE);
if (-1 == fd) {
initCC(&Dr_discards);
initCC(&Dw_discards);
- cachesim_I1_initcache();
- cachesim_D1_initcache();
- cachesim_L2_initcache();
+ get_caches(&I1c, &D1c, &L2c);
+
+ cachesim_I1_initcache(I1c);
+ //cachesim_I1_initcache();
+ cachesim_D1_initcache(D1c);
+ //cachesim_D1_initcache();
+ cachesim_L2_initcache(L2c);
+ //cachesim_L2_initcache();
init_BBCC_table();
}
if (-1 == fd) { file_err(); }
/* "desc:" lines (giving I1/D1/L2 cache configuration) */
- VG_(write)(fd, (void*)I1_desc_line, VG_(strlen)(I1_desc_line));
- VG_(write)(fd, (void*)D1_desc_line, VG_(strlen)(D1_desc_line));
- VG_(write)(fd, (void*)L2_desc_line, VG_(strlen)(L2_desc_line));
+ VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
+ VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+ VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
+ VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+ VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
+ VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
/* "cmd:" line */
VG_(strcpy)(buf, "cmd:");
for (i = 0; i < space; i++) buf[i] = ' ';
}
-void VG_(show_cachesim_results)(Int client_argc, Char** client_argv)
+void VG_(do_cachesim_results)(Int client_argc, Char** client_argv)
{
CC D_total;
ULong L2_total_m, L2_total_mr, L2_total_mw,
fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
+ if (VG_(clo_verbosity) == 0)
+ return;
+
/* I cache results. Use the I_refs value to determine the first column
* width. */
l1 = commify(Ir_total.a, 0, buf1);
-/* D1 cache simulator, generated by vg_cachegen.
- * total size = 65536 bytes
- * line size = 64 bytes
- * associativity = 2-way associative
- *
- * This file should be #include-d into vg_cachesim.c
- */
+/*--------------------------------------------------------------------*/
+/*--- D1 cache simulation. ---*/
+/*--- vg_cachesim_D1.c ---*/
+/*--------------------------------------------------------------------*/
-static char D1_desc_line[] =
- "desc: D1 cache: 65536 B, 64 B, 2-way associative\n";
+/*
+ This file is part of Valgrind, an x86 protected-mode emulator
+ designed for debugging and profiling binaries on x86-Unixes.
-static UInt D1_tags[512][2];
+ Copyright (C) 2002 Nicholas Nethercote
+ njn25@cam.ac.uk
-static void cachesim_D1_initcache(void)
-{
- UInt set, way;
- for (set = 0; set < 512; set++)
- for (way = 0; way < 2; way++)
- D1_tags[set][way] = 0;
-}
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
-static __inline__
-void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *m2)
-{
- register UInt set1 = ( a >> 6) & (512-1);
- register UInt set2 = ((a + size - 1) >> 6) & (512-1);
- register UInt tag = a >> (6 + 9);
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
- if (set1 == set2) {
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
- if (tag == D1_tags[set1][0]) {
- return;
- }
- else if (tag == D1_tags[set1][1]) {
- D1_tags[set1][1] = D1_tags[set1][0];
- D1_tags[set1][0] = tag;
- return;
- }
- else {
- /* A miss */
- D1_tags[set1][1] = D1_tags[set1][0];
- D1_tags[set1][0] = tag;
+ The GNU General Public License is contained in the file LICENSE.
+*/
- (*m1)++;
- cachesim_L2_doref(a, size, m2);
- }
+#include "vg_cachesim_gen.c"
- } else if ((set1 + 1) % 512 == set2) {
+CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );
- Bool is_D1_miss = False;
+/*--------------------------------------------------------------------*/
+/*--- end vg_cachesim_D1.c ---*/
+/*--------------------------------------------------------------------*/
- /* Block one */
- if (tag == D1_tags[set1][0]) {
- }
- else if (tag == D1_tags[set1][1]) {
- D1_tags[set1][1] = D1_tags[set1][0];
- D1_tags[set1][0] = tag;
- }
- else {
- /* A miss */
- D1_tags[set1][1] = D1_tags[set1][0];
- D1_tags[set1][0] = tag;
-
- is_D1_miss = True;
- }
-
- /* Block two */
- if (tag == D1_tags[set2][0]) {
- }
- else if (tag == D1_tags[set2][1]) {
- D1_tags[set2][1] = D1_tags[set2][0];
- D1_tags[set2][0] = tag;
- }
- else {
- /* A miss */
- D1_tags[set2][1] = D1_tags[set2][0];
- D1_tags[set2][0] = tag;
-
- is_D1_miss = True;
- }
-
- /* Miss treatment */
- if (is_D1_miss) {
- (*m1)++;
- cachesim_L2_doref(a, size, m2);
- }
-
- } else {
- VG_(printf)("\nERROR: Data item 0x%x of size %u bytes is in two non-adjacent\n", a, size);
- VG_(printf)("sets %d and %d.\n", set1, set2);
- VG_(panic)("D1 cache set mismatch");
- }
-}
-/* I1 cache simulator, generated by vg_cachegen.
- * total size = 65536 bytes
- * line size = 64 bytes
- * associativity = 2-way associative
- *
- * This file should be #include-d into vg_cachesim.c
- */
+/*--------------------------------------------------------------------*/
+/*--- I1 cache simulation. ---*/
+/*--- vg_cachesim_I1.c ---*/
+/*--------------------------------------------------------------------*/
-static char I1_desc_line[] =
- "desc: I1 cache: 65536 B, 64 B, 2-way associative\n";
+/*
+ This file is part of Valgrind, an x86 protected-mode emulator
+ designed for debugging and profiling binaries on x86-Unixes.
-static UInt I1_tags[512][2];
+ Copyright (C) 2002 Nicholas Nethercote
+ njn25@cam.ac.uk
-static void cachesim_I1_initcache(void)
-{
- UInt set, way;
- for (set = 0; set < 512; set++)
- for (way = 0; way < 2; way++)
- I1_tags[set][way] = 0;
-}
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
-static __inline__
-void cachesim_I1_doref(Addr a, UChar size, ULong* m1, ULong *m2)
-{
- register UInt set1 = ( a >> 6) & (512-1);
- register UInt set2 = ((a + size - 1) >> 6) & (512-1);
- register UInt tag = a >> (6 + 9);
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
- if (set1 == set2) {
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
- if (tag == I1_tags[set1][0]) {
- return;
- }
- else if (tag == I1_tags[set1][1]) {
- I1_tags[set1][1] = I1_tags[set1][0];
- I1_tags[set1][0] = tag;
- return;
- }
- else {
- /* A miss */
- I1_tags[set1][1] = I1_tags[set1][0];
- I1_tags[set1][0] = tag;
+ The GNU General Public License is contained in the file LICENSE.
+*/
- (*m1)++;
- cachesim_L2_doref(a, size, m2);
- }
+#include "vg_cachesim_gen.c"
- } else if ((set1 + 1) % 512 == set2) {
+CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );
- Bool is_I1_miss = False;
+/*--------------------------------------------------------------------*/
+/*--- end vg_cachesim_I1.c ---*/
+/*--------------------------------------------------------------------*/
- /* Block one */
- if (tag == I1_tags[set1][0]) {
- }
- else if (tag == I1_tags[set1][1]) {
- I1_tags[set1][1] = I1_tags[set1][0];
- I1_tags[set1][0] = tag;
- }
- else {
- /* A miss */
- I1_tags[set1][1] = I1_tags[set1][0];
- I1_tags[set1][0] = tag;
-
- is_I1_miss = True;
- }
-
- /* Block two */
- if (tag == I1_tags[set2][0]) {
- }
- else if (tag == I1_tags[set2][1]) {
- I1_tags[set2][1] = I1_tags[set2][0];
- I1_tags[set2][0] = tag;
- }
- else {
- /* A miss */
- I1_tags[set2][1] = I1_tags[set2][0];
- I1_tags[set2][0] = tag;
-
- is_I1_miss = True;
- }
-
- /* Miss treatment */
- if (is_I1_miss) {
- (*m1)++;
- cachesim_L2_doref(a, size, m2);
- }
-
- } else {
- VG_(printf)("\nERROR: Data item 0x%x of size %u bytes is in two non-adjacent\n", a, size);
- VG_(printf)("sets %d and %d.\n", set1, set2);
- VG_(panic)("I1 cache set mismatch");
- }
-}
-/* L2 cache simulator, generated by vg_cachegen.
- * total size = 262144 bytes
- * line size = 64 bytes
- * associativity = 8-way associative
- *
- * This file should be #include-d into vg_cachesim.c
- */
+/*--------------------------------------------------------------------*/
+/*--- L2 cache simulation. ---*/
+/*--- vg_cachesim_L2.c ---*/
+/*--------------------------------------------------------------------*/
-static char L2_desc_line[] =
- "desc: L2 cache: 262144 B, 64 B, 8-way associative\n";
+/*
+ This file is part of Valgrind, an x86 protected-mode emulator
+ designed for debugging and profiling binaries on x86-Unixes.
-static UInt L2_tags[512][8];
+ Copyright (C) 2002 Nicholas Nethercote
+ njn25@cam.ac.uk
-static void cachesim_L2_initcache(void)
-{
- UInt set, way;
- for (set = 0; set < 512; set++)
- for (way = 0; way < 8; way++)
- L2_tags[set][way] = 0;
-}
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
-static __inline__
-void cachesim_L2_doref(Addr a, UChar size, ULong *m2)
-{
- register UInt set1 = ( a >> 6) & (512-1);
- register UInt set2 = ((a + size - 1) >> 6) & (512-1);
- register UInt tag = a >> (6 + 9);
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
- if (set1 == set2) {
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
- if (tag == L2_tags[set1][0]) {
- return;
- }
- else if (tag == L2_tags[set1][1]) {
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][2]) {
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][3]) {
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][4]) {
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][5]) {
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][6]) {
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else if (tag == L2_tags[set1][7]) {
- L2_tags[set1][7] = L2_tags[set1][6];
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- return;
- }
- else {
- /* A miss */
- L2_tags[set1][7] = L2_tags[set1][6];
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
+ The GNU General Public License is contained in the file LICENSE.
+*/
- (*m2)++;
- }
+#include "vg_cachesim_gen.c"
- } else if ((set1 + 1) % 512 == set2) {
+CACHESIM(L2, (*m2)++ );
- Bool is_L2_miss = False;
+/*--------------------------------------------------------------------*/
+/*--- end vg_cachesim_L2.c ---*/
+/*--------------------------------------------------------------------*/
- /* Block one */
- if (tag == L2_tags[set1][0]) {
- }
- else if (tag == L2_tags[set1][1]) {
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][2]) {
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][3]) {
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][4]) {
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][5]) {
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][6]) {
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else if (tag == L2_tags[set1][7]) {
- L2_tags[set1][7] = L2_tags[set1][6];
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
- }
- else {
- /* A miss */
- L2_tags[set1][7] = L2_tags[set1][6];
- L2_tags[set1][6] = L2_tags[set1][5];
- L2_tags[set1][5] = L2_tags[set1][4];
- L2_tags[set1][4] = L2_tags[set1][3];
- L2_tags[set1][3] = L2_tags[set1][2];
- L2_tags[set1][2] = L2_tags[set1][1];
- L2_tags[set1][1] = L2_tags[set1][0];
- L2_tags[set1][0] = tag;
-
- is_L2_miss = True;
- }
-
- /* Block two */
- if (tag == L2_tags[set2][0]) {
- }
- else if (tag == L2_tags[set2][1]) {
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][2]) {
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][3]) {
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][4]) {
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][5]) {
- L2_tags[set2][5] = L2_tags[set2][4];
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][6]) {
- L2_tags[set2][6] = L2_tags[set2][5];
- L2_tags[set2][5] = L2_tags[set2][4];
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else if (tag == L2_tags[set2][7]) {
- L2_tags[set2][7] = L2_tags[set2][6];
- L2_tags[set2][6] = L2_tags[set2][5];
- L2_tags[set2][5] = L2_tags[set2][4];
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
- }
- else {
- /* A miss */
- L2_tags[set2][7] = L2_tags[set2][6];
- L2_tags[set2][6] = L2_tags[set2][5];
- L2_tags[set2][5] = L2_tags[set2][4];
- L2_tags[set2][4] = L2_tags[set2][3];
- L2_tags[set2][3] = L2_tags[set2][2];
- L2_tags[set2][2] = L2_tags[set2][1];
- L2_tags[set2][1] = L2_tags[set2][0];
- L2_tags[set2][0] = tag;
-
- is_L2_miss = True;
- }
-
- /* Miss treatment */
- if (is_L2_miss) {
- (*m2)++;
- }
-
- } else {
- VG_(printf)("\nERROR: Data item 0x%x of size %u bytes is in two non-adjacent\n", a, size);
- VG_(printf)("sets %d and %d.\n", set1, set2);
- VG_(panic)("L2 cache set mismatch");
- }
-}
( ((bit7) << 7) | ((bit6) << 6) | ((bit5) << 5) | ((bit4) << 4) \
| ((bit3) << 3) | ((bit2) << 2) | ((bit1) << 1) | (bit0))
+/* For cache simulation: the geometry of one simulated cache.  The field
+ * order matters, because values of this type are written as positional
+ * initialisers of the form { size, assoc, line_size }. */
+typedef struct {
+ int size; /* total capacity, in bytes */
+ int assoc; /* associativity: N for an "N-way" cache */
+ int line_size; /* length of one cache line, in bytes */
+} cache_t;
+
+/* Marker value with every field set to -1; a command-line cache option
+ * that still holds it has not been given by the user. */
+#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
/* ---------------------------------------------------------------------
Now the basic types are set up, we can haul in the kernel-interface
extern Bool VG_(clo_cleanup);
/* Cache simulation instrumentation? default: NO */
extern Bool VG_(clo_cachesim);
+/* I1 cache configuration. default: undefined */
+extern cache_t VG_(clo_I1_cache);
+/* D1 cache configuration. default: undefined */
+extern cache_t VG_(clo_D1_cache);
+/* L2 cache configuration. default: undefined */
+extern cache_t VG_(clo_L2_cache);
/* SMC write checks? default: SOME (1,2,4 byte movs to mem) */
extern Int VG_(clo_smc_check);
/* DEBUG: print system calls? default: NO */
const Char *format, va_list vargs );
extern Bool VG_(isspace) ( Char c );
+extern Bool VG_(isdigit) ( Char c );
extern Int VG_(strlen) ( const Char* str );
Exports of vg_cachesim.c
------------------------------------------------------------------ */
+extern int log2( int x );
+
extern UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr);
typedef struct _iCC iCC;
typedef struct _idCC idCC;
-extern void VG_(init_cachesim) ( void );
-extern void VG_(show_cachesim_results)( Int client_argc, Char** client_argv );
+extern void VG_(init_cachesim) ( void );
+extern void VG_(do_cachesim_results)( Int client_argc, Char** client_argv );
extern void VG_(cachesim_log_non_mem_instr)( iCC* cc );
extern void VG_(cachesim_log_mem_instr) ( idCC* cc, Addr data_addr );
Bool VG_(clo_instrument);
Bool VG_(clo_cleanup);
Bool VG_(clo_cachesim);
+cache_t VG_(clo_I1_cache);
+cache_t VG_(clo_D1_cache);
+cache_t VG_(clo_L2_cache);
Int VG_(clo_smc_check);
Bool VG_(clo_trace_syscalls);
Bool VG_(clo_trace_signals);
config_error("couldn't find client's argc/argc/envp");
}
+/* Parse a cache option of the form "--I1=65536,2,64" into *cache.
+ *
+ *   cache     receives size, associativity and line size, in that order;
+ *             it is only written when the whole option is well-formed.
+ *   orig_opt  the complete option string, including the "--XX=" prefix.
+ *   opt_len   length of that prefix, i.e. the offset of the first number.
+ *
+ * The text after the prefix must be exactly three non-empty runs of decimal
+ * digits separated by single commas, with nothing following the third.
+ * Anything else, including a value too large for an int, is reported via
+ * bad_option().  The string is scanned in place, so no temporary copy is
+ * allocated. */
+static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
+{
+   int   field[3];
+   int   n;
+   char* p = orig_opt + opt_len;
+
+   for (n = 0; n < 3; n++) {
+      int val = 0;
+
+      /* Every field needs at least one digit; "--I1=,," is malformed. */
+      if (!VG_(isdigit)(*p)) goto bad;
+
+      while (VG_(isdigit)(*p)) {
+         int digit = *p - '0';
+         /* Stop before 10*val + digit could exceed 0x7FFFFFFF. */
+         if (val > (0x7FFFFFFF - digit) / 10) goto bad;
+         val = 10 * val + digit;
+         p++;
+      }
+      field[n] = val;
+
+      /* The first two fields end at a comma, the last at end of string. */
+      if (n < 2) {
+         if (',' != *p) goto bad;
+         p++;
+      } else {
+         if ('\0' != *p) goto bad;
+      }
+   }
+
+   cache->size      = field[0];
+   cache->assoc     = field[1];
+   cache->line_size = field[2];
+
+   return;
+
+bad:
+   bad_option(orig_opt);
+}
static void process_cmd_line_options ( void )
{
VG_(clo_single_step) = False;
VG_(clo_optimise) = True;
VG_(clo_instrument) = True;
+ VG_(clo_cachesim) = False;
+ VG_(clo_I1_cache) = UNDEFINED_CACHE;
+ VG_(clo_D1_cache) = UNDEFINED_CACHE;
+ VG_(clo_L2_cache) = UNDEFINED_CACHE;
VG_(clo_cleanup) = True;
VG_(clo_smc_check) = /* VG_CLO_SMC_SOME */ VG_CLO_SMC_NONE;
VG_(clo_trace_syscalls) = False;
/* (Suggested by Fabrice Bellard ... )
We look for the Linux ELF table and go down until we find the
- envc & envp. It is not full proof, but these structures should
+ envc & envp. It is not fool-proof, but these structures should
change less often than the libc ones. */
{
UInt* sp = 0; /* bogus init to keep gcc -O happy */
else if (STREQ(argv[i], "--cachesim=no"))
VG_(clo_cachesim) = False;
+ /* 5 is length of "--I1=" */
+ else if (0 == VG_(strncmp)(argv[i], "--I1=", 5))
+ parse_cache_opt(&VG_(clo_I1_cache), argv[i], 5);
+ else if (0 == VG_(strncmp)(argv[i], "--D1=", 5))
+ parse_cache_opt(&VG_(clo_D1_cache), argv[i], 5);
+ else if (0 == VG_(strncmp)(argv[i], "--L2=", 5))
+ parse_cache_opt(&VG_(clo_L2_cache), argv[i], 5);
+
else if (STREQ(argv[i], "--smc-check=none"))
VG_(clo_smc_check) = VG_CLO_SMC_NONE;
else if (STREQ(argv[i], "--smc-check=some"))
VG_(running_on_simd_CPU) = False;
if (VG_(clo_cachesim))
- VG_(show_cachesim_results)(VG_(client_argc), VG_(client_argv));
+ VG_(do_cachesim_results)(VG_(client_argc), VG_(client_argv));
VG_(do_sanity_checks)( True /*include expensive checks*/ );
return (c == ' ' || c == '\n' || c == '\t' || c == 0);
}
+/* Return non-zero iff c is one of the ASCII decimal digits '0'..'9'. */
+Bool VG_(isdigit) ( Char c )
+{
+   return ('0' <= c && '9' >= c);
+}
Int VG_(strlen) ( const Char* str )
{