From a3b8d67b91ddead7fbe5ef209345033efe429c6e Mon Sep 17 00:00:00 2001 From: Josef Weidendorfer Date: Mon, 20 Mar 2006 10:27:30 +0000 Subject: [PATCH] Callgrind merge: code git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5780 --- Makefile.am | 1 + callgrind/Makefile.am | 59 + callgrind/bb.c | 338 +++++ callgrind/bbcc.c | 883 +++++++++++++ callgrind/callgrind.h | 130 ++ callgrind/callgrind_annotate.in | 1191 +++++++++++++++++ callgrind/callgrind_control.in | 485 +++++++ callgrind/callstack.c | 424 ++++++ callgrind/clo.c | 765 +++++++++++ callgrind/command.c | 517 ++++++++ callgrind/context.c | 328 +++++ callgrind/costs.c | 79 ++ callgrind/costs.h | 35 + callgrind/debug.c | 453 +++++++ callgrind/docs/Makefile.am | 1 + callgrind/dump.c | 1715 ++++++++++++++++++++++++ callgrind/events.c | 575 ++++++++ callgrind/events.h | 113 ++ callgrind/fn.c | 616 +++++++++ callgrind/global.h | 838 ++++++++++++ callgrind/jumps.c | 233 ++++ callgrind/main.c | 1086 ++++++++++++++++ callgrind/sim.c | 2162 +++++++++++++++++++++++++++++++ callgrind/tests/Makefile.am | 14 + callgrind/threads.c | 456 +++++++ configure.in | 5 + 26 files changed, 13502 insertions(+) create mode 100644 callgrind/Makefile.am create mode 100644 callgrind/bb.c create mode 100644 callgrind/bbcc.c create mode 100644 callgrind/callgrind.h create mode 100644 callgrind/callgrind_annotate.in create mode 100644 callgrind/callgrind_control.in create mode 100644 callgrind/callstack.c create mode 100644 callgrind/clo.c create mode 100644 callgrind/command.c create mode 100644 callgrind/context.c create mode 100644 callgrind/costs.c create mode 100644 callgrind/costs.h create mode 100644 callgrind/debug.c create mode 100644 callgrind/docs/Makefile.am create mode 100644 callgrind/dump.c create mode 100644 callgrind/events.c create mode 100644 callgrind/events.h create mode 100644 callgrind/fn.c create mode 100644 callgrind/global.h create mode 100644 callgrind/jumps.c create mode 100644 callgrind/main.c create mode 100644 callgrind/sim.c create mode 100644 callgrind/tests/Makefile.am create mode 100644 callgrind/threads.c diff --git a/Makefile.am b/Makefile.am index 57592746ef..0bae4d0000 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,6 +6,7 @@ include $(top_srcdir)/Makefile.all.am ## addrcheck must come after memcheck, for mac_*.o TOOLS = memcheck \ cachegrind \ + callgrind \ massif \ lackey \ none diff --git a/callgrind/Makefile.am b/callgrind/Makefile.am new file mode 100644 index 0000000000..0d8cf57510 --- /dev/null +++ b/callgrind/Makefile.am @@ -0,0 +1,59 @@ +include $(top_srcdir)/Makefile.tool.am + +bin_SCRIPTS = callgrind_annotate callgrind_control + +noinst_HEADERS = global.h costs.h events.h + +noinst_PROGRAMS = +if VG_X86_LINUX +noinst_PROGRAMS += callgrind-x86-linux +endif +if VG_AMD64_LINUX +noinst_PROGRAMS += callgrind-amd64-linux +endif +if VG_PPC32_LINUX +noinst_PROGRAMS += callgrind-ppc32-linux +endif +if VG_PPC64_LINUX +noinst_PROGRAMS += callgrind-ppc64-linux +endif + +CALLGRIND_SOURCES_COMMON = main.c events.c bb.c clo.c \ + costs.c bbcc.c command.c debug.c fn.c \ + sim.c callstack.c context.c dump.c jumps.c \ + threads.c + +CALLGRIND_SOURCES_X86 = ../cachegrind/cg-x86.c +CALLGRIND_SOURCES_AMD64 = ../cachegrind/cg-amd64.c +CALLGRIND_SOURCES_PPC32 = ../cachegrind/cg-ppc32.c +CALLGRIND_SOURCES_PPC64 = ../cachegrind/cg-ppc64.c + +CALLGRIND_CFLAGS_COMMON = -I../cachegrind + +callgrind_x86_linux_SOURCES = $(CALLGRIND_SOURCES_COMMON) $(CALLGRIND_SOURCES_X86) +callgrind_x86_linux_CPPFLAGS = $(AM_CPPFLAGS_X86_LINUX) 
+callgrind_x86_linux_CFLAGS = $(CALLGRIND_CFLAGS_COMMON) $(AM_CFLAGS_X86_LINUX) +callgrind_x86_linux_DEPENDENCIES = $(COREGRIND_LIBS_X86_LINUX) +callgrind_x86_linux_LDADD = $(TOOL_LDADD_X86_LINUX) +callgrind_x86_linux_LDFLAGS = $(TOOL_LDFLAGS_X86_LINUX) + +callgrind_amd64_linux_SOURCES = $(CALLGRIND_SOURCES_COMMON) $(CALLGRIND_SOURCES_AMD64) +callgrind_amd64_linux_CPPFLAGS = $(AM_CPPFLAGS_AMD64_LINUX) +callgrind_amd64_linux_CFLAGS = $(CALLGRIND_CFLAGS_COMMON) $(AM_CFLAGS_AMD64_LINUX) +callgrind_amd64_linux_DEPENDENCIES = $(COREGRIND_LIBS_AMD64_LINUX) +callgrind_amd64_linux_LDADD = $(TOOL_LDADD_AMD64_LINUX) +callgrind_amd64_linux_LDFLAGS = $(TOOL_LDFLAGS_AMD64_LINUX) + +callgrind_ppc32_linux_SOURCES = $(CALLGRIND_SOURCES_COMMON) $(CALLGRIND_SOURCES_PPC32) +callgrind_ppc32_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC32_LINUX) +callgrind_ppc32_linux_CFLAGS = $(CALLGRIND_CFLAGS_COMMON) $(AM_CFLAGS_PPC32_LINUX) +callgrind_ppc32_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_LINUX) +callgrind_ppc32_linux_LDADD = $(TOOL_LDADD_PPC32_LINUX) +callgrind_ppc32_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC32_LINUX) + +callgrind_ppc64_linux_SOURCES = $(CALLGRIND_SOURCES_COMMON) $(CALLGRIND_SOURCES_PPC64) +callgrind_ppc64_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC64_LINUX) +callgrind_ppc64_linux_CFLAGS = $(CALLGRIND_CFLAGS_COMMON) $(AM_CFLAGS_PPC64_LINUX) +callgrind_ppc64_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_LINUX) +callgrind_ppc64_linux_LDADD = $(TOOL_LDADD_PPC64_LINUX) +callgrind_ppc64_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC64_LINUX) \ No newline at end of file diff --git a/callgrind/bb.c b/callgrind/bb.c new file mode 100644 index 0000000000..a6c8ebadcf --- /dev/null +++ b/callgrind/bb.c @@ -0,0 +1,338 @@ +/*--------------------------------------------------------------------*/ +/*--- Callgrind ---*/ +/*--- bb.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Callgrind, a Valgrind tool for call tracing. + + Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +#include "global.h" + +/*------------------------------------------------------------*/ +/*--- Basic block (BB) operations ---*/ +/*------------------------------------------------------------*/ + +/* BB hash, resizable */ +bb_hash bbs; + +void CLG_(init_bb_hash)() +{ + Int i; + + bbs.size = 8437; + bbs.entries = 0; + bbs.table = (BB**) CLG_MALLOC(bbs.size * sizeof(BB*)); + + for (i = 0; i < bbs.size; i++) bbs.table[i] = NULL; +} + +bb_hash* CLG_(get_bb_hash)() +{ + return &bbs; +} + +/* The hash stores BBs according to + * - ELF object (is 0 for code in anonymous mapping) + * - BB base as object file offset + */ +static __inline__ +UInt bb_hash_idx(obj_node* obj, OffT offset, UInt size) +{ + return (((Addr)obj) + offset) % size; +} + +/* double size of bb table */ +static +void resize_bb_table(void) +{ + Int i, new_size, conflicts1 = 0, conflicts2 = 0; + BB **new_table, *curr, *next; + UInt new_idx; + + new_size = 2* bbs.size +3; + new_table = (BB**) CLG_MALLOC(new_size * sizeof(BB*)); + + if (!new_table) return; + + for (i = 0; i < new_size; i++) + new_table[i] = NULL; + + for (i = 0; i < bbs.size; i++) { + if (bbs.table[i] == NULL) continue; + + curr = bbs.table[i]; + while (NULL != curr) { + next = curr->next; + + new_idx = bb_hash_idx(curr->obj, curr->offset, new_size); + + curr->next = new_table[new_idx]; + new_table[new_idx] = curr; + if (curr->next) { + conflicts1++; + if (curr->next->next) + conflicts2++; + } + + curr = next; + } + } + + VG_(free)(bbs.table); + + + CLG_DEBUG(0, "Resize BB Hash: %d => %d (entries %d, conflicts %d/%d)\n", + bbs.size, new_size, + bbs.entries, conflicts1, conflicts2); + + bbs.size = new_size; + bbs.table = new_table; + CLG_(stat).bb_hash_resizes++; +} + + +/** + * Allocate new BB structure (including space for event type list) + * Not initialized: + * - instr_len, cost_count, instr[] + */ +static BB* new_bb(obj_node* obj, OffT offset, + UInt instr_count, UInt cjmp_count, Bool cjmp_inverted) +{ + BB* new; + UInt new_idx; + + /* check fill degree of bb hash table and resize if needed (>80%) */ + bbs.entries++; + if (10 * bbs.entries / bbs.size > 8) + resize_bb_table(); + + new = (BB*) CLG_MALLOC(sizeof(BB) + + instr_count * sizeof(InstrInfo) + + (cjmp_count+1) * sizeof(CJmpInfo)); + + new->obj = obj; + new->offset = offset; + + new->instr_count = instr_count; + new->cjmp_count = cjmp_count; + new->cjmp_inverted = cjmp_inverted; + new->jmp = (CJmpInfo*) &(new->instr[instr_count]); + new->instr_len = 0; + new->cost_count = 0; + new->sect_kind = VG_(seginfo_sect_kind)(offset + obj->offset); + new->fn = 0; + new->line = 0; + new->is_entry = 0; + new->bbcc_list = 0; + new->last_bbcc = 0; + + /* insert into BB hash table */ + new_idx = bb_hash_idx(obj, offset, bbs.size); + new->next = bbs.table[new_idx]; + bbs.table[new_idx] = new; + + CLG_(stat).distinct_bbs++; + +#if CLG_ENABLE_DEBUG + CLG_DEBUGIF(3) { + VG_(printf)(" new_bb (instr %d, jmps %d, inv %s) [now %d]: ", + instr_count, cjmp_count, + cjmp_inverted ? 
"yes":"no", + CLG_(stat).distinct_bbs); + CLG_(print_bb)(0, new); + VG_(printf)("\n"); + } +#endif + + CLG_(get_fn_node)(new); + + return new; +} + + +/* get the BB structure for a BB start address */ +static __inline__ +BB* lookup_bb(obj_node* obj, OffT offset) +{ + BB* bb; + Int idx; + + idx = bb_hash_idx(obj, offset, bbs.size); + bb = bbs.table[idx]; + + while(bb) { + if ((bb->obj == obj) && (bb->offset == offset)) break; + bb = bb->next; + } + + CLG_DEBUG(5, " lookup_bb (Obj %s, off %p): %p\n", + obj->name, offset, bb); + return bb; +} + +static __inline__ +obj_node* obj_of_address(Addr addr) +{ + obj_node* obj; + SegInfo* si; + OffT offset; + + si = VG_(find_seginfo)(addr); + obj = CLG_(get_obj_node)( si ); + + /* Update symbol offset in object if remapped */ + offset = si ? VG_(seginfo_sym_offset)(si):0; + if (obj->offset != offset) { + Addr start = si ? VG_(seginfo_start)(si) : 0; + + CLG_DEBUG(0, "Mapping changed for '%s': %p -> %p\n", + obj->name, obj->start, start); + + /* Size should be the same, and offset diff == start diff */ + CLG_ASSERT( obj->size == (si ? VG_(seginfo_size)(si) : 0) ); + CLG_ASSERT( obj->start - start == obj->offset - offset ); + obj->offset = offset; + obj->start = start; + } + + return obj; +} + +/* Get the BB structure for a BB start address. + * If the BB has to be created, the IRBB is needed to + * compute the event type list for costs, and seen_before is + * set to False. Otherwise, seen_before is set to True. + * + * BBs are never discarded. There are 2 cases where this function + * is called from CLG_(instrument)() and a BB already exists: + * - The instrumented version was removed from Valgrinds TT cache + * - The ELF object of the BB was unmapped and mapped again. + * This involves a possibly different address, but is handled by + * looking up a BB keyed by (obj_node, file offset). + * + * bbIn==0 is possible for artifical BB without real code. + * Such a BB is created when returning to an unknown function. + */ +BB* CLG_(get_bb)(Addr addr, IRBB* bbIn, /*OUT*/ Bool *seen_before) +{ + BB* bb; + obj_node* obj; + UInt n_instrs, n_jmps; + Bool cjmp_inverted = False; + + CLG_DEBUG(5, "+ get_bb(BB %p)\n", addr); + + obj = obj_of_address(addr); + bb = lookup_bb(obj, addr - obj->offset); + + n_instrs = 0; + n_jmps = 0; + CLG_(collectBlockInfo)(bbIn, &n_instrs, &n_jmps, &cjmp_inverted); + + *seen_before = bb ? True : False; + if (*seen_before) { + if (bb->instr_count != n_instrs) { + VG_(message)(Vg_DebugMsg, + "ERROR: BB Retranslation Mismatch at BB %p", addr); + VG_(message)(Vg_DebugMsg, + " new: Obj %s, Off %p, BBOff %p, Instrs %u", + obj->name, obj->offset, + addr - obj->offset, n_instrs); + VG_(message)(Vg_DebugMsg, + " old: Obj %s, Off %p, BBOff %p, Instrs %u", + bb->obj->name, bb->obj->offset, + bb->offset, bb->instr_count); + CLG_ASSERT(bb->instr_count == n_instrs ); + } + CLG_ASSERT(bb->cjmp_count == n_jmps ); + CLG_(stat).bb_retranslations++; + + CLG_DEBUG(5, "- get_bb(BB %p): seen before.\n", addr); + return bb; + } + + bb = new_bb(obj, addr - obj->offset, n_instrs, n_jmps, cjmp_inverted); + + CLG_DEBUG(5, "- get_bb(BB %p)\n", addr); + + return bb; +} + +/* Delete the BB info for the bb with unredirected entry-point + address 'addr'. */ +void CLG_(delete_bb)(Addr addr) +{ + BB *bb, *bp; + Int idx, size; + + obj_node* obj = obj_of_address(addr); + OffT offset = addr - obj->offset; + + idx = bb_hash_idx(obj, offset, bbs.size); + bb = bbs.table[idx]; + + /* bb points at the current bb under consideration, and bp is the + one before. 
 */
+ bp = NULL;
+ while(bb) {
+ if ((bb->obj == obj) && (bb->offset == offset)) break;
+ bp = bb;
+ bb = bb->next;
+ }
+
+ if (bb == NULL) {
+ CLG_DEBUG(3, " delete_bb (Obj %s, off %p): NOT FOUND\n",
+ obj->name, offset);
+
+ /* we didn't find it. That's strange. */
+ return;
+ }
+
+ /* unlink it from hash table */
+
+ if (bp == NULL) {
+ /* we found the first one in the list. */
+ tl_assert(bb == bbs.table[idx]);
+ bbs.table[idx] = bb->next;
+ } else {
+ tl_assert(bb != bbs.table[idx]);
+ bp->next = bb->next;
+ }
+
+ CLG_DEBUG(3, " delete_bb (Obj %s, off %p): %p, BBCC head: %p\n",
+ obj->name, offset, bb, bb->bbcc_list);
+
+ if (bb->bbcc_list == 0) {
+ /* can be safely deleted */
+
+ /* Fill the block up with junk and then free it, so we will
+ hopefully get a segfault if it is used again by mistake. */
+ size = sizeof(BB) +
+ bb->instr_count * sizeof(InstrInfo) +
+ (bb->cjmp_count+1) * sizeof(CJmpInfo);
+ VG_(memset)( bb, 0xAA, size );
+ CLG_FREE(bb);
+ }
+ else CLG_DEBUG(3, " delete_bb: BB in use, cannot free!\n");
+}
diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c
new file mode 100644
index 0000000000..d2eb4b93bb
--- /dev/null
+++ b/callgrind/bbcc.c
@@ -0,0 +1,883 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- bbcc.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+#include "costs.h"
+
+#include
+
+/*------------------------------------------------------------*/
+/*--- BBCC operations ---*/
+/*------------------------------------------------------------*/
+
+#define N_BBCC_INITIAL_ENTRIES 10437
+
+/* BBCC table (key is BB/Context), per thread, resizable */
+bbcc_hash current_bbccs;
+
+void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
+{
+ Int i;
+
+ CLG_ASSERT(bbccs != 0);
+
+ bbccs->size = N_BBCC_INITIAL_ENTRIES;
+ bbccs->entries = 0;
+ bbccs->table = (BBCC**) CLG_MALLOC(bbccs->size * sizeof(BBCC*));
+
+ for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
+}
+
+void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
+{
+ CLG_ASSERT(dst != 0);
+
+ dst->size = current_bbccs.size;
+ dst->entries = current_bbccs.entries;
+ dst->table = current_bbccs.table;
+}
+
+bbcc_hash* CLG_(get_current_bbcc_hash)()
+{
+ return &current_bbccs;
+}
+
+void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
+{
+ CLG_ASSERT(h != 0);
+
+ current_bbccs.size = h->size;
+ current_bbccs.entries = h->entries;
+ current_bbccs.table = h->table;
+}
+
+/*
+ * Zero all costs of a BBCC
+ */
+void CLG_(zero_bbcc)(BBCC* bbcc)
+{
+ Int i;
+ jCC* jcc;
+
+ CLG_ASSERT(bbcc->cxt != 0);
+ CLG_DEBUG(1, " zero_bbcc: BB %p, Cxt %d "
+ "(fn '%s', rec %d)\n",
+ bb_addr(bbcc->bb),
+ bbcc->cxt->base_number + bbcc->rec_index,
+ bbcc->cxt->fn[0]->name,
+ bbcc->rec_index);
+
+ if ((bbcc->ecounter_sum ==0) &&
+ (bbcc->ret_counter ==0)) return;
+
+ for(i=0;i<bbcc->bb->cost_count;i++)
+ bbcc->cost[i] = 0;
+ for(i=0;i <= bbcc->bb->cjmp_count;i++) {
+ bbcc->jmp[i].ecounter = 0;
+ for(jcc=bbcc->jmp[i].jcc_list; jcc; jcc=jcc->next_from)
+ CLG_(init_cost)( CLG_(sets).full, jcc->cost );
+ }
+ bbcc->ecounter_sum = 0;
+ bbcc->ret_counter = 0;
+}
+
+
+
+void CLG_(forall_bbccs)(void (*func)(BBCC*))
+{
+ BBCC *bbcc, *bbcc2;
+ int i, j;
+
+ for (i = 0; i < current_bbccs.size; i++) {
+ if ((bbcc=current_bbccs.table[i]) == NULL) continue;
+ while (bbcc) {
+ /* every bbcc should have a rec_array */
+ CLG_ASSERT(bbcc->rec_array != 0);
+
+ for(j=0;j<bbcc->cxt->fn[0]->separate_recursions;j++) {
+ if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;
+
+ (*func)(bbcc2);
+ }
+ bbcc = bbcc->next;
+ }
+ }
+}
+
+
+/* All BBCCs for recursion level 0 are inserted into a
+ * thread specific hash table with key
+ * - address of BB structure (unique, as never freed)
+ * - current context (includes caller chain)
+ * BBCCs for other recursion levels are in bbcc->rec_array.
+ *
+ * The hash is used in setup_bb(), i.e. to find the cost
+ * counters to be changed in the execution of a BB.
+ */
+
+static __inline__
+UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
+{
+ CLG_ASSERT(bb != 0);
+ CLG_ASSERT(cxt != 0);
+
+ return ((Addr)bb + (Addr)cxt) % size;
+}
+
+
+/* Look up a BBCC in the hash.
+ */
+static
+BBCC* lookup_bbcc(BB* bb, Context* cxt)
+{
+ BBCC* bbcc = bb->last_bbcc;
+ UInt idx;
+
+ /* check LRU */
+ if (bbcc->cxt == cxt) {
+ if (!CLG_(clo).separate_threads) {
+ /* if we don't dump threads separately, tid doesn't have to match */
+ return bbcc;
+ }
+ if (bbcc->tid == CLG_(current_tid)) return bbcc;
+ }
+
+ CLG_(stat).bbcc_lru_misses++;
+
+ idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
+ bbcc = current_bbccs.table[idx];
+ while (bbcc &&
+ (bb != bbcc->bb ||
+ cxt != bbcc->cxt)) {
+ bbcc = bbcc->next;
+ }
+
+ CLG_DEBUG(2," lookup_bbcc(BB %p, Cxt %d, fn '%s'): %p (tid %d)\n",
+ bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
+ bbcc, bbcc ?
bbcc->tid : 0);
+
+ CLG_DEBUGIF(2)
+ if (bbcc) CLG_(print_bbcc)(-2,bbcc,False);
+
+ return bbcc;
+}
+
+
+/* double size of hash table 1 (addr->BBCC) */
+static void resize_bbcc_hash(void)
+{
+ Int i, new_size, conflicts1 = 0, conflicts2 = 0;
+ BBCC** new_table;
+ UInt new_idx;
+ BBCC *curr_BBCC, *next_BBCC;
+
+ new_size = 2*current_bbccs.size+3;
+ new_table = (BBCC**) CLG_MALLOC(new_size * sizeof(BBCC*));
+
+ if (!new_table) return;
+
+ for (i = 0; i < new_size; i++)
+ new_table[i] = NULL;
+
+ for (i = 0; i < current_bbccs.size; i++) {
+ if (current_bbccs.table[i] == NULL) continue;
+
+ curr_BBCC = current_bbccs.table[i];
+ while (NULL != curr_BBCC) {
+ next_BBCC = curr_BBCC->next;
+
+ new_idx = bbcc_hash_idx(curr_BBCC->bb,
+ curr_BBCC->cxt,
+ new_size);
+
+ curr_BBCC->next = new_table[new_idx];
+ new_table[new_idx] = curr_BBCC;
+ if (curr_BBCC->next) {
+ conflicts1++;
+ if (curr_BBCC->next->next)
+ conflicts2++;
+ }
+
+ curr_BBCC = next_BBCC;
+ }
+ }
+
+ VG_(free)(current_bbccs.table);
+
+
+ CLG_DEBUG(0,"Resize BBCC Hash: %d => %d (entries %d, conflicts %d/%d)\n",
+ current_bbccs.size, new_size,
+ current_bbccs.entries, conflicts1, conflicts2);
+
+ current_bbccs.size = new_size;
+ current_bbccs.table = new_table;
+ CLG_(stat).bbcc_hash_resizes++;
+}
+
+
+static __inline
+BBCC** new_recursion(int size)
+{
+ BBCC** bbccs;
+ int i;
+
+ bbccs = (BBCC**) CLG_MALLOC(sizeof(BBCC*) * size);
+ for(i=0;i<size;i++)
+ bbccs[i] = 0;
+
+ return bbccs;
+}
+
+static BBCC* new_bbcc(BB* bb)
+{
+ BBCC* new;
+ Int i;
+
+ new = (BBCC*) CLG_MALLOC(sizeof(BBCC) +
+ (bb->cjmp_count+1) * sizeof(JmpData));
+ new->bb = bb;
+ new->tid = CLG_(current_tid);
+
+ new->ret_counter = 0;
+ new->skipped = 0;
+ new->cost = CLG_(get_costarray)(bb->cost_count);
+ for(i=0;i<bb->cost_count;i++)
+ new->cost[i] = 0;
+ for(i=0; i<=bb->cjmp_count; i++) {
+ new->jmp[i].ecounter = 0;
+ new->jmp[i].jcc_list = 0;
+ }
+ new->ecounter_sum = 0;
+
+ /* Init pointer caches (LRU) */
+ new->lru_next_bbcc = 0;
+ new->lru_from_jcc = 0;
+ new->lru_to_jcc = 0;
+
+ CLG_(stat).distinct_bbccs++;
+
+ CLG_DEBUG(3, " new_bbcc(BB %p): %p (now %d)\n",
+ bb_addr(bb), new, CLG_(stat).distinct_bbccs);
+
+ return new;
+}
+
+
+/**
+ * Inserts a new BBCC into hashes.
+ * BBCC specific items must be set as this is used for the hash
+ * keys:
+ * fn : current function
+ * tid : current thread ID
+ * from : position where current function is called from
+ *
+ * Recursion level doesn't need to be set as this is not included
+ * in the hash key: Only BBCCs with rec level 0 are in hashes.
+ */
+static
+void insert_bbcc_into_hash(BBCC* bbcc)
+{
+ UInt idx;
+
+ CLG_ASSERT(bbcc->cxt != 0);
+
+ CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %p, fn '%s')\n",
+ bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);
+
+ /* check fill degree of hash and resize if needed (>90%) */
+ current_bbccs.entries++;
+ if (100 * current_bbccs.entries / current_bbccs.size > 90)
+ resize_bbcc_hash();
+
+ idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
+ bbcc->next = current_bbccs.table[idx];
+ current_bbccs.table[idx] = bbcc;
+
+ CLG_DEBUG(3,"- insert_bbcc_into_hash: %d entries\n",
+ current_bbccs.entries);
+}
+
+static Char* mangled_cxt(Context* cxt, int rec_index)
+{
+ static Char mangled[FN_NAME_LEN];
+ int i, p;
+
+ if (!cxt) return "(no context)";
+
+ p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
+ if (rec_index >0)
+ p += VG_(sprintf)(mangled+p, "'%d", rec_index +1);
+ for(i=1;i<cxt->size;i++)
+ p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);
+
+ return mangled;
+}
+
+
+/* Create a new BBCC as a copy of an existing one,
+ * but with costs set to 0 and jcc chains empty.
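+ * (E.g. when f calls itself and f's separate_recursions is 2, the
+ * level-1 activation needs its own cost counters: it gets a clone of
+ * the level-0 BBCC with rec_index 1, sharing the same rec_array.)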
+ *
+ * This is needed when a BB is executed in another context than
+ * the one at instrumentation time of the BB.
+ *
+ * Use cases:
+ * rec_index == 0: clone from a BBCC with differing tid/cxt
+ * and insert into hashes
+ * rec_index >0 : clone from a BBCC with same tid/cxt and rec_index 0
+ * don't insert into hashes
+ */
+static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
+{
+ BBCC* new;
+
+ CLG_DEBUG(3,"+ clone_bbcc(BB %p, rec %d, fn %s)\n",
+ bb_addr(orig->bb), rec_index, cxt->fn[0]->name);
+
+ new = new_bbcc(orig->bb);
+
+ if (rec_index == 0) {
+
+ /* hash insertion is only allowed if tid or cxt is different */
+ CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
+ (orig->cxt != cxt));
+
+ new->rec_index = 0;
+ new->cxt = cxt;
+ new->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
+ new->rec_array[0] = new;
+
+ insert_bbcc_into_hash(new);
+ }
+ else {
+ if (CLG_(clo).separate_threads)
+ CLG_ASSERT(orig->tid == CLG_(current_tid));
+
+ CLG_ASSERT(orig->cxt == cxt);
+ CLG_ASSERT(orig->rec_array);
+ CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
+ CLG_ASSERT(orig->rec_array[rec_index] ==0);
+
+ /* new BBCC will only have differing recursion level */
+ new->rec_index = rec_index;
+ new->cxt = cxt;
+ new->rec_array = orig->rec_array;
+ new->rec_array[rec_index] = new;
+ }
+
+ /* update list of BBCCs for same BB */
+ new->next_bbcc = orig->bb->bbcc_list;
+ orig->bb->bbcc_list = new;
+
+
+ CLG_DEBUGIF(3)
+ CLG_(print_bbcc)(-2, new, False);
+
+ CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %p\n"
+ " orig %s\n"
+ " new %s\n",
+ orig, rec_index, bb_addr(orig->bb),
+ mangled_cxt(orig->cxt, orig->rec_index),
+ mangled_cxt(new->cxt, new->rec_index));
+
+ CLG_(stat).bbcc_clones++;
+
+ return new;
+}
+
+
+
+/* Get a pointer to the cost centre structure for a given basic block
+ * address. If created, the BBCC is inserted into the BBCC hash.
+ * Also sets BB_seen_before by reference.
+ *
+ */
+BBCC* CLG_(get_bbcc)(BB* bb)
+{
+ BBCC* bbcc;
+
+ CLG_DEBUG(3, "+ get_bbcc(BB %p)\n", bb_addr(bb));
+
+ bbcc = bb->bbcc_list;
+
+ if (!bbcc) {
+ bbcc = new_bbcc(bb);
+
+ /* initialize BBCC */
+ bbcc->cxt = 0;
+ bbcc->rec_array = 0;
+ bbcc->rec_index = 0;
+
+ bbcc->next_bbcc = bb->bbcc_list;
+ bb->bbcc_list = bbcc;
+ bb->last_bbcc = bbcc;
+
+ CLG_DEBUGIF(3)
+ CLG_(print_bbcc)(-2, bbcc, False);
+ }
+
+ CLG_DEBUG(3, "- get_bbcc(BB %p): BBCC %p\n",
+ bb_addr(bb), bbcc);
+
+ return bbcc;
+}
+
+
+/* Callgrind manages its own call stack for each thread.
+ * When leaving a function, an underflow can happen when
+ * Callgrind's tracing was switched on in the middle of
+ * a run, i.e. when Callgrind was not able to trace the
+ * call instruction.
+ * This function tries to reconstruct the original call.
+ * As we know the return address (the address following
+ * the CALL instruction), we can detect the function
+ * we return back to, but the original call site is unknown.
+ * We suppose a call site at return address - 1.
+ * (TODO: other heuristic: lookup info of instrumented BBs).
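+ * Example: collection is switched on while bar() is running, after
+ * it was called from foo(); the CALL itself was never traced. On
+ * bar()'s RET we know the return address RA (the first address after
+ * the CALL in foo), fabricate a source BB at RA-1 inside foo(), and
+ * push a synthetic foo() -> bar() call, so costs stay attributed to
+ * the right caller although the real call site is unknown.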
+ */
+static void handleUnderflow(BB* bb)
+{
+ /* RET at top of call stack */
+ BBCC* source_bbcc;
+ BB* source_bb;
+ jCC* jcc;
+ Bool seen_before;
+ fn_node* caller;
+ int fn_number, *pactive;
+ call_entry* call_entry_up;
+
+ CLG_DEBUG(1," Callstack underflow !\n");
+
+ /* we emulate an old call from the function we return to
+ * by using (<return address> -1) */
+ source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
+ source_bbcc = CLG_(get_bbcc)(source_bb);
+
+ /* seen_before can be true if RET from a signal handler */
+ if (!seen_before) {
+ source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
+ }
+ else if (CLG_(current_state).collect)
+ source_bbcc->ecounter_sum++;
+
+ /* Force a new top context, will be set active by push_cxt() */
+ CLG_(current_fn_stack).top--;
+ CLG_(current_state).cxt = 0;
+ caller = CLG_(get_fn_node)(bb);
+ CLG_(push_cxt)( caller );
+
+ if (!seen_before) {
+ /* set rec array for source BBCC: this is at rec level 1 */
+ source_bbcc->rec_array = new_recursion(caller->separate_recursions);
+ source_bbcc->rec_array[0] = source_bbcc;
+
+ CLG_ASSERT(source_bbcc->cxt == 0);
+ source_bbcc->cxt = CLG_(current_state).cxt;
+ insert_bbcc_into_hash(source_bbcc);
+ }
+ CLG_ASSERT(CLG_(current_state).bbcc);
+
+ /* correct active counts */
+ fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
+ pactive = CLG_(get_fn_entry)(fn_number);
+ (*pactive)--;
+
+ /* This assertion is not correct for reentrant
+ * signal handlers */
+ /* CLG_ASSERT(*pactive == 0); */
+
+ CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
+ /* back to current context */
+ CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
+ CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
+ (Addr)-1, False);
+ call_entry_up =
+ &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]);
+ jcc = call_entry_up->jcc;
+ /* assume this call is lasting since last dump or
+ * for a signal handler since its call */
+ if (CLG_(current_state).sig == 0)
+ CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
+ CLG_(get_current_thread)()->lastdump_cost );
+ else
+ CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
+}
+
+
+/*
+ * Helper function called at start of each instrumented BB to set up
+ * the pointer to costs for current thread/context/recursion level
+ */
+
+VG_REGPARM(1)
+void CLG_(setup_bbcc)(BB* bb)
+{
+ BBCC *bbcc, *last_bbcc;
+ Bool call_emulation = False, delayed_push = False, skip = False;
+ Addr sp;
+ BB* last_bb;
+ ThreadId tid;
+ Int jmpkind, passed = 0, csp;
+ Bool ret_without_call = False;
+ Int popcount_on_return = 1;
+
+ CLG_DEBUG(3,"+ setup_bbcc(BB %p)\n", bb_addr(bb));
+
+ /* This is needed because thread switches cannot reliably be tracked
+ * with callback CLG_(run_thread) only: we have otherwise no way to get
+ * the thread ID after a signal handler returns.
+ * This could be removed again if that bug is fixed in Valgrind.
+ * This is in the hot path but hopefully not too costly.
+ */
+ tid = VG_(get_running_tid)();
+#if 1
+ CLG_(switch_thread)(tid);
+#else
+ CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
+#endif
+
+ sp = VG_(get_SP)(tid);
+ last_bbcc = CLG_(current_state).bbcc;
+ last_bb = last_bbcc ?
last_bbcc->bb : 0; + + if (last_bb) { + passed = CLG_(current_state).jmps_passed; + if (passed == last_bb->cjmp_count) { + jmpkind = last_bb->jmpkind; + + /* VEX always gives a Boring jump kind also when passed trough */ + if ((jmpkind == Ijk_Boring) && + (last_bb->offset + last_bb->instr_len == bb->offset)) + jmpkind = JmpNone; + } + else + jmpkind = JmpCond; + + /* if we are in a function which is skipped in the call graph, we + * do not increment the exe counter to produce cost (if simulation off), + * which would lead to dumping this BB to be skipped + */ + if (CLG_(current_state).collect && !CLG_(current_state).nonskipped) { + last_bbcc->ecounter_sum++; + last_bbcc->jmp[passed].ecounter++; + if (!CLG_(clo).simulate_cache) { + /* update Ir cost */ + int instr_count = last_bb->jmp[passed].instr+1; + CLG_(current_state).cost[CLG_(sets).off_sim_Ir] += instr_count; + } + } + + CLG_DEBUGIF(4) { + CLG_(print_execstate)(-2, &CLG_(current_state) ); + CLG_(print_bbcc_cost)(-2, last_bbcc); + } + } + else { + jmpkind = JmpNone; + } + + /* Manipulate JmpKind if needed, only using BB specific info */ + + csp = CLG_(current_call_stack).sp; + + /* A return not matching the top call in our callstack is a jump */ + if ( (jmpkind == Ijk_Ret) && (csp >0)) { + Int csp_up = csp-1; + call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]); + + /* We have a real return if + * - the stack pointer (SP) left the current stack frame, or + * - SP has the same value as when reaching the current function + * and the address of this BB is the return address of last call + * (we even allow to leave multiple frames if the SP stays the + * same and we find a matching return address) + * The latter condition is needed because on PPC, SP can stay + * the same over CALL=b(c)l / RET=b(c)lr boundaries + */ + if (sp < top_ce->sp) popcount_on_return = 0; + else if (top_ce->sp == sp) { + while(1) { + if (top_ce->ret_addr == bb_addr(bb)) break; + if (csp_up>0) { + csp_up--; + top_ce = &(CLG_(current_call_stack).entry[csp_up]); + if (top_ce->sp == sp) { + popcount_on_return++; + continue; + } + } + popcount_on_return = 0; + break; + } + } + if (popcount_on_return == 0) { + jmpkind = Ijk_Boring; + ret_without_call = True; + } + } + + /* Should this jump be converted to call or pop/call ? */ + if (( jmpkind != Ijk_Ret) && + ( jmpkind != Ijk_Call) && last_bb) { + + /* We simulate a JMP/Cont to be a CALL if + * - jump is in another ELF object or section kind + * - jump is to first instruction of a function (tail recursion) + */ + if (ret_without_call || + /* This is for detection of optimized tail recursion. + * On PPC, this is only detected as call when going to another + * function. The problem is that on PPC it can go wrong + * more easily (no stack frame setup needed) + */ +#if defined(VGA_ppc32) + (bb->is_entry && (last_bb->fn != bb->fn)) || +#else + bb->is_entry || +#endif + (last_bb->sect_kind != bb->sect_kind) || + (last_bb->obj->number != bb->obj->number)) { + + CLG_DEBUG(1," JMP: %s[%s] to %s[%s]%s!\n", + last_bb->fn->name, last_bb->obj->name, + bb->fn->name, bb->obj->name, + ret_without_call?" 
(RET w/o CALL)":""); + + if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp>0)) { + + call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]); + + if (top_ce->jcc) { + + CLG_DEBUG(1," Pop on Jump!\n"); + + /* change source for delayed push */ + CLG_(current_state).bbcc = top_ce->jcc->from; + sp = top_ce->sp; + CLG_(pop_call_stack)(); + } + else { + CLG_ASSERT(CLG_(current_state).nonskipped != 0); + } + } + + jmpkind = Ijk_Call; + call_emulation = True; + } + } + + if (jmpkind == Ijk_Call) + skip = CLG_(get_fn_node)(bb)->skip; + + CLG_DEBUGIF(1) { + if (jmpkind == JmpCond) + VG_(printf)("Conditional"); + else if (jmpkind == JmpNone) + VG_(printf)("None"); + else + ppIRJumpKind( jmpkind ); + + VG_(printf)(" %08x -> %08x, SP %08x\n", + last_bb ? bb_jmpaddr(last_bb) : 0, + bb_addr(bb), sp); + } + + /* Handle CALL/RET and update context to get correct BBCC */ + + if (jmpkind == Ijk_Ret) { + + if ((csp == 0) || + ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) && + ( *(CLG_(current_fn_stack).top-1)==0)) ) { + + /* On an empty call stack or at a signal separation marker, + * a RETURN generates an call stack underflow. + */ + handleUnderflow(bb); + CLG_(pop_call_stack)(); + } + else { + CLG_ASSERT(popcount_on_return >0); + CLG_(unwind_call_stack)(sp, popcount_on_return); + } + } + else { + CLG_(unwind_call_stack)(sp, 0); + + if (jmpkind == Ijk_Call) { + delayed_push = True; + + csp = CLG_(current_call_stack).sp; + if (call_emulation && csp>0) + sp = CLG_(current_call_stack).entry[csp-1].sp; + + } + } + + /* Change new context if needed, taking delayed_push into account */ + if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) { + CLG_(push_cxt)(CLG_(get_fn_node)(bb)); + } + CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom); + + /* If there is a fresh instrumented BBCC, assign current context */ + bbcc = CLG_(get_bbcc)(bb); + if (bbcc->cxt == 0) { + CLG_ASSERT(bbcc->rec_array == 0); + + bbcc->cxt = CLG_(current_state).cxt; + bbcc->rec_array = + new_recursion((*CLG_(current_fn_stack).top)->separate_recursions); + bbcc->rec_array[0] = bbcc; + + insert_bbcc_into_hash(bbcc); + } + else { + /* get BBCC with current context */ + + /* first check LRU of last bbcc executed */ + + if (last_bbcc) { + bbcc = last_bbcc->lru_next_bbcc; + if (bbcc && + ((bbcc->bb != bb) || + (bbcc->cxt != CLG_(current_state).cxt))) + bbcc = 0; + } + else + bbcc = 0; + + if (!bbcc) + bbcc = lookup_bbcc(bb, CLG_(current_state).cxt); + if (!bbcc) + bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0); + + bb->last_bbcc = bbcc; + } + + /* save for fast lookup */ + if (last_bbcc) + last_bbcc->lru_next_bbcc = bbcc; + + if ((*CLG_(current_fn_stack).top)->separate_recursions >1) { + UInt level, idx; + fn_node* top = *(CLG_(current_fn_stack).top); + + level = *CLG_(get_fn_entry)(top->number); + + if (delayed_push && !skip) { + if (CLG_(clo).skip_direct_recursion) { + /* do not increment rec. 
level if called from + * same function */ + if (!CLG_(current_state).bbcc || + (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0])) + level++; + } + else level++; + } + if (level> top->separate_recursions) + level = top->separate_recursions; + + if (level == 0) { + /* can only happen if instrumentation just was switched on */ + level = 1; + *CLG_(get_fn_entry)(top->number) = 1; + } + + idx = level -1; + if (bbcc->rec_array[idx]) + bbcc = bbcc->rec_array[idx]; + else + bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx); + + CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc); + } + + if (delayed_push) { + if (!skip && CLG_(current_state).nonskipped) { + /* a call from skipped to nonskipped */ + CLG_(current_state).bbcc = CLG_(current_state).nonskipped; + } + CLG_(push_call_stack)(CLG_(current_state).bbcc, passed, + bbcc, sp, skip); + } + + if (CLG_(clo).collect_jumps && + ((jmpkind == JmpCond) || (jmpkind == Ijk_Boring))) { + + /* Handle conditional jumps followed, i.e. trace arcs + * This uses JCC structures, too */ + + jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc); + CLG_ASSERT(jcc != 0); + // Change from default, and check if already changed + if (jcc->jmpkind == Ijk_Call) + jcc->jmpkind = jmpkind; + else { + // FIXME: Why can this fail? + // CLG_ASSERT(jcc->jmpkind == jmpkind); + } + + jcc->call_counter++; + if (jmpkind == JmpCond) + CLG_(stat).jcnd_counter++; + else + CLG_(stat).jump_counter++; + } + + CLG_(current_state).bbcc = bbcc; + + CLG_DEBUGIF(1) { + VG_(printf)(" "); + CLG_(print_bbcc_fn)(bbcc); + VG_(printf)("\n"); + } + + CLG_DEBUG(3,"- setup_bbcc (BB %p): Cost %p (Len %d), Instrs %d (Len %d)\n", + bb_addr(bb), bbcc->cost, bb->cost_count, + bb->instr_count, bb->instr_len); + CLG_DEBUGIF(3) + CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index); + CLG_DEBUG(3,"\n"); + + (*CLG_(cachesim).after_bbsetup)(); + + CLG_(stat).bb_executions++; +} diff --git a/callgrind/callgrind.h b/callgrind/callgrind.h new file mode 100644 index 0000000000..c153dbd2d2 --- /dev/null +++ b/callgrind/callgrind.h @@ -0,0 +1,130 @@ + +/* + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (callgrind.h) only. The entire rest of Valgrind is licensed + under the terms of the GNU General Public License, version 2. See + the COPYING file in the source distribution for details. + + ---------------------------------------------------------------- + + This file is part of callgrind, a valgrind skin for cache simulation + and call tree tracing. + + Copyright (C) 2003,2004 Josef Weidendorfer. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (vgprof.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + +#ifndef __CALLGRIND_H +#define __CALLGRIND_H + +#include "valgrind.h" + +typedef + enum { + VG_USERREQ__DUMP_STATS = VG_USERREQ_TOOL_BASE('C','T'), + VG_USERREQ__ZERO_STATS, + VG_USERREQ__TOGGLE_COLLECT, + VG_USERREQ__DUMP_STATS_AT, + VG_USERREQ__START_INSTRUMENTATION, + VG_USERREQ__STOP_INSTRUMENTATION + } Vg_CalltreeClientRequest; + +/* Dump current state of cost centers. + This will also atomically zero the cost centers */ +#define CALLGRIND_DUMP_STATS() \ + do { \ + unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__DUMP_STATS, \ + 0, 0, 0, 0); \ + (void)0; \ + } while(0) + +/* Dump current state of cost centers. + This will also atomically zero the cost centers */ +#define CALLGRIND_DUMP_STATS_AT(pos_str) \ + do { \ + unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__DUMP_STATS_AT, \ + pos_str, 0, 0, 0); \ + (void)0; \ + } while(0) + +/* Zero cost centers */ +#define CALLGRIND_ZERO_STATS() \ + do { \ + unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__ZERO_STATS, \ + 0, 0, 0, 0); \ + (void)0; \ + } while(0) + +/* Toggle collection state, + * i.e. if events happening are collected into cost centers */ +#define CALLGRIND_TOGGLE_COLLECT() \ + do { \ + unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__TOGGLE_COLLECT, \ + 0, 0, 0, 0); \ + (void)0; \ + } while(0) + +/* Start instrumentation if not already on */ +#define CALLGRIND_START_INSTRUMENTATION() \ + do { \ + unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__START_INSTRUMENTATION,\ + 0, 0, 0, 0); \ + (void)0; \ + } while(0) + +/* Stop instrumentation if not already off */ +#define CALLGRIND_STOP_INSTRUMENTATION() \ + do { \ + unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__STOP_INSTRUMENTATION,\ + 0, 0, 0, 0); \ + (void)0; \ + } while(0) + +#endif /* __CALLGRIND_H */ diff --git a/callgrind/callgrind_annotate.in b/callgrind/callgrind_annotate.in new file mode 100644 index 0000000000..6d36f0602b --- /dev/null +++ b/callgrind/callgrind_annotate.in @@ -0,0 +1,1191 @@ +#! /usr/bin/perl -w +##--------------------------------------------------------------------## +##--- The cache simulation framework: instrumentation, recording ---## +##--- and results printing. 
---## +##--- callgrind_annotate ---## +##--------------------------------------------------------------------## + +# This file is part of Callgrind, a cache-simulator and call graph +# tracer built on Valgrind. +# +# Copyright (C) 2003 Josef Weidendorfer +# Josef.Weidendorfer@gmx.de +# +# This file is based heavily on vg_annotate, part of Valgrind. +# Copyright (C) 2002 Nicholas Nethercote +# njn25@cam.ac.uk +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +# 02111-1307, USA. +# +# The GNU General Public License is contained in the file COPYING. + +#---------------------------------------------------------------------------- +# Annotator for cachegrind/callgrind. +# +# File format is described in /docs/techdocs.html. +# +# Performance improvements record, using cachegrind.out for cacheprof, doing no +# source annotation (irrelevant ones removed): +# user time +# 1. turned off warnings in add_hash_a_to_b() 3.81 --> 3.48s +# [now add_array_a_to_b()] +# 6. make line_to_CC() return a ref instead of a hash 3.01 --> 2.77s +# +#10. changed file format to avoid file/fn name repetition 2.40s +# (not sure why higher; maybe due to new '.' entries?) +#11. changed file format to drop unnecessary end-line "."s 2.36s +# (shrunk file by about 37%) +#12. switched from hash CCs to array CCs 1.61s +#13. only adding b[i] to a[i] if b[i] defined (was doing it if +# either a[i] or b[i] was defined, but if b[i] was undefined +# it just added 0) 1.48s +#14. Stopped converting "." entries to undef and then back 1.16s +#15. Using foreach $i (x..y) instead of for ($i = 0...) in +# add_array_a_to_b() 1.11s +# +# Auto-annotating primes: +#16. Finding count lengths by int((length-1)/3), not by +# commifying (halves the number of commify calls) 1.68s --> 1.47s + +use strict; + +#---------------------------------------------------------------------------- +# Overview: the running example in the comments is for: +# - events = A,B,C,D +# - --show=C,A,D +# - --sort=D,C +#---------------------------------------------------------------------------- + +#---------------------------------------------------------------------------- +# Global variables, main data structures +#---------------------------------------------------------------------------- +# CCs are arrays, the counts corresponding to @events, with 'undef' +# representing '.'. This makes things fast (faster than using hashes for CCs) +# but we have to use @sort_order and @show_order below to handle the --sort and +# --show options, which is a bit tricky. +#---------------------------------------------------------------------------- + +# Total counts for summary (an array reference). +my $summary_CC; + +# Totals for each function, for overall summary. +# hash(filename:fn_name => CC array) +my %fn_totals; + +# Individual CCs, organised by filename and line_num for easy annotation. 
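+# (e.g. $all_ind_CCs{"foo.c"}{42} holds the CC array for line 42 of
+# foo.c)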
+# hash(filename => hash(line_num => CC array))
+my %all_ind_CCs;
+
+# Files chosen for annotation on the command line.
+# key = basename (trimmed of any directory), value = full filename
+my %user_ann_files;
+
+# Generic description string.
+my $desc = "";
+
+# Command line of profiled program.
+my $cmd = "";
+
+# Info on the profiled process.
+my $pid = "";
+my $part = "";
+my $thread = "";
+
+# Positions used for cost lines; default: line numbers
+my $has_line = 1;
+my $has_addr = 0;
+
+# Events in input file, eg. (A,B,C,D)
+my @events;
+my $events;
+
+# Events to show, from command line, eg. (C,A,D)
+my @show_events;
+
+# Map from @show_events indices to @events indices, eg. (2,0,3). Gives the
+# order in which we must traverse @events in order to show the @show_events,
+# eg. (@events[$show_order[1]], @events[$show_order[2]]...) = @show_events.
+# (Might help to think of it like a hash (0 => 2, 1 => 0, 2 => 3).)
+my @show_order;
+
+# Print out the function totals sorted by these events, eg. (D,C).
+my @sort_events;
+
+# Map from @sort_events indices to @events indices, eg. (3,2). Same idea as
+# for @show_order.
+my @sort_order;
+
+# Thresholds, one for each sort event (or default to 1 if no sort events
+# specified). We print out functions and do auto-annotations until we've
+# handled this proportion of all the events thresholded.
+my @thresholds;
+
+my $default_threshold = 99;
+
+my $single_threshold = $default_threshold;
+
+# If on, automatically annotates all files that are involved in getting over
+# all the threshold counts.
+my $auto_annotate = 0;
+
+# Number of lines to show around each annotated line.
+my $context = 8;
+
+# Directories in which to look for annotation files.
+my @include_dirs = ("");
+
+# Verbose mode
+my $verbose = "1";
+
+# Inclusive statistics (with subroutine events)
+my $inclusive = 0;
+
+# Inclusive totals for each function, for overall summary.
+# hash(filename:fn_name => CC array)
+my %cfn_totals;
+
+# hash( file:func => [ called file:func ])
+my $called_funcs;
+
+# hash( file:func => [ calling file:func ])
+my $calling_funcs;
+
+# hash( file:func,line => [called file:func ])
+my $called_from_line;
+
+# hash( file:func,line => file:func )
+my %func_of_line;
+
+# hash (file:func => object name)
+my %obj_name;
+
+# Print out the callers of a function
+my $tree_caller = 0;
+
+# Print out the called functions
+my $tree_calling = 0;
+
+# hash( file:func,cfile:cfunc => call CC[])
+my %call_CCs;
+
+# hash( file:func,cfile:cfunc => call counter)
+my %call_counter;
+
+# hash(context, index) => realname for compressed traces
+my %compressed;
+
+# Input file name, will be set in process_cmd_line
+my $input_file = "";
+
+# Version number
+my $version = "@VERSION@";
+
+# Usage message.
+my $usage = <<END
+usage: callgrind_annotate [options] [callgrind-out-file [source-files]]
+
+  options for the user, with defaults in [ ], are:
+    -h --help            show this message
+    -v --version         show version
+    --show=A,B,C         only show figures for events A,B,C [all]
+    --sort=A,B,C         sort columns by events A,B,C [event column order]
+    --threshold=<0--100> percentage of counts (of primary sort event) we
+                         are interested in [$default_threshold%]
+    --auto=yes|no        annotate all source files containing functions
+                         that helped reach the event count threshold [no]
+    --context=N          print N lines of context before and after
+                         annotated lines [8]
+    --inclusive=yes|no   add subroutine costs to function calls [no]
+    --tree=none|caller|  print for each function their callers,
+          calling|both   the called functions or both [none]
+    -I --include=<dir>   add <dir> to list of directories to search for
+                         source files
+
+END
+;
+
+# Used in various places of output.
+my $fancy = '-' x 80 . "\n";
"\n"; + +#----------------------------------------------------------------------------- +# Argument and option handling +#----------------------------------------------------------------------------- +sub process_cmd_line() +{ + for my $arg (@ARGV) { + + # Option handling + if ($arg =~ /^-/) { + + # --version + if ($arg =~ /^-v$|^--version$/) { + die("callgrind_annotate-$version\n"); + + # --show=A,B,C + } elsif ($arg =~ /^--show=(.*)$/) { + @show_events = split(/,/, $1); + + # --sort=A,B,C + } elsif ($arg =~ /^--sort=(.*)$/) { + @sort_events = split(/,/, $1); + foreach my $i (0 .. scalar @sort_events - 1) { + if ($sort_events[$i] =~#/.*:(\d+)$/) { + /.*:([\d\.]+)%?$/) { + my $th = $1; + ($th >= 0 && $th <= 100) or die($usage); + $sort_events[$i] =~ s/:.*//; + $thresholds[$i] = $th; + } else { + $thresholds[$i] = 0; + } + } + + # --threshold=X (tolerates a trailing '%') + } elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) { + $single_threshold = $1; + ($1 >= 0 && $1 <= 100) or die($usage); + + # --auto=yes|no + } elsif ($arg =~ /^--auto=(yes|no)$/) { + $auto_annotate = 1 if ($1 eq "yes"); + $auto_annotate = 0 if ($1 eq "no"); + + # --context=N + } elsif ($arg =~ /^--context=([\d\.]+)$/) { + $context = $1; + if ($context < 0) { + die($usage); + } + + # --inclusive=yes|no + } elsif ($arg =~ /^--inclusive=(yes|no)$/) { + $inclusive = 1 if ($1 eq "yes"); + $inclusive = 0 if ($1 eq "no"); + + # --tree=none|caller|calling|both + } elsif ($arg =~ /^--tree=(none|caller|calling|both)$/) { + $tree_caller = 1 if ($1 eq "caller" || $1 eq "both"); + $tree_calling = 1 if ($1 eq "calling" || $1 eq "both"); + + # --include=A,B,C + } elsif ($arg =~ /^(-I|--include)=(.*)$/) { + my $inc = $2; + $inc =~ s|/$||; # trim trailing '/' + push(@include_dirs, "$inc/"); + + } else { # -h and --help fall under this case + die($usage); + } + + # Argument handling -- annotation file checking and selection. + # Stick filenames into a hash for quick 'n easy lookup throughout + } else { + if ($input_file eq "") { + $input_file = $arg; + } + else { + my $readable = 0; + foreach my $include_dir (@include_dirs) { + if (-r $include_dir . $arg) { + $readable = 1; + } + } + $readable or die("File $arg not found in any of: @include_dirs\n"); + $user_ann_files{$arg} = 1; + } + } + } + + if ($input_file eq "") { + $input_file = ()[0]; + if (!defined $input_file) { + $input_file = "cachegrind.out"; + } + print "Reading data from '$input_file'...\n"; + } +} + +#----------------------------------------------------------------------------- +# Reading of input file +#----------------------------------------------------------------------------- +sub max ($$) +{ + my ($x, $y) = @_; + return ($x > $y ? $x : $y); +} + +# Add the two arrays; any '.' entries are ignored. Two tricky things: +# 1. If $a2->[$i] is undefined, it defaults to 0 which is what we want; we turn +# off warnings to allow this. This makes things about 10% faster than +# checking for definedness ourselves. +# 2. We don't add an undefined count or a ".", even though it's value is 0, +# because we don't want to make an $a2->[$i] that is undef become 0 +# unnecessarily. +sub add_array_a_to_b ($$) +{ + my ($a1, $a2) = @_; + + my $n = max(scalar @$a1, scalar @$a2); + $^W = 0; + foreach my $i (0 .. $n-1) { + $a2->[$i] += $a1->[$i] if (defined $a1->[$i] && "." ne $a1->[$i]); + } + $^W = 1; +} + +# Add each event count to the CC array. '.' counts become undef, as do +# missing entries (implicitly). 
+sub line_to_CC ($)
+{
+ my @CC = (split /\s+/, $_[0]);
+ (@CC <= @events) or die("Line $.: too many event counts\n");
+ return \@CC;
+}
+
+sub uncompressed_name($$)
+{
+ my ($context, $name) = @_;
+
+ if ($name =~ /^\((\d+)\)\s*(.*)$/) {
+ my $index = $1;
+ my $realname = $2;
+
+ if ($realname eq "") {
+ $realname = $compressed{$context,$index};
+ }
+ else {
+ $compressed{$context,$index} = $realname;
+ }
+ return $realname;
+ }
+ return $name;
+}
+
+sub read_input_file()
+{
+ open(INPUTFILE, "< $input_file") || die "File $input_file not opened\n";
+
+ my $line;
+
+ # Read header
+ while(<INPUTFILE>) {
+
+ # remove comments
+ s/#.*$//;
+
+ if (/^$/) { ; }
+
+ elsif (/^version:\s*(\d+)/) {
+ # Can't read format with major version > 1
+ ($1<2) or die("Can't read format with major version $1.\n");
+ }
+
+ elsif (/^pid:\s+(.*)$/) { $pid = $1; }
+ elsif (/^thread:\s+(.*)$/) { $thread = $1; }
+ elsif (/^part:\s+(.*)$/) { $part = $1; }
+ elsif (/^desc:\s+(.*)$/) {
+ my $dline = $1;
+ # suppress profile options in description output
+ if ($dline =~ /^Option:/) {;}
+ else { $desc .= "$dline\n"; }
+ }
+ elsif (/^cmd:\s+(.*)$/) { $cmd = $1; }
+ elsif (/^positions:\s+(.*)$/) {
+ my $positions = $1;
+ $has_line = ($positions =~ /line/);
+ $has_addr = ($positions =~ /(addr|instr)/);
+ }
+ elsif (/^events:\s+(.*)$/) {
+ $events = $1;
+
+ # events line is last in header
+ last;
+ }
+ else {
+ warn("WARNING: header line $. malformed, ignoring\n");
+ if ($verbose) { chomp; warn(" line: '$_'\n"); }
+ }
+ }
+
+ # Check for needed header entries
+ ($cmd ne "") or die("Line $.: missing command line\n");
+
+ # Read "events:" line. We make a temporary hash in which the Nth event's
+ # value is N, which is useful for handling --show/--sort options below.
+ ($events ne "") or die("Line $.: missing events line\n");
+ @events = split(/\s+/, $events);
+ my %events;
+ my $n = 0;
+ foreach my $event (@events) {
+ $events{$event} = $n;
+ $n++
+ }
+
+ # If no --show arg given, default to showing all events in the file.
+ # If --show option is used, check all specified events appeared in the
+ # "events:" line. Then initialise @show_order.
+ if (@show_events) {
+ foreach my $show_event (@show_events) {
+ (defined $events{$show_event}) or
+ die("--show event `$show_event' did not appear in input\n");
+ }
+ } else {
+ @show_events = @events;
+ }
+ foreach my $show_event (@show_events) {
+ push(@show_order, $events{$show_event});
+ }
+
+ # Do as for --show, but if no --sort arg given, default to sorting by
+ # column order (ie. first column event is primary sort key, 2nd column is
+ # 2ndary key, etc).
+ if (@sort_events) {
+ foreach my $sort_event (@sort_events) {
+ (defined $events{$sort_event}) or
+ die("--sort event `$sort_event' did not appear in input\n");
+ }
+ } else {
+ @sort_events = @events;
+ }
+ foreach my $sort_event (@sort_events) {
+ push(@sort_order, $events{$sort_event});
+ }
+
+ # If multiple threshold args weren't given via --sort, stick in the single
+ # threshold (either from --threshold if used, or the default otherwise) for
+ # the primary sort event, and 0% for the rest.
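+ # (For example, "--sort=D,C --threshold=95" gives @thresholds =
+ # (95, 0): function totals are printed until 95% of all D counts
+ # are covered, with no extra cutoff applied to C.)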
+ if (not @thresholds) {
+ foreach my $e (@sort_order) {
+ push(@thresholds, 0);
+ }
+ $thresholds[0] = $single_threshold;
+ }
+
+ my $curr_obj = "";
+ my $curr_file;
+ my $curr_fn;
+ my $curr_name;
+ my $curr_line_num = 0;
+
+ my $curr_cobj = "";
+ my $curr_cfile = "";
+ my $curr_cfunc = "";
+ my $curr_cname;
+ my $curr_call_counter = 0;
+ my $curr_cfn_CC = [];
+
+ my $curr_fn_CC = [];
+ my $curr_file_ind_CCs = {}; # hash(line_num => CC)
+
+ # Read body of input file.
+ while (<INPUTFILE>) {
+ s/#.*$//; # remove comments
+ s/^\+(\d+)/$curr_line_num+$1/e;
+ s/^\-(\d+)/$curr_line_num-$1/e;
+ s/^\*/$curr_line_num/e;
+ if (s/^(\d+|0x\w+)\s+//) {
+ $curr_line_num = $1;
+ if ($has_addr) {
+ if ($has_line) {
+ s/^\+(\d+)/$curr_line_num+$1/e;
+ s/^\-(\d+)/$curr_line_num-$1/e;
+ s/^\*/$curr_line_num/e;
+
+ if (s/^(\d+)\s+//) { $curr_line_num = $1; }
+ }
+ else { $curr_line_num = 0; }
+ }
+ my $CC = line_to_CC($_);
+
+ if ($curr_call_counter>0) {
+# print "Read ($curr_name => $curr_cname) $curr_call_counter\n";
+
+ if (defined $call_CCs{$curr_name,$curr_cname}) {
+ add_array_a_to_b($CC, $call_CCs{$curr_name,$curr_cname});
+ $call_counter{$curr_name,$curr_cname} += $curr_call_counter;
+ }
+ else {
+ $call_CCs{$curr_name,$curr_cname} = $CC;
+ $call_counter{$curr_name,$curr_cname} = $curr_call_counter;
+ }
+
+ my $tmp = $called_from_line->{$curr_file,$curr_line_num};
+ if (!defined $tmp) {
+ $func_of_line{$curr_file,$curr_line_num} = $curr_name;
+ }
+ $tmp = {} unless defined $tmp;
+ $$tmp{$curr_cname} = 1;
+ $called_from_line->{$curr_file,$curr_line_num} = $tmp;
+ $call_CCs{$curr_name,$curr_cname,$curr_line_num} = $CC;
+ $call_counter{$curr_name,$curr_cname,$curr_line_num} = $curr_call_counter;
+
+ $curr_call_counter = 0;
+
+ # inclusive costs
+ $curr_cfn_CC = $cfn_totals{$curr_cname};
+ $curr_cfn_CC = [] unless (defined $curr_cfn_CC);
+ add_array_a_to_b($CC, $curr_cfn_CC);
+ $cfn_totals{$curr_cname} = $curr_cfn_CC;
+
+ if ($inclusive) {
+ add_array_a_to_b($CC, $curr_fn_CC);
+ }
+ next;
+ }
+
+ add_array_a_to_b($CC, $curr_fn_CC);
+
+ # If curr_file is selected, add CC to curr_file list. We look for
+ # full filename matches; or, if auto-annotating, we have to
+ # remember everything -- we won't know until the end what's needed.
+ if ($auto_annotate || defined $user_ann_files{$curr_file}) { + my $tmp = $curr_file_ind_CCs->{$curr_line_num}; + $tmp = [] unless defined $tmp; + add_array_a_to_b($CC, $tmp); + $curr_file_ind_CCs->{$curr_line_num} = $tmp; + } + + } elsif (s/^fn=(.*)$//) { + # Commit result from previous function + $fn_totals{$curr_name} = $curr_fn_CC if (defined $curr_name); + + # Setup new one + $curr_fn = uncompressed_name("fn",$1); + $curr_name = "$curr_file:$curr_fn"; + $obj_name{$curr_name} = $curr_obj; + $curr_fn_CC = $fn_totals{$curr_name}; + $curr_fn_CC = [] unless (defined $curr_fn_CC); + + } elsif (s/^ob=(.*)$//) { + $curr_obj = uncompressed_name("ob",$1); + + } elsif (s/^fl=(.*)$//) { + $all_ind_CCs{$curr_file} = $curr_file_ind_CCs + if (defined $curr_file); + + $curr_file = uncompressed_name("fl",$1); + $curr_file_ind_CCs = $all_ind_CCs{$curr_file}; + $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs); + + } elsif (s/^(fi|fe)=(.*)$//) { + (defined $curr_name) or die("Line $.: Unexpected fi/fe line\n"); + $fn_totals{$curr_name} = $curr_fn_CC; + $all_ind_CCs{$curr_file} = $curr_file_ind_CCs; + + $curr_file = uncompressed_name("fl",$2); + $curr_name = "$curr_file:$curr_fn"; + $curr_file_ind_CCs = $all_ind_CCs{$curr_file}; + $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs); + $curr_fn_CC = $fn_totals{$curr_name}; + $curr_fn_CC = [] unless (defined $curr_fn_CC); + + } elsif (s/^\s*$//) { + # blank, do nothing + + } elsif (s/^cob=(.*)$//) { + $curr_cobj = uncompressed_name("ob",$1); + + } elsif (s/^cfi=(.*)$//) { + $curr_cfile = uncompressed_name("fl",$1); + + } elsif (s/^cfn=(.*)$//) { + $curr_cfunc = uncompressed_name("fn",$1); + if ($curr_cfile eq "") { + $curr_cname = "$curr_file:$curr_cfunc"; + } + else { + $curr_cname = "$curr_cfile:$curr_cfunc"; + $curr_cfile = ""; + } + + my $tmp = $calling_funcs->{$curr_cname}; + $tmp = {} unless defined $tmp; + $$tmp{$curr_name} = 1; + $calling_funcs->{$curr_cname} = $tmp; + + my $tmp2 = $called_funcs->{$curr_name}; + $tmp2 = {} unless defined $tmp2; + $$tmp2{$curr_cname} = 1; + $called_funcs->{$curr_name} = $tmp2; + + } elsif (s/^calls=(\d+)//) { + $curr_call_counter = $1; + + } elsif (s/^(jump|jcnd)=//) { + #ignore jump information + + } elsif (s/^totals:\s+//) { + #ignore + + } elsif (s/^summary:\s+//) { + $summary_CC = line_to_CC($_); + + } else { + warn("WARNING: line $. 
malformed, ignoring\n"); + if ($verbose) { chomp; warn(" line: '$_'\n"); } + } + } + + # Check if summary line was present + if (not defined $summary_CC) { + warn("WARNING: missing final summary line, no summary will be printed\n"); + } + else { + # Finish up handling final filename/fn_name counts + $fn_totals{"$curr_file:$curr_fn"} = $curr_fn_CC + if (defined $curr_file && defined $curr_fn); + $all_ind_CCs{$curr_file} = + $curr_file_ind_CCs if (defined $curr_file); + + (scalar(@$summary_CC) == @events) + or die("Line $.: summary event and total event mismatch\n"); + } + + # Correct inclusive totals + if ($inclusive) { + foreach my $name (keys %cfn_totals) { + $fn_totals{$name} = $cfn_totals{$name}; + } + } + + close(INPUTFILE); +} + +#----------------------------------------------------------------------------- +# Print options used +#----------------------------------------------------------------------------- +sub print_options () +{ + print($fancy); + print($desc); + my $target = $cmd; + if ($pid ne "") { + $target .= " (PID $pid"; + if ($part ne "") { $target .= ", part $part"; } + if ($thread ne "") { $target .= ", thread $thread"; } + $target .= ")"; + } + print("Profiled target: $target\n"); + print("Events recorded: @events\n"); + print("Events shown: @show_events\n"); + print("Event sort order: @sort_events\n"); + print("Thresholds: @thresholds\n"); + + my @include_dirs2 = @include_dirs; # copy @include_dirs + shift(@include_dirs2); # remove "" entry, which is always the first + unshift(@include_dirs2, "") if (0 == @include_dirs2); + my $include_dir = shift(@include_dirs2); + print("Include dirs: $include_dir\n"); + foreach my $include_dir (@include_dirs2) { + print(" $include_dir\n"); + } + + my @user_ann_files = keys %user_ann_files; + unshift(@user_ann_files, "") if (0 == @user_ann_files); + my $user_ann_file = shift(@user_ann_files); + print("User annotated: $user_ann_file\n"); + foreach $user_ann_file (@user_ann_files) { + print(" $user_ann_file\n"); + } + + my $is_on = ($auto_annotate ? "on" : "off"); + print("Auto-annotation: $is_on\n"); + print("\n"); +} + +#----------------------------------------------------------------------------- +# Print summary and sorted function totals +#----------------------------------------------------------------------------- +sub mycmp ($$) +{ + my ($c, $d) = @_; + + # Iterate through sort events (eg. 3,2); return result if two are different + foreach my $i (@sort_order) { + my ($x, $y); + $x = $c->[$i]; + $y = $d->[$i]; + $x = -1 unless defined $x; + $y = -1 unless defined $y; + + my $cmp = $y <=> $x; # reverse sort + if (0 != $cmp) { + return $cmp; + } + } + # Exhausted events, equal + return 0; +} + +sub commify ($) { + my ($val) = @_; + 1 while ($val =~ s/^(\d+)(\d{3})/$1,$2/); + return $val; +} + +# Because the counts can get very big, and we don't want to waste screen space +# and make lines too long, we compute exactly how wide each column needs to be +# by finding the widest entry for each one. +sub compute_CC_col_widths (@) +{ + my @CCs = @_; + my $CC_col_widths = []; + + # Initialise with minimum widths (from event names) + foreach my $event (@events) { + push(@$CC_col_widths, length($event)); + } + + # Find maximum width count for each column. @CC_col_width positions + # correspond to @CC positions. + foreach my $CC (@CCs) { + foreach my $i (0 .. 
scalar(@$CC)-1) { + if (defined $CC->[$i]) { + # Find length, accounting for commas that will be added + my $length = length $CC->[$i]; + my $clength = $length + int(($length - 1) / 3); + $CC_col_widths->[$i] = max($CC_col_widths->[$i], $clength); + } + } + } + return $CC_col_widths; +} + +# Print the CC with each column's size dictated by $CC_col_widths. +sub print_CC ($$) +{ + my ($CC, $CC_col_widths) = @_; + + foreach my $i (@show_order) { + my $count = (defined $CC->[$i] ? commify($CC->[$i]) : "."); + my $space = ' ' x ($CC_col_widths->[$i] - length($count)); + print("$space$count "); + } +} + +sub print_events ($) +{ + my ($CC_col_widths) = @_; + + foreach my $i (@show_order) { + my $event = $events[$i]; + my $event_width = length($event); + my $col_width = $CC_col_widths->[$i]; + my $space = ' ' x ($col_width - $event_width); + print("$space$event "); + } +} + +# Prints summary and function totals (with separate column widths, so that +# function names aren't pushed over unnecessarily by huge summary figures). +# Also returns a hash containing all the files that are involved in getting the +# events count above the thresholds (ie. all the interesting ones). +sub print_summary_and_fn_totals () +{ + my @fn_fullnames = keys %fn_totals; + + # Work out the size of each column for printing (summary and functions + # separately). + my $summary_CC_col_widths = compute_CC_col_widths($summary_CC); + my $fn_CC_col_widths = compute_CC_col_widths(values %fn_totals); + + # Header and counts for summary + print($fancy); + print_events($summary_CC_col_widths); + print("\n"); + print($fancy); + print_CC($summary_CC, $summary_CC_col_widths); + print(" PROGRAM TOTALS\n"); + print("\n"); + + # Header for functions + print($fancy); + print_events($fn_CC_col_widths); + print(" file:function\n"); + print($fancy); + + # Sort function names into order dictated by --sort option. + @fn_fullnames = sort { + mycmp($fn_totals{$a}, $fn_totals{$b}) + } @fn_fullnames; + + + # Assertion + (scalar @sort_order == scalar @thresholds) or + die("sort_order length != thresholds length:\n", + " @sort_order\n @thresholds\n"); + + my $threshold_files = {}; + # @curr_totals has the same shape as @sort_order and @thresholds + my @curr_totals = (); + foreach my $e (@thresholds) { + push(@curr_totals, 0); + } + + # Print functions, stopping when the threshold has been reached. + foreach my $fn_name (@fn_fullnames) { + + # Stop when we've reached all the thresholds + my $reached_all_thresholds = 1; + foreach my $i (0 .. scalar @thresholds - 1) { + my $prop = $curr_totals[$i] * 100; + if ($summary_CC->[$sort_order[$i]] >0) { + $prop = $prop / $summary_CC->[$sort_order[$i]]; + } + $reached_all_thresholds &= ($prop >= $thresholds[$i]); + } + last if $reached_all_thresholds; + + if ($tree_caller || $tree_calling) { print "\n"; } + + if ($tree_caller && ($fn_name ne "???:???")) { + # Print function callers + my $tmp1 = $calling_funcs->{$fn_name}; + if (defined $tmp1) { + foreach my $calling (keys %$tmp1) { + if (defined $call_counter{$calling,$fn_name}) { + print_CC($call_CCs{$calling,$fn_name}, $fn_CC_col_widths); + print" < $calling ("; + print $call_counter{$calling,$fn_name} . 
"x)"; + if (defined $obj_name{$calling}) { + print " [$obj_name{$calling}]"; + } + print "\n"; + } + } + } + } + + # Print function results + my $fn_CC = $fn_totals{$fn_name}; + print_CC($fn_CC, $fn_CC_col_widths); + if ($tree_caller || $tree_calling) { print " * "; } + print(" $fn_name"); + if (defined $obj_name{$fn_name}) { + print " [$obj_name{$fn_name}]"; + } + print "\n"; + + if ($tree_calling && ($fn_name ne "???:???")) { + # Print called functions + my $tmp2 = $called_funcs->{$fn_name}; + if (defined $tmp2) { + foreach my $called (keys %$tmp2) { + if (defined $call_counter{$fn_name,$called}) { + print_CC($call_CCs{$fn_name,$called}, $fn_CC_col_widths); + print" > $called ("; + print $call_counter{$fn_name,$called} . "x)"; + if (defined $obj_name{$called}) { + print " [$obj_name{$called}]"; + } + print "\n"; + } + } + } + } + + # Update the threshold counts + my $filename = $fn_name; + $filename =~ s/:.+$//; # remove function name + $threshold_files->{$filename} = 1; + foreach my $i (0 .. scalar @sort_order - 1) { + if ($inclusive) { + $curr_totals[$i] = $summary_CC->[$sort_order[$i]] - + $fn_CC->[$sort_order[$i]] + if (defined $fn_CC->[$sort_order[$i]]); + } else { + $curr_totals[$i] += $fn_CC->[$sort_order[$i]] + if (defined $fn_CC->[$sort_order[$i]]); + } + } + } + print("\n"); + + return $threshold_files; +} + +#----------------------------------------------------------------------------- +# Annotate selected files +#----------------------------------------------------------------------------- + +# Issue a warning that the source file is more recent than the input file. +sub warning_on_src_more_recent_than_inputfile ($) +{ + my $src_file = $_[0]; + + my $warning = <{"???"}; + %all_ann_files = (%user_ann_files, %$threshold_files) + } else { + %all_ann_files = %user_ann_files; + } + + # Track if we did any annotations. + my $did_annotations = 0; + + LOOP: + foreach my $src_file (keys %all_ann_files) { + + my $opened_file = ""; + my $full_file_name = ""; + foreach my $include_dir (@include_dirs) { + my $try_name = $include_dir . $src_file; + if (open(INPUTFILE, "< $try_name")) { + $opened_file = $try_name; + $full_file_name = ($include_dir eq "" + ? $src_file + : "$include_dir + $src_file"); + last; + } + } + + if (not $opened_file) { + # Failed to open the file. If chosen on the command line, die. + # If arose from auto-annotation, print a little message. + if (defined $user_ann_files{$src_file}) { + die("File $src_file not opened in any of: @include_dirs\n"); + + } else { + push(@unfound_auto_annotate_files, $src_file); + } + + } else { + # File header (distinguish between user- and auto-selected files). + print("$fancy"); + my $ann_type = + (defined $user_ann_files{$src_file} ? "User" : "Auto"); + print("-- $ann_type-annotated source: $full_file_name\n"); + print("$fancy"); + + # Get file's CCs + my $src_file_CCs = $all_ind_CCs{$src_file}; + if (!defined $src_file_CCs) { + print(" No information has been collected for $src_file\n\n"); + next LOOP; + } + + $did_annotations = 1; + + # Numeric, not lexicographic sort! 
+ my @line_nums = sort {$a <=> $b} keys %$src_file_CCs;
+
+ # If $src_file is more recent than the input file, issue warning
+ my $src_more_recent_than_inputfile = 0;
+ if ((stat $opened_file)[9] > (stat $input_file)[9]) {
+ $src_more_recent_than_inputfile = 1;
+ warning_on_src_more_recent_than_inputfile($src_file);
+ }
+
+ # Work out the size of each column for printing
+ my $CC_col_widths = compute_CC_col_widths(values %$src_file_CCs);
+
+ # Events header
+ print_events($CC_col_widths);
+ print("\n\n");
+
+ # Shift out 0 if it's in the line numbers (from unknown entries,
+ # likely due to bugs in Valgrind's stabs debug info reader)
+ shift(@line_nums) if (0 == $line_nums[0]);
+
+ # Finds interesting line ranges -- all lines with a CC, and all
+ # lines within $context lines of a line with a CC.
+ my $n = @line_nums;
+ my @pairs;
+ for (my $i = 0; $i < $n; $i++) {
+ push(@pairs, $line_nums[$i] - $context); # lower marker
+ while ($i < $n-1 &&
+ $line_nums[$i] + 2*$context >= $line_nums[$i+1]) {
+ $i++;
+ }
+ push(@pairs, $line_nums[$i] + $context); # upper marker
+ }
+
+ # Annotate chosen lines, tracking total counts of lines printed
+ $pairs[0] = 1 if ($pairs[0] < 1);
+ while (@pairs) {
+ my $low = shift @pairs;
+ my $high = shift @pairs;
+ while ($. < $low-1) {
+ my $tmp = <INPUTFILE>;
+ last unless (defined $tmp); # hack to detect EOF
+ }
+ my $src_line;
+ # Print line number, unless start of file
+ print("-- line $low " . '-' x 40 . "\n") if ($low != 1);
+ while (($. < $high) && ($src_line = <INPUTFILE>)) {
+ if (defined $line_nums[0] && $. == $line_nums[0]) {
+ print_CC($src_file_CCs->{$.}, $CC_col_widths);
+ add_array_a_to_b($src_file_CCs->{$.},
+ $printed_totals_CC);
+ shift(@line_nums);
+
+ } else {
+ print_CC( [], $CC_col_widths);
+ }
+
+ print(" $src_line");
+
+ my $tmp = $called_from_line->{$src_file,$.};
+ my $func = $func_of_line{$src_file,$.};
+ if (defined $tmp) {
+ foreach my $called (keys %$tmp) {
+ if (defined $call_CCs{$func,$called,$.}) {
+ print_CC($call_CCs{$func,$called,$.}, $CC_col_widths);
+ print " => $called (";
+ print $call_counter{$func,$called,$.} . "x)\n";
+ }
+ }
+ }
+ }
+ # Print line number, unless EOF
+ if ($src_line) {
+ print("-- line $high " . '-' x 40 . "\n");
+ } else {
+ last;
+ }
+ }
+
+ # If there was info on lines past the end of the file...
+ if (@line_nums) {
+ foreach my $line_num (@line_nums) {
+ print_CC($src_file_CCs->{$line_num}, $CC_col_widths);
+ print(" <bogus line $line_num>\n");
+ }
+ print("\n");
+ warning_on_nonexistent_lines($src_more_recent_than_inputfile,
+ $src_file, \@line_nums);
+ }
+ print("\n");
+
+ # Print summary of counts attributed to file but not to any
+ # particular line (due to incomplete debug info).
+ if ($src_file_CCs->{0}) {
+ print_CC($src_file_CCs->{0}, $CC_col_widths);
+ print(" <counts for unidentified lines in $src_file>\n\n");
+ }
+
+ close(INPUTFILE);
+ }
+ }
+
+ # Print list of unfound auto-annotate selected files.
+ if (@unfound_auto_annotate_files) {
+ print("$fancy");
+ print("The following files chosen for auto-annotation could not be found:\n");
+ print($fancy);
+ foreach my $f (@unfound_auto_annotate_files) {
+ print(" $f\n");
+ }
+ print("\n");
+ }
+
+ # If we did any annotating, print what proportion of events were covered by
+ # annotated lines above.
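+ # (Illustrative: if annotated lines account for 90,000 of 100,000
+ # total Ir, a "90" is printed in the Ir column below.)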
+ if ($did_annotations) { + my $percent_printed_CC; + foreach (my $i = 0; $i < @$summary_CC; $i++) { + $percent_printed_CC->[$i] = + sprintf("%.0f", + $printed_totals_CC->[$i] / $summary_CC->[$i] * 100); + } + my $pp_CC_col_widths = compute_CC_col_widths($percent_printed_CC); + print($fancy); + print_events($pp_CC_col_widths); + print("\n"); + print($fancy); + print_CC($percent_printed_CC, $pp_CC_col_widths); + print(" percentage of events annotated\n\n"); + } +} + +#---------------------------------------------------------------------------- +# "main()" +#---------------------------------------------------------------------------- +process_cmd_line(); +read_input_file(); +print_options(); +my $threshold_files = print_summary_and_fn_totals(); +annotate_ann_files($threshold_files); + +##--------------------------------------------------------------------## +##--- end vg_annotate.in ---## +##--------------------------------------------------------------------## + + diff --git a/callgrind/callgrind_control.in b/callgrind/callgrind_control.in new file mode 100644 index 0000000000..869c9b3af3 --- /dev/null +++ b/callgrind/callgrind_control.in @@ -0,0 +1,485 @@ +#! /usr/bin/perl -w +##--------------------------------------------------------------------## +##--- Control supervision of applications run with callgrind ---## +##--- callgrind_control ---## +##--------------------------------------------------------------------## + +# This file is part of Callgrind, a cache-simulator and call graph +# tracer built on Valgrind. +# +# Copyright (C) 2003,2004,2005 Josef Weidendorfer +# Josef.Weidendorfer@gmx.de +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +# 02111-1307, USA. 
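+
+# A running callgrind instance advertises itself through an info file
+# (see the info-file code in command.c), a simple "key: value" list.
+# The file names and values below are purely illustrative:
+#
+#   version: 1
+#   base: /tmp
+#   dumps: /tmp/callgrind.out.1234
+#   control: /tmp/callgrind.cmd.1234
+#   result: /tmp/callgrind.res.1234
+#   cmd: ./myprog arg1 arg2
+#
+# getCallgrindPids() below globs for these files and keeps the PIDs of
+# processes whose memory map actually contains a callgrind binary.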
+
+sub getCallgrindPids {
+
+ @pids = ();
+ foreach $f (</tmp/callgrind.info.*>) {
+ ($pid) = ($f =~ /info\.(\d+)/);
+ if ($pid eq "") { next; }
+ $mapfile = "/proc/$pid/maps";
+ if (!-e $mapfile) { next; }
+
+ open MAP, "<$mapfile";
+ $found = 0;
+ while(<MAP>) {
+ # works both for VG 3.0 and VG 3.1
+ if (/callgrind/) { $found = 1; }
+ }
+ close MAP;
+ if ($found == 0) { next; }
+
+ open INFO, "<$f";
+ while(<INFO>) {
+ if (/version: (\d+)/) { $mversion{$pid} = $1; }
+ if (/cmd: (.+)$/) { $cmd{$pid} = $1; }
+ if (/control: (.+)$/) { $control{$pid} = $1; }
+ if (/base: (.+)$/) { $base{$pid} = $1; }
+ if (/result: (.+)$/) { $result{$pid} = $1; }
+ }
+ close INFO;
+
+ if ($mversion{$pid} > 1) {
+ #print " Unsupported Callgrind Major Version $mversion.\n\n";
+ next;
+ }
+
+ push(@pids, $pid);
+ }
+}
+
+sub printHeader {
+ if ($headerPrinted) { return; }
+ $headerPrinted = 1;
+ if ($beQuiet) { return; }
+
+ print "Observe the status and control currently active callgrind runs.\n";
+ print "(C) 2003-2005, Josef Weidendorfer (Josef.Weidendorfer\@gmx.de)\n\n";
+}
+
+sub printVersion {
+ print "callgrind_control-@VERSION@\n";
+ exit;
+}
+
+sub printHelp {
+ printHeader;
+
+ print "Usage: callgrind_control [options] [<pid>|<name> ...]\n\n";
+ print "If no PIDs/Names are given, an action is applied to all currently\n";
+ print "active Callgrind runs. Default action is printing short information.\n\n";
+ print "Options:\n";
+ print " -h Print this help text\n";
+ print " -v Print version\n";
+ print " -q Be quiet\n";
+ print " -l Print more information\n";
+ print " -s Print status information\n";
+ print " -b Print backtrace information\n";
+ print " -e [A,..] Print event counters for A,.. [default: all]\n";
+ print " -d [str] Request a profile dump, include <str> as trigger hint\n";
+ print " -z Zero all cost counters\n";
+ print " -k Kill\n";
+ print " -i on/off Switch instrumentation state on/off\n";
+ print " -w <dir> Manually specify the working directory of a callgrind run\n";
+ print "\n";
+ exit;
+}
+
+
+#
+# Parts more or less copied from ct_annotate (author: Nicholas Nethercote)
+#
+
+sub prepareEvents {
+
+ @events = split(/\s+/, $events);
+ %events = ();
+ $n = 0;
+ foreach $event (@events) {
+ $events{$event} = $n;
+ $n++;
+ }
+ if (@show_events) {
+ foreach my $show_event (@show_events) {
+ (defined $events{$show_event}) or
+ print "Warning: Event `$show_event' is not being collected\n";
+ }
+ } else {
+ @show_events = @events;
+ }
+ @show_order = ();
+ foreach my $show_event (@show_events) {
+ push(@show_order, $events{$show_event});
+ }
+}
+
+sub max ($$)
+{
+ my ($x, $y) = @_;
+ return ($x > $y ? $x : $y);
+}
+
+sub line_to_CC ($)
+{
+ my @CC = (split /\s+/, $_[0]);
+ (@CC <= @events) or die("Line $.: too many event counts\n");
+ return \@CC;
+}
+
+sub commify ($) {
+ my ($val) = @_;
+ 1 while ($val =~ s/^(\d+)(\d{3})/$1,$2/);
+ return $val;
+}
+
+sub compute_CC_col_widths (@)
+{
+ my @CCs = @_;
+ my $CC_col_widths = [];
+
+ # Initialise with minimum widths (from event names)
+ foreach my $event (@events) {
+ push(@$CC_col_widths, length($event));
+ }
+
+ # Find maximum width count for each column. @CC_col_width positions
+ # correspond to @CC positions.
+ foreach my $CC (@CCs) {
+ foreach my $i (0 ..
scalar(@$CC)-1) { + if (defined $CC->[$i]) { + # Find length, accounting for commas that will be added + my $length = length $CC->[$i]; + my $clength = $length + int(($length - 1) / 3); + $CC_col_widths->[$i] = max($CC_col_widths->[$i], $clength); + } + } + } + return $CC_col_widths; +} + +# Print the CC with each column's size dictated by $CC_col_widths. +sub print_CC ($$) +{ + my ($CC, $CC_col_widths) = @_; + + foreach my $i (@show_order) { + my $count = (defined $CC->[$i] ? commify($CC->[$i]) : "."); + my $space = ' ' x ($CC_col_widths->[$i] - length($count)); + print("$space$count "); + } +} + +sub print_events ($) +{ + my ($CC_col_widths) = @_; + + foreach my $i (@show_order) { + my $event = $events[$i]; + my $event_width = length($event); + my $col_width = $CC_col_widths->[$i]; + my $space = ' ' x ($col_width - $event_width); + print("$space$event "); + } +} + + + +# +# Main +# + +getCallgrindPids; + +$requestEvents = 0; +$requestDump = 0; +$switchInstr = 0; +$headerPrinted = 0; +$beQuiet = 0; +$dumpHint = ""; +$gotW = 0; +$workingDir = ""; + +%spids = (); +foreach $arg (@ARGV) { + if ($arg =~ /^-/) { + if ($requestDump == 1) { $requestDump = 2; } + if ($requestEvents == 1) { $requestEvents = 2; } + if ($gotW == 1) { $gotW = 2; } + + if ($arg =~ /^-?-h/) { printHelp; } + if ($arg =~ /^-?-v/) { printVersion; } + if ($arg =~ /^-q/) { $beQuiet = 1; next; } + if ($arg =~ /^-l/) { $printLong = 1; next; } + if ($arg =~ /^-s/) { $printStatus = 1; next; } + if ($arg =~ /^-b/) { $printBacktrace = 1; next; } + if ($arg =~ /^-d/) { $requestDump = 1; next; } + if ($arg =~ /^-z/) { $requestZero = 1; next; } + if ($arg =~ /^-k/) { $requestKill = 1; next; } + if ($arg =~ /^-e/) { $requestEvents = 1; next; } + if ($arg =~ /^-i/) { $switchInstr = 1; next; } + if ($arg =~ /^-w/) { $gotW = 1; next; } + + printHeader; + print "Unknown option '$arg'.\n\n"; + printHelp; + } + + if ($arg =~ /^[A-Za-z_]/) { + # arguments of -d/-e/-i are non-numeric + if ($requestDump == 1) { + $requestDump = 2; + $dumpHint = $arg; + next; + } + + if ($requestEvents == 1) { + $requestEvents = 2; + @show_events = split(/,/, $arg); + next; + } + + if ($switchInstr == 1) { + $switchInstr = 2; + $switchInstrMode = "+"; + if (($arg eq "off") || ($arg eq "no")) { + $switchInstrMode = "-"; + } + next; + } + } + + if ($gotW == 1) { + $gotW = 2; + $workingDir = $arg; + if (!-d $workingDir) { + print "Error: directory '$workingDir' does not exist.\n"; + printHelp; + } + next; + } + + if (defined $cmd{$arg}) { $spids{$arg} = 1; next; } + $nameFound = 0; + foreach $p (@pids) { + if ($cmd{$p} =~ /^$arg/) { + $nameFound = 1; + $spids{$p} = 1; + } + } + if ($nameFound) { next; } + + printHeader; + print "Non-existent Callgrind task with PID/Name '$arg'.\n\n"; + printHelp; +} + +if ($workingDir ne "") { + # Generate dummy information for dummy pid 0 + $pid = "0"; + $mversion{$pid} = "@VERSION@"; + $cmd{$pid} = "???"; + $base{$pid} = $workingDir; + $control{$pid} = "$workingDir/callgrind.cmd"; + # do not wait for any result... 
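+ # (an empty result file name makes the wait-for-answer code below
+ # treat this dummy entry as having no result to read)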
+ $result{$pid} = "";
+
+ # Only handle this faked callgrind run
+ @pids = ($pid);
+}
+
+if (scalar @pids == 0) {
+ print "No active callgrind runs detected.\n";
+ #print "Detection fails when /proc/*/maps is not readable.\n";
+ print "[Detection can fail on some systems; to work around this,\n";
+ print " specify the working directory of a callgrind run with '-w']\n";
+ exit;
+}
+
+@spids = keys %spids;
+if (scalar @spids >0) { @pids = @spids; }
+
+$command = "";
+$waitForAnswer = 0;
+if ($requestDump) {
+ $command = "Dump";
+ if ($dumpHint ne "") { $command .= " ".$dumpHint; }
+}
+if ($requestZero) { $command = "Zero"; }
+if ($requestKill) { $command = "Kill"; }
+if ($switchInstr) { $command = $switchInstrMode."Instrumentation"; }
+if ($printStatus || $printBacktrace || $requestEvents) {
+ $command = "Status";
+ $waitForAnswer = 1;
+}
+
+foreach $pid (@pids) {
+ $pidstr = "PID $pid: ";
+ print $pidstr.$cmd{$pid};
+
+ if ($command eq "") {
+ if ($printLong) {
+ #print " " x length $pidstr;
+ print " (in $base{$pid})\n";
+ }
+ else {
+ print "\n";
+ }
+ next;
+ }
+ else {
+ if (! (open CONTROL, ">$control{$pid}")) {
+ print " [sending '$command' failed: permission denied]\n";
+ next;
+ }
+ print " [requesting '$command'...]\n";
+ print CONTROL $command;
+ close CONTROL;
+
+ while(-e $control{$pid}) {
+ # sleep for 250 ms
+ select(undef, undef, undef, 0.25);
+ }
+ }
+
+ if ($result{$pid} eq "") { $waitForAnswer=0; }
+ if (!$waitForAnswer) { print " OK.\n"; next; }
+
+ if (! (open RESULT, "<$result{$pid}")) {
+ print " Warning: Can't open expected result file $result{$pid}.\n";
+ next;
+ }
+
+ @tids = ();
+ $ctid = 0;
+ %fcount = ();
+ %func = ();
+ %calls = ();
+ %events = ();
+ @events = ();
+ %totals = ();
+
+ $exec_bbs = 0;
+ $dist_bbs = 0;
+ $exec_calls = 0;
+ $dist_calls = 0;
+ $dist_ctxs = 0;
+ $dist_funcs = 0;
+ $threads = 0;
+ $events = "";
+
+ while(<RESULT>) {
+ if (/function-(\d+)-(\d+): (.+)$/) {
+ if ($ctid != $1) {
+ $ctid = $1;
+ push(@tids, $ctid);
+ $fcount{$ctid} = 0;
+ }
+ $fcount{$ctid}++;
+ $func{$ctid,$fcount{$ctid}} = $3;
+ }
+ elsif (/calls-(\d+)-(\d+): (.+)$/) {
+ if ($ctid != $1) { next; }
+ $calls{$ctid,$fcount{$ctid}} = $3;
+ }
+ elsif (/events-(\d+)-(\d+): (.+)$/) {
+ if ($ctid != $1) { next; }
+ $events{$ctid,$fcount{$ctid}} = line_to_CC($3);
+ }
+ elsif (/events-(\d+): (.+)$/) {
+ if (scalar @events == 0) { next; }
+ $totals{$1} = line_to_CC($2);
+ }
+ elsif (/executed-bbs: (\d+)/) { $exec_bbs = $1; }
+ elsif (/distinct-bbs: (\d+)/) { $dist_bbs = $1; }
+ elsif (/executed-calls: (\d+)/) { $exec_calls = $1; }
+ elsif (/distinct-calls: (\d+)/) { $dist_calls = $1; }
+ elsif (/distinct-functions: (\d+)/) { $dist_funcs = $1; }
+ elsif (/distinct-contexts: (\d+)/) { $dist_ctxs = $1; }
+ elsif (/events: (.+)$/) { $events = $1; prepareEvents; }
+ elsif (/threads: (\d+)$/) { $threads = $1; }
+ elsif (/instrumentation: (\w+)$/) { $instrumentation = $1; }
+ }
+
+ unlink $result{$pid};
+
+ if ($instrumentation eq "off") {
+ print " No information available as instrumentation is switched off.\n\n";
+ exit;
+ }
+
+ if ($printStatus) {
+ if ($requestEvents <1) {
+ print " Number of threads: $threads\n";
+ print " Events collected: $events\n";
+ }
+
+ print " Functions: ".commify($dist_funcs);
+ print " (executed ".commify($exec_calls);
+ print ", contexts ".commify($dist_ctxs).")\n";
+
+ print " Basic blocks: ".commify($dist_bbs);
+ print " (executed ".commify($exec_bbs);
+ print ", call sites ".commify($dist_calls).")\n";
+ }
+
+ if ($requestEvents >0) {
+ $totals_width =
compute_CC_col_widths(values %totals); + print "\n Totals:"; + print_events($totals_width); + print("\n"); + foreach $tid (@tids) { + print " Th".substr(" ".$tid,-2)." "; + print_CC($totals{$tid}, $totals_width); + print("\n"); + } + } + + if ($printBacktrace) { + + if ($requestEvents >0) { + $totals_width = compute_CC_col_widths(values %events); + } + + foreach $tid (@tids) { + print "\n Frame: "; + if ($requestEvents >0) { + print_events($totals_width); + } + print "Backtrace for Thread $tid\n"; + + $i = $fcount{$tid}; + $c = 0; + while($i>0 && $c<100) { + $fc = substr(" $c",-2); + print " [$fc] "; + if ($requestEvents >0) { + print_CC($events{$tid,$i-1}, $totals_width); + } + print $func{$tid,$i}; + if ($i > 1) { + print " (".$calls{$tid,$i-1}." x)"; + } + print "\n"; + $i--; + $c++; + } + print "\n"; + } + } + print "\n"; +} + diff --git a/callgrind/callstack.c b/callgrind/callstack.c new file mode 100644 index 0000000000..6e14b2e1d0 --- /dev/null +++ b/callgrind/callstack.c @@ -0,0 +1,424 @@ +/*--------------------------------------------------------------------*/ +/*--- Callgrind ---*/ +/*--- ct_callstack.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Callgrind, a Valgrind tool for call tracing. + + Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "global.h" + +/*------------------------------------------------------------*/ +/*--- Call stack, operations ---*/ +/*------------------------------------------------------------*/ + +/* Stack of current thread. Gets initialized when switching to 1st thread. + * + * The artificial call stack is an array of call_entry's, representing + * stack frames of the executing program. + * Array call_stack and call_stack_esp have same size and grow on demand. + * Array call_stack_esp holds SPs of corresponding stack frames. 
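+ *
+ * (Illustrative example: after main() calls foo() and foo() calls bar(),
+ * entries 0..2 hold the jCCs of the three active calls together with
+ * the stack pointer at call time; unwind_call_stack() below uses these
+ * saved SPs to detect returns that skip frames, e.g. after longjmp().)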
+ *
+ */
+
+#define N_CALL_STACK_INITIAL_ENTRIES 500
+
+call_stack CLG_(current_call_stack);
+
+void CLG_(init_call_stack)(call_stack* s)
+{
+ Int i;
+
+ CLG_ASSERT(s != 0);
+
+ s->size = N_CALL_STACK_INITIAL_ENTRIES;
+ s->entry = (call_entry*) CLG_MALLOC(s->size * sizeof(call_entry));
+ s->sp = 0;
+ s->entry[0].cxt = 0; /* for assertion in push_cxt() */
+
+ for(i=0; i<s->size; i++) s->entry[i].enter_cost = 0;
+}
+
+call_entry* CLG_(get_call_entry)(Int sp)
+{
+ CLG_ASSERT(sp <= CLG_(current_call_stack).sp);
+ return &(CLG_(current_call_stack).entry[sp]);
+}
+
+void CLG_(copy_current_call_stack)(call_stack* dst)
+{
+ CLG_ASSERT(dst != 0);
+
+ dst->size = CLG_(current_call_stack).size;
+ dst->entry = CLG_(current_call_stack).entry;
+ dst->sp = CLG_(current_call_stack).sp;
+}
+
+void CLG_(set_current_call_stack)(call_stack* s)
+{
+ CLG_ASSERT(s != 0);
+
+ CLG_(current_call_stack).size = s->size;
+ CLG_(current_call_stack).entry = s->entry;
+ CLG_(current_call_stack).sp = s->sp;
+}
+
+
+static __inline__
+void ensure_stack_size(Int i)
+{
+ Int oldsize;
+ call_stack *cs = &CLG_(current_call_stack);
+
+ if (i < cs->size) return;
+
+ oldsize = cs->size;
+ cs->size *= 2;
+ while (i > cs->size) cs->size *= 2;
+
+ cs->entry = (call_entry*) VG_(realloc)(cs->entry,
+ cs->size * sizeof(call_entry));
+
+ for(i=oldsize; i<cs->size; i++)
+ cs->entry[i].enter_cost = 0;
+
+ CLG_(stat).call_stack_resizes++;
+
+ CLG_DEBUGIF(2)
+ VG_(printf)(" call stack enlarged to %d entries\n",
+ CLG_(current_call_stack).size);
+}
+
+
+
+/* Called when a function is entered non-recursively */
+static void function_entered(fn_node* fn, BBCC* to)
+{
+ CLG_ASSERT(fn != 0);
+
+#if CLG_ENABLE_DEBUG
+ if (fn->verbosity >=0) {
+ Int old = CLG_(clo).verbose;
+ CLG_(clo).verbose = fn->verbosity;
+ fn->verbosity = old;
+ VG_(message)(Vg_DebugMsg,
+ "Entering %s: Verbosity set to %d",
+ fn->name, CLG_(clo).verbose);
+ }
+#endif
+
+ if (fn->dump_before) {
+ Char trigger[FN_NAME_LEN];
+ VG_(sprintf)(trigger, "--dump-before=%s", fn->name);
+ CLG_(dump_profile)(trigger, True);
+ }
+ else if (fn->zero_before) {
+ CLG_(zero_all_cost)(True);
+ }
+
+ if (fn->toggle_collect) {
+ CLG_(current_state).collect = !CLG_(current_state).collect;
+ CLG_DEBUG(2," entering %s: toggled collection state to %s\n",
+ fn->name,
+ CLG_(current_state).collect ? "ON" : "OFF");
+ }
+}
+
+/* Called when a function is left (no recursion level still active) */
+static void function_left(fn_node* fn, BBCC* from)
+{
+ CLG_ASSERT(fn != 0);
+
+ if (fn->dump_after) {
+ Char trigger[FN_NAME_LEN];
+ VG_(sprintf)(trigger, "--dump-after=%s", fn->name);
+ CLG_(dump_profile)(trigger, True);
+ }
+ if (fn->toggle_collect) {
+ CLG_(current_state).collect = !CLG_(current_state).collect;
+ CLG_DEBUG(2," leaving %s: toggled collection state to %s\n",
+ fn->name,
+ CLG_(current_state).collect ? "ON" : "OFF");
+ }
+
+#if CLG_ENABLE_DEBUG
+ if (fn->verbosity >=0) {
+ Int old = CLG_(clo).verbose;
+ CLG_(clo).verbose = fn->verbosity;
+ fn->verbosity = old;
+ VG_(message)(Vg_DebugMsg,
+ "Leaving %s: Verbosity set back to %d",
+ fn->name, CLG_(clo).verbose);
+ }
+#endif
+}
+
+
+/* Push call on call stack.
+ *
+ * Increment the usage count for the function called.
+ * A jump from <from> to <to>, with <sp> as stack pointer.
+ * If <skip> is true, this is a call to a function to be skipped;
+ * for this, we set jcc = 0.
+ */
+void CLG_(push_call_stack)(BBCC* from, UInt jmp, BBCC* to, Addr sp, Bool skip)
+{
+ jCC* jcc;
+ UInt* pdepth;
+ call_entry* current_entry;
+ Addr ret_addr;
+
+ /* Ensure a call stack of size <current_sp>+1.
+ * The +1 is needed as push_cxt will store the
+ * context at [current_sp]
+ */
+ ensure_stack_size(CLG_(current_call_stack).sp +1);
+ current_entry = &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp]);
+
+ if (skip) {
+ jcc = 0;
+ }
+ else {
+ fn_node* to_fn = to->cxt->fn[0];
+
+ if (CLG_(current_state).nonskipped) {
+ /* this is a jmp from skipped to nonskipped */
+ CLG_ASSERT(CLG_(current_state).nonskipped == from);
+ }
+
+ /* As push_cxt() has to be called before push_call_stack if not
+ * skipping, the old context should already be saved on the stack */
+ CLG_ASSERT(current_entry->cxt != 0);
+ CLG_(copy_cost_lz)( CLG_(sets).full, &(current_entry->enter_cost),
+ CLG_(current_state).cost );
+
+ jcc = CLG_(get_jcc)(from, jmp, to);
+ CLG_ASSERT(jcc != 0);
+
+ pdepth = CLG_(get_fn_entry)(to_fn->number);
+ if (CLG_(clo).skip_direct_recursion) {
+ /* only increment depth if another function is called */
+ if (jcc->from->cxt->fn[0] != to_fn) (*pdepth)++;
+ }
+ else (*pdepth)++;
+
+ if (*pdepth>1)
+ CLG_(stat).rec_call_counter++;
+
+ jcc->call_counter++;
+ CLG_(stat).call_counter++;
+
+ if (*pdepth == 1) function_entered(to_fn, to);
+ }
+
+ /* return address is only useful with a real call;
+ * used to detect RET w/o CALL */
+ ret_addr = (from->bb->jmpkind == Ijk_Call) ?
+ bb_addr(from->bb) + from->bb->instr_len : 0;
+
+ /* put jcc on call stack */
+ current_entry->jcc = jcc;
+ current_entry->sp = sp;
+ current_entry->ret_addr = ret_addr;
+ current_entry->nonskipped = CLG_(current_state).nonskipped;
+
+ CLG_(current_call_stack).sp++;
+
+ /* To allow for above assertion we set context of next frame to 0 */
+ CLG_ASSERT(CLG_(current_call_stack).sp < CLG_(current_call_stack).size);
+ current_entry++;
+ current_entry->cxt = 0;
+
+ if (!skip)
+ CLG_(current_state).nonskipped = 0;
+ else if (!CLG_(current_state).nonskipped) {
+ /* a call from nonskipped to skipped */
+ CLG_(current_state).nonskipped = from;
+ if (!CLG_(current_state).nonskipped->skipped) {
+ CLG_(init_cost_lz)( CLG_(sets).full,
+ &CLG_(current_state).nonskipped->skipped);
+ CLG_(stat).distinct_skips++;
+ }
+ }
+
+#if CLG_ENABLE_DEBUG
+ CLG_DEBUGIF(0) {
+ if (CLG_(clo).verbose<2) {
+ if (jcc && jcc->to && jcc->to->bb) {
+ char spaces[][41] = { "   .   .   .   .   .   .   .   .   .   .",
+ "  .   .   .   .   .   .   .   .   .   . ",
+ " .   .   .   .   .   .   .   .   .   .  ",
+ ".   .   .   .   .   .   .   .   .   .   " };
+
+ int s = CLG_(current_call_stack).sp;
+ Int* pars = (Int*) sp;
+
+ BB* bb = jcc->to->bb;
+ if (s>40) s=40;
+ VG_(printf)("%s> %s(0x%x, 0x%x, ...) [%s / %p]\n", spaces[s%4]+40-s, bb->fn->name,
+ pars ? pars[1]:0,
+ pars ? pars[2]:0,
+ bb->obj->name + bb->obj->last_slash_pos,
+ bb->offset);
+ }
+ }
+ else if (CLG_(clo).verbose<4) {
+ VG_(printf)("+ %2d ", CLG_(current_call_stack).sp);
+ CLG_(print_short_jcc)(jcc);
+ VG_(printf)(", SP %p, RA %p\n", sp, ret_addr);
+ }
+ else {
+ VG_(printf)(" Pushed ");
+ CLG_(print_stackentry)(3, CLG_(current_call_stack).sp-1);
+ }
+ }
+#endif
+
+}
+
+
+/* Pop call stack and update inclusive sums.
+ * Returns modified jcc.
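+ * (Illustrative: push_call_stack() snapshotted the cost counters into
+ * enter_cost at call time; add_diff_cost_lz() below adds the difference
+ * between the current counters and that snapshot -- i.e. the inclusive
+ * cost of the call -- to the jCC.)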
+ *
+ * If the JCC becomes inactive, call entries are freed if possible
+ */
+void CLG_(pop_call_stack)()
+{
+ jCC* jcc;
+ Int depth = 0;
+ call_entry* lower_entry;
+
+ if (CLG_(current_state).sig >0) {
+ /* Check if we leave a signal handler; this can happen when
+ * calling longjmp() in the handler */
+ CLG_(run_post_signal_on_call_stack_bottom)();
+ }
+
+ lower_entry =
+ &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp-1]);
+
+ CLG_DEBUG(4,"+ pop_call_stack: frame %d, jcc %p\n",
+ CLG_(current_call_stack).sp, lower_entry->jcc);
+
+ /* jCC item is no longer on the real stack: pop it */
+ jcc = lower_entry->jcc;
+ CLG_(current_state).nonskipped = lower_entry->nonskipped;
+
+ if (jcc) {
+ fn_node* to_fn = jcc->to->cxt->fn[0];
+ UInt* pdepth = CLG_(get_fn_entry)(to_fn->number);
+ if (CLG_(clo).skip_direct_recursion) {
+ /* only decrement depth if another function was called */
+ if (jcc->from->cxt->fn[0] != to_fn) (*pdepth)--;
+ }
+ else (*pdepth)--;
+ depth = *pdepth;
+
+ /* add cost difference to sum */
+ if ( CLG_(add_diff_cost_lz)( CLG_(sets).full, &(jcc->cost),
+ lower_entry->enter_cost,
+ CLG_(current_state).cost) ) {
+
+ /* only count this call if it attributed some cost.
+ * the ret_counter is used to check if a BBCC dump is needed.
+ */
+ jcc->from->ret_counter++;
+ }
+ CLG_(stat).ret_counter++;
+
+ /* restore context */
+ CLG_(current_state).cxt = lower_entry->cxt;
+ CLG_(current_fn_stack).top =
+ CLG_(current_fn_stack).bottom + lower_entry->fn_sp;
+ CLG_ASSERT(CLG_(current_state).cxt != 0);
+
+ if (depth == 0) function_left(to_fn, jcc->from);
+ }
+
+ /* To allow for an assertion in push_call_stack() */
+ lower_entry->cxt = 0;
+
+ CLG_(current_call_stack).sp--;
+
+#if CLG_ENABLE_DEBUG
+ CLG_DEBUGIF(1) {
+ if (CLG_(clo).verbose<4) {
+ if (jcc) {
+ /* popped JCC target first */
+ VG_(printf)("- %2d %p => ",
+ CLG_(current_call_stack).sp,
+ bb_addr(jcc->to->bb));
+ CLG_(print_addr)(bb_jmpaddr(jcc->from->bb));
+ VG_(printf)(", SP %p\n",
+ CLG_(current_call_stack).entry[CLG_(current_call_stack).sp].sp);
+ CLG_(print_cost)(10, CLG_(sets).full, jcc->cost);
+ }
+ else
+ VG_(printf)("- %2d [Skipped JCC], SP %p\n",
+ CLG_(current_call_stack).sp,
+ CLG_(current_call_stack).entry[CLG_(current_call_stack).sp].sp);
+ }
+ else {
+ VG_(printf)(" Popped ");
+ CLG_(print_stackentry)(7, CLG_(current_call_stack).sp);
+ if (jcc) {
+ VG_(printf)(" returned to ");
+ CLG_(print_addr_ln)(bb_jmpaddr(jcc->from->bb));
+ }
+ }
+ }
+#endif
+
+}
+
+
+/* remove CallStack items to sync with current SP
+ */
+void CLG_(unwind_call_stack)(Addr sp, Int minpops)
+{
+ Int csp;
+ CLG_DEBUG(4,"+ unwind_call_stack(sp %p, minpops %d): frame %d\n",
+ sp, minpops, CLG_(current_call_stack).sp);
+
+ /* We pop old stack frames.
+ * For a call, let p be the stack address holding the return address.
+ * - call_stack_esp[] has SP after the CALL: p-4
+ * - current sp is after a RET: >= p
+ */
+
+ while( (csp=CLG_(current_call_stack).sp) >0) {
+ call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);
+
+ if ((top_ce->sp < sp) ||
+ ((top_ce->sp == sp) && minpops>0)) {
+
+ minpops--;
+ CLG_(pop_call_stack)();
+ csp=CLG_(current_call_stack).sp;
+ continue;
+ }
+ break;
+ }
+
+ CLG_DEBUG(4,"- unwind_call_stack\n");
+}
diff --git a/callgrind/clo.c b/callgrind/clo.c
new file mode 100644
index 0000000000..184fed1068
--- /dev/null
+++ b/callgrind/clo.c
@@ -0,0 +1,765 @@
+/*
+ This file is part of Callgrind, a Valgrind skin for call graph
+ profiling programs.
+
+ Copyright (C) 2002-2005, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This skin is derived from and contains a lot of code from Cachegrind
+ Copyright (C) 2002 Nicholas Nethercote (njn25@cam.ac.uk)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "config.h" // for VG_PREFIX
+
+#include "global.h"
+
+
+
+/*------------------------------------------------------------*/
+/*--- Function specific configuration options ---*/
+/*------------------------------------------------------------*/
+
+/* Special value for separate_callers: automatic = adaptive */
+#define CONFIG_AUTO -1
+
+#define CONFIG_DEFAULT -1
+#define CONFIG_FALSE 0
+#define CONFIG_TRUE 1
+
+/* Logging configuration for a function */
+struct _fn_config {
+ Int dump_before;
+ Int dump_after;
+ Int zero_before;
+ Int toggle_collect;
+
+ Int skip; /* Handle CALL to this function as JMP (= Skip)? */
+ Int group; /* don't change caller dependency inside group !=0 */
+ Int pop_on_jump;
+
+ Int separate_callers; /* separate logging dependent on caller */
+ Int separate_recursions; /* separate logging of rec. levels */
+
+#if CLG_ENABLE_DEBUG
+ Int verbosity; /* Change debug verbosity level while in function */
+#endif
+};
+
+/* Configurations for function name prefix patterns.
+ * Currently, only very limited patterns are possible:
+ * Exact prefix patterns and "*::" are allowed.
+ * E.g.
+ * - "abc" matches all functions starting with "abc".
+ * - "abc*::def" matches all functions starting with "abc" and
+ * starting with "def" after the first "::" separator.
+ * - "*::print(" matches C++ methods "print" in all classes
+ * without namespace. I.e. "*" doesn't match a "::".
+ *
+ * We build a trie from patterns, and for a given function, we
+ * go down the tree and apply all non-default configurations.
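+ *
+ * (Illustrative: the patterns "abc" and "abd" share the prefix node
+ * "ab" with child nodes "c" and "d"; looking up a function name walks
+ * down from the root and applies every config found along the way.)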
+ */
+
+
+#define NODE_DEGREE 30
+
+/* node of compressed trie search structure */
+typedef struct _config_node config_node;
+struct _config_node {
+ Int length;
+
+ fn_config* config;
+ config_node* sub_node[NODE_DEGREE];
+ config_node* next;
+ config_node* wild_star;
+ config_node* wild_char;
+
+ Char name[1];
+};
+
+/* root of trie */
+static config_node* fn_configs = 0;
+
+static __inline__
+fn_config* new_fnc(void)
+{
+ fn_config* new = (fn_config*) CLG_MALLOC(sizeof(fn_config));
+
+ new->dump_before = CONFIG_DEFAULT;
+ new->dump_after = CONFIG_DEFAULT;
+ new->zero_before = CONFIG_DEFAULT;
+ new->toggle_collect = CONFIG_DEFAULT;
+ new->skip = CONFIG_DEFAULT;
+ new->pop_on_jump = CONFIG_DEFAULT;
+ new->group = CONFIG_DEFAULT;
+ new->separate_callers = CONFIG_DEFAULT;
+ new->separate_recursions = CONFIG_DEFAULT;
+
+#if CLG_ENABLE_DEBUG
+ new->verbosity = CONFIG_DEFAULT;
+#endif
+
+ return new;
+}
+
+
+static config_node* new_config(Char* name, int length)
+{
+ int i;
+ config_node* node = (config_node*) CLG_MALLOC(sizeof(config_node) + length);
+
+ for(i=0;i<length;i++) {
+ node->name[i] = name[i];
+ }
+ node->name[i] = 0;
+
+ node->length = length;
+ node->config = 0;
+ for(i=0;i<NODE_DEGREE;i++) node->sub_node[i] = 0;
+ node->next = 0;
+ node->wild_char = 0;
+ node->wild_star = 0;
+
+ CLG_DEBUG(3, " new_config('%s', len %d)\n", node->name, length);
+
+ return node;
+}
+
+static __inline__
+Bool is_wild(Char n)
+{
+ return (n == '*') || (n == '?');
+}
+
+/* Recursively build up function matching tree (prefix tree).
+ * Returns function config object for pattern <name>
+ * and starting at tree node <*pnode>.
+ *
+ * Tree nodes (config_node) are created as needed,
+ * tree root is stored into <*pnode>, and the created
+ * leaf (fn_config) for the given pattern is returned.
+ */
+static fn_config* get_fnc2(config_node* node, Char* name)
+{
+ config_node *new_sub, *n, *nprev;
+ int offset, len;
+
+ CLG_DEBUG(3, " get_fnc2(%p, '%s')\n", node, name);
+
+ if (name[0] == 0) {
+ if (!node->config) node->config = new_fnc();
+ return node->config;
+ }
+
+ if (is_wild(*name)) {
+ if (*name == '*') {
+ while(name[1] == '*') name++;
+ new_sub = node->wild_star;
+ }
+ else
+ new_sub = node->wild_char;
+
+ if (!new_sub) {
+ new_sub = new_config(name, 1);
+ if (*name == '*')
+ node->wild_star = new_sub;
+ else
+ node->wild_char = new_sub;
+ }
+
+ return get_fnc2( new_sub, name+1);
+ }
+
+ n = node->sub_node[ name[0]%NODE_DEGREE ];
+ nprev = 0;
+ len = 0;
+ while(n) {
+ for(len=0; name[len] == n->name[len]; len++);
+ if (len>0) break;
+ nprev = n;
+ n = n->next;
+ }
+
+ if (!n) {
+ len = 1;
+ while(name[len] && (!is_wild(name[len]))) len++;
+ new_sub = new_config(name, len);
+ new_sub->next = node->sub_node[ name[0]%NODE_DEGREE ];
+ node->sub_node[ name[0]%NODE_DEGREE ] = new_sub;
+
+ if (name[len] == 0) {
+ new_sub->config = new_fnc();
+ return new_sub->config;
+ }
+
+ /* recurse on wildcard */
+ return get_fnc2( new_sub, name+len);
+ }
+
+ if (len < n->length) {
+
+ /* split up the subnode */
+ config_node *new_node;
+ int i;
+
+ new_node = new_config(n->name, len);
+ if (nprev)
+ nprev->next = new_node;
+ else
+ node->sub_node[ n->name[0]%NODE_DEGREE ] = new_node;
+ new_node->next = n->next;
+
+ new_node->sub_node[ n->name[len]%NODE_DEGREE ] = n;
+
+ for(i=0, offset=len; offset < n->length; i++, offset++)
+ n->name[i] = n->name[offset];
+ n->name[i] = 0;
+ n->length = i;
+
+ name += len;
+ offset = 0;
+ while(name[offset] && (!is_wild(name[offset]))) offset++;
+ new_sub = new_config(name, offset);
+ /* this sub_node of new_node could already be set:
chain! */
+ new_sub->next = new_node->sub_node[ name[0]%NODE_DEGREE ];
+ new_node->sub_node[ name[0]%NODE_DEGREE ] = new_sub;
+
+ if (name[offset]==0) {
+ new_sub->config = new_fnc();
+ return new_sub->config;
+ }
+
+ /* recurse on wildcard */
+ return get_fnc2( new_sub, name+offset);
+ }
+
+ name += n->length;
+
+ if (name[0] == 0) {
+ /* name and node name are the same */
+ if (!n->config) n->config = new_fnc();
+ return n->config;
+ }
+
+ offset = 1;
+ while(name[offset] && (!is_wild(name[offset]))) offset++;
+
+ new_sub = new_config(name, offset);
+ new_sub->next = n->sub_node[ name[offset]%NODE_DEGREE ];
+ n->sub_node[ name[offset]%NODE_DEGREE ] = new_sub;
+
+ return get_fnc2(new_sub, name+offset);
+}
+
+static void print_config_node(int s, config_node* node)
+{
+ config_node* n;
+ int i;
+
+ if (node != fn_configs) {
+ char sp[] = "                                        ";
+
+ if (s>40) s=40;
+ VG_(printf)(sp+40-s);
+ VG_(printf)("'%s'/%d\n", node->name, node->length);
+ }
+ for(i=0;i<NODE_DEGREE;i++) {
+ n = node->sub_node[i];
+ while(n) {
+ print_config_node(s+1, n);
+ n = n->next;
+ }
+ }
+ if (node->wild_char) print_config_node(s+1, node->wild_char);
+ if (node->wild_star) print_config_node(s+1, node->wild_star);
+}
+
+/* get a function config for a name pattern (from command line) */
+static fn_config* get_fnc(Char* name)
+{
+ fn_config* fnc;
+
+ CLG_DEBUG(3, " +get_fnc(%s)\n", name);
+ if (fn_configs == 0)
+ fn_configs = new_config(name, 0);
+ fnc = get_fnc2(fn_configs, name);
+
+ CLG_DEBUGIF(3) {
+ CLG_DEBUG(3, " -get_fnc(%s):\n", name);
+ print_config_node(3, fn_configs);
+ }
+ return fnc;
+}
+
+
+
+static void update_fn_config1(fn_node* fn, fn_config* fnc)
+{
+ if (fnc->dump_before != CONFIG_DEFAULT)
+ fn->dump_before = (fnc->dump_before == CONFIG_TRUE);
+
+ if (fnc->dump_after != CONFIG_DEFAULT)
+ fn->dump_after = (fnc->dump_after == CONFIG_TRUE);
+
+ if (fnc->zero_before != CONFIG_DEFAULT)
+ fn->zero_before = (fnc->zero_before == CONFIG_TRUE);
+
+ if (fnc->toggle_collect != CONFIG_DEFAULT)
+ fn->toggle_collect = (fnc->toggle_collect == CONFIG_TRUE);
+
+ if (fnc->skip != CONFIG_DEFAULT)
+ fn->skip = (fnc->skip == CONFIG_TRUE);
+
+ if (fnc->pop_on_jump != CONFIG_DEFAULT)
+ fn->pop_on_jump = (fnc->pop_on_jump == CONFIG_TRUE);
+
+ if (fnc->group != CONFIG_DEFAULT)
+ fn->group = fnc->group;
+
+ if (fnc->separate_callers != CONFIG_DEFAULT)
+ fn->separate_callers = fnc->separate_callers;
+
+ if (fnc->separate_recursions != CONFIG_DEFAULT)
+ fn->separate_recursions = fnc->separate_recursions;
+
+#if CLG_ENABLE_DEBUG
+ if (fnc->verbosity != CONFIG_DEFAULT)
+ fn->verbosity = fnc->verbosity;
+#endif
+}
+
+/* Recursively go down the function matching tree,
+ * looking for a match to <name>. For every matching leaf,
+ * <fn> is updated with the pattern config.
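+ *
+ * (E.g., given "--fn-skip=abc" and "--fn-skip=a*c", the name "abc"
+ * reaches a matching leaf both via the exact prefix and via the
+ * wildcard, so both configs are applied in turn.)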
+ */ +static void update_fn_config2(fn_node* fn, Char* name, config_node* node) +{ + config_node* n; + + CLG_DEBUG(3, " update_fn_config2('%s', node '%s'): \n", + name, node->name); + if ((*name == 0) && node->config) { + CLG_DEBUG(3, "Found!\n"); + update_fn_config1(fn, node->config); + return; + } + + n = node->sub_node[ name[0]%NODE_DEGREE ]; + while(n) { + if (VG_(strncmp)(name, n->name, n->length)==0) break; + n = n->next; + } + if (n) update_fn_config2(fn, name+n->length, n); + + if (node->wild_char) + update_fn_config2(fn, name+1, node->wild_char); + + if (node->wild_star) { + while(*name) { + update_fn_config2(fn, name, node->wild_star); + name++; + } + update_fn_config2(fn, name, node->wild_star); + } +} + +/* Update function config according to configs of name prefixes */ +void CLG_(update_fn_config)(fn_node* fn) +{ + CLG_DEBUG(3, " update_fn_config('%s')\n", fn->name); + if (fn_configs) + update_fn_config2(fn, fn->name, fn_configs); +} + + +/*--------------------------------------------------------------------*/ +/*--- Command line processing ---*/ +/*--------------------------------------------------------------------*/ + +static Char* getUInt(Char* s, UInt* pn) +{ + UInt n = 0; + while((*s >='0') && (*s <='9')) { + n = 10*n + (*s-'0'); + s++; + } + if (pn) *pn = n; + return s; +} + +__attribute__((unused)) +static UWord getUWord(Char* s) +{ + UWord n = 0; + Bool isHex = False; + + if ((s[0] == '0') && (s[1] == 'x')) { + isHex = True; + s += 2; + } + + if (!isHex) { + while((*s >='0') && (*s <='9')) { + n = 10*n + (*s-'0'); + s++; + } + } + else { + while(1) { + if ((*s >='0') && (*s <='9')) { + n = 16*n + (*s-'0'); + s++; + continue; + } + if ((*s >='a') && (*s <='f')) { + n = 16*n + (*s-'a'+10); + s++; + continue; + } + if ((*s >='A') && (*s <='F')) { + n = 16*n + (*s-'A'+10); + s++; + continue; + } + break; + } + } + + return n; +} + +Bool CLG_(process_cmd_line_option)(Char* arg) +{ + if (0 == VG_(strcmp)(arg, "--skip-plt=yes")) + CLG_(clo).skip_plt = True; + else if (0 == VG_(strcmp)(arg, "--skip-plt=no")) + CLG_(clo).skip_plt = False; + + else if (0 == VG_(strcmp)(arg, "--collect-jumps=yes")) + CLG_(clo).collect_jumps = True; + else if (0 == VG_(strcmp)(arg, "--collect-jumps=no")) + CLG_(clo).collect_jumps = False; + /* compatibility alias, deprecated option */ + else if (0 == VG_(strcmp)(arg, "--trace-jump=yes")) + CLG_(clo).collect_jumps = True; + else if (0 == VG_(strcmp)(arg, "--trace-jump=no")) + CLG_(clo).collect_jumps = False; + + else if (0 == VG_(strcmp)(arg, "--combine-dumps=yes")) + CLG_(clo).combine_dumps = True; + else if (0 == VG_(strcmp)(arg, "--combine-dumps=no")) + CLG_(clo).combine_dumps = False; + + else if (0 == VG_(strcmp)(arg, "--collect-atstart=yes")) + CLG_(clo).collect_atstart = True; + else if (0 == VG_(strcmp)(arg, "--collect-atstart=no")) + CLG_(clo).collect_atstart = False; + + else if (0 == VG_(strcmp)(arg, "--instr-atstart=yes")) + CLG_(clo).instrument_atstart = True; + else if (0 == VG_(strcmp)(arg, "--instr-atstart=no")) + CLG_(clo).instrument_atstart = False; + + else if (0 == VG_(strcmp)(arg, "--separate-threads=yes")) + CLG_(clo).separate_threads = True; + else if (0 == VG_(strcmp)(arg, "--separate-threads=no")) + CLG_(clo).separate_threads = False; + + else if (0 == VG_(strcmp)(arg, "--compress-strings=yes")) + CLG_(clo).compress_strings = True; + else if (0 == VG_(strcmp)(arg, "--compress-strings=no")) + CLG_(clo).compress_strings = False; + + else if (0 == VG_(strcmp)(arg, "--compress-mangled=yes")) + CLG_(clo).compress_mangled = 
True; + else if (0 == VG_(strcmp)(arg, "--compress-mangled=no")) + CLG_(clo).compress_mangled = False; + + else if (0 == VG_(strcmp)(arg, "--compress-pos=yes")) + CLG_(clo).compress_pos = True; + else if (0 == VG_(strcmp)(arg, "--compress-pos=no")) + CLG_(clo).compress_pos = False; + + else if (0 == VG_(strncmp)(arg, "--fn-skip=", 10)) { + fn_config* fnc = get_fnc(arg+10); + fnc->skip = CONFIG_TRUE; + } + + else if (0 == VG_(strncmp)(arg, "--dump-before=", 14)) { + fn_config* fnc = get_fnc(arg+14); + fnc->dump_before = CONFIG_TRUE; + } + + else if (0 == VG_(strncmp)(arg, "--zero-before=", 14)) { + fn_config* fnc = get_fnc(arg+14); + fnc->zero_before = CONFIG_TRUE; + } + + else if (0 == VG_(strncmp)(arg, "--dump-after=", 13)) { + fn_config* fnc = get_fnc(arg+13); + fnc->dump_after = CONFIG_TRUE; + } + + else if (0 == VG_(strncmp)(arg, "--toggle-collect=", 17)) { + fn_config* fnc = get_fnc(arg+17); + fnc->toggle_collect = CONFIG_TRUE; + /* defaults to initial collection off */ + CLG_(clo).collect_atstart = False; + } + + else if (0 == VG_(strncmp)(arg, "--separate-recs=", 16)) + CLG_(clo).separate_recursions = (Int)VG_(atoll)(&arg[16]); + + /* workaround to find runtime_resolve (needs special handling) */ + else if (0 == VG_(strncmp)(arg, "--pop-on-jump=", 14)) { + fn_config* fnc = get_fnc(arg+14); + fnc->pop_on_jump = CONFIG_TRUE; + } + +#if CLG_ENABLE_DEBUG + else if (0 == VG_(strncmp)(arg, "--ct-verbose=", 13)) + CLG_(clo).verbose = (Int)VG_(atoll)(&arg[13]); + + else if (0 == VG_(strncmp)(arg, "--ct-vstart=", 12)) + CLG_(clo).verbose_start = (ULong)VG_(atoll)(&arg[12]); + + else if (0 == VG_(strncmp)(arg, "--ct-verbose", 12)) { + UInt n; + fn_config* fnc; + Char* s = getUInt(arg+12, &n); + if ((n == 0) || *s != '=') return False; + fnc = get_fnc(s+1); + fnc->verbosity = n; + } +#endif + + else if (0 == VG_(strncmp)(arg, "--separate-callers=", 19)) { + if (0 == VG_(strcmp)(arg+19, "auto")) + CLG_(clo).separate_callers = CONFIG_AUTO; + else + CLG_(clo).separate_callers = (Int)VG_(atoll)(&arg[19]); + } + + else if (0 == VG_(strncmp)(arg, "--fn-group", 10)) { + UInt n; + fn_config* fnc; + Char* s = getUInt(arg+10, &n); + if ((n == 0) || *s != '=') return False; + fnc = get_fnc(s+1); + fnc->group = n; + } + + else if (0 == VG_(strncmp)(arg, "--separate-callers", 18)) { + UInt n; + fn_config* fnc; + Char* s = getUInt(arg+18, &n); + if ((n == 0) || *s != '=') return False; + fnc = get_fnc(s+1); + fnc->separate_callers = n; + } + + else if (0 == VG_(strncmp)(arg, "--separate-recs", 15)) { + UInt n; + fn_config* fnc; + Char* s = getUInt(arg+15, &n); + if ((n == 0) || *s != '=') return False; + fnc = get_fnc(s+1); + fnc->separate_recursions = n; + } + + else if (0 == VG_(strncmp)(arg, "--base=", 7)) + CLG_(clo).filename_base = VG_(strdup)(arg+7); + + else if (0 == VG_(strcmp)(arg, "--mangle-names=yes")) + CLG_(clo).mangle_names = True; + else if (0 == VG_(strcmp)(arg, "--mangle-names=no")) + CLG_(clo).mangle_names = False; + + else if (0 == VG_(strcmp)(arg, "--skip-direct-rec=yes")) + CLG_(clo).skip_direct_recursion = True; + else if (0 == VG_(strcmp)(arg, "--skip-direct-rec=no")) + CLG_(clo).skip_direct_recursion = False; + + else if (0 == VG_(strcmp)(arg, "--dump-bbs=yes")) + CLG_(clo).dump_bbs = True; + else if (0 == VG_(strcmp)(arg, "--dump-bbs=no")) + CLG_(clo).dump_bbs = False; + + else if (0 == VG_(strcmp)(arg, "--dump-line=yes")) + CLG_(clo).dump_line = True; + else if (0 == VG_(strcmp)(arg, "--dump-line=no")) + CLG_(clo).dump_line = False; + + else if (0 == VG_(strcmp)(arg, 
"--dump-instr=yes")) + CLG_(clo).dump_instr = True; + else if (0 == VG_(strcmp)(arg, "--dump-instr=no")) + CLG_(clo).dump_instr = False; + + else if (0 == VG_(strcmp)(arg, "--dump-bb=yes")) + CLG_(clo).dump_bb = True; + else if (0 == VG_(strcmp)(arg, "--dump-bb=no")) + CLG_(clo).dump_bb = False; + + else if (0 == VG_(strncmp)(arg, "--dump-every-bb=", 16)) + CLG_(clo).dump_every_bb = (Int)VG_(atoll)(&arg[16]); + + + else if (0 == VG_(strcmp)(arg, "--collect-alloc=yes")) + CLG_(clo).collect_alloc = True; + else if (0 == VG_(strcmp)(arg, "--collect-alloc=no")) + CLG_(clo).collect_alloc = False; + + else if (0 == VG_(strcmp)(arg, "--collect-systime=yes")) + CLG_(clo).collect_systime = True; + else if (0 == VG_(strcmp)(arg, "--collect-systime=no")) + CLG_(clo).collect_systime = False; + + else if (0 == VG_(strcmp)(arg, "--simulate-cache=yes")) + CLG_(clo).simulate_cache = True; + else if (0 == VG_(strcmp)(arg, "--simulate-cache=no")) + CLG_(clo).simulate_cache = False; + + else { + Bool isCachesimOption = (*CLG_(cachesim).parse_opt)(arg); + + /* cache simulator is used if a simulator option is given */ + if (isCachesimOption) + CLG_(clo).simulate_cache = True; + + return isCachesimOption; + } + + return True; +} + +void CLG_(print_usage)(void) +{ + VG_(printf)( +"\n dump creation options:\n" +" --base= Prefix for profile files [" DEFAULT_DUMPNAME "]\n" +" --dump-line=no|yes Dump source lines of costs? [yes]\n" +" --dump-instr=no|yes Dump instruction address of costs? [no]\n" +" --compress-strings=no|yes Compress strings in profile dump? [yes]\n" +" --compress-pos=no|yes Compress positions in profile dump? [yes]\n" +" --combine-dumps=no|yes Concat all dumps into same file [no]\n" +#if CLG_EXPERIMENTAL +" --compress-events=no|yes Compress events in profile dump? [no]\n" +" --dump-bb=no|yes Dump basic block address of costs? [no]\n" +" --dump-bbs=no|yes Dump basic block info? [no]\n" +" --dump-skipped=no|yes Dump info on skipped functions in calls? [no]\n" +" --mangle-names=no|yes Mangle separation into names? [yes]\n" +#endif + +"\n activity options (for interactivity use callgrind_control):\n" +" --dump-every-bb= Dump every basic blocks [0=never]\n" +" --dump-before= Dump when entering function\n" +" --zero-before= Zero all costs when entering function\n" +" --dump-after= Dump when leaving function\n" +#if CLG_EXPERIMENTAL +" --dump-objs=no|yes Dump static object information [no]\n" +#endif + +"\n data collection options:\n" +" --instr-atstart=no|yes Do instrumentation at callgrind start [yes]\n" +" --collect-atstart=no|yes Collect at process/thread start [yes]\n" +" --toggle-collect= Toggle collection on enter/leave function\n" +" --collect-jumps=no|yes Collect jumps? [no]\n" +#if CLG_EXPERIMENTAL +" --collect-alloc=no|yes Collect memory allocation info? [no]\n" +#endif +" --collect-systime=no|yes Collect system call time info? [no]\n" + +"\n cost entity separation options:\n" +" --separate-threads=no|yes Separate data per thread [no]\n" +" --separate-callers= Separate functions by call chain length [0]\n" +" --separate-recs= Separate function recursions upto level [2]\n" +" --skip-plt=no|yes Ignore calls to/from PLT sections? [yes]\n" +" --separate-recs= Separate recursions for function \n" +" --separate-callers= Separate callers for function \n" +" --skip-direct-rec=no|yes Ignore direct recursions? 
[yes]\n" +" --fn-skip= Ignore calls to/from function?\n" +#if CLG_EXPERIMENTAL +" --fn-group= Put function into separation group \n" +#endif + ); + + (*CLG_(cachesim).print_opts)(); + +// VG_(printf)("\n" +// " For full callgrind documentation, see\n" +// " "VG_PREFIX"/share/doc/callgrind/html/callgrind.html\n\n"); +} + +void CLG_(print_debug_usage)(void) +{ + VG_(printf)( + +#if CLG_ENABLE_DEBUG +" --ct-verbose= Verbosity of standard debug output [0]\n" +" --ct-vstart= Only be verbose after basic block [0]\n" +" --ct-verbose= Verbosity while in \n" +#else +" (none)\n" +#endif + + ); +} + + +void CLG_(set_clo_defaults)(void) +{ + /* Default values for command line arguments */ + + /* dump options */ + CLG_(clo).filename_base = 0; + CLG_(clo).combine_dumps = False; + CLG_(clo).compress_strings = True; + CLG_(clo).compress_mangled = False; + CLG_(clo).compress_events = False; + CLG_(clo).compress_pos = True; + CLG_(clo).mangle_names = True; + CLG_(clo).dump_line = True; + CLG_(clo).dump_instr = False; + CLG_(clo).dump_bb = False; + CLG_(clo).dump_bbs = False; + + CLG_(clo).dump_every_bb = 0; + + /* Collection */ + CLG_(clo).separate_threads = False; + CLG_(clo).collect_atstart = True; + CLG_(clo).collect_jumps = False; + CLG_(clo).collect_alloc = False; + CLG_(clo).collect_systime = False; + + CLG_(clo).skip_plt = True; + CLG_(clo).separate_callers = 0; + CLG_(clo).separate_recursions = 2; + CLG_(clo).skip_direct_recursion = False; + + /* Instrumentation */ + CLG_(clo).instrument_atstart = True; + CLG_(clo).simulate_cache = False; + +#if CLG_ENABLE_DEBUG + CLG_(clo).verbose = 0; + CLG_(clo).verbose_start = 0; +#endif +} diff --git a/callgrind/command.c b/callgrind/command.c new file mode 100644 index 0000000000..23c14d9025 --- /dev/null +++ b/callgrind/command.c @@ -0,0 +1,517 @@ +/* + This file is part of Callgrind, a Valgrind skin for call graph + profiling programs. + + Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) + + This skin is derived from and contains lot of code from Cachegrind + Copyright (C) 2002 Nicholas Nethercote (njn25@cam.ac.uk) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +/* + * Functions related to interactive commands via "callgrind.cmd" + */ + +#include "config.h" +#include "global.h" + +#include // VG_N_THREADS + +static Char outbuf[FILENAME_LEN + FN_NAME_LEN + OBJ_NAME_LEN]; + +static Char* command_file = 0; +static Char* command_file2 = 0; +static Char* result_file = 0; +static Char* info_file = 0; +static Char* dump_base = 0; + +static Bool command_inited = False; + +void CLG_(init_command)(Char* dir, Char* dumps) +{ + Int fd, size; + SysRes res; + + dump_base = dumps; + + size = VG_(strlen)(dir) + VG_(strlen)(DEFAULT_COMMANDNAME) +10; + command_file = (char*) CLG_MALLOC(size); + CLG_ASSERT(command_file != 0); + VG_(sprintf)(command_file, "%s/%s.%d", + dir, DEFAULT_COMMANDNAME, VG_(getpid)()); + + /* This is for compatibility with the "Force Now" Button of current + * KCachegrind releases, as it doesn't use ".pid" to distinguish + * different callgrind instances from same base directory. + * Should be removed sometimes in the future (29.10.03) + */ + command_file2 = (char*) CLG_MALLOC(size); + CLG_ASSERT(command_file2 != 0); + VG_(sprintf)(command_file2, "%s/%s", + dir, DEFAULT_COMMANDNAME); + + size = VG_(strlen)(dir) + VG_(strlen)(DEFAULT_RESULTNAME) +10; + result_file = (char*) CLG_MALLOC(size); + CLG_ASSERT(result_file != 0); + VG_(sprintf)(result_file, "%s/%s.%d", + dir, DEFAULT_RESULTNAME, VG_(getpid)()); + + info_file = (char*) CLG_MALLOC(VG_(strlen)(DEFAULT_INFONAME) + 10); + CLG_ASSERT(info_file != 0); + VG_(sprintf)(info_file, "%s.%d", DEFAULT_INFONAME, VG_(getpid)()); + + CLG_DEBUG(1, " dump file base: '%s'\n", dump_base); + CLG_DEBUG(1, " command file: '%s'\n", command_file); + CLG_DEBUG(1, " result file: '%s'\n", result_file); + CLG_DEBUG(1, " info file: '%s'\n", info_file); + + /* create info file to indicate that we are running */ + res = VG_(open)(info_file, VKI_O_WRONLY|VKI_O_TRUNC, 0); + if (res.isError) { + res = VG_(open)(info_file, VKI_O_CREAT|VKI_O_WRONLY, + VKI_S_IRUSR|VKI_S_IWUSR); + if (res.isError) { + VG_(message)(Vg_DebugMsg, + "warning: can't write info file '%s'", info_file); + info_file = 0; + fd = -1; + } + } + if (!res.isError) + fd = (Int) res.val; + if (fd>=0) { + Char buf[512]; + Int i; + + VG_(sprintf)(buf, + "# This file is generated by Callgrind-" VERSION ".\n" + "# It is used to enable controlling the supervision of\n" + "# '%s'\n" + "# by external tools.\n\n", +#if VG_CORE_INTERFACE_VERSION < 9 + VG_(client_argv[0]) +#else + VG_(args_the_exename) +#endif + ); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "version: " VERSION "\n"); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "base: %s\n", dir); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "dumps: %s\n", dump_base); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "control: %s\n", command_file); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "result: %s\n", result_file); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(strcpy)(buf, "cmd:"); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); +#if VG_CORE_INTERFACE_VERSION < 9 + for (i = 0; i < VG_(client_argc); i++) { + if (!VG_(client_argv[i])) continue; + VG_(sprintf)(buf, " %s", VG_(client_argv[i])); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + } +#else + VG_(sprintf)(buf, " %s", VG_(args_the_exename)); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + for (i = 0; i < VG_(args_for_client).used; i++) { + if (!VG_(args_for_client).strs[i]) continue; + VG_(sprintf)(buf, " %s", 
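+       /* (The info file written here is a small key/value protocol read
+        * by callgrind_control. Illustratively -- all paths and the pid
+        * are invented -- it ends up containing:
+        *
+        *   # This file is generated by Callgrind-<version>.
+        *   # It is used to enable controlling the supervision of
+        *   # '/path/to/myprog'
+        *   # by external tools.
+        *
+        *   version: <version>
+        *   base: /tmp/session
+        *   dumps: /tmp/session/callgrind.out.1234
+        *   control: /tmp/session/callgrind.cmd.1234
+        *   result: /tmp/session/callgrind.res.1234
+        *   cmd: /path/to/myprog --arg
+        */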
VG_(args_for_client).strs[i]); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + } +#endif + VG_(write)(fd, "\n", 1); + VG_(close)(fd); + } + + command_inited = True; +} + +void CLG_(finish_command)() +{ + /* unlink info file */ + if (info_file) VG_(unlink)(info_file); +} + + +static Int createRes(Int fd) +{ + SysRes res; + + if (fd > -2) return fd; + + /* fd == -2: No error, but we need to create the file */ + res = VG_(open)(result_file, + VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC, + VKI_S_IRUSR|VKI_S_IWUSR); + + /* VG_(open) can return any negative number on error. Remap errors to -1, + * to not confuse it with our special value -2 + */ + if (res.isError) fd = -1; + else fd = (Int) res.val; + + return fd; +} + +/* Run Info: Fixed information for a callgrind run */ +static Int dump_info(Int fd) +{ + Char* buf = outbuf; + int i; + + if ( (fd = createRes(fd)) <0) return fd; + + /* version */ + VG_(sprintf)(buf, "version: " VERSION "\n"); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + /* "pid:" line */ + VG_(sprintf)(buf, "pid: %d\n", VG_(getpid)()); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + /* "base:" line */ + VG_(sprintf)(buf, "base: %s\n", dump_base); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + /* "cmd:" line */ + VG_(strcpy)(buf, "cmd:"); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); +#if VG_CORE_INTERFACE_VERSION < 9 + for (i = 0; i < VG_(client_argc); i++) { + if (!VG_(client_argv[i])) continue; + VG_(sprintf)(buf, " %s", VG_(client_argv[i])); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + } +#else + VG_(sprintf)(buf, " %s", VG_(args_the_exename)); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + for (i = 0; i < VG_(args_for_client).used; i++) { + if (!VG_(args_for_client).strs[i]) continue; + VG_(sprintf)(buf, " %s", VG_(args_for_client).strs[i]); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + } +#endif + + return fd; +} + + +/* Helper for dump_state */ + +Int dump_fd; + +void static dump_state_of_thread(thread_info* ti) +{ + Char* buf = outbuf; + int t = CLG_(current_tid); + Int p, i; + static FullCost sum = 0, tmp = 0; + BBCC *from, *to; + call_entry* ce; + + p = VG_(sprintf)(buf, "events-%d: ", t); + CLG_(init_cost_lz)( CLG_(sets).full, &sum ); + CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost ); + CLG_(add_diff_cost)( CLG_(sets).full, sum, + ti->lastdump_cost, + ti->states.entry[0]->cost); + CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp ); + p += CLG_(sprint_mappingcost)(buf + p, CLG_(dumpmap), sum); + p += VG_(sprintf)(buf+p, "\n"); + VG_(write)(dump_fd, (void*)buf, p); + + p = VG_(sprintf)(buf, "frames-%d: %d\n", t, + CLG_(current_call_stack).sp); + VG_(write)(dump_fd, (void*)buf, p); + ce = 0; + for(i = 0; i < CLG_(current_call_stack).sp; i++) { + ce = CLG_(get_call_entry)(i); + /* if this frame is skipped, we don't have counters */ + if (!ce->jcc) continue; + + from = ce->jcc->from; + p = VG_(sprintf)(buf, "function-%d-%d: %s\n",t, i, + from->cxt->fn[0]->name); + VG_(write)(dump_fd, (void*)buf, p); + + p = VG_(sprintf)(buf, "calls-%d-%d: ",t, i); + p+= VG_(sprintf)(buf+p, "%llu\n", ce->jcc->call_counter); + VG_(write)(dump_fd, (void*)buf, p); + + /* FIXME: EventSets! 
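+     (The snapshot/delta pattern used here and for the events line above:
+      given a running cost vector `now` and the snapshot `last` taken at
+      the previous dump, the delta is added to `sum` and the snapshot
+      advanced. A sketch of the assumed semantics of
+      CLG_(add_diff_cost)(es, sum, last, now) -- not necessarily the
+      exact implementation:
+
+        for (i = 0; i < es->size; i++) {
+          sum[i] += now[i] - last[i];   // accumulate delta
+          last[i] = now[i];             // advance snapshot
+        }
+
+      The copies through `tmp` then restore the old snapshot, so this
+      state query does not disturb the next real dump.)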
*/ + CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost ); + CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost ); + CLG_(add_diff_cost)( CLG_(sets).full, sum, + ce->enter_cost, CLG_(current_state).cost ); + CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp ); + + p = VG_(sprintf)(buf, "events-%d-%d: ",t, i); + p += CLG_(sprint_mappingcost)(buf + p, CLG_(dumpmap), sum ); + p += VG_(sprintf)(buf+p, "\n"); + VG_(write)(dump_fd, (void*)buf, p); + } + if (ce && ce->jcc) { + to = ce->jcc->to; + p = VG_(sprintf)(buf, "function-%d-%d: %s\n",t, i, + to->cxt->fn[0]->name ); + VG_(write)(dump_fd, (void*)buf, p); + } +} + +/* Dump info on current callgrind state */ +static Int dump_state(Int fd) +{ + Char* buf = outbuf; + thread_info** th; + int t, p; + Int orig_tid = CLG_(current_tid); + + if ( (fd = createRes(fd)) <0) return fd; + + VG_(sprintf)(buf, "instrumentation: %s\n", + CLG_(instrument_state) ? "on":"off"); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + if (!CLG_(instrument_state)) return fd; + + VG_(sprintf)(buf, "executed-bbs: %llu\n", CLG_(stat).bb_executions); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "executed-calls: %llu\n", CLG_(stat).call_counter); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "distinct-bbs: %d\n", CLG_(stat).distinct_bbs); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "distinct-calls: %d\n", CLG_(stat).distinct_jccs); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "distinct-functions: %d\n", CLG_(stat).distinct_fns); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + VG_(sprintf)(buf, "distinct-contexts: %d\n", CLG_(stat).distinct_contexts); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + /* "events:" line. Given here because it will be dynamic in the future */ + p = VG_(sprintf)(buf, "events: "); + CLG_(sprint_eventmapping)(buf+p, CLG_(dumpmap)); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + VG_(write)(fd, "\n", 1); + + /* "part:" line (number of last part. 
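+     (With the key/value lines emitted above, the whole state report
+     reads, illustratively -- all numbers invented:
+
+       instrumentation: on
+       executed-bbs: 7339567
+       executed-calls: 23011
+       distinct-bbs: 4242
+       distinct-calls: 1532
+       distinct-functions: 643
+       distinct-contexts: 701
+       events: Ir Dr Dw
+       part: 2
+       threads: 1
+
+     Regarding "part":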
Is 0 at start */ + VG_(sprintf)(buf, "\npart: %d\n", CLG_(get_dump_counter)()); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + /* threads */ + th = CLG_(get_threads)(); + p = VG_(sprintf)(buf, "threads:"); + for(t=1;t0) { + cmdPos = cmdBuffer; + } + } + + /* force creation of result file if needed */ + fd = -2; + + while((bytesRead>0) && *cmdPos) { + + /* Calculate pointer for next line */ + cmdNextLine = cmdPos+1; + while((bytesRead>0) && *cmdNextLine && (*cmdNextLine != '\n')) { + cmdNextLine++; + bytesRead--; + } + if ((bytesRead>0) && (*cmdNextLine == '\n')) { + *cmdNextLine = 0; + cmdNextLine++; + bytesRead--; + } + + /* Command with integer option */ + if ((*cmdPos >= '0') && (*cmdPos <='9')) { + int value = *cmdPos-'0'; + cmdPos++; + while((*cmdPos >= '0') && (*cmdPos <='9')) { + value = 10*value + (*cmdPos-'0'); + cmdPos++; + } + while((*cmdPos == ' ') || (*cmdPos == '\t')) cmdPos++; + + switch(*cmdPos) { +#if CLG_ENABLE_DEBUG + /* verbosity */ + case 'V': + case 'v': + CLG_(clo).verbose = value; + break; +#endif + default: + break; + } + + cmdPos = cmdNextLine; + continue; + } + + /* Command with boolean/switch option */ + if ((*cmdPos=='+') || + (*cmdPos=='-')) { + int value = (cmdPos[0] == '+'); + cmdPos++; + while((*cmdPos == ' ') || (*cmdPos == '\t')) cmdPos++; + + switch(*cmdPos) { + case 'I': + case 'i': + CLG_(set_instrument_state)("Command", value); + break; + + default: + break; + } + + cmdPos = cmdNextLine; + continue; + } + + /* regular command */ + switch(*cmdPos) { + case 'D': + case 'd': + /* DUMP */ + + /* skip command */ + while(*cmdPos && (*cmdPos != ' ')) cmdPos++; + if (*cmdPos) + VG_(sprintf)(buf, "Dump Command:%s", cmdPos); + else + VG_(sprintf)(buf, "Dump Command"); + CLG_(dump_profile)(buf, False); + break; + + case 'Z': + case 'z': + CLG_(zero_all_cost)(False); + break; + + case 'K': + case 'k': + /* Kill: Delay to be able to remove command file before. */ + do_kill = 1; + break; + + case 'I': + case 'i': + fd = dump_info(fd); + break; + + case 's': + case 'S': + fd = dump_state(fd); + break; + + case 'O': + case 'o': + /* Options Info */ + if ( (fd = createRes(fd)) <0) break; + + VG_(sprintf)(buf, "\ndesc: Option: --skip-plt=%s\n", + CLG_(clo).skip_plt ? "yes" : "no"); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + VG_(sprintf)(buf, "desc: Option: --collect-jumps=%s\n", + CLG_(clo).collect_jumps ? "yes" : "no"); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + VG_(sprintf)(buf, "desc: Option: --separate-recs=%d\n", + CLG_(clo).separate_recursions); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + VG_(sprintf)(buf, "desc: Option: --separate-callers=%d\n", + CLG_(clo).separate_callers); + VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); + + break; + + default: + break; + } + + cmdPos = cmdNextLine; + } + + /* If command executed, delete command file */ + if (cmdPos) VG_(unlink)(cfile); + if (fd>=0) VG_(close)(fd); + + if (do_kill) { + VG_(message)(Vg_UserMsg, + "Killed because of command from %s", cfile); + CLG_(fini)(0); + VG_(exit)(1); + } +} diff --git a/callgrind/context.c b/callgrind/context.c new file mode 100644 index 0000000000..ade251f791 --- /dev/null +++ b/callgrind/context.c @@ -0,0 +1,328 @@ +/*--------------------------------------------------------------------*/ +/*--- Callgrind ---*/ +/*--- ct_context.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Callgrind, a Valgrind tool for call tracing. 
+ + Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "global.h" + + +/*------------------------------------------------------------*/ +/*--- Context operations ---*/ +/*------------------------------------------------------------*/ + +#define N_FNSTACK_INITIAL_ENTRIES 500 +#define N_CXT_INITIAL_ENTRIES 2537 + +fn_stack CLG_(current_fn_stack); + +void CLG_(init_fn_stack)(fn_stack* s) +{ + CLG_ASSERT(s != 0); + + s->size = N_FNSTACK_INITIAL_ENTRIES; + s->bottom = (fn_node**) CLG_MALLOC(s->size * sizeof(fn_node*)); + s->top = s->bottom; + s->bottom[0] = 0; +} + +void CLG_(copy_current_fn_stack)(fn_stack* dst) +{ + CLG_ASSERT(dst != 0); + + dst->size = CLG_(current_fn_stack).size; + dst->bottom = CLG_(current_fn_stack).bottom; + dst->top = CLG_(current_fn_stack).top; +} + +void CLG_(set_current_fn_stack)(fn_stack* s) +{ + CLG_ASSERT(s != 0); + + CLG_(current_fn_stack).size = s->size; + CLG_(current_fn_stack).bottom = s->bottom; + CLG_(current_fn_stack).top = s->top; +} + +static cxt_hash cxts; + +void CLG_(init_cxt_table)() +{ + Int i; + + cxts.size = N_CXT_INITIAL_ENTRIES; + cxts.entries = 0; + cxts.table = (Context**) CLG_MALLOC(cxts.size * sizeof(Context*)); + + for (i = 0; i < cxts.size; i++) + cxts.table[i] = 0; +} + +cxt_hash* CLG_(get_cxt_hash)() +{ + return &cxts; +} + +/* double size of cxt table */ +static void resize_cxt_table(void) +{ + UInt i, new_size, conflicts1 = 0, conflicts2 = 0; + Context **new_table, *curr, *next; + UInt new_idx; + + new_size = 2* cxts.size +3; + new_table = (Context**) CLG_MALLOC(new_size * sizeof(Context*)); + + if (!new_table) return; + + for (i = 0; i < new_size; i++) + new_table[i] = NULL; + + for (i = 0; i < cxts.size; i++) { + if (cxts.table[i] == NULL) continue; + + curr = cxts.table[i]; + while (NULL != curr) { + next = curr->next; + + new_idx = (UInt) (curr->hash % new_size); + + curr->next = new_table[new_idx]; + new_table[new_idx] = curr; + if (curr->next) { + conflicts1++; + if (curr->next->next) + conflicts2++; + } + + curr = next; + } + } + + VG_(free)(cxts.table); + + + CLG_DEBUG(0, "Resize Context Hash: %d => %d (entries %d, conflicts %d/%d)\n", + cxts.size, new_size, + cxts.entries, conflicts1, conflicts2); + + cxts.size = new_size; + cxts.table = new_table; + CLG_(stat).cxt_hash_resizes++; +} + +__inline__ +static UWord cxt_hash_val(fn_node** fn, UInt size) +{ + UWord hash = 0; + UInt count = size; + while(*fn != 0) { + hash = (hash<<7) + (hash>>25) + (UWord)(*fn); + fn--; + count--; + if (count==0) break; + } + return hash; +} + +__inline__ +static Bool is_cxt(UWord hash, fn_node** fn, Context* cxt) +{ + int count; + fn_node** cxt_fn; + + if (hash != cxt->hash) return False; + + count = cxt->size; + cxt_fn = &(cxt->fn[0]); + while((*fn != 0) 
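+        /* (Both cxt_hash_val() above and this comparison walk the
+         * fn_node* array downwards (fn--), combining at most `size`
+         * entries. The hash is a simple shift-add mix,
+         * hash = (hash<<7) + (hash>>25) + ptr, roughly a 7-bit
+         * rotation plus the pointer value; collisions are resolved by
+         * chaining through cxt->next, and each function caches its
+         * most recent context in last_cxt, used as a one-entry LRU in
+         * CLG_(get_cxt)() below.) */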
&& (count>0)) {
+    if (*cxt_fn != *fn) return False;
+    fn--;
+    cxt_fn++;
+    count--;
+  }
+  return True;
+}
+
+/**
+ * Allocate new Context structure
+ */
+static Context* new_cxt(fn_node** fn)
+{
+  Context* new;
+  UInt idx, offset;
+  UWord hash;
+  int size, recs;
+  fn_node* top_fn;
+
+  CLG_ASSERT(fn);
+  top_fn = *fn;
+  if (top_fn == 0) return 0;
+
+  size = top_fn->separate_callers +1;
+  recs = top_fn->separate_recursions;
+  if (recs<1) recs=1;
+
+  /* check fill degree of context hash table and resize if needed (>80%) */
+  cxts.entries++;
+  if (10 * cxts.entries / cxts.size > 8)
+    resize_cxt_table();
+
+  new = (Context*) CLG_MALLOC(sizeof(Context)+sizeof(fn_node*)*size);
+
+  // hash value calculation similar to cxt_hash_val(), but additionally
+  // copying function pointers in one run
+  hash = 0;
+  offset = 0;
+  while(*fn != 0) {
+    hash = (hash<<7) + (hash>>25) + (UWord)(*fn);
+    new->fn[offset] = *fn;
+    offset++;
+    fn--;
+    if (offset >= size) break;
+  }
+  if (offset < size) size = offset;
+
+  new->size        = size;
+  new->base_number = CLG_(stat).context_counter;
+  new->hash        = hash;
+
+  CLG_(stat).context_counter += recs;
+  CLG_(stat).distinct_contexts++;
+
+  /* insert into Context hash table */
+  idx = (UInt) (hash % cxts.size);
+  new->next = cxts.table[idx];
+  cxts.table[idx] = new;
+
+#if CLG_ENABLE_DEBUG
+  CLG_DEBUGIF(3) {
+    VG_(printf)("  new_cxt 0x%p: ", new);
+    CLG_(print_cxt)(12, new, 0);
+  }
+#endif
+
+  return new;
+}
+
+/* get the Context structure for current context */
+Context* CLG_(get_cxt)(fn_node** fn)
+{
+  Context* cxt;
+  UInt size, idx;
+  UWord hash;
+
+  CLG_ASSERT(fn != 0);
+  if (*fn == 0) return 0;
+  size = (*fn)->separate_callers+1;
+  if (size<=0) { size = -size+1; }
+
+  CLG_DEBUG(5, "+ get_cxt(fn '%s'): size %d\n",
+            (*fn)->name, size);
+
+  hash = cxt_hash_val(fn, size);
+
+  if ( ((cxt = (*fn)->last_cxt) != 0) && is_cxt(hash, fn, cxt)) {
+    CLG_DEBUG(5, "- get_cxt: %p\n", cxt);
+    return cxt;
+  }
+
+  CLG_(stat).cxt_lru_misses++;
+
+  idx = (UInt) (hash % cxts.size);
+  cxt = cxts.table[idx];
+
+  while(cxt) {
+    if (is_cxt(hash,fn,cxt)) break;
+    cxt = cxt->next;
+  }
+
+  if (!cxt)
+    cxt = new_cxt(fn);
+
+  (*fn)->last_cxt = cxt;
+
+  CLG_DEBUG(5, "- get_cxt: %p\n", cxt);
+
+  return cxt;
+}
+
+
+/**
+ * Change execution context by calling a new function from current context
+ */
+void CLG_(push_cxt)(fn_node* fn)
+{
+  call_stack* cs = &CLG_(current_call_stack);
+  Int fn_entries;
+
+  /* save old context on stack (even if not changed at all!) */
+  CLG_ASSERT(cs->sp < cs->size);
+  CLG_ASSERT(cs->entry[cs->sp].cxt == 0);
+  cs->entry[cs->sp].cxt = CLG_(current_state).cxt;
+  cs->entry[cs->sp].fn_sp = CLG_(current_fn_stack).top - CLG_(current_fn_stack).bottom;
+
+  if (*(CLG_(current_fn_stack).top) == fn) return;
+  if (fn && (fn->group>0) &&
+      ((*(CLG_(current_fn_stack).top))->group == fn->group)) return;
+
+  /* resizing needed ? */
+  fn_entries = CLG_(current_fn_stack).top - CLG_(current_fn_stack).bottom;
+  if (fn_entries == CLG_(current_fn_stack).size-1) {
+    int new_size = CLG_(current_fn_stack).size *2;
+    fn_node** new = (fn_node**) CLG_MALLOC(new_size * sizeof(fn_node*));
+    int i;
+    for(i=0;i<fn_entries;i++)
+      new[i] = CLG_(current_fn_stack).bottom[i];
+    VG_(free)(CLG_(current_fn_stack).bottom);
+    CLG_(current_fn_stack).top = new + fn_entries;
+    CLG_(current_fn_stack).bottom = new;
+
+    CLG_DEBUG(0, "Resize fn stack: %d => %d (pushing '%s')\n",
+              CLG_(current_fn_stack).size, new_size,
+              fn ?
fn->name : (Char*)"0x0"); + + CLG_(current_fn_stack).size = new_size; + } + + if (*(CLG_(current_fn_stack).top) == 0) { + UInt *pactive; + + /* this is first function: increment its active count */ + CLG_ASSERT(fn != 0); + pactive = CLG_(get_fn_entry)(fn->number); + (*pactive)++; + } + + CLG_(current_fn_stack).top++; + *(CLG_(current_fn_stack).top) = fn; + CLG_(current_state).cxt = CLG_(get_cxt)(CLG_(current_fn_stack).top); + + CLG_DEBUG(5, " push_cxt(fn '%s'): %d\n", + fn ? fn->name : (Char*)"0x0", + CLG_(current_fn_stack).top - CLG_(current_fn_stack).bottom); +} + diff --git a/callgrind/costs.c b/callgrind/costs.c new file mode 100644 index 0000000000..1fa1b6108d --- /dev/null +++ b/callgrind/costs.c @@ -0,0 +1,79 @@ +/*--------------------------------------------------------------------*/ +/*--- Callgrind ---*/ +/*--- ct_costs.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Callgrind, a Valgrind tool for call tracing. + + Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "global.h" + +#include + +#define COSTCHUNK_SIZE 100000 + +UInt CLG_(costarray_entries) = 0; +UInt CLG_(costarray_chunks) = 0; +static CostChunk* cost_chunk_base = 0; +static CostChunk* cost_chunk_current = 0; + +ULong* CLG_(get_costarray)(Int size) +{ + ULong* ptr; + + if (!cost_chunk_current || + (cost_chunk_current->size - cost_chunk_current->used < size)) { + CostChunk* cc = (CostChunk*) CLG_MALLOC(sizeof(CostChunk) + + COSTCHUNK_SIZE * sizeof(ULong)); + cc->size = COSTCHUNK_SIZE; + cc->used = 0; + cc->next = 0; + + if (cost_chunk_current) + cost_chunk_current->next = cc; + cost_chunk_current = cc; + + if (!cost_chunk_base) cost_chunk_base = cc; + + CLG_(costarray_chunks)++; + } + + ptr = &(cost_chunk_current->data[cost_chunk_current->used]); + cost_chunk_current->used += size; + + CLG_(costarray_entries) += size; + + return ptr; +} + +void CLG_(free_costarrays)() +{ + CostChunk* cc = cost_chunk_base, *cc_next; + while(cc) { + cc_next = cc->next; + VG_(free)(cc); + cc = cc_next; + } + cost_chunk_base = 0; + cost_chunk_current = 0; +} diff --git a/callgrind/costs.h b/callgrind/costs.h new file mode 100644 index 0000000000..5e5ccfdd0c --- /dev/null +++ b/callgrind/costs.h @@ -0,0 +1,35 @@ +/*--------------------------------------------------------------------*/ +/*--- Callgrind ---*/ +/*--- ct_costs.h ---*/ +/*--- (C) 2004, Josef Weidendorfer ---*/ +/*--------------------------------------------------------------------*/ + +#ifndef CT_COSTS +#define CT_COSTS + +#include "pub_tool_basics.h" + +#define CLG_(str) VGAPPEND(vgCallgrind_,str) + +extern UInt CLG_(costarray_entries); +extern UInt CLG_(costarray_chunks); + +/* Array of 64bit costs. 
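+ * (The matching allocator in costs.c above is a chunked bump allocator:
+ * CLG_(get_costarray)(n) returns n consecutive ULong slots from the
+ * current chunk and opens a fresh COSTCHUNK_SIZE chunk when the rest is
+ * too small, so a returned array never straddles two chunks.
+ * Illustrative use, assuming an event set with three counters:
+ *
+ *   ULong* cost = CLG_(get_costarray)(3);  // one slot per event
+ *   cost[0] = cost[1] = cost[2] = 0;
+ *
+ * Pointers stay valid until CLG_(free_costarrays)() at termination.)
+ *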
This is separated from other structs
+ * to support a dynamic number of costs for a cost item.
+ * Chunks are allocated on demand, and deallocated at program termination.
+ */
+typedef struct _CostChunk CostChunk;
+struct _CostChunk {
+  Int size;
+  Int used;
+  CostChunk *next, *prev;
+  ULong data[0];
+};
+
+/* Allocate a number of 64bit cost values.
+ * Typically used from ct_events.c */
+ULong* CLG_(get_costarray)(Int size);
+void CLG_(free_costarrays)(void);
+
+
+#endif /* CT_COSTS */
diff --git a/callgrind/debug.c b/callgrind/debug.c
new file mode 100644
index 0000000000..2e3ef608cb
--- /dev/null
+++ b/callgrind/debug.c
@@ -0,0 +1,453 @@
+/*
+   This file is part of Callgrind, a Valgrind skin for call-graph
+   profiling of programs.
+
+   Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+   This skin is derived from and contains a lot of code from Cachegrind
+   Copyright (C) 2002 Nicholas Nethercote (njn25@cam.ac.uk)
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+#include "events.h"
+
+/* If debugging mode is off, dummy functions are provided (see below).
+ */
+#if CLG_ENABLE_DEBUG
+
+/*------------------------------------------------------------*/
+/*--- Debug output helpers                                 ---*/
+/*------------------------------------------------------------*/
+
+static void print_indent(int s)
+{
+  /* max of 40 spaces */
+  char sp[] = "                                        ";
+  if (s>40) s=40;
+  VG_(printf)(sp+40-s);
+}
+
+void CLG_(print_bb)(int s, BB* bb)
+{
+  if (s<0) {
+    s = -s;
+    print_indent(s);
+  }
+
+  VG_(printf)("BB %p (Obj '%s')", bb_addr(bb), bb->obj->name);
+}
+
+static
+void print_mangled_cxt(Context* cxt, int rec_index)
+{
+  int i;
+
+  if (!cxt)
+    VG_(printf)("(none)");
+  else {
+    VG_(printf)("%s", cxt->fn[0]->name);
+    if (rec_index >0)
+      VG_(printf)("'%d", rec_index +1);
+    for(i=1;i<cxt->size;i++)
+      VG_(printf)("'%s", cxt->fn[i]->name);
+  }
+}
+
+
+
+void CLG_(print_cxt)(int s, Context* cxt, int rec_index)
+{
+  if (s<0) {
+    s = -s;
+    print_indent(s);
+  }
+
+  if (cxt) {
+    UInt *pactive = CLG_(get_fn_entry)(cxt->fn[0]->number);
+    CLG_ASSERT(rec_index < cxt->fn[0]->separate_recursions);
+
+    VG_(printf)("Cxt %d", cxt->base_number + rec_index);
+    if (*pactive>0)
+      VG_(printf)(" [active=%d]", *pactive);
+    VG_(printf)(": ");
+    print_mangled_cxt(cxt, rec_index);
+    VG_(printf)("\n");
+  }
+  else
+    VG_(printf)("(no context)\n");
+}
+
+void CLG_(print_execstate)(int s, exec_state* es)
+{
+  if (s<0) {
+    s = -s;
+    print_indent(s);
+  }
+
+  if (!es) {
+    VG_(printf)("ExecState 0x0\n");
+    return;
+  }
+
+  VG_(printf)("ExecState [Sig %d, collect %s, nonskipped %p]: jmps_passed %d\n",
+              es->sig, es->collect?"yes":"no",
+              es->nonskipped, es->jmps_passed);
+}
+
+
+void CLG_(print_bbcc)(int s, BBCC* bbcc, Bool jumpaddr)
+{
+  BB* bb;
+
+  if (s<0) {
+    s = -s;
+    print_indent(s);
+  }
+
+  if (!bbcc) {
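+    /* (All printers here indent through print_indent() above, which
+     * prints the *last* s characters of a 40-character space buffer by
+     * passing sp+40-s to VG_(printf); e.g. s==12 starts 28 characters
+     * in, printing exactly 12 spaces, with s clamped to 40.) */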
VG_(printf)("BBCC 0x0\n"); + return; + } + + bb = bbcc->bb; + CLG_ASSERT(bb!=0); + +#if 0 + if (jumpaddr) + VG_(printf)("%s +%p=%p, ", + bb->obj->name + bb->obj->last_slash_pos, + bb->jmp_offset, bb_jmpaddr(bb)); + else +#endif + VG_(printf)("%s +%p=%p, ", + bb->obj->name + bb->obj->last_slash_pos, + bb->offset, bb_addr(bb)); + CLG_(print_cxt)(s+8, bbcc->cxt, bbcc->rec_index); +} + +void CLG_(print_eventset)(int s, EventSet* es) +{ + int i; + + if (s<0) { + s = -s; + print_indent(s); + } + + if (!es) { + VG_(printf)("(EventSet not set)\n"); + return; + } + + VG_(printf)("%5s (Size/Cap %d/%d): ", + es->name, es->size, es->capacity); + + if (es->size == 0) + VG_(printf)("-"); + else { + for(i=0; i< es->size; i++) { + if (i>0) { + VG_(printf)(" "); + if (es->e[i-1].nextTop == i) + VG_(printf)("| "); + } + VG_(printf)(es->e[i].type->name); + } + } + VG_(printf)("\n"); +} + + +void CLG_(print_cost)(int s, EventSet* es, ULong* c) +{ + Int i, pos; + + if (s<0) { + s = -s; + print_indent(s); + } + + if (!es) { + VG_(printf)("Cost (Nothing, EventSet not set)\n"); + return; + } + if (!c) { + VG_(printf)("Cost (Null, EventSet %s)\n", es->name); + return; + } + + if (es->size == 0) { + VG_(printf)("Cost (Nothing, EventSet %s with len 0)\n", es->name); + return; + } + + pos = s; + pos += VG_(printf)("Cost %s [%p]: %s %llu", es->name, c, es->e[0].type->name, c[0]); + + i = 1; + while(isize) { + if (pos > 70) { + VG_(printf)(",\n"); + print_indent(s+5); + pos = s+5; + } + else + pos += VG_(printf)(", "); + pos += VG_(printf)("%s %llu", es->e[i].type->name, c[i]); + i++; + } + VG_(printf)("\n"); +} + + +void CLG_(print_short_jcc)(jCC* jcc) +{ + if (jcc) + VG_(printf)("%p => %p [%llu/%llu,%llu,%llu]", + bb_jmpaddr(jcc->from->bb), + bb_addr(jcc->to->bb), + jcc->call_counter, + jcc->cost ? jcc->cost[CLG_(sets).off_sim_Ir]:0, + jcc->cost ? jcc->cost[CLG_(sets).off_sim_Dr]:0, + jcc->cost ? jcc->cost[CLG_(sets).off_sim_Dw]:0); + else + VG_(printf)("[Skipped JCC]"); +} + +void CLG_(print_jcc)(int s, jCC* jcc) +{ + if (s<0) { + s = -s; + print_indent(s); + } + + if (!jcc) { + VG_(printf)("JCC to skipped function\n"); + return; + } + VG_(printf)("JCC %p from ", jcc); + CLG_(print_bbcc)(s+9, jcc->from, True); + print_indent(s+4); + VG_(printf)("to "); + CLG_(print_bbcc)(s+9, jcc->to, False); + print_indent(s+4); + VG_(printf)("Calls %llu\n", jcc->call_counter); + print_indent(s+4); + CLG_(print_cost)(s+9, CLG_(sets).full, jcc->cost); +} + +/* dump out the current call stack */ +void CLG_(print_stackentry)(int s, int sp) +{ + call_entry* ce; + + if (s<0) { + s = -s; + print_indent(s); + } + + ce = CLG_(get_call_entry)(sp); + VG_(printf)("[%-2d] SP %p, RA %p", sp, ce->sp, ce->ret_addr); + if (ce->nonskipped) + VG_(printf)(" NonSkipped BB %p / %s", + bb_addr(ce->nonskipped->bb), + ce->nonskipped->cxt->fn[0]->name); + VG_(printf)("\n"); + print_indent(s+5); + CLG_(print_jcc)(5,ce->jcc); +} + +/* debug output */ +#if 0 +static void print_call_stack() +{ + int c; + + VG_(printf)("Call Stack:\n"); + for(c=0;cbb), + (bbcc->bb->sect_kind == Vg_SectText) ? 'T' : + (bbcc->bb->sect_kind == Vg_SectData) ? 'D' : + (bbcc->bb->sect_kind == Vg_SectBSS) ? 'B' : + (bbcc->bb->sect_kind == Vg_SectGOT) ? 'G' : + (bbcc->bb->sect_kind == Vg_SectPLT) ? 
'P' : 'U', + bbcc->cxt->base_number+bbcc->rec_index); + print_mangled_cxt(bbcc->cxt, bbcc->rec_index); + + obj = bbcc->cxt->fn[0]->file->obj; + if (obj->name[0]) + VG_(printf)(" %s", obj->name+obj->last_slash_pos); + + if (VG_(strcmp)(bbcc->cxt->fn[0]->file->name, "???") !=0) { + VG_(printf)(" %s", bbcc->cxt->fn[0]->file->name); + if ((bbcc->cxt->fn[0] == bbcc->bb->fn) && (bbcc->bb->line>0)) + VG_(printf)(":%d", bbcc->bb->line); + } +} + +void CLG_(print_bbcc_cost)(int s, BBCC* bbcc) +{ + BB* bb; + Int i, cjmpNo; + ULong ecounter; + + if (s<0) { + s = -s; + print_indent(s); + } + + if (!bbcc) { + VG_(printf)("BBCC 0x0\n"); + return; + } + + bb = bbcc->bb; + CLG_ASSERT(bb!=0); + + CLG_(print_bbcc)(s, bbcc, False); + + ecounter = bbcc->ecounter_sum; + + print_indent(s+2); + VG_(printf)("ECounter: sum %d ", ecounter); + for(i=0; icjmp_count; i++) { + VG_(printf)("[%d]=%d ", + bb->jmp[i].instr, bbcc->jmp[i].ecounter); + } + VG_(printf)("\n"); + + cjmpNo = 0; + for(i=0; iinstr_count; i++) { + InstrInfo* ii = &(bb->instr[i]); + print_indent(s+2); + VG_(printf)("[%2d] IOff %2d ecnt %3d ", + i, ii->instr_offset, ecounter); + CLG_(print_cost)(s+5, ii->eventset, bbcc->cost + ii->cost_offset); + + /* update execution counter */ + if (cjmpNo < bb->cjmp_count) + if (bb->jmp[cjmpNo].instr == i) { + ecounter -= bbcc->jmp[cjmpNo].ecounter; + cjmpNo++; + } + } +} + + +/* dump out an address with source info if available */ +void CLG_(print_addr)(Addr addr) +{ + Char fl_buf[FILENAME_LEN]; + Char fn_buf[FN_NAME_LEN]; + const UChar* obj_name; + SegInfo* si; + int ln, i=0, opos=0; + + if (addr == 0) { + VG_(printf)("%08x", addr); + return; + } + + CLG_(get_debug_info)(addr, fl_buf, fn_buf, &ln, &si); + + if (VG_(strcmp)(fn_buf,"???")==0) + VG_(printf)("%p", addr); + else + VG_(printf)("%p %s", addr, fn_buf); + + if (si) { + obj_name = VG_(seginfo_filename)(si); + if (obj_name) { + while(obj_name[i]) { + if (obj_name[i]=='/') opos = i+1; + i++; + } + if (obj_name[0]) + VG_(printf)(" %s", obj_name+opos); + } + } + + if (ln>0) + VG_(printf)(" (%s:%u)", fl_buf,ln); +} + +void CLG_(print_addr_ln)(Addr addr) +{ + CLG_(print_addr)(addr); + VG_(printf)("\n"); +} + +static ULong bb_written = 0; + +void CLG_(print_bbno)(void) +{ + if (bb_written != CLG_(stat).bb_executions) { + bb_written = CLG_(stat).bb_executions; + VG_(printf)("BB# %llu\n",CLG_(stat).bb_executions); + } +} + +void CLG_(print_context)(void) +{ + BBCC* bbcc; + + CLG_DEBUG(0,"In tid %d [%d] ", + CLG_(current_tid), CLG_(current_call_stack).sp); + bbcc = CLG_(current_state).bbcc; + print_mangled_cxt(CLG_(current_state).cxt, + bbcc ? 
bbcc->rec_index : 0); + VG_(printf)("\n"); +} + +void* CLG_(malloc)(UWord s, char* f) +{ + CLG_DEBUG(3, "Malloc(%d) in %s.\n", s, f); + return VG_(malloc)(s); +} + +#else /* CLG_ENABLE_DEBUG */ + +void CLG_(print_bbno)(void) {} +void CLG_(print_context)(void) {} +void CLG_(print_jcc)(int s, jCC* jcc) {} +void CLG_(print_bbcc)(int s, BBCC* bbcc, Bool b) {} +void CLG_(print_bbcc_fn)(BBCC* bbcc) {} +void CLG_(print_cost)(int s, EventSet* es, ULong* cost) {} +void CLG_(print_bb)(int s, BB* bb) {} +void CLG_(print_cxt)(int s, Context* cxt, int rec_index) {} +void CLG_(print_short_jcc)(jCC* jcc) {} +void CLG_(print_stackentry)(int s, int sp) {} +void CLG_(print_addr)(Addr addr) {} +void CLG_(print_addr_ln)(Addr addr) {} + +#endif diff --git a/callgrind/docs/Makefile.am b/callgrind/docs/Makefile.am new file mode 100644 index 0000000000..d539a6ecd5 --- /dev/null +++ b/callgrind/docs/Makefile.am @@ -0,0 +1 @@ +EXTRA_DIST = diff --git a/callgrind/dump.c b/callgrind/dump.c new file mode 100644 index 0000000000..3f13aea132 --- /dev/null +++ b/callgrind/dump.c @@ -0,0 +1,1715 @@ +/*--------------------------------------------------------------------*/ +/*--- Callgrind ---*/ +/*--- dump.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Callgrind, a Valgrind tool for call tracing. + + Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "config.h" +#include "global.h" + +#include +#include + +/*------------------------------------------------------------*/ +/*--- Support for signal handlers and multi-threading ---*/ +/*------------------------------------------------------------*/ + +/* Dump Part Counter */ +static Int out_counter = 0; + +static Char* dump_file_base = 0; +static Char* base_directory = 0; + +/* Command */ +static Char cmdbuf[BUF_LEN]; + +/* Total reads/writes/misses sum over all dumps and threads. + * Updated during CC traversal at dump time. 
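+ *
+ * (On output buffering: my_fwrite() below coalesces the many small
+ * VG_(write) requests of the dump into FWRITE_BUFSIZE-sized batches.
+ * It flushes whenever the target fd changes, passes requests larger
+ * than FWRITE_THROUGH straight through after a flush, and otherwise
+ * appends to the buffer, flushing first if the data would not fit.)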
+ */ +FullCost CLG_(total_cost) = 0; +static FullCost dump_total_cost = 0; + +EventMapping* CLG_(dumpmap) = 0; + +/* Temporary output buffer for + * print_fn_pos, fprint_apos, fprint_fcost, fprint_jcc, + * fprint_fcc_ln, dump_run_info, dump_state_info + */ +static Char outbuf[FILENAME_LEN + FN_NAME_LEN + OBJ_NAME_LEN]; + +Int CLG_(get_dump_counter)(void) +{ + return out_counter; +} + +Char* CLG_(get_dump_file_base)() +{ + return dump_file_base; +} + +/*------------------------------------------------------------*/ +/*--- Output file related stuff ---*/ +/*------------------------------------------------------------*/ + +/* Boolean dumping array */ +static Bool* dump_array = 0; +static Int dump_array_size = 0; +static Bool* obj_dumped = 0; +static Bool* file_dumped = 0; +static Bool* fn_dumped = 0; +static Bool* cxt_dumped = 0; + +static +void reset_dump_array(void) +{ + int i; + + CLG_ASSERT(dump_array != 0); + + for(i=0;ifile = 0; + p->fn = 0; + p->obj = 0; + p->cxt = 0; + p->rec_index = 0; +} + + +#if 0 +static __inline__ +static void my_fwrite(Int fd, Char* buf, Int len) +{ + VG_(write)(fd, (void*)buf, len); +} +#else + +#define FWRITE_BUFSIZE 32000 +#define FWRITE_THROUGH 10000 +static Char fwrite_buf[FWRITE_BUFSIZE]; +static Int fwrite_pos; +static Int fwrite_fd = -1; + +static __inline__ +void fwrite_flush(void) +{ + if ((fwrite_fd>=0) && (fwrite_pos>0)) + VG_(write)(fwrite_fd, (void*)fwrite_buf, fwrite_pos); + fwrite_pos = 0; +} + +static void my_fwrite(Int fd, Char* buf, Int len) +{ + if (fwrite_fd != fd) { + fwrite_flush(); + fwrite_fd = fd; + } + if (len > FWRITE_THROUGH) { + fwrite_flush(); + VG_(write)(fd, (void*)buf, len); + return; + } + if (FWRITE_BUFSIZE - fwrite_pos <= len) fwrite_flush(); + VG_(strncpy)(fwrite_buf + fwrite_pos, buf, len); + fwrite_pos += len; +} +#endif + + +static void print_obj(Char* buf, obj_node* obj) +{ + int n; + + if (CLG_(clo).compress_strings) { + CLG_ASSERT(obj_dumped != 0); + if (obj_dumped[obj->number]) + n = VG_(sprintf)(buf, "(%d)\n", obj->number); + else { + n = VG_(sprintf)(buf, "(%d) %s\n", + obj->number, obj->name); + } + } + else + n = VG_(sprintf)(buf, "%s\n", obj->name); + +#if 0 + /* add mapping parameters the first time a object is dumped + * format: mp=0xSTART SIZE 0xOFFSET */ + if (!obj_dumped[obj->number]) { + obj_dumped[obj->number]; + VG_(sprintf)(buf+n, "mp=%p %p %p\n", + pos->obj->start, pos->obj->size, pos->obj->offset); + } +#else + obj_dumped[obj->number] = True; +#endif +} + +static void print_file(Char* buf, file_node* file) +{ + if (CLG_(clo).compress_strings) { + CLG_ASSERT(file_dumped != 0); + if (file_dumped[file->number]) + VG_(sprintf)(buf, "(%d)\n", file->number); + else { + VG_(sprintf)(buf, "(%d) %s\n", + file->number, file->name); + file_dumped[file->number] = True; + } + } + else + VG_(sprintf)(buf, "%s\n", file->name); +} + +/* + * tag can be "fn", "cfn", "jfn" + */ +static void print_fn(Int fd, Char* buf, Char* tag, fn_node* fn) +{ + int p; + p = VG_(sprintf)(buf, "%s=",tag); + if (CLG_(clo).compress_strings) { + CLG_ASSERT(fn_dumped != 0); + if (fn_dumped[fn->number]) + p += VG_(sprintf)(buf+p, "(%d)\n", fn->number); + else { + p += VG_(sprintf)(buf+p, "(%d) %s\n", + fn->number, fn->name); + fn_dumped[fn->number] = True; + } + } + else + p += VG_(sprintf)(buf+p, "%s\n", fn->name); + + my_fwrite(fd, buf, p); +} + +static void print_mangled_fn(Int fd, Char* buf, Char* tag, + Context* cxt, int rec_index) +{ + int p, i; + + if (CLG_(clo).compress_strings && CLG_(clo).compress_mangled) { + + int n; + Context* 
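+    /* (String compression in the dump format: the first occurrence of
+     * a name is written as, illustratively, "fn=(24) main", any later
+     * occurrence as just "fn=(24)". Mangled context names are chained
+     * from already-dumped pure-context numbers in the same way, e.g.
+     * "cfn=(9) (5)'(3)" -- all numbers invented for illustration.) */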
last; + + CLG_ASSERT(cxt_dumped != 0); + if (cxt_dumped[cxt->base_number+rec_index]) { + p = VG_(sprintf)(buf, "%s=(%d)\n", + tag, cxt->base_number + rec_index); + my_fwrite(fd, buf, p); + return; + } + + last = 0; + /* make sure that for all context parts compressed data is written */ + for(i=cxt->size;i>0;i--) { + CLG_ASSERT(cxt->fn[i-1]->pure_cxt != 0); + n = cxt->fn[i-1]->pure_cxt->base_number; + if (cxt_dumped[n]) continue; + p = VG_(sprintf)(buf, "%s=(%d) %s\n", + tag, n, cxt->fn[i-1]->name); + my_fwrite(fd, buf, p); + + cxt_dumped[n] = True; + last = cxt->fn[i-1]->pure_cxt; + } + /* If the last context was the context to print, we are finished */ + if ((last == cxt) && (rec_index == 0)) return; + + p = VG_(sprintf)(buf, "%s=(%d) (%d)", tag, + cxt->base_number + rec_index, + cxt->fn[0]->pure_cxt->base_number); + if (rec_index >0) + p += VG_(sprintf)(buf+p, "'%d", rec_index +1); + for(i=1;isize;i++) + p += VG_(sprintf)(buf+p, "'(%d)", + cxt->fn[i]->pure_cxt->base_number); + p += VG_(sprintf)(buf+p, "\n"); + my_fwrite(fd, buf, p); + + cxt_dumped[cxt->base_number+rec_index] = True; + return; + } + + + p = VG_(sprintf)(buf, "%s=", tag); + if (CLG_(clo).compress_strings) { + CLG_ASSERT(cxt_dumped != 0); + if (cxt_dumped[cxt->base_number+rec_index]) { + p += VG_(sprintf)(buf+p, "(%d)\n", cxt->base_number + rec_index); + my_fwrite(fd, buf, p); + return; + } + else { + p += VG_(sprintf)(buf+p, "(%d) ", cxt->base_number + rec_index); + cxt_dumped[cxt->base_number+rec_index] = True; + } + } + + p += VG_(sprintf)(buf+p, "%s", cxt->fn[0]->name); + if (rec_index >0) + p += VG_(sprintf)(buf+p, "'%d", rec_index +1); + for(i=1;isize;i++) + p += VG_(sprintf)(buf+p, "'%s", cxt->fn[i]->name); + + p += VG_(sprintf)(buf+p, "\n"); + my_fwrite(fd, buf, p); +} + + + +/** + * Print function position of the BBCC, but only print info differing to + * the position, update + * Return True if something changes. + */ +static Bool print_fn_pos(int fd, FnPos* last, BBCC* bbcc) +{ + Bool res = False; + + CLG_DEBUGIF(3) { + CLG_DEBUG(2, "+ print_fn_pos: "); + CLG_(print_cxt)(16, bbcc->cxt, bbcc->rec_index); + } + + if (!CLG_(clo).mangle_names) { + if (last->rec_index != bbcc->rec_index) { + VG_(sprintf)(outbuf, "rec=%d\n\n", bbcc->rec_index); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + last->rec_index = bbcc->rec_index; + last->cxt = 0; /* reprint context */ + res = True; + } + + if (last->cxt != bbcc->cxt) { + fn_node* last_from = (last->cxt && last->cxt->size>1) ? + last->cxt->fn[1] : 0; + fn_node* curr_from = (bbcc->cxt && bbcc->cxt->size>1) ? 
+ bbcc->cxt->fn[1] : 0; + if (curr_from == 0) { + if (last_from != 0) { + /* switch back to no context */ + VG_(sprintf)(outbuf, "frfn=(spontaneous)\n"); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + res = True; + } + } + else if (last_from != curr_from) { + print_fn(fd,outbuf,"frfn", curr_from); + res = True; + } + last->cxt = bbcc->cxt; + } + } + + if (last->obj != bbcc->cxt->fn[0]->file->obj) { + VG_(sprintf)(outbuf, "ob="); + print_obj(outbuf+3, bbcc->cxt->fn[0]->file->obj); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + last->obj = bbcc->cxt->fn[0]->file->obj; + res = True; + } + + if (last->file != bbcc->cxt->fn[0]->file) { + VG_(sprintf)(outbuf, "fl="); + print_file(outbuf+3, bbcc->cxt->fn[0]->file); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + last->file = bbcc->cxt->fn[0]->file; + res = True; + } + + if (!CLG_(clo).mangle_names) { + if (last->fn != bbcc->cxt->fn[0]) { + print_fn(fd,outbuf, "fn", bbcc->cxt->fn[0]); + last->fn = bbcc->cxt->fn[0]; + res = True; + } + } + else { + /* Print mangled name if context or rec_index changes */ + if ((last->rec_index != bbcc->rec_index) || + (last->cxt != bbcc->cxt)) { + + print_mangled_fn(fd, outbuf, "fn", bbcc->cxt, bbcc->rec_index); + last->fn = bbcc->cxt->fn[0]; + last->rec_index = bbcc->rec_index; + res = True; + } + } + + last->cxt = bbcc->cxt; + + CLG_DEBUG(2, "- print_fn_pos: %s\n", res ? "changed" : ""); + + return res; +} + +/* the debug lookup cache is useful if BBCC for same BB are + * dumped directly in a row. This is a direct mapped cache. + */ +#define DEBUG_CACHE_SIZE 1777 + +static Addr debug_cache_addr[DEBUG_CACHE_SIZE]; +static file_node* debug_cache_file[DEBUG_CACHE_SIZE]; +static int debug_cache_line[DEBUG_CACHE_SIZE]; +static Bool debug_cache_info[DEBUG_CACHE_SIZE]; + +static __inline__ +void init_debug_cache(void) +{ + int i; + for(i=0;iline = debug_cache_line[cachepos]; + p->file = debug_cache_file[cachepos]; + res = debug_cache_info[cachepos]; + } + else { + res = VG_(get_filename_linenum)(addr, + file, FILENAME_LEN, + NULL, 0, NULL, //FIXME + &(p->line)); + if (!res) { + VG_(strcpy)(file, "???"); + p->line = 0; + } + p->file = CLG_(get_file_node)(bbcc->bb->obj, file); + + debug_cache_info[cachepos] = res; + debug_cache_addr[cachepos] = addr; + debug_cache_line[cachepos] = p->line; + debug_cache_file[cachepos] = p->file; + } + + /* Address offset from bbcc start address */ + p->addr = addr - bbcc->bb->obj->offset; + p->bb_addr = bbcc->bb->offset; + + CLG_DEBUG(3, " get_debug_pos(%p): BB %p, fn '%s', file '%s', line %u\n", + addr, bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name, + p->file->name, p->line); + + return res; +} + + +/* copy file position and init cost */ +static void init_apos(AddrPos* p, Addr addr, Addr bbaddr, file_node* file) +{ + p->addr = addr; + p->bb_addr = bbaddr; + p->file = file; + p->line = 0; +} + +static void copy_apos(AddrPos* dst, AddrPos* src) +{ + dst->addr = src->addr; + dst->bb_addr = src->bb_addr; + dst->file = src->file; + dst->line = src->line; +} + +/* copy file position and init cost */ +static void init_fcost(AddrCost* c, Addr addr, Addr bbaddr, file_node* file) +{ + init_apos( &(c->p), addr, bbaddr, file); + /* FIXME: This is a memory leak as a AddrCost is inited multiple times */ + c->cost = CLG_(get_eventset_cost)( CLG_(sets).full ); + CLG_(init_cost)( CLG_(sets).full, c->cost ); +} + + +/** + * print position change inside of a BB (last -> curr) + * this doesn't update last to curr! 
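+ * (Positions are written relative when --compress-pos is active; see
+ * fprint_pos() below. E.g. source lines 610, 612, 612, 615 on
+ * consecutive cost lines come out as "610", "+2", "*", "+3", where
+ * "*" means "same as before".)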
+ */ +static void fprint_apos(Int fd, AddrPos* curr, AddrPos* last, file_node* func_file) +{ + CLG_ASSERT(curr->file != 0); + CLG_DEBUG(2, " print_apos(file '%s', line %d, bb %p, addr %p) fnFile '%s'\n", + curr->file->name, curr->line, curr->bb_addr, curr->addr, + func_file->name); + + if (curr->file != last->file) { + + /* if we switch back to orig file, use fe=... */ + if (curr->file == func_file) + VG_(sprintf)(outbuf, "fe="); + else + VG_(sprintf)(outbuf, "fi="); + print_file(outbuf+3, curr->file); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + } + + if (CLG_(clo).dump_bbs) { + if (curr->line != last->line) { + VG_(sprintf)(outbuf, "ln=%d\n", curr->line); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + } + } +} + + + +/** + * Print a position. + * This prints out differences if allowed + * + * This doesn't set last to curr afterwards! + */ +static +void fprint_pos(Int fd, AddrPos* curr, AddrPos* last) +{ + if (0) //CLG_(clo).dump_bbs) + VG_(sprintf)(outbuf, "%u ", curr->addr - curr->bb_addr); + else { + int p = 0; + if (CLG_(clo).dump_instr) { + int diff = curr->addr - last->addr; + if ( CLG_(clo).compress_pos && (last->addr >0) && + (diff > -100) && (diff < 100)) { + if (diff >0) + p = VG_(sprintf)(outbuf, "+%d ", diff); + else if (diff==0) + p = VG_(sprintf)(outbuf, "* "); + else + p = VG_(sprintf)(outbuf, "%d ", diff); + } + else + p = VG_(sprintf)(outbuf, "%p ", curr->addr); + } + + if (CLG_(clo).dump_bb) { + int diff = curr->bb_addr - last->bb_addr; + if ( CLG_(clo).compress_pos && (last->bb_addr >0) && + (diff > -100) && (diff < 100)) { + if (diff >0) + p += VG_(sprintf)(outbuf+p, "+%d ", diff); + else if (diff==0) + p += VG_(sprintf)(outbuf+p, "* "); + else + p += VG_(sprintf)(outbuf+p, "%d ", diff); + } + else + p += VG_(sprintf)(outbuf+p, "%p ", curr->bb_addr); + } + + if (CLG_(clo).dump_line) { + int diff = curr->line - last->line; + if ( CLG_(clo).compress_pos && (last->line >0) && + (diff > -100) && (diff < 100)) { + + if (diff >0) + VG_(sprintf)(outbuf+p, "+%d ", diff); + else if (diff==0) + VG_(sprintf)(outbuf+p, "* "); + else + VG_(sprintf)(outbuf+p, "%d ", diff); + } + else + VG_(sprintf)(outbuf+p, "%u ", curr->line); + } + } + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); +} + + +/** + * Print events. + */ + +static +void fprint_cost(int fd, EventMapping* es, ULong* cost) +{ + int p = CLG_(sprint_mappingcost)(outbuf, es, cost); + VG_(sprintf)(outbuf+p, "\n"); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + return; +} + + + +/* Write the cost of a source line; only that parts of the source + * position are written that changed relative to last written position. + * funcPos is the source position of the first line of actual function. + * Something is written only if cost != 0; returns True in this case. 
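+ * (Illustratively, with events Ir Dr Dw and only --dump-line active,
+ * a run of cost lines looks like
+ *
+ *   fl=(2) foo.c
+ *   fn=(5) compute
+ *   12 100 4 2
+ *   +1 20 1 0
+ *
+ * i.e. per line the -- possibly relative -- source position, then one
+ * count per event in dump order; names and numbers are invented.)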
+ */ +static void fprint_fcost(Int fd, AddrCost* c, AddrPos* last) +{ + CLG_DEBUGIF(3) { + CLG_DEBUG(2, " print_fcost(file '%s', line %d, bb %p, addr %p):\n", + c->p.file->name, c->p.line, c->p.bb_addr, c->p.addr); + CLG_(print_cost)(-5, CLG_(sets).full, c->cost); + } + + fprint_pos(fd, &(c->p), last); + copy_apos( last, &(c->p) ); /* update last to current position */ + + fprint_cost(fd, CLG_(dumpmap), c->cost); + + /* add cost to total */ + CLG_(add_and_zero_cost)( CLG_(sets).full, dump_total_cost, c->cost ); +} + + +/* Write out the calls from jcc (at pos) + */ +static void fprint_jcc(Int fd, jCC* jcc, AddrPos* curr, AddrPos* last, ULong ecounter) +{ + static AddrPos target; + file_node* file; + obj_node* obj; + + CLG_DEBUGIF(2) { + CLG_DEBUG(2, " fprint_jcc (jkind %d)\n", jcc->jmpkind); + CLG_(print_jcc)(-10, jcc); + } + + if (!get_debug_pos(jcc->to, bb_addr(jcc->to->bb), &target)) { + /* if we don't have debug info, don't switch to file "???" */ + target.file = last->file; + } + + if (jcc->from && + (jcc->jmpkind == JmpCond || jcc->jmpkind == Ijk_Boring)) { + + /* this is a JCC for a followed conditional or boring jump. */ + CLG_ASSERT(CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost)); + + /* objects among jumps should be the same. + * Otherwise this jump would have been changed to a call + * (see setup_bbcc) + */ + CLG_ASSERT(jcc->from->bb->obj == jcc->to->bb->obj); + + /* only print if target position info is usefull */ + if (!CLG_(clo).dump_instr && !CLG_(clo).dump_bb && target.line==0) { + jcc->call_counter = 0; + return; + } + + /* Different files/functions are possible e.g. with longjmp's + * which change the stack, and thus context + */ + if (last->file != target.file) { + VG_(sprintf)(outbuf, "jfi="); + print_file(outbuf+4, target.file); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + } + + if (jcc->from->cxt != jcc->to->cxt) { + if (CLG_(clo).mangle_names) + print_mangled_fn(fd, outbuf, "jfn", + jcc->to->cxt, jcc->to->rec_index); + else + print_fn(fd, outbuf, "jfn", jcc->to->cxt->fn[0]); + } + + if (jcc->jmpkind == JmpCond) { + /* format: jcnd=/ */ + VG_(sprintf)(outbuf, "jcnd=%llu/%llu ", + jcc->call_counter, ecounter); + } + else { + /* format: jump= */ + VG_(sprintf)(outbuf, "jump=%llu ", + jcc->call_counter); + } + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + + fprint_pos(fd, &target, last); + my_fwrite(fd, "\n", 1); + fprint_pos(fd, curr, last); + my_fwrite(fd, "\n", 1); + + jcc->call_counter = 0; + return; + } + + CLG_ASSERT(jcc->to !=0); + + file = jcc->to->cxt->fn[0]->file; + obj = jcc->to->bb->obj; + + /* object of called position different to object of this function?*/ + if (jcc->from->cxt->fn[0]->file->obj != obj) { + VG_(sprintf)(outbuf, "cob="); + print_obj(outbuf+4, obj); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + } + + /* file of called position different to current file? 
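+   * (A complete call record as emitted below thus reads, with invented
+   * names and numbers:
+   *
+   *     cob=(3) /usr/lib/libc.so
+   *     cfi=(7) printf.c
+   *     cfn=(9) printf
+   *     calls=12 42
+   *     310 1200 340 180
+   *
+   * i.e. callee object/file/function, "calls=<count> <target pos>",
+   * then the call site's position followed by the inclusive cost.)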
*/ + if (last->file != file) { + VG_(sprintf)(outbuf, "cfi="); + print_file(outbuf+4, file); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + } + + if (CLG_(clo).mangle_names) + print_mangled_fn(fd, outbuf, "cfn", jcc->to->cxt, jcc->to->rec_index); + else + print_fn(fd, outbuf, "cfn", jcc->to->cxt->fn[0]); + + if (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost)) { + VG_(sprintf)(outbuf, "calls=%llu ", + jcc->call_counter); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); + + fprint_pos(fd, &target, last); + my_fwrite(fd, "\n", 1); + fprint_pos(fd, curr, last); + fprint_cost(fd, CLG_(dumpmap), jcc->cost); + + CLG_(init_cost)( CLG_(sets).full, jcc->cost ); + + jcc->call_counter = 0; + } +} + + + +/* Cost summation of functions.We use alternately ccSum[0/1], thus + * ssSum[currSum] for recently read lines with same line number. + */ +static AddrCost ccSum[2]; +static int currSum; + +/* + * Print all costs of a BBCC: + * - FCCs of instructions + * - JCCs of the unique jump of this BB + * returns True if something was written + */ +static Bool fprint_bbcc(Int fd, BBCC* bbcc, AddrPos* last) +{ + InstrInfo* instr_info; + ULong ecounter; + Bool something_written = False; + jCC* jcc; + AddrCost *currCost, *newCost; + Int jcc_count = 0, instr, i, jmp; + BB* bb = bbcc->bb; + + CLG_ASSERT(bbcc->cxt != 0); + CLG_DEBUGIF(1) { + VG_(printf)("+ fprint_bbcc (Instr %d): ", bb->instr_count); + CLG_(print_bbcc)(15, bbcc, False); + } + + CLG_ASSERT(currSum == 0 || currSum == 1); + currCost = &(ccSum[currSum]); + newCost = &(ccSum[1-currSum]); + + ecounter = bbcc->ecounter_sum; + jmp = 0; + instr_info = &(bb->instr[0]); + for(instr=0; instrinstr_count; instr++, instr_info++) { + + /* get debug info of current instruction address and dump cost + * if CLG_(clo).dump_bbs or file/line has changed + */ + if (!get_debug_pos(bbcc, bb_addr(bb) + instr_info->instr_offset, + &(newCost->p))) { + /* if we don't have debug info, don't switch to file "???" 
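+      (Line aggregation uses the two ccSum buffers: costs of successive
+       instructions are summed into currCost while they map to the same
+       source line; on a line or file change currCost is flushed and the
+       roles of ccSum[0]/ccSum[1] swap. So, with nonzero costs, three
+       instructions on line 10 followed by one on line 11 produce
+       exactly two cost lines.)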
*/ + newCost->p.file = bbcc->cxt->fn[0]->file; + } + + if (CLG_(clo).dump_bbs || CLG_(clo).dump_instr || + (newCost->p.line != currCost->p.line) || + (newCost->p.file != currCost->p.file)) { + + if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) { + something_written = True; + + fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file); + fprint_fcost(fd, currCost, last); + } + + /* switch buffers */ + currSum = 1 - currSum; + currCost = &(ccSum[currSum]); + newCost = &(ccSum[1-currSum]); + } + + /* add line cost to current cost sum */ + (*CLG_(cachesim).add_icost)(currCost->cost, bbcc, instr_info, ecounter); + + /* print jcc's if there are: only jumps */ + if (bb->jmp[jmp].instr == instr) { + jcc_count=0; + for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) + if ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) + jcc_count++; + + if (jcc_count>0) { + if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) { + /* no need to switch buffers, as position is the same */ + fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file); + fprint_fcost(fd, currCost, last); + } + get_debug_pos(bbcc, bb_addr(bb)+instr_info->instr_offset, &(currCost->p)); + fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file); + something_written = True; + for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) { + if ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) + fprint_jcc(fd, jcc, &(currCost->p), last, ecounter); + } + } + } + + /* update execution counter */ + if (jmp < bb->cjmp_count) + if (bb->jmp[jmp].instr == instr) { + ecounter -= bbcc->jmp[jmp].ecounter; + jmp++; + } + } + + /* jCCs at end? If yes, dump cumulated line info first */ + jcc_count = 0; + for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) { + /* yes, if JCC only counts jmp arcs or cost >0 */ + if ( ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) || + (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost ))) + jcc_count++; + } + + if ( (bbcc->skipped && + !CLG_(is_zero_cost)(CLG_(sets).full, bbcc->skipped)) || + (jcc_count>0) ) { + + if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) { + /* no need to switch buffers, as position is the same */ + fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file); + fprint_fcost(fd, currCost, last); + } + + get_debug_pos(bbcc, bb_jmpaddr(bb), &(currCost->p)); + fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file); + something_written = True; + + /* first, print skipped costs for calls */ + if (bbcc->skipped && !CLG_(is_zero_cost)( CLG_(sets).full, + bbcc->skipped )) { + CLG_(add_and_zero_cost)( CLG_(sets).full, + currCost->cost, bbcc->skipped ); +#if 0 + VG_(sprintf)(outbuf, "# Skipped\n"); + my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf)); +#endif + fprint_fcost(fd, currCost, last); + } + + if (jcc_count > 0) + for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) { + CLG_ASSERT(jcc->jmp == jmp); + if ( ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) || + (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost ))) + + fprint_jcc(fd, jcc, &(currCost->p), last, ecounter); + } + } + + if (CLG_(clo).dump_bbs || CLG_(clo).dump_bb) { + if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) { + something_written = True; + + fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file); + fprint_fcost(fd, currCost, last); + } + if (CLG_(clo).dump_bbs) my_fwrite(fd, (void*)"\n", 1); + + /* when every cost was immediatly written, we must have done so, + * as this function is only called when there's cost in a BBCC + */ + 
+      CLG_ASSERT(something_written);
+   }
+
+   bbcc->ecounter_sum = 0;
+   for(i=0; i<=bbcc->bb->cjmp_count; i++)
+      bbcc->jmp[i].ecounter = 0;
+   bbcc->ret_counter = 0;
+
+   CLG_DEBUG(1, "- fprint_bbcc: JCCs %d\n", jcc_count);
+
+   return something_written;
+}
+
+/* order by
+ *  recursion,
+ *  from->bb->obj, from->bb->fn
+ *  obj, fn[0]->file, fn
+ *  address
+ */
+static int my_cmp(BBCC** pbbcc1, BBCC** pbbcc2)
+{
+#if 0
+   return (*pbbcc1)->bb->offset - (*pbbcc2)->bb->offset;
+#else
+   BBCC *bbcc1 = *pbbcc1;
+   BBCC *bbcc2 = *pbbcc2;
+   Context* cxt1 = bbcc1->cxt;
+   Context* cxt2 = bbcc2->cxt;
+   int off = 1;
+
+   if (cxt1->fn[0]->file->obj != cxt2->fn[0]->file->obj)
+      return cxt1->fn[0]->file->obj - cxt2->fn[0]->file->obj;
+
+   if (cxt1->fn[0]->file != cxt2->fn[0]->file)
+      return cxt1->fn[0]->file - cxt2->fn[0]->file;
+
+   if (cxt1->fn[0] != cxt2->fn[0])
+      return cxt1->fn[0] - cxt2->fn[0];
+
+   if (bbcc1->rec_index != bbcc2->rec_index)
+      return bbcc1->rec_index - bbcc2->rec_index;
+
+   while((off < cxt1->size) && (off < cxt2->size)) {
+      fn_node* ffn1 = cxt1->fn[off];
+      fn_node* ffn2 = cxt2->fn[off];
+      if (ffn1->file->obj != ffn2->file->obj)
+         return ffn1->file->obj - ffn2->file->obj;
+      if (ffn1 != ffn2)
+         return ffn1 - ffn2;
+      off++;
+   }
+   if (cxt1->size > cxt2->size) return 1;
+   else if (cxt1->size < cxt2->size) return -1;
+
+   return bbcc1->bb->offset - bbcc2->bb->offset;
+#endif
+}
+
+
+
+
+
+/* modified version of:
+ *
+ * qsort -- qsort interface implemented by faster quicksort.
+ * J. L. Bentley and M. D. McIlroy, SPE 23 (1993) 1249-1265.
+ * Copyright 1993, John Wiley.
+ */
+
+static __inline__
+void swapfunc(BBCC** a, BBCC** b, int n)
+{
+   while(n>0) {
+      BBCC* t = *a; *a = *b; *b = t;
+      a++, b++;
+      n--;
+   }
+}
+
+static __inline__
+void swap(BBCC** a, BBCC** b)
+{
+   BBCC* t;
+   t = *a; *a = *b; *b = t;
+}
+
+#define min(x, y) ((x)<=(y) ? (x) : (y))
+
+static
+BBCC** med3(BBCC **a, BBCC **b, BBCC **c, int (*cmp)(BBCC**,BBCC**))
+{  return cmp(a, b) < 0 ?
+      (cmp(b, c) < 0 ? b : cmp(a, c) < 0 ? c : a)
+      : (cmp(b, c) > 0 ? b : cmp(a, c) > 0 ? c : a);
+}
+
+static BBCC** qsort_start = 0;
+
+static void qsort(BBCC **a, int n, int (*cmp)(BBCC**,BBCC**))
+{
+   BBCC **pa, **pb, **pc, **pd, **pl, **pm, **pn, **pv;
+   int s, r;
+   BBCC* v;
+
+   CLG_DEBUG(8, " qsort(%d,%d)\n", a-qsort_start, n);
+
+   if (n < 7) { /* Insertion sort on smallest arrays */
+      for (pm = a+1; pm < a+n; pm++)
+         for (pl = pm; pl > a && cmp(pl-1, pl) > 0; pl --)
+            swap(pl, pl-1);
+
+      CLG_DEBUGIF(8) {
+         for (pm = a; pm < a+n; pm++) {
+            VG_(printf)(" %3d BB %p, ", pm - qsort_start,
+                        bb_addr((*pm)->bb));
+            CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
+         }
+      }
+      return;
+   }
+   pm = a + n/2; /* Small arrays, middle element */
+   if (n > 7) {
+      pl = a;
+      pn = a + (n-1);
+      if (n > 40) { /* Big arrays, pseudomedian of 9 */
+         s = n/8;
+         pl = med3(pl, pl+s, pl+2*s, cmp);
+         pm = med3(pm-s, pm, pm+s, cmp);
+         pn = med3(pn-2*s, pn-s, pn, cmp);
+      }
+      pm = med3(pl, pm, pn, cmp); /* Mid-size, med of 3 */
+   }
+
+
+   v = *pm;
+   pv = &v;
+   pa = pb = a;
+   pc = pd = a + (n-1);
+   for (;;) {
+      while ((pb <= pc) && ((r=cmp(pb, pv)) <= 0)) {
+         if (r==0) {
+            /* same as pivot, to start */
+            swap(pa,pb); pa++;
+         }
+         pb ++;
+      }
+      while ((pb <= pc) && ((r=cmp(pc, pv)) >= 0)) {
+         if (r==0) {
+            /* same as pivot, to end */
+            swap(pc,pd); pd--;
+         }
+         pc --;
+      }
+      if (pb > pc) { break; }
+      swap(pb, pc);
+      pb ++;
+      pc --;
+   }
+   pb--;
+   pc++;
+
+   /* put pivot copies from start into middle */
+   if ((s = pa-a) > 0) { for(r=0; r<s; r++) swap(a+r, pb+1-s+r); }
+   /* put pivot copies from end into middle */
+   if ((s = pd-pc) > 0) { for(r=0; r<s; r++) swap(pc+r, a+n-s+r); }
+
+   CLG_DEBUGIF(8) {
+      VG_(printf)(" PV BB %p, ", bb_addr((*pv)->bb));
+      CLG_(print_cxt)(9, (*pv)->cxt, (*pv)->rec_index);
+
+      s = pb-pa+1;
+      VG_(printf)(" Lower %d - %d:\n", a-qsort_start, a+s-1-qsort_start);
+      for (r=0; r<s; r++) {
+         pm = a+r;
+         VG_(printf)(" %3d BB %p, ", pm-qsort_start, bb_addr((*pm)->bb));
+         CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
+      }
+
+      s = pd-pc+1;
+      VG_(printf)(" Upper %d - %d:\n",
+                  a+n-s-qsort_start, a+n-1-qsort_start);
+      for (r=0; r<s; r++) {
+         pm = a+n-s+r;
+         VG_(printf)(" %3d BB %p, ", pm-qsort_start, bb_addr((*pm)->bb));
+         CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
+      }
+   }
+
+   if ((s = pb+1-pa) > 1) qsort(a, s, cmp);
+   if ((s = pd+1-pc) > 1) qsort(a+n-s, s, cmp);
+}
+
+
+/* Helpers for prepare_dump */
+
+static Int prepare_count;
+static BBCC** prepare_ptr;
+
+
+static void hash_addCount(BBCC* bbcc)
+{
+   if ((bbcc->ecounter_sum > 0) || (bbcc->ret_counter>0))
+      prepare_count++;
+}
+
+static void hash_addPtr(BBCC* bbcc)
+{
+   if ((bbcc->ecounter_sum == 0) &&
+       (bbcc->ret_counter == 0)) return;
+
+   *prepare_ptr = bbcc;
+   prepare_ptr++;
+}
+
+
+static void cs_addCount(thread_info* ti)
+{
+   Int i;
+   BBCC* bbcc;
+
+   /* add BBCCs with active call in call stack of current thread.
+    * update cost sums for active calls
+    */
+
+   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
+      call_entry* e = &(CLG_(current_call_stack).entry[i]);
+      if (e->jcc == 0) continue;
+
+      CLG_(add_diff_cost_lz)( CLG_(sets).full, &(e->jcc->cost),
+                              e->enter_cost, CLG_(current_state).cost);
+      bbcc = e->jcc->from;
+
+      CLG_DEBUG(1, " [%2d] (tid %d), added active: %s\n",
+                i, CLG_(current_tid), bbcc->cxt->fn[0]->name);
+
+      if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
+         /* already counted */
+         continue;
+      }
+      prepare_count++;
+   }
+}
+
+static void cs_addPtr(thread_info* ti)
+{
+   Int i;
+   BBCC* bbcc;
+
+   /* add BBCCs with active call in call stack of current thread */
+
+   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
+      call_entry* e = &(CLG_(current_call_stack).entry[i]);
+      if (e->jcc == 0) continue;
+
+      bbcc = e->jcc->from;
+
+      if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
+         /* already counted */
+         continue;
+      }
+
+      *prepare_ptr = bbcc;
+      prepare_ptr++;
+   }
+}
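+
+/* Note (illustrative, not part of the tool): dumping uses a classic
+ * two-pass pattern over the BBCC hash and the call stacks:
+ *
+ *   pass 1: hash_addCount/cs_addCount - count the entries to dump
+ *   alloc : array = CLG_MALLOC((count+1) * sizeof(BBCC*))
+ *   pass 2: hash_addPtr/cs_addPtr     - fill the array
+ *
+ * Both passes must apply the same filter (ecounter_sum, ret_counter,
+ * active calls); otherwise the assertion after filling would fire.
+ */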
+
+/**
+ * Put all BBCCs with costs into a sorted array.
+ * The returned array ends with a null pointer.
+ * Must be freed after dumping.
+ */
+static
+BBCC** prepare_dump(void)
+{
+   BBCC **array;
+
+   prepare_count = 0;
+
+   /* if we do not separate among threads, this gives all */
+   /* count number of BBCCs with >0 executions */
+   CLG_(forall_bbccs)(hash_addCount);
+
+   /* even if we do not separate among threads,
+    * call stacks are separated */
+   if (CLG_(clo).separate_threads)
+      cs_addCount(0);
+   else
+      CLG_(forall_threads)(cs_addCount);
+
+   CLG_DEBUG(0, "prepare_dump: %d BBCCs\n", prepare_count);
+
+   /* allocate bbcc array, insert BBCCs and sort */
+   prepare_ptr = array =
+      (BBCC**) CLG_MALLOC((prepare_count+1) * sizeof(BBCC*));
+
+   CLG_(forall_bbccs)(hash_addPtr);
+
+   if (CLG_(clo).separate_threads)
+      cs_addPtr(0);
+   else
+      CLG_(forall_threads)(cs_addPtr);
+
+   CLG_ASSERT(array + prepare_count == prepare_ptr);
+
+   /* end mark */
+   *prepare_ptr = 0;
+
+   CLG_DEBUG(0," BBCCs inserted\n");
+
+   qsort_start = array;
+   qsort(array, prepare_count, my_cmp);
+
+   CLG_DEBUG(0," BBCCs sorted\n");
+
+   return array;
+}
+
+
+
+
+static void fprint_cost_ln(int fd, Char* prefix,
+                           EventMapping* em, ULong* cost)
+{
+   int p;
+
+   p = VG_(sprintf)(outbuf, "%s", prefix);
+   p += CLG_(sprint_mappingcost)(outbuf + p, em, cost);
+   VG_(sprintf)(outbuf + p, "\n");
+   my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+}
+
+static ULong bbs_done = 0;
+static Char* filename = 0;
+
+static
+void file_err(void)
+{
+   VG_(message)(Vg_UserMsg,
+                "Error: cannot open output file `%s'",
+                filename );
+   VG_(exit)(1);
+}
+
+/**
+ * Create a new dump file and write header.
+ *
+ * Naming: <dump_file_base>.<pid>[.<part>][-<tid>]
+ *   <part> is skipped for the final dump (trigger==0)
+ *   <tid> is skipped for thread 1 with CLG_(clo).separate_threads=no
+ *
+ * Returns the file descriptor, and -1 on error (no write permission)
+ */
+static int new_dumpfile(Char buf[BUF_LEN], int tid, Char* trigger)
+{
+   Bool appending = False;
+   int i, fd;
+   FullCost sum = 0;
+   SysRes res;
+
+   CLG_ASSERT(filename != 0);
+
+   if (!CLG_(clo).combine_dumps) {
+      i = VG_(sprintf)(filename, "%s.%d", dump_file_base, VG_(getpid)());
+
+      if (trigger)
+         i += VG_(sprintf)(filename+i, ".%d", out_counter);
+
+      if (CLG_(clo).separate_threads)
+         i += VG_(sprintf)(filename+i, "-%02d", tid);
+
+      res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
+   }
+   else {
+      VG_(sprintf)(filename, "%s.%d", dump_file_base, VG_(getpid)());
+      res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_APPEND, 0);
+      if (!res.isError && out_counter>1)
+         appending = True;
+   }
+
+   if (res.isError) {
+      res = VG_(open)(filename, VKI_O_CREAT|VKI_O_WRONLY,
+                      VKI_S_IRUSR|VKI_S_IWUSR);
+      if (res.isError) {
+         /* If the file cannot be opened for whatever reason (conflict
+          * between multiple supervised processes?), give up now. */
+         file_err();
+      }
+   }
+   fd = (Int) res.val;
+
+   CLG_DEBUG(2, " new_dumpfile '%s'\n", filename);
+
+   if (!appending)
+      reset_dump_array();
+
+
+   if (!appending) {
+      /* version */
+      VG_(sprintf)(buf, "version: 1\n");
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+      /* creator */
+      VG_(sprintf)(buf, "creator: callgrind-" VERSION "\n");
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+      /* "pid:" line */
+      VG_(sprintf)(buf, "pid: %d\n", VG_(getpid)());
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+      /* "cmd:" line */
+      VG_(strcpy)(buf, "cmd: ");
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+      my_fwrite(fd, (void*)cmdbuf, VG_(strlen)(cmdbuf));
+   }
+
+   VG_(sprintf)(buf, "\npart: %d\n", out_counter);
+   my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+   if (CLG_(clo).separate_threads) {
+      VG_(sprintf)(buf, "thread: %d\n", tid);
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+   }
+
+   /* "desc:" lines */
+   if (!appending) {
+      my_fwrite(fd, "\n", 1);
+
+#if 0
+      /* Global options changing the tracing behaviour */
+      VG_(sprintf)(buf, "\ndesc: Option: --skip-plt=%s\n",
+                   CLG_(clo).skip_plt ? "yes" : "no");
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+      VG_(sprintf)(buf, "desc: Option: --collect-jumps=%s\n",
+                   CLG_(clo).collect_jumps ? "yes" : "no");
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+      VG_(sprintf)(buf, "desc: Option: --separate-recs=%d\n",
+                   CLG_(clo).separate_recursions);
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+      VG_(sprintf)(buf, "desc: Option: --separate-callers=%d\n",
+                   CLG_(clo).separate_callers);
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+      VG_(sprintf)(buf, "desc: Option: --dump-bbs=%s\n",
+                   CLG_(clo).dump_bbs ? "yes" : "no");
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+      VG_(sprintf)(buf, "desc: Option: --separate-threads=%s\n",
+                   CLG_(clo).separate_threads ? "yes" : "no");
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+#endif
+
+      (*CLG_(cachesim).getdesc)(buf);
+      my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+   }
+
+   VG_(sprintf)(buf, "\ndesc: Timerange: Basic block %llu - %llu\n",
+                bbs_done, CLG_(stat).bb_executions);
+
+   my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+   VG_(sprintf)(buf, "desc: Trigger: %s\n",
+                trigger ? trigger : (Char*)"Program termination");
+   my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+#if 0
+   /* Output function specific config
+    * FIXME */
+   for (i = 0; i < N_FNCONFIG_ENTRIES; i++) {
+      fnc = fnc_table[i];
+      while (fnc) {
+         if (fnc->skip) {
+            VG_(sprintf)(buf, "desc: Option: --fn-skip=%s\n", fnc->name);
+            my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+         }
+         if (fnc->dump_at_enter) {
+            VG_(sprintf)(buf, "desc: Option: --fn-dump-at-enter=%s\n",
+                         fnc->name);
+            my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+         }
+         if (fnc->dump_at_leave) {
+            VG_(sprintf)(buf, "desc: Option: --fn-dump-at-leave=%s\n",
+                         fnc->name);
+            my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+         }
+         if (fnc->separate_callers != CLG_(clo).separate_callers) {
+            VG_(sprintf)(buf, "desc: Option: --separate-callers%d=%s\n",
+                         fnc->separate_callers, fnc->name);
+            my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+         }
+         if (fnc->separate_recursions != CLG_(clo).separate_recursions) {
+            VG_(sprintf)(buf, "desc: Option: --separate-recs%d=%s\n",
+                         fnc->separate_recursions, fnc->name);
+            my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+         }
+         fnc = fnc->next;
+      }
+   }
+#endif
+
+   /* "positions:" line */
+   VG_(sprintf)(buf, "\npositions:%s%s%s\n",
+                CLG_(clo).dump_instr ? " instr" : "",
+                CLG_(clo).dump_bb ? " bb" : "",
+                CLG_(clo).dump_line ? " line" : "");
+   my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+   /* "events:" line */
+   i = VG_(sprintf)(buf, "events: ");
+   CLG_(sprint_eventmapping)(buf+i, CLG_(dumpmap));
+   my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+   my_fwrite(fd, "\n", 1);
+
+   /* summary lines */
+   sum = CLG_(get_eventset_cost)( CLG_(sets).full );
+   CLG_(zero_cost)(CLG_(sets).full, sum);
+   if (CLG_(clo).separate_threads) {
+      thread_info* ti = CLG_(get_current_thread)();
+      CLG_(add_diff_cost)(CLG_(sets).full, sum, ti->lastdump_cost,
+                          ti->states.entry[0]->cost);
+   }
+   else {
+      /* This function is called once for thread 1, where
+       * all costs are summed up when not dumping separate per thread.
+       * But this is not true for summary: we need to add all threads.
+       */
+      int t;
+      thread_info** thr = CLG_(get_threads)();
+      for(t=1; t<VG_N_THREADS; t++) {
+         if (!thr[t]) continue;
+         CLG_(add_diff_cost)(CLG_(sets).full, sum,
+                             thr[t]->lastdump_cost,
+                             thr[t]->states.entry[0]->cost);
+      }
+   }
+   fprint_cost_ln(fd, "summary: ", CLG_(dumpmap), sum);
+
+   /* all dumped costs will be accumulated in dump_total_cost */
+   CLG_(init_cost_lz)( CLG_(sets).full, &dump_total_cost );
+
+   my_fwrite(fd, "\n\n",2);
+
+   if (VG_(clo_verbosity) > 1)
+      VG_(message)(Vg_DebugMsg, "Dump to %s", filename);
+
+   return fd;
+}
+
+
+static void close_dumpfile(Char buf[BUF_LEN], int fd, int tid)
+{
+   if (fd <0) return;
+
+   fprint_cost_ln(fd, "totals: ", CLG_(dumpmap),
+                  dump_total_cost);
+   //fprint_fcc_ln(fd, "summary: ", &dump_total_fcc);
+   CLG_(add_cost_lz)(CLG_(sets).full,
+                     &CLG_(total_cost), dump_total_cost);
+
+   fwrite_flush();
+   VG_(close)(fd);
+
+   if (filename[0] == '.') {
+      if (-1 == VG_(rename) (filename, filename+1)) {
+         /* Cannot rename to the correct file name: give out a warning */
+         VG_(message)(Vg_DebugMsg, "Warning: Cannot rename %s to %s",
+                      filename, filename+1);
+      }
+   }
+}
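+
+/* For orientation (illustrative; values invented): the header written by
+ * new_dumpfile() plus the trailer from close_dumpfile() frame each dump:
+ *
+ *   version: 1
+ *   creator: callgrind-<VERSION>
+ *   pid: 1234
+ *   cmd: ./myprog arg
+ *   part: 1
+ *   desc: Timerange: Basic block 0 - 99999
+ *   desc: Trigger: Program termination
+ *   positions: line
+ *   events: Ir Dr Dw
+ *   summary: ...
+ *   <cost lines emitted by print_bbccs_of_thread below>
+ *   totals: ...
+ */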
+/* Helpers for print_bbccs */
+
+static Int print_fd;
+static Char* print_trigger;
+static Char print_buf[BUF_LEN];
+
+static void print_bbccs_of_thread(thread_info* ti)
+{
+   BBCC **p, **array;
+   FnPos lastFnPos;
+   AddrPos lastAPos;
+
+   CLG_DEBUG(1, "+ print_bbccs(tid %d)\n", CLG_(current_tid));
+
+   print_fd = new_dumpfile(print_buf, CLG_(current_tid), print_trigger);
+   if (print_fd <0) {
+      CLG_DEBUG(1, "- print_bbccs(tid %d): No output...\n", CLG_(current_tid));
+      return;
+   }
+
+   p = array = prepare_dump();
+   init_fpos(&lastFnPos);
+   init_apos(&lastAPos, 0, 0, 0);
+
+   if (p) while(1) {
+
+      /* on context/function change, print old cost buffer before */
+      if (lastFnPos.cxt && ((*p==0) ||
+                            (lastFnPos.cxt != (*p)->cxt) ||
+                            (lastFnPos.rec_index != (*p)->rec_index))) {
+         if (!CLG_(is_zero_cost)( CLG_(sets).full, ccSum[currSum].cost )) {
+            /* no need to switch buffers, as position is the same */
+            fprint_apos(print_fd, &(ccSum[currSum].p), &lastAPos,
+                        lastFnPos.cxt->fn[0]->file);
+            fprint_fcost(print_fd, &ccSum[currSum], &lastAPos);
+         }
+
+         if (ccSum[currSum].p.file != lastFnPos.cxt->fn[0]->file) {
+            /* switch back to file of function */
+            VG_(sprintf)(print_buf, "fe=");
+            print_file(print_buf+3, lastFnPos.cxt->fn[0]->file);
+            my_fwrite(print_fd, (void*)print_buf, VG_(strlen)(print_buf));
+         }
+         my_fwrite(print_fd, "\n", 1);
+      }
+
+      if (*p == 0) break;
+
+      if (print_fn_pos(print_fd, &lastFnPos, *p)) {
+
+         /* new function */
+         init_apos(&lastAPos, 0, 0, (*p)->cxt->fn[0]->file);
+         init_fcost(&ccSum[0], 0, 0, 0);
+         init_fcost(&ccSum[1], 0, 0, 0);
+         currSum = 0;
+      }
+
+      if (CLG_(clo).dump_bbs) {
+         /* FIXME: Specify object of BB if different to object of fn */
+         int i, pos = 0;
+         ULong ecounter = (*p)->ecounter_sum;
+         pos = VG_(sprintf)(print_buf, "bb=%p ", (*p)->bb->offset);
+         for(i = 0; i<(*p)->bb->cjmp_count; i++) {
+            pos += VG_(sprintf)(print_buf+pos, "%d %llu ",
+                                (*p)->bb->jmp[i].instr,
+                                ecounter);
+            ecounter -= (*p)->jmp[i].ecounter;
+         }
+         VG_(sprintf)(print_buf+pos, "%d %llu\n",
+                      (*p)->bb->instr_count,
+                      ecounter);
+         my_fwrite(print_fd, (void*)print_buf, VG_(strlen)(print_buf));
+      }
+
+      fprint_bbcc(print_fd, *p, &lastAPos);
+
+      p++;
+   }
+
+   close_dumpfile(print_buf, print_fd, CLG_(current_tid));
+   if (array) VG_(free)(array);
+
+   /* set counters of last dump */
+   CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost,
+                    CLG_(current_state).cost );
+
+   CLG_DEBUG(1, "- print_bbccs(tid %d)\n", CLG_(current_tid));
+}
+
+
+static void print_bbccs(Char* trigger, Bool only_current_thread)
+{
+   init_dump_array();
+   init_debug_cache();
+
+   print_fd = -1;
+   print_trigger = trigger;
+
+   if (!CLG_(clo).separate_threads) {
+      /* All BBCC/JCC costs are stored with thread 1 */
+      Int orig_tid = CLG_(current_tid);
+
+      CLG_(switch_thread)(1);
+      print_bbccs_of_thread( CLG_(get_current_thread)() );
+      CLG_(switch_thread)(orig_tid);
+   }
+   else if (only_current_thread)
+      print_bbccs_of_thread( CLG_(get_current_thread)() );
+   else
+      CLG_(forall_threads)(print_bbccs_of_thread);
+
+   free_dump_array();
+}
+
+
+void CLG_(dump_profile)(Char* trigger, Bool only_current_thread)
+{
+   CLG_DEBUG(2, "+ dump_profile(Trigger '%s')\n",
+             trigger ? trigger : (Char*)"Prg.Term.");
+
+   if (VG_(clo_verbosity) > 1)
+      VG_(message)(Vg_DebugMsg, "Start dumping at BB %llu (%s)...",
+                   CLG_(stat).bb_executions,
+                   trigger ? trigger : (Char*)"Prg.Term.");
+
+   out_counter++;
+
+   print_bbccs(trigger, only_current_thread);
+
+
+   bbs_done = CLG_(stat).bb_executions++;
+
+   if (VG_(clo_verbosity) > 1)
+      VG_(message)(Vg_DebugMsg, "Dumping done.");
+}
+
+/* copy the command line to the cmd buffer (it could change at runtime) */
+static
+void init_cmdbuf(void)
+{
+   Int i,j,size = 0;
+   HChar* argv;
+
+#if VG_CORE_INTERFACE_VERSION > 8
+   if (VG_(args_the_exename))
+      size = VG_(sprintf)(cmdbuf, " %s", VG_(args_the_exename));
+
+   for(i = 0; i < VG_(args_for_client).used; i++) {
+      argv = VG_(args_for_client).strs[i];
+      if (!argv) continue;
+      if ((size>0) && (size < BUF_LEN)) cmdbuf[size++] = ' ';
+      for(j=0; argv[j]!=0; j++)
+         if (size < BUF_LEN) cmdbuf[size++] = argv[j];
+   }
+#else
+   for(i = 0; i < VG_(client_argc); i++) {
+      argv = VG_(client_argv[i]);
+      if (!argv) continue;
+      if ((size>0) && (size < BUF_LEN)) cmdbuf[size++] = ' ';
+      for(j=0; argv[j]!=0; j++)
+         if (size < BUF_LEN) cmdbuf[size++] = argv[j];
+   }
+#endif
+
+   if (size == BUF_LEN) size--;
+   cmdbuf[size] = 0;
+}
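+
+/* Example (illustrative; pid and part numbers invented): with a base name
+ * of e.g. "callgrind.out", the files produced are
+ *
+ *   callgrind.out.1234        final dump, --separate-threads=no
+ *   callgrind.out.1234.2      dump part 2, triggered at runtime
+ *   callgrind.out.1234.2-03   dump part 2 for thread 3,
+ *                             --separate-threads=yes
+ *
+ * following the <base>.<pid>[.<part>][-<tid>] scheme of new_dumpfile().
+ */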
+void CLG_(init_files)(Char** dir, Char** file)
+{
+   Int size;
+   SysRes res;
+
+   if (!CLG_(clo).filename_base)
+      CLG_(clo).filename_base = DEFAULT_DUMPNAME;
+
+   /* get base directory for dump/command/result files */
+   if (CLG_(clo).filename_base[0] == '/') {
+      int lastSlash = 0, i =1;
+      while(CLG_(clo).filename_base[i]) {
+         for(; CLG_(clo).filename_base[i] &&
+               CLG_(clo).filename_base[i] != '/'; i++);
+         if (CLG_(clo).filename_base[i] != '/') break;
+         lastSlash = i;
+         i++;
+      }
+      /* the directory part is everything up to the last slash */
+      base_directory = (Char*) CLG_MALLOC(lastSlash+1);
+      VG_(strncpy)(base_directory, CLG_(clo).filename_base, lastSlash);
+      base_directory[lastSlash] = 0;
+
+      dump_file_base = CLG_(clo).filename_base;
+   }
+   else {
+      size = 100;
+      base_directory = 0;
+
+      /* getcwd() fails if the buffer isn't big enough -- keep doubling size
+         until it succeeds. */
+      while (NULL == base_directory) {
+         base_directory = CLG_MALLOC(size);
+         if (!VG_(getcwd)(base_directory, size)) {
+            VG_(free)(base_directory);
+            base_directory = 0;
+            size *= 2;
+         }
+      }
+
+      size = VG_(strlen)(base_directory) + VG_(strlen)(CLG_(clo).filename_base) +2;
+      dump_file_base = (Char*) CLG_MALLOC(size);
+      CLG_ASSERT(dump_file_base != 0);
+      VG_(sprintf)(dump_file_base, "%s/%s",
+                   base_directory, CLG_(clo).filename_base);
+   }
+
+   /* allocate space big enough for final filenames */
+   filename = (Char*) CLG_MALLOC(VG_(strlen)(dump_file_base)+32);
+   CLG_ASSERT(filename != 0);
+
+   /* Make sure the output base file can be written.
+    * This is used for the dump at program termination.
+    * We stop with an error here if we cannot create the file:
+    * this is probably due to missing permissions, and the
+    * trace parts could not be written later, either.
+    */
+   VG_(sprintf)(filename, "%s.%d", dump_file_base, VG_(getpid)());
+   res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
+   if (res.isError) {
+      res = VG_(open)(filename, VKI_O_CREAT|VKI_O_WRONLY,
+                      VKI_S_IRUSR|VKI_S_IWUSR);
+      if (res.isError) {
+         file_err();
+      }
+   }
+   if (!res.isError) VG_(close)( (Int)res.val );
+
+   *dir = base_directory;
+   *file = filename;
+
+   init_cmdbuf();
+}
diff --git a/callgrind/events.c b/callgrind/events.c
new file mode 100644
index 0000000000..6ef8d8523e
--- /dev/null
+++ b/callgrind/events.c
@@ -0,0 +1,575 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind                                                    ---*/
+/*---                                                     events.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Callgrind, a Valgrind tool for call tracing.
+
+   Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+#define MAX_EVENTTYPE 20
+
+static EventType eventtype[MAX_EVENTTYPE];
+static Int eventtype_count = 0;
+
+EventType* CLG_(register_eventtype)(Char* name)
+{
+   EventType* et;
+
+   if (eventtype_count == MAX_EVENTTYPE) {
+      VG_(printf)("\nMore than %d event types used!\n"
+                  "Increase MAX_EVENTTYPE in events.c and recompile this tool!\n",
+                  MAX_EVENTTYPE);
+      VG_(tool_panic)("Too many event types requested.");
+   }
+
+   et = &(eventtype[eventtype_count]);
+   et->id = eventtype_count;
+   et->name = (UChar*) VG_(strdup)(name);
+   et->description = 0;
+
+   eventtype_count++;
+
+   return et;
+}
+
+
+EventType* CLG_(get_eventtype)(Char* name)
+{
+   Int i;
+
+   for(i=0; i<eventtype_count; i++)
+      if (VG_(strcmp)(eventtype[i].name, name) == 0)
+         return eventtype+i;
+   return 0;
+}
+
+EventType* CLG_(get_eventtype_byindex)(Int id)
+{
+   if ((id >= 0) && (id < eventtype_count))
+      return eventtype+id;
+   return 0;
+}
+
+/* Allocate space for an event set */
+EventSet* CLG_(get_eventset)(Char* n, Int capacity)
+{
+   EventSet* es;
+
+   es = (EventSet*) CLG_MALLOC(sizeof(EventSet) +
+                               capacity * sizeof(EventSetEntry));
+   es->capacity = capacity;
+   es->size = 0;
+   es->name = n;
+
+   return es;
+}
+
+/* Incorporate an event type into a set, get start offset */
+Int CLG_(add_eventtype)(EventSet* es, EventType* t)
+{
+   Int offset = es->size;
+   if (es->capacity - offset < 1) return -1;
+
+   es->size++;
+   es->e[offset].type = t;
+   es->e[offset].nextTop = es->size;
+
+   return offset;
+}
+
+/* Incorporate one event set into another, get start offset */
+Int CLG_(add_eventset)(EventSet* dst, EventSet* src)
+{
+   Int offset = dst->size, i;
+   if (!src || (src->size == 0)) return offset;
+
+   if (dst->capacity - offset < src->size) return -1;
+
+   for(i=0; i<src->size; i++) {
+      dst->e[offset+i].type = src->e[i].type;
+      dst->e[offset+i].nextTop = src->e[i].nextTop + offset;
+   }
+   dst->size += src->size;
+
+   return offset;
+}
+
+/* Incorporate two event types into a set, with second < first */
+Int CLG_(add_dep_event2)(EventSet* es, EventType* e1, EventType* e2)
+{
+   Int offset = es->size;
+
+   if (es->capacity - offset < 2) return -1;
+
+   es->size += 2;
+   es->e[offset].type = e1;
+   es->e[offset].nextTop = es->size;
+   es->e[offset+1].type = e2;
+   es->e[offset+1].nextTop = es->size;
+
+   return offset;
+}
+
+/* Incorporate 3 event types into a set, with third < second < first */
+Int CLG_(add_dep_event3)(EventSet* es,
+                         EventType* e1, EventType* e2, EventType* e3)
+{
+   Int offset = es->size;
+
+   if (es->capacity - offset < 3) return -1;
+
+   es->size += 3;
+   es->e[offset].type = e1;
+   es->e[offset].nextTop = es->size;
+   es->e[offset+1].type = e2;
+   es->e[offset+1].nextTop = es->size;
+   es->e[offset+2].type = e3;
+   es->e[offset+2].nextTop = es->size;
+
+   return offset;
+}
+
+Int CLG_(add_dep_event4)(EventSet* es,
+                         EventType* e1, EventType* e2,
+                         EventType* e3, EventType* e4)
+{
+   Int offset = es->size;
+
+   if (es->capacity - offset < 4) return -1;
+
+   es->size += 4;
+   es->e[offset].type = e1;
+   es->e[offset].nextTop = es->size;
+   es->e[offset+1].type = e2;
+   es->e[offset+1].nextTop = es->size;
+   es->e[offset+2].type = e3;
+   es->e[offset+2].nextTop = es->size;
+   es->e[offset+3].type = e4;
+   es->e[offset+3].nextTop = es->size;
+
+   return offset;
+}
+
+/* Returns number of characters written */
+Int CLG_(sprint_eventset)(Char* buf, EventSet* es)
+{
+   Int i, pos = 0;
+
+   for(i=0; i< es->size; i++) {
+      if (pos>0) buf[pos++] = ' ';
+      pos += VG_(sprintf)(buf + pos, es->e[i].type->name);
+   }
+   buf[pos] = 0;
+
+   return pos;
+}
+
+/* Get cost array for an event set */
+ULong* CLG_(get_eventset_cost)(EventSet* es)
+{
+   return CLG_(get_costarray)(es->capacity);
+}
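+
+/* Worked example (illustrative): when a dependent pair is added via
+ * CLG_(add_dep_event2)(es, e1, e2), the caller guarantees that
+ * cost[off+1] <= cost[off] at all times. Both entries get nextTop == 2
+ * (the set size after insertion), so any scan that finds cost[off] == 0
+ * may jump straight to index 2: cost[off+1] must be 0 as well. This is
+ * the trick behind the while(i < es->size) { ... i = es->e[i].nextTop; }
+ * loops in is_zero_cost(), add_cost() and friends below.
+ */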
+/* Set all costs of an event set to zero */
+void CLG_(init_cost)(EventSet* es, ULong* cost)
+{
+   Int i;
+
+   if (!cost) return;
+
+   for(i=0; i<es->capacity; i++)
+      cost[i] = 0;
+}
+
+/* Set all costs of an event set to zero,
+ * allocating the cost array if needed */
+void CLG_(init_cost_lz)(EventSet* es, ULong** cost)
+{
+   Int i;
+
+   CLG_ASSERT(cost != 0);
+   if (!(*cost))
+      *cost = CLG_(get_eventset_cost)(es);
+
+   for(i=0; i<es->capacity; i++)
+      (*cost)[i] = 0;
+}
+
+void CLG_(zero_cost)(EventSet* es, ULong* cost)
+{
+   Int i;
+
+   if (!cost) return;
+
+   for(i=0; i<es->size; i++)
+      cost[i] = 0;
+}
+
+Bool CLG_(is_zero_cost)(EventSet* es, ULong* cost)
+{
+   Int i = 0;
+
+   if (!cost) return True;
+
+   while(i<es->size) {
+      if (cost[i] != 0) return False;
+      i = es->e[i].nextTop;
+   }
+   return True;
+}
+
+Bool CLG_(is_equal_cost)(EventSet* es, ULong* c1, ULong* c2)
+{
+   Int i = 0;
+
+   if (!c1) return CLG_(is_zero_cost)(es,c2);
+   if (!c2) return CLG_(is_zero_cost)(es,c1);
+
+   while(i<es->size) {
+      if (c1[i] != c2[i]) return False;
+      if (c1[i] == 0)
+         i = es->e[i].nextTop;
+      else
+         i++;
+   }
+   return True;
+}
+
+void CLG_(copy_cost)(EventSet* es, ULong* dst, ULong* src)
+{
+   Int i;
+
+   if (!src) {
+      CLG_(zero_cost)(es, dst);
+      return;
+   }
+   CLG_ASSERT(dst != 0);
+
+   for(i=0; i<es->size; i++)
+      dst[i] = src[i];
+}
+
+void CLG_(copy_cost_lz)(EventSet* es, ULong** pdst, ULong* src)
+{
+   Int i;
+   ULong* dst;
+
+   CLG_ASSERT(pdst != 0);
+
+   if (!src) {
+      CLG_(zero_cost)(es, *pdst);
+      return;
+   }
+   dst = *pdst;
+   if (!dst)
+      dst = *pdst = CLG_(get_eventset_cost)(es);
+
+   for(i=0; i<es->size; i++)
+      dst[i] = src[i];
+}
+
+void CLG_(add_cost)(EventSet* es, ULong* dst, ULong* src)
+{
+   Int i = 0;
+
+   if (!src) return;
+   CLG_ASSERT(dst != 0);
+
+   while(i<es->size) {
+      if (src[i] == 0)
+         i = es->e[i].nextTop;
+      else {
+         dst[i] += src[i];
+         i++;
+      }
+   }
+}
+
+void CLG_(add_cost_lz)(EventSet* es, ULong** pdst, ULong* src)
+{
+   Int i;
+   ULong* dst;
+
+   if (!src) return;
+   CLG_ASSERT(pdst != 0);
+
+   dst = *pdst;
+   if (!dst) {
+      dst = *pdst = CLG_(get_eventset_cost)(es);
+      CLG_(copy_cost)(es,dst,src);
+      return;
+   }
+
+   i = 0;
+   while(i<es->size) {
+      if (src[i] == 0)
+         i = es->e[i].nextTop;
+      else {
+         dst[i] += src[i];
+         i++;
+      }
+   }
+}
+
+/* Adds src to dst and zeros src. Returns false if nothing changed */
+Bool CLG_(add_and_zero_cost)(EventSet* es, ULong* dst, ULong* src)
+{
+   Int i = 0, j = 0;
+
+   CLG_DEBUGIF(6) {
+      CLG_DEBUG(6, " add_and_zero_cost(%s, dst %p, src %p)\n", es->name, dst, src);
+      CLG_(print_cost)(-5, es, src);
+   }
+
+   if (!es || !src) return False;
+
+   while(i<es->size) {
+      if (src[i] == 0)
+         i = es->e[i].nextTop;
+      else {
+         dst[i] += src[i];
+         src[i] = 0;
+         i++;
+         j++;
+      }
+   }
+
+   return (j>0);
+}
+
+/* Adds src to dst and zeros src. Returns false if nothing changed */
+Bool CLG_(add_and_zero_cost_lz)(EventSet* es, ULong** pdst, ULong* src)
+{
+   Int i;
+   ULong* dst;
+
+   if (!src) return False;
+
+   i = 0;
+   while(1) {
+      if (i >= es->size) return False;
+      if (src[i] != 0) break;
+      i = es->e[i].nextTop;
+   }
+
+   CLG_ASSERT(pdst != 0);
+   dst = *pdst;
+   if (!dst) {
+      dst = *pdst = CLG_(get_eventset_cost)(es);
+      CLG_(copy_cost)(es,dst,src);
+      CLG_(zero_cost)(es,src);
+      return True;
+   }
+
+   dst[i] += src[i];
+   src[i] = 0;
+   i++;
+
+   while(i<es->size) {
+      if (src[i] == 0)
+         i = es->e[i].nextTop;
+      else {
+         dst[i] += src[i];
+         src[i] = 0;
+         i++;
+      }
+   }
+
+   return True;
+}
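+
+/* Note (illustrative): the _lz variants take ULong** because cost arrays
+ * are allocated lazily. A jCC, for example, starts with cost == 0 (meaning
+ * "all zero"); only when a nonzero amount is actually added does the
+ * helper allocate a real array:
+ *
+ *   FullCost c = 0;                        // "zero cost", no memory yet
+ *   CLG_(add_cost_lz)(es, &c, some_cost);  // allocates + copies on demand
+ *
+ * This keeps the many never-executed cost centers cheap.
+ */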
+/* Adds difference of new and old to dst, and sets old to new.
+ * Returns false if nothing changed */
+Bool CLG_(add_diff_cost)(EventSet* es, ULong* dst, ULong* old, ULong* new)
+{
+   Int i = 0, j = 0;
+
+   while(i<es->size) {
+      if (new[i] == old[i])
+         i = es->e[i].nextTop;
+      else {
+         dst[i] += new[i] - old[i];
+         old[i] = new[i];
+         i++;
+         j++;
+      }
+   }
+
+   return (j>0);
+}
+
+/* Adds difference of new and old to dst, and sets old to new.
+ * Returns false if nothing changed */
+Bool CLG_(add_diff_cost_lz)(EventSet* es, ULong** pdst,
+                            ULong* old, ULong* new)
+{
+   Int i;
+   ULong* dst;
+
+   if (!old && !new) return False;
+   CLG_ASSERT(old && new);
+
+   i = 0;
+   while(1) {
+      if (i >= es->size) return False;
+      if (old[i] != new[i]) break;
+      i = es->e[i].nextTop;
+   }
+
+   CLG_ASSERT(pdst != 0);
+   dst = *pdst;
+   if (!dst) {
+      dst = *pdst = CLG_(get_eventset_cost)(es);
+      CLG_(zero_cost)(es,dst);
+   }
+
+   dst[i] += new[i] - old[i];
+   old[i] = new[i];
+   i++;
+
+   while(i<es->size) {
+      if (new[i] == old[i])
+         i = es->e[i].nextTop;
+      else {
+         dst[i] += new[i] - old[i];
+         old[i] = new[i];
+         i++;
+      }
+   }
+
+   return True;
+}
+
+/* Returns number of characters written */
+Int CLG_(sprint_cost)(Char* buf, EventSet* es, ULong* c)
+{
+   Int i, pos, skipped = 0;
+
+   if (!c || es->size==0) return 0;
+
+   /* At least one entry */
+   pos = VG_(sprintf)(buf, "%llu", c[0]);
+   i = 1;
+
+   while(i<es->size) {
+      if (c[i] == 0) {
+         skipped += es->e[i].nextTop - i;
+         i = es->e[i].nextTop;
+      }
+      else {
+         while(skipped>0) {
+            buf[pos++] = ' ';
+            buf[pos++] = '0';
+            skipped--;
+         }
+         buf[pos++] = ' ';
+         pos += VG_(sprintf)(buf+pos, "%llu", c[i]);
+         i++;
+      }
+   }
+
+   return pos;
+}
+
+
+/* Allocate space for an event mapping */
+EventMapping* CLG_(get_eventmapping)(EventSet* es)
+{
+   EventMapping* em;
+
+   CLG_ASSERT(es != 0);
+
+   em = (EventMapping*) CLG_MALLOC(sizeof(EventMapping) +
+                                   es->capacity * sizeof(Int));
+   em->capacity = es->capacity;
+   em->size = 0;
+   em->set = es;
+
+   return em;
+}
+
+void CLG_(append_event)(EventMapping* em, Char* n)
+{
+   Int i;
+
+   CLG_ASSERT(em != 0);
+
+   for(i=0; i<em->set->size; i++)
+      if (VG_(strcmp)(n, em->set->e[i].type->name)==0)
+         break;
+
+   if (i == em->set->size) return;
+
+   CLG_ASSERT(em->capacity > em->size);
+
+   em->index[em->size] = i;
+   em->size++;
+}
+
+
+/* Returns number of characters written */
+Int CLG_(sprint_eventmapping)(Char* buf, EventMapping* em)
+{
+   Int i, pos = 0;
+
+   CLG_ASSERT(em != 0);
+
+   for(i=0; i< em->size; i++) {
+      if (pos>0) buf[pos++] = ' ';
+      pos += VG_(sprintf)(buf + pos, em->set->e[em->index[i]].type->name);
+   }
+   buf[pos] = 0;
+
+   return pos;
+}
+
+/* Returns number of characters written */
+Int CLG_(sprint_mappingcost)(Char* buf, EventMapping* em, ULong* c)
+{
+   Int i, pos, skipped = 0;
+
+   if (!c || em->size==0) return 0;
+
+   /* At least one entry */
+   pos = VG_(sprintf)(buf, "%llu", c[em->index[0]]);
+   i = 1;
+
+   while(i<em->size) {
+      if (c[em->index[i]] == 0) {
+         skipped++;
+         i++;
+      }
+      else {
+         while(skipped>0) {
+            buf[pos++] = ' ';
+            buf[pos++] = '0';
+            skipped--;
+         }
+         buf[pos++] = ' ';
+         pos += VG_(sprintf)(buf+pos, "%llu", c[em->index[i]]);
+         i++;
+      }
+   }
+
+   return pos;
+}
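+
+/* Note (illustrative): an EventMapping selects and orders a subset of an
+ * EventSet for output. CLG_(dumpmap) is built this way: append_event()
+ * looks up each requested name in the full set and records its index, so
+ * sprint_mappingcost() can print exactly the columns announced by the
+ * "events:" header line, with runs of zeros written out lazily.
+ */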
diff --git a/callgrind/events.h b/callgrind/events.h
new file mode 100644
index 0000000000..d2cad1e2a9
--- /dev/null
+++ b/callgrind/events.h
@@ -0,0 +1,113 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind                                                    ---*/
+/*---                                                     events.h ---*/
+/*--- (C) 2004-2005, Josef Weidendorfer                            ---*/
+/*--------------------------------------------------------------------*/
+
+
+/* Abstractions for 64-bit cost lists (events.h) */
+
+#ifndef CG_EVENTS
+#define CG_EVENTS
+
+#include "pub_tool_basics.h"
+
+#define CLG_(str) VGAPPEND(vgCallgrind_,str)
+
+/* An event type */
+typedef struct _EventType EventType;
+struct _EventType {
+   Char* name;
+   Char* description;
+   Int id;
+};
+
+EventType* CLG_(register_eventtype)(Char*);
+EventType* CLG_(get_eventtype)(Char*);
+EventType* CLG_(get_eventtype_byindex)(Int id);
+
+/* An event set is an ordered list of event types, which comes down
+ * to a description for ordered lists of costs.
+ * Often, the costs of two event types are related, e.g. one is always
+ * smaller than the other. This is useful to speed up arithmetic on cost
+ * lists: each event type in the set has a <nextTop> index. All indexes
+ * before <nextTop> are promised to hold smaller values than the current.
+ */
+typedef struct _EventSetEntry EventSetEntry;
+struct _EventSetEntry {
+   EventType* type;
+   Int nextTop;
+};
+typedef struct _EventSet EventSet;
+struct _EventSet {
+   Char* name;
+   Int size;
+   Int capacity;
+   EventSetEntry e[0];
+};
+
+
+/* Some events out of an event set.
+ * Used to print out part of an EventSet, or in another order.
+ */
+typedef struct _EventMapping EventMapping;
+struct _EventMapping {
+   EventSet* set;
+   Int size;
+   Int capacity;
+   Int index[0];
+};
+
+
+/* Allocate space for an event set */
+EventSet* CLG_(get_eventset)(Char* n, Int capacity);
+/* Incorporate an event type into a set, get start offset */
+Int CLG_(add_eventtype)(EventSet* dst, EventType*);
+/* Incorporate event types into a set, with ... < second < first */
+Int CLG_(add_dep_event2)(EventSet* dst, EventType* e1, EventType* e2);
+Int CLG_(add_dep_event3)(EventSet* dst,
+                         EventType* e1, EventType* e2, EventType* e3);
+Int CLG_(add_dep_event4)(EventSet* dst,
+                         EventType* e1, EventType* e2, EventType* e3,
+                         EventType* e4);
+/* Incorporate one event set into another, get start offset */
+Int CLG_(add_eventset)(EventSet* dst, EventSet* src);
+/* Returns number of characters written */
+Int CLG_(sprint_eventset)(Char* buf, EventSet*);
+/* Allocate cost array for an event set */
+ULong* CLG_(get_eventset_cost)(EventSet*);
+
+/* Operations on costs. A cost pointer of 0 means zero cost.
+ * Functions ending in _lz allocate cost arrays lazily if needed.
+ */
+/* Set costs for the full capacity of an event set to 0 */
+void CLG_(init_cost)(EventSet*,ULong*);
+/* Like init_cost, but allocates the cost array if needed */
+void CLG_(init_cost_lz)(EventSet*,ULong**);
+/* Set costs of an event set to zero */
+void CLG_(zero_cost)(EventSet*,ULong*);
+Bool CLG_(is_zero_cost)(EventSet*,ULong*);
+Bool CLG_(is_equal_cost)(EventSet*,ULong*,ULong*);
+void CLG_(copy_cost)(EventSet*,ULong* dst, ULong* src);
+void CLG_(copy_cost_lz)(EventSet*,ULong** pdst, ULong* src);
+void CLG_(add_cost)(EventSet*,ULong* dst, ULong* src);
+void CLG_(add_cost_lz)(EventSet*,ULong** pdst, ULong* src);
+/* Adds src to dst and zeros src. Returns false if nothing changed */
+Bool CLG_(add_and_zero_cost)(EventSet*,ULong* dst, ULong* src);
+Bool CLG_(add_and_zero_cost_lz)(EventSet*,ULong** pdst, ULong* src);
+/* Adds difference of new and old to dst, and sets old to new.
+ * Returns false if nothing changed */
+Bool CLG_(add_diff_cost)(EventSet*,ULong* dst, ULong* old, ULong* new);
+Bool CLG_(add_diff_cost_lz)(EventSet*,ULong** pdst, ULong* old, ULong* new);
+/* Returns number of characters written */
+Int CLG_(sprint_cost)(Char* buf, EventSet*, ULong*);
+
+/* Allocate space for an event mapping */
+EventMapping* CLG_(get_eventmapping)(EventSet*);
+void CLG_(append_event)(EventMapping*, Char*);
+/* Returns number of characters written */
+Int CLG_(sprint_eventmapping)(Char* buf, EventMapping*);
+/* Returns number of characters written */
+Int CLG_(sprint_mappingcost)(Char* buf, EventMapping*, ULong*);
+
+#endif /* CG_EVENTS */
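+
+/* Usage sketch (illustrative only; the real wiring lives in sim.c and
+ * main.c of this patch):
+ *
+ *   EventType* Ir = CLG_(register_eventtype)("Ir");
+ *   EventType* Dr = CLG_(register_eventtype)("Dr");
+ *   EventSet*  es = CLG_(get_eventset)("sim", 2);
+ *   Int ir_off = CLG_(add_eventtype)(es, Ir);
+ *   Int dr_off = CLG_(add_eventtype)(es, Dr);
+ *   ULong* cost = CLG_(get_eventset_cost)(es);
+ *   CLG_(init_cost)(es, cost);
+ *   cost[ir_off]++;                   // count one instruction fetch
+ *   CLG_(add_cost)(es, total, cost);  // accumulate into another array
+ */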
diff --git a/callgrind/fn.c b/callgrind/fn.c
new file mode 100644
index 0000000000..a786c5097a
--- /dev/null
+++ b/callgrind/fn.c
@@ -0,0 +1,616 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind                                                    ---*/
+/*---                                                         fn.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Callgrind, a Valgrind tool for call tracing.
+
+   Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+#define N_INITIAL_FN_ARRAY_SIZE 10071
+
+static fn_array current_fn_active;
+
+static Addr runtime_resolve_addr = 0;
+static int runtime_resolve_length = 0;
+
+/* _ld_runtime_resolve, located in ld.so, needs special handling:
+ * The jump at its end into the resolved function should not be
+ * represented as a call (as usually done in callgrind with jumps),
+ * but as a return + call. Otherwise, the repeated existence of
+ * _ld_runtime_resolve in call chains will lead to huge cycles,
+ * making the profile almost worthless.
+ *
+ * If ld.so is stripped, the symbol will not appear. But as this
+ * function is handcrafted assembler, we search for it...
+ *
+ * Sets runtime_resolve_addr/length if the code is found.
+ */
+static void search_runtime_resolve(obj_node* obj)
+{
+   /* We do not check the target bytes of the embedded call, therefore
+    * we have >1 ranges to compare.
+    * We use a tuple sequence (offset,length) into the code array for this.
+    */
+
+#if defined(VGA_x86)
+   /* Check ranges [0-11], [16-23] */
+   static int code_offsets[] = { 0, 12, 16, 8, 24, 0 };
+   static unsigned char code[] = {
+      /* 0*/ 0x50, 0x51, 0x52, 0x8b, 0x54, 0x24, 0x10, 0x8b,
+      /* 8*/ 0x44, 0x24, 0x0c, 0xe8, 0x70, 0x01, 0x00, 0x00,
+      /*16*/ 0x5a, 0x59, 0x87, 0x04, 0x24, 0xc2, 0x08, 0x00 };
+#else
+#if defined(VGA_ppc32)
+   static int code_offsets[] = { 0, 65, 68, 64, 132, 0 };
+   static unsigned char code[] = {
+      /* 0*/ 0x94, 0x21, 0xff, 0xc0, 0x90, 0x01, 0x00, 0x0c,
+      /* 8*/ 0x90, 0x61, 0x00, 0x10, 0x90, 0x81, 0x00, 0x14,
+      /*16*/ 0x7d, 0x83, 0x63, 0x78, 0x90, 0xa1, 0x00, 0x18,
+      /*24*/ 0x7d, 0x64, 0x5b, 0x78, 0x90, 0xc1, 0x00, 0x1c,
+      /*32*/ 0x7c, 0x08, 0x02, 0xa6, 0x90, 0xe1, 0x00, 0x20,
+      /*40*/ 0x90, 0x01, 0x00, 0x30, 0x91, 0x01, 0x00, 0x24,
+      /*48*/ 0x7c, 0x00, 0x00, 0x26, 0x91, 0x21, 0x00, 0x28,
+      /*56*/ 0x91, 0x41, 0x00, 0x2c, 0x90, 0x01, 0x00, 0x08,
+      /*64*/ 0x48, 0x00, 0x02, 0x91, 0x7c, 0x69, 0x03, 0xa6, /* at 64: bl aff0 */
+      /*72*/ 0x80, 0x01, 0x00, 0x30, 0x81, 0x41, 0x00, 0x2c,
+      /*80*/ 0x81, 0x21, 0x00, 0x28, 0x7c, 0x08, 0x03, 0xa6,
+      /*88*/ 0x81, 0x01, 0x00, 0x24, 0x80, 0x01, 0x00, 0x08,
+      /*96*/ 0x80, 0xe1, 0x00, 0x20, 0x80, 0xc1, 0x00, 0x1c,
+      /*104*/0x7c, 0x0f, 0xf1, 0x20, 0x80, 0xa1, 0x00, 0x18,
+      /*112*/0x80, 0x81, 0x00, 0x14, 0x80, 0x61, 0x00, 0x10,
+      /*120*/0x80, 0x01, 0x00, 0x0c, 0x38, 0x21, 0x00, 0x40,
+      /*128*/0x4e, 0x80, 0x04, 0x20 };
+#else
+#if defined(VGA_amd64)
+   /* x86_64 */
+   static int code_offsets[] = { 0, 62, 66, 44, 110, 0 };
+   static unsigned char code[] = {
+      /* 0*/ 0x48, 0x83, 0xec, 0x38, 0x48, 0x89, 0x04, 0x24,
+      /* 8*/ 0x48, 0x89, 0x4c, 0x24, 0x08, 0x48, 0x89, 0x54, 0x24, 0x10,
+      /*18*/ 0x48, 0x89, 0x74, 0x24, 0x18, 0x48, 0x89, 0x7c, 0x24, 0x20,
+      /*28*/ 0x4c, 0x89, 0x44, 0x24, 0x28, 0x4c, 0x89, 0x4c, 0x24, 0x30,
+      /*38*/ 0x48, 0x8b, 0x74, 0x24, 0x40, 0x49, 0x89, 0xf3,
+      /*46*/ 0x4c, 0x01, 0xde, 0x4c, 0x01, 0xde, 0x48, 0xc1, 0xe6, 0x03,
+      /*56*/ 0x48, 0x8b, 0x7c, 0x24, 0x38, 0xe8, 0xee, 0x01, 0x00, 0x00,
+      /*66*/ 0x49, 0x89, 0xc3, 0x4c, 0x8b, 0x4c, 0x24, 0x30,
+      /*74*/ 0x4c, 0x8b, 0x44, 0x24, 0x28, 0x48, 0x8b, 0x7c, 0x24, 0x20,
+      /*84*/ 0x48, 0x8b, 0x74, 0x24, 0x18, 0x48, 0x8b, 0x54, 0x24, 0x10,
+      /*94*/ 0x48, 0x8b, 0x4c, 0x24, 0x08, 0x48, 0x8b, 0x04, 0x24,
+      /*103*/0x48, 0x83, 0xc4, 0x48, 0x41, 0xff, 0xe3 };
+#else
+   /* Unknown architecture, no check is done */
+   static int code_offsets[] = { 0, 0 };
+   static unsigned char code[] = { 0 };
+#endif
+#endif
+#endif
+
+   int *range = &(code_offsets[0]), *r = 0;
+   Bool found = False;
+   Addr addr, end;
+
+   /* Only search in libraries with a given name pattern */
+   if ((VG_(strncmp)(obj->name, "/lib/ld", 7) != 0) &&
+       (VG_(strncmp)(obj->name, "/lib64/ld", 9) != 0)) return;
+
+   CLG_DEBUG(1, "search_rs: Checking %d bytes of [%x %x %x...]\n",
+             range[1], code[0], code[1], code[2]);
+
+   end = obj->start + obj->size - range[1];
+   addr = obj->start;
+   while(addr < end) {
+      if (VG_(memcmp)( (void*)addr, code, range[1]) == 0) {
+
+         r = range + 2;
+         found = True;
+         while(r[1]) {
+            CLG_DEBUG(1, " [%p] Found! Checking %d bytes of [%x %x %x...]\n",
+                      addr, r[1], code[r[0]], code[r[0]+1], code[r[0]+2]);
+
+            if (VG_(memcmp)( (void*)(addr+r[0]), code+r[0], r[1]) != 0) {
+               found = False;
+               break;
+            }
+            r += 2;
+         }
+         if (found) break;
+      }
+      addr++;
+   }
+
+   if (!found || (r==0)) return;
+
+   if (VG_(clo_verbosity) > 1)
+      VG_(message)(Vg_DebugMsg, "Code check found runtime_resolve: %s +%p=%p, length %d",
+                   obj->name + obj->last_slash_pos,
+                   addr - obj->start, addr, r[0]);
+
+   runtime_resolve_addr = addr;
+   runtime_resolve_length = r[0];
+}
+
+/*------------------------------------------------------------*/
+/*--- Object/File/Function hash entry operations           ---*/
+/*------------------------------------------------------------*/
+
+/* Object hash table, fixed */
+static obj_node* obj_table[N_OBJ_ENTRIES];
+
+void CLG_(init_obj_table)()
+{
+   Int i;
+   for (i = 0; i < N_OBJ_ENTRIES; i++)
+      obj_table[i] = 0;
+}
+
+#define HASH_CONSTANT 256
+
+static UInt str_hash(const Char *s, UInt table_size)
+{
+   int hash_value = 0;
+   for ( ; *s; s++)
+      hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
+   return hash_value;
+}
+
+
+static Char* anonymous_obj = "???";
+
+static __inline__
+obj_node* new_obj_node(SegInfo* si, obj_node* next)
+{
+   Int i;
+   obj_node* new;
+
+   new = (obj_node*) CLG_MALLOC(sizeof(obj_node));
+   new->name = si ? VG_(strdup)( VG_(seginfo_filename)(si) )
+                  : anonymous_obj;
+   for (i = 0; i < N_FILE_ENTRIES; i++) {
+      new->files[i] = NULL;
+   }
+   CLG_(stat).distinct_objs ++;
+   new->number = CLG_(stat).distinct_objs;
+   new->start  = si ? VG_(seginfo_start)(si) : 0;
+   new->size   = si ? VG_(seginfo_size)(si) : 0;
+   new->offset = si ? VG_(seginfo_sym_offset)(si) : 0;
+   new->next   = next;
+
+   // not only used for debug output (see static.c)
+   new->last_slash_pos = 0;
+   i = 0;
+   while(new->name[i]) {
+      if (new->name[i]=='/') new->last_slash_pos = i+1;
+      i++;
+   }
+
+   if (runtime_resolve_addr == 0) search_runtime_resolve(new);
+
+   return new;
+}
+
+obj_node* CLG_(get_obj_node)(SegInfo* si)
+{
+   obj_node* curr_obj_node;
+   UInt objname_hash;
+   const UChar* obj_name;
+
+   obj_name = si ? (Char*) VG_(seginfo_filename)(si) : anonymous_obj;
+
+   /* lookup in obj hash */
+   objname_hash = str_hash(obj_name, N_OBJ_ENTRIES);
+   curr_obj_node = obj_table[objname_hash];
+   while (NULL != curr_obj_node &&
+          VG_(strcmp)(obj_name, curr_obj_node->name) != 0) {
+      curr_obj_node = curr_obj_node->next;
+   }
+   if (NULL == curr_obj_node) {
+      obj_table[objname_hash] = curr_obj_node =
+         new_obj_node(si, obj_table[objname_hash]);
+   }
+
+   return curr_obj_node;
+}
+
+
+static __inline__
+file_node* new_file_node(Char filename[FILENAME_LEN],
+                         obj_node* obj, file_node* next)
+{
+   Int i;
+   file_node* new = (file_node*) CLG_MALLOC(sizeof(file_node));
+   new->name = VG_(strdup)(filename);
+   for (i = 0; i < N_FN_ENTRIES; i++) {
+      new->fns[i] = NULL;
+   }
+   CLG_(stat).distinct_files++;
+   new->number = CLG_(stat).distinct_files;
+   new->obj    = obj;
+   new->next   = next;
+   return new;
+}
+
+
+file_node* CLG_(get_file_node)(obj_node* curr_obj_node,
+                               Char filename[FILENAME_LEN])
+{
+   file_node* curr_file_node;
+   UInt filename_hash;
+
+   /* lookup in file hash */
+   filename_hash = str_hash(filename, N_FILE_ENTRIES);
+   curr_file_node = curr_obj_node->files[filename_hash];
+   while (NULL != curr_file_node &&
+          VG_(strcmp)(filename, curr_file_node->name) != 0) {
+      curr_file_node = curr_file_node->next;
+   }
+   if (NULL == curr_file_node) {
+      curr_obj_node->files[filename_hash] = curr_file_node =
+         new_file_node(filename, curr_obj_node,
+                       curr_obj_node->files[filename_hash]);
+   }
+
+   return curr_file_node;
+}
+
+/* forward decl. */
+static void resize_fn_array(void);
+
+static __inline__
+fn_node* new_fn_node(Char fnname[FILENAME_LEN],
+                     file_node* file, fn_node* next)
+{
+   fn_node* new = (fn_node*) CLG_MALLOC(sizeof(fn_node));
+   new->name = VG_(strdup)(fnname);
+
+   CLG_(stat).distinct_fns++;
+   new->number   = CLG_(stat).distinct_fns;
+   new->last_cxt = 0;
+   new->pure_cxt = 0;
+   new->file     = file;
+   new->next     = next;
+
+   new->dump_before = False;
+   new->dump_after = False;
+   new->zero_before = False;
+   new->toggle_collect = False;
+   new->skip = False;
+   new->pop_on_jump = False;
+   new->is_malloc = False;
+   new->is_realloc = False;
+   new->is_free = False;
+
+   new->group = 0;
+   new->separate_callers = CLG_(clo).separate_callers;
+   new->separate_recursions = CLG_(clo).separate_recursions;
+
+#if CLG_ENABLE_DEBUG
+   new->verbosity = -1;
+#endif
+
+   if (CLG_(stat).distinct_fns >= current_fn_active.size)
+      resize_fn_array();
+
+   return new;
+}
+
+
+/* Get a function node in hash2 with known file node.
+ * Hash nodes are created if needed.
+ */
+static
+fn_node* get_fn_node_infile(file_node* curr_file_node,
+                            Char fnname[FN_NAME_LEN])
+{
+   fn_node* curr_fn_node;
+   UInt fnname_hash;
+
+   CLG_ASSERT(curr_file_node != 0);
+
+   /* lookup in function hash */
+   fnname_hash = str_hash(fnname, N_FN_ENTRIES);
+   curr_fn_node = curr_file_node->fns[fnname_hash];
+   while (NULL != curr_fn_node &&
+          VG_(strcmp)(fnname, curr_fn_node->name) != 0) {
+      curr_fn_node = curr_fn_node->next;
+   }
+   if (NULL == curr_fn_node) {
+      curr_file_node->fns[fnname_hash] = curr_fn_node =
+         new_fn_node(fnname, curr_file_node,
+                     curr_file_node->fns[fnname_hash]);
+   }
+
+   return curr_fn_node;
+}
+
+
+/* Get a function node in a Segment.
+ * Hash nodes are created if needed.
+ */
+static __inline__
+fn_node* get_fn_node_inseg(SegInfo* si,
+                           Char filename[FILENAME_LEN],
+                           Char fnname[FN_NAME_LEN])
+{
+   obj_node  *obj  = CLG_(get_obj_node)(si);
+   file_node *file = CLG_(get_file_node)(obj, filename);
+   fn_node   *fn   = get_fn_node_infile(file, fnname);
+
+   return fn;
+}
+
+
+Bool CLG_(get_debug_info)(Addr instr_addr,
+                          Char filename[FILENAME_LEN],
+                          Char fn_name[FN_NAME_LEN], UInt* line_num,
+                          SegInfo** pSegInfo)
+{
+   Bool found1, found2, result = True;
+   UInt line;
+
+   CLG_DEBUG(6, " + get_debug_info(%p)\n", instr_addr);
+
+   if (pSegInfo) {
+      *pSegInfo = VG_(find_seginfo)(instr_addr);
+
+      // for generated code in anonymous space, pSegInfo is 0
+   }
+
+   found1 = VG_(get_filename_linenum)(instr_addr,
+                                      filename, FILENAME_LEN,
+                                      NULL, 0, NULL, // FIXME: add dirnames!
+                                      &line);
+   found2 = VG_(get_fnname)(instr_addr,
+                            fn_name, FN_NAME_LEN);
+
+   if (!found1 && !found2) {
+      CLG_(stat).no_debug_BBs++;
+      VG_(strcpy)(filename, "???");
+      VG_(strcpy)(fn_name, "???");
+      if (line_num) *line_num=0;
+      result = False;
+
+   } else if ( found1 && found2) {
+      CLG_(stat).full_debug_BBs++;
+      if (line_num) *line_num=line;
+
+   } else if ( found1 && !found2) {
+      CLG_(stat).file_line_debug_BBs++;
+      VG_(strcpy)(fn_name, "???");
+      if (line_num) *line_num=line;
+
+   } else /*(!found1 && found2)*/ {
+      CLG_(stat).fn_name_debug_BBs++;
+      VG_(strcpy)(filename, "???");
+      if (line_num) *line_num=0;
+   }
+
+   CLG_DEBUG(6, " - get_debug_info(%p): seg '%s', fn %s\n",
+             instr_addr,
+             !pSegInfo   ? (const UChar*)"-" :
+             (*pSegInfo) ? VG_(seginfo_filename)(*pSegInfo) :
+                           (const UChar*)"(None)",
+             fn_name);
+
+   return result;
+}
+
+/* for _libc_freeres_wrapper => _exit renaming */
+static BB* exit_bb = 0;
+
+
+/*
+ * Attach function struct to a BB from debug info.
+ */
+fn_node* CLG_(get_fn_node)(BB* bb)
+{
+   Char filename[FILENAME_LEN], fnname[FN_NAME_LEN];
+   SegInfo* si;
+   UInt line_num;
+   fn_node* fn;
+
+   /* fn from debug info is idempotent for a BB */
+   if (bb->fn) return bb->fn;
+
+   CLG_DEBUG(3,"+ get_fn_node(BB %p)\n", bb_addr(bb));
+
+   /* get function/file name, line number and object of
+    * the BB according to debug information
+    */
+   CLG_(get_debug_info)(bb_addr(bb),
+                        filename, fnname, &line_num, &si);
+
+   if (0 == VG_(strcmp)(fnname, "???")) {
+      int p;
+
+      /* Use address as found in library */
+      if (sizeof(Addr) == 4)
+         p = VG_(sprintf)(fnname, "%08p", bb->offset);
+      else
+         // 64bit address
+         p = VG_(sprintf)(fnname, "%016p", bb->offset);
+
+      VG_(sprintf)(fnname+p, "%s",
+                   (bb->sect_kind == Vg_SectData) ? " [Data]" :
+                   (bb->sect_kind == Vg_SectBSS)  ? " [BSS]"  :
+                   (bb->sect_kind == Vg_SectGOT)  ? " [GOT]"  :
+                   (bb->sect_kind == Vg_SectPLT)  ? " [PLT]"  : "");
+   }
+   else {
+      if (VG_(get_fnname_if_entry)(bb_addr(bb), fnname, FN_NAME_LEN))
+         bb->is_entry = 1;
+   }
+
+   /* HACK for correct _exit:
+    * _exit is redirected to VG_(__libc_freeres_wrapper) by valgrind,
+    * so we rename it back again :-)
+    */
+   if (0 == VG_(strcmp)(fnname, "vgPlain___libc_freeres_wrapper")
+       && exit_bb) {
+      CLG_(get_debug_info)(bb_addr(exit_bb),
+                           filename, fnname, &line_num, &si);
+
+      CLG_DEBUG(1, "__libc_freeres_wrapper renamed to _exit\n");
+   }
+   if (0 == VG_(strcmp)(fnname, "_exit") && !exit_bb)
+      exit_bb = bb;
+
+   if (runtime_resolve_addr &&
+       (bb_addr(bb) >= runtime_resolve_addr) &&
+       (bb_addr(bb) < runtime_resolve_addr + runtime_resolve_length)) {
+      /* BB in runtime_resolve found by code check; use this name */
+      VG_(sprintf)(fnname, "_dl_runtime_resolve");
+   }
+
+   /* get fn_node struct for this function */
+   fn = get_fn_node_inseg( si, filename, fnname);
+
+   /* if this is the 1st time the function is seen,
+    * some attributes are set */
+   if (fn->pure_cxt == 0) {
+
+      /* Every function gets a "pure" context, i.e. a context with stack
+       * depth 1 containing only this function. This is used for
+       * compression of mangled names.
+       */
+      fn_node* pure[2];
+      pure[0] = 0;
+      pure[1] = fn;
+      fn->pure_cxt = CLG_(get_cxt)(pure+1);
+
+      if (bb->sect_kind == Vg_SectPLT)
+         fn->skip = CLG_(clo).skip_plt;
+
+      if (VG_(strcmp)(fn->name, "_dl_runtime_resolve")==0) {
+         fn->pop_on_jump = True;
+
+         if (VG_(clo_verbosity) > 1)
+            VG_(message)(Vg_DebugMsg, "Symbol match: found runtime_resolve: %s +%p=%p",
+                         bb->obj->name + bb->obj->last_slash_pos,
+                         bb->offset, bb_addr(bb));
+      }
+
+      fn->is_malloc  = (VG_(strcmp)(fn->name, "malloc")==0);
+      fn->is_realloc = (VG_(strcmp)(fn->name, "realloc")==0);
+      fn->is_free    = (VG_(strcmp)(fn->name, "free")==0);
+
+      /* apply config options from function name patterns
+       * given on command line */
+      CLG_(update_fn_config)(fn);
+   }
+
+
+   bb->fn   = fn;
+   bb->line = line_num;
+
+   CLG_DEBUG(3,"- get_fn_node(BB %p): %s (in %s:%u)\n",
+             bb_addr(bb), fnname, filename, line_num);
+
+   return fn;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Active function array operations                      ---*/
+/*------------------------------------------------------------*/
+
+/* The active function array is a thread-specific array
+ * of UInts, mapping function numbers to the active count of
+ * functions.
+ * The active count is the number of times a function appears
+ * in the current call stack, and is used when costs for recursion
+ * levels should be separated.
+ */
+
+UInt* CLG_(get_fn_entry)(Int n)
+{
+   CLG_ASSERT(n < current_fn_active.size);
+   return current_fn_active.array + n;
+}
+
+void CLG_(init_fn_array)(fn_array* a)
+{
+   Int i;
+
+   CLG_ASSERT(a != 0);
+
+   a->size = N_INITIAL_FN_ARRAY_SIZE;
+   if (a->size <= CLG_(stat).distinct_fns)
+      a->size = CLG_(stat).distinct_fns+1;
+
+   a->array = (UInt*) CLG_MALLOC(a->size * sizeof(UInt));
+   for(i=0; i<a->size; i++)
+      a->array[i] = 0;
+}
+
+void CLG_(copy_current_fn_array)(fn_array* dst)
+{
+   CLG_ASSERT(dst != 0);
+
+   dst->size  = current_fn_active.size;
+   dst->array = current_fn_active.array;
+}
+
+fn_array* CLG_(get_current_fn_array)()
+{
+   return &current_fn_active;
+}
+
+void CLG_(set_current_fn_array)(fn_array* a)
+{
+   CLG_ASSERT(a != 0);
+
+   current_fn_active.size  = a->size;
+   current_fn_active.array = a->array;
+   if (current_fn_active.size <= CLG_(stat).distinct_fns)
+      resize_fn_array();
+}
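+
+/* Example (illustrative): for a recursive fact(), each level on the call
+ * stack increments the active count stored at fact's function number:
+ *
+ *   UInt* a = CLG_(get_fn_entry)(fn->number);
+ *   (*a)++;   // on function enter (done by the call stack handling
+ *             // elsewhere in this patch)
+ *
+ * With --separate-recs=N, the cost center chosen for a BB of fact then
+ * depends on min(N, active count) - 1, so each recursion depth up to N
+ * gets its own cost center.
+ */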
+/* Ensure that the active function array is big enough:
+ * <distinct_fns> is the highest used index, so the array size
+ * has to be bigger than that.
+ */
+static void resize_fn_array(void)
+{
+   UInt* new;
+   Int i, newsize;
+
+   newsize = current_fn_active.size;
+   while (newsize <= CLG_(stat).distinct_fns) newsize *=2;
+
+   CLG_DEBUG(0, "Resize fn_active_array: %d => %d\n",
+             current_fn_active.size, newsize);
+
+   new = (UInt*) CLG_MALLOC(newsize * sizeof(UInt));
+   for(i=0; i<current_fn_active.size; i++)
+      new[i] = current_fn_active.array[i];
+   while(i<newsize)
+      new[i++] = 0;
+
+   VG_(free)(current_fn_active.array);
+   current_fn_active.size  = newsize;
+   current_fn_active.array = new;
+}
diff --git a/callgrind/global.h b/callgrind/global.h
new file mode 100644
--- /dev/null
+++ b/callgrind/global.h
+/*
+ * Cost info for a call arc (from->bb->jmp_addr => to->bb->addr)
+ *
+ * Each BB has at most one CALL instruction. The list of JCCs from
+ * this call is a pointer to the list head (stored in BBCC), and
+ * <next_from> chains the JCCs in the JCC struct.
+ *
+ * For fast lookup, JCCs are reachable with a hash table, keyed by
+ * the (from_bbcc,to) pair. <next_hash> is used for the JCC chain
+ * of one hash table entry.
+ *
+ * <cost> holds the event counts for already returned executions.
+ * The event counters at the last entering of the subroutine are kept
+ * with the call stack entry; <cost> is updated on returning from the
+ * subroutine by adding the diff of those saved counters and the
+ * current ones.
+ *
+ * After updating, the saved counters are set to the current ones.
+ * Thus, events are not counted twice for recursive calls (TODO: True?)
+ */
+#define JmpNone (Ijk_Boring+30)
+#define JmpCond (Ijk_Boring+31)
+
+struct _jCC {
+   Int  jmpkind;     /* JmpCall, JmpBoring, JmpCond */
+   jCC* next_hash;   /* for hash entry chain */
+   jCC* next_from;   /* next JCC from a BBCC */
+   BBCC *from, *to;  /* call arc from/to this BBCC */
+   UInt jmp;         /* jump no. in source */
+
+   ULong call_counter; /* no wraparound with 64 bit */
+
+   FullCost cost;    /* simulator + user counters */
+};
+
+
+/*
+ * Info for one instruction of a basic block.
+ */
+typedef struct _InstrInfo InstrInfo;
+struct _InstrInfo {
+   UInt instr_offset;
+   UInt instr_size;
+   UInt data_size;
+   UInt cost_offset;
+   EventSet* eventset;
+};
+
+
+/*
+ * Info for a conditional jump in a basic block
+ */
+typedef struct _CJmpInfo CJmpInfo;
+struct _CJmpInfo {
+   UInt instr; /* instruction index in this basic block */
+   Bool skip;  /* Cond.Jumps to next instruction should be ignored */
+};
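+
+/* Sketch (illustrative): for a program where main() calls foo() from two
+ * different call sites, there is one jCC per (calling BBCC, target BBCC)
+ * pair, found via the JCC hash:
+ *
+ *   BBCC(main, site1) --jCC--> BBCC(foo)    call_counter=..., cost=...
+ *   BBCC(main, site2) --jCC--> BBCC(foo)    call_counter=..., cost=...
+ *
+ * All jCCs starting at one BBCC exit are chained via next_from; all jCCs
+ * in one hash bucket via next_hash. The dump code walks next_from to emit
+ * the "cfn="/"calls=" records seen in dump.c above.
+ */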
+
+/**
+ * An instrumented basic block (BB).
+ *
+ * BBs are put into a resizable hash to allow for fast detection if a
+ * BB is to be retranslated but cost info is already available.
+ * The key for a BB is an (object, offset) tuple making it independent
+ * from possibly multiple mappings of the same ELF object.
+ *
+ * At the beginning of each instrumented BB,
+ * a call to setup_bbcc(), specifying a pointer to the
+ * according BB structure, is added.
+ *
+ * As the cost of a BB has to be distinguished depending on the context,
+ * multiple cost centers for one BB (struct BBCC) exist and the according
+ * BBCC is set by setup_bbcc.
+ */
+struct _BB {
+   obj_node*  obj;        /* ELF object of BB */
+   OffT       offset;     /* offset of BB in ELF object file */
+   BB*        next;       /* chaining for a hash entry */
+
+   VgSectKind sect_kind;  /* section of this BB, e.g. PLT */
+   UInt       instr_count;
+
+   /* filled by CLG_(get_fn_node) if debug info is available */
+   fn_node*   fn;         /* debug info for this BB */
+   UInt       line;
+   Bool       is_entry;   /* True if this BB is a function entry */
+
+   BBCC*      bbcc_list;  /* BBCCs for same BB (see next_bbcc in BBCC) */
+   BBCC*      last_bbcc;  /* Temporary: Cached for faster access (LRU) */
+
+   /* filled by CLG_(instrument) if not seen before */
+   UInt       cjmp_count; /* number of conditional exits */
+   CJmpInfo*  jmp;        /* array of info for condition jumps,
+                           * allocated directly after this struct */
+   Int        jmpkind;    /* remember jump kind of final exit */
+   Bool       cjmp_inverted; /* condition of last cond.jump can be inverted by VEX */
+
+   UInt       instr_len;
+   UInt       cost_count;
+   InstrInfo  instr[0];   /* info on instruction sizes and costs */
+};
+
+
+
+/**
+ * Function context
+ *
+ * Basic blocks are always executed in the scope of a context.
+ * A function context is a list of function nodes representing
+ * the call chain to the current context: i.e. fn[0] is the
+ * function we are currently in, fn[1] has called fn[0], and so on.
+ * Recursion levels are used for fn[0].
+ *
+ * To get a unique number for a full execution context, use
+ *   rec_index = min(<fn->separate_recursions>, <active count of fn[0]>) - 1;
+ *   unique_no = <base_number> + rec_index
+ *
+ * For each Context, recursion index and BB, there can be a BBCC.
+ */
+struct _Context {
+   UInt size;        // number of function dependencies
+   UInt base_number; // for context compression & dump array
+   Context* next;    // entry chaining for hash
+   UWord hash;       // for faster lookup...
+   fn_node* fn[0];
+};
+
+
+/*
+ * Cost info for one exit of a basic block
+ */
+typedef struct _JmpData JmpData;
+struct _JmpData {
+   ULong ecounter; /* number of times the BB was left at this exit */
+   jCC*  jcc_list; /* JCCs for Cond.Jumps from this exit */
+};
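+
+/* Worked example (illustrative): suppose context C = (foo, main) has
+ * base_number 17, foo->separate_recursions is 2, and foo currently
+ * appears 3 times on the call stack. Then
+ *
+ *   rec_index = min(2, 3) - 1 = 1
+ *   unique_no = 17 + 1       = 18
+ *
+ * i.e. all recursion levels >= 2 share the second cost center of C.
+ */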
+/*
+ * Info for a conditional jump in a basic block
+ */
+typedef struct _JmpData JmpData;
+struct _JmpData {
+    ULong ecounter; /* number of times the BB was left at this exit */
+    jCC*  jcc_list; /* JCCs for Cond. Jumps from this exit */
+};
+
+
+/*
+ * Basic Block Cost Center
+ *
+ * On demand, multiple BBCCs will be created for the same BB
+ * depending on command line options and:
+ * - current function (it's possible that a BB is executed in the
+ *   context of different functions, e.g. in manual assembler/PLT)
+ * - current thread ID
+ * - position where current function is called from
+ * - recursion level of current function
+ *
+ * The cost centres for the instructions of a basic block are
+ * stored in a contiguous array.
+ * They are distinguishable by their tag field.
+ */
+struct _BBCC {
+    BB*      bb;          /* BB for this cost center */
+
+    Context* cxt;         /* execution context of this BBCC */
+    ThreadId tid;         /* only for assertion check purpose */
+    UInt     rec_index;   /* Recursion index in rec->bbcc for this bbcc */
+    BBCC**   rec_array;   /* Variable sized array of pointers to
+                           * recursion BBCCs. Shared. */
+    ULong    ret_counter; /* how often returned from jccs of this bbcc;
+                           * used to check if a dump for this BBCC is needed */
+
+    BBCC*    next_bbcc;     /* Chain of BBCCs for same BB */
+    BBCC*    lru_next_bbcc; /* BBCC executed next the last time */
+
+    jCC*     lru_from_jcc; /* Temporary: Cached for faster access (LRU) */
+    jCC*     lru_to_jcc;   /* Temporary: Cached for faster access (LRU) */
+    FullCost skipped;      /* cost for skipped functions called from
+                            * jmp_addr. Allocated lazily */
+
+    BBCC*    next;          /* entry chain in hash */
+    ULong*   cost;          /* start of 64-bit costs for this BBCC */
+    ULong    ecounter_sum;  /* execution counter for first instruction of BB */
+    JmpData  jmp[0];
+};
+
+
+/* the <number>s of fn_node, file_node and obj_node are for compressed dumping
+ * and an index into the dump boolean table and fn_info_table
+ */
+
+struct _fn_node {
+    Char*      name;
+    UInt       number;
+    Context*   last_cxt; /* LRU info */
+    Context*   pure_cxt; /* the context with only the function itself */
+    file_node* file;     /* reverse mapping for 2nd hash */
+    fn_node*   next;
+
+    Bool dump_before :1;
+    Bool dump_after :1;
+    Bool zero_before :1;
+    Bool toggle_collect :1;
+    Bool skip :1;
+    Bool pop_on_jump :1;
+
+    Bool is_malloc :1;
+    Bool is_realloc :1;
+    Bool is_free :1;
+
+    Int  group;
+    Int  separate_callers;
+    Int  separate_recursions;
+#if CLG_ENABLE_DEBUG
+    Int  verbosity; /* Stores old verbosity level while in function */
+#endif
+};
+
+/* Quite arbitrary fixed hash sizes */
+
+#define   N_OBJ_ENTRIES    47
+#define  N_FILE_ENTRIES    53
+#define    N_FN_ENTRIES    87
+#define N_BBCC2_ENTRIES    37
+
+struct _file_node {
+    Char*      name;
+    fn_node*   fns[N_FN_ENTRIES];
+    UInt       number;
+    obj_node*  obj;
+    file_node* next;
+};
+
+/* If an object is dlopened multiple times, we hope that <name> is unique;
+ * <start> and <offset> can change with each dlopen, and <start> is
+ * zero when the object is unmapped (possible at dump time).
+ */
+struct _obj_node {
+    Char* name;
+    UInt  last_slash_pos;
+
+    Addr  start;  /* Start address of text segment mapping */
+    SizeT size;   /* Length of mapping */
+    OffT  offset; /* Offset between symbol address and file offset */
+
+    file_node* files[N_FILE_ENTRIES];
+    UInt       number;
+    obj_node*  next;
+};
+
+/* an entry in the callstack
+ *
+ * <nonskipped> is 0 if the function called is not skipped (usual case).
+ * Otherwise, it is the last non-skipped BBCC. This one gets all
+ * the calls to non-skipped functions and all costs in skipped
+ * instructions.
+ */
+struct _call_entry {
+    jCC*     jcc;        /* jCC for this call */
+    FullCost enter_cost; /* cost event counters at entering frame */
+    Addr     sp;         /* stack pointer directly after call */
+    Addr     ret_addr;   /* address to return to;
+                          * is 0 on a simulated call */
+    BBCC*    nonskipped; /* see above */
+    Context* cxt;        /* context before call */
+    Int      fn_sp;      /* function stack index before call */
+};
+
+
+/*
+ * Execution state of main thread or a running signal handler in
+ * a thread while interrupted by another signal handler.
+ * As there's no scheduling among running signal handlers of one thread,
+ * we only need a subset of a full thread state:
+ * - event counter
+ * - collect state
+ * - last BB, last jump kind, last nonskipped BB
+ * - callstack pointer for sanity checking and correct unwinding
+ *   after exit
+ */
+typedef struct _exec_state exec_state;
+struct _exec_state {
+
+    /* the signum of the handler, 0 for main thread context
+     */
+    Int sig;
+
+    /* the old call stack pointer at entering the signal handler */
+    Int orig_sp;
+
+    FullCost cost;
+    Bool     collect;
+    Context* cxt;
+
+    Int   jmps_passed; /* number of conditional jumps passed in last BB */
+    BBCC* bbcc;        /* last BB executed */
+    BBCC* nonskipped;
+
+    Int call_stack_bottom; /* Index into fn_stack */
+};
+
+/* Global state structures */
+typedef struct _bb_hash bb_hash;
+struct _bb_hash {
+    UInt size, entries;
+    BB** table;
+};
+
+typedef struct _cxt_hash cxt_hash;
+struct _cxt_hash {
+    UInt size, entries;
+    Context** table;
+};
+
+/* Thread specific state structures, i.e. parts of a thread state.
+ * There are variables for the current state of each part, + * on which a thread state is copied at thread switch. + */ +typedef struct _bbcc_hash bbcc_hash; +struct _bbcc_hash { + UInt size, entries; + BBCC** table; +}; + +typedef struct _jcc_hash jcc_hash; +struct _jcc_hash { + UInt size, entries; + jCC** table; + jCC* spontaneous; +}; + +typedef struct _fn_array fn_array; +struct _fn_array { + UInt size; + UInt* array; +}; + +typedef struct _call_stack call_stack; +struct _call_stack { + UInt size; + Int sp; + call_entry* entry; +}; + +typedef struct _fn_stack fn_stack; +struct _fn_stack { + UInt size; + fn_node **bottom, **top; +}; + +/* The maximum number of simultaneous running signal handlers per thread. + * This is the number of execution states storable in a thread. + */ +#define MAX_SIGHANDLERS 10 + +typedef struct _exec_stack exec_stack; +struct _exec_stack { + Int sp; /* > 0 if a handler is running */ + exec_state* entry[MAX_SIGHANDLERS]; +}; + +/* Thread State + * + * This structure stores thread specific info while a thread is *not* + * running. See function switch_thread() for save/restore on thread switch. + * + * If --separate-threads=no, BBCCs and JCCs can be shared by all threads, i.e. + * only structures of thread 1 are used. + * This involves variables fn_info_table, bbcc_table and jcc_table. + */ +struct _thread_info { + + /* state */ + fn_stack fns; /* function stack */ + call_stack calls; /* context call arc stack */ + exec_stack states; /* execution states interrupted by signals */ + + /* dump statistics */ + FullCost lastdump_cost; /* Cost at last dump */ + FullCost sighandler_cost; + + /* thread specific data structure containers */ + fn_array fn_active; + jcc_hash jccs; + bbcc_hash bbccs; +}; + +/* Structs used for dumping */ + +/* Address position inside of a BBCC: + * This includes + * - the address offset from the BB start address + * - file/line from debug info for that address (can change inside a BB) + */ +typedef struct _AddrPos AddrPos; +struct _AddrPos { + Addr addr; + Addr bb_addr; + file_node* file; + UInt line; +}; + +/* a simulator cost entity that can be written out in one line */ +typedef struct _AddrCost AddrCost; +struct _AddrCost { + AddrPos p; + SimCost cost; +}; + +/* A function in an execution context */ +typedef struct _FnPos FnPos; +struct _FnPos { + file_node* file; + fn_node* fn; + obj_node* obj; + Context* cxt; + int rec_index; + UInt line; +}; + +/*------------------------------------------------------------*/ +/*--- Cache simulator interface ---*/ +/*------------------------------------------------------------*/ + +struct cachesim_if +{ + void (*print_opts)(void); + Bool (*parse_opt)(Char* arg); + void (*post_clo_init)(void); + void (*clear)(void); + void (*getdesc)(Char* buf); + void (*printstat)(void); + void (*add_icost)(SimCost, BBCC*, InstrInfo*, ULong); + void (*after_bbsetup)(void); + void (*finish)(void); + + void (*log_1I0D)(InstrInfo*) VG_REGPARM(1); + + void (*log_1I1Dr)(InstrInfo*, Addr) VG_REGPARM(2); + void (*log_1I1Dw)(InstrInfo*, Addr) VG_REGPARM(2); + void (*log_1I2D)(InstrInfo*, Addr, Addr) VG_REGPARM(3); + + void (*log_0I1Dr)(InstrInfo*, Addr) VG_REGPARM(2); + void (*log_0I1Dw)(InstrInfo*, Addr) VG_REGPARM(2); + void (*log_0I2D)(InstrInfo*, Addr, Addr) VG_REGPARM(3); + + // function names of helpers (for debugging generated code) + Char *log_1I0D_name; + Char *log_1I1Dr_name, *log_1I1Dw_name, *log_1I2D_name; + Char *log_0I1Dr_name, *log_0I1Dw_name, *log_0I2D_name; +}; + + 
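The simulator backend is selected by filling this vtable. Purely as an
illustration, a backend that simulates nothing could be wired up as below; all
nil_* names are invented here, and the real setup lives in sim.c:

    static void nil_print_opts(void)     { }
    static Bool nil_parse_opt(Char* arg) { return False; }
    static void nil_post_clo_init(void)  { }
    static void nil_clear(void)          { }
    static void nil_getdesc(Char* buf)   { VG_(sprintf)(buf, "none\n"); }
    static void nil_printstat(void)      { }
    static void nil_finish(void)         { }

    static void init_nil_backend(void)
    {
       CLG_(cachesim).print_opts    = nil_print_opts;
       CLG_(cachesim).parse_opt     = nil_parse_opt;
       CLG_(cachesim).post_clo_init = nil_post_clo_init;
       CLG_(cachesim).clear         = nil_clear;
       CLG_(cachesim).getdesc       = nil_getdesc;
       CLG_(cachesim).printstat     = nil_printstat;
       CLG_(cachesim).finish        = nil_finish;
       /* add_icost and after_bbsetup would need nil handlers too.
        * A log_* hook left at 0 makes insert_simcall() in main.c emit
        * no helper call for the corresponding access type: */
       CLG_(cachesim).log_1I0D  = 0;  CLG_(cachesim).log_1I0D_name  = 0;
       CLG_(cachesim).log_1I1Dr = 0;  CLG_(cachesim).log_1I1Dr_name = 0;
       CLG_(cachesim).log_1I1Dw = 0;  CLG_(cachesim).log_1I1Dw_name = 0;
       CLG_(cachesim).log_1I2D  = 0;  CLG_(cachesim).log_1I2D_name  = 0;
       CLG_(cachesim).log_0I1Dr = 0;  CLG_(cachesim).log_0I1Dr_name = 0;
       CLG_(cachesim).log_0I1Dw = 0;  CLG_(cachesim).log_0I1Dw_name = 0;
       CLG_(cachesim).log_0I2D  = 0;  CLG_(cachesim).log_0I2D_name  = 0;
    }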
+/*------------------------------------------------------------*/ +/*--- Functions ---*/ +/*------------------------------------------------------------*/ + +/* from clo.c */ + +void CLG_(set_clo_defaults)(void); +void CLG_(update_fn_config)(fn_node*); +Bool CLG_(process_cmd_line_option)(Char*); +void CLG_(print_usage)(void); +void CLG_(print_debug_usage)(void); + +/* from sim.c */ +struct event_sets { + EventSet *use, *Ir, *Dr, *Dw; + EventSet *D0, *D1r, *D1w, *D2; + EventSet *sim; + EventSet *full; /* sim plus user events */ + + /* offsets into eventsets */ + Int off_sim_Ir, off_sim_Dr, off_sim_Dw; + Int off_full_Ir, off_full_Dr, off_full_Dw; + Int off_full_user, off_full_alloc, off_full_systime; +}; + +extern struct event_sets CLG_(sets); +extern struct cachesim_if CLG_(cachesim); + +void CLG_(init_eventsets)(Int user); + +/* from main.c */ +Bool CLG_(get_debug_info)(Addr, Char filename[FILENAME_LEN], + Char fn_name[FN_NAME_LEN], UInt*, SegInfo**); +void CLG_(collectBlockInfo)(IRBB* bbIn, UInt*, UInt*, Bool*); +void CLG_(set_instrument_state)(Char*,Bool); +void CLG_(dump_profile)(Char* trigger,Bool only_current_thread); +void CLG_(zero_all_cost)(Bool only_current_thread); +Int CLG_(get_dump_counter)(void); +void CLG_(fini)(Int exitcode); + +/* from command.c */ +void CLG_(init_command)(Char* dir, Char* dumps); +void CLG_(check_command)(void); +void CLG_(finish_command)(void); + +/* from bb.c */ +void CLG_(init_bb_hash)(void); +bb_hash* CLG_(get_bb_hash)(void); +BB* CLG_(get_bb)(Addr addr, IRBB* bb_in, Bool *seen_before); +void CLG_(delete_bb)(Addr addr); + +static __inline__ Addr bb_addr(BB* bb) + { return bb->offset + bb->obj->offset; } +static __inline__ Addr bb_jmpaddr(BB* bb) + { return bb->instr[bb->instr_count-1].instr_offset + bb->offset + bb->obj->offset; } + +/* from fn.c */ +void CLG_(init_fn_array)(fn_array*); +void CLG_(copy_current_fn_array)(fn_array* dst); +fn_array* CLG_(get_current_fn_array)(void); +void CLG_(set_current_fn_array)(fn_array*); +UInt* CLG_(get_fn_entry)(Int n); + +void CLG_(init_obj_table)(void); +obj_node* CLG_(get_obj_node)(SegInfo* si); +file_node* CLG_(get_file_node)(obj_node*, Char* filename); +fn_node* CLG_(get_fn_node)(BB* bb); + +/* from bbcc.c */ +void CLG_(init_bbcc_hash)(bbcc_hash* bbccs); +void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst); +bbcc_hash* CLG_(get_current_bbcc_hash)(void); +void CLG_(set_current_bbcc_hash)(bbcc_hash*); +void CLG_(forall_bbccs)(void (*func)(BBCC*)); +void CLG_(zero_bbcc)(BBCC* bbcc); +BBCC* CLG_(get_bbcc)(BB* bb); +BBCC* CLG_(clone_bbcc)(BBCC* orig, Context* cxt, Int rec_index); +void CLG_(setup_bbcc)(BB* bb) VG_REGPARM(1); + + +/* from jumps.c */ +void CLG_(init_jcc_hash)(jcc_hash*); +void CLG_(copy_current_jcc_hash)(jcc_hash* dst); +jcc_hash* CLG_(get_current_jcc_hash)(void); +void CLG_(set_current_jcc_hash)(jcc_hash*); +jCC* CLG_(get_jcc)(BBCC* from, UInt, BBCC* to); + +/* from callstack.c */ +void CLG_(init_call_stack)(call_stack*); +void CLG_(copy_current_call_stack)(call_stack* dst); +void CLG_(set_current_call_stack)(call_stack*); +call_entry* CLG_(get_call_entry)(Int n); + +void CLG_(push_call_stack)(BBCC* from, UInt jmp, BBCC* to, Addr sp, Bool skip); +void CLG_(pop_call_stack)(void); +void CLG_(unwind_call_stack)(Addr sp, Int); + +/* from context.c */ +void CLG_(init_fn_stack)(fn_stack*); +void CLG_(copy_current_fn_stack)(fn_stack*); +fn_stack* CLG_(get_current_fn_stack)(void); +void CLG_(set_current_fn_stack)(fn_stack*); + +void CLG_(init_cxt_table)(void); +cxt_hash* CLG_(get_cxt_hash)(void); +Context* 
CLG_(get_cxt)(fn_node** fn); +void CLG_(push_cxt)(fn_node* fn); + +/* from threads.c */ +void CLG_(init_threads)(void); +thread_info** CLG_(get_threads)(void); +thread_info* CLG_(get_current_thread)(void); +void CLG_(switch_thread)(ThreadId tid); +void CLG_(forall_threads)(void (*func)(thread_info*)); +void CLG_(run_thread)(ThreadId tid); + +void CLG_(init_exec_state)(exec_state* es); +void CLG_(init_exec_stack)(exec_stack*); +void CLG_(copy_current_exec_stack)(exec_stack*); +void CLG_(set_current_exec_stack)(exec_stack*); +void CLG_(pre_signal)(ThreadId tid, Int sigNum, Bool alt_stack); +void CLG_(post_signal)(ThreadId tid, Int sigNum); +void CLG_(run_post_signal_on_call_stack_bottom)(void); + +/* from dump.c */ +extern FullCost CLG_(total_cost); +void CLG_(init_files)(Char** dir, Char** file); +Char* CLG_(get_dump_file_base)(void); + + +/*------------------------------------------------------------*/ +/*--- Exported global variables ---*/ +/*------------------------------------------------------------*/ + +extern CommandLineOptions CLG_(clo); +extern Statistics CLG_(stat); +extern EventMapping* CLG_(dumpmap); + +/* Function active counter array, indexed by function number */ +extern UInt* CLG_(fn_active_array); +extern Bool CLG_(instrument_state); + +extern call_stack CLG_(current_call_stack); +extern fn_stack CLG_(current_fn_stack); +extern exec_state CLG_(current_state); +extern ThreadId CLG_(current_tid); + + +/*------------------------------------------------------------*/ +/*--- Debug output ---*/ +/*------------------------------------------------------------*/ + +#if CLG_ENABLE_DEBUG + +#define CLG_DEBUGIF(x) \ + if ( (CLG_(clo).verbose >x) && \ + (CLG_(stat).bb_executions >= CLG_(clo).verbose_start)) + +#define CLG_DEBUG(x,format,args...) \ + CLG_DEBUGIF(x) { \ + CLG_(print_bbno)(); \ + VG_(printf)(format,##args); \ + } + +#define CLG_ASSERT(cond) \ + if (!(cond)) { \ + CLG_(print_context)(); \ + CLG_(print_bbno)(); \ + tl_assert(cond); \ + } + +#else +#define CLG_DEBUGIF(x) if (0) +#define CLG_DEBUG(x...) {} +#define CLG_ASSERT(cond) tl_assert(cond); +#endif + +/* from debug.c */ +void CLG_(print_bbno)(void); +void CLG_(print_context)(void); +void CLG_(print_jcc)(int s, jCC* jcc); +void CLG_(print_bbcc)(int s, BBCC* bbcc, Bool); +void CLG_(print_bbcc_fn)(BBCC* bbcc); +void CLG_(print_execstate)(int s, exec_state* es); +void CLG_(print_eventset)(int s, EventSet* es); +void CLG_(print_cost)(int s, EventSet*, ULong* cost); +void CLG_(print_bb)(int s, BB* bb); +void CLG_(print_bbcc_cost)(int s, BBCC*); +void CLG_(print_cxt)(int s, Context* cxt, int rec_index); +void CLG_(print_short_jcc)(jCC* jcc); +void CLG_(print_stackentry)(int s, int sp); +void CLG_(print_addr)(Addr addr); +void CLG_(print_addr_ln)(Addr addr); + +void* CLG_(malloc)(UWord s, char* f); +void* CLG_(free)(void* p, char* f); +#if 0 +#define CLG_MALLOC(x) CLG_(malloc)(x,__FUNCTION__) +#define CLG_FREE(p) CLG_(free)(p,__FUNCTION__) +#else +#define CLG_MALLOC(x) VG_(malloc)(x) +#define CLG_FREE(p) VG_(free)(p) +#endif + +#endif /* CLG_GLOBAL */ diff --git a/callgrind/jumps.c b/callgrind/jumps.c new file mode 100644 index 0000000000..2a6a09a3c9 --- /dev/null +++ b/callgrind/jumps.c @@ -0,0 +1,233 @@ +/*--------------------------------------------------------------------*/ +/*--- Callgrind ---*/ +/*--- ct_jumps.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Callgrind, a Valgrind tool for call tracing. 
+
+   Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+#define N_JCC_INITIAL_ENTRIES  4437
+
+/*------------------------------------------------------------*/
+/*--- Jump Cost Center (JCC) operations, including Calls   ---*/
+/*------------------------------------------------------------*/
+
+jcc_hash current_jccs;
+
+void CLG_(init_jcc_hash)(jcc_hash* jccs)
+{
+   Int i;
+
+   CLG_ASSERT(jccs != 0);
+
+   jccs->size    = N_JCC_INITIAL_ENTRIES;
+   jccs->entries = 0;
+   jccs->table   = (jCC**) CLG_MALLOC(jccs->size * sizeof(jCC*));
+   jccs->spontaneous = 0;
+
+   for (i = 0; i < jccs->size; i++)
+     jccs->table[i] = 0;
+}
+
+
+void CLG_(copy_current_jcc_hash)(jcc_hash* dst)
+{
+  CLG_ASSERT(dst != 0);
+
+  dst->size        = current_jccs.size;
+  dst->entries     = current_jccs.entries;
+  dst->table       = current_jccs.table;
+  dst->spontaneous = current_jccs.spontaneous;
+}
+
+void CLG_(set_current_jcc_hash)(jcc_hash* h)
+{
+  CLG_ASSERT(h != 0);
+
+  current_jccs.size        = h->size;
+  current_jccs.entries     = h->entries;
+  current_jccs.table       = h->table;
+  current_jccs.spontaneous = h->spontaneous;
+}
+
+__inline__
+static UInt jcc_hash_idx(BBCC* from, UInt jmp, BBCC* to, UInt size)
+{
+  return (UInt) ( (UWord)from + 7* (UWord)to + 13*jmp) % size;
+}
+
+/* double size of jcc table */
+static void resize_jcc_table(void)
+{
+    Int i, new_size, conflicts1 = 0, conflicts2 = 0;
+    jCC** new_table;
+    UInt new_idx;
+    jCC *curr_jcc, *next_jcc;
+
+    new_size  = 2 * current_jccs.size + 3;
+    new_table = (jCC**) CLG_MALLOC(new_size * sizeof(jCC*));
+
+    if (!new_table) return;
+
+    for (i = 0; i < new_size; i++)
+      new_table[i] = NULL;
+
+    for (i = 0; i < current_jccs.size; i++) {
+	if (current_jccs.table[i] == NULL) continue;
+
+	curr_jcc = current_jccs.table[i];
+	while (NULL != curr_jcc) {
+	    next_jcc = curr_jcc->next_hash;
+
+	    new_idx = jcc_hash_idx(curr_jcc->from, curr_jcc->jmp,
+				   curr_jcc->to, new_size);
+
+	    curr_jcc->next_hash = new_table[new_idx];
+	    new_table[new_idx]  = curr_jcc;
+	    if (curr_jcc->next_hash) {
+		conflicts1++;
+		if (curr_jcc->next_hash->next_hash)
+		    conflicts2++;
+	    }
+
+	    curr_jcc = next_jcc;
+	}
+    }
+
+    VG_(free)(current_jccs.table);
+
+    CLG_DEBUG(0, "Resize JCC Hash: %d => %d (entries %d, conflicts %d/%d)\n",
+	      current_jccs.size, new_size,
+	      current_jccs.entries, conflicts1, conflicts2);
+
+    current_jccs.size  = new_size;
+    current_jccs.table = new_table;
+    CLG_(stat).jcc_hash_resizes++;
+}
+
+
+
+/* new jCC structure: a call was done to a BB of a BBCC
+ * for a spontaneous call, from is 0 (i.e.
caller unknown) + */ +static jCC* new_jcc(BBCC* from, UInt jmp, BBCC* to) +{ + jCC* new; + UInt new_idx; + + /* check fill degree of jcc hash table and resize if needed (>80%) */ + current_jccs.entries++; + if (10 * current_jccs.entries / current_jccs.size > 8) + resize_jcc_table(); + + new = (jCC*) CLG_MALLOC(sizeof(jCC)); + + new->from = from; + new->jmp = jmp; + new->to = to; + new->jmpkind = Ijk_Call; + new->call_counter = 0; + new->cost = 0; + + /* insert into JCC chain of calling BBCC. + * This list is only used at dumping time */ + + if (from) { + new->next_from = from->jmp[jmp].jcc_list; + from->jmp[jmp].jcc_list = new; + } + else { + new->next_from = current_jccs.spontaneous; + current_jccs.spontaneous = new; + } + + /* insert into JCC hash table */ + new_idx = jcc_hash_idx(from, jmp, to, current_jccs.size); + new->next_hash = current_jccs.table[new_idx]; + current_jccs.table[new_idx] = new; + + CLG_(stat).distinct_jccs++; + + CLG_DEBUGIF(3) { + VG_(printf)(" new_jcc (now %d): %p\n", + CLG_(stat).distinct_jccs, new); + } + + return new; +} + + +/* get the jCC for a call arc (BBCC->BBCC) */ +jCC* CLG_(get_jcc)(BBCC* from, UInt jmp, BBCC* to) +{ + jCC* jcc; + UInt idx; + + CLG_DEBUG(5, "+ get_jcc(bbcc %p/%d => bbcc %p)\n", + from, jmp, to); + + /* first check last recently used JCC */ + jcc = to->lru_to_jcc; + if (jcc && (jcc->from == from) && (jcc->jmp == jmp)) { + CLG_ASSERT(to == jcc->to); + CLG_DEBUG(5,"- get_jcc: [LRU to] jcc %p\n", jcc); + return jcc; + } + + jcc = from->lru_from_jcc; + if (jcc && (jcc->to == to) && (jcc->jmp == jmp)) { + CLG_ASSERT(from == jcc->from); + CLG_DEBUG(5, "- get_jcc: [LRU from] jcc %p\n", jcc); + return jcc; + } + + CLG_(stat).jcc_lru_misses++; + + idx = jcc_hash_idx(from, jmp, to, current_jccs.size); + jcc = current_jccs.table[idx]; + + while(jcc) { + if ((jcc->from == from) && + (jcc->jmp == jmp) && + (jcc->to == to)) break; + jcc = jcc->next_hash; + } + + if (!jcc) + jcc = new_jcc(from, jmp, to); + + /* set LRU */ + from->lru_from_jcc = jcc; + to->lru_to_jcc = jcc; + + CLG_DEBUG(5, "- get_jcc(bbcc %p => bbcc %p)\n", + from, to); + + return jcc; +} + diff --git a/callgrind/main.c b/callgrind/main.c new file mode 100644 index 0000000000..dd19b3b338 --- /dev/null +++ b/callgrind/main.c @@ -0,0 +1,1086 @@ + +/*--------------------------------------------------------------------*/ +/*--- Callgrind ---*/ +/*--- main.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Callgrind, a Valgrind tool for call graph + profiling programs. + + Copyright (C) 2002-2005, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) + + This skin is derived from and contains code from Cachegrind + Copyright (C) 2002-2005 Nicholas Nethercote (njn25@cam.ac.uk) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +#include "config.h" +#include "callgrind.h" +#include "global.h" + +#include + +/*------------------------------------------------------------*/ +/*--- Global variables ---*/ +/*------------------------------------------------------------*/ + +/* for all threads */ +CommandLineOptions CLG_(clo); +Statistics CLG_(stat); +Bool CLG_(instrument_state) = True; /* Instrumentation on ? */ + +/* thread and signal handler specific */ +exec_state CLG_(current_state); + + +/*------------------------------------------------------------*/ +/*--- Statistics ---*/ +/*------------------------------------------------------------*/ + +static void CLG_(init_statistics)(Statistics* s) +{ + s->call_counter = 0; + s->jcnd_counter = 0; + s->jump_counter = 0; + s->rec_call_counter = 0; + s->ret_counter = 0; + s->bb_executions = 0; + + s->context_counter = 0; + s->bb_retranslations = 0; + + s->distinct_objs = 0; + s->distinct_files = 0; + s->distinct_fns = 0; + s->distinct_contexts = 0; + s->distinct_bbs = 0; + s->distinct_bbccs = 0; + s->distinct_instrs = 0; + s->distinct_skips = 0; + + s->bb_hash_resizes = 0; + s->bbcc_hash_resizes = 0; + s->jcc_hash_resizes = 0; + s->cxt_hash_resizes = 0; + s->fn_array_resizes = 0; + s->call_stack_resizes = 0; + s->fn_stack_resizes = 0; + + s->full_debug_BBs = 0; + s->file_line_debug_BBs = 0; + s->fn_name_debug_BBs = 0; + s->no_debug_BBs = 0; + s->bbcc_lru_misses = 0; + s->jcc_lru_misses = 0; + s->cxt_lru_misses = 0; + s->bbcc_clones = 0; +} + + + + +/*------------------------------------------------------------*/ +/*--- Cache simulation instrumentation phase ---*/ +/*------------------------------------------------------------*/ + + +static Bool loadStoreAddrsMatch(IRExpr* loadAddrExpr, IRExpr* storeAddrExpr) +{ + // I'm assuming that for 'modify' instructions, that Vex always makes + // the loadAddrExpr and storeAddrExpr be of the same type, ie. both Tmp + // expressions, or both Const expressions. 
+ CLG_ASSERT(isIRAtom(loadAddrExpr)); + CLG_ASSERT(isIRAtom(storeAddrExpr)); + return eqIRAtom(loadAddrExpr, storeAddrExpr); +} + +static +EventSet* insert_simcall(IRBB* bbOut, InstrInfo* ii, UInt dataSize, + Bool instrIssued, + IRExpr* loadAddrExpr, IRExpr* storeAddrExpr) +{ + HChar* helperName; + void* helperAddr; + Int argc; + EventSet* es; + IRExpr *arg1, *arg2 = 0, *arg3 = 0, **argv; + IRDirty* di; + + /* Check type of original instruction regarding memory access, + * and collect info to be able to generate fitting helper call + */ + if (!loadAddrExpr && !storeAddrExpr) { + // no load/store + CLG_ASSERT(0 == dataSize); + if (instrIssued) { + helperName = 0; + helperAddr = 0; + } + else { + helperName = CLG_(cachesim).log_1I0D_name; + helperAddr = CLG_(cachesim).log_1I0D; + } + argc = 1; + es = CLG_(sets).D0; + + } else if (loadAddrExpr && !storeAddrExpr) { + // load + CLG_ASSERT( isIRAtom(loadAddrExpr) ); + if (instrIssued) { + helperName = CLG_(cachesim).log_0I1Dr_name; + helperAddr = CLG_(cachesim).log_0I1Dr; + } + else { + helperName = CLG_(cachesim).log_1I1Dr_name; + helperAddr = CLG_(cachesim).log_1I1Dr; + } + argc = 2; + arg2 = loadAddrExpr; + es = CLG_(sets).D1r; + + } else if (!loadAddrExpr && storeAddrExpr) { + // store + CLG_ASSERT( isIRAtom(storeAddrExpr) ); + if (instrIssued) { + helperName = CLG_(cachesim).log_0I1Dw_name; + helperAddr = CLG_(cachesim).log_0I1Dw; + } + else { + helperName = CLG_(cachesim).log_1I1Dw_name; + helperAddr = CLG_(cachesim).log_1I1Dw; + } + argc = 2; + arg2 = storeAddrExpr; + es = CLG_(sets).D1w; + + } else { + CLG_ASSERT( loadAddrExpr && storeAddrExpr ); + CLG_ASSERT( isIRAtom(loadAddrExpr) ); + CLG_ASSERT( isIRAtom(storeAddrExpr) ); + + if ( loadStoreAddrsMatch(loadAddrExpr, storeAddrExpr) ) { + /* modify: suppose write access, as this is + * more resource consuming (as in callgrind for VG2) + * Cachegrind does a read here (!) + * DISCUSS: Best way depends on simulation model? + */ + if (instrIssued) { + helperName = CLG_(cachesim).log_0I1Dw_name; + helperAddr = CLG_(cachesim).log_0I1Dw; + } + else { + helperName = CLG_(cachesim).log_1I1Dw_name; + helperAddr = CLG_(cachesim).log_1I1Dw; + } + argc = 2; + arg2 = storeAddrExpr; + es = CLG_(sets).D1w; + + } else { + // load/store + if (instrIssued) { + helperName = CLG_(cachesim).log_0I2D_name; + helperAddr = CLG_(cachesim).log_0I2D; + } + else { + helperName = CLG_(cachesim).log_1I2D_name; + helperAddr = CLG_(cachesim).log_1I2D; + } + argc = 3; + arg2 = loadAddrExpr; + arg3 = storeAddrExpr; + es = CLG_(sets).D2; + } + } + + /* helper could be unset depending on the simulator used */ + if (helperAddr == 0) return 0; + + /* Setup 1st arg: InstrInfo */ + arg1 = mkIRExpr_HWord( (HWord)ii ); + + // Add call to the instrumentation function + if (argc == 1) + argv = mkIRExprVec_1(arg1); + else if (argc == 2) + argv = mkIRExprVec_2(arg1, arg2); + else if (argc == 3) + argv = mkIRExprVec_3(arg1, arg2, arg3); + else + VG_(tool_panic)("argc... not 1 or 2 or 3?"); + + di = unsafeIRDirty_0_N( argc, helperName, helperAddr, argv); + addStmtToIRBB( bbOut, IRStmt_Dirty(di) ); + + return es; +} + + +/* Instrumentation before a conditional jump or at the end + * of each original instruction. + * Fills the InstrInfo struct if not seen before + */ +static +void endOfInstr(IRBB* bbOut, InstrInfo* ii, Bool bb_seen_before, + UInt instr_offset, UInt instrLen, UInt dataSize, + UInt* cost_offset, Bool instrIssued, + IRExpr* loadAddrExpr, IRExpr* storeAddrExpr) +{ + IRType wordTy; + EventSet* es; + + // Stay sane ... 
+   CLG_ASSERT(sizeof(HWord) == sizeof(void*));
+   if (sizeof(HWord) == 4) {
+      wordTy = Ity_I32;
+   } else
+   if (sizeof(HWord) == 8) {
+      wordTy = Ity_I64;
+   } else {
+      VG_(tool_panic)("endOfInstr: strange word size");
+   }
+
+   if (loadAddrExpr)
+      CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, loadAddrExpr));
+   if (storeAddrExpr)
+      CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, storeAddrExpr));
+
+   // Large (eg. 28B, 108B, 512B on x86) data-sized instructions will be
+   // done inaccurately, but they're very rare and this avoids errors from
+   // hitting more than two cache lines in the simulation.
+   if (dataSize > MIN_LINE_SIZE) dataSize = MIN_LINE_SIZE;
+
+   /* returns 0 if simulator needs no instrumentation */
+   es = insert_simcall(bbOut, ii, dataSize, instrIssued,
+		       loadAddrExpr, storeAddrExpr);
+
+   if (bb_seen_before) {
+      CLG_ASSERT(ii->instr_offset == instr_offset);
+      CLG_ASSERT(ii->instr_size == instrLen);
+      CLG_ASSERT(ii->data_size == dataSize);
+      CLG_ASSERT(ii->cost_offset == *cost_offset);
+      CLG_ASSERT(ii->eventset == es);
+   }
+   else {
+      ii->instr_offset = instr_offset;
+      ii->instr_size = instrLen;
+      ii->data_size = dataSize;
+      ii->cost_offset = *cost_offset;
+      ii->eventset = es;
+
+      CLG_(stat).distinct_instrs++;
+   }
+
+   *cost_offset += es ? es->size : 0;
+
+   CLG_DEBUG(5, "  Instr +%2d (Size %d, DSize %d): ESet %s (Size %d)\n",
+	     instr_offset, instrLen, dataSize,
+	     es ? es->name : (Char*)"(no Instr)",
+	     es ? es->size : 0);
+}
+
+#if defined(VG_BIGENDIAN)
+# define CLGEndness Iend_BE
+#elif defined(VG_LITTLEENDIAN)
+# define CLGEndness Iend_LE
+#else
+# error "Unknown endianness"
+#endif
+
+static
+Addr IRConst2Addr(IRConst* con)
+{
+   Addr addr;
+
+   if (sizeof(Addr) == 4) {
+      CLG_ASSERT( con->tag == Ico_U32 );
+      addr = con->Ico.U32;
+   }
+   else if (sizeof(Addr) == 8) {
+      CLG_ASSERT( con->tag == Ico_U64 );
+      addr = con->Ico.U64;
+   }
+   else
+      VG_(tool_panic)("Callgrind: invalid Addr type");
+
+   return addr;
+}
+
+/* First pass over a BB to instrument, counting instructions and jumps
+ * This is needed for the size of the BB struct to allocate
+ *
+ * Called from CLG_(get_bb)
+ */
+void CLG_(collectBlockInfo)(IRBB* bbIn,
+			    /*INOUT*/ UInt* instrs,
+			    /*INOUT*/ UInt* cjmps,
+			    /*INOUT*/ Bool* cjmp_inverted)
+{
+   Int i;
+   IRStmt* st;
+   Addr instrAddr = 0, jumpDst;
+   UInt instrLen = 0;
+   Bool toNextInstr = False;
+
+   // Ist_Exit has to be ignored in preamble code, before first IMark:
+   // preamble code is added by VEX for self modifying code, and has
+   // nothing to do with client code
+   Bool inPreamble = True;
+
+   if (!bbIn) return;
+
+   for (i = 0; i < bbIn->stmts_used; i++) {
+      st = bbIn->stmts[i];
+      if (Ist_IMark == st->tag) {
+	  inPreamble = False;
+
+	  instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
+	  instrLen  = st->Ist.IMark.len;
+
+	  (*instrs)++;
+	  toNextInstr = False;
+      }
+      if (inPreamble) continue;
+      if (Ist_Exit == st->tag) {
+	  jumpDst = IRConst2Addr(st->Ist.Exit.dst);
+	  toNextInstr = (jumpDst == instrAddr + instrLen);
+
+	  (*cjmps)++;
+      }
+   }
+
+   /* if the last instruction of the BB conditionally jumps to the next
+    * instruction (= first instruction of next BB in memory), this jump
+    * is inverted by VEX.
+    */
+   *cjmp_inverted = toNextInstr;
+}
+
+static
+void collectStatementInfo(IRTypeEnv* tyenv, IRBB* bbOut, IRStmt* st,
+			  Addr* instrAddr, UInt* instrLen,
+			  IRExpr** loadAddrExpr, IRExpr** storeAddrExpr,
+			  UInt* dataSize, IRType hWordTy)
+{
+   CLG_ASSERT(isFlatIRStmt(st));
+
+   switch (st->tag) {
+   case Ist_NoOp:
+      break;
+
+   case Ist_AbiHint:
+      /* ABI hints aren't interesting.  Ignore.
*/ + break; + + case Ist_IMark: + /* st->Ist.IMark.addr is a 64-bit int. ULong_to_Ptr casts this + to the host's native pointer type; if that is 32 bits then it + discards the upper 32 bits. If we are cachegrinding on a + 32-bit host then we are also ensured that the guest word size + is 32 bits, due to the assertion in cg_instrument that the + host and guest word sizes must be the same. Hence + st->Ist.IMark.addr will have been derived from a 32-bit guest + code address and truncation of it is safe. I believe this + assignment should be correct for both 32- and 64-bit + machines. */ + *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr); + *instrLen = st->Ist.IMark.len; + break; + + case Ist_Tmp: { + IRExpr* data = st->Ist.Tmp.data; + if (data->tag == Iex_Load) { + IRExpr* aexpr = data->Iex.Load.addr; + CLG_ASSERT( isIRAtom(aexpr) ); + // Note also, endianness info is ignored. I guess that's not + // interesting. + // XXX: repe cmpsb does two loads... the first one is ignored here! + //tl_assert( NULL == *loadAddrExpr ); // XXX: ??? + *loadAddrExpr = aexpr; + *dataSize = sizeofIRType(data->Iex.Load.ty); + } + break; + } + + case Ist_Store: { + IRExpr* data = st->Ist.Store.data; + IRExpr* aexpr = st->Ist.Store.addr; + CLG_ASSERT( isIRAtom(aexpr) ); + if ( NULL == *storeAddrExpr ) { + /* this is a kludge: ignore all except the first store from + an instruction. */ + *storeAddrExpr = aexpr; + *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data)); + } + break; + } + + case Ist_Dirty: { + IRDirty* d = st->Ist.Dirty.details; + if (d->mFx != Ifx_None) { + /* This dirty helper accesses memory. Collect the + details. */ + CLG_ASSERT(d->mAddr != NULL); + CLG_ASSERT(d->mSize != 0); + *dataSize = d->mSize; + if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) + *loadAddrExpr = d->mAddr; + if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) + *storeAddrExpr = d->mAddr; + } else { + CLG_ASSERT(d->mAddr == NULL); + CLG_ASSERT(d->mSize == 0); + } + break; + } + + case Ist_Put: + case Ist_PutI: + case Ist_MFence: + case Ist_Exit: + break; + + default: + VG_(printf)("\n"); + ppIRStmt(st); + VG_(printf)("\n"); + VG_(tool_panic)("Callgrind: unhandled IRStmt"); + } +} + +static +void addConstMemStoreStmt( IRBB* bbOut, UWord addr, UInt val, IRType hWordTy) +{ + addStmtToIRBB( bbOut, + IRStmt_Store(CLGEndness, + IRExpr_Const(hWordTy == Ity_I32 ? + IRConst_U32( addr ) : + IRConst_U64( addr )), + IRExpr_Const(IRConst_U32(val)) )); +} + +static +IRBB* CLG_(instrument)( VgCallbackClosure* closure, + IRBB* bbIn, + VexGuestLayout* layout, + VexGuestExtents* vge, + IRType gWordTy, IRType hWordTy ) +{ + Int i; + IRBB* bbOut; + IRStmt* st, *stnext; + Addr instrAddr, origAddr; + UInt instrLen = 0, dataSize; + UInt instrCount, costOffset; + IRExpr *loadAddrExpr, *storeAddrExpr; + + BB* bb; + + IRDirty* di; + IRExpr *arg1, **argv; + + Bool bb_seen_before = False; + UInt cJumps = 0, cJumpsCorrected; + Bool beforeIBoundary, instrIssued; + + if (gWordTy != hWordTy) { + /* We don't currently support this case. */ + VG_(tool_panic)("host/guest word size mismatch"); + } + + // No instrumentation if it is switched off + if (! 
CLG_(instrument_state)) {
+       CLG_DEBUG(5, "instrument(BB %p) [Instrumentation OFF]\n",
+		 (Addr)closure->readdr);
+       return bbIn;
+   }
+
+   CLG_DEBUG(3, "+ instrument(BB %p)\n", (Addr)closure->readdr);
+
+   /* Set up BB for instrumented IR */
+   bbOut = emptyIRBB();
+   bbOut->tyenv = dopyIRTypeEnv(bbIn->tyenv);
+   bbOut->next = dopyIRExpr(bbIn->next);
+   bbOut->jumpkind = bbIn->jumpkind;
+
+   // Copy verbatim any IR preamble preceding the first IMark
+   i = 0;
+   while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) {
+      addStmtToIRBB( bbOut, bbIn->stmts[i] );
+      i++;
+   }
+
+   // Get the first statement, and origAddr from it
+   CLG_ASSERT(bbIn->stmts_used > 0);
+   st = bbIn->stmts[i];
+   CLG_ASSERT(Ist_IMark == st->tag);
+   instrAddr = origAddr = (Addr)st->Ist.IMark.addr;
+   CLG_ASSERT(origAddr == st->Ist.IMark.addr);  // XXX: check no overflow
+
+   /* Get BB (creating if necessary).
+    * JS: The hash table is keyed with orig_addr_noredir -- important!
+    * JW: Why? If it is because of different chasing of the redirection,
+    *     this is not needed, as chasing is switched off in callgrind
+    */
+   bb = CLG_(get_bb)(origAddr, bbIn, &bb_seen_before);
+   //bb = CLG_(get_bb)(orig_addr_noredir, bbIn, &bb_seen_before);
+
+   /*
+    * Precondition:
+    * - jmps_passed has number of cond. jumps passed in last executed BB
+    * - current_bbcc has a pointer to the BBCC of the last executed BB
+    *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
+    *     current_bbcc->bb->jmp_addr
+    *   gives the address of the jump source.
+    *
+    * The BBCC setup does 2 things:
+    * - trace call:
+    *   * Unwind own call stack, i.e. sync our ESP with the real ESP
+    *     This is for ESP manipulation (longjmps, C++ exception handling)
+    *     and RET
+    *   * For CALLs or JMPs crossing objects, record the call arg and
+    *     push the call arc on our own call stack
+    *
+    * - prepare for cache log functions:
+    *   Set current_bbcc to the BBCC that gets the costs for this BB
+    *   execution attached
+    */
+
+   // helper call to setup_bbcc, with pointer to basic block info struct as argument
+   arg1 = mkIRExpr_HWord( (HWord)bb );
+   argv = mkIRExprVec_1(arg1);
+   di = unsafeIRDirty_0_N( 1, "setup_bbcc", & CLG_(setup_bbcc), argv);
+   addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
+
+   instrCount = 0;
+   costOffset = 0;
+
+   // loop for each host instruction (starting from 'i')
+   do {
+
+      // We should be at an IMark statement
+      CLG_ASSERT(Ist_IMark == st->tag);
+
+      // Reset stuff for this original instruction
+      loadAddrExpr = storeAddrExpr = NULL;
+      instrIssued = False;
+      dataSize = 0;
+
+      // Process all the statements for this original instruction (ie. until
+      // the next IMark statement, or the end of the block)
+      do {
+	  i++;
+	  stnext = ( i < bbIn->stmts_used ? bbIn->stmts[i] : NULL );
+	  beforeIBoundary = !stnext || (Ist_IMark == stnext->tag);
+	  collectStatementInfo(bbIn->tyenv, bbOut, st, &instrAddr, &instrLen,
+			       &loadAddrExpr, &storeAddrExpr, &dataSize, hWordTy);
+
+	  // instrument a simulator call before conditional jumps
+	  if (st->tag == Ist_Exit) {
+	      // Nb: instrLen will be zero if Vex failed to decode it.
+	      // Also Client requests can appear to be very large (eg. 18
+	      // bytes on x86) because they are really multiple instructions.
+	      CLG_ASSERT( 0 == instrLen ||
+			  bbIn->jumpkind == Ijk_ClientReq ||
+			  (instrLen >= VG_MIN_INSTR_SZB &&
+			   instrLen <= VG_MAX_INSTR_SZB) );
+
+	      // Add instrumentation before this statement
+	      endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
+			 instrAddr - origAddr, instrLen, dataSize, &costOffset,
+			 instrIssued, loadAddrExpr, storeAddrExpr);
+
+	      // prepare for a possible further simcall in same host instr
+	      loadAddrExpr = storeAddrExpr = NULL;
+	      instrIssued = True;
+
+	      if (!bb_seen_before) {
+		  bb->jmp[cJumps].instr = instrCount;
+		  bb->jmp[cJumps].skip = False;
+	      }
+
+	      /* Update global variable jmps_passed (this is before the jump!)
+	       * A correction is needed if VEX inverted the last jump condition
+	       */
+	      cJumpsCorrected = cJumps;
+	      if ((cJumps+1 == bb->cjmp_count) && bb->cjmp_inverted) cJumpsCorrected++;
+	      addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
+				    cJumpsCorrected, hWordTy);
+
+	      cJumps++;
+	  }
+
+	  addStmtToIRBB( bbOut, st );
+	  st = stnext;
+      }
+      while (!beforeIBoundary);
+
+      // Add instrumentation for this original instruction.
+      if (!instrIssued || (loadAddrExpr != 0) || (storeAddrExpr != 0))
+	  endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
+		     instrAddr - origAddr, instrLen, dataSize, &costOffset,
+		     instrIssued, loadAddrExpr, storeAddrExpr);
+
+      instrCount++;
+   }
+   while (st);
+
+   /* Always update global variable jmps_passed (at end of BB)
+    * A correction is needed if VEX inverted the last jump condition
+    */
+   cJumpsCorrected = cJumps;
+   if (bb->cjmp_inverted) cJumpsCorrected--;
+   addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
+			 cJumpsCorrected, hWordTy);
+
+   /* This stores the instr of the call/ret at BB end */
+   bb->jmp[cJumps].instr = instrCount-1;
+
+   CLG_ASSERT(bb->cjmp_count == cJumps);
+   CLG_ASSERT(bb->instr_count == instrCount);
+
+   instrAddr += instrLen;
+   if (bb_seen_before) {
+      CLG_ASSERT(bb->instr_len == instrAddr - origAddr);
+      CLG_ASSERT(bb->cost_count == costOffset);
+      CLG_ASSERT(bb->jmpkind == bbIn->jumpkind);
+   }
+   else {
+      bb->instr_len = instrAddr - origAddr;
+      bb->cost_count = costOffset;
+      bb->jmpkind = bbIn->jumpkind;
+   }
+
+   CLG_DEBUG(3, "- instrument(BB %p): byteLen %u, CJumps %u, CostLen %u\n",
+	     origAddr, bb->instr_len, bb->cjmp_count, bb->cost_count);
+   if (cJumps>0) {
+       CLG_DEBUG(3, "                     [ ");
+       for (i=0; i<cJumps; i++)
+	   CLG_DEBUG(3, " %d", bb->jmp[i].instr);
+       CLG_DEBUG(3, "], last inverted: %s \n",
+		 bb->cjmp_inverted ? "yes":"no");
+   }
+
+  return bbOut;
+}
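A worked example of the inversion correction above, with hypothetical numbers:
take a BB with cjmp_count == 3 whose last conditional jump had its condition
inverted by VEX (bb->cjmp_inverted == True). The stores into jmps_passed then
write:

    /* store before exit 0:  cJumps == 0            ->  0
     * store before exit 1:  cJumps == 1            ->  1
     * store before exit 2:  cJumps == 2, last+inv  ->  corrected to 3
     *   (taking the inverted exit means the original jump fell through,
     *    i.e. all three cond. jumps were passed)
     * store at BB end:      cJumps == 3, inverted  ->  corrected to 2
     *   (the instrumented fall-through means the original jump 2 was
     *    actually taken) */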
+
+/*--------------------------------------------------------------------*/
+/*--- Discarding BB info                                           ---*/
+/*--------------------------------------------------------------------*/
+
+// Called when a translation is removed from the translation cache for
+// any reason at all: to free up space, because the guest code was
+// unmapped or modified, or for any arbitrary reason.
+static
+void clg_discard_basic_block_info ( Addr64 orig_addr64, VexGuestExtents vge )
+{
+    Addr orig_addr = (Addr)orig_addr64;
+
+    tl_assert(vge.n_used > 0);
+
+   if (0)
+      VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
+                   (void*)(Addr)orig_addr,
+                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
+
+   // Get BB info, remove from table, free BB info.  Simple!  Note that we
+   // use orig_addr, not the first instruction address in vge.
+ CLG_(delete_bb)(orig_addr); +} + + +/*------------------------------------------------------------*/ +/*--- CLG_(fini)() and related function ---*/ +/*------------------------------------------------------------*/ + + + +static void zero_thread_cost(thread_info* t) +{ + Int i; + + for(i = 0; i < CLG_(current_call_stack).sp; i++) { + if (!CLG_(current_call_stack).entry[i].jcc) continue; + + /* reset call counters to current for active calls */ + CLG_(copy_cost)( CLG_(sets).full, + CLG_(current_call_stack).entry[i].enter_cost, + CLG_(current_state).cost ); + } + + CLG_(forall_bbccs)(CLG_(zero_bbcc)); + + /* set counter for last dump */ + CLG_(copy_cost)( CLG_(sets).full, + t->lastdump_cost, CLG_(current_state).cost ); +} + +void CLG_(zero_all_cost)(Bool only_current_thread) +{ + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_DebugMsg, " Zeroing costs..."); + + if (only_current_thread) + zero_thread_cost(CLG_(get_current_thread)()); + else + CLG_(forall_threads)(zero_thread_cost); + + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_DebugMsg, " ...done"); +} + +static +void unwind_thread(thread_info* t) +{ + /* unwind signal handlers */ + while(CLG_(current_state).sig !=0) + CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig); + + /* unwind regular call stack */ + while(CLG_(current_call_stack).sp>0) + CLG_(pop_call_stack)(); +} + +/* Ups, this can go wrong... */ +extern void VG_(discard_translations) ( Addr64 start, ULong range ); + +void CLG_(set_instrument_state)(Char* reason, Bool state) +{ + if (CLG_(instrument_state) == state) { + CLG_DEBUG(2, "%s: instrumentation already %s\n", + reason, state ? "ON" : "OFF"); + return; + } + CLG_(instrument_state) = state; + CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n", + reason, state ? "ON" : "OFF"); + + VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl); + + /* reset internal state: call stacks, simulator */ + CLG_(forall_threads)(unwind_thread); + (*CLG_(cachesim).clear)(); + if (0) + CLG_(forall_threads)(zero_thread_cost); + + if (!state) + CLG_(init_exec_state)( &CLG_(current_state) ); + + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n", + reason, state ? "ON" : "OFF"); +} + + +static +Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret) +{ + if (!VG_IS_TOOL_USERREQ('C','T',args[0])) + return False; + + switch(args[0]) { + case VG_USERREQ__DUMP_STATS: + CLG_(dump_profile)("Client Request", True); + *ret = 0; /* meaningless */ + break; + + case VG_USERREQ__DUMP_STATS_AT: + { + Char buf[512]; + VG_(sprintf)(buf,"Client Request: %d", args[1]); + CLG_(dump_profile)(buf, True); + *ret = 0; /* meaningless */ + } + break; + + case VG_USERREQ__ZERO_STATS: + CLG_(zero_all_cost)(True); + *ret = 0; /* meaningless */ + break; + + case VG_USERREQ__TOGGLE_COLLECT: + CLG_(current_state).collect = !CLG_(current_state).collect; + CLG_DEBUG(2, "Client Request: toggled collection state to %s\n", + CLG_(current_state).collect ? "ON" : "OFF"); + *ret = 0; /* meaningless */ + break; + + case VG_USERREQ__START_INSTRUMENTATION: + CLG_(set_instrument_state)("Client Request", True); + *ret = 0; /* meaningless */ + break; + + case VG_USERREQ__STOP_INSTRUMENTATION: + CLG_(set_instrument_state)("Client Request", False); + *ret = 0; /* meaningless */ + break; + + default: + return False; + } + + return True; +} + + +/* Syscall Timing */ + +/* struct timeval syscalltime[VG_N_THREADS]; */ +#if CLG_MICROSYSTIME +#include +#include +extern Int VG_(do_syscall) ( UInt, ... 
);
+
+ULong syscalltime[VG_N_THREADS];
+#else
+UInt syscalltime[VG_N_THREADS];
+#endif
+
+static
+void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno)
+{
+  if (CLG_(clo).collect_systime) {
+#if CLG_MICROSYSTIME
+    struct vki_timeval tv_now;
+    VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
+    syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
+#else
+    syscalltime[tid] = VG_(read_millisecond_timer)();
+#endif
+  }
+}
+
+static
+void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno, SysRes res)
+{
+  if (CLG_(clo).collect_systime) {
+    Int o = CLG_(sets).off_full_systime;
+#if CLG_MICROSYSTIME
+    struct vki_timeval tv_now;
+    ULong diff;
+
+    VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
+    diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
+#else
+    UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
+#endif
+
+    CLG_DEBUG(0,"   Time (Off %d) for Syscall %d: %llu\n",
+	      o, syscallno, (ULong)diff);
+
+    if (o<0) return;
+
+    CLG_(current_state).cost[o] ++;
+    CLG_(current_state).cost[o+1] += diff;
+    if (!CLG_(current_state).bbcc->skipped)
+      CLG_(init_cost_lz)(CLG_(sets).full,
+			 &(CLG_(current_state).bbcc->skipped));
+    CLG_(current_state).bbcc->skipped[o] ++;
+    CLG_(current_state).bbcc->skipped[o+1] += diff;
+  }
+}
+
+static
+void finish(void)
+{
+  char buf[RESULTS_BUF_LEN];
+
+  CLG_DEBUG(0, "finish()\n");
+
+  (*CLG_(cachesim).finish)();
+
+  /* pop all remaining items from CallStack for correct sum
+   */
+  CLG_(forall_threads)(unwind_thread);
+
+  CLG_(dump_profile)(0, False);
+
+  CLG_(finish_command)();
+
+  if (VG_(clo_verbosity) == 0) return;
+
+  /* Hash table stats */
+  if (VG_(clo_verbosity) > 1) {
+    int BB_lookups =
+      CLG_(stat).full_debug_BBs +
+      CLG_(stat).fn_name_debug_BBs +
+      CLG_(stat).file_line_debug_BBs +
+      CLG_(stat).no_debug_BBs;
+
+    VG_(message)(Vg_DebugMsg, "");
+    VG_(message)(Vg_DebugMsg, "Distinct objects: %d",
+		 CLG_(stat).distinct_objs);
+    VG_(message)(Vg_DebugMsg, "Distinct files:   %d",
+		 CLG_(stat).distinct_files);
+    VG_(message)(Vg_DebugMsg, "Distinct fns:     %d",
+		 CLG_(stat).distinct_fns);
+    VG_(message)(Vg_DebugMsg, "Distinct contexts:%d",
+		 CLG_(stat).distinct_contexts);
+    VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d",
+		 CLG_(stat).distinct_bbs);
+    VG_(message)(Vg_DebugMsg, "Cost entries:     %d (Chunks %d)",
+		 CLG_(costarray_entries), CLG_(costarray_chunks));
+    VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d",
+		 CLG_(stat).distinct_bbccs);
+    VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d",
+		 CLG_(stat).distinct_jccs);
+    VG_(message)(Vg_DebugMsg, "Distinct skips:   %d",
+		 CLG_(stat).distinct_skips);
+    VG_(message)(Vg_DebugMsg, "BB lookups:       %d",
+		 BB_lookups);
+    if (BB_lookups>0) {
+      VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)",
+		   CLG_(stat).full_debug_BBs * 100 / BB_lookups,
+		   CLG_(stat).full_debug_BBs);
+      VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
+		   CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
+		   CLG_(stat).file_line_debug_BBs);
+      VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)",
+		   CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
+		   CLG_(stat).fn_name_debug_BBs);
+      VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)",
+		   CLG_(stat).no_debug_BBs * 100 / BB_lookups,
+		   CLG_(stat).no_debug_BBs);
+    }
+    VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d",
+		 CLG_(stat).bbcc_clones);
+    VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d",
+		 CLG_(stat).bb_retranslations);
+    VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d",
+		 CLG_(stat).distinct_instrs);
+    VG_(message)(Vg_DebugMsg, "");
+
+ VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d", + CLG_(stat).cxt_lru_misses); + VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d", + CLG_(stat).bbcc_lru_misses); + VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d", + CLG_(stat).jcc_lru_misses); + VG_(message)(Vg_DebugMsg, "BBs Executed: %llu", + CLG_(stat).bb_executions); + VG_(message)(Vg_DebugMsg, "Calls: %llu", + CLG_(stat).call_counter); + VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu", + CLG_(stat).jcnd_counter); + VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu", + CLG_(stat).jump_counter); + VG_(message)(Vg_DebugMsg, "Recursive calls: %llu", + CLG_(stat).rec_call_counter); + VG_(message)(Vg_DebugMsg, "Returns: %llu", + CLG_(stat).ret_counter); + + VG_(message)(Vg_DebugMsg, ""); + } + + CLG_(sprint_eventmapping)(buf, CLG_(dumpmap)); + VG_(message)(Vg_UserMsg, "Events : %s", buf); + CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost)); + VG_(message)(Vg_UserMsg, "Collected : %s", buf); + VG_(message)(Vg_UserMsg, ""); + + // if (CLG_(clo).simulate_cache) + (*CLG_(cachesim).printstat)(); +} + + +void CLG_(fini)(Int exitcode) +{ + finish(); +} + + +/*--------------------------------------------------------------------*/ +/*--- Setup ---*/ +/*--------------------------------------------------------------------*/ + +static +void CLG_(post_clo_init)(void) +{ + Char *dir = 0, *fname = 0; + + VG_(clo_vex_control).iropt_unroll_thresh = 0; + VG_(clo_vex_control).guest_chase_thresh = 0; + + CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No"); + CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers); + CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions); + + if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) { + VG_(message)(Vg_UserMsg, "Using source line as position."); + CLG_(clo).dump_line = True; + } + + CLG_(init_files)(&dir,&fname); + CLG_(init_command)(dir,fname); + + (*CLG_(cachesim).post_clo_init)(); + + CLG_(init_eventsets)(0); + CLG_(init_statistics)(& CLG_(stat)); + CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) ); + + /* initialize hash tables */ + CLG_(init_obj_table)(); + CLG_(init_cxt_table)(); + CLG_(init_bb_hash)(); + + CLG_(init_threads)(); + CLG_(run_thread)(1); + + CLG_(instrument_state) = CLG_(clo).instrument_atstart; + + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, "For interactive control, run 'callgrind_control -h'."); +} + +static +void CLG_(pre_clo_init)(void) +{ + VG_(details_name) ("Callgrind"); + VG_(details_version) (VERSION); + VG_(details_description) ("a call-graph generating cache profiler"); + VG_(details_copyright_author)("Copyright (C) 2002-2006, and GNU GPL'd, " + "by J.Weidendorfer et al."); + VG_(details_bug_reports_to) ("Josef.Weidendorfer@gmx.de"); + VG_(details_avg_translation_sizeB) ( 155 ); + + VG_(basic_tool_funcs) (CLG_(post_clo_init), + CLG_(instrument), + CLG_(fini)); + + VG_(needs_basic_block_discards)(clg_discard_basic_block_info); + + + VG_(needs_command_line_options)(CLG_(process_cmd_line_option), + CLG_(print_usage), + CLG_(print_debug_usage)); + + VG_(needs_client_requests)(CLG_(handle_client_request)); + VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime), + CLG_(post_syscalltime)); + + VG_(track_thread_run) ( & CLG_(run_thread) ); + VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) ); + VG_(track_post_deliver_signal) ( & CLG_(post_signal) ); + + CLG_(set_clo_defaults)(); +} + +VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init)) + 
+/*--------------------------------------------------------------------*/
+/*--- end                                                   main.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/callgrind/sim.c b/callgrind/sim.c
new file mode 100644
index 0000000000..e61eb6971c
--- /dev/null
+++ b/callgrind/sim.c
@@ -0,0 +1,2162 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Cache simulation.                                            ---*/
+/*---                                                        sim.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Callgrind.
+   (c) 2003-2005, Josef Weidendorfer
+
+   Parts are Copyright (C) 2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+
+/* Notes:
+  - simulates a write-allocate cache
+  - (block --> set) hash function uses simple bit selection
+  - handling of references straddling two cache blocks:
+      - counts as only one cache access (not two)
+      - both blocks hit                  --> one hit
+      - one block hits, the other misses --> one miss
+      - both blocks miss                 --> one miss (not two)
+*/
+
+/* Cache configuration */
+#include "cg_arch.h"
+
+/* additional structures for cache use info, separated
+ * according to usage frequency:
+ * - line_loaded : pointer to cost center of the instruction
+ *                 which loaded the line into cache.
+ *                 Needed to increment counters when line is evicted.
+ * - line_use    : updated on every access
+ */
+typedef struct {
+  UInt count;
+  UInt mask; /* e.g. for 64 Byte line size, 1 bit/2 Byte */
+} line_use;
+
+typedef struct {
+  Addr memline, iaddr;
+  line_use* dep_use; /* point to higher-level cacheblock for this memline */
+  ULong* use_base;
+} line_loaded;
+
+/* Cache state */
+typedef struct {
+   char*        name;
+   int          size;      /* bytes */
+   int          assoc;
+   int          line_size; /* bytes */
+   Bool         sectored;  /* prefetch nearside cacheline on read */
+   int          sets;
+   int          sets_min_1;
+   int          assoc_bits;
+   int          line_size_bits;
+   int          tag_shift;
+   UWord        tag_mask;
+   char         desc_line[128];
+   UWord*       tags;
+
+  /* for cache use */
+   int          line_size_mask;
+   int*         line_start_mask;
+   int*         line_end_mask;
+   line_loaded* loaded;
+   line_use*    use;
+} cache_t2;
+
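To make the derived fields concrete, a worked example (numbers only, computed
the same way cachesim_initcache() does below) for a hypothetical 65536 B,
2-way cache with 64 B lines:

    /* sets           = (65536 / 64) / 2   = 512
     * line_size_bits = log2(64)           = 6
     * tag_shift      = 6 + log2(512)      = 15
     * tag_mask       = ~((1 << 15) - 1)     (address bits above set+offset)
     * set index of address a:  (a >> 6) & 511
     */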
+/*
+ * States of flat caches in our model.
+ * We use a 2-level hierarchy.
+ */
+static cache_t2 I1, D1, L2;
+
+/* Lower bits of cache tags are used as flags for a cache line */
+#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)
+#define CACHELINE_DIRTY    1
+
+
+/* Cache simulator Options */
+static Bool clo_simulate_writeback = False;
+static Bool clo_simulate_hwpref = False;
+static Bool clo_simulate_sectors = False;
+static Bool clo_collect_cacheuse = False;
+
+/* The following global vars are set up beforehand by
+ * setup_bbcc()/cachesim_after_bbsetup():
+ *
+ * - Addr   bb_base     (instruction start address of original BB)
+ * - ULong* cost_base   (start of cost array for BB)
+ * - BBCC*  nonskipped  (only != 0 when in a function not skipped)
+ */
+
+/* Offset to events in event set, used in log_* functions */
+static Int off_D0_Ir;
+static Int off_D1r_Ir;
+static Int off_D1r_Dr;
+static Int off_D1w_Ir;
+static Int off_D1w_Dw;
+static Int off_D2_Ir;
+static Int off_D2_Dr;
+static Int off_D2_Dw;
+
+static Addr   bb_base;
+static ULong* cost_base;
+static InstrInfo* current_ii;
+
+/* Cache use offsets */
+/* FIXME: The offsets are only correct because all eventsets get
+ * the "Use" set added first!
+ */
+static Int off_I1_AcCost = 0;
+static Int off_I1_SpLoss = 1;
+static Int off_D1_AcCost = 0;
+static Int off_D1_SpLoss = 1;
+static Int off_L2_AcCost = 2;
+static Int off_L2_SpLoss = 3;
+
+/* Cache access types */
+typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType;
+
+/* Result of a reference into a flat cache */
+typedef enum { Hit = 0, Miss, MissDirty } CacheResult;
+
+/* Result of a reference into a hierarchical cache model */
+typedef enum {
+    L1_Hit,
+    L2_Hit,
+    MemAccess,
+    WriteBackMemAccess } CacheModelResult;
+
+typedef CacheModelResult (*simcall_type)(Addr, UChar);
+
+static struct {
+    simcall_type I1_Read;
+    simcall_type D1_Read;
+    simcall_type D1_Write;
+} simulator;
+
+/*------------------------------------------------------------*/
+/*--- Cache Simulator Initialization                        ---*/
+/*------------------------------------------------------------*/
+
+static void cachesim_clearcache(cache_t2* c)
+{
+  Int i;
+
+  for (i = 0; i < c->sets * c->assoc; i++)
+    c->tags[i] = 0;
+  if (c->use) {
+    for (i = 0; i < c->sets * c->assoc; i++) {
+      c->loaded[i].memline  = 0;
+      c->loaded[i].use_base = 0;
+      c->loaded[i].dep_use  = 0;
+      c->loaded[i].iaddr    = 0;
+      c->use[i].mask  = 0;
+      c->use[i].count = 0;
+      c->tags[i] = i % c->assoc; /* init lower bits as pointer */
+    }
+  }
+}
+
+static void cacheuse_initcache(cache_t2* c);
+
+/* By this point, the size/assoc/line_size has been checked. */
+static void cachesim_initcache(cache_t config, cache_t2* c)
+{
+   c->size      = config.size;
+   c->assoc     = config.assoc;
+   c->line_size = config.line_size;
+   c->sectored  = False; // FIXME
+
+   c->sets           = (c->size / c->line_size) / c->assoc;
+   c->sets_min_1     = c->sets - 1;
+   c->assoc_bits     = VG_(log2)(c->assoc);
+   c->line_size_bits = VG_(log2)(c->line_size);
+   c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
+   c->tag_mask       = ~((1<<c->tag_shift)-1);
+
+   /* Can bits in tag entries be used for flags?
+    * Should be always true as MIN_LINE_SIZE >= 16 */
+   CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0);
+
+   if (c->assoc == 1) {
+      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s",
+		   c->size, c->line_size,
+		   c->sectored ? ", sectored":"");
+   } else {
+      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s",
+		   c->size, c->line_size, c->assoc,
+		   c->sectored ?
", sectored":""); + } + + c->tags = (UWord*) CLG_MALLOC(sizeof(UWord) * c->sets * c->assoc); + if (clo_collect_cacheuse) + cacheuse_initcache(c); + else + c->use = 0; + cachesim_clearcache(c); +} + + +#if 0 +static void print_cache(cache_t2* c) +{ + UInt set, way, i; + + /* Note initialisation and update of 'i'. */ + for (i = 0, set = 0; set < c->sets; set++) { + for (way = 0; way < c->assoc; way++, i++) { + VG_(printf)("%8x ", c->tags[i]); + } + VG_(printf)("\n"); + } +} +#endif + + +/*------------------------------------------------------------*/ +/*--- Write Through Cache Simulation ---*/ +/*------------------------------------------------------------*/ + +/* + * Simple model: L1 & L2 Write Through + * Does not distinguish among read and write references + * + * Simulator functions: + * CacheModelResult cachesim_I1_ref(Addr a, UChar size) + * CacheModelResult cachesim_D1_ref(Addr a, UChar size) + */ + +static __inline__ +CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag) +{ + int i, j; + UWord *set; + + /* Shifting is a bit faster than multiplying */ + set = &(c->tags[set_no << c->assoc_bits]); + + /* This loop is unrolled for just the first case, which is the most */ + /* common. We can't unroll any further because it would screw up */ + /* if we have a direct-mapped (1-way) cache. */ + if (tag == set[0]) + return Hit; + + /* If the tag is one other than the MRU, move it into the MRU spot */ + /* and shuffle the rest down. */ + for (i = 1; i < c->assoc; i++) { + if (tag == set[i]) { + for (j = i; j > 0; j--) { + set[j] = set[j - 1]; + } + set[0] = tag; + return Hit; + } + } + + /* A miss; install this tag as MRU, shuffle rest down. */ + for (j = c->assoc - 1; j > 0; j--) { + set[j] = set[j - 1]; + } + set[0] = tag; + + return Miss; +} + +static CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size) +{ + UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1); + UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1); + UWord tag = a >> c->tag_shift; + + /* Access entirely within line. */ + if (set1 == set2) + return cachesim_setref(c, set1, tag); + + /* Access straddles two lines. */ + /* Nb: this is a fast way of doing ((set1+1) % c->sets) */ + else if (((set1 + 1) & (c->sets-1)) == set2) { + + /* the call updates cache structures as side effect */ + CacheResult res1 = cachesim_setref(c, set1, tag); + CacheResult res2 = cachesim_setref(c, set2, tag); + return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit; + + } else { + VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2); + VG_(tool_panic)("item straddles more than two cache sets"); + } + return Hit; +} + +static +CacheModelResult cachesim_I1_ref(Addr a, UChar size) +{ + if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit; + if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit; + return MemAccess; +} + +static +CacheModelResult cachesim_D1_ref(Addr a, UChar size) +{ + if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit; + if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit; + return MemAccess; +} + + +/*------------------------------------------------------------*/ +/*--- Write Back Cache Simulation ---*/ +/*------------------------------------------------------------*/ + +/* + * More complex model: L1 Write-through, L2 Write-back + * This needs to distinguish among read and write references. 
+ *
+ * Simulator functions:
+ *  CacheModelResult cachesim_I1_Read(Addr a, UChar size)
+ *  CacheModelResult cachesim_D1_Read(Addr a, UChar size)
+ *  CacheModelResult cachesim_D1_Write(Addr a, UChar size)
+ */
+
+/*
+ * With write-back, the result can be a miss that evicts a dirty line.
+ * The dirty state of a cache line is stored in bit 0 of the tag for
+ * this cache line (CACHELINE_DIRTY = 1). By OR'ing in the reference
+ * type (Read/Write), the line gets dirty on a write.
+ */
+static __inline__
+CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag)
+{
+    int i, j;
+    UWord *set, tmp_tag;
+
+    /* Shifting is a bit faster than multiplying */
+    set = &(c->tags[set_no << c->assoc_bits]);
+
+    /* This loop is unrolled for just the first case, which is the most */
+    /* common.  We can't unroll any further because it would screw up   */
+    /* if we have a direct-mapped (1-way) cache.                        */
+    if (tag == (set[0] & ~CACHELINE_DIRTY)) {
+        set[0] |= ref;
+        return Hit;
+    }
+    /* If the tag is one other than the MRU, move it into the MRU spot */
+    /* and shuffle the rest down.                                      */
+    for (i = 1; i < c->assoc; i++) {
+        if (tag == (set[i] & ~CACHELINE_DIRTY)) {
+            tmp_tag = set[i] | ref; // update dirty flag
+            for (j = i; j > 0; j--) {
+                set[j] = set[j - 1];
+            }
+            set[0] = tmp_tag;
+            return Hit;
+        }
+    }
+
+    /* A miss; install this tag as MRU, shuffle rest down. */
+    tmp_tag = set[c->assoc - 1];
+    for (j = c->assoc - 1; j > 0; j--) {
+        set[j] = set[j - 1];
+    }
+    set[0] = tag | ref;
+
+    return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss;
+}
+
+
+static __inline__
+CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size)
+{
+    UInt set1 = ( a         >> c->line_size_bits) & (c->sets_min_1);
+    UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
+    UWord tag = a & c->tag_mask;
+
+    /* Access entirely within line. */
+    if (set1 == set2)
+        return cachesim_setref_wb(c, ref, set1, tag);
+
+    /* Access straddles two lines. */
+    /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
+    else if (((set1 + 1) & (c->sets-1)) == set2) {
+
+        /* the call updates cache structures as a side effect */
+        CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
+        CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag);
+
+        if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty;
+        return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
+
+    } else {
+        VG_(printf)("addr: %x  size: %u  sets: %d %d", a, size, set1, set2);
+        VG_(tool_panic)("item straddles more than two cache sets");
+    }
+    return Hit;
+}
+
+
+static
+CacheModelResult cachesim_I1_Read(Addr a, UChar size)
+{
+    if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
+    switch( cachesim_ref_wb( &L2, Read, a, size) ) {
+        case Hit:  return L2_Hit;
+        case Miss: return MemAccess;
+        default: break;
+    }
+    return WriteBackMemAccess;
+}
+
+static
+CacheModelResult cachesim_D1_Read(Addr a, UChar size)
+{
+    if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
+    switch( cachesim_ref_wb( &L2, Read, a, size) ) {
+        case Hit:  return L2_Hit;
+        case Miss: return MemAccess;
+        default: break;
+    }
+    return WriteBackMemAccess;
+}
+
+static
+CacheModelResult cachesim_D1_Write(Addr a, UChar size)
+{
+    if ( cachesim_ref( &D1, a, size) == Hit ) {
+        /* Even on an L1 hit, the write-through L1 passes
+         * the write on to the L2 to make the L2 line dirty.
+         * But this causes no latency, so return the hit.
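+         * (The hit/miss result of that L2 update is deliberately
+         *  ignored: only the dirty bit of the L2 line matters here.)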
+         */
+        cachesim_ref_wb( &L2, Write, a, size);
+        return L1_Hit;
+    }
+    switch( cachesim_ref_wb( &L2, Write, a, size) ) {
+        case Hit:  return L2_Hit;
+        case Miss: return MemAccess;
+        default: break;
+    }
+    return WriteBackMemAccess;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Hardware Prefetch Simulation                         ---*/
+/*------------------------------------------------------------*/
+
+static ULong prefetch_up = 0;
+static ULong prefetch_down = 0;
+
+#define PF_STREAMS  8
+#define PF_PAGEBITS 12
+
+static UInt pf_lastblock[PF_STREAMS];
+static Int  pf_seqblocks[PF_STREAMS];
+
+static
+void prefetch_clear(void)
+{
+  int i;
+  for(i=0;i<PF_STREAMS;i++)
+    pf_lastblock[i] = pf_seqblocks[i] = 0;
+}
+
+/*
+ * HW prefetch emulation: a prefetch is issued after detecting
+ * sequential access to 3 memory blocks; one stream is tracked
+ * per 4k page.
+ */
+static __inline__
+void prefetch_L2_doref(Addr a, UChar size)
+{
+  UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS;
+  UInt block  = ( a >> L2.line_size_bits);
+
+  if (block != pf_lastblock[stream]) {
+    if (pf_seqblocks[stream] == 0) {
+      if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++;
+      else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--;
+    }
+    else if (pf_seqblocks[stream] >0) {
+      if (pf_lastblock[stream] +1 == block) {
+        pf_seqblocks[stream]++;
+        if (pf_seqblocks[stream] >= 2) {
+          prefetch_up++;
+          cachesim_ref(&L2, a + 5 * L2.line_size,1);
+        }
+      }
+      else pf_seqblocks[stream] = 0;
+    }
+    else if (pf_seqblocks[stream] <0) {
+      if (pf_lastblock[stream] -1 == block) {
+        pf_seqblocks[stream]--;
+        if (pf_seqblocks[stream] <= -2) {
+          prefetch_down++;
+          cachesim_ref(&L2, a - 5 * L2.line_size,1);
+        }
+      }
+      else pf_seqblocks[stream] = 0;
+    }
+    pf_lastblock[stream] = block;
+  }
+}
+
+/* simple model with hardware prefetch */
+
+static
+CacheModelResult prefetch_I1_ref(Addr a, UChar size)
+{
+    if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
+    prefetch_L2_doref(a,size);
+    if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
+    return MemAccess;
+}
+
+static
+CacheModelResult prefetch_D1_ref(Addr a, UChar size)
+{
+    if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
+    prefetch_L2_doref(a,size);
+    if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
+    return MemAccess;
+}
+
+
+/* complex model with hardware prefetch */
+
+static
+CacheModelResult prefetch_I1_Read(Addr a, UChar size)
+{
+    if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
+    prefetch_L2_doref(a,size);
+    switch( cachesim_ref_wb( &L2, Read, a, size) ) {
+        case Hit:  return L2_Hit;
+        case Miss: return MemAccess;
+        default: break;
+    }
+    return WriteBackMemAccess;
+}
+
+static
+CacheModelResult prefetch_D1_Read(Addr a, UChar size)
+{
+    if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
+    prefetch_L2_doref(a,size);
+    switch( cachesim_ref_wb( &L2, Read, a, size) ) {
+        case Hit:  return L2_Hit;
+        case Miss: return MemAccess;
+        default: break;
+    }
+    return WriteBackMemAccess;
+}
+
+static
+CacheModelResult prefetch_D1_Write(Addr a, UChar size)
+{
+    prefetch_L2_doref(a,size);
+    if ( cachesim_ref( &D1, a, size) == Hit ) {
+        /* Even on an L1 hit, the write-through L1 passes
+         * the write on to the L2 to make the L2 line dirty.
+         * But this causes no latency, so return the hit.
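+         * (Note that, unlike the read variants above which consult the
+         *  prefetcher only after a D1 miss, this write path feeds
+         *  prefetch_L2_doref() before the D1 lookup.)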
+         */
+        cachesim_ref_wb( &L2, Write, a, size);
+        return L1_Hit;
+    }
+    switch( cachesim_ref_wb( &L2, Write, a, size) ) {
+        case Hit:  return L2_Hit;
+        case Miss: return MemAccess;
+        default: break;
+    }
+    return WriteBackMemAccess;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Cache Simulation with use metric collection          ---*/
+/*------------------------------------------------------------*/
+
+/* cannot be combined with write-back or prefetch */
+
+static
+void cacheuse_initcache(cache_t2* c)
+{
+    int i;
+    unsigned int start_mask, start_val;
+    unsigned int end_mask, end_val;
+
+    c->use    = CLG_MALLOC(sizeof(line_use) * c->sets * c->assoc);
+    c->loaded = CLG_MALLOC(sizeof(line_loaded) * c->sets * c->assoc);
+    c->line_start_mask = CLG_MALLOC(sizeof(int) * c->line_size);
+    c->line_end_mask   = CLG_MALLOC(sizeof(int) * c->line_size);
+
+
+    c->line_size_mask = c->line_size-1;
+
+    /* Meaning of line_start_mask/line_end_mask
+     * Example: for a given cache line, you get an access starting at
+     * byte offset 5, length 4, i.e. bytes 5 to 8 are touched. For a
+     * cache line size of 32, you have 1 bit per byte in the mask:
+     *
+     *   bit31   bit8 bit5  bit 0
+     *       |      |  |       |
+     *       11..111111100000   line_start_mask[5]
+     *       00..000111111111   line_end_mask[(5+4)-1]
+     *
+     *  use_mask |= line_start_mask[5] & line_end_mask[8]
+     *
+     */
+    start_val = end_val = ~0;
+    if (c->line_size < 32) {
+        int bits_per_byte = 32/c->line_size;
+        start_mask = (1<<bits_per_byte)-1;
+        end_mask   = start_mask << (32-bits_per_byte);
+        for(i=0;i<c->line_size;i++) {
+            c->line_start_mask[i] = start_val;
+            start_val  = start_val & ~start_mask;
+            start_mask = start_mask << bits_per_byte;
+
+            c->line_end_mask[c->line_size-i-1] = end_val;
+            end_val  = end_val & ~end_mask;
+            end_mask = end_mask >> bits_per_byte;
+        }
+    }
+    else {
+        int bytes_per_bit = c->line_size/32;
+        start_mask = 1;
+        end_mask   = 1 << 31;
+        for(i=0;i<c->line_size;i++) {
+            c->line_start_mask[i] = start_val;
+            c->line_end_mask[c->line_size-i-1] = end_val;
+            if ( ((i+1)%bytes_per_bit) == 0) {
+                start_val   &= ~start_mask;
+                end_val     &= ~end_mask;
+                start_mask <<= 1;
+                end_mask   >>= 1;
+            }
+        }
+    }
+
+    CLG_DEBUG(6, "Config %s:\n", c->desc_line);
+    for(i=0;i<c->line_size;i++) {
+        CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n",
+                  i, c->line_start_mask[i], c->line_end_mask[i]);
+    }
+
+    /* We use lower tag bits as offset pointers to cache use info.
+     * I.e. some cache parameters don't work.
+     */
+    if (c->tag_shift < c->assoc_bits) {
+        VG_(message)(Vg_DebugMsg,
+                     "error: Use associativity < %d for cache use statistics!",
+                     (1<<c->tag_shift) );
+        VG_(tool_panic)("Unsupported cache configuration");
+    }
+}
+
+/* FIXME: A little tricky */
+#if 0
+
+static __inline__
+void cacheuse_update_hit(cache_t2* c, UInt high_idx, UInt low_idx, UInt use_mask)
+{
+    int idx = (high_idx << c->assoc_bits) | low_idx;
+
+    c->use[idx].count ++;
+    c->use[idx].mask |= use_mask;
+
+    CLG_DEBUG(6," Hit [idx %d] (line %p from %p): %x => %08x, count %d\n",
+              idx, c->loaded[idx].memline, c->loaded[idx].iaddr,
+              use_mask, c->use[idx].mask, c->use[idx].count);
+}
+
+/* only used for I1, D1 */
+
+static __inline__
+CacheResult cacheuse_setref(cache_t2* c, UInt set_no, UWord tag)
+{
+    int i, j, idx;
+    UWord *set, tmp_tag;
+    UInt use_mask;
+
+    /* Shifting is a bit faster than multiplying */
+    set = &(c->tags[set_no << c->assoc_bits]);
+    use_mask =
+        c->line_start_mask[a & c->line_size_mask] &
+        c->line_end_mask[(a+size-1) & c->line_size_mask];
+
+    /* This loop is unrolled for just the first case, which is the most */
+    /* common.
We can't unroll any further because it would screw up */ + /* if we have a direct-mapped (1-way) cache. */ + if (tag == (set[0] & c->tag_mask)) { + cacheuse_update(c, set_no, set[0] & ~c->tag_mask, use_mask); + return L1_Hit; + } + + /* If the tag is one other than the MRU, move it into the MRU spot */ + /* and shuffle the rest down. */ + for (i = 1; i < c->assoc; i++) { + if (tag == (set[i] & c->tag_mask)) { + tmp_tag = set[i]; + for (j = i; j > 0; j--) { + set[j] = set[j - 1]; + } + set[0] = tmp_tag; + + cacheuse_update(c, set_no, tmp_tag & ~c->tag_mask, use_mask); + return L1_Hit; + } + } + + /* A miss; install this tag as MRU, shuffle rest down. */ + tmp_tag = set[L.assoc - 1] & ~c->tag_mask; + for (j = c->assoc - 1; j > 0; j--) { + set[j] = set[j - 1]; + } + set[0] = tag | tmp_tag; + + cacheuse_L2_miss(c, (set_no << c->assoc_bits) | tmp_tag, + use_mask, a & ~c->line_size_mask); + + return Miss; +} + + +static CacheResult cacheuse_ref(cache_t2* c, Addr a, UChar size) +{ + UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1); + UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1); + UWord tag = a >> c->tag_shift; + + /* Access entirely within line. */ + if (set1 == set2) + return cacheuse_setref(c, set1, tag); + + /* Access straddles two lines. */ + /* Nb: this is a fast way of doing ((set1+1) % c->sets) */ + else if (((set1 + 1) & (c->sets-1)) == set2) { + + /* the call updates cache structures as side effect */ + CacheResult res1 = cacheuse_isMiss(c, set1, tag); + CacheResult res2 = cacheuse_isMiss(c, set2, tag); + return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit; + + } else { + VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2); + VG_(tool_panic)("item straddles more than two cache sets"); + } + return Hit; +} +#endif + + +/* for I1/D1 caches */ +#define CACHEUSE(L) \ + \ +static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size) \ +{ \ + register UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ + register UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ + register UWord tag = a & L.tag_mask; \ + int i, j, idx; \ + UWord *set, tmp_tag; \ + UInt use_mask; \ + \ + CLG_DEBUG(6,"%s.Acc(Addr %p, size %d): Sets [%d/%d]\n", \ + L.name, a, size, set1, set2); \ + \ + /* First case: word entirely within line. */ \ + if (set1 == set2) { \ + \ + /* Shifting is a bit faster than multiplying */ \ + set = &(L.tags[set1 << L.assoc_bits]); \ + use_mask = L.line_start_mask[a & L.line_size_mask] & \ + L.line_end_mask[(a+size-1) & L.line_size_mask]; \ + \ + /* This loop is unrolled for just the first case, which is the most */\ + /* common. We can't unroll any further because it would screw up */\ + /* if we have a direct-mapped (1-way) cache. */\ + if (tag == (set[0] & L.tag_mask)) { \ + idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask); \ + L.use[idx].count ++; \ + L.use[idx].mask |= use_mask; \ + CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): %x => %08x, count %d\n",\ + idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ + use_mask, L.use[idx].mask, L.use[idx].count); \ + return L1_Hit; \ + } \ + /* If the tag is one other than the MRU, move it into the MRU spot */\ + /* and shuffle the rest down. 
*/\ + for (i = 1; i < L.assoc; i++) { \ + if (tag == (set[i] & L.tag_mask)) { \ + tmp_tag = set[i]; \ + for (j = i; j > 0; j--) { \ + set[j] = set[j - 1]; \ + } \ + set[0] = tmp_tag; \ + idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \ + L.use[idx].count ++; \ + L.use[idx].mask |= use_mask; \ + CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): %x => %08x, count %d\n",\ + i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ + use_mask, L.use[idx].mask, L.use[idx].count); \ + return L1_Hit; \ + } \ + } \ + \ + /* A miss; install this tag as MRU, shuffle rest down. */ \ + tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \ + for (j = L.assoc - 1; j > 0; j--) { \ + set[j] = set[j - 1]; \ + } \ + set[0] = tag | tmp_tag; \ + idx = (set1 << L.assoc_bits) | tmp_tag; \ + return update_##L##_use(&L, idx, \ + use_mask, a &~ L.line_size_mask); \ + \ + /* Second case: word straddles two lines. */ \ + /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ + } else if (((set1 + 1) & (L.sets-1)) == set2) { \ + Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:L2 miss */ \ + set = &(L.tags[set1 << L.assoc_bits]); \ + use_mask = L.line_start_mask[a & L.line_size_mask]; \ + if (tag == (set[0] & L.tag_mask)) { \ + idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask); \ + L.use[idx].count ++; \ + L.use[idx].mask |= use_mask; \ + CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): %x => %08x, count %d\n",\ + idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ + use_mask, L.use[idx].mask, L.use[idx].count); \ + goto block2; \ + } \ + for (i = 1; i < L.assoc; i++) { \ + if (tag == (set[i] & L.tag_mask)) { \ + tmp_tag = set[i]; \ + for (j = i; j > 0; j--) { \ + set[j] = set[j - 1]; \ + } \ + set[0] = tmp_tag; \ + idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \ + L.use[idx].count ++; \ + L.use[idx].mask |= use_mask; \ + CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): %x => %08x, count %d\n",\ + i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ + use_mask, L.use[idx].mask, L.use[idx].count); \ + goto block2; \ + } \ + } \ + tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \ + for (j = L.assoc - 1; j > 0; j--) { \ + set[j] = set[j - 1]; \ + } \ + set[0] = tag | tmp_tag; \ + idx = (set1 << L.assoc_bits) | tmp_tag; \ + miss1 = update_##L##_use(&L, idx, \ + use_mask, a &~ L.line_size_mask); \ +block2: \ + set = &(L.tags[set2 << L.assoc_bits]); \ + use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask]; \ + if (tag == (set[0] & L.tag_mask)) { \ + idx = (set2 << L.assoc_bits) | (set[0] & ~L.tag_mask); \ + L.use[idx].count ++; \ + L.use[idx].mask |= use_mask; \ + CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): %x => %08x, count %d\n",\ + idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ + use_mask, L.use[idx].mask, L.use[idx].count); \ + return miss1; \ + } \ + for (i = 1; i < L.assoc; i++) { \ + if (tag == (set[i] & L.tag_mask)) { \ + tmp_tag = set[i]; \ + for (j = i; j > 0; j--) { \ + set[j] = set[j - 1]; \ + } \ + set[0] = tmp_tag; \ + idx = (set2 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \ + L.use[idx].count ++; \ + L.use[idx].mask |= use_mask; \ + CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): %x => %08x, count %d\n",\ + i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ + use_mask, L.use[idx].mask, L.use[idx].count); \ + return miss1; \ + } \ + } \ + tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \ + for (j = L.assoc - 1; j > 0; j--) { \ + set[j] = set[j - 1]; \ + } \ + set[0] = tag | tmp_tag; \ + idx = (set2 << L.assoc_bits) | tmp_tag; \ + miss2 = update_##L##_use(&L, idx, \ + use_mask, (a+size-1) &~ 
L.line_size_mask); \ + return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:L2_Hit; \ + \ + } else { \ + VG_(printf)("addr: %p size: %u sets: %d %d", a, size, set1, set2); \ + VG_(tool_panic)("item straddles more than two cache sets"); \ + } \ + return 0; \ +} + + +/* logarithmic bitcounting algorithm, see + * http://graphics.stanford.edu/~seander/bithacks.html + */ +static __inline__ unsigned int countBits(unsigned int bits) +{ + unsigned int c; // store the total here + const int S[] = {1, 2, 4, 8, 16}; // Magic Binary Numbers + const int B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF}; + + c = bits; + c = ((c >> S[0]) & B[0]) + (c & B[0]); + c = ((c >> S[1]) & B[1]) + (c & B[1]); + c = ((c >> S[2]) & B[2]) + (c & B[2]); + c = ((c >> S[3]) & B[3]) + (c & B[3]); + c = ((c >> S[4]) & B[4]) + (c & B[4]); + return c; +} + +static void update_L2_use(int idx, Addr memline) +{ + line_loaded* loaded = &(L2.loaded[idx]); + line_use* use = &(L2.use[idx]); + int i = ((32 - countBits(use->mask)) * L2.line_size)>>5; + + CLG_DEBUG(2, " L2.miss [%d]: at %p accessing memline %p\n", + idx, bb_base + current_ii->instr_offset, memline); + if (use->count>0) { + CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %p from %p]\n", + use->count, i, use->mask, loaded->memline, loaded->iaddr); + CLG_DEBUG(2, " collect: %d, use_base %p\n", + CLG_(current_state).collect, loaded->use_base); + + if (CLG_(current_state).collect && loaded->use_base) { + (loaded->use_base)[off_L2_AcCost] += 1000 / use->count; + (loaded->use_base)[off_L2_SpLoss] += i; + } + } + + use->count = 0; + use->mask = 0; + + loaded->memline = memline; + loaded->iaddr = bb_base + current_ii->instr_offset; + loaded->use_base = (CLG_(current_state).nonskipped) ? + CLG_(current_state).nonskipped->skipped : + cost_base + current_ii->cost_offset; +} + +static +CacheModelResult cacheuse_L2_access(Addr memline, line_loaded* l1_loaded) +{ + UInt setNo = (memline >> L2.line_size_bits) & (L2.sets_min_1); + UWord* set = &(L2.tags[setNo << L2.assoc_bits]); + UWord tag = memline & L2.tag_mask; + + int i, j, idx; + UWord tmp_tag; + + CLG_DEBUG(6,"L2.Acc(Memline %p): Set %d\n", memline, setNo); + + if (tag == (set[0] & L2.tag_mask)) { + idx = (setNo << L2.assoc_bits) | (set[0] & ~L2.tag_mask); + l1_loaded->dep_use = &(L2.use[idx]); + + CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): => %08x, count %d\n", + idx, L2.loaded[idx].memline, L2.loaded[idx].iaddr, + L2.use[idx].mask, L2.use[idx].count); + return L2_Hit; + } + for (i = 1; i < L2.assoc; i++) { + if (tag == (set[i] & L2.tag_mask)) { + tmp_tag = set[i]; + for (j = i; j > 0; j--) { + set[j] = set[j - 1]; + } + set[0] = tmp_tag; + idx = (setNo << L2.assoc_bits) | (tmp_tag & ~L2.tag_mask); + l1_loaded->dep_use = &(L2.use[idx]); + + CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): => %08x, count %d\n", + i, idx, L2.loaded[idx].memline, L2.loaded[idx].iaddr, + L2.use[idx].mask, L2.use[idx].count); + return L2_Hit; + } + } + + /* A miss; install this tag as MRU, shuffle rest down. 
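+     * The victim's low tag bits are an index into the per-set
+     * use[]/loaded[] bookkeeping (seeded in cachesim_clearcache);
+     * they are OR'ed back into the new tag word so the slot stays
+     * linked to its use info.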
     */
+    tmp_tag = set[L2.assoc - 1] & ~L2.tag_mask;
+    for (j = L2.assoc - 1; j > 0; j--) {
+        set[j] = set[j - 1];
+    }
+    set[0] = tag | tmp_tag;
+    idx = (setNo << L2.assoc_bits) | tmp_tag;
+    l1_loaded->dep_use = &(L2.use[idx]);
+
+    update_L2_use(idx, memline);
+
+    return MemAccess;
+}
+
+
+
+
+#define UPDATE_USE(L)                                                \
+                                                                     \
+static CacheModelResult update##_##L##_use(cache_t2* cache, int idx, \
+                                           UInt mask, Addr memline)  \
+{                                                                    \
+    line_loaded* loaded = &(cache->loaded[idx]);                     \
+    line_use* use = &(cache->use[idx]);                              \
+    int c = ((32 - countBits(use->mask)) * cache->line_size)>>5;     \
+                                                                     \
+    CLG_DEBUG(2, " %s.miss [%d]: at %p accessing memline %p (mask %08x)\n", \
+              cache->name, idx, bb_base + current_ii->instr_offset, memline, mask); \
+    if (use->count>0) {                                              \
+        CLG_DEBUG(2, "   old: used %d, loss bits %d (%08x) [line %p from %p]\n",\
+                  use->count, c, use->mask, loaded->memline, loaded->iaddr); \
+        CLG_DEBUG(2, "   collect: %d, use_base %p\n",                \
+                  CLG_(current_state).collect, loaded->use_base);    \
+                                                                     \
+        if (CLG_(current_state).collect && loaded->use_base) {       \
+            (loaded->use_base)[off_##L##_AcCost] += 1000 / use->count; \
+            (loaded->use_base)[off_##L##_SpLoss] += c;               \
+                                                                     \
+            /* FIXME (?): L1/L2 line sizes must be equal! */         \
+            loaded->dep_use->mask  |= use->mask;                     \
+            loaded->dep_use->count += use->count;                    \
+        }                                                            \
+    }                                                                \
+                                                                     \
+    use->count = 1;                                                  \
+    use->mask  = mask;                                               \
+    loaded->memline = memline;                                       \
+    loaded->iaddr   = bb_base + current_ii->instr_offset;            \
+    loaded->use_base = (CLG_(current_state).nonskipped) ?            \
+                       CLG_(current_state).nonskipped->skipped :     \
+                       cost_base + current_ii->cost_offset;          \
+                                                                     \
+    if (memline == 0) return L2_Hit;                                 \
+    return cacheuse_L2_access(memline, loaded);                      \
+}
+
+UPDATE_USE(I1);
+UPDATE_USE(D1);
+
+CACHEUSE(I1);
+CACHEUSE(D1);
+
+
+static
+void cacheuse_finish(void)
+{
+    int i;
+    InstrInfo ii = { 0,0,0,0,0 };
+
+    if (!CLG_(current_state).collect) return;
+
+    bb_base    = 0;
+    current_ii = &ii;
+    cost_base  = 0;
+
+    /* update usage counters */
+    if (I1.use)
+        for (i = 0; i < I1.sets * I1.assoc; i++)
+            if (I1.loaded[i].use_base)
+                update_I1_use( &I1, i, 0,0);
+
+    if (D1.use)
+        for (i = 0; i < D1.sets * D1.assoc; i++)
+            if (D1.loaded[i].use_base)
+                update_D1_use( &D1, i, 0,0);
+
+    if (L2.use)
+        for (i = 0; i < L2.sets * L2.assoc; i++)
+            if (L2.loaded[i].use_base)
+                update_L2_use(i, 0);
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Helper functions called by instrumented code         ---*/
+/*------------------------------------------------------------*/
+
+
+static __inline__
+void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)
+{
+    switch(r) {
+        case WriteBackMemAccess:
+            if (clo_simulate_writeback) {
+                c1[3]++;
+                c2[3]++;
+            }
+            // fall through
+
+        case MemAccess:
+            c1[2]++;
+            c2[2]++;
+            // fall through
+
+        case L2_Hit:
+            c1[1]++;
+            c2[1]++;
+            // fall through
+
+        default:
+            c1[0]++;
+            c2[0]++;
+    }
+}
+
+
+VG_REGPARM(1)
+static void log_1I0D(InstrInfo* ii)
+{
+    CacheModelResult IrRes;
+
+    current_ii = ii;
+    IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
+
+    CLG_DEBUG(6, "log_1I0D:  Ir=%p/%u => Ir %d\n",
+              bb_base + ii->instr_offset, ii->instr_size, IrRes);
+
+    if (CLG_(current_state).collect) {
+        ULong* cost_Ir;
+
+        if (CLG_(current_state).nonskipped)
+            cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
+        else
+            cost_Ir = cost_base + ii->cost_offset + off_D0_Ir;
+
+        inc_costs(IrRes, cost_Ir,
+                  CLG_(current_state).cost + CLG_(sets).off_full_Ir );
+    }
+}
+
+
+/* Instruction doing a read access */
+
+VG_REGPARM(2)
+static void log_1I1Dr(InstrInfo* ii, Addr data) +{ + CacheModelResult IrRes, DrRes; + + current_ii = ii; + IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size); + DrRes = (*simulator.D1_Read)(data, ii->data_size); + + CLG_DEBUG(6, "log_1I1Dr: Ir=%p/%u, Dr=%p/%u => Ir %d, Dr %d\n", + bb_base + ii->instr_offset, ii->instr_size, + data, ii->data_size, IrRes, DrRes); + + if (CLG_(current_state).collect) { + ULong *cost_Ir, *cost_Dr; + + if (CLG_(current_state).nonskipped) { + cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir; + cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr; + } + else { + cost_Ir = cost_base + ii->cost_offset + off_D1r_Ir; + cost_Dr = cost_base + ii->cost_offset + off_D1r_Dr; + } + + inc_costs(IrRes, cost_Ir, + CLG_(current_state).cost + CLG_(sets).off_full_Ir ); + inc_costs(DrRes, cost_Dr, + CLG_(current_state).cost + CLG_(sets).off_full_Dr ); + } +} + + +VG_REGPARM(2) +static void log_0I1Dr(InstrInfo* ii, Addr data) +{ + CacheModelResult DrRes; + + current_ii = ii; + DrRes = (*simulator.D1_Read)(data, ii->data_size); + + CLG_DEBUG(6, "log_0I1Dr: Dr=%p/%u => Dr %d\n", + data, ii->data_size, DrRes); + + if (CLG_(current_state).collect) { + ULong *cost_Dr; + + if (CLG_(current_state).nonskipped) { + cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr; + } + else { + cost_Dr = cost_base + ii->cost_offset + off_D1r_Dr; + } + + inc_costs(DrRes, cost_Dr, + CLG_(current_state).cost + CLG_(sets).off_full_Dr ); + } +} + + +/* Instruction doing a write access */ + +VG_REGPARM(2) +static void log_1I1Dw(InstrInfo* ii, Addr data) +{ + CacheModelResult IrRes, DwRes; + + current_ii = ii; + IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size); + DwRes = (*simulator.D1_Write)(data, ii->data_size); + + CLG_DEBUG(6, "log_1I1Dw: Ir=%p/%u, Dw=%p/%u => Ir %d, Dw %d\n", + bb_base + ii->instr_offset, ii->instr_size, + data, ii->data_size, IrRes, DwRes); + + if (CLG_(current_state).collect) { + ULong *cost_Ir, *cost_Dw; + + if (CLG_(current_state).nonskipped) { + cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Ir; + cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw; + } + else { + cost_Ir = cost_base + ii->cost_offset + off_D1w_Ir; + cost_Dw = cost_base + ii->cost_offset + off_D1w_Dw; + } + + inc_costs(IrRes, cost_Ir, + CLG_(current_state).cost + CLG_(sets).off_full_Ir ); + inc_costs(DwRes, cost_Dw, + CLG_(current_state).cost + CLG_(sets).off_full_Dw ); + } +} + +VG_REGPARM(2) +static void log_0I1Dw(InstrInfo* ii, Addr data) +{ + CacheModelResult DwRes; + + current_ii = ii; + DwRes = (*simulator.D1_Write)(data, ii->data_size); + + CLG_DEBUG(6, "log_0I1Dw: Dw=%p/%u => Dw %d\n", + data, ii->data_size, DwRes); + + if (CLG_(current_state).collect) { + ULong *cost_Dw; + + if (CLG_(current_state).nonskipped) { + cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw; + } + else { + cost_Dw = cost_base + ii->cost_offset + off_D1w_Dw; + } + + inc_costs(DwRes, cost_Dw, + CLG_(current_state).cost + CLG_(sets).off_full_Dw ); + } +} + +/* Instruction doing a read and a write access */ + +VG_REGPARM(3) +static void log_1I2D(InstrInfo* ii, Addr data1, Addr data2) +{ + CacheModelResult IrRes, DrRes, DwRes; + + current_ii = ii; + IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size); + DrRes = (*simulator.D1_Read)(data1, ii->data_size); + DwRes = (*simulator.D1_Write)(data2, ii->data_size); + + CLG_DEBUG(6, + 
"log_1I2D: Ir=%p/%u, Dr=%p/%u, Dw=%p/%u => Ir %d, Dr %d, Dw %d\n", + bb_base + ii->instr_offset, ii->instr_size, + data1, ii->data_size, data2, ii->data_size, IrRes, DrRes, DwRes); + + if (CLG_(current_state).collect) { + ULong *cost_Ir, *cost_Dr, *cost_Dw; + + if (CLG_(current_state).nonskipped) { + cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Ir; + cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dr; + cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw; + } + else { + cost_Ir = cost_base + ii->cost_offset + off_D2_Ir; + cost_Dr = cost_base + ii->cost_offset + off_D2_Dr; + cost_Dw = cost_base + ii->cost_offset + off_D2_Dw; + } + + inc_costs(IrRes, cost_Ir, + CLG_(current_state).cost + CLG_(sets).off_full_Ir ); + inc_costs(DrRes, cost_Dr, + CLG_(current_state).cost + CLG_(sets).off_full_Dr ); + inc_costs(DwRes, cost_Dw, + CLG_(current_state).cost + CLG_(sets).off_full_Dw ); + } +} + +VG_REGPARM(3) +static void log_0I2D(InstrInfo* ii, Addr data1, Addr data2) +{ + CacheModelResult DrRes, DwRes; + + current_ii = ii; + DrRes = (*simulator.D1_Read)(data1, ii->data_size); + DwRes = (*simulator.D1_Write)(data2, ii->data_size); + + CLG_DEBUG(6, + "log_0D2D: Dr=%p/%u, Dw=%p/%u => Dr %d, Dw %d\n", + data1, ii->data_size, data2, ii->data_size, DrRes, DwRes); + + if (CLG_(current_state).collect) { + ULong *cost_Dr, *cost_Dw; + + if (CLG_(current_state).nonskipped) { + cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dr; + cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw; + } + else { + cost_Dr = cost_base + ii->cost_offset + off_D2_Dr; + cost_Dw = cost_base + ii->cost_offset + off_D2_Dw; + } + + inc_costs(DrRes, cost_Dr, + CLG_(current_state).cost + CLG_(sets).off_full_Dr ); + inc_costs(DwRes, cost_Dw, + CLG_(current_state).cost + CLG_(sets).off_full_Dw ); + } +} + + +/*------------------------------------------------------------*/ +/*--- Cache configuration ---*/ +/*------------------------------------------------------------*/ + +#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 }) + +static cache_t clo_I1_cache = UNDEFINED_CACHE; +static cache_t clo_D1_cache = UNDEFINED_CACHE; +static cache_t clo_L2_cache = UNDEFINED_CACHE; + + +/* Checks cache config is ok; makes it so if not. */ +static +void check_cache(cache_t* cache, Char *name) +{ + /* First check they're all powers of two */ + if (-1 == VG_(log2)(cache->size)) { + VG_(message)(Vg_UserMsg, + "error: %s size of %dB not a power of two; aborting.", + name, cache->size); + VG_(exit)(1); + } + + if (-1 == VG_(log2)(cache->assoc)) { + VG_(message)(Vg_UserMsg, + "error: %s associativity of %d not a power of two; aborting.", + name, cache->assoc); + VG_(exit)(1); + } + + if (-1 == VG_(log2)(cache->line_size)) { + VG_(message)(Vg_UserMsg, + "error: %s line size of %dB not a power of two; aborting.", + name, cache->line_size); + VG_(exit)(1); + } + + // Then check line size >= 16 -- any smaller and a single instruction could + // straddle three cache lines, which breaks a simulation assertion and is + // stupid anyway. + if (cache->line_size < MIN_LINE_SIZE) { + VG_(message)(Vg_UserMsg, + "error: %s line size of %dB too small; aborting.", + name, cache->line_size); + VG_(exit)(1); + } + + /* Then check cache size > line size (causes seg faults if not). 
   */
+  if (cache->size <= cache->line_size) {
+    VG_(message)(Vg_UserMsg,
+                 "error: %s cache size of %dB <= line size of %dB; aborting.",
+                 name, cache->size, cache->line_size);
+    VG_(exit)(1);
+  }
+
+  /* Then check assoc <= (size / line size) (seg faults otherwise). */
+  if (cache->assoc > (cache->size / cache->line_size)) {
+    VG_(message)(Vg_UserMsg,
+                 "error: %s associativity > (size / line size); aborting.", name);
+    VG_(exit)(1);
+  }
+}
+
+static
+void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
+
+   Int n_clos = 0;
+
+   // Count how many were defined on the command line.
+   if (DEFINED(clo_I1_cache)) { n_clos++; }
+   if (DEFINED(clo_D1_cache)) { n_clos++; }
+   if (DEFINED(clo_L2_cache)) { n_clos++; }
+
+   // Set the cache config (using auto-detection, if supported by the
+   // architecture)
+   VG_(configure_caches)( I1c, D1c, L2c, (3 == n_clos) );
+
+   // Then replace with any defined on the command line.
+   if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
+   if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
+   if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }
+
+   // Then check the values; invalid ones abort.
+   check_cache(I1c, "I1");
+   check_cache(D1c, "D1");
+   check_cache(L2c, "L2");
+
+   if (VG_(clo_verbosity) > 1) {
+      VG_(message)(Vg_UserMsg, "Cache configuration used:");
+      VG_(message)(Vg_UserMsg, "  I1: %dB, %d-way, %dB lines",
+                   I1c->size, I1c->assoc, I1c->line_size);
+      VG_(message)(Vg_UserMsg, "  D1: %dB, %d-way, %dB lines",
+                   D1c->size, D1c->assoc, D1c->line_size);
+      VG_(message)(Vg_UserMsg, "  L2: %dB, %d-way, %dB lines",
+                   L2c->size, L2c->assoc, L2c->line_size);
+   }
+#undef DEFINED
+}
+
+
+/* Initialize and clear simulator state */
+static void cachesim_post_clo_init(void)
+{
+   /* Cache configurations.
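+    * Geometry comes from VG_(configure_caches) auto-detection unless
+    * given on the command line, e.g. --I1=65536,2,64 (size,assoc,line_size).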
    */
+   cache_t I1c, D1c, L2c;
+
+   /* Initialize access handlers */
+   if (!CLG_(clo).simulate_cache) {
+      CLG_(cachesim).log_1I0D       = 0;
+      CLG_(cachesim).log_1I0D_name  = "(no function)";
+
+      CLG_(cachesim).log_1I1Dr      = 0;
+      CLG_(cachesim).log_1I1Dw      = 0;
+      CLG_(cachesim).log_1I2D       = 0;
+      CLG_(cachesim).log_1I1Dr_name = "(no function)";
+      CLG_(cachesim).log_1I1Dw_name = "(no function)";
+      CLG_(cachesim).log_1I2D_name  = "(no function)";
+
+      CLG_(cachesim).log_0I1Dr      = 0;
+      CLG_(cachesim).log_0I1Dw      = 0;
+      CLG_(cachesim).log_0I2D       = 0;
+      CLG_(cachesim).log_0I1Dr_name = "(no function)";
+      CLG_(cachesim).log_0I1Dw_name = "(no function)";
+      CLG_(cachesim).log_0I2D_name  = "(no function)";
+      return;
+   }
+
+   /* Configuration of caches is only needed with real cache simulation */
+   configure_caches(&I1c, &D1c, &L2c);
+
+   I1.name = "I1";
+   D1.name = "D1";
+   L2.name = "L2";
+
+   cachesim_initcache(I1c, &I1);
+   cachesim_initcache(D1c, &D1);
+   cachesim_initcache(L2c, &L2);
+
+   /* all cache simulator variants use the standard helpers,
+    * dispatching via the simulator struct */
+
+   CLG_(cachesim).log_1I0D       = log_1I0D;
+   CLG_(cachesim).log_1I0D_name  = "log_1I0D";
+
+   CLG_(cachesim).log_1I1Dr      = log_1I1Dr;
+   CLG_(cachesim).log_1I1Dw      = log_1I1Dw;
+   CLG_(cachesim).log_1I2D       = log_1I2D;
+   CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr";
+   CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw";
+   CLG_(cachesim).log_1I2D_name  = "log_1I2D";
+
+   CLG_(cachesim).log_0I1Dr      = log_0I1Dr;
+   CLG_(cachesim).log_0I1Dw      = log_0I1Dw;
+   CLG_(cachesim).log_0I2D       = log_0I2D;
+   CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr";
+   CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw";
+   CLG_(cachesim).log_0I2D_name  = "log_0I2D";
+
+   if (clo_collect_cacheuse) {
+
+      /* Warn about unsupported option combinations */
+      if (clo_simulate_hwpref) {
+         VG_(message)(Vg_DebugMsg,
+                      "warning: prefetch simulation cannot be used with cache usage");
+         clo_simulate_hwpref = False;
+      }
+
+      if (clo_simulate_writeback) {
+         VG_(message)(Vg_DebugMsg,
+                      "warning: write-back simulation cannot be used with cache usage");
+         clo_simulate_writeback = False;
+      }
+
+      simulator.I1_Read  = cacheuse_I1_doRead;
+      simulator.D1_Read  = cacheuse_D1_doRead;
+      /* use tracking ignores the reference type, so writes take the
+       * same path as reads */
+      simulator.D1_Write = cacheuse_D1_doRead;
+      return;
+   }
+
+   if (clo_simulate_hwpref) {
+      prefetch_clear();
+
+      if (clo_simulate_writeback) {
+         simulator.I1_Read  = prefetch_I1_Read;
+         simulator.D1_Read  = prefetch_D1_Read;
+         simulator.D1_Write = prefetch_D1_Write;
+      }
+      else {
+         simulator.I1_Read  = prefetch_I1_ref;
+         simulator.D1_Read  = prefetch_D1_ref;
+         simulator.D1_Write = prefetch_D1_ref;
+      }
+
+      return;
+   }
+
+   if (clo_simulate_writeback) {
+      simulator.I1_Read  = cachesim_I1_Read;
+      simulator.D1_Read  = cachesim_D1_Read;
+      simulator.D1_Write = cachesim_D1_Write;
+   }
+   else {
+      simulator.I1_Read  = cachesim_I1_ref;
+      simulator.D1_Read  = cachesim_D1_ref;
+      simulator.D1_Write = cachesim_D1_ref;
+   }
+}
+
+
+/* Clear simulator state. Has to be initialized before this is called. */
+static
+void cachesim_clear(void)
+{
+  cachesim_clearcache(&I1);
+  cachesim_clearcache(&D1);
+  cachesim_clearcache(&L2);
+
+  prefetch_clear();
+}
+
+
+static void cachesim_getdesc(Char* buf)
+{
+  Int p;
+  p = VG_(sprintf)(buf, "\ndesc: I1 cache: %s\n", I1.desc_line);
+  p += VG_(sprintf)(buf+p, "desc: D1 cache: %s\n", D1.desc_line);
+  VG_(sprintf)(buf+p, "desc: L2 cache: %s\n", L2.desc_line);
+}
+
+static
+void cachesim_print_opts(void)
+{
+  VG_(printf)(
+"\n   cache simulator options:\n"
+"    --simulate-cache=no|yes   Do cache simulation [no]\n"
+"    --simulate-wb=no|yes      Count write-back events [no]\n"
+"    --simulate-hwpref=no|yes  Simulate hardware prefetch [no]\n"
+#if CLG_EXPERIMENTAL
+"    --simulate-sectors=no|yes Simulate sectored behaviour [no]\n"
+#endif
+"    --cacheuse=no|yes         Collect cache block use [no]\n"
+"    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
+"    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
+"    --L2=<size>,<assoc>,<line_size>  set L2 cache manually\n"
+              );
+}
+
+static void parse_opt ( cache_t* cache, char* orig_opt, int opt_len )
+{
+   int   i1, i2, i3;
+   int   i;
+   char *opt = VG_(strdup)(orig_opt);
+
+   i = i1 = opt_len;
+
+   /* Option looks like "--I1=65536,2,64".
+    * Find commas, replace with NULs to make three independent
+    * strings, then extract numbers.  Yuck. */
+   while (VG_(isdigit)(opt[i])) i++;
+   if (',' == opt[i]) {
+      opt[i++] = '\0';
+      i2 = i;
+   } else goto bad;
+   while (VG_(isdigit)(opt[i])) i++;
+   if (',' == opt[i]) {
+      opt[i++] = '\0';
+      i3 = i;
+   } else goto bad;
+   while (VG_(isdigit)(opt[i])) i++;
+   if ('\0' != opt[i]) goto bad;
+
+   cache->size      = (Int)VG_(atoll)(opt + i1);
+   cache->assoc     = (Int)VG_(atoll)(opt + i2);
+   cache->line_size = (Int)VG_(atoll)(opt + i3);
+
+   VG_(free)(opt);
+
+   return;
+
+  bad:
+   VG_(bad_option)(orig_opt);
+}
+
+/* Check for a command line option for cache configuration.
+ * Returns False if the option is unknown / not handled here.
+ *
+ * Called from CLG_(process_cmd_line_option)() in clo.c
+ */
+static Bool cachesim_parse_opt(Char* arg)
+{
+   if (0 == VG_(strcmp)(arg, "--simulate-wb=yes"))
+      clo_simulate_writeback = True;
+   else if (0 == VG_(strcmp)(arg, "--simulate-wb=no"))
+      clo_simulate_writeback = False;
+
+   else if (0 == VG_(strcmp)(arg, "--simulate-hwpref=yes"))
+      clo_simulate_hwpref = True;
+   else if (0 == VG_(strcmp)(arg, "--simulate-hwpref=no"))
+      clo_simulate_hwpref = False;
+
+   else if (0 == VG_(strcmp)(arg, "--simulate-sectors=yes"))
+      clo_simulate_sectors = True;
+   else if (0 == VG_(strcmp)(arg, "--simulate-sectors=no"))
+      clo_simulate_sectors = False;
+
+   else if (0 == VG_(strcmp)(arg, "--cacheuse=yes")) {
+      clo_collect_cacheuse = True;
+      /* Use counters only make sense with fine-grained dumping */
+      CLG_(clo).dump_instr = True;
+   }
+   else if (0 == VG_(strcmp)(arg, "--cacheuse=no"))
+      clo_collect_cacheuse = False;
+
+   /* 5 is length of "--I1=" */
+   else if (0 == VG_(strncmp)(arg, "--I1=", 5))
+      parse_opt(&clo_I1_cache, arg, 5);
+   else if (0 == VG_(strncmp)(arg, "--D1=", 5))
+      parse_opt(&clo_D1_cache, arg, 5);
+   else if (0 == VG_(strncmp)(arg, "--L2=", 5))
+      parse_opt(&clo_L2_cache, arg, 5);
+   else
+      return False;
+
+   return True;
+}
+
+/* Adds commas to a ULong, right-justified in a field field_width wide;
+ * returns the string in buf.
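+ * E.g. commify(1234567, 12, buf) fills buf with "   1,234,567"
+ * (3 spaces of padding) and returns 9, the unpadded length.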
*/ +static +Int commify(ULong n, int field_width, char* buf) +{ + int len, n_commas, i, j, new_len, space; + + VG_(sprintf)(buf, "%llu", n); + len = VG_(strlen)(buf); + n_commas = (len - 1) / 3; + new_len = len + n_commas; + space = field_width - new_len; + + /* Allow for printing a number in a field_width smaller than it's size */ + if (space < 0) space = 0; + + /* Make j = -1 because we copy the '\0' before doing the numbers in groups + * of three. */ + for (j = -1, i = len ; i >= 0; i--) { + buf[i + n_commas + space] = buf[i]; + + if ((i>0) && (3 == ++j)) { + j = 0; + n_commas--; + buf[i + n_commas + space] = ','; + } + } + /* Right justify in field. */ + for (i = 0; i < space; i++) buf[i] = ' '; + return new_len; +} + +static +void percentify(Int n, Int ex, Int field_width, char buf[]) +{ + int i, len, space; + + VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex); + len = VG_(strlen)(buf); + space = field_width - len; + if (space < 0) space = 0; /* Allow for v. small field_width */ + i = len; + + /* Right justify in field */ + for ( ; i >= 0; i--) buf[i + space] = buf[i]; + for (i = 0; i < space; i++) buf[i] = ' '; +} + +static +void cachesim_printstat(void) +{ + FullCost total = CLG_(total_cost), D_total = 0; + ULong L2_total_m, L2_total_mr, L2_total_mw, + L2_total, L2_total_r, L2_total_w; + char buf1[RESULTS_BUF_LEN], + buf2[RESULTS_BUF_LEN], + buf3[RESULTS_BUF_LEN]; + Int l1, l2, l3; + Int p; + + if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) { + VG_(message)(Vg_DebugMsg, "Prefetch Up: %llu", + prefetch_up); + VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu", + prefetch_down); + VG_(message)(Vg_DebugMsg, ""); + } + + /* I cache results. Use the I_refs value to determine the first column + * width. */ + l1 = commify(total[CLG_(sets).off_full_Ir], 0, buf1); + VG_(message)(Vg_UserMsg, "I refs: %s", buf1); + + if (!CLG_(clo).simulate_cache) return; + + commify(total[CLG_(sets).off_full_Ir +1], l1, buf1); + VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1); + + commify(total[CLG_(sets).off_full_Ir +2], l1, buf1); + VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1); + + p = 100; + + if (0 == total[CLG_(sets).off_full_Ir]) + total[CLG_(sets).off_full_Ir] = 1; + + percentify(total[CLG_(sets).off_full_Ir+1] * 100 * p / + total[CLG_(sets).off_full_Ir], p, l1+1, buf1); + VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1); + + percentify(total[CLG_(sets).off_full_Ir+2] * 100 * p / + total[CLG_(sets).off_full_Ir], p, l1+1, buf1); + VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1); + VG_(message)(Vg_UserMsg, ""); + + /* D cache results. + Use the D_refs.rd and D_refs.wr values to determine the + * width of columns 2 & 3. 
*/ + + D_total = CLG_(get_eventset_cost)( CLG_(sets).full ); + CLG_(init_cost)( CLG_(sets).full, D_total); + CLG_(copy_cost)( CLG_(sets).Dr, D_total, total + CLG_(sets).off_full_Dr ); + CLG_(add_cost) ( CLG_(sets).Dw, D_total, total + CLG_(sets).off_full_Dw ); + + commify( D_total[0], l1, buf1); + l2 = commify(total[CLG_(sets).off_full_Dr], 0, buf2); + l3 = commify(total[CLG_(sets).off_full_Dw], 0, buf3); + VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)", + buf1, buf2, buf3); + + commify( D_total[1], l1, buf1); + commify(total[CLG_(sets).off_full_Dr+1], l2, buf2); + commify(total[CLG_(sets).off_full_Dw+1], l3, buf3); + VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)", + buf1, buf2, buf3); + + commify( D_total[2], l1, buf1); + commify(total[CLG_(sets).off_full_Dr+2], l2, buf2); + commify(total[CLG_(sets).off_full_Dw+2], l3, buf3); + VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)", + buf1, buf2, buf3); + + p = 10; + + if (0 == D_total[0]) D_total[0] = 1; + if (0 == total[CLG_(sets).off_full_Dr]) total[CLG_(sets).off_full_Dr] = 1; + if (0 == total[CLG_(sets).off_full_Dw]) total[CLG_(sets).off_full_Dw] = 1; + + percentify( D_total[1] * 100 * p / D_total[0], p, l1+1, buf1); + percentify(total[CLG_(sets).off_full_Dr+1] * 100 * p / + total[CLG_(sets).off_full_Dr], p, l2+1, buf2); + percentify(total[CLG_(sets).off_full_Dw+1] * 100 * p / + total[CLG_(sets).off_full_Dw], p, l3+1, buf3); + VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3); + + percentify( D_total[2] * 100 * p / D_total[0], p, l1+1, buf1); + percentify(total[CLG_(sets).off_full_Dr+2] * 100 * p / + total[CLG_(sets).off_full_Dr], p, l2+1, buf2); + percentify(total[CLG_(sets).off_full_Dw+2] * 100 * p / + total[CLG_(sets).off_full_Dw], p, l3+1, buf3); + VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3); + VG_(message)(Vg_UserMsg, ""); + + + + /* L2 overall results */ + + L2_total = + total[CLG_(sets).off_full_Dr +1] + + total[CLG_(sets).off_full_Dw +1] + + total[CLG_(sets).off_full_Ir +1]; + L2_total_r = + total[CLG_(sets).off_full_Dr +1] + + total[CLG_(sets).off_full_Ir +1]; + L2_total_w = total[CLG_(sets).off_full_Dw +1]; + commify(L2_total, l1, buf1); + commify(L2_total_r, l2, buf2); + commify(L2_total_w, l3, buf3); + VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)", + buf1, buf2, buf3); + + L2_total_m = + total[CLG_(sets).off_full_Dr +2] + + total[CLG_(sets).off_full_Dw +2] + + total[CLG_(sets).off_full_Ir +2]; + L2_total_mr = + total[CLG_(sets).off_full_Dr +2] + + total[CLG_(sets).off_full_Ir +2]; + L2_total_mw = total[CLG_(sets).off_full_Dw +2]; + commify(L2_total_m, l1, buf1); + commify(L2_total_mr, l2, buf2); + commify(L2_total_mw, l3, buf3); + VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)", + buf1, buf2, buf3); + + percentify(L2_total_m * 100 * p / + (total[CLG_(sets).off_full_Ir] + D_total[0]), p, l1+1, buf1); + percentify(L2_total_mr * 100 * p / + (total[CLG_(sets).off_full_Ir] + total[CLG_(sets).off_full_Dr]), + p, l2+1, buf2); + percentify(L2_total_mw * 100 * p / + total[CLG_(sets).off_full_Dw], p, l3+1, buf3); + VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", + buf1, buf2,buf3); +} + + +/*------------------------------------------------------------*/ +/*--- Setup for Event set. 
---*/ +/*------------------------------------------------------------*/ + +struct event_sets CLG_(sets); + +void CLG_(init_eventsets)(Int max_user) +{ + EventType * e1, *e2, *e3, *e4; + EventSet *Ir, *Dr, *Dw; + EventSet *D0, *D1r, *D1w, *D2; + EventSet *sim, *full; + EventSet *use; + int sizeOfUseIr; + + use = CLG_(get_eventset)("Use", 4); + if (clo_collect_cacheuse) { + /* if TUse is 0, there was never a load, and no loss, too */ + e1 = CLG_(register_eventtype)("AcCost1"); + CLG_(add_eventtype)(use, e1); + e1 = CLG_(register_eventtype)("SpLoss1"); + CLG_(add_eventtype)(use, e1); + e1 = CLG_(register_eventtype)("AcCost2"); + CLG_(add_eventtype)(use, e1); + e1 = CLG_(register_eventtype)("SpLoss2"); + CLG_(add_eventtype)(use, e1); + } + + Ir = CLG_(get_eventset)("Ir", 4); + Dr = CLG_(get_eventset)("Dr", 4); + Dw = CLG_(get_eventset)("Dw", 4); + if (CLG_(clo).simulate_cache) { + e1 = CLG_(register_eventtype)("Ir"); + e2 = CLG_(register_eventtype)("I1mr"); + e3 = CLG_(register_eventtype)("I2mr"); + if (clo_simulate_writeback) { + e4 = CLG_(register_eventtype)("I2dmr"); + CLG_(add_dep_event4)(Ir, e1,e2,e3,e4); + } + else + CLG_(add_dep_event3)(Ir, e1,e2,e3); + + e1 = CLG_(register_eventtype)("Dr"); + e2 = CLG_(register_eventtype)("D1mr"); + e3 = CLG_(register_eventtype)("D2mr"); + if (clo_simulate_writeback) { + e4 = CLG_(register_eventtype)("D2dmr"); + CLG_(add_dep_event4)(Dr, e1,e2,e3,e4); + } + else + CLG_(add_dep_event3)(Dr, e1,e2,e3); + + e1 = CLG_(register_eventtype)("Dw"); + e2 = CLG_(register_eventtype)("D1mw"); + e3 = CLG_(register_eventtype)("D2mw"); + if (clo_simulate_writeback) { + e4 = CLG_(register_eventtype)("D2dmw"); + CLG_(add_dep_event4)(Dw, e1,e2,e3,e4); + } + else + CLG_(add_dep_event3)(Dw, e1,e2,e3); + + } + else { + e1 = CLG_(register_eventtype)("Ir"); + CLG_(add_eventtype)(Ir, e1); + } + + sizeOfUseIr = use->size + Ir->size; + D0 = CLG_(get_eventset)("D0", sizeOfUseIr); + CLG_(add_eventset)(D0, use); + off_D0_Ir = CLG_(add_eventset)(D0, Ir); + + D1r = CLG_(get_eventset)("D1r", sizeOfUseIr + Dr->size); + CLG_(add_eventset)(D1r, use); + off_D1r_Ir = CLG_(add_eventset)(D1r, Ir); + off_D1r_Dr = CLG_(add_eventset)(D1r, Dr); + + D1w = CLG_(get_eventset)("D1w", sizeOfUseIr + Dw->size); + CLG_(add_eventset)(D1w, use); + off_D1w_Ir = CLG_(add_eventset)(D1w, Ir); + off_D1w_Dw = CLG_(add_eventset)(D1w, Dw); + + D2 = CLG_(get_eventset)("D2", sizeOfUseIr + Dr->size + Dw->size); + CLG_(add_eventset)(D2, use); + off_D2_Ir = CLG_(add_eventset)(D2, Ir); + off_D2_Dr = CLG_(add_eventset)(D2, Dr); + off_D2_Dw = CLG_(add_eventset)(D2, Dw); + + sim = CLG_(get_eventset)("sim", sizeOfUseIr + Dr->size + Dw->size); + CLG_(add_eventset)(sim, use); + CLG_(sets).off_sim_Ir = CLG_(add_eventset)(sim, Ir); + CLG_(sets).off_sim_Dr = CLG_(add_eventset)(sim, Dr); + CLG_(sets).off_sim_Dw = CLG_(add_eventset)(sim, Dw); + + if (CLG_(clo).collect_alloc) max_user += 2; + if (CLG_(clo).collect_systime) max_user += 2; + + full = CLG_(get_eventset)("full", sim->size + max_user); + CLG_(add_eventset)(full, sim); + CLG_(sets).off_full_Ir = CLG_(sets).off_sim_Ir; + CLG_(sets).off_full_Dr = CLG_(sets).off_sim_Dr; + CLG_(sets).off_full_Dw = CLG_(sets).off_sim_Dw; + + CLG_(sets).use = use; + CLG_(sets).Ir = Ir; + CLG_(sets).Dr = Dr; + CLG_(sets).Dw = Dw; + + CLG_(sets).D0 = D0; + CLG_(sets).D1r = D1r; + CLG_(sets).D1w = D1w; + CLG_(sets).D2 = D2; + + CLG_(sets).sim = sim; + CLG_(sets).full = full; + + if (CLG_(clo).collect_alloc) { + e1 = CLG_(register_eventtype)("allocCount"); + e2 = 
CLG_(register_eventtype)("allocSize"); + CLG_(sets).off_full_user = CLG_(add_dep_event2)(full, e1,e2); + } + + if (CLG_(clo).collect_systime) { + e1 = CLG_(register_eventtype)("sysCount"); + e2 = CLG_(register_eventtype)("sysTime"); + CLG_(sets).off_full_systime = CLG_(add_dep_event2)(full, e1,e2); + } + + CLG_DEBUGIF(1) { + CLG_DEBUG(1, "EventSets:\n"); + CLG_(print_eventset)(-2, use); + CLG_(print_eventset)(-2, Ir); + CLG_(print_eventset)(-2, Dr); + CLG_(print_eventset)(-2, Dw); + CLG_(print_eventset)(-2, sim); + CLG_(print_eventset)(-2, full); + } + + /* Not-existing events are silently ignored */ + CLG_(dumpmap) = CLG_(get_eventmapping)(full); + CLG_(append_event)(CLG_(dumpmap), "Ir"); + CLG_(append_event)(CLG_(dumpmap), "Dr"); + CLG_(append_event)(CLG_(dumpmap), "Dw"); + CLG_(append_event)(CLG_(dumpmap), "I1mr"); + CLG_(append_event)(CLG_(dumpmap), "D1mr"); + CLG_(append_event)(CLG_(dumpmap), "D1mw"); + CLG_(append_event)(CLG_(dumpmap), "I2mr"); + CLG_(append_event)(CLG_(dumpmap), "D2mr"); + CLG_(append_event)(CLG_(dumpmap), "D2mw"); + CLG_(append_event)(CLG_(dumpmap), "I2dmr"); + CLG_(append_event)(CLG_(dumpmap), "D2dmr"); + CLG_(append_event)(CLG_(dumpmap), "D2dmw"); + CLG_(append_event)(CLG_(dumpmap), "AcCost1"); + CLG_(append_event)(CLG_(dumpmap), "SpLoss1"); + CLG_(append_event)(CLG_(dumpmap), "AcCost2"); + CLG_(append_event)(CLG_(dumpmap), "SpLoss2"); + CLG_(append_event)(CLG_(dumpmap), "allocCount"); + CLG_(append_event)(CLG_(dumpmap), "allocSize"); + CLG_(append_event)(CLG_(dumpmap), "sysCount"); + CLG_(append_event)(CLG_(dumpmap), "sysTime"); + +} + + + +static +void add_and_zero_Dx(EventSet* es, SimCost dst, ULong* cost) +{ + /* if eventset use is defined, it is always first (hardcoded!) */ + CLG_(add_and_zero_cost)( CLG_(sets).use, dst, cost); + + /* FIXME: This is hardcoded... */ + if (es == CLG_(sets).D0) { + CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir, + cost + off_D0_Ir); + } + else if (es == CLG_(sets).D1r) { + CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir, + cost + off_D1r_Ir); + CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr, + cost + off_D1r_Dr); + } + else if (es == CLG_(sets).D1w) { + CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir, + cost + off_D1w_Ir); + CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw, + cost + off_D1w_Dw); + } + else { + CLG_ASSERT(es == CLG_(sets).D2); + CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir, + cost + off_D2_Ir); + CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr, + cost + off_D2_Dr); + CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw, + cost + off_D2_Dw); + } +} + +/* this is called at dump time for every instruction executed */ +static void cachesim_add_icost(SimCost cost, BBCC* bbcc, + InstrInfo* ii, ULong exe_count) +{ + if (!CLG_(clo).simulate_cache) + cost[CLG_(sets).off_sim_Ir] += exe_count; + else { + +#if 0 +/* There is always a trivial case where exe_count and Ir can be + * slightly different because ecounter is updated when executing + * the next BB. E.g. 
for last BB executed, or when toggling collection + */ + /* FIXME: Hardcoded that each eventset has Ir as first */ + if ((bbcc->cost + ii->cost_offset)[0] != exe_count) { + VG_(printf)("==> Ir %llu, exe %llu\n", + (bbcc->cost + ii->cost_offset)[0], exe_count); + CLG_(print_bbcc_cost)(-2, bbcc); + //CLG_ASSERT((bbcc->cost + ii->cost_offset)[0] == exe_count); + } +#endif + + add_and_zero_Dx(ii->eventset, cost, + bbcc->cost + ii->cost_offset); + } +} + +static +void cachesim_after_bbsetup(void) +{ + BBCC* bbcc = CLG_(current_state).bbcc; + + if (CLG_(clo).simulate_cache) { + BB* bb = bbcc->bb; + + /* only needed if log_* functions are called */ + bb_base = bb->obj->offset + bb->offset; + cost_base = bbcc->cost; + } +} + +static +void cachesim_finish(void) +{ + if (clo_collect_cacheuse) + cacheuse_finish(); +} + +/*------------------------------------------------------------*/ +/*--- The simulator defined in this file ---*/ +/*------------------------------------------------------------*/ + +struct cachesim_if CLG_(cachesim) = { + .print_opts = cachesim_print_opts, + .parse_opt = cachesim_parse_opt, + .post_clo_init = cachesim_post_clo_init, + .clear = cachesim_clear, + .getdesc = cachesim_getdesc, + .printstat = cachesim_printstat, + .add_icost = cachesim_add_icost, + .after_bbsetup = cachesim_after_bbsetup, + .finish = cachesim_finish, + + /* these will be set by cachesim_post_clo_init */ + .log_1I0D = 0, + + .log_1I1Dr = 0, + .log_1I1Dw = 0, + .log_1I2D = 0, + + .log_0I1Dr = 0, + .log_0I1Dw = 0, + .log_0I2D = 0, + + .log_1I0D_name = "(no function)", + + .log_1I1Dr_name = "(no function)", + .log_1I1Dw_name = "(no function)", + .log_1I2D_name = "(no function)", + + .log_0I1Dr_name = "(no function)", + .log_0I1Dw_name = "(no function)", + .log_0I2D_name = "(no function)" +}; + + +/*--------------------------------------------------------------------*/ +/*--- end ct_sim.c ---*/ +/*--------------------------------------------------------------------*/ + diff --git a/callgrind/tests/Makefile.am b/callgrind/tests/Makefile.am new file mode 100644 index 0000000000..bc7d201f4f --- /dev/null +++ b/callgrind/tests/Makefile.am @@ -0,0 +1,14 @@ +# For AM_FLAG_M3264_PRI +include $(top_srcdir)/Makefile.flags.am + +SUBDIRS = . +DIST_SUBDIRS = . + +noinst_SCRIPTS = + +EXTRA_DIST = + +check_PROGRAMS = + +AM_CPPFLAGS = -I$(top_srcdir)/include +AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -g $(AM_FLAG_M3264_PRI) diff --git a/callgrind/threads.c b/callgrind/threads.c new file mode 100644 index 0000000000..eda9d0c46f --- /dev/null +++ b/callgrind/threads.c @@ -0,0 +1,456 @@ +/*--------------------------------------------------------------------*/ +/*--- Callgrind ---*/ +/*--- ct_threads.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Callgrind, a Valgrind tool for call tracing. + + Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+#include
+
+/* forward decls */
+static exec_state* exec_state_save(void);
+static exec_state* exec_state_restore(void);
+static exec_state* push_exec_state(int);
+static exec_state* top_exec_state(void);
+
+static exec_stack current_states;
+
+
+/*------------------------------------------------------------*/
+/*--- Support for multi-threading                          ---*/
+/*------------------------------------------------------------*/
+
+
+/*
+ * For Valgrind, MT is cooperative (no preempting in our code),
+ * so we don't need locks...
+ *
+ * Per-thread data:
+ *  - BBCCs
+ *  - call stack
+ *  - call hash
+ *  - event counters: last, current
+ *
+ * Even when ignoring MT, we need these functions to set up some
+ * data structures for the process (= Thread 1).
+ */
+
+/* current running thread */
+ThreadId CLG_(current_tid);
+
+static thread_info* thread[VG_N_THREADS];
+
+thread_info** CLG_(get_threads)()
+{
+    return thread;
+}
+
+thread_info* CLG_(get_current_thread)()
+{
+    return thread[CLG_(current_tid)];
+}
+
+void CLG_(init_threads)()
+{
+    Int i;
+    for(i=0;i<VG_N_THREADS;i++) thread[i] = 0;
+    CLG_(current_tid) = VG_INVALID_THREADID;
+}
+
+/* allocate and initialize a thread_info struct */
+static thread_info* new_thread(void)
+{
+    thread_info* t;
+
+    t = (thread_info*) CLG_MALLOC(sizeof(thread_info));
+
+    /* init state */
+    CLG_(init_exec_stack)( &(t->states) );
+    CLG_(init_call_stack)( &(t->calls) );
+    CLG_(init_fn_stack)  ( &(t->fns) );
+    /* t->states.entry[0]->cxt = CLG_(get_cxt)(t->fns.bottom); */
+
+    /* event counters */
+    t->lastdump_cost   = CLG_(get_eventset_cost)( CLG_(sets).full );
+    t->sighandler_cost = CLG_(get_eventset_cost)( CLG_(sets).full );
+    CLG_(init_cost)( CLG_(sets).full, t->lastdump_cost );
+    CLG_(init_cost)( CLG_(sets).full, t->sighandler_cost );
+
+    /* init data containers */
+    CLG_(init_fn_array)( &(t->fn_active) );
+    CLG_(init_bbcc_hash)( &(t->bbccs) );
+    CLG_(init_jcc_hash) ( &(t->jccs) );
+
+    return t;
+}
+
+
+void CLG_(switch_thread)(ThreadId tid)
+{
+    if (tid == CLG_(current_tid)) return;
+
+    CLG_DEBUG(0, ">> thread %d (was %d)\n", tid, CLG_(current_tid));
+
+    if (CLG_(current_tid) != VG_INVALID_THREADID) {
+        /* save thread state */
+        thread_info* t = thread[CLG_(current_tid)];
+
+        CLG_ASSERT(t != 0);
+
+        /* current context (including signal handler contexts) */
+        exec_state_save();
+        CLG_(copy_current_exec_stack)( &(t->states) );
+        CLG_(copy_current_call_stack)( &(t->calls) );
+        CLG_(copy_current_fn_stack)  ( &(t->fns) );
+
+        CLG_(copy_current_fn_array) ( &(t->fn_active) );
+        /* If we accumulate costs of all threads, use TID 1 for all jccs/bbccs */
+        if (!CLG_(clo).separate_threads) t = thread[1];
+        CLG_(copy_current_bbcc_hash)( &(t->bbccs) );
+        CLG_(copy_current_jcc_hash) ( &(t->jccs) );
+    }
+
+    CLG_(current_tid) = tid;
+    CLG_ASSERT(tid < VG_N_THREADS);
+
+    if (tid != VG_INVALID_THREADID) {
+        thread_info* t;
+
+        /* load thread state */
+
+        if (thread[tid] == 0) thread[tid] = new_thread();
+        t = thread[tid];
+
+        /* current context (including signal handler contexts) */
+        CLG_(set_current_exec_stack)( &(t->states) );
+        exec_state_restore();
+        CLG_(set_current_call_stack)( &(t->calls) );
+        CLG_(set_current_fn_stack)  ( &(t->fns) );
+
+        CLG_(set_current_fn_array) ( &(t->fn_active) );
+        /* If we accumulate costs of all threads, use TID 1 for all jccs/bbccs */
+        if (!CLG_(clo).separate_threads) t = thread[1];
+        CLG_(set_current_bbcc_hash) ( &(t->bbccs) );
+        CLG_(set_current_jcc_hash)  ( &(t->jccs) );
+    }
+}
+
+
+void CLG_(run_thread)(ThreadId tid)
+{
+    /* check for dumps needed */
+    static ULong bbs_done = 0;
+
+void CLG_(run_thread)(ThreadId tid)
+{
+  /* check for dumps needed */
+  static ULong bbs_done = 0;
+  static Char buf[512];
+
+  if (CLG_(clo).dump_every_bb > 0) {
+    if (CLG_(stat).bb_executions - bbs_done > CLG_(clo).dump_every_bb) {
+      VG_(sprintf)(buf, "--dump-every-bb=%d", CLG_(clo).dump_every_bb);
+      CLG_(dump_profile)(buf, False);
+      bbs_done = CLG_(stat).bb_executions;
+    }
+  }
+
+  CLG_(check_command)();
+
+  /* now check for thread switch */
+  CLG_(switch_thread)(tid);
+}
+
+void CLG_(pre_signal)(ThreadId tid, Int sigNum, Bool alt_stack)
+{
+  exec_state *es;
+
+  CLG_DEBUG(0, ">> pre_signal(TID %d, sig %d, alt_st %s)\n",
+            tid, sigNum, alt_stack ? "yes":"no");
+
+  /* switch to the thread the handler runs in */
+  CLG_(run_thread)(tid);
+
+  /* save current execution state */
+  exec_state_save();
+
+  /* setup current state for a spontaneous call */
+  CLG_(init_exec_state)( &CLG_(current_state) );
+  CLG_(push_cxt)(0);
+
+  /* setup new cxtinfo struct for this signal handler */
+  es = push_exec_state(sigNum);
+  CLG_(init_cost)( CLG_(sets).full, es->cost );
+  CLG_(current_state).cost = es->cost;
+  es->call_stack_bottom = CLG_(current_call_stack).sp;
+
+  CLG_(current_state).sig = sigNum;
+}
+
+/* Run post_signal() if the call-stack pointer is back at the bottom
+ * recorded for the current exec state (i.e. a signal handler is
+ * returning).
+ *
+ * Called from CLG_(pop_call_stack).
+ */
+void CLG_(run_post_signal_on_call_stack_bottom)()
+{
+  exec_state* es = top_exec_state();
+  CLG_ASSERT(es != 0);
+  CLG_ASSERT(CLG_(current_state).sig > 0);
+
+  if (CLG_(current_call_stack).sp == es->call_stack_bottom)
+    CLG_(post_signal)( CLG_(current_tid), CLG_(current_state).sig );
+}
+
+void CLG_(post_signal)(ThreadId tid, Int sigNum)
+{
+  exec_state* es;
+  UInt fn_number, *pactive;
+
+  CLG_DEBUG(0, ">> post_signal(TID %d, sig %d)\n",
+            tid, sigNum);
+
+  CLG_ASSERT(tid == CLG_(current_tid));
+  CLG_ASSERT(sigNum == CLG_(current_state).sig);
+
+  /* Unwind call stack of this signal handler.
+   * This should only be needed at finalisation time.
+   */
+  es = top_exec_state();
+  CLG_ASSERT(es != 0);
+  while(CLG_(current_call_stack).sp > es->call_stack_bottom)
+    CLG_(pop_call_stack)();
+
+  if (CLG_(current_state).cxt) {
+    /* correct active counts */
+    fn_number = CLG_(current_state).cxt->fn[0]->number;
+    pactive = CLG_(get_fn_entry)(fn_number);
+    (*pactive)--;
+    CLG_DEBUG(0, "  set active count of %s back to %d\n",
+              CLG_(current_state).cxt->fn[0]->name, *pactive);
+  }
+
+  if (CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) {
+    /* set fn_stack_top back.
+     * top can point to 0 if nothing was executed in the signal handler;
+     * this can happen at the end when unwinding handlers.
+     */
+    if (*(CLG_(current_fn_stack).top) != 0) {
+      CLG_(current_fn_stack).top--;
+      CLG_ASSERT(*(CLG_(current_fn_stack).top) == 0);
+    }
+    if (CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom)
+      CLG_(current_fn_stack).top--;
+  }
+
+  /* sum up costs */
+  CLG_ASSERT(CLG_(current_state).cost == es->cost);
+  CLG_(add_and_zero_cost)( CLG_(sets).full,
+                           thread[CLG_(current_tid)]->sighandler_cost,
+                           CLG_(current_state).cost );
+
+  /* restore previous context */
+  es->sig = -1;
+  current_states.sp--;
+  es = top_exec_state();
+  CLG_(current_state).sig = es->sig;
+  exec_state_restore();
+
+  /* There is no way to reliably get the thread ID we are switching to
+   * after this handler returns. So we sync with the actual TID at the
+   * start of CLG_(setup_bb)(), which should be the next callgrind
+   * function to run.
+   */
+}
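pre_signal() and post_signal() bracket a handler's execution: entry records the call-stack depth in call_stack_bottom, and exit pops frames until the stack is back at that depth, so handler frames never leak into the interrupted context. A small sketch of that bracketing, not from the patch; the frame stack and helper names are hypothetical stand-ins for CLG_(current_call_stack) and the exec-state stack:

    #include <assert.h>
    #include <stdio.h>

    #define MAX_FRAMES   64
    #define MAX_HANDLERS  8

    static int call_sp = 0;             /* call-stack depth              */
    static int bottom[MAX_HANDLERS];    /* saved depth per handler level */
    static int es_sp = 0;               /* handler nesting level         */

    static void call(void) { assert(call_sp < MAX_FRAMES); call_sp++; }
    static void ret(void)  { assert(call_sp > 0);          call_sp--; }

    static void enter_handler(void)
    {
        bottom[es_sp++] = call_sp;      /* remember depth at entry */
    }

    static void leave_handler(void)
    {
        int b = bottom[--es_sp];
        while (call_sp > b)             /* unwind frames the handler left */
            ret();
    }

    int main(void)
    {
        call(); call();                 /* normal program: depth 2 */
        enter_handler();
        call();                         /* handler makes a call    */
        leave_handler();                /* unwinds back to depth 2 */
        printf("depth after handler: %d\n", call_sp);   /* prints 2 */
        return 0;
    }

Because handlers of one thread cannot be scheduled against each other, a simple array of saved bottoms indexed by nesting level suffices, which is exactly why the code below needs no extra stacks per handler.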
+
+
+
+/*------------------------------------------------------------*/
+/*--- Execution states in a thread & signal handlers       ---*/
+/*------------------------------------------------------------*/
+
+/* Each thread can be interrupted by a signal handler, and signal
+ * handlers themselves can be interrupted again. But as there is no
+ * scheduling among handlers of the same thread, we do not need
+ * additional stacks. Storing execution contexts and adding separators
+ * in the call stack (needed to not intermix normal and handler
+ * functions in contexts) is enough.
+ */
+
+/* not initialized: call_stack_bottom, sig */
+void CLG_(init_exec_state)(exec_state* es)
+{
+  es->collect = CLG_(clo).collect_atstart;
+  es->cxt  = 0;
+  es->jmps_passed = 0;
+  es->bbcc = 0;
+  es->nonskipped = 0;
+}
+
+
+static exec_state* new_exec_state(Int sigNum)
+{
+    exec_state* es;
+    es = (exec_state*) CLG_MALLOC(sizeof(exec_state));
+
+    /* allocate real cost space: needed as incremented by
+     * simulation functions */
+    es->cost = CLG_(get_eventset_cost)(CLG_(sets).full);
+    CLG_(init_cost)( CLG_(sets).full, es->cost );
+
+    CLG_(init_exec_state)(es);
+    es->sig = sigNum;
+    es->call_stack_bottom = 0;
+
+    return es;
+}
+
+void CLG_(init_exec_stack)(exec_stack* es)
+{
+  Int i;
+
+  /* The first element is for the main thread */
+  es->entry[0] = new_exec_state(0);
+  for(i=1;i<MAX_SIGHANDLERS;i++)
+    es->entry[i] = 0;
+  es->sp = 0;
+}
+
+void CLG_(copy_current_exec_stack)(exec_stack* dst)
+{
+  Int i;
+
+  dst->sp = current_states.sp;
+  for(i=0;i<MAX_SIGHANDLERS;i++)
+    dst->entry[i] = current_states.entry[i];
+}
+
+void CLG_(set_current_exec_stack)(exec_stack* dst)
+{
+  Int i;
+
+  current_states.sp = dst->sp;
+  for(i=0;i<MAX_SIGHANDLERS;i++)
+    current_states.entry[i] = dst->entry[i];
+}
+
+
+/* Get top context info struct of current thread */
+static
+exec_state* top_exec_state(void)
+{
+  Int sp = current_states.sp;
+  exec_state* es;
+
+  CLG_ASSERT((sp >= 0) && (sp < MAX_SIGHANDLERS));
+  es = current_states.entry[sp];
+  CLG_ASSERT(es != 0);
+  return es;
+}
+
+/* Allocates a free context info structure for a newly entered
+ * signal handler, putting it on the context stack.
+ * Returns a pointer to the structure.
+ */
+static exec_state* push_exec_state(int sigNum)
+{
+  Int sp;
+  exec_state* es;
+
+  current_states.sp++;
+  sp = current_states.sp;
+
+  CLG_ASSERT((sigNum > 0) && (sigNum <= _VKI_NSIG));
+  CLG_ASSERT((sp > 0) && (sp < MAX_SIGHANDLERS));
+  es = current_states.entry[sp];
+  if (!es) {
+    es = new_exec_state(sigNum);
+    current_states.entry[sp] = es;
+  }
+  else
+    es->sig = sigNum;
+
+  return es;
+}
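Note that push_exec_state() never frees a slot: an entry allocated at signal-nesting depth N is kept and reused the next time a handler nests that deep, so the exec state (and its cost space) is allocated at most MAX_SIGHANDLERS times per thread regardless of how many signals fire. A sketch of this allocate-once, reuse-forever stack-slot idiom, not from the patch; slot_t and its fields are illustrative:

    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_DEPTH 10

    typedef struct { int sig; long cost; } slot_t;   /* illustrative payload */

    static slot_t* entry[MAX_DEPTH];   /* zero-initialized: no slots yet */
    static int     sp = 0;
    static int     allocations = 0;

    static slot_t* push_slot(int sig)
    {
        slot_t* s;
        sp++;
        if (!(s = entry[sp])) {        /* first visit to this depth */
            s = calloc(1, sizeof(slot_t));
            entry[sp] = s;
            allocations++;
        }
        s->sig = sig;                  /* reinitialize on reuse */
        return s;
    }

    static void pop_slot(void) { sp--; }   /* slot stays allocated */

    int main(void)
    {
        int i;
        for (i = 0; i < 1000; i++) {   /* 1000 handler entries at depth 1 */
            push_slot(7);
            pop_slot();
        }
        printf("allocations: %d\n", allocations);   /* prints 1 */
        return 0;
    }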
"Yes": "No", es->jmps_passed); + CLG_(print_bbcc)(-9, es->bbcc, False); + CLG_(print_cost)(-9, CLG_(sets).full, es->cost); + } + + /* signal number does not need to be saved */ + CLG_ASSERT(CLG_(current_state).sig == es->sig); + + return es; +} + +static +exec_state* exec_state_restore(void) +{ + exec_state* es = top_exec_state(); + + CLG_(current_state).cxt = es->cxt; + CLG_(current_state).collect = es->collect; + CLG_(current_state).jmps_passed = es->jmps_passed; + CLG_(current_state).bbcc = es->bbcc; + CLG_(current_state).nonskipped = es->nonskipped; + CLG_(current_state).cost = es->cost; + CLG_(current_state).sig = es->sig; + + CLG_DEBUGIF(1) { + CLG_DEBUG(1, " exec_state_restore(sig %d): collect %s, jmps_passed %d\n", + es->sig, es->collect ? "Yes": "No", es->jmps_passed); + CLG_(print_bbcc)(-9, es->bbcc, False); + CLG_(print_cxt)(-9, es->cxt, 0); + CLG_(print_cost)(-9, CLG_(sets).full, es->cost); + } + + return es; +} + diff --git a/configure.in b/configure.in index 2491673370..f912b6bd95 100644 --- a/configure.in +++ b/configure.in @@ -697,6 +697,11 @@ AC_OUTPUT( cachegrind/tests/x86/Makefile cachegrind/docs/Makefile cachegrind/cg_annotate + callgrind/Makefile + callgrind/callgrind_annotate + callgrind/callgrind_control + callgrind/tests/Makefile + callgrind/docs/Makefile helgrind/Makefile helgrind/tests/Makefile helgrind/docs/Makefile -- 2.47.2