[ <!ENTITY % vg-entities SYSTEM "../../docs/xml/vg-entities.xml"> %vg-entities; ]>
<chapter id="cl-manual" xreflabel="Callgrind Manual">
-<title>Callgrind: a call-graph generating cache profiler</title>
+<title>Callgrind: a call-graph generating cache and branch prediction profiler</title>
<para>To use this tool, you must specify
<sect1 id="cl-manual.use" xreflabel="Overview">
<title>Overview</title>
-<para>Callgrind is a profiling tool that can
-construct a call graph for a program's run.
+<para>Callgrind is a profiling tool that records the call history among
+functions in a program's run as a call-graph.
By default, the collected data consists of
the number of instructions executed, their relationship
to source lines, the caller/callee relationship between functions,
and the numbers of such calls.
-Optionally, a cache simulator (similar to Cachegrind) can produce
-further information about the memory access behavior of the application.
+Optionally, cache simulation and/or branch prediction (similar to Cachegrind)
+can produce further information about the runtime behavior of an application.
</para>
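+<para>A minimal invocation (a sketch; "prog" stands in for the program
+to be profiled) looks like this:</para>
+<screen>valgrind --tool=callgrind prog</screen>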
<para>The profile data is written out to a file at program
results in this case.</para>
<para>If you are additionally interested in measuring the
- cache behavior of your
- program, use Callgrind with the option
- <option><xref linkend="opt.simulate-cache"/>=yes.</option>
- However, expect a further slow down approximately by a factor of 2.</para>
+ cache behavior of your program, use Callgrind with the option
+ <option><xref linkend="clopt.cache-sim"/>=yes</option>. For
+ branch prediction simulation, use <option><xref linkend="clopt.branch-sim"/>=yes</option>.
+  Expect a further slowdown by roughly a factor of 2.</para>
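+  <para>For example, to collect cache and branch prediction events in a
+  single run (with "prog" again standing in for your program):</para>
+<screen>valgrind --tool=callgrind --cache-sim=yes --branch-sim=yes prog</screen>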
<para>If the program section you want to profile is somewhere in the
middle of the run, it is beneficial to
"global bus events" is used.</para>
<para>The short name of the event type used for global bus events is "Ge".
- To count global bus events, use <option><xref linkend="opt.collect-bus"/></option>.
+ To count global bus events, use <option><xref linkend="clopt.collect-bus"/>=yes</option>.
</para>
</sect2>
</listitem>
</varlistentry>
- <varlistentry id="opt.collect-bus" xreflabel="--collect-bus">
+ <varlistentry id="clopt.collect-bus" xreflabel="--collect-bus">
<term>
<option><![CDATA[--collect-bus=<no|yes> [default: no] ]]></option>
</term>
<!-- end of xi:include in the manpage -->
</sect2>
+
<sect2 id="cl-manual.options.simulation"
- xreflabel="Cache simulation options">
-<title>Cache simulation options</title>
+ xreflabel="Simulation options">
+<title>Simulation options</title>
<!-- start of xi:include in the manpage -->
<variablelist id="cl.opts.list.simulation">
-
- <varlistentry id="opt.simulate-cache" xreflabel="--simulate-cache">
+
+ <varlistentry id="clopt.cache-sim" xreflabel="--cache-sim">
<term>
- <option><![CDATA[--simulate-cache=<yes|no> [default: no] ]]></option>
+ <option><![CDATA[--cache-sim=<yes|no> [default: no] ]]></option>
</term>
<listitem>
<para>Specify if you want to do full cache simulation. By default,
- only instruction read accesses will be profiled.</para>
+ only instruction read accesses will be counted ("Ir").
+ With cache simulation, further event counters are enabled:
+ Cache misses on instruction reads ("I1mr"/"I2mr"),
+ data read accesses ("Dr") and related cache misses ("D1mr"/"D2mr"),
+ data write accesses ("Dw") and related cache misses ("D1mw"/"D2mw").
+ For more information, see <xref linkend="cg-manual"/>.
+ </para>
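+  <para>As an illustration, with cache simulation enabled the summary
+  printed at program termination gains lines like the following (the
+  numbers are made up for this example):</para>
+<screen>
+I refs:       1,000,000
+I1 misses:        1,000
+D refs:         400,000 (300,000 rd + 100,000 wr)
+</screen>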
</listitem>
</varlistentry>
+ <varlistentry id="clopt.branch-sim" xreflabel="--branch-sim">
+ <term>
+ <option><![CDATA[--branch-sim=<yes|no> [default: no] ]]></option>
+ </term>
+ <listitem>
+    <para>Specify if you want to do branch prediction simulation.
+    This enables further event counters: the number of executed
+    conditional branches and related predictor misses ("Bc"/"Bcm"),
+    and the number of executed indirect jumps together with misses
+    of the jump address predictor ("Bi"/"Bim").
+    </para>
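+    <para>With branch prediction simulation, the summary printed at
+    program termination additionally shows lines like these (numbers
+    made up for this example):</para>
+<screen>
+Branches:       100,000 (90,000 cond + 10,000 ind)
+Mispredicts:     10,000 ( 9,000 cond +  1,000 ind)
+Mispred rate:      10.0% (   10.0%   +    10.0%  )
+</screen>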
+ </listitem>
+ </varlistentry>
+
+</variablelist>
+<!-- end of xi:include in the manpage -->
+</sect2>
+
+
+<sect2 id="cl-manual.options.cachesimulation"
+ xreflabel="Cache simulation options">
+<title>Cache simulation options</title>
+
+<!-- start of xi:include in the manpage -->
+<variablelist id="cl.opts.list.cachesimulation">
+
<varlistentry id="opt.simulate-wb" xreflabel="--simulate-wb">
<term>
<option><![CDATA[--simulate-wb=<yes|no> [default: no] ]]></option>
#include <pub_tool_threadstate.h>
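+/* Pull in Cachegrind's branch predictor simulation: one model predicts
+   the taken/not-taken outcome of conditional branches, another predicts
+   the target address of indirect jumps. */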
+#include "cg_branchpred.c"
+
/*------------------------------------------------------------*/
/*--- Global variables ---*/
/*------------------------------------------------------------*/
{
ULong* cost_Bus;
- CLG_DEBUG(0, "log_global_event: Ir %#lx/%u\n",
+ CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n",
CLG_(bb_base) + ii->instr_offset, ii->instr_size);
if (!CLG_(current_state).collect) return;
+ CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );
+
CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
if (CLG_(current_state).nonskipped)
}
+/* For branches, we consult two different predictors, one which
+ predicts taken/untaken for conditional branches, and the other
+ which predicts the branch target address for indirect branches
+ (jump-to-register style ones). */
+
+static VG_REGPARM(2)
+void log_cond_branch(InstrInfo* ii, Word taken)
+{
+ Bool miss;
+ Int fullOffset_Bc;
+ ULong* cost_Bc;
+
+ CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %lu\n",
+ CLG_(bb_base) + ii->instr_offset, taken);
+
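+   /* Keep only bit 0 of the predictor result: nonzero iff the branch
+      was mispredicted. */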
+ miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);
+
+ if (!CLG_(current_state).collect) return;
+
+ CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );
+
+ if (CLG_(current_state).nonskipped)
+ cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
+ else
+ cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];
+
+ fullOffset_Bc = fullOffset(EG_BC);
+ CLG_(current_state).cost[ fullOffset_Bc ]++;
+ cost_Bc[0]++;
+ if (miss) {
+ CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
+ cost_Bc[1]++;
+ }
+}
+
+static VG_REGPARM(2)
+void log_ind_branch(InstrInfo* ii, UWord actual_dst)
+{
+ Bool miss;
+ Int fullOffset_Bi;
+ ULong* cost_Bi;
+
+ CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n",
+ CLG_(bb_base) + ii->instr_offset, actual_dst);
+
+ miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);
+
+ if (!CLG_(current_state).collect) return;
+
+ CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );
+
+ if (CLG_(current_state).nonskipped)
+ cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
+ else
+ cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];
+
+ fullOffset_Bi = fullOffset(EG_BI);
+ CLG_(current_state).cost[ fullOffset_Bi ]++;
+ cost_Bi[0]++;
+ if (miss) {
+ CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
+ cost_Bi[1]++;
+ }
+}
+
/*------------------------------------------------------------*/
/*--- Instrumentation structures and event queue handling ---*/
/*------------------------------------------------------------*/
Ev_Dr, // Data read
Ev_Dw, // Data write
Ev_Dm, // Data modify (read then write)
+ Ev_Bc, // branch conditional
+ Ev_Bi, // branch indirect (to unknown destination)
Ev_G // Global bus event
}
EventTag;
IRAtom* ea;
Int szB;
} Dm;
+ struct {
+	 IRAtom* taken; /* :: host word; the Ity_I1 guard is widened before queuing */
+ } Bc;
+ struct {
+ IRAtom* dst;
+ } Bi;
struct {
} G;
} Ev;
ppIRExpr(ev->Ev.Dm.ea);
VG_(printf)("\n");
break;
+ case Ev_Bc:
+ VG_(printf)("Bc %p GA=", ev->inode);
+ ppIRExpr(ev->Ev.Bc.taken);
+ VG_(printf)("\n");
+ break;
+ case Ev_Bi:
+ VG_(printf)("Bi %p DST=", ev->inode);
+ ppIRExpr(ev->Ev.Bi.dst);
+ VG_(printf)("\n");
+ break;
case Ev_G:
VG_(printf)("G %p\n", ev->inode);
break;
ev->inode->eventset = CLG_(sets).base;
break;
case Ev_Dr:
- // extend event set by Dr counter
+ // extend event set by Dr counters
ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
EG_DR);
break;
case Ev_Dw:
case Ev_Dm:
- // extend event set by Dw counter
+ // extend event set by Dw counters
ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
EG_DW);
break;
+ case Ev_Bc:
+ // extend event set by Bc counters
+ ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
+ EG_BC);
+ break;
+ case Ev_Bi:
+ // extend event set by Bi counters
+ ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
+ EG_BI);
+ break;
case Ev_G:
- // extend event set by Bus counter
+ // extend event set by Bus counter
ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
EG_BUS);
break;
regparms = 3;
inew = i+1;
break;
+ case Ev_Bc:
+ /* Conditional branch */
+ helperName = "log_cond_branch";
+ helperAddr = &log_cond_branch;
+ argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
+ regparms = 2;
+ inew = i+1;
+ break;
+ case Ev_Bi:
+ /* Branch to an unknown destination */
+ helperName = "log_ind_branch";
+ helperAddr = &log_ind_branch;
+ argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
+ regparms = 2;
+ inew = i+1;
+ break;
case Ev_G:
/* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
helperName = "log_global_event";
clgs->events_used++;
}
+static
+void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
+{
+ Event* evt;
+ tl_assert(isIRAtom(guard));
+ tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
+ == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
+ if (!CLG_(clo).simulate_branch) return;
+
+ if (clgs->events_used == N_EVENTS)
+ flushEvents(clgs);
+ tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
+ evt = &clgs->events[clgs->events_used];
+ init_Event(evt);
+ evt->tag = Ev_Bc;
+ evt->inode = inode;
+ evt->Ev.Bc.taken = guard;
+ clgs->events_used++;
+}
+
+static
+void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
+{
+ Event* evt;
+ tl_assert(isIRAtom(whereTo));
+ tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
+ == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
+ if (!CLG_(clo).simulate_branch) return;
+
+ if (clgs->events_used == N_EVENTS)
+ flushEvents(clgs);
+ tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
+ evt = &clgs->events[clgs->events_used];
+ init_Event(evt);
+ evt->tag = Ev_Bi;
+ evt->inode = inode;
+ evt->Ev.Bi.dst = whereTo;
+ clgs->events_used++;
+}
+
static
void addEvent_G ( ClgState* clgs, InstrInfo* inode )
{
Event* evt;
if (!CLG_(clo).collect_bus) return;
+
if (clgs->events_used == N_EVENTS)
flushEvents(clgs);
tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
Int i, isize;
IRStmt* st;
Addr origAddr;
+ Addr64 cia; /* address of current insn */
InstrInfo* curr_inode = NULL;
ClgState clgs;
UInt cJumps = 0;
CLG_ASSERT(Ist_IMark == st->tag);
origAddr = (Addr)st->Ist.IMark.addr;
+ cia = st->Ist.IMark.addr;
+ isize = st->Ist.IMark.len;
CLG_ASSERT(origAddr == st->Ist.IMark.addr); // XXX: check no overflow
/* Get BB struct (creating if necessary).
break;
case Ist_IMark: {
- CLG_ASSERT(clgs.instr_offset == (Addr)st->Ist.IMark.addr - origAddr);
- isize = st->Ist.IMark.len;
+ cia = st->Ist.IMark.addr;
+ isize = st->Ist.IMark.len;
+ CLG_ASSERT(clgs.instr_offset == (Addr)cia - origAddr);
// If Vex fails to decode an instruction, the size will be zero.
// Pretend otherwise.
if (isize == 0) isize = VG_MIN_INSTR_SZB;
}
case Ist_Exit: {
- UInt jmps_passed;
+ Bool guest_exit, inverted;
+
+	 /* VEX code generation sometimes inverts conditional branches.
+	  * As Callgrind counts (conditional) jumps, it has to correct
+	  * inversions. The heuristic is as follows:
+	  * (1) Callgrind switches off SB chasing and unrolling, and
+	  *     therefore assumes that the only candidate for inversion
+	  *     is the last conditional branch in an SB;
+	  * (2) an inversion is assumed if the branch jumps to the address
+	  *     of the next guest instruction in memory.
+	  * This heuristic is precalculated in CLG_(collectBlockInfo)().
+	  *
+	  * Branching behavior is also used for branch prediction. Note
+	  * that the above heuristic differs from what Cachegrind does:
+	  * Cachegrind uses (2) for all branches.
+	  */
+ if (cJumps+1 == clgs.bb->cjmp_count)
+ inverted = clgs.bb->cjmp_inverted;
+ else
+ inverted = False;
+
+ // call branch predictor only if this is a branch in guest code
+ guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
+ (st->Ist.Exit.jk == Ijk_Call) ||
+ (st->Ist.Exit.jk == Ijk_Ret);
+
+ if (guest_exit) {
+ /* Stuff to widen the guard expression to a host word, so
+ we can pass it to the branch predictor simulation
+ functions easily. */
+ IRType tyW = hWordTy;
+ IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
+ IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
+ IRTemp guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
+ IRTemp guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
+ IRTemp guard = newIRTemp(clgs.sbOut->tyenv, tyW);
+ IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
+ : IRExpr_Const(IRConst_U64(1));
+
+ /* Widen the guard expression. */
+ addStmtToIRSB( clgs.sbOut,
+ IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
+ addStmtToIRSB( clgs.sbOut,
+ IRStmt_WrTmp( guardW,
+ IRExpr_Unop(widen,
+ IRExpr_RdTmp(guard1))) );
+ /* If the exit is inverted, invert the sense of the guard. */
+ addStmtToIRSB(
+ clgs.sbOut,
+ IRStmt_WrTmp(
+ guard,
+ inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
+ : IRExpr_RdTmp(guardW)
+ ));
+ /* And post the event. */
+ addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
+ }
/* We may never reach the next statement, so need to flush
all outstanding transactions now. */
/* Update global variable jmps_passed before the jump
* A correction is needed if VEX inverted the last jump condition
*/
- jmps_passed = cJumps;
- if ((cJumps+1 == clgs.bb->cjmp_count) && clgs.bb->cjmp_inverted)
- jmps_passed++;
addConstMemStoreStmt( clgs.sbOut,
(UWord) &CLG_(current_state).jmps_passed,
- jmps_passed, hWordTy);
+ inverted ? cJumps+1 : cJumps, hWordTy);
cJumps++;
break;
}
}
+ /* Deal with branches to unknown destinations. Except ignore ones
+ which are function returns as we assume the return stack
+ predictor never mispredicts. */
+ if (sbIn->jumpkind == Ijk_Boring) {
+ if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
+ switch (sbIn->next->tag) {
+ case Iex_Const:
+ break; /* boring - branch to known address */
+ case Iex_RdTmp:
+ /* looks like an indirect branch (branch to unknown) */
+ addEvent_Bi( &clgs, curr_inode, sbIn->next );
+ break;
+ default:
+ /* shouldn't happen - if the incoming IR is properly
+ flattened, should only have tmp and const cases to
+ consider. */
+ tl_assert(0);
+ }
+ }
+
/* At the end of the bb. Flush outstandings. */
flushEvents( &clgs );
}
}
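+/* Number of characters needed to print n in decimal with thousands
+   separators, e.g. ULong_width(1234567) == 9 for "1,234,567". */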
+static UInt ULong_width(ULong n)
+{
+ UInt w = 0;
+ while (n > 0) {
+ n = n / 10;
+ w++;
+ }
+ if (w == 0) w = 1;
+ return w + (w-1)/3; // add space for commas
+}
+
+static
+void branchsim_printstat(int l1, int l2, int l3)
+{
+ static Char buf1[128], buf2[128], buf3[128], fmt[128];
+ FullCost total;
+ ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
+ ULong B_total_b, B_total_mp;
+
+ total = CLG_(total_cost);
+ Bc_total_b = total[ fullOffset(EG_BC) ];
+ Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
+ Bi_total_b = total[ fullOffset(EG_BI) ];
+ Bi_total_mp = total[ fullOffset(EG_BI)+1 ];
+
+ /* Make format string, getting width right for numbers */
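+   /* The doubled "%%" end up as single '%' in fmt; the ',' flag asks
+      Valgrind's printf to insert thousands separators. */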
+ VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
+ l1, l2, l3);
+
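+   /* Avoid division by zero in the mispredict rate computed below. */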
+ if (0 == Bc_total_b) Bc_total_b = 1;
+ if (0 == Bi_total_b) Bi_total_b = 1;
+ B_total_b = Bc_total_b + Bi_total_b;
+ B_total_mp = Bc_total_mp + Bi_total_mp;
+
+ VG_(umsg)("\n");
+ VG_(umsg)(fmt, "Branches: ",
+ B_total_b, Bc_total_b, Bi_total_b);
+
+ VG_(umsg)(fmt, "Mispredicts: ",
+ B_total_mp, Bc_total_mp, Bi_total_mp);
+
+ VG_(percentify)(B_total_mp, B_total_b, 1, l1+1, buf1);
+ VG_(percentify)(Bc_total_mp, Bc_total_b, 1, l2+1, buf2);
+ VG_(percentify)(Bi_total_mp, Bi_total_b, 1, l3+1, buf3);
+
+   VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2, buf3);
+}
+
+
static
void finish(void)
{
- char buf[RESULTS_BUF_LEN];
+ Char buf[RESULTS_BUF_LEN], fmt[128];
+ Int l1, l2, l3;
+ FullCost total;
CLG_DEBUG(0, "finish()\n");
VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
VG_(message)(Vg_UserMsg, "\n");
- // if (CLG_(clo).simulate_cache)
- (*CLG_(cachesim).printstat)();
+ /* determine value widths for statistics */
+ total = CLG_(total_cost);
+ l1 = ULong_width( total[fullOffset(EG_IR)] );
+ l2 = l3 = 0;
+ if (CLG_(clo).simulate_cache) {
+ l2 = ULong_width( total[fullOffset(EG_DR)] );
+ l3 = ULong_width( total[fullOffset(EG_DW)] );
+ }
+ if (CLG_(clo).simulate_branch) {
+ int l2b = ULong_width( total[fullOffset(EG_BC)] );
+ int l3b = ULong_width( total[fullOffset(EG_BI)] );
+ if (l2b > l2) l2 = l2b;
+ if (l3b > l3) l3 = l3b;
+ }
+
+ /* Make format string, getting width right for numbers */
+ VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
+
+ /* Always print this */
+ VG_(umsg)(fmt, "I refs: ", total[fullOffset(EG_IR)] );
+
+ if (CLG_(clo).simulate_cache)
+ (*CLG_(cachesim).printstat)(l1, l2, l3);
+
+ if (CLG_(clo).simulate_branch)
+ branchsim_printstat(l1, l2, l3);
+
}
void cachesim_print_opts(void)
{
VG_(printf)(
-"\n cache simulator options:\n"
-" --simulate-cache=no|yes Do cache simulation [no]\n"
+"\n cache simulator options (does cache simulation if used):\n"
" --simulate-wb=no|yes Count write-back events [no]\n"
" --simulate-hwpref=no|yes Simulate hardware prefetch [no]\n"
#if CLG_EXPERIMENTAL
}
static
-void cachesim_printstat(void)
+void cachesim_printstat(Int l1, Int l2, Int l3)
{
FullCost total = CLG_(total_cost), D_total = 0;
ULong L2_total_m, L2_total_mr, L2_total_mw,
char buf1[RESULTS_BUF_LEN],
buf2[RESULTS_BUF_LEN],
buf3[RESULTS_BUF_LEN];
- Int l1, l2, l3;
Int p;
if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) {
VG_(message)(Vg_DebugMsg, "\n");
}
- /* I cache results. Use the I_refs value to determine the first column
- * width. */
- l1 = commify(total[fullOffset(EG_IR)], 0, buf1);
- VG_(message)(Vg_UserMsg, "I refs: %s\n", buf1);
-
- if (!CLG_(clo).simulate_cache) return;
-
commify(total[fullOffset(EG_IR) +1], l1, buf1);
VG_(message)(Vg_UserMsg, "I1 misses: %s\n", buf1);
CLG_(add_cost) ( CLG_(get_event_set)(EG_DW), D_total, total + fullOffset(EG_DW) );
commify( D_total[0], l1, buf1);
- l2 = commify(total[fullOffset(EG_DR)], 0, buf2);
- l3 = commify(total[fullOffset(EG_DW)], 0, buf3);
+ commify(total[fullOffset(EG_DR)], l2, buf2);
+ commify(total[fullOffset(EG_DW)], l3, buf3);
VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)\n",
buf1, buf2, buf3);
CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "D2mw", "D2dmw");
}
+ if (CLG_(clo).simulate_branch) {
+ CLG_(register_event_group2)(EG_BC, "Bc", "Bcm");
+ CLG_(register_event_group2)(EG_BI, "Bi", "Bim");
+ }
+
if (CLG_(clo).collect_bus)
CLG_(register_event_group)(EG_BUS, "Ge");
// event set comprising all event groups, used for inclusive cost
CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
+ CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_BC, EG_BI);
CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);
CLG_(append_event)(CLG_(dumpmap), "I2dmr");
CLG_(append_event)(CLG_(dumpmap), "D2dmr");
CLG_(append_event)(CLG_(dumpmap), "D2dmw");
+ CLG_(append_event)(CLG_(dumpmap), "Bc");
+ CLG_(append_event)(CLG_(dumpmap), "Bcm");
+ CLG_(append_event)(CLG_(dumpmap), "Bi");
+ CLG_(append_event)(CLG_(dumpmap), "Bim");
CLG_(append_event)(CLG_(dumpmap), "AcCost1");
CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
CLG_(append_event)(CLG_(dumpmap), "AcCost2");