To count global bus lock events, use "--collect-bus=yes".
For x86, this will count the number of executed instructions
with a lock prefix; for architectures with LL/SC, this will
count the number of executed SC instructions.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11167
/* compatibility alias, deprecated option */
else if VG_BOOL_CLO(arg, "--trace-jump", CLG_(clo).collect_jumps) {}
+ else if VG_BOOL_CLO(arg, "--collect-bus", CLG_(clo).collect_bus) {}
+
else if VG_BOOL_CLO(arg, "--combine-dumps", CLG_(clo).combine_dumps) {}
else if VG_BOOL_CLO(arg, "--collect-atstart", CLG_(clo).collect_atstart) {}
" --collect-atstart=no|yes Collect at process/thread start [yes]\n"
" --toggle-collect=<func> Toggle collection on enter/leave function\n"
" --collect-jumps=no|yes Collect jumps? [no]\n"
+" --collect-bus=no|yes Collect global bus events? [no]\n"
#if CLG_EXPERIMENTAL
" --collect-alloc=no|yes Collect memory allocation info? [no]\n"
#endif
start event collection a few million instructions after you have enabled
instrumentation.</para>
-
</sect2>
+ <sect2 id="cl-manual.busevents" xreflabel="Counting global bus events">
+ <title>Counting global bus events</title>
+
+ <para>For access to shared data among threads in multithreaded
+ code, synchronization is required to avoid race conditions.
+ Synchronization primitives are usually implemented via atomic instructions.
+ However, excessive use of such instructions can lead to performance
+ issues.</para>
+
+ <para>To enable analysis of this problem, Callgrind can optionally count
+ the number of atomic instructions executed. More precisely, for x86/x86_64,
+ these are instructions using a lock prefix. For architectures supporting
+ LL/SC, these are the SC instructions executed. For both, the term
+ "global bus events" is used.</para>
+ <para>The short name of the event type used for global bus events is "Ge".
+ To count global bus events, use <option><xref linkend="opt.collect-bus"/></option>.
+ </para>
+ </sect2>
<sect2 id="cl-manual.cycles" xreflabel="Avoiding cycles">
<title>Avoiding cycles</title>
</listitem>
</varlistentry>
+ <varlistentry id="opt.collect-bus" xreflabel="--collect-bus">
+ <term>
+ <option><![CDATA[--collect-bus=<no|yes> [default: no] ]]></option>
+ </term>
+ <listitem>
+ <para>This specifies whether the number of global bus events executed
+ should be collected. The event type "Ge" is used for these events.</para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<!-- end of xi:include in the manpage -->
</sect2>
Bool collect_alloc; /* Collect size of allocated memory */
Bool collect_systime; /* Collect time for system calls */
+ Bool collect_bus; /* Collect global bus events */
+
/* Instrument options */
Bool instrument_atstart; /* Instrument at start? */
Bool simulate_cache; /* Call into cache simulator ? */
#define EG_IR 1
#define EG_DR 2
#define EG_DW 3
-#define EG_ALLOC 4
-#define EG_SYS 5
+#define EG_BUS 4
+#define EG_ALLOC 5
+#define EG_SYS 6
struct event_sets {
EventSet *base, *full;
}
+/*------------------------------------------------------------*/
+/*--- Simple callbacks (not cache simulator) ---*/
+/*------------------------------------------------------------*/
+
+VG_REGPARM(1) /* runtime helper: counts one global bus event (EG_BUS) for instruction ii */
+static void log_global_event(InstrInfo* ii)
+{
+ ULong* cost_Bus; /* points at the EG_BUS counter that receives this event */
+
+ CLG_DEBUG(0, "log_global_event: Ir %#lx/%u\n",
+ CLG_(bb_base) + ii->instr_offset, ii->instr_size);
+
+ if (!CLG_(current_state).collect) return; /* collection currently off: count nothing */
+
+ CLG_(current_state).cost[ fullOffset(EG_BUS) ]++; /* bump the EG_BUS slot of the current state's cost array */
+
+ if (CLG_(current_state).nonskipped) /* a nonskipped context is set: charge its 'skipped' cost array */
+ cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
+ else /* otherwise charge the instruction's own cost slot, located via its event set */
+ cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
+ cost_Bus[0]++;
+}
+
+
/*------------------------------------------------------------*/
/*--- Instrumentation structures and event queue handling ---*/
/*------------------------------------------------------------*/
Ev_Dr, // Data read
Ev_Dw, // Data write
Ev_Dm, // Data modify (read then write)
+ Ev_G // Global bus event
}
EventTag;
IRAtom* ea;
Int szB;
} Dm;
+ struct {
+ } G;
} Ev;
}
Event;
ppIRExpr(ev->Ev.Dm.ea);
VG_(printf)("\n");
break;
+ case Ev_G:
+ VG_(printf)("G %p\n", ev->inode);
+ break;
default:
tl_assert(0);
break;
ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
EG_DW);
break;
+ case Ev_G:
+ // extend event set by Bus counter
+ ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
+ EG_BUS);
+ break;
default:
tl_assert(0);
}
regparms = 3;
inew = i+1;
break;
+ case Ev_G:
+ /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
+ helperName = "log_global_event";
+ helperAddr = &log_global_event;
+ argv = mkIRExprVec_1( i_node_expr );
+ regparms = 1;
+ inew = i+1;
+ break;
default:
tl_assert(0);
}
clgs->events_used++;
}
+static /* appends an Ev_G (global bus) event for 'inode' to the event queue held in clgs */
+void addEvent_G ( ClgState* clgs, InstrInfo* inode )
+{
+ Event* evt; /* the queue slot being filled in */
+ if (!CLG_(clo).collect_bus) return; /* option --collect-bus is off: queue nothing */
+ if (clgs->events_used == N_EVENTS) /* queue is full */
+ flushEvents(clgs); /* presumably drains the queue; the assert below relies on a free slot afterwards */
+ tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
+ evt = &clgs->events[clgs->events_used]; /* next unused slot */
+ init_Event(evt);
+ evt->tag = Ev_G; /* Ev_G carries no payload besides the owning instruction */
+ evt->inode = inode;
+ clgs->events_used++;
+}
+
/* Initialise or check (if already seen before) an InstrInfo for next insn.
We only can set instr_offset/instr_size here. The required event set and
resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
dataSize *= 2; /* since this is a doubleword-cas */
addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
+ addEvent_G( &clgs, curr_inode );
break;
}
dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
addEvent_Dw( &clgs, curr_inode,
sizeofIRType(dataTy), st->Ist.LLSC.addr );
+ /* I don't know whether the global-bus-lock cost should
+ be attributed to the LL or the SC, but it doesn't
+ really matter since they always have to be used in
+ pairs anyway. Hence put it (quite arbitrarily) on
+ the SC. */
+ addEvent_G( &clgs, curr_inode );
}
break;
}
CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "D2mw", "I2dmw");
}
+ if (CLG_(clo).collect_bus)
+ CLG_(register_event_group)(EG_BUS, "Ge");
+
if (CLG_(clo).collect_alloc)
CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
// event set comprising all event groups, used for inclusive cost
CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
+ CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);
CLG_DEBUGIF(1) {
CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
CLG_(append_event)(CLG_(dumpmap), "AcCost2");
CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
+ CLG_(append_event)(CLG_(dumpmap), "Ge");
CLG_(append_event)(CLG_(dumpmap), "allocCount");
CLG_(append_event)(CLG_(dumpmap), "allocSize");
CLG_(append_event)(CLG_(dumpmap), "sysCount");
{
if (!CLG_(clo).simulate_cache)
cost[ fullOffset(EG_IR) ] += exe_count;
- else
+
+ if (ii->eventset)
CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
ii->eventset, bbcc->cost + ii->cost_offset);
}