From: Josef Weidendorfer Date: Wed, 9 Jun 2010 22:33:02 +0000 (+0000) Subject: Callgrind new feature: count global bus lock events "Ge" X-Git-Tag: svn/VALGRIND_3_6_0~272 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c3b643bb3530d7d5015600bb98972eae42b147fe;p=thirdparty%2Fvalgrind.git Callgrind new feature: count global bus lock events "Ge" To count global bus lock events, use "--collect-bus=yes". For x86, this will count the number of executed instructions with a lock prefix; for architectures with LL/SC, this will count the number of executed SC instructions. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11167 --- diff --git a/callgrind/clo.c b/callgrind/clo.c index 6d7df37bab..e4da421a10 100644 --- a/callgrind/clo.c +++ b/callgrind/clo.c @@ -415,6 +415,8 @@ Bool CLG_(process_cmd_line_option)(Char* arg) /* compatibility alias, deprecated option */ else if VG_BOOL_CLO(arg, "--trace-jump", CLG_(clo).collect_jumps) {} + else if VG_BOOL_CLO(arg, "--collect-bus", CLG_(clo).collect_bus) {} + else if VG_BOOL_CLO(arg, "--combine-dumps", CLG_(clo).combine_dumps) {} else if VG_BOOL_CLO(arg, "--collect-atstart", CLG_(clo).collect_atstart) {} @@ -572,6 +574,7 @@ void CLG_(print_usage)(void) " --collect-atstart=no|yes Collect at process/thread start [yes]\n" " --toggle-collect= Toggle collection on enter/leave function\n" " --collect-jumps=no|yes Collect jumps? [no]\n" +" --collect-bus=no|yes Collect global bus events? [no]\n" #if CLG_EXPERIMENTAL " --collect-alloc=no|yes Collect memory allocation info? [no]\n" #endif diff --git a/callgrind/docs/cl-manual.xml b/callgrind/docs/cl-manual.xml index 7e43bfa44c..a1339841de 100644 --- a/callgrind/docs/cl-manual.xml +++ b/callgrind/docs/cl-manual.xml @@ -353,10 +353,27 @@ callgrind.out.pid.part-threa start event collection a few million instructions after you have enabled instrumentation. 
- + + Counting global bus events + + For access to shared data among threads in a multithreaded + code, synchronization is required to avoid race conditions. + Synchronization primitives are usually implemented via atomic instructions. + However, excessive use of such instructions can lead to performance + issues. + + To enable analysis of this problem, Callgrind optionally can count + the number of atomic instructions executed. More precisely, for x86/x86_64, + these are instructions using a lock prefix. For architectures supporting + LL/SC, this is the number of SC instructions executed. For both, the term + "global bus events" is used. + The short name of the event type used for global bus events is "Ge". + To count global bus events, use --collect-bus=yes. + + Avoiding cycles @@ -762,6 +779,16 @@ Also see . + + + + + + This specifies whether the number of global bus events executed + should be collected. The event type "Ge" is used for these events. + + + diff --git a/callgrind/global.h b/callgrind/global.h index b285715da6..db694a8cc5 100644 --- a/callgrind/global.h +++ b/callgrind/global.h @@ -87,6 +87,8 @@ struct _CommandLineOptions { Bool collect_alloc; /* Collect size of allocated memory */ Bool collect_systime; /* Collect time for system calls */ + Bool collect_bus; /* Collect global bus events */ + /* Instrument options */ Bool instrument_atstart; /* Instrument at start? */ Bool simulate_cache; /* Call into cache simulator ? 
*/ @@ -679,8 +681,9 @@ extern ULong* CLG_(cost_base); #define EG_IR 1 #define EG_DR 2 #define EG_DW 3 -#define EG_ALLOC 4 -#define EG_SYS 5 +#define EG_BUS 4 +#define EG_ALLOC 5 +#define EG_SYS 6 struct event_sets { EventSet *base, *full; diff --git a/callgrind/main.c b/callgrind/main.c index 4aa3ce7e2a..c0290b460d 100644 --- a/callgrind/main.c +++ b/callgrind/main.c @@ -94,6 +94,30 @@ static void CLG_(init_statistics)(Statistics* s) } +/*------------------------------------------------------------*/ +/*--- Simple callbacks (not cache similator) ---*/ +/*------------------------------------------------------------*/ + +VG_REGPARM(1) +static void log_global_event(InstrInfo* ii) +{ + ULong* cost_Bus; + + CLG_DEBUG(0, "log_global_event: Ir %#lx/%u\n", + CLG_(bb_base) + ii->instr_offset, ii->instr_size); + + if (!CLG_(current_state).collect) return; + + CLG_(current_state).cost[ fullOffset(EG_BUS) ]++; + + if (CLG_(current_state).nonskipped) + cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS); + else + cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS]; + cost_Bus[0]++; +} + + /*------------------------------------------------------------*/ /*--- Instrumentation structures and event queue handling ---*/ /*------------------------------------------------------------*/ @@ -137,6 +161,7 @@ typedef Ev_Dr, // Data read Ev_Dw, // Data write Ev_Dm, // Data modify (read then write) + Ev_G // Global bus event } EventTag; @@ -159,6 +184,8 @@ typedef IRAtom* ea; Int szB; } Dm; + struct { + } G; } Ev; } Event; @@ -242,6 +269,9 @@ static void showEvent ( Event* ev ) ppIRExpr(ev->Ev.Dm.ea); VG_(printf)("\n"); break; + case Ev_G: + VG_(printf)("G %p\n", ev->inode); + break; default: tl_assert(0); break; @@ -286,6 +316,11 @@ static void flushEvents ( ClgState* clgs ) ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, EG_DW); break; + case Ev_G: + // extend event set by Bus counter + ev->inode->eventset = 
CLG_(add_event_group)(ev->inode->eventset, + EG_BUS); + break; default: tl_assert(0); } @@ -401,6 +436,14 @@ static void flushEvents ( ClgState* clgs ) regparms = 3; inew = i+1; break; + case Ev_G: + /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */ + helperName = "log_global_event"; + helperAddr = &log_global_event; + argv = mkIRExprVec_1( i_node_expr ); + regparms = 1; + inew = i+1; + break; default: tl_assert(0); } @@ -505,6 +548,21 @@ void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea ) clgs->events_used++; } +static +void addEvent_G ( ClgState* clgs, InstrInfo* inode ) +{ + Event* evt; + if (!CLG_(clo).collect_bus) return; + if (clgs->events_used == N_EVENTS) + flushEvents(clgs); + tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); + evt = &clgs->events[clgs->events_used]; + init_Event(evt); + evt->tag = Ev_G; + evt->inode = inode; + clgs->events_used++; +} + /* Initialise or check (if already seen before) an InstrInfo for next insn. We only can set instr_offset/instr_size here. The required event set and resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest @@ -840,6 +898,7 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure, dataSize *= 2; /* since this is a doubleword-cas */ addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr ); addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr ); + addEvent_G( &clgs, curr_inode ); break; } @@ -855,6 +914,12 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure, dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata); addEvent_Dw( &clgs, curr_inode, sizeofIRType(dataTy), st->Ist.LLSC.addr ); + /* I don't know whether the global-bus-lock cost should + be attributed to the LL or the SC, but it doesn't + really matter since they always have to be used in + pairs anyway. Hence put it (quite arbitrarily) on + the SC. 
*/ + addEvent_G( &clgs, curr_inode ); } break; } diff --git a/callgrind/sim.c b/callgrind/sim.c index 61377d141f..42824561dd 100644 --- a/callgrind/sim.c +++ b/callgrind/sim.c @@ -1782,6 +1782,9 @@ void CLG_(init_eventsets)() CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "D2mw", "I2dmw"); } + if (CLG_(clo).collect_bus) + CLG_(register_event_group)(EG_BUS, "Ge"); + if (CLG_(clo).collect_alloc) CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize"); @@ -1793,6 +1796,7 @@ void CLG_(init_eventsets)() // event set comprising all event groups, used for inclusive cost CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW); + CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS); CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS); CLG_DEBUGIF(1) { @@ -1819,6 +1823,7 @@ void CLG_(init_eventsets)() CLG_(append_event)(CLG_(dumpmap), "SpLoss1"); CLG_(append_event)(CLG_(dumpmap), "AcCost2"); CLG_(append_event)(CLG_(dumpmap), "SpLoss2"); + CLG_(append_event)(CLG_(dumpmap), "Ge"); CLG_(append_event)(CLG_(dumpmap), "allocCount"); CLG_(append_event)(CLG_(dumpmap), "allocSize"); CLG_(append_event)(CLG_(dumpmap), "sysCount"); @@ -1832,7 +1837,8 @@ static void cachesim_add_icost(SimCost cost, BBCC* bbcc, { if (!CLG_(clo).simulate_cache) cost[ fullOffset(EG_IR) ] += exe_count; - else + + if (ii->eventset) CLG_(add_and_zero_cost2)( CLG_(sets).full, cost, ii->eventset, bbcc->cost + ii->cost_offset); }