]> git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Callgrind new feature: count global bus lock events "Ge"
author Josef Weidendorfer <Josef.Weidendorfer@gmx.de>
Wed, 9 Jun 2010 22:33:02 +0000 (22:33 +0000)
committer Josef Weidendorfer <Josef.Weidendorfer@gmx.de>
Wed, 9 Jun 2010 22:33:02 +0000 (22:33 +0000)
To count global bus lock events, use "--collect-bus=yes".
For x86, this will count the number of executed instructions
with a lock prefix; for architectures with LL/SC, this will
count the number of executed SC instructions.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11167

callgrind/clo.c
callgrind/docs/cl-manual.xml
callgrind/global.h
callgrind/main.c
callgrind/sim.c

index 6d7df37bab82aa74299acbd8555a23631dffc7e2..e4da421a103a530cefafd9f40a86f1bbc4cf3dfc 100644 (file)
@@ -415,6 +415,8 @@ Bool CLG_(process_cmd_line_option)(Char* arg)
    /* compatibility alias, deprecated option */
    else if VG_BOOL_CLO(arg, "--trace-jump",    CLG_(clo).collect_jumps) {}
 
+   else if VG_BOOL_CLO(arg, "--collect-bus", CLG_(clo).collect_bus) {}
+
    else if VG_BOOL_CLO(arg, "--combine-dumps", CLG_(clo).combine_dumps) {}
 
    else if VG_BOOL_CLO(arg, "--collect-atstart", CLG_(clo).collect_atstart) {}
@@ -572,6 +574,7 @@ void CLG_(print_usage)(void)
 "    --collect-atstart=no|yes  Collect at process/thread start [yes]\n"
 "    --toggle-collect=<func>   Toggle collection on enter/leave function\n"
 "    --collect-jumps=no|yes    Collect jumps? [no]\n"
+"    --collect-bus=no|yes      Collect global bus events? [no]\n"
 #if CLG_EXPERIMENTAL
 "    --collect-alloc=no|yes    Collect memory allocation info? [no]\n"
 #endif
index 7e43bfa44c7091c72b642baf2710245e99404748..a1339841deca0f0b60cf0392e8bd52d3eccd4e20 100644 (file)
@@ -353,10 +353,27 @@ callgrind.out.<emphasis>pid</emphasis>.<emphasis>part</emphasis>-<emphasis>threa
   start event collection a few million instructions after you have enabled
   instrumentation.</para>
 
-
   </sect2>
 
+  <sect2 id="cl-manual.busevents" xreflabel="Counting global bus events">
+  <title>Counting global bus events</title>
+
+  <para>For access to shared data among threads in a multithreaded
+  code, synchronization is required to avoid race conditions.
+  Synchronization primitives are usually implemented via atomic instructions.
+  However, excessive use of such instructions can lead to performance
+  issues.</para>
+
+  <para>To enable analysis of this problem, Callgrind optionally can count
+  the number of atomic instructions executed. More precisely, for x86/x86_64,
+  these are instructions using a lock prefix. For architectures supporting
+  LL/SC, these are the SC instructions executed. For both, the term
+  "global bus events" is used.</para>
 
+  <para>The short name of the event type used for global bus events is "Ge".
+  To count global bus events, use <option><xref linkend="opt.collect-bus"/></option>.
+  </para>
+  </sect2>
 
   <sect2 id="cl-manual.cycles" xreflabel="Avoiding cycles">
   <title>Avoiding cycles</title>
@@ -762,6 +779,16 @@ Also see <xref linkend="cl-manual.limits"/>.</para>
     </listitem>
   </varlistentry>
 
+  <varlistentry id="opt.collect-bus" xreflabel="--collect-bus">
+    <term>
+      <option><![CDATA[--collect-bus=<no|yes> [default: no] ]]></option>
+    </term>
+    <listitem>
+      <para>This specifies whether the number of global bus events executed
+      should be collected. The event type "Ge" is used for these events.</para>
+    </listitem>
+  </varlistentry>
+
 </variablelist>
 <!-- end of xi:include in the manpage -->
 </sect2>
index b285715da62f1a9b46bf0b52dbc9ce75d5eda4df..db694a8cc5aebe4e7f63ce8154becb4882da9460 100644 (file)
@@ -87,6 +87,8 @@ struct _CommandLineOptions {
   Bool collect_alloc;    /* Collect size of allocated memory */
   Bool collect_systime;  /* Collect time for system calls */
 
+  Bool collect_bus;      /* Collect global bus events */
+
   /* Instrument options */
   Bool instrument_atstart;  /* Instrument at start? */
   Bool simulate_cache;      /* Call into cache simulator ? */
@@ -679,8 +681,9 @@ extern ULong* CLG_(cost_base);
 #define EG_IR    1
 #define EG_DR    2
 #define EG_DW    3
-#define EG_ALLOC 4
-#define EG_SYS   5
+#define EG_BUS   4
+#define EG_ALLOC 5
+#define EG_SYS   6
 
 struct event_sets {
     EventSet *base, *full;
index 4aa3ce7e2ab53bb26ee07831e63e7ec117d2410d..c0290b460d12bd680af01948ea0c3da6f53e2c67 100644 (file)
@@ -94,6 +94,30 @@ static void CLG_(init_statistics)(Statistics* s)
 }
 
 
+/*------------------------------------------------------------*/
+/*--- Simple callbacks (not cache simulator)               ---*/
+/*------------------------------------------------------------*/
+
+VG_REGPARM(1)   /* matches regparms = 1 where flushEvents emits the call for Ev_G events */
+static void log_global_event(InstrInfo* ii)   /* count one global bus event ("Ge") for instruction ii */
+{
+    ULong* cost_Bus;   /* points at the EG_BUS counter that receives this event */
+
+    CLG_DEBUG(0, "log_global_event:  Ir  %#lx/%u\n",
+              CLG_(bb_base) + ii->instr_offset, ii->instr_size);
+
+    if (!CLG_(current_state).collect) return;   /* collection is switched off: count nothing */
+
+    CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;   /* EG_BUS slot of the current state's cost array */
+
+    if (CLG_(current_state).nonskipped)   /* a nonskipped entry is set: charge its 'skipped' cost array */
+        cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
+    else   /* otherwise charge the instruction's own EG_BUS slot within its event set */
+        cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
+    cost_Bus[0]++;
+}
+
+
 /*------------------------------------------------------------*/
 /*--- Instrumentation structures and event queue handling  ---*/
 /*------------------------------------------------------------*/
@@ -137,6 +161,7 @@ typedef
       Ev_Dr,  // Data read
       Ev_Dw,  // Data write
       Ev_Dm,  // Data modify (read then write)
+      Ev_G    // Global bus event
    }
    EventTag;
 
@@ -159,6 +184,8 @@ typedef
            IRAtom* ea;
            Int     szB;
         } Dm;
+        struct {
+        } G;
       } Ev;
    }
    Event;
@@ -242,6 +269,9 @@ static void showEvent ( Event* ev )
         ppIRExpr(ev->Ev.Dm.ea);
         VG_(printf)("\n");
         break;
+      case Ev_G:
+         VG_(printf)("G  %p\n", ev->inode);
+         break;
       default:
         tl_assert(0);
         break;
@@ -286,6 +316,11 @@ static void flushEvents ( ClgState* clgs )
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_DW);
               break;
+          case Ev_G:
+              // extend event set by Bus counter
+              ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
+                                                          EG_BUS);
+              break;
           default:
               tl_assert(0);
           }
@@ -401,6 +436,14 @@ static void flushEvents ( ClgState* clgs )
            regparms = 3;
            inew = i+1;
            break;
+         case Ev_G:
+            /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
+            helperName = "log_global_event";
+            helperAddr = &log_global_event;
+            argv = mkIRExprVec_1( i_node_expr );
+            regparms = 1;
+            inew = i+1;
+            break;
         default:
            tl_assert(0);
       }
@@ -505,6 +548,21 @@ void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    clgs->events_used++;
 }
 
+static   /* append an Ev_G (global bus) event for inode to the clgs event buffer */
+void addEvent_G ( ClgState* clgs, InstrInfo* inode )
+{
+   Event* evt;
+   if (!CLG_(clo).collect_bus) return;   /* --collect-bus not enabled: queue nothing */
+   if (clgs->events_used == N_EVENTS)   /* buffer is full: flush pending events first */
+      flushEvents(clgs);
+   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
+   evt = &clgs->events[clgs->events_used];   /* next free slot */
+   init_Event(evt);
+   evt->tag       = Ev_G;   /* only tag and inode are set; no other fields are filled in here */
+   evt->inode     = inode;
+   clgs->events_used++;
+}
+
 /* Initialise or check (if already seen before) an InstrInfo for next insn.
    We only can set instr_offset/instr_size here. The required event set and
    resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
@@ -840,6 +898,7 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure,
                dataSize *= 2; /* since this is a doubleword-cas */
             addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
             addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
+            addEvent_G(  &clgs, curr_inode );
             break;
          }
 
@@ -855,6 +914,12 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure,
                dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
                addEvent_Dw( &clgs, curr_inode,
                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
+               /* I don't know whether the global-bus-lock cost should
+                  be attributed to the LL or the SC, but it doesn't
+                  really matter since they always have to be used in
+                  pairs anyway.  Hence put it (quite arbitrarily) on
+                  the SC. */
+               addEvent_G(  &clgs, curr_inode );
             }
             break;
          }
index 61377d141febda4250f7ff00fba17a5359025135..42824561ddb489a51ba37b0442ce337d09966dcb 100644 (file)
@@ -1782,6 +1782,9 @@ void CLG_(init_eventsets)()
        CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "D2mw", "I2dmw");
     }
 
+    if (CLG_(clo).collect_bus)
+       CLG_(register_event_group)(EG_BUS, "Ge");
+
     if (CLG_(clo).collect_alloc)
        CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
 
@@ -1793,6 +1796,7 @@ void CLG_(init_eventsets)()
 
     // event set comprising all event groups, used for inclusive cost
     CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
+    CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
     CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);
 
     CLG_DEBUGIF(1) {
@@ -1819,6 +1823,7 @@ void CLG_(init_eventsets)()
     CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
     CLG_(append_event)(CLG_(dumpmap), "AcCost2");
     CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
+    CLG_(append_event)(CLG_(dumpmap), "Ge");
     CLG_(append_event)(CLG_(dumpmap), "allocCount");
     CLG_(append_event)(CLG_(dumpmap), "allocSize");
     CLG_(append_event)(CLG_(dumpmap), "sysCount");
@@ -1832,7 +1837,8 @@ static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
 {
     if (!CLG_(clo).simulate_cache)
        cost[ fullOffset(EG_IR) ] += exe_count;
-    else
+
+    if (ii->eventset)
        CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
                                  ii->eventset, bbcc->cost + ii->cost_offset);
 }