Enhance callgrind option --collect-system.

author Philippe Waroquiers <philippe.waroquiers@skynet.be>

Thu, 10 Oct 2019 19:42:01 +0000 (21:42 +0200)

committer Philippe Waroquiers <philippe.waroquiers@skynet.be>

Sat, 12 Oct 2019 16:12:04 +0000 (18:12 +0200)
author Philippe Waroquiers <philippe.waroquiers@skynet.be>
Thu, 10 Oct 2019 19:42:01 +0000 (21:42 +0200)
committer Philippe Waroquiers <philippe.waroquiers@skynet.be>
Sat, 12 Oct 2019 16:12:04 +0000 (18:12 +0200)
diff --git a/NEWS b/NEWS

index 2fde3c7cdaa9cc846b31a70d609a4a0452bb9e8a..a2422e89b637b40740ce0e6a4dfcc327978c96c2 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -32,6 +32,11 @@ support for X86/macOS 10.13 and AMD64/macOS 10.13.
  * Cachegrind:
  
  * Callgrind:
+  - The command option --collect-systime has been enhanced to specify
+    the unit used to record the elapsed time spent during system calls.
+    The command option now accepts the values no|yes|msec|usec|nsec,
+    where yes is a synonym of msec.  When giving the value nsec, the
+    system cpu time of system calls is also recorded.
  
  * Massif:
  
diff --git a/callgrind/clo.c b/callgrind/clo.c

index b9ccb32967c33434251b941ea8bb09f87759e58f..ce15fc832c7da5275a17c804f931bd4330e2290c 100644 (file)
--- a/callgrind/clo.c
+++ b/callgrind/clo.c
@@ -523,7 +523,23 @@ Bool CLG_(process_cmd_line_option)(const HChar* arg)
     else if VG_INT_CLO( arg, "--dump-every-bb", CLG_(clo).dump_every_bb) {}
  
     else if VG_BOOL_CLO(arg, "--collect-alloc",   CLG_(clo).collect_alloc) {}
-   else if VG_BOOL_CLO(arg, "--collect-systime", CLG_(clo).collect_systime) {}
+   else if VG_XACT_CLO(arg, "--collect-systime=no",
+                       CLG_(clo).collect_systime, systime_no) {}
+   else if VG_XACT_CLO(arg, "--collect-systime=msec",
+                       CLG_(clo).collect_systime, systime_msec) {}
+   else if VG_XACT_CLO(arg, "--collect-systime=yes", /* backward compatibility.  */
+                       CLG_(clo).collect_systime, systime_msec) {}
+   else if VG_XACT_CLO(arg, "--collect-systime=usec",
+                       CLG_(clo).collect_systime, systime_usec) {}
+   else if VG_XACT_CLO(arg, "--collect-systime=nsec",
+                       CLG_(clo).collect_systime, systime_nsec) {
+#  if defined(VGO_darwin)
+      VG_(fmsg_bad_option)
+         (arg,
+          "--collect-systime=nsec not supported on darwin\n");
+#  endif
+   }
+
     else if VG_BOOL_CLO(arg, "--collect-bus",     CLG_(clo).collect_bus) {}
     /* for option compatibility with cachegrind */
     else if VG_BOOL_CLO(arg, "--cache-sim",       CLG_(clo).simulate_cache) {}
@@ -580,7 +596,11 @@ void CLG_(print_usage)(void)
  #if CLG_EXPERIMENTAL
  "    --collect-alloc=no|yes    Collect memory allocation info? [no]\n"
  #endif
-"    --collect-systime=no|yes  Collect system call time info? [no]\n"
+"    --collect-systime=no|yes|msec|usec|nsec  Collect system call time info? [no]\n"
+"        no         Do not collect system call time info.\n"
+"        msec|yes   Collect syscount, syscall elapsed time (milli-seconds).\n"
+"        usec       Collect syscount, syscall elapsed time (micro-seconds).\n"
+"        nsec       Collect syscount, syscall elapsed and syscall cpu time (nano-seconds).\n"
  
  "\n   cost entity separation options:\n"
  "    --separate-threads=no|yes Separate data per thread [no]\n"
@@ -646,7 +666,7 @@ void CLG_(set_clo_defaults)(void)
    CLG_(clo).collect_atstart  = True;
    CLG_(clo).collect_jumps    = False;
    CLG_(clo).collect_alloc    = False;
-  CLG_(clo).collect_systime  = False;
+  CLG_(clo).collect_systime  = systime_no;
    CLG_(clo).collect_bus      = False;
  
    CLG_(clo).skip_plt         = True;
diff --git a/callgrind/docs/cl-manual.xml b/callgrind/docs/cl-manual.xml

index c1eb6ea3559fdcf3cf1ce6761905d7fa726939a9..31fcbaabd8500967ede61f2b8755996392b952a9 100644 (file)
--- a/callgrind/docs/cl-manual.xml
+++ b/callgrind/docs/cl-manual.xml
@@ -800,11 +800,25 @@ Also see <xref linkend="cl-manual.limits"/>.</para>
  
    <varlistentry id="opt.collect-systime" xreflabel="--collect-systime">
      <term>
-      <option><![CDATA[--collect-systime=<no|yes> [default: no] ]]></option>
+      <option><![CDATA[--collect-systime=<no|yes|msec|usec|nsec> [default: no] ]]></option>
      </term>
      <listitem>
        <para>This specifies whether information for system call times
-      should be collected.</para>
+        should be collected.</para>
+      <para>The value <computeroutput>no</computeroutput> indicates to record
+        no system call information.</para>
+      <para>The other values indicate to record the number of system calls
+        done (sysCount event) and the elapsed time (sysTime event) spent
+        in system calls.
+        The <computeroutput>--collect-systime</computeroutput> value gives
+        the unit used for sysTime : milli seconds, micro seconds or nano
+        seconds.  With the value <computeroutput>nsec</computeroutput>,
+        callgrind also records the cpu time spent during system calls
+        (sysCpuTime).</para>
+      <para>The value <computeroutput>yes</computeroutput> is a synonym
+        of <computeroutput>msec</computeroutput>.
+        The value <computeroutput>nsec</computeroutput> is not supported
+        on Darwin.</para>
      </listitem>
    </varlistentry>
  
diff --git a/callgrind/dump.c b/callgrind/dump.c

index e57abfe9b5e08470877b44543cb8b12df3123e28..f95bdb5b02c656211911b9bab91115f26ab916a1 100644 (file)
--- a/callgrind/dump.c
+++ b/callgrind/dump.c
@@ -1305,6 +1305,21 @@ static VgFile *new_dumpfile(int tid, const HChar* trigger)
     HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
     VG_(fprintf)(fp, "events: %s\n", evmap);
     VG_(free)(evmap);
+  switch (CLG_(clo).collect_systime) {
+    case systime_no: break;
+    case systime_msec:
+       VG_(fprintf)(fp, "event: sysTime : sysTime (elapsed ms)\n");
+       break;
+    case systime_usec:
+       VG_(fprintf)(fp, "event: sysTime : sysTime (elapsed us)\n");
+       break;
+    case systime_nsec:
+       VG_(fprintf)(fp, "event: sysTime : sysTime (elapsed ns)\n");
+       VG_(fprintf)(fp, "event: sysCpuTime : sysCpuTime (system cpu ns)\n");
+       break;
+    default:
+       tl_assert(0);
+  }
  
     /* summary lines */
     sum = CLG_(get_eventset_cost)( CLG_(sets).full );
diff --git a/callgrind/global.h b/callgrind/global.h

index 4fd8f22ff2e5aed8d000e597fe35e7658158a2f7..4a988a4811e9738d2ab5c312039c15f6d47cf837 100644 (file)
--- a/callgrind/global.h
+++ b/callgrind/global.h
@@ -58,11 +58,6 @@
  /* Enable experimental features? */
  #define CLG_EXPERIMENTAL 0
  
-/* Syscall Timing in microseconds? 
- * (define to 0 if you get compile errors) */
-#define CLG_MICROSYSTIME 0
-
-
  
  /*------------------------------------------------------------*/
  /*--- Command line options                                 ---*/
@@ -70,6 +65,19 @@
  
  #define DEFAULT_OUTFORMAT   "callgrind.out.%p"
  
+/* If and how to collect syscall time.
+   systime_no : do not collect systime
+   systime_msec : collect syscount, systime elapsed, milli second precision.
+   systime_usec : collect syscount, systime elapsed, micro second precision.
+   systime_nsec : collect syscount, systime elapsed, systime cpu, nano second
+                  precision.  */
+typedef enum {
+   systime_no,
+   systime_msec,
+   systime_usec,
+   systime_nsec
+} Collect_Systime;
+
  typedef struct _CommandLineOptions CommandLineOptions;
  struct _CommandLineOptions {
  
@@ -100,7 +108,7 @@ struct _CommandLineOptions {
    Bool collect_jumps;    /* Collect (cond.) jumps in functions ? */
  
    Bool collect_alloc;    /* Collect size of allocated memory */
-  Bool collect_systime;  /* Collect time for system calls */
+  Collect_Systime collect_systime;  /* Collect time for system calls */
  
    Bool collect_bus;      /* Collect global bus events */
  
diff --git a/callgrind/main.c b/callgrind/main.c

index 5f88fb0c17315e8504e9f353e030fed5ec9ac1e9..47369d1305dba8ee52ea2dc73532b34bf02fe4bd 100644 (file)
--- a/callgrind/main.c
+++ b/callgrind/main.c
@@ -1697,60 +1697,115 @@ Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
  }
  
  
-/* Syscall Timing */
+/* Syscall Timing.  syscalltime[tid] is the time at which thread tid last
+   started a syscall.  */
+
+/* struct vki_timespec syscalltime[VG_N_THREADS];
+   Whatever the syscall we use to measure the syscall time, we convert to
+   seconds and nanoseconds.  */
+struct vki_timespec *syscalltime;
+struct vki_timespec *syscallcputime;
  
-/* struct timeval syscalltime[VG_N_THREADS]; */
-#if CLG_MICROSYSTIME
-ULong *syscalltime;
-#else
-UInt *syscalltime;
-#endif
+
+static
+void collect_time (struct vki_timespec *systime, struct vki_timespec *syscputime)
+{
+  switch (CLG_(clo).collect_systime) {
+    case systime_no: tl_assert (0);
+    case systime_msec: {
+      UInt ms_timer = VG_(read_millisecond_timer)();
+      systime->tv_sec = ms_timer / 1000;
+      systime->tv_nsec = (ms_timer % 1000) * 1000000L;
+      break;
+    }
+    case systime_usec: {
+      struct vki_timeval tv_now;
+      VG_(gettimeofday)(&tv_now, NULL);
+      systime->tv_sec = tv_now.tv_sec;
+      systime->tv_nsec = tv_now.tv_usec * 1000;
+      break;
+    }
+   case systime_nsec:
+#  if defined(VGO_linux) || defined(VGO_solaris)
+      VG_(clock_gettime)(systime, VKI_CLOCK_MONOTONIC);
+      VG_(clock_gettime)(syscputime, VKI_CLOCK_THREAD_CPUTIME_ID);
+
+#  elif defined(VGO_darwin)
+      tl_assert(0);
+#  else
+#     error "Unknown OS"
+#  endif
+      break;
+  }
+}
  
  static
  void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
                             UWord* args, UInt nArgs)
  {
-  if (CLG_(clo).collect_systime) {
-#if CLG_MICROSYSTIME
-    struct vki_timeval tv_now;
-    VG_(gettimeofday)(&tv_now, NULL);
-    syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
-#else
-    syscalltime[tid] = VG_(read_millisecond_timer)();
-#endif
+  collect_time(&syscalltime[tid],
+               CLG_(clo).collect_systime == systime_nsec ? &syscallcputime[tid] : NULL);
+}
+
+/* Returns "after - before" in the unit as specified by --collect-systime.
+   after is supposed to be >= before, and tv_nsec must be >= 0 and < One_Second_In_Nsec.  */
+static
+ULong vki_timespec_diff (struct vki_timespec after, struct vki_timespec before)
+{
+   vki_time_t diff_sec = after.tv_sec - before.tv_sec;
+   long diff_nsec = after.tv_nsec - before.tv_nsec;
+   ULong nsec_factor; // factor to convert the desired unit into nsec.
+
+   if (diff_nsec < 0) {
+      diff_sec--;
+      diff_nsec += 1000000000ULL;
+   }
+  switch (CLG_(clo).collect_systime) {
+    case systime_no: tl_assert (0);
+    case systime_msec: nsec_factor = 1000000ULL; break;
+    case systime_usec: nsec_factor = 1000ULL; break;
+    case systime_nsec: nsec_factor = 1ULL; break;
+    default: tl_assert(0);
    }
+  return ((ULong) diff_sec * 1000000000ULL + diff_nsec) / nsec_factor;
  }
  
  static
  void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
                              UWord* args, UInt nArgs, SysRes res)
  {
-  if (CLG_(clo).collect_systime &&
-      CLG_(current_state).bbcc) {
-      Int o;
-#if CLG_MICROSYSTIME
-    struct vki_timeval tv_now;
+  if (CLG_(current_state).bbcc) {
+    Int o;
+    struct vki_timespec ts_now;
+    struct vki_timespec ts_cpunow;
      ULong diff;
-    
-    VG_(gettimeofday)(&tv_now, NULL);
-    diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
-#else
-    UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
-#endif  
  
-    /* offset o is for "SysCount", o+1 for "SysTime" */
+    collect_time(&ts_now,
+                 CLG_(clo).collect_systime == systime_nsec ? &ts_cpunow : NULL);
+
+    diff = vki_timespec_diff (ts_now, syscalltime[tid]);
+
+    /* offset o is for "SysCount", o+1 for "SysTime",
+       o+2 is (optionally) "SysCpuTime".  */
      o = fullOffset(EG_SYS);
      CLG_ASSERT(o>=0);
      CLG_DEBUG(0,"   Time (Off %d) for Syscall %u: %llu\n", o, syscallno,
-              (ULong)diff);
-    
-    CLG_(current_state).cost[o] ++;
-    CLG_(current_state).cost[o+1] += diff;
+              diff);
+
      if (!CLG_(current_state).bbcc->skipped)
        CLG_(init_cost_lz)(CLG_(sets).full,
                         &(CLG_(current_state).bbcc->skipped));
+    CLG_(current_state).cost[o] ++;
+    CLG_(current_state).cost[o+1] += diff;
      CLG_(current_state).bbcc->skipped[o] ++;
      CLG_(current_state).bbcc->skipped[o+1] += diff;
+    if (CLG_(clo).collect_systime == systime_nsec) {
+      diff = vki_timespec_diff (ts_cpunow, syscallcputime[tid]);
+      CLG_DEBUG(0,"   SysCpuTime (Off %d) for Syscall %u: %llu\n", o+2, syscallno,
+                diff);
+      CLG_(current_state).cost[o+2] += diff;
+      CLG_(current_state).bbcc->skipped[o+2] += diff;
+    }
    }
  }
  
@@ -1973,13 +2028,22 @@ void CLG_(post_clo_init)(void)
                  "sp-at-mem-access\n");
     }
  
-   if (CLG_(clo).collect_systime) {
+   if (CLG_(clo).collect_systime != systime_no) {
        VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                                   CLG_(post_syscalltime));
        syscalltime = CLG_MALLOC("cl.main.pci.1",
                                 VG_N_THREADS * sizeof syscalltime[0]);
        for (UInt i = 0; i < VG_N_THREADS; ++i) {
-         syscalltime[i] = 0;
+         syscalltime[i].tv_sec = 0;
+         syscalltime[i].tv_nsec = 0;
+      }
+      if (CLG_(clo).collect_systime == systime_nsec) {
+         syscallcputime = CLG_MALLOC("cl.main.pci.2",
+                                     VG_N_THREADS * sizeof syscallcputime[0]);
+         for (UInt i = 0; i < VG_N_THREADS; ++i) {
+            syscallcputime[i].tv_sec = 0;
+            syscallcputime[i].tv_nsec = 0;
+         }
        }
     }
  
diff --git a/callgrind/sim.c b/callgrind/sim.c

index 521bfb8163573a7558e2c8ad9c78d6e42039b8a3..104c63492e36021916238682bcd6870bdb9eceaa 100644 (file)
--- a/callgrind/sim.c
+++ b/callgrind/sim.c
@@ -1625,8 +1625,12 @@ void CLG_(init_eventsets)()
      if (CLG_(clo).collect_alloc)
         CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
  
-    if (CLG_(clo).collect_systime)
-       CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime");
+    if (CLG_(clo).collect_systime != systime_no) {
+       if (CLG_(clo).collect_systime == systime_nsec)
+          CLG_(register_event_group3)(EG_SYS, "sysCount", "sysTime", "sysCpuTime");
+       else
+          CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime");
+    }
  
      // event set used as base for instruction self cost
      CLG_(sets).base = CLG_(get_event_set2)(EG_USE, EG_IR);
@@ -1670,6 +1674,7 @@ void CLG_(init_eventsets)()
      CLG_(append_event)(CLG_(dumpmap), "allocSize");
      CLG_(append_event)(CLG_(dumpmap), "sysCount");
      CLG_(append_event)(CLG_(dumpmap), "sysTime");
+    CLG_(append_event)(CLG_(dumpmap), "sysCpuTime");
  }
  
  
diff --git a/coregrind/m_libcproc.c b/coregrind/m_libcproc.c

index d6577ba6b733749da963a798b424b0e343d1a5a8..fc5da96974a8e5ba510c7daa0821bcb0c41ce6c7 100644 (file)
--- a/coregrind/m_libcproc.c
+++ b/coregrind/m_libcproc.c
@@ -962,6 +962,20 @@ UInt VG_(read_millisecond_timer) ( void )
     return (now - base) / 1000;
  }
  
+#  if defined(VGO_linux) || defined(VGO_solaris)
+void VG_(clock_gettime) ( struct vki_timespec *ts, vki_clockid_t clk_id )
+{
+    SysRes res;
+    res = VG_(do_syscall2)(__NR_clock_gettime, clk_id,
+                           (UWord)ts);
+    vg_assert (sr_isError(res) == 0);
+}
+#  elif defined(VGO_darwin)
+  /* See pub_tool_libcproc.h */
+#  else
+#    error "Unknown OS"
+#  endif
+
  Int VG_(gettimeofday)(struct vki_timeval *tv, struct vki_timezone *tz)
  {
     SysRes res;
diff --git a/include/pub_tool_libcproc.h b/include/pub_tool_libcproc.h

index f90af82e0ec0d83ec0c1bc54ca98a7e6b1158b9b..73d7eca68c6b87988ebfb1fb2c12fd3ae3536879 100644 (file)
--- a/include/pub_tool_libcproc.h
+++ b/include/pub_tool_libcproc.h
@@ -103,6 +103,16 @@ extern UInt VG_(read_millisecond_timer) ( void );
  
  extern Int  VG_(gettimeofday)(struct vki_timeval *tv, struct vki_timezone *tz);
  
+#  if defined(VGO_linux) || defined(VGO_solaris)
+/* Get the clock value as specified by clk_id.  Asserts if unsuccesful.  */
+extern void VG_(clock_gettime)(struct vki_timespec *ts, vki_clockid_t clk_id);
+#  elif defined(VGO_darwin)
+  /* It seems clock_gettime is only available on recent Darwin versions.
+     For the moment, let's assume it is not available.  */
+#  else
+#    error "Unknown OS"
+#  endif
+
  // Returns the number of milliseconds of user cpu time we have used,
  // as reported by 'getrusage'.
  extern UInt VG_(get_user_milliseconds)(void);
author	Philippe Waroquiers <philippe.waroquiers@skynet.be>
	Thu, 10 Oct 2019 19:42:01 +0000 (21:42 +0200)
committer	Philippe Waroquiers <philippe.waroquiers@skynet.be>
	Sat, 12 Oct 2019 16:12:04 +0000 (18:12 +0200)
NEWS		patch \| blob \| blame \| history
callgrind/clo.c		patch \| blob \| blame \| history
callgrind/docs/cl-manual.xml		patch \| blob \| blame \| history
callgrind/dump.c		patch \| blob \| blame \| history
callgrind/global.h		patch \| blob \| blame \| history
callgrind/main.c		patch \| blob \| blame \| history
callgrind/sim.c		patch \| blob \| blame \| history
coregrind/m_libcproc.c		patch \| blob \| blame \| history
include/pub_tool_libcproc.h		patch \| blob \| blame \| history