]> git.ipfire.org Git - thirdparty/git.git/commitdiff
tr2: log parent process name
author Emily Shaffer <emilyshaffer@google.com>
Thu, 22 Jul 2021 01:27:07 +0000 (18:27 -0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 22 Jul 2021 20:35:20 +0000 (13:35 -0700)
It can be useful to tell who invoked Git - was it invoked manually by a
user via CLI or script? By an IDE?  In some cases - like 'repo' tool -
we can influence the source code and set the GIT_TRACE2_PARENT_SID
environment variable from the caller process. In 'repo''s case, that
parent SID is manipulated to include the string "repo", which means we
can positively identify when Git was invoked by 'repo' tool. However,
identifying parents that way requires both that we know which tools
invoke Git and that we have the ability to modify the source code of
those tools. It cannot scale to keep up with the various IDEs and
wrappers which use Git, most of which we don't know about. Learning
which tools and wrappers invoke Git, and how, would give us insight to
decide where to improve Git's usability and performance.

Unfortunately, there's no cross-platform reliable way to gather the name
of the parent process. If procfs is present, we can use that; otherwise
we will need to discover the name another way. However, the process ID
should be sufficient to look up the process name on most platforms, so
that code may be shareable.

Git for Windows gathers similar information and logs it as a "data_json"
event. However, since "data_json" has a variable format, it is difficult
to parse effectively in some languages; instead, let's pursue a
dedicated "cmd_ancestry" event to record information about the ancestry
of the current process in a consistent, parseable way.

Git for Windows also gathers information about more than one generation
of parent. In Linux further ancestry info can be gathered with procfs,
but it's unwieldy to do so. In the interest of later moving Git for
Windows ancestry logging to the 'cmd_ancestry' event, and in the
interest of later adding more ancestry to the Linux implementation - or
of adding this functionality to other platforms which have an easier
time walking the process tree - let's make 'cmd_ancestry' accept an
array of parentage.

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 files changed:
Documentation/technical/api-trace2.txt
compat/linux/procinfo.c [new file with mode: 0644]
config.mak.uname
t/t0210/scrub_normal.perl
t/t0211/scrub_perf.perl
t/t0212/parse_events.perl
trace2.c
trace2.h
trace2/tr2_tgt.h
trace2/tr2_tgt_event.c
trace2/tr2_tgt_normal.c
trace2/tr2_tgt_perf.c

index 3f52f981a2d813d03bcff37ed8a6fc8e73412191..8a0b360a0e97e31525a58243bb74e04735650b26 100644 (file)
@@ -493,6 +493,20 @@ about specific error arguments.
 }
 ------------
 
+`"cmd_ancestry"`::
+       This event contains the text command name for the parent (and earlier
+       generations of parents) of the current process, in an array ordered from
+       nearest parent to furthest great-grandparent. It may not be implemented
+       on all platforms.
++
+------------
+{
+       "event":"cmd_ancestry",
+       ...
+       "ancestry":["bash","tmux: server","systemd"]
+}
+------------
+
 `"cmd_name"`::
        This event contains the command name for this git process
        and the hierarchy of commands from parent git processes.
diff --git a/compat/linux/procinfo.c b/compat/linux/procinfo.c
new file mode 100644 (file)
index 0000000..578fed4
--- /dev/null
@@ -0,0 +1,55 @@
+#include "cache.h"
+
+#include "strbuf.h"
+#include "strvec.h"
+#include "trace2.h"
+
+static void get_ancestry_names(struct strvec *names)
+{
+       /*
+        * Append the immediate parent's name, read from
+        * /proc/$(getppid())/comm, to 'names'; add nothing if that fails.
+        * NEEDSWORK: We could gather the entire pstree into an array to match
+        * functionality with compat/win32/trace2_win32_process_info.c.
+        * To do so, we may want to examine /proc/<pid>/stat.
+        */
+       struct strbuf procfs_path = STRBUF_INIT;
+       struct strbuf name = STRBUF_INIT;
+
+       /* strbuf_read_file() returns -1 on error, so test for "> 0". */
+       strbuf_addf(&procfs_path, "/proc/%d/comm", getppid());
+       if (strbuf_read_file(&name, procfs_path.buf, 0) > 0) {
+               strbuf_trim_trailing_newline(&name);
+               /* strvec_push() copies its argument; 'name' stays ours. */
+               strvec_push(names, name.buf);
+       }
+       /* NEEDSWORK: add non-procfs-linux implementations here */
+       strbuf_release(&procfs_path);
+       strbuf_release(&name);
+}
+
+/* On startup, emit a 'cmd_ancestry' event naming our parent process. */
+void trace2_collect_process_info(enum trace2_process_info_reason reason)
+{
+       struct strvec ancestry = STRVEC_INIT;
+
+       if (!trace2_is_enabled())
+               return;
+
+       /*
+        * Only startup is of interest today; someday we may want to write
+        * something extra at exit as well.
+        */
+       if (reason != TRACE2_PROCESS_INFO_STARTUP)
+               return;
+
+       /*
+        * NEEDSWORK: we could do the entire ptree in an array instead,
+        * see compat/win32/trace2_win32_process_info.c.
+        */
+       get_ancestry_names(&ancestry);
+
+       if (ancestry.nr)
+               trace2_cmd_ancestry(ancestry.v);
+       strvec_clear(&ancestry);
+}
index 185ff79b146aff522234d5b4819a6567e6263908..d3bd4c6843bc4e87047d685f59165fa580f4cb28 100644 (file)
@@ -58,6 +58,8 @@ ifeq ($(uname_S),Linux)
        FREAD_READS_DIRECTORIES = UnfortunatelyYes
        BASIC_CFLAGS += -DHAVE_SYSINFO
        PROCFS_EXECUTABLE_PATH = /proc/self/exe
+       HAVE_PLATFORM_PROCINFO = YesPlease
+       COMPAT_OBJS += compat/linux/procinfo.o
 endif
 ifeq ($(uname_S),GNU/kFreeBSD)
        HAVE_ALLOCA_H = YesPlease
index c65d1a815ea3d88fc09e88971e4126f24ce25c43..7cc4de392a051e68d438c19c0a179b2b47b3c4ae 100644 (file)
@@ -42,6 +42,12 @@ while (<>) {
        # so just omit it for testing purposes.
        # print "cmd_path _EXE_\n";
     }
+    elsif ($line =~ m/^cmd_ancestry/) {
+       # 'cmd_ancestry' is not implemented everywhere, so for portability's
+       # sake, skip it when parsing normal.
+       #
+       # print "$line";
+    }
     else {
        print "$line";
     }
index 351af7844ed95d0823bbb8c54cee06fe340e29fb..d164b750ff702c32e449d6e844f403c657871b2d 100644 (file)
@@ -44,6 +44,11 @@ while (<>) {
        # $tokens[$col_rest] = "_EXE_";
        goto SKIP_LINE;
     }
+    elsif ($tokens[$col_event] =~ m/cmd_ancestry/) {
+       # 'cmd_ancestry' is platform-specific and not implemented everywhere,
+       # so skip it.
+       goto SKIP_LINE;
+    }
     elsif ($tokens[$col_event] =~ m/child_exit/) {
        $tokens[$col_rest] =~ s/ pid:\d* / pid:_PID_ /;
     }
index 6584bb5634276e85c44beff001f8d72f09264df5..b6408560c0ca934601cda3fed8091510a3918745 100644 (file)
@@ -132,7 +132,10 @@ while (<>) {
        # just omit it for testing purposes.
        # $processes->{$sid}->{'path'} = "_EXE_";
     }
-    
+    elsif ($event eq 'cmd_ancestry') {
+       # 'cmd_ancestry' is platform-specific and not implemented everywhere, so
+       # just skip it for testing purposes.
+    }
     elsif ($event eq 'cmd_name') {
        $processes->{$sid}->{'name'} = $line->{'name'};
        $processes->{$sid}->{'hierarchy'} = $line->{'hierarchy'};
index 256120c7fd553e81d82a1a6e1c883af6213e46b6..b9b154ac440db0b895cd77e86148e27afc7c6514 100644 (file)
--- a/trace2.c
+++ b/trace2.c
@@ -260,6 +260,19 @@ void trace2_cmd_path_fl(const char *file, int line, const char *pathname)
                        tgt_j->pfn_command_path_fl(file, line, pathname);
 }
 
+void trace2_cmd_ancestry_fl(const char *file, int line, const char **parent_names)
+{
+       struct tr2_tgt *target;
+       int idx;
+
+       if (trace2_enabled) {
+               /* call the hook on each iterated target that provides one */
+               for_each_wanted_builtin (idx, target)
+                       if (target->pfn_command_ancestry_fl)
+                               target->pfn_command_ancestry_fl(file, line, parent_names);
+       }
+}
+
 void trace2_cmd_name_fl(const char *file, int line, const char *name)
 {
        struct tr2_tgt *tgt_j;
index 0d990db8177b390a41f444f3220ebbd4069bf518..9b7286c572f55234135afa3b4df3c99bfb0faa00 100644 (file)
--- a/trace2.h
+++ b/trace2.h
@@ -133,6 +133,16 @@ void trace2_cmd_path_fl(const char *file, int line, const char *pathname);
 
 #define trace2_cmd_path(p) trace2_cmd_path_fl(__FILE__, __LINE__, (p))
 
+/*
+ * Emit a 'cmd_ancestry' event with the names of the current process's
+ * ancestors; 'parent_names' is a NULL-terminated array, nearest parent first.
+ * This gives post-processors a way to determine what invoked the command and
+ * learn more about usage patterns.
+ */
+void trace2_cmd_ancestry_fl(const char *file, int line, const char **parent_names);
+
+#define trace2_cmd_ancestry(v) trace2_cmd_ancestry_fl(__FILE__, __LINE__, (v))
+
 /*
  * Emit a 'cmd_name' event with the canonical name of the command.
  * This gives post-processors a simple field to identify the command
index 7b904692123e28074458fce0c0afe06922caa607..1f66fd657302510fb9d6a08d2f71ec846ed33c16 100644 (file)
@@ -27,6 +27,8 @@ typedef void(tr2_tgt_evt_error_va_fl_t)(const char *file, int line,
 
 typedef void(tr2_tgt_evt_command_path_fl_t)(const char *file, int line,
                                            const char *command_path);
+typedef void(tr2_tgt_evt_command_ancestry_fl_t)(const char *file, int line,
+                                               const char **parent_names);
 typedef void(tr2_tgt_evt_command_name_fl_t)(const char *file, int line,
                                            const char *name,
                                            const char *hierarchy);
@@ -108,6 +110,7 @@ struct tr2_tgt {
        tr2_tgt_evt_atexit_t                    *pfn_atexit;
        tr2_tgt_evt_error_va_fl_t               *pfn_error_va_fl;
        tr2_tgt_evt_command_path_fl_t           *pfn_command_path_fl;
+       tr2_tgt_evt_command_ancestry_fl_t       *pfn_command_ancestry_fl;
        tr2_tgt_evt_command_name_fl_t           *pfn_command_name_fl;
        tr2_tgt_evt_command_mode_fl_t           *pfn_command_mode_fl;
        tr2_tgt_evt_alias_fl_t                  *pfn_alias_fl;
index 6353e8ad915610bc93147c5291bc6f31020ac03f..578a9a5287ab11bc31a4668e8e0d7d8d0e8c6e7d 100644 (file)
@@ -261,6 +261,26 @@ static void fn_command_path_fl(const char *file, int line, const char *pathname)
        jw_release(&jw);
 }
 
+static void fn_command_ancestry_fl(const char *file, int line, const char **parent_names)
+{
+       struct json_writer jw = JSON_WRITER_INIT;
+       const char **ancestor;
+
+       jw_object_begin(&jw, 0);
+       event_fmt_prepare("cmd_ancestry", file, line, NULL, &jw);
+       jw_object_inline_begin_array(&jw, "ancestry");
+
+       /* one array element per name, up to the terminating NULL */
+       for (ancestor = parent_names; *ancestor; ancestor++)
+               jw_array_string(&jw, *ancestor);
+
+       jw_end(&jw); /* 'ancestry' array */
+       jw_end(&jw); /* event object */
+
+       tr2_dst_write_line(&tr2dst_event, &jw.json);
+       jw_release(&jw);
+}
+
 static void fn_command_name_fl(const char *file, int line, const char *name,
                               const char *hierarchy)
 {
@@ -584,6 +604,7 @@ struct tr2_tgt tr2_tgt_event = {
        fn_atexit,
        fn_error_va_fl,
        fn_command_path_fl,
+       fn_command_ancestry_fl,
        fn_command_name_fl,
        fn_command_mode_fl,
        fn_alias_fl,
index 31b602c171fc69177a75c62135ed2629b62ec892..a5751c886441658efc18c65ad5b279c73aaaf918 100644 (file)
@@ -160,6 +160,24 @@ static void fn_command_path_fl(const char *file, int line, const char *pathname)
        strbuf_release(&buf_payload);
 }
 
+static void fn_command_ancestry_fl(const char *file, int line, const char **parent_names)
+{
+       struct strbuf buf_payload = STRBUF_INIT;
+       const char **ancestor;
+
+       /* cmd_ancestry parent <- grandparent <- great-grandparent */
+       strbuf_addstr(&buf_payload, "cmd_ancestry ");
+       for (ancestor = parent_names; *ancestor; ancestor++) {
+               /* every name but the first is preceded by a delimiter */
+               if (ancestor != parent_names)
+                       strbuf_addstr(&buf_payload, " <- ");
+               strbuf_addstr(&buf_payload, *ancestor);
+       }
+
+       normal_io_write_fl(file, line, &buf_payload);
+       strbuf_release(&buf_payload);
+}
+
 static void fn_command_name_fl(const char *file, int line, const char *name,
                               const char *hierarchy)
 {
@@ -306,6 +324,7 @@ struct tr2_tgt tr2_tgt_normal = {
        fn_atexit,
        fn_error_va_fl,
        fn_command_path_fl,
+       fn_command_ancestry_fl,
        fn_command_name_fl,
        fn_command_mode_fl,
        fn_alias_fl,
index a8018f18cc87e869d63bc92abf0d4434c320134e..af4d65a0a5fb79fa5689529b5e76b90f3dc1ccdb 100644 (file)
@@ -253,6 +253,21 @@ static void fn_command_path_fl(const char *file, int line, const char *pathname)
        strbuf_release(&buf_payload);
 }
 
+static void fn_command_ancestry_fl(const char *file, int line, const char **parent_names)
+{
+       struct strbuf payload = STRBUF_INIT;
+
+       /* payload looks like: ancestry:[<names>] */
+       strbuf_addstr(&payload, "ancestry:[");
+       /* It's not an argv but the rules are basically the same. */
+       sq_append_quote_argv_pretty(&payload, parent_names);
+       strbuf_addch(&payload, ']');
+
+       perf_io_write_fl(file, line, "cmd_ancestry", NULL, NULL, NULL, NULL,
+                        &payload);
+       strbuf_release(&payload);
+}
+
 static void fn_command_name_fl(const char *file, int line, const char *name,
                               const char *hierarchy)
 {
@@ -532,6 +547,7 @@ struct tr2_tgt tr2_tgt_perf = {
        fn_atexit,
        fn_error_va_fl,
        fn_command_path_fl,
+       fn_command_ancestry_fl,
        fn_command_name_fl,
        fn_command_mode_fl,
        fn_alias_fl,