]> git.ipfire.org Git - thirdparty/git.git/blobdiff - fsmonitor.c
fsmonitor: force update index after large responses
[thirdparty/git.git] / fsmonitor.c
index ab9bfc60b34e3146f73194d01cf8963a90f1d768..292a6742b4fa23332000a61ad3db6ca28cf88018 100644 (file)
@@ -3,6 +3,7 @@
 #include "dir.h"
 #include "ewah/ewok.h"
 #include "fsmonitor.h"
+#include "fsmonitor-ipc.h"
 #include "run-command.h"
 #include "strbuf.h"
 
@@ -148,15 +149,18 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
 /*
  * Call the query-fsmonitor hook passing the last update token of the saved results.
  */
-static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result)
+static int query_fsmonitor_hook(struct repository *r,
+                               int version,
+                               const char *last_update,
+                               struct strbuf *query_result)
 {
        struct child_process cp = CHILD_PROCESS_INIT;
        int result;
 
-       if (!core_fsmonitor)
+       if (fsm_settings__get_mode(r) != FSMONITOR_MODE_HOOK)
                return -1;
 
-       strvec_push(&cp.args, core_fsmonitor);
+       strvec_push(&cp.args, fsm_settings__get_hook_path(r));
        strvec_pushf(&cp.args, "%d", version);
        strvec_pushf(&cp.args, "%s", last_update);
        cp.use_shell = 1;
@@ -168,29 +172,15 @@ static int query_fsmonitor(int version, const char *last_update, struct strbuf *
 
        if (result)
                trace2_data_intmax("fsm_hook", NULL, "query/failed", result);
-       else {
+       else
                trace2_data_intmax("fsm_hook", NULL, "query/response-length",
                                   query_result->len);
 
-               if (fsmonitor_is_trivial_response(query_result))
-                       trace2_data_intmax("fsm_hook", NULL,
-                                          "query/trivial-response", 1);
-       }
-
        trace2_region_leave("fsm_hook", "query", NULL);
 
        return result;
 }
 
-int fsmonitor_is_trivial_response(const struct strbuf *query_result)
-{
-       static char trivial_response[3] = { '\0', '/', '\0' };
-
-       return query_result->len >= 3 &&
-               !memcmp(trivial_response,
-                       &query_result->buf[query_result->len - 3], 3);
-}
-
 static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
 {
        int i, len = strlen(name);
@@ -229,6 +219,43 @@ static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
        untracked_cache_invalidate_path(istate, name, 0);
 }
 
+/*
+ * The number of pathnames that we need to receive from FSMonitor
+ * before we force the index to be updated.
+ *
+ * Note that any pathname within the set of received paths MAY cause
+ * cache-entry or istate flag bits to be updated and thus cause the
+ * index to be updated on disk.
+ *
+ * However, the response may contain many paths (such as ignored
+ * paths) that will not update any flag bits.  And thus not force the
+ * index to be updated.  (This is fine and normal.)  It also means
+ * that the token will not be updated in the FSMonitor index
+ * extension.  So the next Git command will find the same token in the
+ * index, make the same token-relative request, and receive the same
+ * response (plus any newly changed paths).  If this response is large
+ * (and continues to grow), performance could be impacted.
+ *
+ * For example, if the user runs a build and it writes 100K object
+ * files but doesn't modify any source files, the index would not need
+ * to be updated.  The FSMonitor response (after the build and
+ * relative to a pre-build token) might be 5MB.  Each subsequent Git
+ * command will receive that same 100K/5MB response until something
+ * causes the index to be updated.  And `refresh_fsmonitor()` will
+ * have to iterate over those 100K paths each time.
+ *
+ * Performance could be improved if we optionally force update the
+ * index after a very large response and get an updated token into
+ * the FSMonitor index extension.  This should allow subsequent
+ * commands to get smaller and more current responses.
+ *
+ * The value chosen here does not need to be precise.  The index
+ * will be updated automatically the first time the user touches
+ * a tracked file and causes a command like `git status` to
+ * update an mtime and/or set a flag bit.
+ */
+static int fsmonitor_force_update_threshold = 100;
+
 void refresh_fsmonitor(struct index_state *istate)
 {
        struct strbuf query_result = STRBUF_INIT;
@@ -238,17 +265,62 @@ void refresh_fsmonitor(struct index_state *istate)
        struct strbuf last_update_token = STRBUF_INIT;
        char *buf;
        unsigned int i;
+       int is_trivial = 0;
+       struct repository *r = istate->repo ? istate->repo : the_repository;
+       enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r);
 
-       if (!core_fsmonitor || istate->fsmonitor_has_run_once)
+       if (fsm_mode <= FSMONITOR_MODE_DISABLED ||
+           istate->fsmonitor_has_run_once)
                return;
 
-       hook_version = fsmonitor_hook_version();
-
        istate->fsmonitor_has_run_once = 1;
 
        trace_printf_key(&trace_fsmonitor, "refresh fsmonitor");
+
+       if (fsm_mode == FSMONITOR_MODE_IPC) {
+               query_success = !fsmonitor_ipc__send_query(
+                       istate->fsmonitor_last_update ?
+                       istate->fsmonitor_last_update : "builtin:fake",
+                       &query_result);
+               if (query_success) {
+                       /*
+                        * The response contains a series of nul terminated
+                        * strings.  The first is the new token.
+                        *
+                        * Use `char *buf` as an intermediary to trick the CI
+                        * static analysis to let us use `strbuf_addstr()`
+                        * here (and only copy the token) rather than
+                        * `strbuf_addbuf()`.
+                        */
+                       buf = query_result.buf;
+                       strbuf_addstr(&last_update_token, buf);
+                       bol = last_update_token.len + 1;
+                       is_trivial = query_result.buf[bol] == '/';
+                       if (is_trivial)
+                               trace2_data_intmax("fsm_client", NULL,
+                                                  "query/trivial-response", 1);
+               } else {
+                       /*
+                        * The builtin daemon is not available on this
+                        * platform -OR- we failed to get a response.
+                        *
+                        * Generate a fake token (rather than a V1
+                        * timestamp) for the index extension.  (If
+                        * they switch back to the hook API, we don't
+                        * want ambiguous state.)
+                        */
+                       strbuf_addstr(&last_update_token, "builtin:fake");
+               }
+
+               goto apply_results;
+       }
+
+       assert(fsm_mode == FSMONITOR_MODE_HOOK);
+
+       hook_version = fsmonitor_hook_version();
+
        /*
-        * This could be racy so save the date/time now and query_fsmonitor
+        * This could be racy so save the date/time now and query_fsmonitor_hook
         * should be inclusive to ensure we don't miss potential changes.
         */
        last_update = getnanotime();
@@ -256,13 +328,14 @@ void refresh_fsmonitor(struct index_state *istate)
                strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
 
        /*
-        * If we have a last update token, call query_fsmonitor for the set of
+        * If we have a last update token, call query_fsmonitor_hook for the set of
         * changes since that token, else assume everything is possibly dirty
         * and check it all.
         */
        if (istate->fsmonitor_last_update) {
                if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) {
-                       query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION2,
+                       query_success = !query_fsmonitor_hook(
+                               r, HOOK_INTERFACE_VERSION2,
                                istate->fsmonitor_last_update, &query_result);
 
                        if (query_success) {
@@ -283,6 +356,7 @@ void refresh_fsmonitor(struct index_state *istate)
                                        query_success = 0;
                                } else {
                                        bol = last_update_token.len + 1;
+                                       is_trivial = query_result.buf[bol] == '/';
                                }
                        } else if (hook_version < 0) {
                                hook_version = HOOK_INTERFACE_VERSION1;
@@ -292,37 +366,83 @@ void refresh_fsmonitor(struct index_state *istate)
                }
 
                if (hook_version == HOOK_INTERFACE_VERSION1) {
-                       query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION1,
+                       query_success = !query_fsmonitor_hook(
+                               r, HOOK_INTERFACE_VERSION1,
                                istate->fsmonitor_last_update, &query_result);
+                       if (query_success)
+                               is_trivial = query_result.buf[0] == '/';
                }
 
-               trace_performance_since(last_update, "fsmonitor process '%s'", core_fsmonitor);
-               trace_printf_key(&trace_fsmonitor, "fsmonitor process '%s' returned %s",
-                       core_fsmonitor, query_success ? "success" : "failure");
+               if (is_trivial)
+                       trace2_data_intmax("fsm_hook", NULL,
+                                          "query/trivial-response", 1);
+
+               trace_performance_since(last_update, "fsmonitor process '%s'",
+                                       fsm_settings__get_hook_path(r));
+               trace_printf_key(&trace_fsmonitor,
+                                "fsmonitor process '%s' returned %s",
+                                fsm_settings__get_hook_path(r),
+                                query_success ? "success" : "failure");
        }
 
-       /* a fsmonitor process can return '/' to indicate all entries are invalid */
-       if (query_success && query_result.buf[bol] != '/') {
-               /* Mark all entries returned by the monitor as dirty */
+apply_results:
+       /*
+        * The response from FSMonitor (excluding the header token) is
+        * either:
+        *
+        * [a] a (possibly empty) list of NUL delimited relative
+        *     pathnames of changed paths.  This list can contain
+        *     files and directories.  Directories have a trailing
+        *     slash.
+        *
+        * [b] a single '/' to indicate the provider had no
+        *     information and that we should consider everything
+        *     invalid.  We call this a trivial response.
+        */
+       trace2_region_enter("fsmonitor", "apply_results", istate->repo);
+
+       if (query_success && !is_trivial) {
+               /*
+                * Mark all pathnames returned by the monitor as dirty.
+                *
+                * This updates both the cache-entries and the untracked-cache.
+                */
+               int count = 0;
+
                buf = query_result.buf;
                for (i = bol; i < query_result.len; i++) {
                        if (buf[i] != '\0')
                                continue;
                        fsmonitor_refresh_callback(istate, buf + bol);
                        bol = i + 1;
+                       count++;
                }
-               if (bol < query_result.len)
+               if (bol < query_result.len) {
                        fsmonitor_refresh_callback(istate, buf + bol);
+                       count++;
+               }
 
                /* Now mark the untracked cache for fsmonitor usage */
                if (istate->untracked)
                        istate->untracked->use_fsmonitor = 1;
-       } else {
 
-               /* We only want to run the post index changed hook if we've actually changed entries, so keep track
-                * if we actually changed entries or not */
+               if (count > fsmonitor_force_update_threshold)
+                       istate->cache_changed |= FSMONITOR_CHANGED;
+
+               trace2_data_intmax("fsmonitor", istate->repo, "apply_count",
+                                  count);
+
+       } else {
+               /*
+                * We failed to get a response or received a trivial response,
+                * so invalidate everything.
+                *
+                * We only want to run the post index changed hook if
+                * we've actually changed entries, so keep track if we
+                * actually changed entries or not.
+                */
                int is_cache_changed = 0;
-               /* Mark all entries invalid */
+
                for (i = 0; i < istate->cache_nr; i++) {
                        if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) {
                                is_cache_changed = 1;
@@ -330,13 +450,18 @@ void refresh_fsmonitor(struct index_state *istate)
                        }
                }
 
-               /* If we're going to check every file, ensure we save the results */
+               /*
+                * If we're going to check every file, ensure we save
+                * the results.
+                */
                if (is_cache_changed)
                        istate->cache_changed |= FSMONITOR_CHANGED;
 
                if (istate->untracked)
                        istate->untracked->use_fsmonitor = 0;
        }
+       trace2_region_leave("fsmonitor", "apply_results", istate->repo);
+
        strbuf_release(&query_result);
 
        /* Now that we've updated istate, save the last_update_token */
@@ -411,7 +536,8 @@ void remove_fsmonitor(struct index_state *istate)
 void tweak_fsmonitor(struct index_state *istate)
 {
        unsigned int i;
-       int fsmonitor_enabled = git_config_get_fsmonitor();
+       int fsmonitor_enabled = (fsm_settings__get_mode(istate->repo)
+                                > FSMONITOR_MODE_DISABLED);
 
        if (istate->fsmonitor_dirty) {
                if (fsmonitor_enabled) {
@@ -431,16 +557,8 @@ void tweak_fsmonitor(struct index_state *istate)
                istate->fsmonitor_dirty = NULL;
        }
 
-       switch (fsmonitor_enabled) {
-       case -1: /* keep: do nothing */
-               break;
-       case 0: /* false */
-               remove_fsmonitor(istate);
-               break;
-       case 1: /* true */
+       if (fsmonitor_enabled)
                add_fsmonitor(istate);
-               break;
-       default: /* unknown value: do nothing */
-               break;
-       }
+       else
+               remove_fsmonitor(istate);
 }