builtin/fsmonitor--daemon.c

   1 #include "builtin.h"
   2 #include "abspath.h"
   3 #include "config.h"
   4 #include "environment.h"
   5 #include "gettext.h"
   6 #include "parse-options.h"
   7 #include "fsmonitor-ll.h"
   8 #include "fsmonitor-ipc.h"
   9 #include "fsmonitor-path-utils.h"
  10 #include "fsmonitor-settings.h"
  11 #include "compat/fsmonitor/fsm-health.h"
  12 #include "compat/fsmonitor/fsm-listen.h"
  13 #include "fsmonitor--daemon.h"
  14 #include "simple-ipc.h"
  15 #include "khash.h"
  16 #include "pkt-line.h"
  17 #include "trace.h"
  18 #include "trace2.h"
  19
/*
 * Usage lines for the `git fsmonitor--daemon` subcommands.  The array is
 * NULL-terminated.  Only the "start" and "run" forms (the ones that take
 * <options>) are wrapped in N_(); "stop" and "status" are plain literals.
 */
static const char * const builtin_fsmonitor__daemon_usage[] = {
        N_("git fsmonitor--daemon start [<options>]"),
        N_("git fsmonitor--daemon run [<options>]"),
        "git fsmonitor--daemon stop",
        "git fsmonitor--daemon status",
        NULL
};
  27
  28 #ifdef HAVE_FSMONITOR_DAEMON_BACKEND
/*
 * Global state loaded from config.
 *
 * Each #define names a config key and the variable that follows it holds
 * the built-in default, which fsmonitor_config() overwrites when the key
 * is present.
 */
/* fsmonitor_config() rejects values below 1 for this key. */
#define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
static int fsmonitor__ipc_threads = 8;

/* In seconds (per the variable name); negative values are rejected. */
#define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
static int fsmonitor__start_timeout_sec = 60;

/* Parsed as a bool-or-int; negative values are rejected. */
#define FSMONITOR__ANNOUNCE_STARTUP "fsmonitor.announcestartup"
static int fsmonitor__announce_startup = 0;
  40
  41 static int fsmonitor_config(const char *var, const char *value, void *cb)
  42 {
  43         if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
  44                 int i = git_config_int(var, value);
  45                 if (i < 1)
  46                         return error(_("value of '%s' out of range: %d"),
  47                                      FSMONITOR__IPC_THREADS, i);
  48                 fsmonitor__ipc_threads = i;
  49                 return 0;
  50         }
  51
  52         if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
  53                 int i = git_config_int(var, value);
  54                 if (i < 0)
  55                         return error(_("value of '%s' out of range: %d"),
  56                                      FSMONITOR__START_TIMEOUT, i);
  57                 fsmonitor__start_timeout_sec = i;
  58                 return 0;
  59         }
  60
  61         if (!strcmp(var, FSMONITOR__ANNOUNCE_STARTUP)) {
  62                 int is_bool;
  63                 int i = git_config_bool_or_int(var, value, &is_bool);
  64                 if (i < 0)
  65                         return error(_("value of '%s' not bool or int: %d"),
  66                                      var, i);
  67                 fsmonitor__announce_startup = i;
  68                 return 0;
  69         }
  70
  71         return git_default_config(var, value, cb);
  72 }
  73
  74 /*
  75  * Acting as a CLIENT.
  76  *
  77  * Send a "quit" command to the `git-fsmonitor--daemon` (if running)
  78  * and wait for it to shutdown.
  79  */
  80 static int do_as_client__send_stop(void)
  81 {
  82         struct strbuf answer = STRBUF_INIT;
  83         int ret;
  84
  85         ret = fsmonitor_ipc__send_command("quit", &answer);
  86
  87         /* The quit command does not return any response data. */
  88         strbuf_release(&answer);
  89
  90         if (ret)
  91                 return ret;
  92
  93         trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL);
  94         while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
  95                 sleep_millisec(50);
  96         trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL);
  97
  98         return 0;
  99 }
 100
 101 static int do_as_client__status(void)
 102 {
 103         enum ipc_active_state state = fsmonitor_ipc__get_state();
 104
 105         switch (state) {
 106         case IPC_STATE__LISTENING:
 107                 printf(_("fsmonitor-daemon is watching '%s'\n"),
 108                        the_repository->worktree);
 109                 return 0;
 110
 111         default:
 112                 printf(_("fsmonitor-daemon is not watching '%s'\n"),
 113                        the_repository->worktree);
 114                 return 1;
 115         }
 116 }
 117
 118 enum fsmonitor_cookie_item_result {
 119         FCIR_ERROR = -1, /* could not create cookie file ? */
 120         FCIR_INIT,
 121         FCIR_SEEN,
 122         FCIR_ABORT,
 123 };
 124
/*
 * One pending cookie, stored in the daemon state's `cookies` hashmap
 * while a client thread waits for it.
 */
struct fsmonitor_cookie_item {
        struct hashmap_entry entry; /* hashmap linkage, hashed by name */
        char *name; /* "<pid>-<seq>" string, owned by this item */
        enum fsmonitor_cookie_item_result result; /* FCIR_INIT until resolved */
};
 130
 131 static int cookies_cmp(const void *data, const struct hashmap_entry *he1,
 132                      const struct hashmap_entry *he2, const void *keydata)
 133 {
 134         const struct fsmonitor_cookie_item *a =
 135                 container_of(he1, const struct fsmonitor_cookie_item, entry);
 136         const struct fsmonitor_cookie_item *b =
 137                 container_of(he2, const struct fsmonitor_cookie_item, entry);
 138
 139         return strcmp(a->name, keydata ? keydata : b->name);
 140 }
 141
 142 static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
 143         struct fsmonitor_daemon_state *state)
 144 {
 145         /* assert current thread holding state->main_lock */
 146
 147         int fd;
 148         struct fsmonitor_cookie_item *cookie;
 149         struct strbuf cookie_pathname = STRBUF_INIT;
 150         struct strbuf cookie_filename = STRBUF_INIT;
 151         enum fsmonitor_cookie_item_result result;
 152         int my_cookie_seq;
 153
 154         CALLOC_ARRAY(cookie, 1);
 155
 156         my_cookie_seq = state->cookie_seq++;
 157
 158         strbuf_addf(&cookie_filename, "%i-%i", getpid(), my_cookie_seq);
 159
 160         strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix);
 161         strbuf_addbuf(&cookie_pathname, &cookie_filename);
 162
 163         cookie->name = strbuf_detach(&cookie_filename, NULL);
 164         cookie->result = FCIR_INIT;
 165         hashmap_entry_init(&cookie->entry, strhash(cookie->name));
 166
 167         hashmap_add(&state->cookies, &cookie->entry);
 168
 169         trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
 170                          cookie->name, cookie_pathname.buf);
 171
 172         /*
 173          * Create the cookie file on disk and then wait for a notification
 174          * that the listener thread has seen it.
 175          */
 176         fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600);
 177         if (fd < 0) {
 178                 error_errno(_("could not create fsmonitor cookie '%s'"),
 179                             cookie->name);
 180
 181                 cookie->result = FCIR_ERROR;
 182                 goto done;
 183         }
 184
 185         /*
 186          * Technically, close() and unlink() can fail, but we don't
 187          * care here.  We only created the file to trigger a watch
 188          * event from the FS to know that when we're up to date.
 189          */
 190         close(fd);
 191         unlink(cookie_pathname.buf);
 192
 193         /*
 194          * Technically, this is an infinite wait (well, unless another
 195          * thread sends us an abort).  I'd like to change this to
 196          * use `pthread_cond_timedwait()` and return an error/timeout
 197          * and let the caller do the trivial response thing, but we
 198          * don't have that routine in our thread-utils.
 199          *
 200          * After extensive beta testing I'm not really worried about
 201          * this.  Also note that the above open() and unlink() calls
 202          * will cause at least two FS events on that path, so the odds
 203          * of getting stuck are pretty slim.
 204          */
 205         while (cookie->result == FCIR_INIT)
 206                 pthread_cond_wait(&state->cookies_cond,
 207                                   &state->main_lock);
 208
 209 done:
 210         hashmap_remove(&state->cookies, &cookie->entry, NULL);
 211
 212         result = cookie->result;
 213
 214         free(cookie->name);
 215         free(cookie);
 216         strbuf_release(&cookie_pathname);
 217
 218         return result;
 219 }
 220
 221 /*
 222  * Mark these cookies as _SEEN and wake up the corresponding client threads.
 223  */
 224 static void with_lock__mark_cookies_seen(struct fsmonitor_daemon_state *state,
 225                                          const struct string_list *cookie_names)
 226 {
 227         /* assert current thread holding state->main_lock */
 228
 229         int k;
 230         int nr_seen = 0;
 231
 232         for (k = 0; k < cookie_names->nr; k++) {
 233                 struct fsmonitor_cookie_item key;
 234                 struct fsmonitor_cookie_item *cookie;
 235
 236                 key.name = cookie_names->items[k].string;
 237                 hashmap_entry_init(&key.entry, strhash(key.name));
 238
 239                 cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL);
 240                 if (cookie) {
 241                         trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'",
 242                                          cookie->name);
 243                         cookie->result = FCIR_SEEN;
 244                         nr_seen++;
 245                 }
 246         }
 247
 248         if (nr_seen)
 249                 pthread_cond_broadcast(&state->cookies_cond);
 250 }
 251
 252 /*
 253  * Set _ABORT on all pending cookies and wake up all client threads.
 254  */
 255 static void with_lock__abort_all_cookies(struct fsmonitor_daemon_state *state)
 256 {
 257         /* assert current thread holding state->main_lock */
 258
 259         struct hashmap_iter iter;
 260         struct fsmonitor_cookie_item *cookie;
 261         int nr_aborted = 0;
 262
 263         hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) {
 264                 trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'",
 265                                  cookie->name);
 266                 cookie->result = FCIR_ABORT;
 267                 nr_aborted++;
 268         }
 269
 270         if (nr_aborted)
 271                 pthread_cond_broadcast(&state->cookies_cond);
 272 }
 273
 274 /*
 275  * Requests to and from a FSMonitor Protocol V2 provider use an opaque
 276  * "token" as a virtual timestamp.  Clients can request a summary of all
 277  * created/deleted/modified files relative to a token.  In the response,
 278  * clients receive a new token for the next (relative) request.
 279  *
 280  *
 281  * Token Format
 282  * ============
 283  *
 284  * The contents of the token are private and provider-specific.
 285  *
 286  * For the built-in fsmonitor--daemon, we define a token as follows:
 287  *
 288  *     "builtin" ":" <token_id> ":" <sequence_nr>
 289  *
 290  * The "builtin" prefix is used as a namespace to avoid conflicts
 291  * with other providers (such as Watchman).
 292  *
 293  * The <token_id> is an arbitrary OPAQUE string, such as a GUID,
 294  * UUID, or {timestamp,pid}.  It is used to group all filesystem
 295  * events that happened while the daemon was monitoring (and in-sync
 296  * with the filesystem).
 297  *
 298  *     Unlike FSMonitor Protocol V1, it is not defined as a timestamp
 299  *     and does not define less-than/greater-than relationships.
 300  *     (There are too many race conditions to rely on file system
 301  *     event timestamps.)
 302  *
 303  * The <sequence_nr> is a simple integer incremented whenever the
 304  * daemon needs to make its state public.  For example, if 1000 file
 305  * system events come in, but no clients have requested the data,
 306  * the daemon can continue to accumulate file changes in the same
 307  * bin and does not need to advance the sequence number.  However,
 308  * as soon as a client does arrive, the daemon needs to start a new
 309  * bin and increment the sequence number.
 310  *
 311  *     The sequence number serves as the boundary between 2 sets
 312  *     of bins -- the older ones that the client has already seen
 313  *     and the newer ones that it hasn't.
 314  *
 315  * When a new <token_id> is created, the <sequence_nr> is reset to
 316  * zero.
 317  *
 318  *
 319  * About Token Ids
 320  * ===============
 321  *
 322  * A new token_id is created:
 323  *
 324  * [1] each time the daemon is started.
 325  *
 326  * [2] any time that the daemon must re-sync with the filesystem
 327  *     (such as when the kernel drops or we miss events on a very
 328  *     active volume).
 329  *
 330  * [3] in response to a client "flush" command (for dropped event
 331  *     testing).
 332  *
 333  * When a new token_id is created, the daemon is free to discard all
 334  * cached filesystem events associated with any previous token_ids.
 335  * Events associated with a non-current token_id will never be sent
 336  * to a client.  A token_id change implicitly means that the daemon
  337  * has a gap in its event history.
 338  *
 339  * Therefore, clients that present a token with a stale (non-current)
 340  * token_id will always be given a trivial response.
 341  */
/*
 * All state belonging to one <token_id>: the id string itself and the
 * list of batches collected under it.
 */
struct fsmonitor_token_data {
        struct strbuf token_id; /* the <token_id> part of the token */
        struct fsmonitor_batch *batch_head; /* head of the batch list; pinned when a response is sent */
        struct fsmonitor_batch *batch_tail; /* last batch still kept after truncation */
        uint64_t client_ref_count; /* client threads walking the list outside the lock */
};
 348
 349 struct fsmonitor_batch {
 350         struct fsmonitor_batch *next;
 351         uint64_t batch_seq_nr;
 352         const char **interned_paths;
 353         size_t nr, alloc;
 354         time_t pinned_time;
 355 };
 356
 357 static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
 358 {
 359         static int test_env_value = -1;
 360         static uint64_t flush_count = 0;
 361         struct fsmonitor_token_data *token;
 362         struct fsmonitor_batch *batch;
 363
 364         CALLOC_ARRAY(token, 1);
 365         batch = fsmonitor_batch__new();
 366
 367         strbuf_init(&token->token_id, 0);
 368         token->batch_head = batch;
 369         token->batch_tail = batch;
 370         token->client_ref_count = 0;
 371
 372         if (test_env_value < 0)
 373                 test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0);
 374
 375         if (!test_env_value) {
 376                 struct timeval tv;
 377                 struct tm tm;
 378                 time_t secs;
 379
 380                 gettimeofday(&tv, NULL);
 381                 secs = tv.tv_sec;
 382                 gmtime_r(&secs, &tm);
 383
 384                 strbuf_addf(&token->token_id,
 385                             "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ",
 386                             flush_count++,
 387                             getpid(),
 388                             tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 389                             tm.tm_hour, tm.tm_min, tm.tm_sec,
 390                             (long)tv.tv_usec);
 391         } else {
 392                 strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
 393         }
 394
 395         /*
 396          * We created a new <token_id> and are starting a new series
 397          * of tokens with a zero <seq_nr>.
 398          *
 399          * Since clients cannot guess our new (non test) <token_id>
 400          * they will always receive a trivial response (because of the
 401          * mismatch on the <token_id>).  The trivial response will
 402          * tell them our new <token_id> so that subsequent requests
 403          * will be relative to our new series.  (And when sending that
 404          * response, we pin the current head of the batch list.)
 405          *
 406          * Even if the client correctly guesses the <token_id>, their
 407          * request of "builtin:<token_id>:0" asks for all changes MORE
 408          * RECENT than batch/bin 0.
 409          *
 410          * This implies that it is a waste to accumulate paths in the
 411          * initial batch/bin (because they will never be transmitted).
 412          *
 413          * So the daemon could be running for days and watching the
 414          * file system, but doesn't need to actually accumulate any
 415          * paths UNTIL we need to set a reference point for a later
 416          * relative request.
 417          *
 418          * However, it is very useful for testing to always have a
 419          * reference point set.  Pin batch 0 to force early file system
 420          * events to accumulate.
 421          */
 422         if (test_env_value)
 423                 batch->pinned_time = time(NULL);
 424
 425         return token;
 426 }
 427
 428 struct fsmonitor_batch *fsmonitor_batch__new(void)
 429 {
 430         struct fsmonitor_batch *batch;
 431
 432         CALLOC_ARRAY(batch, 1);
 433
 434         return batch;
 435 }
 436
 437 void fsmonitor_batch__free_list(struct fsmonitor_batch *batch)
 438 {
 439         while (batch) {
 440                 struct fsmonitor_batch *next = batch->next;
 441
 442                 /*
 443                  * The actual strings within the array of this batch
 444                  * are interned, so we don't own them.  We only own
 445                  * the array.
 446                  */
 447                 free(batch->interned_paths);
 448                 free(batch);
 449
 450                 batch = next;
 451         }
 452 }
 453
 454 void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
 455                                const char *path)
 456 {
 457         const char *interned_path = strintern(path);
 458
 459         trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);
 460
 461         ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
 462         batch->interned_paths[batch->nr++] = interned_path;
 463 }
 464
 465 static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
 466                                      const struct fsmonitor_batch *batch_src)
 467 {
 468         size_t k;
 469
 470         ALLOC_GROW(batch_dest->interned_paths,
 471                    batch_dest->nr + batch_src->nr + 1,
 472                    batch_dest->alloc);
 473
 474         for (k = 0; k < batch_src->nr; k++)
 475                 batch_dest->interned_paths[batch_dest->nr++] =
 476                         batch_src->interned_paths[k];
 477 }
 478
 479 /*
 480  * To keep the batch list from growing unbounded in response to filesystem
 481  * activity, we try to truncate old batches from the end of the list as
 482  * they become irrelevant.
 483  *
 484  * We assume that the .git/index will be updated with the most recent token
 485  * any time the index is updated.  And future commands will only ask for
 486  * recent changes *since* that new token.  So as tokens advance into the
 487  * future, older batch items will never be requested/needed.  So we can
 488  * truncate them without loss of functionality.
 489  *
 490  * However, multiple commands may be talking to the daemon concurrently
 491  * or perform a slow command, so a little "token skew" is possible.
 492  * Therefore, we want this to be a little bit lazy and have a generous
 493  * delay.
 494  *
 495  * The current reader thread walked backwards in time from `token->batch_head`
 496  * back to `batch_marker` somewhere in the middle of the batch list.
 497  *
 498  * Let's walk backwards in time from that marker an arbitrary delay
 499  * and truncate the list there.  Note that these timestamps are completely
 500  * artificial (based on when we pinned the batch item) and not on any
 501  * filesystem activity.
 502  *
 503  * Return the obsolete portion of the list after we have removed it from
 504  * the official list so that the caller can free it after leaving the lock.
 505  */
 506 #define MY_TIME_DELAY_SECONDS (5 * 60) /* seconds */
 507
 508 static struct fsmonitor_batch *with_lock__truncate_old_batches(
 509         struct fsmonitor_daemon_state *state,
 510         const struct fsmonitor_batch *batch_marker)
 511 {
 512         /* assert current thread holding state->main_lock */
 513
 514         const struct fsmonitor_batch *batch;
 515         struct fsmonitor_batch *remainder;
 516
 517         if (!batch_marker)
 518                 return NULL;
 519
 520         trace_printf_key(&trace_fsmonitor, "Truncate: mark (%"PRIu64",%"PRIu64")",
 521                          batch_marker->batch_seq_nr,
 522                          (uint64_t)batch_marker->pinned_time);
 523
 524         for (batch = batch_marker; batch; batch = batch->next) {
 525                 time_t t;
 526
 527                 if (!batch->pinned_time) /* an overflow batch */
 528                         continue;
 529
 530                 t = batch->pinned_time + MY_TIME_DELAY_SECONDS;
 531                 if (t > batch_marker->pinned_time) /* too close to marker */
 532                         continue;
 533
 534                 goto truncate_past_here;
 535         }
 536
 537         return NULL;
 538
 539 truncate_past_here:
 540         state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch;
 541
 542         remainder = ((struct fsmonitor_batch *)batch)->next;
 543         ((struct fsmonitor_batch *)batch)->next = NULL;
 544
 545         return remainder;
 546 }
 547
 548 static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
 549 {
 550         if (!token)
 551                 return;
 552
 553         assert(token->client_ref_count == 0);
 554
 555         strbuf_release(&token->token_id);
 556
 557         fsmonitor_batch__free_list(token->batch_head);
 558
 559         free(token);
 560 }
 561
 562 /*
 563  * Flush all of our cached data about the filesystem.  Call this if we
 564  * lose sync with the filesystem and miss some notification events.
 565  *
 566  * [1] If we are missing events, then we no longer have a complete
 567  *     history of the directory (relative to our current start token).
 568  *     We should create a new token and start fresh (as if we just
 569  *     booted up).
 570  *
 571  * [2] Some of those lost events may have been for cookie files.  We
 572  *     should assume the worst and abort them rather letting them starve.
 573  *
 574  * If there are no concurrent threads reading the current token data
 575  * series, we can free it now.  Otherwise, let the last reader free
 576  * it.
 577  *
 578  * Either way, the old token data series is no longer associated with
 579  * our state data.
 580  */
 581 static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state)
 582 {
 583         /* assert current thread holding state->main_lock */
 584
 585         struct fsmonitor_token_data *free_me = NULL;
 586         struct fsmonitor_token_data *new_one = NULL;
 587
 588         new_one = fsmonitor_new_token_data();
 589
 590         if (state->current_token_data->client_ref_count == 0)
 591                 free_me = state->current_token_data;
 592         state->current_token_data = new_one;
 593
 594         fsmonitor_free_token_data(free_me);
 595
 596         with_lock__abort_all_cookies(state);
 597 }
 598
/*
 * Locking wrapper around with_lock__do_force_resync(): take
 * state->main_lock, replace the current token data with a fresh token
 * (which also aborts all pending cookies), then release the lock.
 */
void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
{
        pthread_mutex_lock(&state->main_lock);
        with_lock__do_force_resync(state);
        pthread_mutex_unlock(&state->main_lock);
}
 605
 606 /*
 607  * Format an opaque token string to send to the client.
 608  */
 609 static void with_lock__format_response_token(
 610         struct strbuf *response_token,
 611         const struct strbuf *response_token_id,
 612         const struct fsmonitor_batch *batch)
 613 {
 614         /* assert current thread holding state->main_lock */
 615
 616         strbuf_reset(response_token);
 617         strbuf_addf(response_token, "builtin:%s:%"PRIu64,
 618                     response_token_id->buf, batch->batch_seq_nr);
 619 }
 620
 621 /*
 622  * Parse an opaque token from the client.
 623  * Returns -1 on error.
 624  */
 625 static int fsmonitor_parse_client_token(const char *buf_token,
 626                                         struct strbuf *requested_token_id,
 627                                         uint64_t *seq_nr)
 628 {
 629         const char *p;
 630         char *p_end;
 631
 632         strbuf_reset(requested_token_id);
 633         *seq_nr = 0;
 634
 635         if (!skip_prefix(buf_token, "builtin:", &p))
 636                 return -1;
 637
 638         while (*p && *p != ':')
 639                 strbuf_addch(requested_token_id, *p++);
 640         if (!*p++)
 641                 return -1;
 642
 643         *seq_nr = (uint64_t)strtoumax(p, &p_end, 10);
 644         if (*p_end)
 645                 return -1;
 646
 647         return 0;
 648 }
 649
/*
 * Instantiate the khash table type named "str" (kh_str_t), keyed by
 * `const char *` with the string hash and equality helpers.
 * do_handle_client() declares a `kh_str_t *shown` of this type.
 * NOTE(review): the 0 argument presumably selects set (not map)
 * semantics -- confirm against khash.h.
 */
KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal)
 651
 652 static int do_handle_client(struct fsmonitor_daemon_state *state,
 653                             const char *command,
 654                             ipc_server_reply_cb *reply,
 655                             struct ipc_server_reply_data *reply_data)
 656 {
 657         struct fsmonitor_token_data *token_data = NULL;
 658         struct strbuf response_token = STRBUF_INIT;
 659         struct strbuf requested_token_id = STRBUF_INIT;
 660         struct strbuf payload = STRBUF_INIT;
 661         uint64_t requested_oldest_seq_nr = 0;
 662         uint64_t total_response_len = 0;
 663         const char *p;
 664         const struct fsmonitor_batch *batch_head;
 665         const struct fsmonitor_batch *batch;
 666         struct fsmonitor_batch *remainder = NULL;
 667         intmax_t count = 0, duplicates = 0;
 668         kh_str_t *shown;
 669         int hash_ret;
 670         int do_trivial = 0;
 671         int do_flush = 0;
 672         int do_cookie = 0;
 673         enum fsmonitor_cookie_item_result cookie_result;
 674
 675         /*
 676          * We expect `command` to be of the form:
 677          *
 678          * <command> := quit NUL
 679          *            | flush NUL
 680          *            | <V1-time-since-epoch-ns> NUL
 681          *            | <V2-opaque-fsmonitor-token> NUL
 682          */
 683
 684         if (!strcmp(command, "quit")) {
 685                 /*
 686                  * A client has requested over the socket/pipe that the
 687                  * daemon shutdown.
 688                  *
 689                  * Tell the IPC thread pool to shutdown (which completes
 690                  * the await in the main thread (which can stop the
 691                  * fsmonitor listener thread)).
 692                  *
 693                  * There is no reply to the client.
 694                  */
 695                 return SIMPLE_IPC_QUIT;
 696
 697         } else if (!strcmp(command, "flush")) {
 698                 /*
 699                  * Flush all of our cached data and generate a new token
 700                  * just like if we lost sync with the filesystem.
 701                  *
 702                  * Then send a trivial response using the new token.
 703                  */
 704                 do_flush = 1;
 705                 do_trivial = 1;
 706
 707         } else if (!skip_prefix(command, "builtin:", &p)) {
 708                 /* assume V1 timestamp or garbage */
 709
 710                 char *p_end;
 711
 712                 strtoumax(command, &p_end, 10);
 713                 trace_printf_key(&trace_fsmonitor,
 714                                  ((*p_end) ?
 715                                   "fsmonitor: invalid command line '%s'" :
 716                                   "fsmonitor: unsupported V1 protocol '%s'"),
 717                                  command);
 718                 do_trivial = 1;
 719                 do_cookie = 1;
 720
 721         } else {
 722                 /* We have "builtin:*" */
 723                 if (fsmonitor_parse_client_token(command, &requested_token_id,
 724                                                  &requested_oldest_seq_nr)) {
 725                         trace_printf_key(&trace_fsmonitor,
 726                                          "fsmonitor: invalid V2 protocol token '%s'",
 727                                          command);
 728                         do_trivial = 1;
 729                         do_cookie = 1;
 730
 731                 } else {
 732                         /*
 733                          * We have a V2 valid token:
 734                          *     "builtin:<token_id>:<seq_nr>"
 735                          */
 736                         do_cookie = 1;
 737                 }
 738         }
 739
 740         pthread_mutex_lock(&state->main_lock);
 741
 742         if (!state->current_token_data)
 743                 BUG("fsmonitor state does not have a current token");
 744
 745         /*
 746          * Write a cookie file inside the directory being watched in
 747          * an effort to flush out existing filesystem events that we
 748          * actually care about.  Suspend this client thread until we
 749          * see the filesystem events for this cookie file.
 750          *
 751          * Creating the cookie lets us guarantee that our FS listener
 752          * thread has drained the kernel queue and we are caught up
 753          * with the kernel.
 754          *
 755          * If we cannot create the cookie (or otherwise guarantee that
 756          * we are caught up), we send a trivial response.  We have to
 757          * assume that there might be some very, very recent activity
 758          * on the FS still in flight.
 759          */
 760         if (do_cookie) {
 761                 cookie_result = with_lock__wait_for_cookie(state);
 762                 if (cookie_result != FCIR_SEEN) {
 763                         error(_("fsmonitor: cookie_result '%d' != SEEN"),
 764                               cookie_result);
 765                         do_trivial = 1;
 766                 }
 767         }
 768
 769         if (do_flush)
 770                 with_lock__do_force_resync(state);
 771
 772         /*
 773          * We mark the current head of the batch list as "pinned" so
 774          * that the listener thread will treat this item as read-only
 775          * (and prevent any more paths from being added to it) from
 776          * now on.
 777          */
 778         token_data = state->current_token_data;
 779         batch_head = token_data->batch_head;
 780         ((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL);
 781
 782         /*
 783          * FSMonitor Protocol V2 requires that we send a response header
 784          * with a "new current token" and then all of the paths that changed
 785          * since the "requested token".  We send the seq_nr of the just-pinned
 786          * head batch so that future requests from a client will be relative
 787          * to it.
 788          */
 789         with_lock__format_response_token(&response_token,
 790                                          &token_data->token_id, batch_head);
 791
 792         reply(reply_data, response_token.buf, response_token.len + 1);
 793         total_response_len += response_token.len + 1;
 794
 795         trace2_data_string("fsmonitor", the_repository, "response/token",
 796                            response_token.buf);
 797         trace_printf_key(&trace_fsmonitor, "response token: %s",
 798                          response_token.buf);
 799
 800         if (!do_trivial) {
 801                 if (strcmp(requested_token_id.buf, token_data->token_id.buf)) {
 802                         /*
 803                          * The client last spoke to a different daemon
 804                          * instance -OR- the daemon had to resync with
 805                          * the filesystem (and lost events), so reject.
 806                          */
 807                         trace2_data_string("fsmonitor", the_repository,
 808                                            "response/token", "different");
 809                         do_trivial = 1;
 810
 811                 } else if (requested_oldest_seq_nr <
 812                            token_data->batch_tail->batch_seq_nr) {
 813                         /*
 814                          * The client wants older events than we have for
 815                          * this token_id.  This means that the end of our
 816                          * batch list was truncated and we cannot give the
 817                          * client a complete snapshot relative to their
 818                          * request.
 819                          */
 820                         trace_printf_key(&trace_fsmonitor,
 821                                          "client requested truncated data");
 822                         do_trivial = 1;
 823                 }
 824         }
 825
 826         if (do_trivial) {
 827                 pthread_mutex_unlock(&state->main_lock);
 828
 829                 reply(reply_data, "/", 2);
 830
 831                 trace2_data_intmax("fsmonitor", the_repository,
 832                                    "response/trivial", 1);
 833
 834                 goto cleanup;
 835         }
 836
 837         /*
 838          * We're going to hold onto a pointer to the current
 839          * token-data while we walk the list of batches of files.
 840          * During this time, we will NOT be under the lock.
 841          * So we ref-count it.
 842          *
 843          * This allows the listener thread to continue prepending
 844          * new batches of items to the token-data (which we'll ignore).
 845          *
 846          * AND it allows the listener thread to do a token-reset
 847          * (and install a new `current_token_data`).
 848          */
 849         token_data->client_ref_count++;
 850
 851         pthread_mutex_unlock(&state->main_lock);
 852
 853         /*
 854          * The client request is relative to the token that they sent,
 855          * so walk the batch list backwards from the current head back
 856          * to the batch (sequence number) they named.
 857          *
 858          * We use khash to de-dup the list of pathnames.
 859          *
 860          * NEEDSWORK: each batch contains a list of interned strings,
 861          * so we only need to do pointer comparisons here to build the
 862          * hash table.  Currently, we're still comparing the string
 863          * values.
 864          */
 865         shown = kh_init_str();
 866         for (batch = batch_head;
 867              batch && batch->batch_seq_nr > requested_oldest_seq_nr;
 868              batch = batch->next) {
 869                 size_t k;
 870
 871                 for (k = 0; k < batch->nr; k++) {
 872                         const char *s = batch->interned_paths[k];
 873                         size_t s_len;
 874
 875                         if (kh_get_str(shown, s) != kh_end(shown))
 876                                 duplicates++;
 877                         else {
 878                                 kh_put_str(shown, s, &hash_ret);
 879
 880                                 trace_printf_key(&trace_fsmonitor,
 881                                                  "send[%"PRIuMAX"]: %s",
 882                                                  count, s);
 883
 884                                 /* Each path gets written with a trailing NUL */
 885                                 s_len = strlen(s) + 1;
 886
 887                                 if (payload.len + s_len >=
 888                                     LARGE_PACKET_DATA_MAX) {
 889                                         reply(reply_data, payload.buf,
 890                                               payload.len);
 891                                         total_response_len += payload.len;
 892                                         strbuf_reset(&payload);
 893                                 }
 894
 895                                 strbuf_add(&payload, s, s_len);
 896                                 count++;
 897                         }
 898                 }
 899         }
 900
 901         if (payload.len) {
 902                 reply(reply_data, payload.buf, payload.len);
 903                 total_response_len += payload.len;
 904         }
 905
 906         kh_release_str(shown);
 907
 908         pthread_mutex_lock(&state->main_lock);
 909
 910         if (token_data->client_ref_count > 0)
 911                 token_data->client_ref_count--;
 912
 913         if (token_data->client_ref_count == 0) {
 914                 if (token_data != state->current_token_data) {
 915                         /*
 916                          * The listener thread did a token-reset while we were
 917                          * walking the batch list.  Therefore, this token is
 918                          * stale and can be discarded completely.  If we are
 919                          * the last reader thread using this token, we own
 920                          * that work.
 921                          */
 922                         fsmonitor_free_token_data(token_data);
 923                 } else if (batch) {
 924                         /*
 925                          * We are holding the lock and are the only
 926                          * reader of the ref-counted portion of the
 927                          * list, so we get the honor of seeing if the
 928                          * list can be truncated to save memory.
 929                          *
 930                          * The main loop did not walk to the end of the
 931                          * list, so this batch is the first item in the
 932                          * batch-list that is older than the requested
 933                          * end-point sequence number.  See if the tail
 934                          * end of the list is obsolete.
 935                          */
 936                         remainder = with_lock__truncate_old_batches(state,
 937                                                                     batch);
 938                 }
 939         }
 940
 941         pthread_mutex_unlock(&state->main_lock);
 942
 943         if (remainder)
 944                 fsmonitor_batch__free_list(remainder);
 945
 946         trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len);
 947         trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count);
 948         trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates);
 949
 950 cleanup:
 951         strbuf_release(&response_token);
 952         strbuf_release(&requested_token_id);
 953         strbuf_release(&payload);
 954
 955         return 0;
 956 }
 957
 958 static ipc_server_application_cb handle_client;
 959
 960 static int handle_client(void *data,
 961                          const char *command, size_t command_len,
 962                          ipc_server_reply_cb *reply,
 963                          struct ipc_server_reply_data *reply_data)
 964 {
 965         struct fsmonitor_daemon_state *state = data;
 966         int result;
 967
 968         /*
 969          * The Simple IPC API now supports {char*, len} arguments, but
 970          * FSMonitor always uses proper null-terminated strings, so
 971          * we can ignore the command_len argument.  (Trust, but verify.)
 972          */
 973         if (command_len != strlen(command))
 974                 BUG("FSMonitor assumes text messages");
 975
 976         trace_printf_key(&trace_fsmonitor, "requested token: %s", command);
 977
 978         trace2_region_enter("fsmonitor", "handle_client", the_repository);
 979         trace2_data_string("fsmonitor", the_repository, "request", command);
 980
 981         result = do_handle_client(state, command, reply, reply_data);
 982
 983         trace2_region_leave("fsmonitor", "handle_client", the_repository);
 984
 985         return result;
 986 }
 987
 988 #define FSMONITOR_DIR           "fsmonitor--daemon"
 989 #define FSMONITOR_COOKIE_DIR    "cookies"
 990 #define FSMONITOR_COOKIE_PREFIX (FSMONITOR_DIR "/" FSMONITOR_COOKIE_DIR "/")
 991
 992 enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
 993         const char *rel)
 994 {
 995         if (fspathncmp(rel, ".git", 4))
 996                 return IS_WORKDIR_PATH;
 997         rel += 4;
 998
 999         if (!*rel)
1000                 return IS_DOT_GIT;
1001         if (*rel != '/')
1002                 return IS_WORKDIR_PATH; /* e.g. .gitignore */
1003         rel++;
1004
1005         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1006                         strlen(FSMONITOR_COOKIE_PREFIX)))
1007                 return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
1008
1009         return IS_INSIDE_DOT_GIT;
1010 }
1011
1012 enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
1013         const char *rel)
1014 {
1015         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1016                         strlen(FSMONITOR_COOKIE_PREFIX)))
1017                 return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
1018
1019         return IS_INSIDE_GITDIR;
1020 }
1021
1022 static enum fsmonitor_path_type try_classify_workdir_abs_path(
1023         struct fsmonitor_daemon_state *state,
1024         const char *path)
1025 {
1026         const char *rel;
1027
1028         if (fspathncmp(path, state->path_worktree_watch.buf,
1029                        state->path_worktree_watch.len))
1030                 return IS_OUTSIDE_CONE;
1031
1032         rel = path + state->path_worktree_watch.len;
1033
1034         if (!*rel)
1035                 return IS_WORKDIR_PATH; /* it is the root dir exactly */
1036         if (*rel != '/')
1037                 return IS_OUTSIDE_CONE;
1038         rel++;
1039
1040         return fsmonitor_classify_path_workdir_relative(rel);
1041 }
1042
1043 enum fsmonitor_path_type fsmonitor_classify_path_absolute(
1044         struct fsmonitor_daemon_state *state,
1045         const char *path)
1046 {
1047         const char *rel;
1048         enum fsmonitor_path_type t;
1049
1050         t = try_classify_workdir_abs_path(state, path);
1051         if (state->nr_paths_watching == 1)
1052                 return t;
1053         if (t != IS_OUTSIDE_CONE)
1054                 return t;
1055
1056         if (fspathncmp(path, state->path_gitdir_watch.buf,
1057                        state->path_gitdir_watch.len))
1058                 return IS_OUTSIDE_CONE;
1059
1060         rel = path + state->path_gitdir_watch.len;
1061
1062         if (!*rel)
1063                 return IS_GITDIR; /* it is the <gitdir> exactly */
1064         if (*rel != '/')
1065                 return IS_OUTSIDE_CONE;
1066         rel++;
1067
1068         return fsmonitor_classify_path_gitdir_relative(rel);
1069 }
1070
1071 /*
1072  * We try to combine small batches at the front of the batch-list to avoid
1073  * having a long list.  This hopefully makes it a little easier when we want
1074  * to truncate and maintain the list.  However, we don't want the paths array
1075  * to just keep growing and growing with realloc, so we insert an arbitrary
1076  * limit.
1077  */
1078 #define MY_COMBINE_LIMIT (1024)
1079
1080 void fsmonitor_publish(struct fsmonitor_daemon_state *state,
1081                        struct fsmonitor_batch *batch,
1082                        const struct string_list *cookie_names)
1083 {
1084         if (!batch && !cookie_names->nr)
1085                 return;
1086
1087         pthread_mutex_lock(&state->main_lock);
1088
1089         if (batch) {
1090                 struct fsmonitor_batch *head;
1091
1092                 head = state->current_token_data->batch_head;
1093                 if (!head) {
1094                         BUG("token does not have batch");
1095                 } else if (head->pinned_time) {
1096                         /*
1097                          * We cannot alter the current batch list
1098                          * because:
1099                          *
1100                          * [a] it is being transmitted to at least one
1101                          * client and the handle_client() thread has a
1102                          * ref-count, but not a lock on the batch list
1103                          * starting with this item.
1104                          *
1105                          * [b] it has been transmitted in the past to
1106                          * at least one client such that future
1107                          * requests are relative to this head batch.
1108                          *
1109                          * So, we can only prepend a new batch onto
1110                          * the front of the list.
1111                          */
1112                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1113                         batch->next = head;
1114                         state->current_token_data->batch_head = batch;
1115                 } else if (!head->batch_seq_nr) {
1116                         /*
1117                          * Batch 0 is unpinned.  See the note in
1118                          * `fsmonitor_new_token_data()` about why we
1119                          * don't need to accumulate these paths.
1120                          */
1121                         fsmonitor_batch__free_list(batch);
1122                 } else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
1123                         /*
1124                          * The head batch in the list has never been
1125                          * transmitted to a client, but folding the
1126                          * contents of the new batch onto it would
1127                          * exceed our arbitrary limit, so just prepend
1128                          * the new batch onto the list.
1129                          */
1130                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1131                         batch->next = head;
1132                         state->current_token_data->batch_head = batch;
1133                 } else {
1134                         /*
1135                          * We are free to add the paths in the given
1136                          * batch onto the end of the current head batch.
1137                          */
1138                         fsmonitor_batch__combine(head, batch);
1139                         fsmonitor_batch__free_list(batch);
1140                 }
1141         }
1142
1143         if (cookie_names->nr)
1144                 with_lock__mark_cookies_seen(state, cookie_names);
1145
1146         pthread_mutex_unlock(&state->main_lock);
1147 }
1148
/*
 * Thread entry point for the health thread: register the thread with
 * trace2 under the name "fsm-health", run the health loop on the
 * daemon state passed in `_state`, and unregister once the loop
 * returns.  The thread result is always NULL.
 */
static void *fsm_health__thread_proc(void *_state)
{
        trace2_thread_start("fsm-health");

        fsm_health__loop((struct fsmonitor_daemon_state *)_state);

        trace2_thread_exit();

        return NULL;
}
1160
1161 static void *fsm_listen__thread_proc(void *_state)
1162 {
1163         struct fsmonitor_daemon_state *state = _state;
1164
1165         trace2_thread_start("fsm-listen");
1166
1167         trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'",
1168                          state->path_worktree_watch.buf);
1169         if (state->nr_paths_watching > 1)
1170                 trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'",
1171                                  state->path_gitdir_watch.buf);
1172
1173         fsm_listen__loop(state);
1174
1175         pthread_mutex_lock(&state->main_lock);
1176         if (state->current_token_data &&
1177             state->current_token_data->client_ref_count == 0)
1178                 fsmonitor_free_token_data(state->current_token_data);
1179         state->current_token_data = NULL;
1180         pthread_mutex_unlock(&state->main_lock);
1181
1182         trace2_thread_exit();
1183         return NULL;
1184 }
1185
1186 static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state)
1187 {
1188         struct ipc_server_opts ipc_opts = {
1189                 .nr_threads = fsmonitor__ipc_threads,
1190
1191                 /*
1192                  * We know that there are no other active threads yet,
1193                  * so we can let the IPC layer temporarily chdir() if
1194                  * it needs to when creating the server side of the
1195                  * Unix domain socket.
1196                  */
1197                 .uds_disallow_chdir = 0
1198         };
1199         int health_started = 0;
1200         int listener_started = 0;
1201         int err = 0;
1202
1203         /*
1204          * Start the IPC thread pool before the we've started the file
1205          * system event listener thread so that we have the IPC handle
1206          * before we need it.
1207          */
1208         if (ipc_server_run_async(&state->ipc_server_data,
1209                                  state->path_ipc.buf, &ipc_opts,
1210                                  handle_client, state))
1211                 return error_errno(
1212                         _("could not start IPC thread pool on '%s'"),
1213                         state->path_ipc.buf);
1214
1215         /*
1216          * Start the fsmonitor listener thread to collect filesystem
1217          * events.
1218          */
1219         if (pthread_create(&state->listener_thread, NULL,
1220                            fsm_listen__thread_proc, state)) {
1221                 ipc_server_stop_async(state->ipc_server_data);
1222                 err = error(_("could not start fsmonitor listener thread"));
1223                 goto cleanup;
1224         }
1225         listener_started = 1;
1226
1227         /*
1228          * Start the health thread to watch over our process.
1229          */
1230         if (pthread_create(&state->health_thread, NULL,
1231                            fsm_health__thread_proc, state)) {
1232                 ipc_server_stop_async(state->ipc_server_data);
1233                 err = error(_("could not start fsmonitor health thread"));
1234                 goto cleanup;
1235         }
1236         health_started = 1;
1237
1238         /*
1239          * The daemon is now fully functional in background threads.
1240          * Our primary thread should now just wait while the threads
1241          * do all the work.
1242          */
1243 cleanup:
1244         /*
1245          * Wait for the IPC thread pool to shutdown (whether by client
1246          * request, from filesystem activity, or an error).
1247          */
1248         ipc_server_await(state->ipc_server_data);
1249
1250         /*
1251          * The fsmonitor listener thread may have received a shutdown
1252          * event from the IPC thread pool, but it doesn't hurt to tell
1253          * it again.  And wait for it to shutdown.
1254          */
1255         if (listener_started) {
1256                 fsm_listen__stop_async(state);
1257                 pthread_join(state->listener_thread, NULL);
1258         }
1259
1260         if (health_started) {
1261                 fsm_health__stop_async(state);
1262                 pthread_join(state->health_thread, NULL);
1263         }
1264
1265         if (err)
1266                 return err;
1267         if (state->listen_error_code)
1268                 return state->listen_error_code;
1269         if (state->health_error_code)
1270                 return state->health_error_code;
1271         return 0;
1272 }
1273
1274 static int fsmonitor_run_daemon(void)
1275 {
1276         struct fsmonitor_daemon_state state;
1277         const char *home;
1278         int err;
1279
1280         memset(&state, 0, sizeof(state));
1281
1282         hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
1283         pthread_mutex_init(&state.main_lock, NULL);
1284         pthread_cond_init(&state.cookies_cond, NULL);
1285         state.listen_error_code = 0;
1286         state.health_error_code = 0;
1287         state.current_token_data = fsmonitor_new_token_data();
1288
1289         /* Prepare to (recursively) watch the <worktree-root> directory. */
1290         strbuf_init(&state.path_worktree_watch, 0);
1291         strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree()));
1292         state.nr_paths_watching = 1;
1293
1294         strbuf_init(&state.alias.alias, 0);
1295         strbuf_init(&state.alias.points_to, 0);
1296         if ((err = fsmonitor__get_alias(state.path_worktree_watch.buf, &state.alias)))
1297                 goto done;
1298
1299         /*
1300          * We create and delete cookie files somewhere inside the .git
1301          * directory to help us keep sync with the file system.  If
1302          * ".git" is not a directory, then <gitdir> is not inside the
1303          * cone of <worktree-root>, so set up a second watch to watch
1304          * the <gitdir> so that we get events for the cookie files.
1305          */
1306         strbuf_init(&state.path_gitdir_watch, 0);
1307         strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch);
1308         strbuf_addstr(&state.path_gitdir_watch, "/.git");
1309         if (!is_directory(state.path_gitdir_watch.buf)) {
1310                 strbuf_reset(&state.path_gitdir_watch);
1311                 strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir()));
1312                 state.nr_paths_watching = 2;
1313         }
1314
1315         /*
1316          * We will write filesystem syncing cookie files into
1317          * <gitdir>/<fsmonitor-dir>/<cookie-dir>/<pid>-<seq>.
1318          *
1319          * The extra layers of subdirectories here keep us from
1320          * changing the mtime on ".git/" or ".git/foo/" when we create
1321          * or delete cookie files.
1322          *
1323          * There have been problems with some IDEs that do a
1324          * non-recursive watch of the ".git/" directory and run a
1325          * series of commands any time something happens.
1326          *
1327          * For example, if we place our cookie files directly in
1328          * ".git/" or ".git/foo/" then a `git status` (or similar
1329          * command) from the IDE will cause a cookie file to be
1330          * created in one of those dirs.  This causes the mtime of
1331          * those dirs to change.  This triggers the IDE's watch
1332          * notification.  This triggers the IDE to run those commands
1333          * again.  And the process repeats and the machine never goes
1334          * idle.
1335          *
1336          * Adding the extra layers of subdirectories prevents the
1337          * mtime of ".git/" and ".git/foo" from changing when a
1338          * cookie file is created.
1339          */
1340         strbuf_init(&state.path_cookie_prefix, 0);
1341         strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch);
1342
1343         strbuf_addch(&state.path_cookie_prefix, '/');
1344         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_DIR);
1345         mkdir(state.path_cookie_prefix.buf, 0777);
1346
1347         strbuf_addch(&state.path_cookie_prefix, '/');
1348         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_DIR);
1349         mkdir(state.path_cookie_prefix.buf, 0777);
1350
1351         strbuf_addch(&state.path_cookie_prefix, '/');
1352
1353         /*
1354          * We create a named-pipe or unix domain socket inside of the
1355          * ".git" directory.  (Well, on Windows, we base our named
1356          * pipe in the NPFS on the absolute path of the git
1357          * directory.)
1358          */
1359         strbuf_init(&state.path_ipc, 0);
1360         strbuf_addstr(&state.path_ipc,
1361                 absolute_path(fsmonitor_ipc__get_path(the_repository)));
1362
1363         /*
1364          * Confirm that we can create platform-specific resources for the
1365          * filesystem listener before we bother starting all the threads.
1366          */
1367         if (fsm_listen__ctor(&state)) {
1368                 err = error(_("could not initialize listener thread"));
1369                 goto done;
1370         }
1371
1372         if (fsm_health__ctor(&state)) {
1373                 err = error(_("could not initialize health thread"));
1374                 goto done;
1375         }
1376
1377         /*
1378          * CD out of the worktree root directory.
1379          *
1380          * The common Git startup mechanism causes our CWD to be the
1381          * root of the worktree.  On Windows, this causes our process
1382          * to hold a locked handle on the CWD.  This prevents the
1383          * worktree from being moved or deleted while the daemon is
1384          * running.
1385          *
1386          * We assume that our FS and IPC listener threads have either
1387          * opened all of the handles that they need or will do
1388          * everything using absolute paths.
1389          */
1390         home = getenv("HOME");
1391         if (home && *home && chdir(home))
1392                 die_errno(_("could not cd home '%s'"), home);
1393
1394         err = fsmonitor_run_daemon_1(&state);
1395
1396 done:
1397         pthread_cond_destroy(&state.cookies_cond);
1398         pthread_mutex_destroy(&state.main_lock);
1399         fsm_listen__dtor(&state);
1400         fsm_health__dtor(&state);
1401
1402         ipc_server_free(state.ipc_server_data);
1403
1404         strbuf_release(&state.path_worktree_watch);
1405         strbuf_release(&state.path_gitdir_watch);
1406         strbuf_release(&state.path_cookie_prefix);
1407         strbuf_release(&state.path_ipc);
1408         strbuf_release(&state.alias.alias);
1409         strbuf_release(&state.alias.points_to);
1410
1411         return err;
1412 }
1413
1414 static int try_to_run_foreground_daemon(int detach_console)
1415 {
1416         /*
1417          * Technically, we don't need to probe for an existing daemon
1418          * process, since we could just call `fsmonitor_run_daemon()`
1419          * and let it fail if the pipe/socket is busy.
1420          *
1421          * However, this method gives us a nicer error message for a
1422          * common error case.
1423          */
1424         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1425                 die(_("fsmonitor--daemon is already running '%s'"),
1426                     the_repository->worktree);
1427
1428         if (fsmonitor__announce_startup) {
1429                 fprintf(stderr, _("running fsmonitor-daemon in '%s'\n"),
1430                         the_repository->worktree);
1431                 fflush(stderr);
1432         }
1433
1434 #ifdef GIT_WINDOWS_NATIVE
1435         if (detach_console)
1436                 FreeConsole();
1437 #endif
1438
1439         return !!fsmonitor_run_daemon();
1440 }
1441
1442 static start_bg_wait_cb bg_wait_cb;
1443
1444 static int bg_wait_cb(const struct child_process *cp, void *cb_data)
1445 {
1446         enum ipc_active_state s = fsmonitor_ipc__get_state();
1447
1448         switch (s) {
1449         case IPC_STATE__LISTENING:
1450                 /* child is "ready" */
1451                 return 0;
1452
1453         case IPC_STATE__NOT_LISTENING:
1454         case IPC_STATE__PATH_NOT_FOUND:
1455                 /* give child more time */
1456                 return 1;
1457
1458         default:
1459         case IPC_STATE__INVALID_PATH:
1460         case IPC_STATE__OTHER_ERROR:
1461                 /* all the time in world won't help */
1462                 return -1;
1463         }
1464 }
1465
1466 static int try_to_start_background_daemon(void)
1467 {
1468         struct child_process cp = CHILD_PROCESS_INIT;
1469         enum start_bg_result sbgr;
1470
1471         /*
1472          * Before we try to create a background daemon process, see
1473          * if a daemon process is already listening.  This makes it
1474          * easier for us to report an already-listening error to the
1475          * console, since our spawn/daemon can only report the success
1476          * of creating the background process (and not whether it
1477          * immediately exited).
1478          */
1479         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1480                 die(_("fsmonitor--daemon is already running '%s'"),
1481                     the_repository->worktree);
1482
1483         if (fsmonitor__announce_startup) {
1484                 fprintf(stderr, _("starting fsmonitor-daemon in '%s'\n"),
1485                         the_repository->worktree);
1486                 fflush(stderr);
1487         }
1488
1489         cp.git_cmd = 1;
1490
1491         strvec_push(&cp.args, "fsmonitor--daemon");
1492         strvec_push(&cp.args, "run");
1493         strvec_push(&cp.args, "--detach");
1494         strvec_pushf(&cp.args, "--ipc-threads=%d", fsmonitor__ipc_threads);
1495
1496         cp.no_stdin = 1;
1497         cp.no_stdout = 1;
1498         cp.no_stderr = 1;
1499
1500         sbgr = start_bg_command(&cp, bg_wait_cb, NULL,
1501                                 fsmonitor__start_timeout_sec);
1502
1503         switch (sbgr) {
1504         case SBGR_READY:
1505                 return 0;
1506
1507         default:
1508         case SBGR_ERROR:
1509         case SBGR_CB_ERROR:
1510                 return error(_("daemon failed to start"));
1511
1512         case SBGR_TIMEOUT:
1513                 return error(_("daemon not online yet"));
1514
1515         case SBGR_DIED:
1516                 return error(_("daemon terminated"));
1517         }
1518 }
1519
1520 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
1521 {
1522         const char *subcmd;
1523         enum fsmonitor_reason reason;
1524         int detach_console = 0;
1525
1526         struct option options[] = {
1527                 OPT_BOOL(0, "detach", &detach_console, N_("detach from console")),
1528                 OPT_INTEGER(0, "ipc-threads",
1529                             &fsmonitor__ipc_threads,
1530                             N_("use <n> ipc worker threads")),
1531                 OPT_INTEGER(0, "start-timeout",
1532                             &fsmonitor__start_timeout_sec,
1533                             N_("max seconds to wait for background daemon startup")),
1534
1535                 OPT_END()
1536         };
1537
1538         git_config(fsmonitor_config, NULL);
1539
1540         argc = parse_options(argc, argv, prefix, options,
1541                              builtin_fsmonitor__daemon_usage, 0);
1542         if (argc != 1)
1543                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1544         subcmd = argv[0];
1545
1546         if (fsmonitor__ipc_threads < 1)
1547                 die(_("invalid 'ipc-threads' value (%d)"),
1548                     fsmonitor__ipc_threads);
1549
1550         prepare_repo_settings(the_repository);
1551         /*
1552          * If the repo is fsmonitor-compatible, explicitly set IPC-mode
1553          * (without bothering to load the `core.fsmonitor` config settings).
1554          *
1555          * If the repo is not compatible, the repo-settings will be set to
1556          * incompatible rather than IPC, so we can use one of the __get
1557          * routines to detect the discrepancy.
1558          */
1559         fsm_settings__set_ipc(the_repository);
1560
1561         reason = fsm_settings__get_reason(the_repository);
1562         if (reason > FSMONITOR_REASON_OK)
1563                 die("%s",
1564                     fsm_settings__get_incompatible_msg(the_repository,
1565                                                        reason));
1566
1567         if (!strcmp(subcmd, "start"))
1568                 return !!try_to_start_background_daemon();
1569
1570         if (!strcmp(subcmd, "run"))
1571                 return !!try_to_run_foreground_daemon(detach_console);
1572
1573         if (!strcmp(subcmd, "stop"))
1574                 return !!do_as_client__send_stop();
1575
1576         if (!strcmp(subcmd, "status"))
1577                 return !!do_as_client__status();
1578
1579         die(_("Unhandled subcommand '%s'"), subcmd);
1580 }
1581
1582 #else
1583 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix UNUSED)
1584 {
1585         struct option options[] = {
1586                 OPT_END()
1587         };
1588
1589         if (argc == 2 && !strcmp(argv[1], "-h"))
1590                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1591
1592         die(_("fsmonitor--daemon not supported on this platform"));
1593 }
1594 #endif