builtin/fsmonitor--daemon.c

   1 #include "builtin.h"
   2 #include "alloc.h"
   3 #include "config.h"
   4 #include "parse-options.h"
   5 #include "fsmonitor.h"
   6 #include "fsmonitor-ipc.h"
   7 #include "fsmonitor-path-utils.h"
   8 #include "compat/fsmonitor/fsm-health.h"
   9 #include "compat/fsmonitor/fsm-listen.h"
  10 #include "fsmonitor--daemon.h"
  11 #include "simple-ipc.h"
  12 #include "khash.h"
  13 #include "pkt-line.h"
  14
  15 static const char * const builtin_fsmonitor__daemon_usage[] = {
  16         N_("git fsmonitor--daemon start [<options>]"),
  17         N_("git fsmonitor--daemon run [<options>]"),
  18         "git fsmonitor--daemon stop",
  19         "git fsmonitor--daemon status",
  20         NULL
  21 };
  22
  23 #ifdef HAVE_FSMONITOR_DAEMON_BACKEND
  24 /*
  25  * Global state loaded from config.
  26  */
  27 #define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
  28 static int fsmonitor__ipc_threads = 8;
  29
  30 #define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
  31 static int fsmonitor__start_timeout_sec = 60;
  32
  33 #define FSMONITOR__ANNOUNCE_STARTUP "fsmonitor.announcestartup"
  34 static int fsmonitor__announce_startup = 0;
  35
  36 static int fsmonitor_config(const char *var, const char *value, void *cb)
  37 {
  38         if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
  39                 int i = git_config_int(var, value);
  40                 if (i < 1)
  41                         return error(_("value of '%s' out of range: %d"),
  42                                      FSMONITOR__IPC_THREADS, i);
  43                 fsmonitor__ipc_threads = i;
  44                 return 0;
  45         }
  46
  47         if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
  48                 int i = git_config_int(var, value);
  49                 if (i < 0)
  50                         return error(_("value of '%s' out of range: %d"),
  51                                      FSMONITOR__START_TIMEOUT, i);
  52                 fsmonitor__start_timeout_sec = i;
  53                 return 0;
  54         }
  55
  56         if (!strcmp(var, FSMONITOR__ANNOUNCE_STARTUP)) {
  57                 int is_bool;
  58                 int i = git_config_bool_or_int(var, value, &is_bool);
  59                 if (i < 0)
  60                         return error(_("value of '%s' not bool or int: %d"),
  61                                      var, i);
  62                 fsmonitor__announce_startup = i;
  63                 return 0;
  64         }
  65
  66         return git_default_config(var, value, cb);
  67 }
  68
  69 /*
  70  * Acting as a CLIENT.
  71  *
  72  * Send a "quit" command to the `git-fsmonitor--daemon` (if running)
  73  * and wait for it to shutdown.
  74  */
  75 static int do_as_client__send_stop(void)
  76 {
  77         struct strbuf answer = STRBUF_INIT;
  78         int ret;
  79
  80         ret = fsmonitor_ipc__send_command("quit", &answer);
  81
  82         /* The quit command does not return any response data. */
  83         strbuf_release(&answer);
  84
  85         if (ret)
  86                 return ret;
  87
  88         trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL);
  89         while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
  90                 sleep_millisec(50);
  91         trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL);
  92
  93         return 0;
  94 }
  95
  96 static int do_as_client__status(void)
  97 {
  98         enum ipc_active_state state = fsmonitor_ipc__get_state();
  99
 100         switch (state) {
 101         case IPC_STATE__LISTENING:
 102                 printf(_("fsmonitor-daemon is watching '%s'\n"),
 103                        the_repository->worktree);
 104                 return 0;
 105
 106         default:
 107                 printf(_("fsmonitor-daemon is not watching '%s'\n"),
 108                        the_repository->worktree);
 109                 return 1;
 110         }
 111 }
 112
 113 enum fsmonitor_cookie_item_result {
 114         FCIR_ERROR = -1, /* could not create cookie file ? */
 115         FCIR_INIT,
 116         FCIR_SEEN,
 117         FCIR_ABORT,
 118 };
 119
 120 struct fsmonitor_cookie_item {
 121         struct hashmap_entry entry;
 122         char *name;
 123         enum fsmonitor_cookie_item_result result;
 124 };
 125
 126 static int cookies_cmp(const void *data, const struct hashmap_entry *he1,
 127                      const struct hashmap_entry *he2, const void *keydata)
 128 {
 129         const struct fsmonitor_cookie_item *a =
 130                 container_of(he1, const struct fsmonitor_cookie_item, entry);
 131         const struct fsmonitor_cookie_item *b =
 132                 container_of(he2, const struct fsmonitor_cookie_item, entry);
 133
 134         return strcmp(a->name, keydata ? keydata : b->name);
 135 }
 136
 137 static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
 138         struct fsmonitor_daemon_state *state)
 139 {
 140         /* assert current thread holding state->main_lock */
 141
 142         int fd;
 143         struct fsmonitor_cookie_item *cookie;
 144         struct strbuf cookie_pathname = STRBUF_INIT;
 145         struct strbuf cookie_filename = STRBUF_INIT;
 146         enum fsmonitor_cookie_item_result result;
 147         int my_cookie_seq;
 148
 149         CALLOC_ARRAY(cookie, 1);
 150
 151         my_cookie_seq = state->cookie_seq++;
 152
 153         strbuf_addf(&cookie_filename, "%i-%i", getpid(), my_cookie_seq);
 154
 155         strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix);
 156         strbuf_addbuf(&cookie_pathname, &cookie_filename);
 157
 158         cookie->name = strbuf_detach(&cookie_filename, NULL);
 159         cookie->result = FCIR_INIT;
 160         hashmap_entry_init(&cookie->entry, strhash(cookie->name));
 161
 162         hashmap_add(&state->cookies, &cookie->entry);
 163
 164         trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
 165                          cookie->name, cookie_pathname.buf);
 166
 167         /*
 168          * Create the cookie file on disk and then wait for a notification
 169          * that the listener thread has seen it.
 170          */
 171         fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600);
 172         if (fd < 0) {
 173                 error_errno(_("could not create fsmonitor cookie '%s'"),
 174                             cookie->name);
 175
 176                 cookie->result = FCIR_ERROR;
 177                 goto done;
 178         }
 179
 180         /*
 181          * Technically, close() and unlink() can fail, but we don't
 182          * care here.  We only created the file to trigger a watch
 183          * event from the FS to know that when we're up to date.
 184          */
 185         close(fd);
 186         unlink(cookie_pathname.buf);
 187
 188         /*
 189          * Technically, this is an infinite wait (well, unless another
 190          * thread sends us an abort).  I'd like to change this to
 191          * use `pthread_cond_timedwait()` and return an error/timeout
 192          * and let the caller do the trivial response thing, but we
 193          * don't have that routine in our thread-utils.
 194          *
 195          * After extensive beta testing I'm not really worried about
 196          * this.  Also note that the above open() and unlink() calls
 197          * will cause at least two FS events on that path, so the odds
 198          * of getting stuck are pretty slim.
 199          */
 200         while (cookie->result == FCIR_INIT)
 201                 pthread_cond_wait(&state->cookies_cond,
 202                                   &state->main_lock);
 203
 204 done:
 205         hashmap_remove(&state->cookies, &cookie->entry, NULL);
 206
 207         result = cookie->result;
 208
 209         free(cookie->name);
 210         free(cookie);
 211         strbuf_release(&cookie_pathname);
 212
 213         return result;
 214 }
 215
 216 /*
 217  * Mark these cookies as _SEEN and wake up the corresponding client threads.
 218  */
 219 static void with_lock__mark_cookies_seen(struct fsmonitor_daemon_state *state,
 220                                          const struct string_list *cookie_names)
 221 {
 222         /* assert current thread holding state->main_lock */
 223
 224         int k;
 225         int nr_seen = 0;
 226
 227         for (k = 0; k < cookie_names->nr; k++) {
 228                 struct fsmonitor_cookie_item key;
 229                 struct fsmonitor_cookie_item *cookie;
 230
 231                 key.name = cookie_names->items[k].string;
 232                 hashmap_entry_init(&key.entry, strhash(key.name));
 233
 234                 cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL);
 235                 if (cookie) {
 236                         trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'",
 237                                          cookie->name);
 238                         cookie->result = FCIR_SEEN;
 239                         nr_seen++;
 240                 }
 241         }
 242
 243         if (nr_seen)
 244                 pthread_cond_broadcast(&state->cookies_cond);
 245 }
 246
 247 /*
 248  * Set _ABORT on all pending cookies and wake up all client threads.
 249  */
 250 static void with_lock__abort_all_cookies(struct fsmonitor_daemon_state *state)
 251 {
 252         /* assert current thread holding state->main_lock */
 253
 254         struct hashmap_iter iter;
 255         struct fsmonitor_cookie_item *cookie;
 256         int nr_aborted = 0;
 257
 258         hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) {
 259                 trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'",
 260                                  cookie->name);
 261                 cookie->result = FCIR_ABORT;
 262                 nr_aborted++;
 263         }
 264
 265         if (nr_aborted)
 266                 pthread_cond_broadcast(&state->cookies_cond);
 267 }
 268
 269 /*
 270  * Requests to and from a FSMonitor Protocol V2 provider use an opaque
 271  * "token" as a virtual timestamp.  Clients can request a summary of all
 272  * created/deleted/modified files relative to a token.  In the response,
 273  * clients receive a new token for the next (relative) request.
 274  *
 275  *
 276  * Token Format
 277  * ============
 278  *
 279  * The contents of the token are private and provider-specific.
 280  *
 281  * For the built-in fsmonitor--daemon, we define a token as follows:
 282  *
 283  *     "builtin" ":" <token_id> ":" <sequence_nr>
 284  *
 285  * The "builtin" prefix is used as a namespace to avoid conflicts
 286  * with other providers (such as Watchman).
 287  *
 288  * The <token_id> is an arbitrary OPAQUE string, such as a GUID,
 289  * UUID, or {timestamp,pid}.  It is used to group all filesystem
 290  * events that happened while the daemon was monitoring (and in-sync
 291  * with the filesystem).
 292  *
 293  *     Unlike FSMonitor Protocol V1, it is not defined as a timestamp
 294  *     and does not define less-than/greater-than relationships.
 295  *     (There are too many race conditions to rely on file system
 296  *     event timestamps.)
 297  *
 298  * The <sequence_nr> is a simple integer incremented whenever the
 299  * daemon needs to make its state public.  For example, if 1000 file
 300  * system events come in, but no clients have requested the data,
 301  * the daemon can continue to accumulate file changes in the same
 302  * bin and does not need to advance the sequence number.  However,
 303  * as soon as a client does arrive, the daemon needs to start a new
 304  * bin and increment the sequence number.
 305  *
 306  *     The sequence number serves as the boundary between 2 sets
 307  *     of bins -- the older ones that the client has already seen
 308  *     and the newer ones that it hasn't.
 309  *
 310  * When a new <token_id> is created, the <sequence_nr> is reset to
 311  * zero.
 312  *
 313  *
 314  * About Token Ids
 315  * ===============
 316  *
 317  * A new token_id is created:
 318  *
 319  * [1] each time the daemon is started.
 320  *
 321  * [2] any time that the daemon must re-sync with the filesystem
 322  *     (such as when the kernel drops or we miss events on a very
 323  *     active volume).
 324  *
 325  * [3] in response to a client "flush" command (for dropped event
 326  *     testing).
 327  *
 328  * When a new token_id is created, the daemon is free to discard all
 329  * cached filesystem events associated with any previous token_ids.
 330  * Events associated with a non-current token_id will never be sent
  331  * to a client.  A token_id change implicitly means that the daemon
  332  * has a gap in its event history.
 333  *
 334  * Therefore, clients that present a token with a stale (non-current)
 335  * token_id will always be given a trivial response.
 336  */
 337 struct fsmonitor_token_data {
 338         struct strbuf token_id;
 339         struct fsmonitor_batch *batch_head;
 340         struct fsmonitor_batch *batch_tail;
 341         uint64_t client_ref_count;
 342 };
 343
 344 struct fsmonitor_batch {
 345         struct fsmonitor_batch *next;
 346         uint64_t batch_seq_nr;
 347         const char **interned_paths;
 348         size_t nr, alloc;
 349         time_t pinned_time;
 350 };
 351
 352 static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
 353 {
 354         static int test_env_value = -1;
 355         static uint64_t flush_count = 0;
 356         struct fsmonitor_token_data *token;
 357         struct fsmonitor_batch *batch;
 358
 359         CALLOC_ARRAY(token, 1);
 360         batch = fsmonitor_batch__new();
 361
 362         strbuf_init(&token->token_id, 0);
 363         token->batch_head = batch;
 364         token->batch_tail = batch;
 365         token->client_ref_count = 0;
 366
 367         if (test_env_value < 0)
 368                 test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0);
 369
 370         if (!test_env_value) {
 371                 struct timeval tv;
 372                 struct tm tm;
 373                 time_t secs;
 374
 375                 gettimeofday(&tv, NULL);
 376                 secs = tv.tv_sec;
 377                 gmtime_r(&secs, &tm);
 378
 379                 strbuf_addf(&token->token_id,
 380                             "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ",
 381                             flush_count++,
 382                             getpid(),
 383                             tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 384                             tm.tm_hour, tm.tm_min, tm.tm_sec,
 385                             (long)tv.tv_usec);
 386         } else {
 387                 strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
 388         }
 389
 390         /*
 391          * We created a new <token_id> and are starting a new series
 392          * of tokens with a zero <seq_nr>.
 393          *
 394          * Since clients cannot guess our new (non test) <token_id>
 395          * they will always receive a trivial response (because of the
 396          * mismatch on the <token_id>).  The trivial response will
 397          * tell them our new <token_id> so that subsequent requests
 398          * will be relative to our new series.  (And when sending that
 399          * response, we pin the current head of the batch list.)
 400          *
 401          * Even if the client correctly guesses the <token_id>, their
 402          * request of "builtin:<token_id>:0" asks for all changes MORE
 403          * RECENT than batch/bin 0.
 404          *
 405          * This implies that it is a waste to accumulate paths in the
 406          * initial batch/bin (because they will never be transmitted).
 407          *
 408          * So the daemon could be running for days and watching the
 409          * file system, but doesn't need to actually accumulate any
 410          * paths UNTIL we need to set a reference point for a later
 411          * relative request.
 412          *
 413          * However, it is very useful for testing to always have a
 414          * reference point set.  Pin batch 0 to force early file system
 415          * events to accumulate.
 416          */
 417         if (test_env_value)
 418                 batch->pinned_time = time(NULL);
 419
 420         return token;
 421 }
 422
 423 struct fsmonitor_batch *fsmonitor_batch__new(void)
 424 {
 425         struct fsmonitor_batch *batch;
 426
 427         CALLOC_ARRAY(batch, 1);
 428
 429         return batch;
 430 }
 431
 432 void fsmonitor_batch__free_list(struct fsmonitor_batch *batch)
 433 {
 434         while (batch) {
 435                 struct fsmonitor_batch *next = batch->next;
 436
 437                 /*
 438                  * The actual strings within the array of this batch
 439                  * are interned, so we don't own them.  We only own
 440                  * the array.
 441                  */
 442                 free(batch->interned_paths);
 443                 free(batch);
 444
 445                 batch = next;
 446         }
 447 }
 448
 449 void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
 450                                const char *path)
 451 {
 452         const char *interned_path = strintern(path);
 453
 454         trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);
 455
 456         ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
 457         batch->interned_paths[batch->nr++] = interned_path;
 458 }
 459
 460 static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
 461                                      const struct fsmonitor_batch *batch_src)
 462 {
 463         size_t k;
 464
 465         ALLOC_GROW(batch_dest->interned_paths,
 466                    batch_dest->nr + batch_src->nr + 1,
 467                    batch_dest->alloc);
 468
 469         for (k = 0; k < batch_src->nr; k++)
 470                 batch_dest->interned_paths[batch_dest->nr++] =
 471                         batch_src->interned_paths[k];
 472 }
 473
 474 /*
 475  * To keep the batch list from growing unbounded in response to filesystem
 476  * activity, we try to truncate old batches from the end of the list as
 477  * they become irrelevant.
 478  *
 479  * We assume that the .git/index will be updated with the most recent token
 480  * any time the index is updated.  And future commands will only ask for
 481  * recent changes *since* that new token.  So as tokens advance into the
 482  * future, older batch items will never be requested/needed.  So we can
 483  * truncate them without loss of functionality.
 484  *
 485  * However, multiple commands may be talking to the daemon concurrently
 486  * or perform a slow command, so a little "token skew" is possible.
 487  * Therefore, we want this to be a little bit lazy and have a generous
 488  * delay.
 489  *
 490  * The current reader thread walked backwards in time from `token->batch_head`
 491  * back to `batch_marker` somewhere in the middle of the batch list.
 492  *
 493  * Let's walk backwards in time from that marker an arbitrary delay
 494  * and truncate the list there.  Note that these timestamps are completely
 495  * artificial (based on when we pinned the batch item) and not on any
 496  * filesystem activity.
 497  *
 498  * Return the obsolete portion of the list after we have removed it from
 499  * the official list so that the caller can free it after leaving the lock.
 500  */
 501 #define MY_TIME_DELAY_SECONDS (5 * 60) /* seconds */
 502
 503 static struct fsmonitor_batch *with_lock__truncate_old_batches(
 504         struct fsmonitor_daemon_state *state,
 505         const struct fsmonitor_batch *batch_marker)
 506 {
 507         /* assert current thread holding state->main_lock */
 508
 509         const struct fsmonitor_batch *batch;
 510         struct fsmonitor_batch *remainder;
 511
 512         if (!batch_marker)
 513                 return NULL;
 514
 515         trace_printf_key(&trace_fsmonitor, "Truncate: mark (%"PRIu64",%"PRIu64")",
 516                          batch_marker->batch_seq_nr,
 517                          (uint64_t)batch_marker->pinned_time);
 518
 519         for (batch = batch_marker; batch; batch = batch->next) {
 520                 time_t t;
 521
 522                 if (!batch->pinned_time) /* an overflow batch */
 523                         continue;
 524
 525                 t = batch->pinned_time + MY_TIME_DELAY_SECONDS;
 526                 if (t > batch_marker->pinned_time) /* too close to marker */
 527                         continue;
 528
 529                 goto truncate_past_here;
 530         }
 531
 532         return NULL;
 533
 534 truncate_past_here:
 535         state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch;
 536
 537         remainder = ((struct fsmonitor_batch *)batch)->next;
 538         ((struct fsmonitor_batch *)batch)->next = NULL;
 539
 540         return remainder;
 541 }
 542
 543 static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
 544 {
 545         if (!token)
 546                 return;
 547
 548         assert(token->client_ref_count == 0);
 549
 550         strbuf_release(&token->token_id);
 551
 552         fsmonitor_batch__free_list(token->batch_head);
 553
 554         free(token);
 555 }
 556
 557 /*
 558  * Flush all of our cached data about the filesystem.  Call this if we
 559  * lose sync with the filesystem and miss some notification events.
 560  *
 561  * [1] If we are missing events, then we no longer have a complete
 562  *     history of the directory (relative to our current start token).
 563  *     We should create a new token and start fresh (as if we just
 564  *     booted up).
 565  *
 566  * [2] Some of those lost events may have been for cookie files.  We
 567  *     should assume the worst and abort them rather letting them starve.
 568  *
 569  * If there are no concurrent threads reading the current token data
 570  * series, we can free it now.  Otherwise, let the last reader free
 571  * it.
 572  *
 573  * Either way, the old token data series is no longer associated with
 574  * our state data.
 575  */
 576 static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state)
 577 {
 578         /* assert current thread holding state->main_lock */
 579
 580         struct fsmonitor_token_data *free_me = NULL;
 581         struct fsmonitor_token_data *new_one = NULL;
 582
 583         new_one = fsmonitor_new_token_data();
 584
 585         if (state->current_token_data->client_ref_count == 0)
 586                 free_me = state->current_token_data;
 587         state->current_token_data = new_one;
 588
 589         fsmonitor_free_token_data(free_me);
 590
 591         with_lock__abort_all_cookies(state);
 592 }
 593
 594 void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
 595 {
 596         pthread_mutex_lock(&state->main_lock);
 597         with_lock__do_force_resync(state);
 598         pthread_mutex_unlock(&state->main_lock);
 599 }
 600
 601 /*
 602  * Format an opaque token string to send to the client.
 603  */
 604 static void with_lock__format_response_token(
 605         struct strbuf *response_token,
 606         const struct strbuf *response_token_id,
 607         const struct fsmonitor_batch *batch)
 608 {
 609         /* assert current thread holding state->main_lock */
 610
 611         strbuf_reset(response_token);
 612         strbuf_addf(response_token, "builtin:%s:%"PRIu64,
 613                     response_token_id->buf, batch->batch_seq_nr);
 614 }
 615
 616 /*
 617  * Parse an opaque token from the client.
 618  * Returns -1 on error.
 619  */
 620 static int fsmonitor_parse_client_token(const char *buf_token,
 621                                         struct strbuf *requested_token_id,
 622                                         uint64_t *seq_nr)
 623 {
 624         const char *p;
 625         char *p_end;
 626
 627         strbuf_reset(requested_token_id);
 628         *seq_nr = 0;
 629
 630         if (!skip_prefix(buf_token, "builtin:", &p))
 631                 return -1;
 632
 633         while (*p && *p != ':')
 634                 strbuf_addch(requested_token_id, *p++);
 635         if (!*p++)
 636                 return -1;
 637
 638         *seq_nr = (uint64_t)strtoumax(p, &p_end, 10);
 639         if (*p_end)
 640                 return -1;
 641
 642         return 0;
 643 }
 644
 645 KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal)
 646
 647 static int do_handle_client(struct fsmonitor_daemon_state *state,
 648                             const char *command,
 649                             ipc_server_reply_cb *reply,
 650                             struct ipc_server_reply_data *reply_data)
 651 {
 652         struct fsmonitor_token_data *token_data = NULL;
 653         struct strbuf response_token = STRBUF_INIT;
 654         struct strbuf requested_token_id = STRBUF_INIT;
 655         struct strbuf payload = STRBUF_INIT;
 656         uint64_t requested_oldest_seq_nr = 0;
 657         uint64_t total_response_len = 0;
 658         const char *p;
 659         const struct fsmonitor_batch *batch_head;
 660         const struct fsmonitor_batch *batch;
 661         struct fsmonitor_batch *remainder = NULL;
 662         intmax_t count = 0, duplicates = 0;
 663         kh_str_t *shown;
 664         int hash_ret;
 665         int do_trivial = 0;
 666         int do_flush = 0;
 667         int do_cookie = 0;
 668         enum fsmonitor_cookie_item_result cookie_result;
 669
 670         /*
 671          * We expect `command` to be of the form:
 672          *
 673          * <command> := quit NUL
 674          *            | flush NUL
 675          *            | <V1-time-since-epoch-ns> NUL
 676          *            | <V2-opaque-fsmonitor-token> NUL
 677          */
 678
 679         if (!strcmp(command, "quit")) {
 680                 /*
 681                  * A client has requested over the socket/pipe that the
 682                  * daemon shutdown.
 683                  *
 684                  * Tell the IPC thread pool to shutdown (which completes
 685                  * the await in the main thread (which can stop the
 686                  * fsmonitor listener thread)).
 687                  *
 688                  * There is no reply to the client.
 689                  */
 690                 return SIMPLE_IPC_QUIT;
 691
 692         } else if (!strcmp(command, "flush")) {
 693                 /*
 694                  * Flush all of our cached data and generate a new token
 695                  * just like if we lost sync with the filesystem.
 696                  *
 697                  * Then send a trivial response using the new token.
 698                  */
 699                 do_flush = 1;
 700                 do_trivial = 1;
 701
 702         } else if (!skip_prefix(command, "builtin:", &p)) {
 703                 /* assume V1 timestamp or garbage */
 704
 705                 char *p_end;
 706
 707                 strtoumax(command, &p_end, 10);
 708                 trace_printf_key(&trace_fsmonitor,
 709                                  ((*p_end) ?
 710                                   "fsmonitor: invalid command line '%s'" :
 711                                   "fsmonitor: unsupported V1 protocol '%s'"),
 712                                  command);
 713                 do_trivial = 1;
 714                 do_cookie = 1;
 715
 716         } else {
 717                 /* We have "builtin:*" */
 718                 if (fsmonitor_parse_client_token(command, &requested_token_id,
 719                                                  &requested_oldest_seq_nr)) {
 720                         trace_printf_key(&trace_fsmonitor,
 721                                          "fsmonitor: invalid V2 protocol token '%s'",
 722                                          command);
 723                         do_trivial = 1;
 724                         do_cookie = 1;
 725
 726                 } else {
 727                         /*
 728                          * We have a V2 valid token:
 729                          *     "builtin:<token_id>:<seq_nr>"
 730                          */
 731                         do_cookie = 1;
 732                 }
 733         }
 734
 735         pthread_mutex_lock(&state->main_lock);
 736
 737         if (!state->current_token_data)
 738                 BUG("fsmonitor state does not have a current token");
 739
 740         /*
 741          * Write a cookie file inside the directory being watched in
 742          * an effort to flush out existing filesystem events that we
 743          * actually care about.  Suspend this client thread until we
 744          * see the filesystem events for this cookie file.
 745          *
 746          * Creating the cookie lets us guarantee that our FS listener
 747          * thread has drained the kernel queue and we are caught up
 748          * with the kernel.
 749          *
 750          * If we cannot create the cookie (or otherwise guarantee that
 751          * we are caught up), we send a trivial response.  We have to
 752          * assume that there might be some very, very recent activity
 753          * on the FS still in flight.
 754          */
 755         if (do_cookie) {
 756                 cookie_result = with_lock__wait_for_cookie(state);
 757                 if (cookie_result != FCIR_SEEN) {
 758                         error(_("fsmonitor: cookie_result '%d' != SEEN"),
 759                               cookie_result);
 760                         do_trivial = 1;
 761                 }
 762         }
 763
 764         if (do_flush)
 765                 with_lock__do_force_resync(state);
 766
 767         /*
 768          * We mark the current head of the batch list as "pinned" so
 769          * that the listener thread will treat this item as read-only
 770          * (and prevent any more paths from being added to it) from
 771          * now on.
 772          */
 773         token_data = state->current_token_data;
 774         batch_head = token_data->batch_head;
 775         ((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL);
 776
 777         /*
 778          * FSMonitor Protocol V2 requires that we send a response header
 779          * with a "new current token" and then all of the paths that changed
 780          * since the "requested token".  We send the seq_nr of the just-pinned
 781          * head batch so that future requests from a client will be relative
 782          * to it.
 783          */
 784         with_lock__format_response_token(&response_token,
 785                                          &token_data->token_id, batch_head);
 786
 787         reply(reply_data, response_token.buf, response_token.len + 1);
 788         total_response_len += response_token.len + 1;
 789
 790         trace2_data_string("fsmonitor", the_repository, "response/token",
 791                            response_token.buf);
 792         trace_printf_key(&trace_fsmonitor, "response token: %s",
 793                          response_token.buf);
 794
 795         if (!do_trivial) {
 796                 if (strcmp(requested_token_id.buf, token_data->token_id.buf)) {
 797                         /*
 798                          * The client last spoke to a different daemon
 799                          * instance -OR- the daemon had to resync with
 800                          * the filesystem (and lost events), so reject.
 801                          */
 802                         trace2_data_string("fsmonitor", the_repository,
 803                                            "response/token", "different");
 804                         do_trivial = 1;
 805
 806                 } else if (requested_oldest_seq_nr <
 807                            token_data->batch_tail->batch_seq_nr) {
 808                         /*
 809                          * The client wants older events than we have for
 810                          * this token_id.  This means that the end of our
 811                          * batch list was truncated and we cannot give the
 812                          * client a complete snapshot relative to their
 813                          * request.
 814                          */
 815                         trace_printf_key(&trace_fsmonitor,
 816                                          "client requested truncated data");
 817                         do_trivial = 1;
 818                 }
 819         }
 820
 821         if (do_trivial) {
 822                 pthread_mutex_unlock(&state->main_lock);
 823
 824                 reply(reply_data, "/", 2);
 825
 826                 trace2_data_intmax("fsmonitor", the_repository,
 827                                    "response/trivial", 1);
 828
 829                 goto cleanup;
 830         }
 831
 832         /*
 833          * We're going to hold onto a pointer to the current
 834          * token-data while we walk the list of batches of files.
 835          * During this time, we will NOT be under the lock.
 836          * So we ref-count it.
 837          *
 838          * This allows the listener thread to continue prepending
 839          * new batches of items to the token-data (which we'll ignore).
 840          *
 841          * AND it allows the listener thread to do a token-reset
 842          * (and install a new `current_token_data`).
 843          */
 844         token_data->client_ref_count++;
 845
 846         pthread_mutex_unlock(&state->main_lock);
 847
 848         /*
 849          * The client request is relative to the token that they sent,
 850          * so walk the batch list backwards from the current head back
 851          * to the batch (sequence number) they named.
 852          *
 853          * We use khash to de-dup the list of pathnames.
 854          *
 855          * NEEDSWORK: each batch contains a list of interned strings,
 856          * so we only need to do pointer comparisons here to build the
 857          * hash table.  Currently, we're still comparing the string
 858          * values.
 859          */
 860         shown = kh_init_str();
 861         for (batch = batch_head;
 862              batch && batch->batch_seq_nr > requested_oldest_seq_nr;
 863              batch = batch->next) {
 864                 size_t k;
 865
 866                 for (k = 0; k < batch->nr; k++) {
 867                         const char *s = batch->interned_paths[k];
 868                         size_t s_len;
 869
 870                         if (kh_get_str(shown, s) != kh_end(shown))
 871                                 duplicates++;
 872                         else {
 873                                 kh_put_str(shown, s, &hash_ret);
 874
 875                                 trace_printf_key(&trace_fsmonitor,
 876                                                  "send[%"PRIuMAX"]: %s",
 877                                                  count, s);
 878
 879                                 /* Each path gets written with a trailing NUL */
 880                                 s_len = strlen(s) + 1;
 881
 882                                 if (payload.len + s_len >=
 883                                     LARGE_PACKET_DATA_MAX) {
 884                                         reply(reply_data, payload.buf,
 885                                               payload.len);
 886                                         total_response_len += payload.len;
 887                                         strbuf_reset(&payload);
 888                                 }
 889
 890                                 strbuf_add(&payload, s, s_len);
 891                                 count++;
 892                         }
 893                 }
 894         }
 895
 896         if (payload.len) {
 897                 reply(reply_data, payload.buf, payload.len);
 898                 total_response_len += payload.len;
 899         }
 900
 901         kh_release_str(shown);
 902
 903         pthread_mutex_lock(&state->main_lock);
 904
 905         if (token_data->client_ref_count > 0)
 906                 token_data->client_ref_count--;
 907
 908         if (token_data->client_ref_count == 0) {
 909                 if (token_data != state->current_token_data) {
 910                         /*
 911                          * The listener thread did a token-reset while we were
 912                          * walking the batch list.  Therefore, this token is
 913                          * stale and can be discarded completely.  If we are
 914                          * the last reader thread using this token, we own
 915                          * that work.
 916                          */
 917                         fsmonitor_free_token_data(token_data);
 918                 } else if (batch) {
 919                         /*
 920                          * We are holding the lock and are the only
 921                          * reader of the ref-counted portion of the
 922                          * list, so we get the honor of seeing if the
 923                          * list can be truncated to save memory.
 924                          *
 925                          * The main loop did not walk to the end of the
 926                          * list, so this batch is the first item in the
 927                          * batch-list that is older than the requested
 928                          * end-point sequence number.  See if the tail
 929                          * end of the list is obsolete.
 930                          */
 931                         remainder = with_lock__truncate_old_batches(state,
 932                                                                     batch);
 933                 }
 934         }
 935
 936         pthread_mutex_unlock(&state->main_lock);
 937
 938         if (remainder)
 939                 fsmonitor_batch__free_list(remainder);
 940
 941         trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len);
 942         trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count);
 943         trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates);
 944
 945 cleanup:
 946         strbuf_release(&response_token);
 947         strbuf_release(&requested_token_id);
 948         strbuf_release(&payload);
 949
 950         return 0;
 951 }
 952
 953 static ipc_server_application_cb handle_client;
 954
 955 static int handle_client(void *data,
 956                          const char *command, size_t command_len,
 957                          ipc_server_reply_cb *reply,
 958                          struct ipc_server_reply_data *reply_data)
 959 {
 960         struct fsmonitor_daemon_state *state = data;
 961         int result;
 962
 963         /*
 964          * The Simple IPC API now supports {char*, len} arguments, but
 965          * FSMonitor always uses proper null-terminated strings, so
 966          * we can ignore the command_len argument.  (Trust, but verify.)
 967          */
 968         if (command_len != strlen(command))
 969                 BUG("FSMonitor assumes text messages");
 970
 971         trace_printf_key(&trace_fsmonitor, "requested token: %s", command);
 972
 973         trace2_region_enter("fsmonitor", "handle_client", the_repository);
 974         trace2_data_string("fsmonitor", the_repository, "request", command);
 975
 976         result = do_handle_client(state, command, reply, reply_data);
 977
 978         trace2_region_leave("fsmonitor", "handle_client", the_repository);
 979
 980         return result;
 981 }
 982
 983 #define FSMONITOR_DIR           "fsmonitor--daemon"
 984 #define FSMONITOR_COOKIE_DIR    "cookies"
 985 #define FSMONITOR_COOKIE_PREFIX (FSMONITOR_DIR "/" FSMONITOR_COOKIE_DIR "/")
 986
 987 enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
 988         const char *rel)
 989 {
 990         if (fspathncmp(rel, ".git", 4))
 991                 return IS_WORKDIR_PATH;
 992         rel += 4;
 993
 994         if (!*rel)
 995                 return IS_DOT_GIT;
 996         if (*rel != '/')
 997                 return IS_WORKDIR_PATH; /* e.g. .gitignore */
 998         rel++;
 999
1000         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1001                         strlen(FSMONITOR_COOKIE_PREFIX)))
1002                 return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
1003
1004         return IS_INSIDE_DOT_GIT;
1005 }
1006
1007 enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
1008         const char *rel)
1009 {
1010         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1011                         strlen(FSMONITOR_COOKIE_PREFIX)))
1012                 return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
1013
1014         return IS_INSIDE_GITDIR;
1015 }
1016
1017 static enum fsmonitor_path_type try_classify_workdir_abs_path(
1018         struct fsmonitor_daemon_state *state,
1019         const char *path)
1020 {
1021         const char *rel;
1022
1023         if (fspathncmp(path, state->path_worktree_watch.buf,
1024                        state->path_worktree_watch.len))
1025                 return IS_OUTSIDE_CONE;
1026
1027         rel = path + state->path_worktree_watch.len;
1028
1029         if (!*rel)
1030                 return IS_WORKDIR_PATH; /* it is the root dir exactly */
1031         if (*rel != '/')
1032                 return IS_OUTSIDE_CONE;
1033         rel++;
1034
1035         return fsmonitor_classify_path_workdir_relative(rel);
1036 }
1037
1038 enum fsmonitor_path_type fsmonitor_classify_path_absolute(
1039         struct fsmonitor_daemon_state *state,
1040         const char *path)
1041 {
1042         const char *rel;
1043         enum fsmonitor_path_type t;
1044
1045         t = try_classify_workdir_abs_path(state, path);
1046         if (state->nr_paths_watching == 1)
1047                 return t;
1048         if (t != IS_OUTSIDE_CONE)
1049                 return t;
1050
1051         if (fspathncmp(path, state->path_gitdir_watch.buf,
1052                        state->path_gitdir_watch.len))
1053                 return IS_OUTSIDE_CONE;
1054
1055         rel = path + state->path_gitdir_watch.len;
1056
1057         if (!*rel)
1058                 return IS_GITDIR; /* it is the <gitdir> exactly */
1059         if (*rel != '/')
1060                 return IS_OUTSIDE_CONE;
1061         rel++;
1062
1063         return fsmonitor_classify_path_gitdir_relative(rel);
1064 }
1065
1066 /*
1067  * We try to combine small batches at the front of the batch-list to avoid
1068  * having a long list.  This hopefully makes it a little easier when we want
1069  * to truncate and maintain the list.  However, we don't want the paths array
1070  * to just keep growing and growing with realloc, so we insert an arbitrary
1071  * limit.
1072  */
1073 #define MY_COMBINE_LIMIT (1024)
1074
1075 void fsmonitor_publish(struct fsmonitor_daemon_state *state,
1076                        struct fsmonitor_batch *batch,
1077                        const struct string_list *cookie_names)
1078 {
1079         if (!batch && !cookie_names->nr)
1080                 return;
1081
1082         pthread_mutex_lock(&state->main_lock);
1083
1084         if (batch) {
1085                 struct fsmonitor_batch *head;
1086
1087                 head = state->current_token_data->batch_head;
1088                 if (!head) {
1089                         BUG("token does not have batch");
1090                 } else if (head->pinned_time) {
1091                         /*
1092                          * We cannot alter the current batch list
1093                          * because:
1094                          *
1095                          * [a] it is being transmitted to at least one
1096                          * client and the handle_client() thread has a
1097                          * ref-count, but not a lock on the batch list
1098                          * starting with this item.
1099                          *
1100                          * [b] it has been transmitted in the past to
1101                          * at least one client such that future
1102                          * requests are relative to this head batch.
1103                          *
1104                          * So, we can only prepend a new batch onto
1105                          * the front of the list.
1106                          */
1107                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1108                         batch->next = head;
1109                         state->current_token_data->batch_head = batch;
1110                 } else if (!head->batch_seq_nr) {
1111                         /*
1112                          * Batch 0 is unpinned.  See the note in
1113                          * `fsmonitor_new_token_data()` about why we
1114                          * don't need to accumulate these paths.
1115                          */
1116                         fsmonitor_batch__free_list(batch);
1117                 } else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
1118                         /*
1119                          * The head batch in the list has never been
1120                          * transmitted to a client, but folding the
1121                          * contents of the new batch onto it would
1122                          * exceed our arbitrary limit, so just prepend
1123                          * the new batch onto the list.
1124                          */
1125                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1126                         batch->next = head;
1127                         state->current_token_data->batch_head = batch;
1128                 } else {
1129                         /*
1130                          * We are free to add the paths in the given
1131                          * batch onto the end of the current head batch.
1132                          */
1133                         fsmonitor_batch__combine(head, batch);
1134                         fsmonitor_batch__free_list(batch);
1135                 }
1136         }
1137
1138         if (cookie_names->nr)
1139                 with_lock__mark_cookies_seen(state, cookie_names);
1140
1141         pthread_mutex_unlock(&state->main_lock);
1142 }
1143
/*
 * Entry point of the health thread.
 *
 * `_state` is the daemon state passed to pthread_create().  The thread
 * registers itself with trace2 as "fsm-health", runs the health loop
 * until it returns, and always yields NULL.
 */
static void *fsm_health__thread_proc(void *_state)
{
        trace2_thread_start("fsm-health");
        fsm_health__loop((struct fsmonitor_daemon_state *)_state);
        trace2_thread_exit();

        return NULL;
}
1155
1156 static void *fsm_listen__thread_proc(void *_state)
1157 {
1158         struct fsmonitor_daemon_state *state = _state;
1159
1160         trace2_thread_start("fsm-listen");
1161
1162         trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'",
1163                          state->path_worktree_watch.buf);
1164         if (state->nr_paths_watching > 1)
1165                 trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'",
1166                                  state->path_gitdir_watch.buf);
1167
1168         fsm_listen__loop(state);
1169
1170         pthread_mutex_lock(&state->main_lock);
1171         if (state->current_token_data &&
1172             state->current_token_data->client_ref_count == 0)
1173                 fsmonitor_free_token_data(state->current_token_data);
1174         state->current_token_data = NULL;
1175         pthread_mutex_unlock(&state->main_lock);
1176
1177         trace2_thread_exit();
1178         return NULL;
1179 }
1180
1181 static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state)
1182 {
1183         struct ipc_server_opts ipc_opts = {
1184                 .nr_threads = fsmonitor__ipc_threads,
1185
1186                 /*
1187                  * We know that there are no other active threads yet,
1188                  * so we can let the IPC layer temporarily chdir() if
1189                  * it needs to when creating the server side of the
1190                  * Unix domain socket.
1191                  */
1192                 .uds_disallow_chdir = 0
1193         };
1194         int health_started = 0;
1195         int listener_started = 0;
1196         int err = 0;
1197
1198         /*
1199          * Start the IPC thread pool before the we've started the file
1200          * system event listener thread so that we have the IPC handle
1201          * before we need it.
1202          */
1203         if (ipc_server_run_async(&state->ipc_server_data,
1204                                  state->path_ipc.buf, &ipc_opts,
1205                                  handle_client, state))
1206                 return error_errno(
1207                         _("could not start IPC thread pool on '%s'"),
1208                         state->path_ipc.buf);
1209
1210         /*
1211          * Start the fsmonitor listener thread to collect filesystem
1212          * events.
1213          */
1214         if (pthread_create(&state->listener_thread, NULL,
1215                            fsm_listen__thread_proc, state)) {
1216                 ipc_server_stop_async(state->ipc_server_data);
1217                 err = error(_("could not start fsmonitor listener thread"));
1218                 goto cleanup;
1219         }
1220         listener_started = 1;
1221
1222         /*
1223          * Start the health thread to watch over our process.
1224          */
1225         if (pthread_create(&state->health_thread, NULL,
1226                            fsm_health__thread_proc, state)) {
1227                 ipc_server_stop_async(state->ipc_server_data);
1228                 err = error(_("could not start fsmonitor health thread"));
1229                 goto cleanup;
1230         }
1231         health_started = 1;
1232
1233         /*
1234          * The daemon is now fully functional in background threads.
1235          * Our primary thread should now just wait while the threads
1236          * do all the work.
1237          */
1238 cleanup:
1239         /*
1240          * Wait for the IPC thread pool to shutdown (whether by client
1241          * request, from filesystem activity, or an error).
1242          */
1243         ipc_server_await(state->ipc_server_data);
1244
1245         /*
1246          * The fsmonitor listener thread may have received a shutdown
1247          * event from the IPC thread pool, but it doesn't hurt to tell
1248          * it again.  And wait for it to shutdown.
1249          */
1250         if (listener_started) {
1251                 fsm_listen__stop_async(state);
1252                 pthread_join(state->listener_thread, NULL);
1253         }
1254
1255         if (health_started) {
1256                 fsm_health__stop_async(state);
1257                 pthread_join(state->health_thread, NULL);
1258         }
1259
1260         if (err)
1261                 return err;
1262         if (state->listen_error_code)
1263                 return state->listen_error_code;
1264         if (state->health_error_code)
1265                 return state->health_error_code;
1266         return 0;
1267 }
1268
1269 static int fsmonitor_run_daemon(void)
1270 {
1271         struct fsmonitor_daemon_state state;
1272         const char *home;
1273         int err;
1274
1275         memset(&state, 0, sizeof(state));
1276
1277         hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
1278         pthread_mutex_init(&state.main_lock, NULL);
1279         pthread_cond_init(&state.cookies_cond, NULL);
1280         state.listen_error_code = 0;
1281         state.health_error_code = 0;
1282         state.current_token_data = fsmonitor_new_token_data();
1283
1284         /* Prepare to (recursively) watch the <worktree-root> directory. */
1285         strbuf_init(&state.path_worktree_watch, 0);
1286         strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree()));
1287         state.nr_paths_watching = 1;
1288
1289         strbuf_init(&state.alias.alias, 0);
1290         strbuf_init(&state.alias.points_to, 0);
1291         if ((err = fsmonitor__get_alias(state.path_worktree_watch.buf, &state.alias)))
1292                 goto done;
1293
1294         /*
1295          * We create and delete cookie files somewhere inside the .git
1296          * directory to help us keep sync with the file system.  If
1297          * ".git" is not a directory, then <gitdir> is not inside the
1298          * cone of <worktree-root>, so set up a second watch to watch
1299          * the <gitdir> so that we get events for the cookie files.
1300          */
1301         strbuf_init(&state.path_gitdir_watch, 0);
1302         strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch);
1303         strbuf_addstr(&state.path_gitdir_watch, "/.git");
1304         if (!is_directory(state.path_gitdir_watch.buf)) {
1305                 strbuf_reset(&state.path_gitdir_watch);
1306                 strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir()));
1307                 state.nr_paths_watching = 2;
1308         }
1309
1310         /*
1311          * We will write filesystem syncing cookie files into
1312          * <gitdir>/<fsmonitor-dir>/<cookie-dir>/<pid>-<seq>.
1313          *
1314          * The extra layers of subdirectories here keep us from
1315          * changing the mtime on ".git/" or ".git/foo/" when we create
1316          * or delete cookie files.
1317          *
1318          * There have been problems with some IDEs that do a
1319          * non-recursive watch of the ".git/" directory and run a
1320          * series of commands any time something happens.
1321          *
1322          * For example, if we place our cookie files directly in
1323          * ".git/" or ".git/foo/" then a `git status` (or similar
1324          * command) from the IDE will cause a cookie file to be
1325          * created in one of those dirs.  This causes the mtime of
1326          * those dirs to change.  This triggers the IDE's watch
1327          * notification.  This triggers the IDE to run those commands
1328          * again.  And the process repeats and the machine never goes
1329          * idle.
1330          *
1331          * Adding the extra layers of subdirectories prevents the
1332          * mtime of ".git/" and ".git/foo" from changing when a
1333          * cookie file is created.
1334          */
1335         strbuf_init(&state.path_cookie_prefix, 0);
1336         strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch);
1337
1338         strbuf_addch(&state.path_cookie_prefix, '/');
1339         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_DIR);
1340         mkdir(state.path_cookie_prefix.buf, 0777);
1341
1342         strbuf_addch(&state.path_cookie_prefix, '/');
1343         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_DIR);
1344         mkdir(state.path_cookie_prefix.buf, 0777);
1345
1346         strbuf_addch(&state.path_cookie_prefix, '/');
1347
1348         /*
1349          * We create a named-pipe or unix domain socket inside of the
1350          * ".git" directory.  (Well, on Windows, we base our named
1351          * pipe in the NPFS on the absolute path of the git
1352          * directory.)
1353          */
1354         strbuf_init(&state.path_ipc, 0);
1355         strbuf_addstr(&state.path_ipc,
1356                 absolute_path(fsmonitor_ipc__get_path(the_repository)));
1357
1358         /*
1359          * Confirm that we can create platform-specific resources for the
1360          * filesystem listener before we bother starting all the threads.
1361          */
1362         if (fsm_listen__ctor(&state)) {
1363                 err = error(_("could not initialize listener thread"));
1364                 goto done;
1365         }
1366
1367         if (fsm_health__ctor(&state)) {
1368                 err = error(_("could not initialize health thread"));
1369                 goto done;
1370         }
1371
1372         /*
1373          * CD out of the worktree root directory.
1374          *
1375          * The common Git startup mechanism causes our CWD to be the
1376          * root of the worktree.  On Windows, this causes our process
1377          * to hold a locked handle on the CWD.  This prevents the
1378          * worktree from being moved or deleted while the daemon is
1379          * running.
1380          *
1381          * We assume that our FS and IPC listener threads have either
1382          * opened all of the handles that they need or will do
1383          * everything using absolute paths.
1384          */
1385         home = getenv("HOME");
1386         if (home && *home && chdir(home))
1387                 die_errno(_("could not cd home '%s'"), home);
1388
1389         err = fsmonitor_run_daemon_1(&state);
1390
1391 done:
1392         pthread_cond_destroy(&state.cookies_cond);
1393         pthread_mutex_destroy(&state.main_lock);
1394         fsm_listen__dtor(&state);
1395         fsm_health__dtor(&state);
1396
1397         ipc_server_free(state.ipc_server_data);
1398
1399         strbuf_release(&state.path_worktree_watch);
1400         strbuf_release(&state.path_gitdir_watch);
1401         strbuf_release(&state.path_cookie_prefix);
1402         strbuf_release(&state.path_ipc);
1403         strbuf_release(&state.alias.alias);
1404         strbuf_release(&state.alias.points_to);
1405
1406         return err;
1407 }
1408
1409 static int try_to_run_foreground_daemon(int detach_console)
1410 {
1411         /*
1412          * Technically, we don't need to probe for an existing daemon
1413          * process, since we could just call `fsmonitor_run_daemon()`
1414          * and let it fail if the pipe/socket is busy.
1415          *
1416          * However, this method gives us a nicer error message for a
1417          * common error case.
1418          */
1419         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1420                 die(_("fsmonitor--daemon is already running '%s'"),
1421                     the_repository->worktree);
1422
1423         if (fsmonitor__announce_startup) {
1424                 fprintf(stderr, _("running fsmonitor-daemon in '%s'\n"),
1425                         the_repository->worktree);
1426                 fflush(stderr);
1427         }
1428
1429 #ifdef GIT_WINDOWS_NATIVE
1430         if (detach_console)
1431                 FreeConsole();
1432 #endif
1433
1434         return !!fsmonitor_run_daemon();
1435 }
1436
1437 static start_bg_wait_cb bg_wait_cb;
1438
1439 static int bg_wait_cb(const struct child_process *cp, void *cb_data)
1440 {
1441         enum ipc_active_state s = fsmonitor_ipc__get_state();
1442
1443         switch (s) {
1444         case IPC_STATE__LISTENING:
1445                 /* child is "ready" */
1446                 return 0;
1447
1448         case IPC_STATE__NOT_LISTENING:
1449         case IPC_STATE__PATH_NOT_FOUND:
1450                 /* give child more time */
1451                 return 1;
1452
1453         default:
1454         case IPC_STATE__INVALID_PATH:
1455         case IPC_STATE__OTHER_ERROR:
1456                 /* all the time in world won't help */
1457                 return -1;
1458         }
1459 }
1460
1461 static int try_to_start_background_daemon(void)
1462 {
1463         struct child_process cp = CHILD_PROCESS_INIT;
1464         enum start_bg_result sbgr;
1465
1466         /*
1467          * Before we try to create a background daemon process, see
1468          * if a daemon process is already listening.  This makes it
1469          * easier for us to report an already-listening error to the
1470          * console, since our spawn/daemon can only report the success
1471          * of creating the background process (and not whether it
1472          * immediately exited).
1473          */
1474         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1475                 die(_("fsmonitor--daemon is already running '%s'"),
1476                     the_repository->worktree);
1477
1478         if (fsmonitor__announce_startup) {
1479                 fprintf(stderr, _("starting fsmonitor-daemon in '%s'\n"),
1480                         the_repository->worktree);
1481                 fflush(stderr);
1482         }
1483
1484         cp.git_cmd = 1;
1485
1486         strvec_push(&cp.args, "fsmonitor--daemon");
1487         strvec_push(&cp.args, "run");
1488         strvec_push(&cp.args, "--detach");
1489         strvec_pushf(&cp.args, "--ipc-threads=%d", fsmonitor__ipc_threads);
1490
1491         cp.no_stdin = 1;
1492         cp.no_stdout = 1;
1493         cp.no_stderr = 1;
1494
1495         sbgr = start_bg_command(&cp, bg_wait_cb, NULL,
1496                                 fsmonitor__start_timeout_sec);
1497
1498         switch (sbgr) {
1499         case SBGR_READY:
1500                 return 0;
1501
1502         default:
1503         case SBGR_ERROR:
1504         case SBGR_CB_ERROR:
1505                 return error(_("daemon failed to start"));
1506
1507         case SBGR_TIMEOUT:
1508                 return error(_("daemon not online yet"));
1509
1510         case SBGR_DIED:
1511                 return error(_("daemon terminated"));
1512         }
1513 }
1514
1515 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
1516 {
1517         const char *subcmd;
1518         enum fsmonitor_reason reason;
1519         int detach_console = 0;
1520
1521         struct option options[] = {
1522                 OPT_BOOL(0, "detach", &detach_console, N_("detach from console")),
1523                 OPT_INTEGER(0, "ipc-threads",
1524                             &fsmonitor__ipc_threads,
1525                             N_("use <n> ipc worker threads")),
1526                 OPT_INTEGER(0, "start-timeout",
1527                             &fsmonitor__start_timeout_sec,
1528                             N_("max seconds to wait for background daemon startup")),
1529
1530                 OPT_END()
1531         };
1532
1533         git_config(fsmonitor_config, NULL);
1534
1535         argc = parse_options(argc, argv, prefix, options,
1536                              builtin_fsmonitor__daemon_usage, 0);
1537         if (argc != 1)
1538                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1539         subcmd = argv[0];
1540
1541         if (fsmonitor__ipc_threads < 1)
1542                 die(_("invalid 'ipc-threads' value (%d)"),
1543                     fsmonitor__ipc_threads);
1544
1545         prepare_repo_settings(the_repository);
1546         /*
1547          * If the repo is fsmonitor-compatible, explicitly set IPC-mode
1548          * (without bothering to load the `core.fsmonitor` config settings).
1549          *
1550          * If the repo is not compatible, the repo-settings will be set to
1551          * incompatible rather than IPC, so we can use one of the __get
1552          * routines to detect the discrepancy.
1553          */
1554         fsm_settings__set_ipc(the_repository);
1555
1556         reason = fsm_settings__get_reason(the_repository);
1557         if (reason > FSMONITOR_REASON_OK)
1558                 die("%s",
1559                     fsm_settings__get_incompatible_msg(the_repository,
1560                                                        reason));
1561
1562         if (!strcmp(subcmd, "start"))
1563                 return !!try_to_start_background_daemon();
1564
1565         if (!strcmp(subcmd, "run"))
1566                 return !!try_to_run_foreground_daemon(detach_console);
1567
1568         if (!strcmp(subcmd, "stop"))
1569                 return !!do_as_client__send_stop();
1570
1571         if (!strcmp(subcmd, "status"))
1572                 return !!do_as_client__status();
1573
1574         die(_("Unhandled subcommand '%s'"), subcmd);
1575 }
1576
1577 #else
1578 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
1579 {
1580         struct option options[] = {
1581                 OPT_END()
1582         };
1583
1584         if (argc == 2 && !strcmp(argv[1], "-h"))
1585                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1586
1587         die(_("fsmonitor--daemon not supported on this platform"));
1588 }
1589 #endif