]> git.ipfire.org Git - thirdparty/git.git/blob - fsmonitor.c
environment.h: move declarations for environment.c functions from cache.h
[thirdparty/git.git] / fsmonitor.c
1 #include "cache.h"
2 #include "config.h"
3 #include "dir.h"
4 #include "environment.h"
5 #include "ewah/ewok.h"
6 #include "fsmonitor.h"
7 #include "fsmonitor-ipc.h"
8 #include "run-command.h"
9 #include "strbuf.h"
10
/*
 * On-disk versions of the fsmonitor index extension as parsed by
 * read_fsmonitor_extension(): version 1 carries a 64-bit big-endian
 * timestamp, version 2 carries a NUL-terminated token string.
 */
#define INDEX_EXTENSION_VERSION1 (1)
#define INDEX_EXTENSION_VERSION2 (2)
/*
 * Hook protocol versions: the values accepted for
 * core.fsmonitorhookversion, and the first argument handed to the hook.
 */
#define HOOK_INTERFACE_VERSION1 (1)
#define HOOK_INTERFACE_VERSION2 (2)

/* Trace key used by the trace_printf_key() calls in this file. */
struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR);
17
18 static void assert_index_minimum(struct index_state *istate, size_t pos)
19 {
20 if (pos > istate->cache_nr)
21 BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
22 (uintmax_t)pos, istate->cache_nr);
23 }
24
25 static void fsmonitor_ewah_callback(size_t pos, void *is)
26 {
27 struct index_state *istate = (struct index_state *)is;
28 struct cache_entry *ce;
29
30 assert_index_minimum(istate, pos + 1);
31
32 ce = istate->cache[pos];
33 ce->ce_flags &= ~CE_FSMONITOR_VALID;
34 }
35
36 static int fsmonitor_hook_version(void)
37 {
38 int hook_version;
39
40 if (git_config_get_int("core.fsmonitorhookversion", &hook_version))
41 return -1;
42
43 if (hook_version == HOOK_INTERFACE_VERSION1 ||
44 hook_version == HOOK_INTERFACE_VERSION2)
45 return hook_version;
46
47 warning("Invalid hook version '%i' in core.fsmonitorhookversion. "
48 "Must be 1 or 2.", hook_version);
49 return -1;
50 }
51
52 int read_fsmonitor_extension(struct index_state *istate, const void *data,
53 unsigned long sz)
54 {
55 const char *index = data;
56 uint32_t hdr_version;
57 uint32_t ewah_size;
58 struct ewah_bitmap *fsmonitor_dirty;
59 int ret;
60 uint64_t timestamp;
61 struct strbuf last_update = STRBUF_INIT;
62
63 if (sz < sizeof(uint32_t) + 1 + sizeof(uint32_t))
64 return error("corrupt fsmonitor extension (too short)");
65
66 hdr_version = get_be32(index);
67 index += sizeof(uint32_t);
68 if (hdr_version == INDEX_EXTENSION_VERSION1) {
69 timestamp = get_be64(index);
70 strbuf_addf(&last_update, "%"PRIu64"", timestamp);
71 index += sizeof(uint64_t);
72 } else if (hdr_version == INDEX_EXTENSION_VERSION2) {
73 strbuf_addstr(&last_update, index);
74 index += last_update.len + 1;
75 } else {
76 return error("bad fsmonitor version %d", hdr_version);
77 }
78
79 istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
80
81 ewah_size = get_be32(index);
82 index += sizeof(uint32_t);
83
84 fsmonitor_dirty = ewah_new();
85 ret = ewah_read_mmap(fsmonitor_dirty, index, ewah_size);
86 if (ret != ewah_size) {
87 ewah_free(fsmonitor_dirty);
88 return error("failed to parse ewah bitmap reading fsmonitor index extension");
89 }
90 istate->fsmonitor_dirty = fsmonitor_dirty;
91
92 if (!istate->split_index)
93 assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
94
95 trace2_data_string("index", NULL, "extension/fsmn/read/token",
96 istate->fsmonitor_last_update);
97 trace_printf_key(&trace_fsmonitor,
98 "read fsmonitor extension successful '%s'",
99 istate->fsmonitor_last_update);
100 return 0;
101 }
102
103 void fill_fsmonitor_bitmap(struct index_state *istate)
104 {
105 unsigned int i, skipped = 0;
106 istate->fsmonitor_dirty = ewah_new();
107 for (i = 0; i < istate->cache_nr; i++) {
108 if (istate->cache[i]->ce_flags & CE_REMOVE)
109 skipped++;
110 else if (!(istate->cache[i]->ce_flags & CE_FSMONITOR_VALID))
111 ewah_set(istate->fsmonitor_dirty, i - skipped);
112 }
113 }
114
115 void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
116 {
117 uint32_t hdr_version;
118 uint32_t ewah_start;
119 uint32_t ewah_size = 0;
120 int fixup = 0;
121
122 if (!istate->split_index)
123 assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
124
125 put_be32(&hdr_version, INDEX_EXTENSION_VERSION2);
126 strbuf_add(sb, &hdr_version, sizeof(uint32_t));
127
128 strbuf_addstr(sb, istate->fsmonitor_last_update);
129 strbuf_addch(sb, 0); /* Want to keep a NUL */
130
131 fixup = sb->len;
132 strbuf_add(sb, &ewah_size, sizeof(uint32_t)); /* we'll fix this up later */
133
134 ewah_start = sb->len;
135 ewah_serialize_strbuf(istate->fsmonitor_dirty, sb);
136 ewah_free(istate->fsmonitor_dirty);
137 istate->fsmonitor_dirty = NULL;
138
139 /* fix up size field */
140 put_be32(&ewah_size, sb->len - ewah_start);
141 memcpy(sb->buf + fixup, &ewah_size, sizeof(uint32_t));
142
143 trace2_data_string("index", NULL, "extension/fsmn/write/token",
144 istate->fsmonitor_last_update);
145 trace_printf_key(&trace_fsmonitor,
146 "write fsmonitor extension successful '%s'",
147 istate->fsmonitor_last_update);
148 }
149
150 /*
151 * Call the query-fsmonitor hook passing the last update token of the saved results.
152 */
153 static int query_fsmonitor_hook(struct repository *r,
154 int version,
155 const char *last_update,
156 struct strbuf *query_result)
157 {
158 struct child_process cp = CHILD_PROCESS_INIT;
159 int result;
160
161 if (fsm_settings__get_mode(r) != FSMONITOR_MODE_HOOK)
162 return -1;
163
164 strvec_push(&cp.args, fsm_settings__get_hook_path(r));
165 strvec_pushf(&cp.args, "%d", version);
166 strvec_pushf(&cp.args, "%s", last_update);
167 cp.use_shell = 1;
168 cp.dir = get_git_work_tree();
169
170 trace2_region_enter("fsm_hook", "query", NULL);
171
172 result = capture_command(&cp, query_result, 1024);
173
174 if (result)
175 trace2_data_intmax("fsm_hook", NULL, "query/failed", result);
176 else
177 trace2_data_intmax("fsm_hook", NULL, "query/response-length",
178 query_result->len);
179
180 trace2_region_leave("fsm_hook", "query", NULL);
181
182 return result;
183 }
184
185 static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
186 {
187 int i, len = strlen(name);
188 int pos = index_name_pos(istate, name, len);
189
190 trace_printf_key(&trace_fsmonitor,
191 "fsmonitor_refresh_callback '%s' (pos %d)",
192 name, pos);
193
194 if (name[len - 1] == '/') {
195 /*
196 * The daemon can decorate directory events, such as
197 * moves or renames, with a trailing slash if the OS
198 * FS Event contains sufficient information, such as
199 * MacOS.
200 *
201 * Use this to invalidate the entire cone under that
202 * directory.
203 *
204 * We do not expect an exact match because the index
205 * does not normally contain directory entries, so we
206 * start at the insertion point and scan.
207 */
208 if (pos < 0)
209 pos = -pos - 1;
210
211 /* Mark all entries for the folder invalid */
212 for (i = pos; i < istate->cache_nr; i++) {
213 if (!starts_with(istate->cache[i]->name, name))
214 break;
215 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
216 }
217
218 /*
219 * We need to remove the traling "/" from the path
220 * for the untracked cache.
221 */
222 name[len - 1] = '\0';
223 } else if (pos >= 0) {
224 /*
225 * We have an exact match for this path and can just
226 * invalidate it.
227 */
228 istate->cache[pos]->ce_flags &= ~CE_FSMONITOR_VALID;
229 } else {
230 /*
231 * The path is not a tracked file -or- it is a
232 * directory event on a platform that cannot
233 * distinguish between file and directory events in
234 * the event handler, such as Windows.
235 *
236 * Scan as if it is a directory and invalidate the
237 * cone under it. (But remember to ignore items
238 * between "name" and "name/", such as "name-" and
239 * "name.".
240 */
241 pos = -pos - 1;
242
243 for (i = pos; i < istate->cache_nr; i++) {
244 if (!starts_with(istate->cache[i]->name, name))
245 break;
246 if ((unsigned char)istate->cache[i]->name[len] > '/')
247 break;
248 if (istate->cache[i]->name[len] == '/')
249 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
250 }
251 }
252
253 /*
254 * Mark the untracked cache dirty even if it wasn't found in the index
255 * as it could be a new untracked file.
256 */
257 untracked_cache_invalidate_path(istate, name, 0);
258 }
259
/*
 * The number of pathnames that we need to receive from FSMonitor
 * before we force the index to be updated.
 *
 * Note that any pathname within the set of received paths MAY cause
 * cache-entry or istate flag bits to be updated and thus cause the
 * index to be updated on disk.
 *
 * However, the response may contain many paths (such as ignored
 * paths) that will not update any flag bits, and thus not force the
 * index to be updated.  (This is fine and normal.)  It also means
 * that the token will not be updated in the FSMonitor index
 * extension.  So the next Git command will find the same token in the
 * index, make the same token-relative request, and receive the same
 * response (plus any newly changed paths).  If this response is large
 * (and continues to grow), performance could be impacted.
 *
 * For example, if the user runs a build and it writes 100K object
 * files but doesn't modify any source files, the index would not need
 * to be updated.  The FSMonitor response (after the build and
 * relative to a pre-build token) might be 5MB.  Each subsequent Git
 * command will receive that same 100K/5MB response until something
 * causes the index to be updated.  And `refresh_fsmonitor()` will
 * have to iterate over those 100K paths each time.
 *
 * Performance could be improved if we optionally force update the
 * index after a very large response and get an updated token into
 * the FSMonitor index extension.  This should allow subsequent
 * commands to get smaller and more current responses.
 *
 * The value chosen here does not need to be precise.  The index
 * will be updated automatically the first time the user touches
 * a tracked file and causes a command like `git status` to
 * update an mtime and/or set a flag bit.
 */
static int fsmonitor_force_update_threshold = 100;
296
297 void refresh_fsmonitor(struct index_state *istate)
298 {
299 static int warn_once = 0;
300 struct strbuf query_result = STRBUF_INIT;
301 int query_success = 0, hook_version = -1;
302 size_t bol = 0; /* beginning of line */
303 uint64_t last_update;
304 struct strbuf last_update_token = STRBUF_INIT;
305 char *buf;
306 unsigned int i;
307 int is_trivial = 0;
308 struct repository *r = istate->repo;
309 enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r);
310 enum fsmonitor_reason reason = fsm_settings__get_reason(r);
311
312 if (!warn_once && reason > FSMONITOR_REASON_OK) {
313 char *msg = fsm_settings__get_incompatible_msg(r, reason);
314 warn_once = 1;
315 warning("%s", msg);
316 free(msg);
317 }
318
319 if (fsm_mode <= FSMONITOR_MODE_DISABLED ||
320 istate->fsmonitor_has_run_once)
321 return;
322
323 istate->fsmonitor_has_run_once = 1;
324
325 trace_printf_key(&trace_fsmonitor, "refresh fsmonitor");
326
327 if (fsm_mode == FSMONITOR_MODE_IPC) {
328 query_success = !fsmonitor_ipc__send_query(
329 istate->fsmonitor_last_update ?
330 istate->fsmonitor_last_update : "builtin:fake",
331 &query_result);
332 if (query_success) {
333 /*
334 * The response contains a series of nul terminated
335 * strings. The first is the new token.
336 *
337 * Use `char *buf` as an interlude to trick the CI
338 * static analysis to let us use `strbuf_addstr()`
339 * here (and only copy the token) rather than
340 * `strbuf_addbuf()`.
341 */
342 buf = query_result.buf;
343 strbuf_addstr(&last_update_token, buf);
344 bol = last_update_token.len + 1;
345 is_trivial = query_result.buf[bol] == '/';
346 if (is_trivial)
347 trace2_data_intmax("fsm_client", NULL,
348 "query/trivial-response", 1);
349 } else {
350 /*
351 * The builtin daemon is not available on this
352 * platform -OR- we failed to get a response.
353 *
354 * Generate a fake token (rather than a V1
355 * timestamp) for the index extension. (If
356 * they switch back to the hook API, we don't
357 * want ambiguous state.)
358 */
359 strbuf_addstr(&last_update_token, "builtin:fake");
360 }
361
362 goto apply_results;
363 }
364
365 assert(fsm_mode == FSMONITOR_MODE_HOOK);
366
367 hook_version = fsmonitor_hook_version();
368
369 /*
370 * This could be racy so save the date/time now and query_fsmonitor_hook
371 * should be inclusive to ensure we don't miss potential changes.
372 */
373 last_update = getnanotime();
374 if (hook_version == HOOK_INTERFACE_VERSION1)
375 strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
376
377 /*
378 * If we have a last update token, call query_fsmonitor_hook for the set of
379 * changes since that token, else assume everything is possibly dirty
380 * and check it all.
381 */
382 if (istate->fsmonitor_last_update) {
383 if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) {
384 query_success = !query_fsmonitor_hook(
385 r, HOOK_INTERFACE_VERSION2,
386 istate->fsmonitor_last_update, &query_result);
387
388 if (query_success) {
389 if (hook_version < 0)
390 hook_version = HOOK_INTERFACE_VERSION2;
391
392 /*
393 * First entry will be the last update token
394 * Need to use a char * variable because static
395 * analysis was suggesting to use strbuf_addbuf
396 * but we don't want to copy the entire strbuf
397 * only the chars up to the first NUL
398 */
399 buf = query_result.buf;
400 strbuf_addstr(&last_update_token, buf);
401 if (!last_update_token.len) {
402 warning("Empty last update token.");
403 query_success = 0;
404 } else {
405 bol = last_update_token.len + 1;
406 is_trivial = query_result.buf[bol] == '/';
407 }
408 } else if (hook_version < 0) {
409 hook_version = HOOK_INTERFACE_VERSION1;
410 if (!last_update_token.len)
411 strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
412 }
413 }
414
415 if (hook_version == HOOK_INTERFACE_VERSION1) {
416 query_success = !query_fsmonitor_hook(
417 r, HOOK_INTERFACE_VERSION1,
418 istate->fsmonitor_last_update, &query_result);
419 if (query_success)
420 is_trivial = query_result.buf[0] == '/';
421 }
422
423 if (is_trivial)
424 trace2_data_intmax("fsm_hook", NULL,
425 "query/trivial-response", 1);
426
427 trace_performance_since(last_update, "fsmonitor process '%s'",
428 fsm_settings__get_hook_path(r));
429 trace_printf_key(&trace_fsmonitor,
430 "fsmonitor process '%s' returned %s",
431 fsm_settings__get_hook_path(r),
432 query_success ? "success" : "failure");
433 }
434
435 apply_results:
436 /*
437 * The response from FSMonitor (excluding the header token) is
438 * either:
439 *
440 * [a] a (possibly empty) list of NUL delimited relative
441 * pathnames of changed paths. This list can contain
442 * files and directories. Directories have a trailing
443 * slash.
444 *
445 * [b] a single '/' to indicate the provider had no
446 * information and that we should consider everything
447 * invalid. We call this a trivial response.
448 */
449 trace2_region_enter("fsmonitor", "apply_results", istate->repo);
450
451 if (query_success && !is_trivial) {
452 /*
453 * Mark all pathnames returned by the monitor as dirty.
454 *
455 * This updates both the cache-entries and the untracked-cache.
456 */
457 int count = 0;
458
459 buf = query_result.buf;
460 for (i = bol; i < query_result.len; i++) {
461 if (buf[i] != '\0')
462 continue;
463 fsmonitor_refresh_callback(istate, buf + bol);
464 bol = i + 1;
465 count++;
466 }
467 if (bol < query_result.len) {
468 fsmonitor_refresh_callback(istate, buf + bol);
469 count++;
470 }
471
472 /* Now mark the untracked cache for fsmonitor usage */
473 if (istate->untracked)
474 istate->untracked->use_fsmonitor = 1;
475
476 if (count > fsmonitor_force_update_threshold)
477 istate->cache_changed |= FSMONITOR_CHANGED;
478
479 trace2_data_intmax("fsmonitor", istate->repo, "apply_count",
480 count);
481
482 } else {
483 /*
484 * We failed to get a response or received a trivial response,
485 * so invalidate everything.
486 *
487 * We only want to run the post index changed hook if
488 * we've actually changed entries, so keep track if we
489 * actually changed entries or not.
490 */
491 int is_cache_changed = 0;
492
493 for (i = 0; i < istate->cache_nr; i++) {
494 if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) {
495 is_cache_changed = 1;
496 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
497 }
498 }
499
500 /*
501 * If we're going to check every file, ensure we save
502 * the results.
503 */
504 if (is_cache_changed)
505 istate->cache_changed |= FSMONITOR_CHANGED;
506
507 if (istate->untracked)
508 istate->untracked->use_fsmonitor = 0;
509 }
510 trace2_region_leave("fsmonitor", "apply_results", istate->repo);
511
512 strbuf_release(&query_result);
513
514 /* Now that we've updated istate, save the last_update_token */
515 FREE_AND_NULL(istate->fsmonitor_last_update);
516 istate->fsmonitor_last_update = strbuf_detach(&last_update_token, NULL);
517 }
518
519 /*
520 * The caller wants to turn on FSMonitor. And when the caller writes
521 * the index to disk, a FSMonitor extension should be included. This
522 * requires that `istate->fsmonitor_last_update` not be NULL. But we
523 * have not actually talked to a FSMonitor process yet, so we don't
524 * have an initial value for this field.
525 *
526 * For a protocol V1 FSMonitor process, this field is a formatted
527 * "nanoseconds since epoch" field. However, for a protocol V2
528 * FSMonitor process, this field is an opaque token.
529 *
530 * Historically, `add_fsmonitor()` has initialized this field to the
531 * current time for protocol V1 processes. There are lots of race
532 * conditions here, but that code has shipped...
533 *
534 * The only true solution is to use a V2 FSMonitor and get a current
535 * or default token value (that it understands), but we cannot do that
536 * until we have actually talked to an instance of the FSMonitor process
537 * (but the protocol requires that we send a token first...).
538 *
539 * For simplicity, just initialize like we have a V1 process and require
540 * that V2 processes adapt.
541 */
542 static void initialize_fsmonitor_last_update(struct index_state *istate)
543 {
544 struct strbuf last_update = STRBUF_INIT;
545
546 strbuf_addf(&last_update, "%"PRIu64"", getnanotime());
547 istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
548 }
549
550 void add_fsmonitor(struct index_state *istate)
551 {
552 unsigned int i;
553
554 if (!istate->fsmonitor_last_update) {
555 trace_printf_key(&trace_fsmonitor, "add fsmonitor");
556 istate->cache_changed |= FSMONITOR_CHANGED;
557 initialize_fsmonitor_last_update(istate);
558
559 /* reset the fsmonitor state */
560 for (i = 0; i < istate->cache_nr; i++)
561 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
562
563 /* reset the untracked cache */
564 if (istate->untracked) {
565 add_untracked_cache(istate);
566 istate->untracked->use_fsmonitor = 1;
567 }
568
569 /* Update the fsmonitor state */
570 refresh_fsmonitor(istate);
571 }
572 }
573
574 void remove_fsmonitor(struct index_state *istate)
575 {
576 if (istate->fsmonitor_last_update) {
577 trace_printf_key(&trace_fsmonitor, "remove fsmonitor");
578 istate->cache_changed |= FSMONITOR_CHANGED;
579 FREE_AND_NULL(istate->fsmonitor_last_update);
580 }
581 }
582
583 void tweak_fsmonitor(struct index_state *istate)
584 {
585 unsigned int i;
586 int fsmonitor_enabled = (fsm_settings__get_mode(istate->repo)
587 > FSMONITOR_MODE_DISABLED);
588
589 if (istate->fsmonitor_dirty) {
590 if (fsmonitor_enabled) {
591 /* Mark all entries valid */
592 for (i = 0; i < istate->cache_nr; i++) {
593 if (S_ISGITLINK(istate->cache[i]->ce_mode))
594 continue;
595 istate->cache[i]->ce_flags |= CE_FSMONITOR_VALID;
596 }
597
598 /* Mark all previously saved entries as dirty */
599 assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
600 ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate);
601
602 refresh_fsmonitor(istate);
603 }
604
605 ewah_free(istate->fsmonitor_dirty);
606 istate->fsmonitor_dirty = NULL;
607 }
608
609 if (fsmonitor_enabled)
610 add_fsmonitor(istate);
611 else
612 remove_fsmonitor(istate);
613 }