]> git.ipfire.org Git - thirdparty/git.git/blob - fsmonitor.c
t7510: add a test case that does not need gpg
[thirdparty/git.git] / fsmonitor.c
1 #include "cache.h"
2 #include "config.h"
3 #include "dir.h"
4 #include "ewah/ewok.h"
5 #include "fsmonitor.h"
6 #include "fsmonitor-ipc.h"
7 #include "run-command.h"
8 #include "strbuf.h"
9
10 #define INDEX_EXTENSION_VERSION1 (1)
11 #define INDEX_EXTENSION_VERSION2 (2)
12 #define HOOK_INTERFACE_VERSION1 (1)
13 #define HOOK_INTERFACE_VERSION2 (2)
14
15 struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR);
16
17 static void assert_index_minimum(struct index_state *istate, size_t pos)
18 {
19 if (pos > istate->cache_nr)
20 BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
21 (uintmax_t)pos, istate->cache_nr);
22 }
23
24 static void fsmonitor_ewah_callback(size_t pos, void *is)
25 {
26 struct index_state *istate = (struct index_state *)is;
27 struct cache_entry *ce;
28
29 assert_index_minimum(istate, pos + 1);
30
31 ce = istate->cache[pos];
32 ce->ce_flags &= ~CE_FSMONITOR_VALID;
33 }
34
35 static int fsmonitor_hook_version(void)
36 {
37 int hook_version;
38
39 if (git_config_get_int("core.fsmonitorhookversion", &hook_version))
40 return -1;
41
42 if (hook_version == HOOK_INTERFACE_VERSION1 ||
43 hook_version == HOOK_INTERFACE_VERSION2)
44 return hook_version;
45
46 warning("Invalid hook version '%i' in core.fsmonitorhookversion. "
47 "Must be 1 or 2.", hook_version);
48 return -1;
49 }
50
51 int read_fsmonitor_extension(struct index_state *istate, const void *data,
52 unsigned long sz)
53 {
54 const char *index = data;
55 uint32_t hdr_version;
56 uint32_t ewah_size;
57 struct ewah_bitmap *fsmonitor_dirty;
58 int ret;
59 uint64_t timestamp;
60 struct strbuf last_update = STRBUF_INIT;
61
62 if (sz < sizeof(uint32_t) + 1 + sizeof(uint32_t))
63 return error("corrupt fsmonitor extension (too short)");
64
65 hdr_version = get_be32(index);
66 index += sizeof(uint32_t);
67 if (hdr_version == INDEX_EXTENSION_VERSION1) {
68 timestamp = get_be64(index);
69 strbuf_addf(&last_update, "%"PRIu64"", timestamp);
70 index += sizeof(uint64_t);
71 } else if (hdr_version == INDEX_EXTENSION_VERSION2) {
72 strbuf_addstr(&last_update, index);
73 index += last_update.len + 1;
74 } else {
75 return error("bad fsmonitor version %d", hdr_version);
76 }
77
78 istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
79
80 ewah_size = get_be32(index);
81 index += sizeof(uint32_t);
82
83 fsmonitor_dirty = ewah_new();
84 ret = ewah_read_mmap(fsmonitor_dirty, index, ewah_size);
85 if (ret != ewah_size) {
86 ewah_free(fsmonitor_dirty);
87 return error("failed to parse ewah bitmap reading fsmonitor index extension");
88 }
89 istate->fsmonitor_dirty = fsmonitor_dirty;
90
91 if (!istate->split_index)
92 assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
93
94 trace2_data_string("index", NULL, "extension/fsmn/read/token",
95 istate->fsmonitor_last_update);
96 trace_printf_key(&trace_fsmonitor,
97 "read fsmonitor extension successful '%s'",
98 istate->fsmonitor_last_update);
99 return 0;
100 }
101
102 void fill_fsmonitor_bitmap(struct index_state *istate)
103 {
104 unsigned int i, skipped = 0;
105 istate->fsmonitor_dirty = ewah_new();
106 for (i = 0; i < istate->cache_nr; i++) {
107 if (istate->cache[i]->ce_flags & CE_REMOVE)
108 skipped++;
109 else if (!(istate->cache[i]->ce_flags & CE_FSMONITOR_VALID))
110 ewah_set(istate->fsmonitor_dirty, i - skipped);
111 }
112 }
113
114 void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
115 {
116 uint32_t hdr_version;
117 uint32_t ewah_start;
118 uint32_t ewah_size = 0;
119 int fixup = 0;
120
121 if (!istate->split_index)
122 assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
123
124 put_be32(&hdr_version, INDEX_EXTENSION_VERSION2);
125 strbuf_add(sb, &hdr_version, sizeof(uint32_t));
126
127 strbuf_addstr(sb, istate->fsmonitor_last_update);
128 strbuf_addch(sb, 0); /* Want to keep a NUL */
129
130 fixup = sb->len;
131 strbuf_add(sb, &ewah_size, sizeof(uint32_t)); /* we'll fix this up later */
132
133 ewah_start = sb->len;
134 ewah_serialize_strbuf(istate->fsmonitor_dirty, sb);
135 ewah_free(istate->fsmonitor_dirty);
136 istate->fsmonitor_dirty = NULL;
137
138 /* fix up size field */
139 put_be32(&ewah_size, sb->len - ewah_start);
140 memcpy(sb->buf + fixup, &ewah_size, sizeof(uint32_t));
141
142 trace2_data_string("index", NULL, "extension/fsmn/write/token",
143 istate->fsmonitor_last_update);
144 trace_printf_key(&trace_fsmonitor,
145 "write fsmonitor extension successful '%s'",
146 istate->fsmonitor_last_update);
147 }
148
149 /*
150 * Call the query-fsmonitor hook passing the last update token of the saved results.
151 */
152 static int query_fsmonitor_hook(struct repository *r,
153 int version,
154 const char *last_update,
155 struct strbuf *query_result)
156 {
157 struct child_process cp = CHILD_PROCESS_INIT;
158 int result;
159
160 if (fsm_settings__get_mode(r) != FSMONITOR_MODE_HOOK)
161 return -1;
162
163 strvec_push(&cp.args, fsm_settings__get_hook_path(r));
164 strvec_pushf(&cp.args, "%d", version);
165 strvec_pushf(&cp.args, "%s", last_update);
166 cp.use_shell = 1;
167 cp.dir = get_git_work_tree();
168
169 trace2_region_enter("fsm_hook", "query", NULL);
170
171 result = capture_command(&cp, query_result, 1024);
172
173 if (result)
174 trace2_data_intmax("fsm_hook", NULL, "query/failed", result);
175 else
176 trace2_data_intmax("fsm_hook", NULL, "query/response-length",
177 query_result->len);
178
179 trace2_region_leave("fsm_hook", "query", NULL);
180
181 return result;
182 }
183
184 static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
185 {
186 int i, len = strlen(name);
187 int pos = index_name_pos(istate, name, len);
188
189 trace_printf_key(&trace_fsmonitor,
190 "fsmonitor_refresh_callback '%s' (pos %d)",
191 name, pos);
192
193 if (name[len - 1] == '/') {
194 /*
195 * The daemon can decorate directory events, such as
196 * moves or renames, with a trailing slash if the OS
197 * FS Event contains sufficient information, such as
198 * MacOS.
199 *
200 * Use this to invalidate the entire cone under that
201 * directory.
202 *
203 * We do not expect an exact match because the index
204 * does not normally contain directory entries, so we
205 * start at the insertion point and scan.
206 */
207 if (pos < 0)
208 pos = -pos - 1;
209
210 /* Mark all entries for the folder invalid */
211 for (i = pos; i < istate->cache_nr; i++) {
212 if (!starts_with(istate->cache[i]->name, name))
213 break;
214 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
215 }
216
217 /*
218 * We need to remove the traling "/" from the path
219 * for the untracked cache.
220 */
221 name[len - 1] = '\0';
222 } else if (pos >= 0) {
223 /*
224 * We have an exact match for this path and can just
225 * invalidate it.
226 */
227 istate->cache[pos]->ce_flags &= ~CE_FSMONITOR_VALID;
228 } else {
229 /*
230 * The path is not a tracked file -or- it is a
231 * directory event on a platform that cannot
232 * distinguish between file and directory events in
233 * the event handler, such as Windows.
234 *
235 * Scan as if it is a directory and invalidate the
236 * cone under it. (But remember to ignore items
237 * between "name" and "name/", such as "name-" and
238 * "name.".
239 */
240 pos = -pos - 1;
241
242 for (i = pos; i < istate->cache_nr; i++) {
243 if (!starts_with(istate->cache[i]->name, name))
244 break;
245 if ((unsigned char)istate->cache[i]->name[len] > '/')
246 break;
247 if (istate->cache[i]->name[len] == '/')
248 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
249 }
250 }
251
252 /*
253 * Mark the untracked cache dirty even if it wasn't found in the index
254 * as it could be a new untracked file.
255 */
256 untracked_cache_invalidate_path(istate, name, 0);
257 }
258
/*
 * How many pathnames an FSMonitor response must exceed before we
 * force the index to be written out again.
 *
 * Any single pathname in a response MAY flip a cache-entry or istate
 * flag bit and thereby cause the index to be rewritten.  But a
 * response can also consist largely of paths (ignored paths, for
 * instance) that change no flag bits at all.  That is fine and
 * normal, yet it has a side effect: with no index write, the token
 * stored in the FSMonitor index extension is not refreshed.  The next
 * Git command then finds the same old token, issues the same
 * token-relative query, and gets the same response back (plus
 * whatever changed since).  A response that is large and keeps
 * growing can hurt performance.
 *
 * Example: a build writes 100K object files without modifying any
 * source file, so the index does not need updating.  The FSMonitor
 * response relative to the pre-build token might be 5MB.  Every
 * following Git command receives that same 100K/5MB response, and
 * `refresh_fsmonitor()` walks all 100K paths each time, until
 * something finally causes the index to be written.
 *
 * Forcing an index update after a very large response stores a newer
 * token in the FSMonitor index extension, so later commands should
 * get smaller, more current responses.
 *
 * The exact value is not critical.  The index is rewritten anyway the
 * first time the user touches a tracked file and a command such as
 * `git status` updates an mtime and/or sets a flag bit.
 */
static int fsmonitor_force_update_threshold = 100;
295
296 void refresh_fsmonitor(struct index_state *istate)
297 {
298 static int warn_once = 0;
299 struct strbuf query_result = STRBUF_INIT;
300 int query_success = 0, hook_version = -1;
301 size_t bol = 0; /* beginning of line */
302 uint64_t last_update;
303 struct strbuf last_update_token = STRBUF_INIT;
304 char *buf;
305 unsigned int i;
306 int is_trivial = 0;
307 struct repository *r = istate->repo ? istate->repo : the_repository;
308 enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r);
309 enum fsmonitor_reason reason = fsm_settings__get_reason(r);
310
311 if (!warn_once && reason > FSMONITOR_REASON_OK) {
312 char *msg = fsm_settings__get_incompatible_msg(r, reason);
313 warn_once = 1;
314 warning("%s", msg);
315 free(msg);
316 }
317
318 if (fsm_mode <= FSMONITOR_MODE_DISABLED ||
319 istate->fsmonitor_has_run_once)
320 return;
321
322 istate->fsmonitor_has_run_once = 1;
323
324 trace_printf_key(&trace_fsmonitor, "refresh fsmonitor");
325
326 if (fsm_mode == FSMONITOR_MODE_IPC) {
327 query_success = !fsmonitor_ipc__send_query(
328 istate->fsmonitor_last_update ?
329 istate->fsmonitor_last_update : "builtin:fake",
330 &query_result);
331 if (query_success) {
332 /*
333 * The response contains a series of nul terminated
334 * strings. The first is the new token.
335 *
336 * Use `char *buf` as an interlude to trick the CI
337 * static analysis to let us use `strbuf_addstr()`
338 * here (and only copy the token) rather than
339 * `strbuf_addbuf()`.
340 */
341 buf = query_result.buf;
342 strbuf_addstr(&last_update_token, buf);
343 bol = last_update_token.len + 1;
344 is_trivial = query_result.buf[bol] == '/';
345 if (is_trivial)
346 trace2_data_intmax("fsm_client", NULL,
347 "query/trivial-response", 1);
348 } else {
349 /*
350 * The builtin daemon is not available on this
351 * platform -OR- we failed to get a response.
352 *
353 * Generate a fake token (rather than a V1
354 * timestamp) for the index extension. (If
355 * they switch back to the hook API, we don't
356 * want ambiguous state.)
357 */
358 strbuf_addstr(&last_update_token, "builtin:fake");
359 }
360
361 goto apply_results;
362 }
363
364 assert(fsm_mode == FSMONITOR_MODE_HOOK);
365
366 hook_version = fsmonitor_hook_version();
367
368 /*
369 * This could be racy so save the date/time now and query_fsmonitor_hook
370 * should be inclusive to ensure we don't miss potential changes.
371 */
372 last_update = getnanotime();
373 if (hook_version == HOOK_INTERFACE_VERSION1)
374 strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
375
376 /*
377 * If we have a last update token, call query_fsmonitor_hook for the set of
378 * changes since that token, else assume everything is possibly dirty
379 * and check it all.
380 */
381 if (istate->fsmonitor_last_update) {
382 if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) {
383 query_success = !query_fsmonitor_hook(
384 r, HOOK_INTERFACE_VERSION2,
385 istate->fsmonitor_last_update, &query_result);
386
387 if (query_success) {
388 if (hook_version < 0)
389 hook_version = HOOK_INTERFACE_VERSION2;
390
391 /*
392 * First entry will be the last update token
393 * Need to use a char * variable because static
394 * analysis was suggesting to use strbuf_addbuf
395 * but we don't want to copy the entire strbuf
396 * only the chars up to the first NUL
397 */
398 buf = query_result.buf;
399 strbuf_addstr(&last_update_token, buf);
400 if (!last_update_token.len) {
401 warning("Empty last update token.");
402 query_success = 0;
403 } else {
404 bol = last_update_token.len + 1;
405 is_trivial = query_result.buf[bol] == '/';
406 }
407 } else if (hook_version < 0) {
408 hook_version = HOOK_INTERFACE_VERSION1;
409 if (!last_update_token.len)
410 strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
411 }
412 }
413
414 if (hook_version == HOOK_INTERFACE_VERSION1) {
415 query_success = !query_fsmonitor_hook(
416 r, HOOK_INTERFACE_VERSION1,
417 istate->fsmonitor_last_update, &query_result);
418 if (query_success)
419 is_trivial = query_result.buf[0] == '/';
420 }
421
422 if (is_trivial)
423 trace2_data_intmax("fsm_hook", NULL,
424 "query/trivial-response", 1);
425
426 trace_performance_since(last_update, "fsmonitor process '%s'",
427 fsm_settings__get_hook_path(r));
428 trace_printf_key(&trace_fsmonitor,
429 "fsmonitor process '%s' returned %s",
430 fsm_settings__get_hook_path(r),
431 query_success ? "success" : "failure");
432 }
433
434 apply_results:
435 /*
436 * The response from FSMonitor (excluding the header token) is
437 * either:
438 *
439 * [a] a (possibly empty) list of NUL delimited relative
440 * pathnames of changed paths. This list can contain
441 * files and directories. Directories have a trailing
442 * slash.
443 *
444 * [b] a single '/' to indicate the provider had no
445 * information and that we should consider everything
446 * invalid. We call this a trivial response.
447 */
448 trace2_region_enter("fsmonitor", "apply_results", istate->repo);
449
450 if (query_success && !is_trivial) {
451 /*
452 * Mark all pathnames returned by the monitor as dirty.
453 *
454 * This updates both the cache-entries and the untracked-cache.
455 */
456 int count = 0;
457
458 buf = query_result.buf;
459 for (i = bol; i < query_result.len; i++) {
460 if (buf[i] != '\0')
461 continue;
462 fsmonitor_refresh_callback(istate, buf + bol);
463 bol = i + 1;
464 count++;
465 }
466 if (bol < query_result.len) {
467 fsmonitor_refresh_callback(istate, buf + bol);
468 count++;
469 }
470
471 /* Now mark the untracked cache for fsmonitor usage */
472 if (istate->untracked)
473 istate->untracked->use_fsmonitor = 1;
474
475 if (count > fsmonitor_force_update_threshold)
476 istate->cache_changed |= FSMONITOR_CHANGED;
477
478 trace2_data_intmax("fsmonitor", istate->repo, "apply_count",
479 count);
480
481 } else {
482 /*
483 * We failed to get a response or received a trivial response,
484 * so invalidate everything.
485 *
486 * We only want to run the post index changed hook if
487 * we've actually changed entries, so keep track if we
488 * actually changed entries or not.
489 */
490 int is_cache_changed = 0;
491
492 for (i = 0; i < istate->cache_nr; i++) {
493 if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) {
494 is_cache_changed = 1;
495 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
496 }
497 }
498
499 /*
500 * If we're going to check every file, ensure we save
501 * the results.
502 */
503 if (is_cache_changed)
504 istate->cache_changed |= FSMONITOR_CHANGED;
505
506 if (istate->untracked)
507 istate->untracked->use_fsmonitor = 0;
508 }
509 trace2_region_leave("fsmonitor", "apply_results", istate->repo);
510
511 strbuf_release(&query_result);
512
513 /* Now that we've updated istate, save the last_update_token */
514 FREE_AND_NULL(istate->fsmonitor_last_update);
515 istate->fsmonitor_last_update = strbuf_detach(&last_update_token, NULL);
516 }
517
518 /*
519 * The caller wants to turn on FSMonitor. And when the caller writes
520 * the index to disk, a FSMonitor extension should be included. This
521 * requires that `istate->fsmonitor_last_update` not be NULL. But we
522 * have not actually talked to a FSMonitor process yet, so we don't
523 * have an initial value for this field.
524 *
525 * For a protocol V1 FSMonitor process, this field is a formatted
526 * "nanoseconds since epoch" field. However, for a protocol V2
527 * FSMonitor process, this field is an opaque token.
528 *
529 * Historically, `add_fsmonitor()` has initialized this field to the
530 * current time for protocol V1 processes. There are lots of race
531 * conditions here, but that code has shipped...
532 *
533 * The only true solution is to use a V2 FSMonitor and get a current
534 * or default token value (that it understands), but we cannot do that
535 * until we have actually talked to an instance of the FSMonitor process
536 * (but the protocol requires that we send a token first...).
537 *
538 * For simplicity, just initialize like we have a V1 process and require
539 * that V2 processes adapt.
540 */
541 static void initialize_fsmonitor_last_update(struct index_state *istate)
542 {
543 struct strbuf last_update = STRBUF_INIT;
544
545 strbuf_addf(&last_update, "%"PRIu64"", getnanotime());
546 istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
547 }
548
549 void add_fsmonitor(struct index_state *istate)
550 {
551 unsigned int i;
552
553 if (!istate->fsmonitor_last_update) {
554 trace_printf_key(&trace_fsmonitor, "add fsmonitor");
555 istate->cache_changed |= FSMONITOR_CHANGED;
556 initialize_fsmonitor_last_update(istate);
557
558 /* reset the fsmonitor state */
559 for (i = 0; i < istate->cache_nr; i++)
560 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
561
562 /* reset the untracked cache */
563 if (istate->untracked) {
564 add_untracked_cache(istate);
565 istate->untracked->use_fsmonitor = 1;
566 }
567
568 /* Update the fsmonitor state */
569 refresh_fsmonitor(istate);
570 }
571 }
572
573 void remove_fsmonitor(struct index_state *istate)
574 {
575 if (istate->fsmonitor_last_update) {
576 trace_printf_key(&trace_fsmonitor, "remove fsmonitor");
577 istate->cache_changed |= FSMONITOR_CHANGED;
578 FREE_AND_NULL(istate->fsmonitor_last_update);
579 }
580 }
581
582 void tweak_fsmonitor(struct index_state *istate)
583 {
584 unsigned int i;
585 int fsmonitor_enabled = (fsm_settings__get_mode(istate->repo)
586 > FSMONITOR_MODE_DISABLED);
587
588 if (istate->fsmonitor_dirty) {
589 if (fsmonitor_enabled) {
590 /* Mark all entries valid */
591 for (i = 0; i < istate->cache_nr; i++) {
592 if (S_ISGITLINK(istate->cache[i]->ce_mode))
593 continue;
594 istate->cache[i]->ce_flags |= CE_FSMONITOR_VALID;
595 }
596
597 /* Mark all previously saved entries as dirty */
598 assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
599 ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate);
600
601 refresh_fsmonitor(istate);
602 }
603
604 ewah_free(istate->fsmonitor_dirty);
605 istate->fsmonitor_dirty = NULL;
606 }
607
608 if (fsmonitor_enabled)
609 add_fsmonitor(istate);
610 else
611 remove_fsmonitor(istate);
612 }