]> git.ipfire.org Git - thirdparty/git.git/blob - builtin/gc.c
Merge branch 'ab/detox-gettext-tests'
[thirdparty/git.git] / builtin / gc.c
1 /*
2 * git gc builtin command
3 *
4 * Cleanup unreachable files and optimize the repository.
5 *
6 * Copyright (c) 2007 James Bowes
7 *
8 * Based on git-gc.sh, which is
9 *
10 * Copyright (c) 2006 Shawn O. Pearce
11 */
12
13 #include "builtin.h"
14 #include "repository.h"
15 #include "config.h"
16 #include "tempfile.h"
17 #include "lockfile.h"
18 #include "parse-options.h"
19 #include "run-command.h"
20 #include "sigchain.h"
21 #include "strvec.h"
22 #include "commit.h"
23 #include "commit-graph.h"
24 #include "packfile.h"
25 #include "object-store.h"
26 #include "pack.h"
27 #include "pack-objects.h"
28 #include "blob.h"
29 #include "tree.h"
30 #include "promisor-remote.h"
31 #include "refs.h"
32 #include "remote.h"
33 #include "object-store.h"
34 #include "exec-cmd.h"
35
/* die() format used when a child git command fails; %s is the command name. */
#define FAILED_RUN "failed to run %s"

/* Usage lines handed to usage_with_options() and parse_options() in cmd_gc(). */
static const char * const builtin_gc_usage[] = {
	N_("git gc [<options>]"),
	NULL
};

/*
 * Tunables. The initializers are the built-in defaults; gc_config()
 * replaces them with values found in the configuration.
 */
/* gc.packrefs; -1 stands for "notbare" and is resolved in cmd_gc() */
static int pack_refs = 1;
/* cleared by gc_config() when both reflog expiry keys parse to 0 */
static int prune_reflogs = 1;
/* gc.aggressivedepth */
static int aggressive_depth = 50;
/* gc.aggressivewindow */
static int aggressive_window = 250;
/* gc.auto */
static int gc_auto_threshold = 6700;
/* gc.autopacklimit */
static int gc_auto_pack_limit = 50;
/* gc.autodetach */
static int detach_auto = 1;
/* gc_log_expire as parsed by parse_expiry_date() in cmd_gc() */
static timestamp_t gc_log_expire_time;
/* gc.logexpiry */
static const char *gc_log_expire = "1.day.ago";
/* gc.pruneexpire, also the target of the --prune option */
static const char *prune_expire = "2.weeks.ago";
/* gc.worktreepruneexpire */
static const char *prune_worktrees_expire = "3.months.ago";
/* gc.bigpackthreshold */
static unsigned long big_pack_threshold;
/* pack.deltacachesize */
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;

/* Argument vectors of the child git commands; seeded in cmd_gc(). */
static struct strvec reflog = STRVEC_INIT;
static struct strvec repack = STRVEC_INIT;
static struct strvec prune = STRVEC_INIT;
static struct strvec prune_worktrees = STRVEC_INIT;
static struct strvec rerere = STRVEC_INIT;

/* Registered on gc.pid by lock_repo_for_gc() once the lock is taken. */
static struct tempfile *pidfile;
/* Lock on gc.log, taken in cmd_gc() when running daemonized. */
static struct lock_file log_lock;

/* Paths gathered by report_pack_garbage(), removed by clean_pack_garbage(). */
static struct string_list pack_garbage = STRING_LIST_INIT_DUP;
67
68 static void clean_pack_garbage(void)
69 {
70 int i;
71 for (i = 0; i < pack_garbage.nr; i++)
72 unlink_or_warn(pack_garbage.items[i].string);
73 string_list_clear(&pack_garbage, 0);
74 }
75
76 static void report_pack_garbage(unsigned seen_bits, const char *path)
77 {
78 if (seen_bits == PACKDIR_FILE_IDX)
79 string_list_append(&pack_garbage, path);
80 }
81
82 static void process_log_file(void)
83 {
84 struct stat st;
85 if (fstat(get_lock_file_fd(&log_lock), &st)) {
86 /*
87 * Perhaps there was an i/o error or another
88 * unlikely situation. Try to make a note of
89 * this in gc.log along with any existing
90 * messages.
91 */
92 int saved_errno = errno;
93 fprintf(stderr, _("Failed to fstat %s: %s"),
94 get_lock_file_path(&log_lock),
95 strerror(saved_errno));
96 fflush(stderr);
97 commit_lock_file(&log_lock);
98 errno = saved_errno;
99 } else if (st.st_size) {
100 /* There was some error recorded in the lock file */
101 commit_lock_file(&log_lock);
102 } else {
103 /* No error, clean up any old gc.log */
104 unlink(git_path("gc.log"));
105 rollback_lock_file(&log_lock);
106 }
107 }
108
/*
 * atexit() handler registered by cmd_gc() when daemonized: flush
 * stderr first, then settle the log file.
 */
static void process_log_file_at_exit(void)
{
	fflush(stderr);

	process_log_file();
}
114
/*
 * Signal handler pushed by cmd_gc() via sigchain_push_common():
 * settle the log file, pop this handler, and re-raise `signo`.
 */
static void process_log_file_on_signal(int signo)
{
	process_log_file();

	sigchain_pop(signo);
	raise(signo);
}
121
122 static int gc_config_is_timestamp_never(const char *var)
123 {
124 const char *value;
125 timestamp_t expire;
126
127 if (!git_config_get_value(var, &value) && value) {
128 if (parse_expiry_date(value, &expire))
129 die(_("failed to parse '%s' value '%s'"), var, value);
130 return expire == 0;
131 }
132 return 0;
133 }
134
135 static void gc_config(void)
136 {
137 const char *value;
138
139 if (!git_config_get_value("gc.packrefs", &value)) {
140 if (value && !strcmp(value, "notbare"))
141 pack_refs = -1;
142 else
143 pack_refs = git_config_bool("gc.packrefs", value);
144 }
145
146 if (gc_config_is_timestamp_never("gc.reflogexpire") &&
147 gc_config_is_timestamp_never("gc.reflogexpireunreachable"))
148 prune_reflogs = 0;
149
150 git_config_get_int("gc.aggressivewindow", &aggressive_window);
151 git_config_get_int("gc.aggressivedepth", &aggressive_depth);
152 git_config_get_int("gc.auto", &gc_auto_threshold);
153 git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit);
154 git_config_get_bool("gc.autodetach", &detach_auto);
155 git_config_get_expiry("gc.pruneexpire", &prune_expire);
156 git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire);
157 git_config_get_expiry("gc.logexpiry", &gc_log_expire);
158
159 git_config_get_ulong("gc.bigpackthreshold", &big_pack_threshold);
160 git_config_get_ulong("pack.deltacachesize", &max_delta_cache_size);
161
162 git_config(git_default_config, NULL);
163 }
164
165 struct maintenance_run_opts;
166 static int maintenance_task_pack_refs(MAYBE_UNUSED struct maintenance_run_opts *opts)
167 {
168 struct strvec pack_refs_cmd = STRVEC_INIT;
169 strvec_pushl(&pack_refs_cmd, "pack-refs", "--all", "--prune", NULL);
170
171 return run_command_v_opt(pack_refs_cmd.v, RUN_GIT_CMD);
172 }
173
174 static int too_many_loose_objects(void)
175 {
176 /*
177 * Quickly check if a "gc" is needed, by estimating how
178 * many loose objects there are. Because SHA-1 is evenly
179 * distributed, we can check only one and get a reasonable
180 * estimate.
181 */
182 DIR *dir;
183 struct dirent *ent;
184 int auto_threshold;
185 int num_loose = 0;
186 int needed = 0;
187 const unsigned hexsz_loose = the_hash_algo->hexsz - 2;
188
189 dir = opendir(git_path("objects/17"));
190 if (!dir)
191 return 0;
192
193 auto_threshold = DIV_ROUND_UP(gc_auto_threshold, 256);
194 while ((ent = readdir(dir)) != NULL) {
195 if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose ||
196 ent->d_name[hexsz_loose] != '\0')
197 continue;
198 if (++num_loose > auto_threshold) {
199 needed = 1;
200 break;
201 }
202 }
203 closedir(dir);
204 return needed;
205 }
206
207 static struct packed_git *find_base_packs(struct string_list *packs,
208 unsigned long limit)
209 {
210 struct packed_git *p, *base = NULL;
211
212 for (p = get_all_packs(the_repository); p; p = p->next) {
213 if (!p->pack_local)
214 continue;
215 if (limit) {
216 if (p->pack_size >= limit)
217 string_list_append(packs, p->pack_name);
218 } else if (!base || base->pack_size < p->pack_size) {
219 base = p;
220 }
221 }
222
223 if (base)
224 string_list_append(packs, base->pack_name);
225
226 return base;
227 }
228
229 static int too_many_packs(void)
230 {
231 struct packed_git *p;
232 int cnt;
233
234 if (gc_auto_pack_limit <= 0)
235 return 0;
236
237 for (cnt = 0, p = get_all_packs(the_repository); p; p = p->next) {
238 if (!p->pack_local)
239 continue;
240 if (p->pack_keep)
241 continue;
242 /*
243 * Perhaps check the size of the pack and count only
244 * very small ones here?
245 */
246 cnt++;
247 }
248 return gc_auto_pack_limit < cnt;
249 }
250
/*
 * Return the amount of physical memory in bytes, or 0 when it cannot
 * be determined (no supported API compiled in, or the query failed).
 */
static uint64_t total_ram(void)
{
#if defined(HAVE_SYSINFO)
	struct sysinfo si;

	if (!sysinfo(&si)) {
		uint64_t total = si.totalram;

		/*
		 * sysinfo() reports totalram in units of mem_unit
		 * bytes, so scale it to get a byte count.
		 */
		if (si.mem_unit > 1)
			total *= (uint64_t)si.mem_unit;
		return total;
	}
#elif defined(HAVE_BSD_SYSCTL) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM))
	int64_t physical_memory;
	int mib[2];
	size_t length;

	mib[0] = CTL_HW;
# if defined(HW_MEMSIZE)
	mib[1] = HW_MEMSIZE;
# else
	mib[1] = HW_PHYSMEM;
# endif
	length = sizeof(int64_t);
	if (!sysctl(mib, 2, &physical_memory, &length, NULL, 0))
		return physical_memory;
#elif defined(GIT_WINDOWS_NATIVE)
	MEMORYSTATUSEX memInfo;

	memInfo.dwLength = sizeof(MEMORYSTATUSEX);
	if (GlobalMemoryStatusEx(&memInfo))
		return memInfo.ullTotalPhys;
#endif
	return 0;
}
281
282 static uint64_t estimate_repack_memory(struct packed_git *pack)
283 {
284 unsigned long nr_objects = approximate_object_count();
285 size_t os_cache, heap;
286
287 if (!pack || !nr_objects)
288 return 0;
289
290 /*
291 * First we have to scan through at least one pack.
292 * Assume enough room in OS file cache to keep the entire pack
293 * or we may accidentally evict data of other processes from
294 * the cache.
295 */
296 os_cache = pack->pack_size + pack->index_size;
297 /* then pack-objects needs lots more for book keeping */
298 heap = sizeof(struct object_entry) * nr_objects;
299 /*
300 * internal rev-list --all --objects takes up some memory too,
301 * let's say half of it is for blobs
302 */
303 heap += sizeof(struct blob) * nr_objects / 2;
304 /*
305 * and the other half is for trees (commits and tags are
306 * usually insignificant)
307 */
308 heap += sizeof(struct tree) * nr_objects / 2;
309 /* and then obj_hash[], underestimated in fact */
310 heap += sizeof(struct object *) * nr_objects;
311 /* revindex is used also */
312 heap += (sizeof(off_t) + sizeof(uint32_t)) * nr_objects;
313 /*
314 * read_sha1_file() (either at delta calculation phase, or
315 * writing phase) also fills up the delta base cache
316 */
317 heap += delta_base_cache_limit;
318 /* and of course pack-objects has its own delta cache */
319 heap += max_delta_cache_size;
320
321 return os_cache + heap;
322 }
323
324 static int keep_one_pack(struct string_list_item *item, void *data)
325 {
326 strvec_pushf(&repack, "--keep-pack=%s", basename(item->string));
327 return 0;
328 }
329
330 static void add_repack_all_option(struct string_list *keep_pack)
331 {
332 if (prune_expire && !strcmp(prune_expire, "now"))
333 strvec_push(&repack, "-a");
334 else {
335 strvec_push(&repack, "-A");
336 if (prune_expire)
337 strvec_pushf(&repack, "--unpack-unreachable=%s", prune_expire);
338 }
339
340 if (keep_pack)
341 for_each_string_list(keep_pack, keep_one_pack, NULL);
342 }
343
344 static void add_repack_incremental_option(void)
345 {
346 strvec_push(&repack, "--no-write-bitmap-index");
347 }
348
349 static int need_to_gc(void)
350 {
351 /*
352 * Setting gc.auto to 0 or negative can disable the
353 * automatic gc.
354 */
355 if (gc_auto_threshold <= 0)
356 return 0;
357
358 /*
359 * If there are too many loose objects, but not too many
360 * packs, we run "repack -d -l". If there are too many packs,
361 * we run "repack -A -d -l". Otherwise we tell the caller
362 * there is no need.
363 */
364 if (too_many_packs()) {
365 struct string_list keep_pack = STRING_LIST_INIT_NODUP;
366
367 if (big_pack_threshold) {
368 find_base_packs(&keep_pack, big_pack_threshold);
369 if (keep_pack.nr >= gc_auto_pack_limit) {
370 big_pack_threshold = 0;
371 string_list_clear(&keep_pack, 0);
372 find_base_packs(&keep_pack, 0);
373 }
374 } else {
375 struct packed_git *p = find_base_packs(&keep_pack, 0);
376 uint64_t mem_have, mem_want;
377
378 mem_have = total_ram();
379 mem_want = estimate_repack_memory(p);
380
381 /*
382 * Only allow 1/2 of memory for pack-objects, leave
383 * the rest for the OS and other processes in the
384 * system.
385 */
386 if (!mem_have || mem_want < mem_have / 2)
387 string_list_clear(&keep_pack, 0);
388 }
389
390 add_repack_all_option(&keep_pack);
391 string_list_clear(&keep_pack, 0);
392 } else if (too_many_loose_objects())
393 add_repack_incremental_option();
394 else
395 return 0;
396
397 if (run_hook_le(NULL, "pre-auto-gc", NULL))
398 return 0;
399 return 1;
400 }
401
/*
 * Take the gc lock by writing "<pid> <hostname>" into gc.pid.
 *
 * Returns NULL on success (or when this process already holds the
 * lock), else the hostname recorded by the gc that appears to be
 * running. In the latter case *ret_pid receives the recorded pid, and
 * the returned string points into a static buffer. With `force` set,
 * the existing gc.pid is not inspected at all.
 */
static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
{
	struct lock_file lock = LOCK_INIT;
	char my_host[HOST_NAME_MAX + 1];
	struct strbuf sb = STRBUF_INIT;
	struct stat st;
	uintmax_t pid;
	FILE *fp;
	int fd;
	char *pidfile_path;

	if (is_tempfile_active(pidfile))
		/* already locked */
		return NULL;

	/* fall back to a fixed name when the hostname is unavailable */
	if (xgethostname(my_host, sizeof(my_host)))
		xsnprintf(my_host, sizeof(my_host), "unknown");

	pidfile_path = git_pathdup("gc.pid");
	fd = hold_lock_file_for_update(&lock, pidfile_path,
				       LOCK_DIE_ON_ERROR);
	if (!force) {
		static char locking_host[HOST_NAME_MAX + 1];
		static char *scan_fmt;
		int should_exit;

		/* built once: "<uintmax pid> <host, at most HOST_NAME_MAX chars>" */
		if (!scan_fmt)
			scan_fmt = xstrfmt("%s %%%ds", "%"SCNuMAX, HOST_NAME_MAX);
		fp = fopen(pidfile_path, "r");
		memset(locking_host, 0, sizeof(locking_host));
		/*
		 * Every condition below must hold to consider another gc
		 * as running; the chain relies on short-circuiting, so
		 * st and pid are only read after they have been filled.
		 */
		should_exit =
			fp != NULL &&
			!fstat(fileno(fp), &st) &&
			/*
			 * 12 hour limit is very generous as gc should
			 * never take that long. On the other hand we
			 * don't really need a strict limit here,
			 * running gc --auto one day late is not a big
			 * problem. --force can be used in manual gc
			 * after the user verifies that no gc is
			 * running.
			 */
			time(NULL) - st.st_mtime <= 12 * 3600 &&
			fscanf(fp, scan_fmt, &pid, locking_host) == 2 &&
			/* be gentle to concurrent "gc" on remote hosts */
			(strcmp(locking_host, my_host) || !kill(pid, 0) || errno == EPERM);
		if (fp != NULL)
			fclose(fp);
		if (should_exit) {
			/* give up our own lock attempt and report the holder */
			if (fd >= 0)
				rollback_lock_file(&lock);
			*ret_pid = pid;
			free(pidfile_path);
			return locking_host;
		}
	}

	/* record ourselves as the lock holder */
	strbuf_addf(&sb, "%"PRIuMAX" %s",
		    (uintmax_t) getpid(), my_host);
	write_in_full(fd, sb.buf, sb.len);
	strbuf_release(&sb);
	commit_lock_file(&lock);
	/* register gc.pid as a tempfile; cmd_gc() drops it with delete_tempfile() */
	pidfile = register_tempfile(pidfile_path);
	free(pidfile_path);
	return NULL;
}
469
470 /*
471 * Returns 0 if there was no previous error and gc can proceed, 1 if
472 * gc should not proceed due to an error in the last run. Prints a
473 * message and returns -1 if an error occurred while reading gc.log
474 */
475 static int report_last_gc_error(void)
476 {
477 struct strbuf sb = STRBUF_INIT;
478 int ret = 0;
479 ssize_t len;
480 struct stat st;
481 char *gc_log_path = git_pathdup("gc.log");
482
483 if (stat(gc_log_path, &st)) {
484 if (errno == ENOENT)
485 goto done;
486
487 ret = error_errno(_("cannot stat '%s'"), gc_log_path);
488 goto done;
489 }
490
491 if (st.st_mtime < gc_log_expire_time)
492 goto done;
493
494 len = strbuf_read_file(&sb, gc_log_path, 0);
495 if (len < 0)
496 ret = error_errno(_("cannot read '%s'"), gc_log_path);
497 else if (len > 0) {
498 /*
499 * A previous gc failed. Report the error, and don't
500 * bother with an automatic gc run since it is likely
501 * to fail in the same way.
502 */
503 warning(_("The last gc run reported the following. "
504 "Please correct the root cause\n"
505 "and remove %s.\n"
506 "Automatic cleanup will not be performed "
507 "until the file is removed.\n\n"
508 "%s"),
509 gc_log_path, sb.buf);
510 ret = 1;
511 }
512 strbuf_release(&sb);
513 done:
514 free(gc_log_path);
515 return ret;
516 }
517
518 static void gc_before_repack(void)
519 {
520 /*
521 * We may be called twice, as both the pre- and
522 * post-daemonized phases will call us, but running these
523 * commands more than once is pointless and wasteful.
524 */
525 static int done = 0;
526 if (done++)
527 return;
528
529 if (pack_refs && maintenance_task_pack_refs(NULL))
530 die(FAILED_RUN, "pack-refs");
531
532 if (prune_reflogs && run_command_v_opt(reflog.v, RUN_GIT_CMD))
533 die(FAILED_RUN, reflog.v[0]);
534 }
535
/*
 * Entry point of "git gc".
 *
 * Reads the configuration and command line, decides (for --auto)
 * whether anything needs doing, takes the gc.pid lock, and then runs
 * in order: pack-refs / reflog expire, repack, prune, worktree prune
 * and rerere gc as child git commands, followed by removal of garbage
 * pack files and, if enabled in the repo settings, a commit-graph
 * write. Returns 0; failures of child commands end in die().
 */
int cmd_gc(int argc, const char **argv, const char *prefix)
{
	int aggressive = 0;
	int auto_gc = 0;
	int quiet = 0;
	int force = 0;
	const char *name;
	pid_t pid;
	int daemonized = 0;
	/* -1 means --[no-]keep-largest-pack was not given */
	int keep_largest_pack = -1;
	timestamp_t dummy;

	struct option builtin_gc_options[] = {
		OPT__QUIET(&quiet, N_("suppress progress reporting")),
		/*
		 * NOTE(review): with PARSE_OPT_OPTARG the last field is
		 * presumably the value used when --prune is given without
		 * an argument; confirm against the parse-options API.
		 */
		{ OPTION_STRING, 0, "prune", &prune_expire, N_("date"),
			N_("prune unreferenced objects"),
			PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire },
		OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")),
		OPT_BOOL_F(0, "auto", &auto_gc, N_("enable auto-gc mode"),
			   PARSE_OPT_NOCOMPLETE),
		OPT_BOOL_F(0, "force", &force,
			   N_("force running gc even if there may be another gc running"),
			   PARSE_OPT_NOCOMPLETE),
		OPT_BOOL(0, "keep-largest-pack", &keep_largest_pack,
			 N_("repack all other packs except the largest pack")),
		OPT_END()
	};

	if (argc == 2 && !strcmp(argv[1], "-h"))
		usage_with_options(builtin_gc_usage, builtin_gc_options);

	/* seed the fixed part of every child command line */
	strvec_pushl(&reflog, "reflog", "expire", "--all", NULL);
	strvec_pushl(&repack, "repack", "-d", "-l", NULL);
	strvec_pushl(&prune, "prune", "--expire", NULL);
	strvec_pushl(&prune_worktrees, "worktree", "prune", "--expire", NULL);
	strvec_pushl(&rerere, "rerere", "gc", NULL);

	/* gc_config() may overwrite the default expiry times */
	gc_config();
	if (parse_expiry_date(gc_log_expire, &gc_log_expire_time))
		die(_("failed to parse gc.logexpiry value %s"), gc_log_expire);

	/* gc.packrefs=notbare: pack refs unless the repository is bare */
	if (pack_refs < 0)
		pack_refs = !is_bare_repository();

	argc = parse_options(argc, argv, prefix, builtin_gc_options,
			     builtin_gc_usage, 0);
	if (argc > 0)
		usage_with_options(builtin_gc_usage, builtin_gc_options);

	/* validate only; the parsed timestamp itself is not used here */
	if (prune_expire && parse_expiry_date(prune_expire, &dummy))
		die(_("failed to parse prune expiry value %s"), prune_expire);

	if (aggressive) {
		strvec_push(&repack, "-f");
		if (aggressive_depth > 0)
			strvec_pushf(&repack, "--depth=%d", aggressive_depth);
		if (aggressive_window > 0)
			strvec_pushf(&repack, "--window=%d", aggressive_window);
	}
	if (quiet)
		strvec_push(&repack, "-q");

	if (auto_gc) {
		/*
		 * Auto-gc should be least intrusive as possible.
		 */
		if (!need_to_gc())
			return 0;
		if (!quiet) {
			if (detach_auto)
				fprintf(stderr, _("Auto packing the repository in background for optimum performance.\n"));
			else
				fprintf(stderr, _("Auto packing the repository for optimum performance.\n"));
			fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n"));
		}
		if (detach_auto) {
			int ret = report_last_gc_error();
			if (ret < 0)
				/* an I/O error occurred, already reported */
				exit(128);
			if (ret == 1)
				/* Last gc --auto failed. Skip this one. */
				return 0;

			/*
			 * Hold the lock only around the pre-repack work;
			 * it is dropped before daemonizing and taken
			 * again further down.
			 */
			if (lock_repo_for_gc(force, &pid))
				return 0;
			gc_before_repack(); /* dies on failure */
			delete_tempfile(&pidfile);

			/*
			 * failure to daemonize is ok, we'll continue
			 * in foreground
			 */
			daemonized = !daemonize();
		}
	} else {
		struct string_list keep_pack = STRING_LIST_INIT_NODUP;

		/* an explicit --[no-]keep-largest-pack overrides gc.bigpackthreshold */
		if (keep_largest_pack != -1) {
			if (keep_largest_pack)
				find_base_packs(&keep_pack, 0);
		} else if (big_pack_threshold) {
			find_base_packs(&keep_pack, big_pack_threshold);
		}

		add_repack_all_option(&keep_pack);
		string_list_clear(&keep_pack, 0);
	}

	name = lock_repo_for_gc(force, &pid);
	if (name) {
		if (auto_gc)
			return 0; /* be quiet on --auto */
		die(_("gc is already running on machine '%s' pid %"PRIuMAX" (use --force if not)"),
		    name, (uintmax_t)pid);
	}

	if (daemonized) {
		/*
		 * Send stderr into the gc.log lock file; the exit and
		 * signal handlers decide later whether to keep it.
		 */
		hold_lock_file_for_update(&log_lock,
					  git_path("gc.log"),
					  LOCK_DIE_ON_ERROR);
		dup2(get_lock_file_fd(&log_lock), 2);
		sigchain_push_common(process_log_file_on_signal);
		atexit(process_log_file_at_exit);
	}

	/* does nothing if it already ran before daemonizing */
	gc_before_repack();

	if (!repository_format_precious_objects) {
		close_object_store(the_repository->objects);
		if (run_command_v_opt(repack.v, RUN_GIT_CMD))
			die(FAILED_RUN, repack.v[0]);

		if (prune_expire) {
			strvec_push(&prune, prune_expire);
			if (quiet)
				strvec_push(&prune, "--no-progress");
			if (has_promisor_remote())
				strvec_push(&prune,
					    "--exclude-promisor-objects");
			if (run_command_v_opt(prune.v, RUN_GIT_CMD))
				die(FAILED_RUN, prune.v[0]);
		}
	}

	if (prune_worktrees_expire) {
		strvec_push(&prune_worktrees, prune_worktrees_expire);
		if (run_command_v_opt(prune_worktrees.v, RUN_GIT_CMD))
			die(FAILED_RUN, prune_worktrees.v[0]);
	}

	if (run_command_v_opt(rerere.v, RUN_GIT_CMD))
		die(FAILED_RUN, rerere.v[0]);

	/* collect garbage paths while re-scanning the packs, then delete them */
	report_garbage = report_pack_garbage;
	reprepare_packed_git(the_repository);
	if (pack_garbage.nr > 0) {
		close_object_store(the_repository->objects);
		clean_pack_garbage();
	}

	prepare_repo_settings(the_repository);
	if (the_repository->settings.gc_write_commit_graph == 1)
		write_commit_graph_reachable(the_repository->objects->odb,
					     !quiet && !daemonized ? COMMIT_GRAPH_WRITE_PROGRESS : 0,
					     NULL);

	if (auto_gc && too_many_loose_objects())
		warning(_("There are too many unreachable loose objects; "
			"run 'git prune' to remove them."));

	/* a foreground run that got this far removes any old gc.log */
	if (!daemonized)
		unlink(git_path("gc.log"));

	return 0;
}
713
/* Usage line for "git maintenance run"; its consumer is outside this view. */
static const char *const builtin_maintenance_run_usage[] = {
	N_("git maintenance run [--auto] [--[no-]quiet] [--task=<task>] [--schedule]"),
	NULL
};
718
/*
 * Schedule frequencies, ordered so that a more frequent schedule has
 * a larger value. SCHEDULE_NONE (0) doubles as "not recognized".
 */
enum schedule_priority {
	SCHEDULE_NONE = 0,
	SCHEDULE_WEEKLY = 1,
	SCHEDULE_DAILY = 2,
	SCHEDULE_HOURLY = 3,
};

/*
 * Map "hourly", "daily" or "weekly" (compared case-insensitively) to
 * its schedule_priority. NULL and any other string give SCHEDULE_NONE.
 */
static enum schedule_priority parse_schedule(const char *value)
{
	static const struct {
		const char *word;
		enum schedule_priority priority;
	} known[] = {
		{ "hourly", SCHEDULE_HOURLY },
		{ "daily", SCHEDULE_DAILY },
		{ "weekly", SCHEDULE_WEEKLY },
	};
	size_t i;

	if (!value)
		return SCHEDULE_NONE;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (!strcasecmp(value, known[i].word))
			return known[i].priority;
	}
	return SCHEDULE_NONE;
}
738
739 static int maintenance_opt_schedule(const struct option *opt, const char *arg,
740 int unset)
741 {
742 enum schedule_priority *priority = opt->value;
743
744 if (unset)
745 die(_("--no-schedule is not allowed"));
746
747 *priority = parse_schedule(arg);
748
749 if (!*priority)
750 die(_("unrecognized --schedule argument '%s'"), arg);
751
752 return 0;
753 }
754
/* Options handed to every maintenance task function. */
struct maintenance_run_opts {
	/* non-zero: maintenance_task_gc() passes "--auto" to "git gc" */
	int auto_flag;
	/* non-zero: tasks pass a quiet/no-progress flag to their child commands */
	int quiet;
	/* NOTE(review): presumably filled from --schedule; the reader is outside this view */
	enum schedule_priority schedule;
};
760
/* Remember to update object flag allocation in object.h */
/* Object flag set by dfs_on_ref() on parents it has already counted. */
#define SEEN (1u<<0)

/* State shared by should_write_commit_graph() and dfs_on_ref(). */
struct cg_auto_data {
	/* commits counted so far that are not in the commit-graph */
	int num_not_in_graph;
	/* the walk stops once num_not_in_graph reaches this value */
	int limit;
};
768
769 static int dfs_on_ref(const char *refname,
770 const struct object_id *oid, int flags,
771 void *cb_data)
772 {
773 struct cg_auto_data *data = (struct cg_auto_data *)cb_data;
774 int result = 0;
775 struct object_id peeled;
776 struct commit_list *stack = NULL;
777 struct commit *commit;
778
779 if (!peel_iterated_oid(oid, &peeled))
780 oid = &peeled;
781 if (oid_object_info(the_repository, oid, NULL) != OBJ_COMMIT)
782 return 0;
783
784 commit = lookup_commit(the_repository, oid);
785 if (!commit)
786 return 0;
787 if (parse_commit(commit) ||
788 commit_graph_position(commit) != COMMIT_NOT_FROM_GRAPH)
789 return 0;
790
791 data->num_not_in_graph++;
792
793 if (data->num_not_in_graph >= data->limit)
794 return 1;
795
796 commit_list_append(commit, &stack);
797
798 while (!result && stack) {
799 struct commit_list *parent;
800
801 commit = pop_commit(&stack);
802
803 for (parent = commit->parents; parent; parent = parent->next) {
804 if (parse_commit(parent->item) ||
805 commit_graph_position(parent->item) != COMMIT_NOT_FROM_GRAPH ||
806 parent->item->object.flags & SEEN)
807 continue;
808
809 parent->item->object.flags |= SEEN;
810 data->num_not_in_graph++;
811
812 if (data->num_not_in_graph >= data->limit) {
813 result = 1;
814 break;
815 }
816
817 commit_list_append(parent->item, &stack);
818 }
819 }
820
821 free_commit_list(stack);
822 return result;
823 }
824
825 static int should_write_commit_graph(void)
826 {
827 int result;
828 struct cg_auto_data data;
829
830 data.num_not_in_graph = 0;
831 data.limit = 100;
832 git_config_get_int("maintenance.commit-graph.auto",
833 &data.limit);
834
835 if (!data.limit)
836 return 0;
837 if (data.limit < 0)
838 return 1;
839
840 result = for_each_ref(dfs_on_ref, &data);
841
842 repo_clear_commit_marks(the_repository, SEEN);
843
844 return result;
845 }
846
847 static int run_write_commit_graph(struct maintenance_run_opts *opts)
848 {
849 struct child_process child = CHILD_PROCESS_INIT;
850
851 child.git_cmd = 1;
852 strvec_pushl(&child.args, "commit-graph", "write",
853 "--split", "--reachable", NULL);
854
855 if (opts->quiet)
856 strvec_push(&child.args, "--no-progress");
857
858 return !!run_command(&child);
859 }
860
861 static int maintenance_task_commit_graph(struct maintenance_run_opts *opts)
862 {
863 prepare_repo_settings(the_repository);
864 if (!the_repository->settings.core_commit_graph)
865 return 0;
866
867 close_object_store(the_repository->objects);
868 if (run_write_commit_graph(opts)) {
869 error(_("failed to write commit-graph"));
870 return 1;
871 }
872
873 return 0;
874 }
875
876 static int fetch_remote(const char *remote, struct maintenance_run_opts *opts)
877 {
878 struct child_process child = CHILD_PROCESS_INIT;
879
880 child.git_cmd = 1;
881 strvec_pushl(&child.args, "fetch", remote, "--prune", "--no-tags",
882 "--no-write-fetch-head", "--recurse-submodules=no",
883 "--refmap=", NULL);
884
885 if (opts->quiet)
886 strvec_push(&child.args, "--quiet");
887
888 strvec_pushf(&child.args, "+refs/heads/*:refs/prefetch/%s/*", remote);
889
890 return !!run_command(&child);
891 }
892
893 static int append_remote(struct remote *remote, void *cbdata)
894 {
895 struct string_list *remotes = (struct string_list *)cbdata;
896
897 string_list_append(remotes, remote->name);
898 return 0;
899 }
900
901 static int maintenance_task_prefetch(struct maintenance_run_opts *opts)
902 {
903 int result = 0;
904 struct string_list_item *item;
905 struct string_list remotes = STRING_LIST_INIT_DUP;
906
907 git_config_set_multivar_gently("log.excludedecoration",
908 "refs/prefetch/",
909 "refs/prefetch/",
910 CONFIG_FLAGS_FIXED_VALUE |
911 CONFIG_FLAGS_MULTI_REPLACE);
912
913 if (for_each_remote(append_remote, &remotes)) {
914 error(_("failed to fill remotes"));
915 result = 1;
916 goto cleanup;
917 }
918
919 for_each_string_list_item(item, &remotes)
920 result |= fetch_remote(item->string, opts);
921
922 cleanup:
923 string_list_clear(&remotes, 0);
924 return result;
925 }
926
927 static int maintenance_task_gc(struct maintenance_run_opts *opts)
928 {
929 struct child_process child = CHILD_PROCESS_INIT;
930
931 child.git_cmd = 1;
932 strvec_push(&child.args, "gc");
933
934 if (opts->auto_flag)
935 strvec_push(&child.args, "--auto");
936 if (opts->quiet)
937 strvec_push(&child.args, "--quiet");
938 else
939 strvec_push(&child.args, "--no-quiet");
940
941 close_object_store(the_repository->objects);
942 return run_command(&child);
943 }
944
945 static int prune_packed(struct maintenance_run_opts *opts)
946 {
947 struct child_process child = CHILD_PROCESS_INIT;
948
949 child.git_cmd = 1;
950 strvec_push(&child.args, "prune-packed");
951
952 if (opts->quiet)
953 strvec_push(&child.args, "--quiet");
954
955 return !!run_command(&child);
956 }
957
/* State passed to write_loose_object_to_stdin() by pack_loose(). */
struct write_loose_object_data {
	/* stream connected to the stdin of "git pack-objects" */
	FILE *in;
	/* object ids written so far */
	int count;
	/* iteration is asked to stop once count exceeds this */
	int batch_size;
};

/* Default for maintenance.loose-objects.auto; see loose_object_auto_condition(). */
static int loose_object_auto_limit = 100;
965
966 static int loose_object_count(const struct object_id *oid,
967 const char *path,
968 void *data)
969 {
970 int *count = (int*)data;
971 if (++(*count) >= loose_object_auto_limit)
972 return 1;
973 return 0;
974 }
975
976 static int loose_object_auto_condition(void)
977 {
978 int count = 0;
979
980 git_config_get_int("maintenance.loose-objects.auto",
981 &loose_object_auto_limit);
982
983 if (!loose_object_auto_limit)
984 return 0;
985 if (loose_object_auto_limit < 0)
986 return 1;
987
988 return for_each_loose_file_in_objdir(the_repository->objects->odb->path,
989 loose_object_count,
990 NULL, NULL, &count);
991 }
992
/*
 * Loose-object callback that returns 1 unconditionally; pack_loose()
 * uses it to learn whether any loose object exists at all. All
 * arguments are ignored.
 */
static int bail_on_loose(const struct object_id *oid,
			 const char *path,
			 void *data)
{
	return 1;
}
999
1000 static int write_loose_object_to_stdin(const struct object_id *oid,
1001 const char *path,
1002 void *data)
1003 {
1004 struct write_loose_object_data *d = (struct write_loose_object_data *)data;
1005
1006 fprintf(d->in, "%s\n", oid_to_hex(oid));
1007
1008 return ++(d->count) > d->batch_size;
1009 }
1010
1011 static int pack_loose(struct maintenance_run_opts *opts)
1012 {
1013 struct repository *r = the_repository;
1014 int result = 0;
1015 struct write_loose_object_data data;
1016 struct child_process pack_proc = CHILD_PROCESS_INIT;
1017
1018 /*
1019 * Do not start pack-objects process
1020 * if there are no loose objects.
1021 */
1022 if (!for_each_loose_file_in_objdir(r->objects->odb->path,
1023 bail_on_loose,
1024 NULL, NULL, NULL))
1025 return 0;
1026
1027 pack_proc.git_cmd = 1;
1028
1029 strvec_push(&pack_proc.args, "pack-objects");
1030 if (opts->quiet)
1031 strvec_push(&pack_proc.args, "--quiet");
1032 strvec_pushf(&pack_proc.args, "%s/pack/loose", r->objects->odb->path);
1033
1034 pack_proc.in = -1;
1035
1036 if (start_command(&pack_proc)) {
1037 error(_("failed to start 'git pack-objects' process"));
1038 return 1;
1039 }
1040
1041 data.in = xfdopen(pack_proc.in, "w");
1042 data.count = 0;
1043 data.batch_size = 50000;
1044
1045 for_each_loose_file_in_objdir(r->objects->odb->path,
1046 write_loose_object_to_stdin,
1047 NULL,
1048 NULL,
1049 &data);
1050
1051 fclose(data.in);
1052
1053 if (finish_command(&pack_proc)) {
1054 error(_("failed to finish 'git pack-objects' process"));
1055 result = 1;
1056 }
1057
1058 return result;
1059 }
1060
/*
 * loose-objects maintenance task: prune_packed() first, then
 * pack_loose() only if that succeeded. Returns 0 when both succeed
 * and 1 otherwise.
 */
static int maintenance_task_loose_objects(struct maintenance_run_opts *opts)
{
	if (prune_packed(opts))
		return 1;
	return pack_loose(opts) ? 1 : 0;
}
1065
1066 static int incremental_repack_auto_condition(void)
1067 {
1068 struct packed_git *p;
1069 int enabled;
1070 int incremental_repack_auto_limit = 10;
1071 int count = 0;
1072
1073 if (git_config_get_bool("core.multiPackIndex", &enabled) ||
1074 !enabled)
1075 return 0;
1076
1077 git_config_get_int("maintenance.incremental-repack.auto",
1078 &incremental_repack_auto_limit);
1079
1080 if (!incremental_repack_auto_limit)
1081 return 0;
1082 if (incremental_repack_auto_limit < 0)
1083 return 1;
1084
1085 for (p = get_packed_git(the_repository);
1086 count < incremental_repack_auto_limit && p;
1087 p = p->next) {
1088 if (!p->multi_pack_index)
1089 count++;
1090 }
1091
1092 return count >= incremental_repack_auto_limit;
1093 }
1094
1095 static int multi_pack_index_write(struct maintenance_run_opts *opts)
1096 {
1097 struct child_process child = CHILD_PROCESS_INIT;
1098
1099 child.git_cmd = 1;
1100 strvec_pushl(&child.args, "multi-pack-index", "write", NULL);
1101
1102 if (opts->quiet)
1103 strvec_push(&child.args, "--no-progress");
1104
1105 if (run_command(&child))
1106 return error(_("failed to write multi-pack-index"));
1107
1108 return 0;
1109 }
1110
1111 static int multi_pack_index_expire(struct maintenance_run_opts *opts)
1112 {
1113 struct child_process child = CHILD_PROCESS_INIT;
1114
1115 child.git_cmd = 1;
1116 strvec_pushl(&child.args, "multi-pack-index", "expire", NULL);
1117
1118 if (opts->quiet)
1119 strvec_push(&child.args, "--no-progress");
1120
1121 close_object_store(the_repository->objects);
1122
1123 if (run_command(&child))
1124 return error(_("'git multi-pack-index expire' failed"));
1125
1126 return 0;
1127 }
1128
1129 #define TWO_GIGABYTES (INT32_MAX)
1130
1131 static off_t get_auto_pack_size(void)
1132 {
1133 /*
1134 * The "auto" value is special: we optimize for
1135 * one large pack-file (i.e. from a clone) and
1136 * expect the rest to be small and they can be
1137 * repacked quickly.
1138 *
1139 * The strategy we select here is to select a
1140 * size that is one more than the second largest
1141 * pack-file. This ensures that we will repack
1142 * at least two packs if there are three or more
1143 * packs.
1144 */
1145 off_t max_size = 0;
1146 off_t second_largest_size = 0;
1147 off_t result_size;
1148 struct packed_git *p;
1149 struct repository *r = the_repository;
1150
1151 reprepare_packed_git(r);
1152 for (p = get_all_packs(r); p; p = p->next) {
1153 if (p->pack_size > max_size) {
1154 second_largest_size = max_size;
1155 max_size = p->pack_size;
1156 } else if (p->pack_size > second_largest_size)
1157 second_largest_size = p->pack_size;
1158 }
1159
1160 result_size = second_largest_size + 1;
1161
1162 /* But limit ourselves to a batch size of 2g */
1163 if (result_size > TWO_GIGABYTES)
1164 result_size = TWO_GIGABYTES;
1165
1166 return result_size;
1167 }
1168
1169 static int multi_pack_index_repack(struct maintenance_run_opts *opts)
1170 {
1171 struct child_process child = CHILD_PROCESS_INIT;
1172
1173 child.git_cmd = 1;
1174 strvec_pushl(&child.args, "multi-pack-index", "repack", NULL);
1175
1176 if (opts->quiet)
1177 strvec_push(&child.args, "--no-progress");
1178
1179 strvec_pushf(&child.args, "--batch-size=%"PRIuMAX,
1180 (uintmax_t)get_auto_pack_size());
1181
1182 close_object_store(the_repository->objects);
1183
1184 if (run_command(&child))
1185 return error(_("'git multi-pack-index repack' failed"));
1186
1187 return 0;
1188 }
1189
1190 static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts)
1191 {
1192 prepare_repo_settings(the_repository);
1193 if (!the_repository->settings.core_multi_pack_index) {
1194 warning(_("skipping incremental-repack task because core.multiPackIndex is disabled"));
1195 return 0;
1196 }
1197
1198 if (multi_pack_index_write(opts))
1199 return 1;
1200 if (multi_pack_index_expire(opts))
1201 return 1;
1202 if (multi_pack_index_repack(opts))
1203 return 1;
1204 return 0;
1205 }
1206
/*
 * A task function performs one maintenance task. A nonzero return
 * value is reported by the task runner as a failure of that task.
 */
typedef int maintenance_task_fn(struct maintenance_run_opts *opts);

/*
 * An auto condition function decides whether its task should run
 * under --auto: it returns 1 if the task should run and 0 if the
 * task should NOT run. See need_to_gc() for an example.
 */
typedef int maintenance_auto_fn(void);
1215
1216 struct maintenance_task {
1217 const char *name;
1218 maintenance_task_fn *fn;
1219 maintenance_auto_fn *auto_condition;
1220 unsigned enabled:1;
1221
1222 enum schedule_priority schedule;
1223
1224 /* -1 if not selected. */
1225 int selected_order;
1226 };
1227
/*
 * Indices into the tasks[] array, one per known maintenance task.
 */
enum maintenance_task_label {
	TASK_PREFETCH,
	TASK_LOOSE_OBJECTS,
	TASK_INCREMENTAL_REPACK,
	TASK_GC,
	TASK_COMMIT_GRAPH,
	TASK_PACK_REFS,

	/* Number of tasks; leave as final value */
	TASK__COUNT
};
1239
1240 static struct maintenance_task tasks[] = {
1241 [TASK_PREFETCH] = {
1242 "prefetch",
1243 maintenance_task_prefetch,
1244 },
1245 [TASK_LOOSE_OBJECTS] = {
1246 "loose-objects",
1247 maintenance_task_loose_objects,
1248 loose_object_auto_condition,
1249 },
1250 [TASK_INCREMENTAL_REPACK] = {
1251 "incremental-repack",
1252 maintenance_task_incremental_repack,
1253 incremental_repack_auto_condition,
1254 },
1255 [TASK_GC] = {
1256 "gc",
1257 maintenance_task_gc,
1258 need_to_gc,
1259 1,
1260 },
1261 [TASK_COMMIT_GRAPH] = {
1262 "commit-graph",
1263 maintenance_task_commit_graph,
1264 should_write_commit_graph,
1265 },
1266 [TASK_PACK_REFS] = {
1267 "pack-refs",
1268 maintenance_task_pack_refs,
1269 NULL,
1270 },
1271 };
1272
1273 static int compare_tasks_by_selection(const void *a_, const void *b_)
1274 {
1275 const struct maintenance_task *a = a_;
1276 const struct maintenance_task *b = b_;
1277
1278 return b->selected_order - a->selected_order;
1279 }
1280
1281 static int maintenance_run_tasks(struct maintenance_run_opts *opts)
1282 {
1283 int i, found_selected = 0;
1284 int result = 0;
1285 struct lock_file lk;
1286 struct repository *r = the_repository;
1287 char *lock_path = xstrfmt("%s/maintenance", r->objects->odb->path);
1288
1289 if (hold_lock_file_for_update(&lk, lock_path, LOCK_NO_DEREF) < 0) {
1290 /*
1291 * Another maintenance command is running.
1292 *
1293 * If --auto was provided, then it is likely due to a
1294 * recursive process stack. Do not report an error in
1295 * that case.
1296 */
1297 if (!opts->auto_flag && !opts->quiet)
1298 warning(_("lock file '%s' exists, skipping maintenance"),
1299 lock_path);
1300 free(lock_path);
1301 return 0;
1302 }
1303 free(lock_path);
1304
1305 for (i = 0; !found_selected && i < TASK__COUNT; i++)
1306 found_selected = tasks[i].selected_order >= 0;
1307
1308 if (found_selected)
1309 QSORT(tasks, TASK__COUNT, compare_tasks_by_selection);
1310
1311 for (i = 0; i < TASK__COUNT; i++) {
1312 if (found_selected && tasks[i].selected_order < 0)
1313 continue;
1314
1315 if (!found_selected && !tasks[i].enabled)
1316 continue;
1317
1318 if (opts->auto_flag &&
1319 (!tasks[i].auto_condition ||
1320 !tasks[i].auto_condition()))
1321 continue;
1322
1323 if (opts->schedule && tasks[i].schedule < opts->schedule)
1324 continue;
1325
1326 trace2_region_enter("maintenance", tasks[i].name, r);
1327 if (tasks[i].fn(opts)) {
1328 error(_("task '%s' failed"), tasks[i].name);
1329 result = 1;
1330 }
1331 trace2_region_leave("maintenance", tasks[i].name, r);
1332 }
1333
1334 rollback_lock_file(&lk);
1335 return result;
1336 }
1337
1338 static void initialize_maintenance_strategy(void)
1339 {
1340 char *config_str;
1341
1342 if (git_config_get_string("maintenance.strategy", &config_str))
1343 return;
1344
1345 if (!strcasecmp(config_str, "incremental")) {
1346 tasks[TASK_GC].schedule = SCHEDULE_NONE;
1347 tasks[TASK_COMMIT_GRAPH].enabled = 1;
1348 tasks[TASK_COMMIT_GRAPH].schedule = SCHEDULE_HOURLY;
1349 tasks[TASK_PREFETCH].enabled = 1;
1350 tasks[TASK_PREFETCH].schedule = SCHEDULE_HOURLY;
1351 tasks[TASK_INCREMENTAL_REPACK].enabled = 1;
1352 tasks[TASK_INCREMENTAL_REPACK].schedule = SCHEDULE_DAILY;
1353 tasks[TASK_LOOSE_OBJECTS].enabled = 1;
1354 tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
1355 tasks[TASK_PACK_REFS].enabled = 1;
1356 tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
1357 }
1358 }
1359
1360 static void initialize_task_config(int schedule)
1361 {
1362 int i;
1363 struct strbuf config_name = STRBUF_INIT;
1364 gc_config();
1365
1366 if (schedule)
1367 initialize_maintenance_strategy();
1368
1369 for (i = 0; i < TASK__COUNT; i++) {
1370 int config_value;
1371 char *config_str;
1372
1373 strbuf_reset(&config_name);
1374 strbuf_addf(&config_name, "maintenance.%s.enabled",
1375 tasks[i].name);
1376
1377 if (!git_config_get_bool(config_name.buf, &config_value))
1378 tasks[i].enabled = config_value;
1379
1380 strbuf_reset(&config_name);
1381 strbuf_addf(&config_name, "maintenance.%s.schedule",
1382 tasks[i].name);
1383
1384 if (!git_config_get_string(config_name.buf, &config_str)) {
1385 tasks[i].schedule = parse_schedule(config_str);
1386 free(config_str);
1387 }
1388 }
1389
1390 strbuf_release(&config_name);
1391 }
1392
1393 static int task_option_parse(const struct option *opt,
1394 const char *arg, int unset)
1395 {
1396 int i, num_selected = 0;
1397 struct maintenance_task *task = NULL;
1398
1399 BUG_ON_OPT_NEG(unset);
1400
1401 for (i = 0; i < TASK__COUNT; i++) {
1402 if (tasks[i].selected_order >= 0)
1403 num_selected++;
1404 if (!strcasecmp(tasks[i].name, arg)) {
1405 task = &tasks[i];
1406 }
1407 }
1408
1409 if (!task) {
1410 error(_("'%s' is not a valid task"), arg);
1411 return 1;
1412 }
1413
1414 if (task->selected_order >= 0) {
1415 error(_("task '%s' cannot be selected multiple times"), arg);
1416 return 1;
1417 }
1418
1419 task->selected_order = num_selected + 1;
1420
1421 return 0;
1422 }
1423
1424 static int maintenance_run(int argc, const char **argv, const char *prefix)
1425 {
1426 int i;
1427 struct maintenance_run_opts opts;
1428 struct option builtin_maintenance_run_options[] = {
1429 OPT_BOOL(0, "auto", &opts.auto_flag,
1430 N_("run tasks based on the state of the repository")),
1431 OPT_CALLBACK(0, "schedule", &opts.schedule, N_("frequency"),
1432 N_("run tasks based on frequency"),
1433 maintenance_opt_schedule),
1434 OPT_BOOL(0, "quiet", &opts.quiet,
1435 N_("do not report progress or other information over stderr")),
1436 OPT_CALLBACK_F(0, "task", NULL, N_("task"),
1437 N_("run a specific task"),
1438 PARSE_OPT_NONEG, task_option_parse),
1439 OPT_END()
1440 };
1441 memset(&opts, 0, sizeof(opts));
1442
1443 opts.quiet = !isatty(2);
1444
1445 for (i = 0; i < TASK__COUNT; i++)
1446 tasks[i].selected_order = -1;
1447
1448 argc = parse_options(argc, argv, prefix,
1449 builtin_maintenance_run_options,
1450 builtin_maintenance_run_usage,
1451 PARSE_OPT_STOP_AT_NON_OPTION);
1452
1453 if (opts.auto_flag && opts.schedule)
1454 die(_("use at most one of --auto and --schedule=<frequency>"));
1455
1456 initialize_task_config(opts.schedule);
1457
1458 if (argc != 0)
1459 usage_with_options(builtin_maintenance_run_usage,
1460 builtin_maintenance_run_options);
1461 return maintenance_run_tasks(&opts);
1462 }
1463
1464 static char *get_maintpath(void)
1465 {
1466 struct strbuf sb = STRBUF_INIT;
1467 const char *p = the_repository->worktree ?
1468 the_repository->worktree : the_repository->gitdir;
1469
1470 strbuf_realpath(&sb, p, 1);
1471 return strbuf_detach(&sb, NULL);
1472 }
1473
1474 static int maintenance_register(void)
1475 {
1476 int rc;
1477 char *config_value;
1478 struct child_process config_set = CHILD_PROCESS_INIT;
1479 struct child_process config_get = CHILD_PROCESS_INIT;
1480 char *maintpath = get_maintpath();
1481
1482 /* Disable foreground maintenance */
1483 git_config_set("maintenance.auto", "false");
1484
1485 /* Set maintenance strategy, if unset */
1486 if (!git_config_get_string("maintenance.strategy", &config_value))
1487 free(config_value);
1488 else
1489 git_config_set("maintenance.strategy", "incremental");
1490
1491 config_get.git_cmd = 1;
1492 strvec_pushl(&config_get.args, "config", "--global", "--get",
1493 "--fixed-value", "maintenance.repo", maintpath, NULL);
1494 config_get.out = -1;
1495
1496 if (start_command(&config_get)) {
1497 rc = error(_("failed to run 'git config'"));
1498 goto done;
1499 }
1500
1501 /* We already have this value in our config! */
1502 if (!finish_command(&config_get)) {
1503 rc = 0;
1504 goto done;
1505 }
1506
1507 config_set.git_cmd = 1;
1508 strvec_pushl(&config_set.args, "config", "--add", "--global", "maintenance.repo",
1509 maintpath, NULL);
1510
1511 rc = run_command(&config_set);
1512
1513 done:
1514 free(maintpath);
1515 return rc;
1516 }
1517
1518 static int maintenance_unregister(void)
1519 {
1520 int rc;
1521 struct child_process config_unset = CHILD_PROCESS_INIT;
1522 char *maintpath = get_maintpath();
1523
1524 config_unset.git_cmd = 1;
1525 strvec_pushl(&config_unset.args, "config", "--global", "--unset",
1526 "--fixed-value", "maintenance.repo", maintpath, NULL);
1527
1528 rc = run_command(&config_unset);
1529 free(maintpath);
1530 return rc;
1531 }
1532
1533 static const char *get_frequency(enum schedule_priority schedule)
1534 {
1535 switch (schedule) {
1536 case SCHEDULE_HOURLY:
1537 return "hourly";
1538 case SCHEDULE_DAILY:
1539 return "daily";
1540 case SCHEDULE_WEEKLY:
1541 return "weekly";
1542 default:
1543 BUG("invalid schedule %d", schedule);
1544 }
1545 }
1546
/*
 * Build the launchd service label for the given frequency,
 * "org.git-scm.git.<frequency>". The caller owns the returned string.
 */
static char *launchctl_service_name(const char *frequency)
{
	return xstrfmt("org.git-scm.git.%s", frequency);
}
1553
/*
 * Return the expanded path of the plist file for the service "name",
 * i.e. "~/Library/LaunchAgents/<name>.plist" passed through
 * expand_user_path(). Dies if the path cannot be expanded. The caller
 * owns the returned string.
 */
static char *launchctl_service_filename(const char *name)
{
	char *pattern = xstrfmt("~/Library/LaunchAgents/%s.plist", name);
	char *path = expand_user_path(pattern, 1);

	if (!path)
		die(_("failed to expand path '%s'"), pattern);

	free(pattern);
	return path;
}
1567
/*
 * Return the launchctl domain target for the current user,
 * "gui/<uid>". The caller owns the returned string.
 *
 * uid_t need not be a signed int, so the value is widened to
 * uintmax_t for printing; "%d" would render user ids above INT_MAX as
 * negative numbers.
 */
static char *launchctl_get_uid(void)
{
	return xstrfmt("gui/%"PRIuMAX, (uintmax_t)getuid());
}
1572
1573 static int launchctl_boot_plist(int enable, const char *filename, const char *cmd)
1574 {
1575 int result;
1576 struct child_process child = CHILD_PROCESS_INIT;
1577 char *uid = launchctl_get_uid();
1578
1579 strvec_split(&child.args, cmd);
1580 if (enable)
1581 strvec_push(&child.args, "bootstrap");
1582 else
1583 strvec_push(&child.args, "bootout");
1584 strvec_push(&child.args, uid);
1585 strvec_push(&child.args, filename);
1586
1587 child.no_stderr = 1;
1588 child.no_stdout = 1;
1589
1590 if (start_command(&child))
1591 die(_("failed to start launchctl"));
1592
1593 result = finish_command(&child);
1594
1595 free(uid);
1596 return result;
1597 }
1598
1599 static int launchctl_remove_plist(enum schedule_priority schedule, const char *cmd)
1600 {
1601 const char *frequency = get_frequency(schedule);
1602 char *name = launchctl_service_name(frequency);
1603 char *filename = launchctl_service_filename(name);
1604 int result = launchctl_boot_plist(0, filename, cmd);
1605 unlink(filename);
1606 free(filename);
1607 free(name);
1608 return result;
1609 }
1610
1611 static int launchctl_remove_plists(const char *cmd)
1612 {
1613 return launchctl_remove_plist(SCHEDULE_HOURLY, cmd) ||
1614 launchctl_remove_plist(SCHEDULE_DAILY, cmd) ||
1615 launchctl_remove_plist(SCHEDULE_WEEKLY, cmd);
1616 }
1617
1618 static int launchctl_schedule_plist(const char *exec_path, enum schedule_priority schedule, const char *cmd)
1619 {
1620 FILE *plist;
1621 int i;
1622 const char *preamble, *repeat;
1623 const char *frequency = get_frequency(schedule);
1624 char *name = launchctl_service_name(frequency);
1625 char *filename = launchctl_service_filename(name);
1626
1627 if (safe_create_leading_directories(filename))
1628 die(_("failed to create directories for '%s'"), filename);
1629 plist = xfopen(filename, "w");
1630
1631 preamble = "<?xml version=\"1.0\"?>\n"
1632 "<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\n"
1633 "<plist version=\"1.0\">"
1634 "<dict>\n"
1635 "<key>Label</key><string>%s</string>\n"
1636 "<key>ProgramArguments</key>\n"
1637 "<array>\n"
1638 "<string>%s/git</string>\n"
1639 "<string>--exec-path=%s</string>\n"
1640 "<string>for-each-repo</string>\n"
1641 "<string>--config=maintenance.repo</string>\n"
1642 "<string>maintenance</string>\n"
1643 "<string>run</string>\n"
1644 "<string>--schedule=%s</string>\n"
1645 "</array>\n"
1646 "<key>StartCalendarInterval</key>\n"
1647 "<array>\n";
1648 fprintf(plist, preamble, name, exec_path, exec_path, frequency);
1649
1650 switch (schedule) {
1651 case SCHEDULE_HOURLY:
1652 repeat = "<dict>\n"
1653 "<key>Hour</key><integer>%d</integer>\n"
1654 "<key>Minute</key><integer>0</integer>\n"
1655 "</dict>\n";
1656 for (i = 1; i <= 23; i++)
1657 fprintf(plist, repeat, i);
1658 break;
1659
1660 case SCHEDULE_DAILY:
1661 repeat = "<dict>\n"
1662 "<key>Day</key><integer>%d</integer>\n"
1663 "<key>Hour</key><integer>0</integer>\n"
1664 "<key>Minute</key><integer>0</integer>\n"
1665 "</dict>\n";
1666 for (i = 1; i <= 6; i++)
1667 fprintf(plist, repeat, i);
1668 break;
1669
1670 case SCHEDULE_WEEKLY:
1671 fprintf(plist,
1672 "<dict>\n"
1673 "<key>Day</key><integer>0</integer>\n"
1674 "<key>Hour</key><integer>0</integer>\n"
1675 "<key>Minute</key><integer>0</integer>\n"
1676 "</dict>\n");
1677 break;
1678
1679 default:
1680 /* unreachable */
1681 break;
1682 }
1683 fprintf(plist, "</array>\n</dict>\n</plist>\n");
1684 fclose(plist);
1685
1686 /* bootout might fail if not already running, so ignore */
1687 launchctl_boot_plist(0, filename, cmd);
1688 if (launchctl_boot_plist(1, filename, cmd))
1689 die(_("failed to bootstrap service %s"), filename);
1690
1691 free(filename);
1692 free(name);
1693 return 0;
1694 }
1695
1696 static int launchctl_add_plists(const char *cmd)
1697 {
1698 const char *exec_path = git_exec_path();
1699
1700 return launchctl_schedule_plist(exec_path, SCHEDULE_HOURLY, cmd) ||
1701 launchctl_schedule_plist(exec_path, SCHEDULE_DAILY, cmd) ||
1702 launchctl_schedule_plist(exec_path, SCHEDULE_WEEKLY, cmd);
1703 }
1704
/*
 * launchctl backend of the background scheduler: install the services
 * when "run_maintenance" is nonzero, remove them otherwise. "fd" is
 * not used by this backend.
 */
static int launchctl_update_schedule(int run_maintenance, int fd, const char *cmd)
{
	return run_maintenance ? launchctl_add_plists(cmd)
			       : launchctl_remove_plists(cmd);
}
1712
/*
 * Build the scheduled-task name for the given frequency,
 * "Git Maintenance (<frequency>)". The caller owns the returned string.
 */
static char *schtasks_task_name(const char *frequency)
{
	return xstrfmt("Git Maintenance (%s)", frequency);
}
1719
1720 static int schtasks_remove_task(enum schedule_priority schedule, const char *cmd)
1721 {
1722 int result;
1723 struct strvec args = STRVEC_INIT;
1724 const char *frequency = get_frequency(schedule);
1725 char *name = schtasks_task_name(frequency);
1726
1727 strvec_split(&args, cmd);
1728 strvec_pushl(&args, "/delete", "/tn", name, "/f", NULL);
1729
1730 result = run_command_v_opt(args.v, 0);
1731
1732 strvec_clear(&args);
1733 free(name);
1734 return result;
1735 }
1736
1737 static int schtasks_remove_tasks(const char *cmd)
1738 {
1739 return schtasks_remove_task(SCHEDULE_HOURLY, cmd) ||
1740 schtasks_remove_task(SCHEDULE_DAILY, cmd) ||
1741 schtasks_remove_task(SCHEDULE_WEEKLY, cmd);
1742 }
1743
1744 static int schtasks_schedule_task(const char *exec_path, enum schedule_priority schedule, const char *cmd)
1745 {
1746 int result;
1747 struct child_process child = CHILD_PROCESS_INIT;
1748 const char *xml;
1749 struct tempfile *tfile;
1750 const char *frequency = get_frequency(schedule);
1751 char *name = schtasks_task_name(frequency);
1752 struct strbuf tfilename = STRBUF_INIT;
1753
1754 strbuf_addf(&tfilename, "%s/schedule_%s_XXXXXX",
1755 get_git_common_dir(), frequency);
1756 tfile = xmks_tempfile(tfilename.buf);
1757 strbuf_release(&tfilename);
1758
1759 if (!fdopen_tempfile(tfile, "w"))
1760 die(_("failed to create temp xml file"));
1761
1762 xml = "<?xml version=\"1.0\" ?>\n"
1763 "<Task version=\"1.4\" xmlns=\"http://schemas.microsoft.com/windows/2004/02/mit/task\">\n"
1764 "<Triggers>\n"
1765 "<CalendarTrigger>\n";
1766 fputs(xml, tfile->fp);
1767
1768 switch (schedule) {
1769 case SCHEDULE_HOURLY:
1770 fprintf(tfile->fp,
1771 "<StartBoundary>2020-01-01T01:00:00</StartBoundary>\n"
1772 "<Enabled>true</Enabled>\n"
1773 "<ScheduleByDay>\n"
1774 "<DaysInterval>1</DaysInterval>\n"
1775 "</ScheduleByDay>\n"
1776 "<Repetition>\n"
1777 "<Interval>PT1H</Interval>\n"
1778 "<Duration>PT23H</Duration>\n"
1779 "<StopAtDurationEnd>false</StopAtDurationEnd>\n"
1780 "</Repetition>\n");
1781 break;
1782
1783 case SCHEDULE_DAILY:
1784 fprintf(tfile->fp,
1785 "<StartBoundary>2020-01-01T00:00:00</StartBoundary>\n"
1786 "<Enabled>true</Enabled>\n"
1787 "<ScheduleByWeek>\n"
1788 "<DaysOfWeek>\n"
1789 "<Monday />\n"
1790 "<Tuesday />\n"
1791 "<Wednesday />\n"
1792 "<Thursday />\n"
1793 "<Friday />\n"
1794 "<Saturday />\n"
1795 "</DaysOfWeek>\n"
1796 "<WeeksInterval>1</WeeksInterval>\n"
1797 "</ScheduleByWeek>\n");
1798 break;
1799
1800 case SCHEDULE_WEEKLY:
1801 fprintf(tfile->fp,
1802 "<StartBoundary>2020-01-01T00:00:00</StartBoundary>\n"
1803 "<Enabled>true</Enabled>\n"
1804 "<ScheduleByWeek>\n"
1805 "<DaysOfWeek>\n"
1806 "<Sunday />\n"
1807 "</DaysOfWeek>\n"
1808 "<WeeksInterval>1</WeeksInterval>\n"
1809 "</ScheduleByWeek>\n");
1810 break;
1811
1812 default:
1813 break;
1814 }
1815
1816 xml = "</CalendarTrigger>\n"
1817 "</Triggers>\n"
1818 "<Principals>\n"
1819 "<Principal id=\"Author\">\n"
1820 "<LogonType>InteractiveToken</LogonType>\n"
1821 "<RunLevel>LeastPrivilege</RunLevel>\n"
1822 "</Principal>\n"
1823 "</Principals>\n"
1824 "<Settings>\n"
1825 "<MultipleInstancesPolicy>IgnoreNew</MultipleInstancesPolicy>\n"
1826 "<Enabled>true</Enabled>\n"
1827 "<Hidden>true</Hidden>\n"
1828 "<UseUnifiedSchedulingEngine>true</UseUnifiedSchedulingEngine>\n"
1829 "<WakeToRun>false</WakeToRun>\n"
1830 "<ExecutionTimeLimit>PT72H</ExecutionTimeLimit>\n"
1831 "<Priority>7</Priority>\n"
1832 "</Settings>\n"
1833 "<Actions Context=\"Author\">\n"
1834 "<Exec>\n"
1835 "<Command>\"%s\\git.exe\"</Command>\n"
1836 "<Arguments>--exec-path=\"%s\" for-each-repo --config=maintenance.repo maintenance run --schedule=%s</Arguments>\n"
1837 "</Exec>\n"
1838 "</Actions>\n"
1839 "</Task>\n";
1840 fprintf(tfile->fp, xml, exec_path, exec_path, frequency);
1841 strvec_split(&child.args, cmd);
1842 strvec_pushl(&child.args, "/create", "/tn", name, "/f", "/xml",
1843 get_tempfile_path(tfile), NULL);
1844 close_tempfile_gently(tfile);
1845
1846 child.no_stdout = 1;
1847 child.no_stderr = 1;
1848
1849 if (start_command(&child))
1850 die(_("failed to start schtasks"));
1851 result = finish_command(&child);
1852
1853 delete_tempfile(&tfile);
1854 free(name);
1855 return result;
1856 }
1857
1858 static int schtasks_schedule_tasks(const char *cmd)
1859 {
1860 const char *exec_path = git_exec_path();
1861
1862 return schtasks_schedule_task(exec_path, SCHEDULE_HOURLY, cmd) ||
1863 schtasks_schedule_task(exec_path, SCHEDULE_DAILY, cmd) ||
1864 schtasks_schedule_task(exec_path, SCHEDULE_WEEKLY, cmd);
1865 }
1866
/*
 * schtasks backend of the background scheduler: create the tasks when
 * "run_maintenance" is nonzero, remove them otherwise. "fd" is not
 * used by this backend.
 */
static int schtasks_update_schedule(int run_maintenance, int fd, const char *cmd)
{
	return run_maintenance ? schtasks_schedule_tasks(cmd)
			       : schtasks_remove_tasks(cmd);
}
1874
1875 #define BEGIN_LINE "# BEGIN GIT MAINTENANCE SCHEDULE"
1876 #define END_LINE "# END GIT MAINTENANCE SCHEDULE"
1877
1878 static int crontab_update_schedule(int run_maintenance, int fd, const char *cmd)
1879 {
1880 int result = 0;
1881 int in_old_region = 0;
1882 struct child_process crontab_list = CHILD_PROCESS_INIT;
1883 struct child_process crontab_edit = CHILD_PROCESS_INIT;
1884 FILE *cron_list, *cron_in;
1885 struct strbuf line = STRBUF_INIT;
1886
1887 strvec_split(&crontab_list.args, cmd);
1888 strvec_push(&crontab_list.args, "-l");
1889 crontab_list.in = -1;
1890 crontab_list.out = dup(fd);
1891 crontab_list.git_cmd = 0;
1892
1893 if (start_command(&crontab_list))
1894 return error(_("failed to run 'crontab -l'; your system might not support 'cron'"));
1895
1896 /* Ignore exit code, as an empty crontab will return error. */
1897 finish_command(&crontab_list);
1898
1899 /*
1900 * Read from the .lock file, filtering out the old
1901 * schedule while appending the new schedule.
1902 */
1903 cron_list = fdopen(fd, "r");
1904 rewind(cron_list);
1905
1906 strvec_split(&crontab_edit.args, cmd);
1907 crontab_edit.in = -1;
1908 crontab_edit.git_cmd = 0;
1909
1910 if (start_command(&crontab_edit))
1911 return error(_("failed to run 'crontab'; your system might not support 'cron'"));
1912
1913 cron_in = fdopen(crontab_edit.in, "w");
1914 if (!cron_in) {
1915 result = error(_("failed to open stdin of 'crontab'"));
1916 goto done_editing;
1917 }
1918
1919 while (!strbuf_getline_lf(&line, cron_list)) {
1920 if (!in_old_region && !strcmp(line.buf, BEGIN_LINE))
1921 in_old_region = 1;
1922 else if (in_old_region && !strcmp(line.buf, END_LINE))
1923 in_old_region = 0;
1924 else if (!in_old_region)
1925 fprintf(cron_in, "%s\n", line.buf);
1926 }
1927
1928 if (run_maintenance) {
1929 struct strbuf line_format = STRBUF_INIT;
1930 const char *exec_path = git_exec_path();
1931
1932 fprintf(cron_in, "%s\n", BEGIN_LINE);
1933 fprintf(cron_in,
1934 "# The following schedule was created by Git\n");
1935 fprintf(cron_in, "# Any edits made in this region might be\n");
1936 fprintf(cron_in,
1937 "# replaced in the future by a Git command.\n\n");
1938
1939 strbuf_addf(&line_format,
1940 "%%s %%s * * %%s \"%s/git\" --exec-path=\"%s\" for-each-repo --config=maintenance.repo maintenance run --schedule=%%s\n",
1941 exec_path, exec_path);
1942 fprintf(cron_in, line_format.buf, "0", "1-23", "*", "hourly");
1943 fprintf(cron_in, line_format.buf, "0", "0", "1-6", "daily");
1944 fprintf(cron_in, line_format.buf, "0", "0", "0", "weekly");
1945 strbuf_release(&line_format);
1946
1947 fprintf(cron_in, "\n%s\n", END_LINE);
1948 }
1949
1950 fflush(cron_in);
1951 fclose(cron_in);
1952 close(crontab_edit.in);
1953
1954 done_editing:
1955 if (finish_command(&crontab_edit))
1956 result = error(_("'crontab' died"));
1957 else
1958 fclose(cron_list);
1959 return result;
1960 }
1961
1962 #if defined(__APPLE__)
1963 static const char platform_scheduler[] = "launchctl";
1964 #elif defined(GIT_WINDOWS_NATIVE)
1965 static const char platform_scheduler[] = "schtasks";
1966 #else
1967 static const char platform_scheduler[] = "crontab";
1968 #endif
1969
1970 static int update_background_schedule(int enable)
1971 {
1972 int result;
1973 const char *scheduler = platform_scheduler;
1974 const char *cmd = scheduler;
1975 char *testing;
1976 struct lock_file lk;
1977 char *lock_path = xstrfmt("%s/schedule", the_repository->objects->odb->path);
1978
1979 testing = xstrdup_or_null(getenv("GIT_TEST_MAINT_SCHEDULER"));
1980 if (testing) {
1981 char *sep = strchr(testing, ':');
1982 if (!sep)
1983 die("GIT_TEST_MAINT_SCHEDULER unparseable: %s", testing);
1984 *sep = '\0';
1985 scheduler = testing;
1986 cmd = sep + 1;
1987 }
1988
1989 if (hold_lock_file_for_update(&lk, lock_path, LOCK_NO_DEREF) < 0)
1990 return error(_("another process is scheduling background maintenance"));
1991
1992 if (!strcmp(scheduler, "launchctl"))
1993 result = launchctl_update_schedule(enable, get_lock_file_fd(&lk), cmd);
1994 else if (!strcmp(scheduler, "schtasks"))
1995 result = schtasks_update_schedule(enable, get_lock_file_fd(&lk), cmd);
1996 else if (!strcmp(scheduler, "crontab"))
1997 result = crontab_update_schedule(enable, get_lock_file_fd(&lk), cmd);
1998 else
1999 die("unknown background scheduler: %s", scheduler);
2000
2001 rollback_lock_file(&lk);
2002 free(testing);
2003 return result;
2004 }
2005
/*
 * Implementation of "git maintenance start": register the repository
 * (a failure there only produces a warning) and install the
 * background schedule. Returns the result of installing the schedule.
 */
static int maintenance_start(void)
{
	int register_failed = maintenance_register();

	if (register_failed)
		warning(_("failed to add repo to global config"));

	return update_background_schedule(1);
}
2013
/*
 * Implementation of "git maintenance stop": remove the background
 * schedule. Returns the result of update_background_schedule().
 */
static int maintenance_stop(void)
{
	const int enable = 0;

	return update_background_schedule(enable);
}
2018
2019 static const char builtin_maintenance_usage[] = N_("git maintenance <subcommand> [<options>]");
2020
2021 int cmd_maintenance(int argc, const char **argv, const char *prefix)
2022 {
2023 if (argc < 2 ||
2024 (argc == 2 && !strcmp(argv[1], "-h")))
2025 usage(builtin_maintenance_usage);
2026
2027 if (!strcmp(argv[1], "run"))
2028 return maintenance_run(argc - 1, argv + 1, prefix);
2029 if (!strcmp(argv[1], "start"))
2030 return maintenance_start();
2031 if (!strcmp(argv[1], "stop"))
2032 return maintenance_stop();
2033 if (!strcmp(argv[1], "register"))
2034 return maintenance_register();
2035 if (!strcmp(argv[1], "unregister"))
2036 return maintenance_unregister();
2037
2038 die(_("invalid subcommand: %s"), argv[1]);
2039 }