From: Junio C Hamano Date: Wed, 18 Oct 2023 20:25:41 +0000 (-0700) Subject: Merge branch 'tb/repack-max-cruft-size' X-Git-Tag: v2.43.0-rc0~33 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=79861babe2d58387cd50010c9c63e4ef95afeb12;p=thirdparty%2Fgit.git Merge branch 'tb/repack-max-cruft-size' "git repack" learned "--max-cruft-size" to prevent cruft packs from growing without bounds. * tb/repack-max-cruft-size: repack: free existing_cruft array after use builtin/repack.c: avoid making cruft packs preferred builtin/repack.c: implement support for `--max-cruft-size` builtin/repack.c: parse `--max-pack-size` with OPT_MAGNITUDE t7700: split cruft-related tests to t7704 --- 79861babe2d58387cd50010c9c63e4ef95afeb12 diff --cc builtin/repack.c index db9277081d,69e8f302c0..edaee4dbec --- a/builtin/repack.c +++ b/builtin/repack.c @@@ -808,86 -891,73 +893,153 @@@ static void remove_redundant_bitmaps(st strbuf_release(&path); } +static int finish_pack_objects_cmd(struct child_process *cmd, + struct string_list *names, + int local) +{ + FILE *out; + struct strbuf line = STRBUF_INIT; + + out = xfdopen(cmd->out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + + if (line.len != the_hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + /* + * Avoid putting packs written outside of the repository in the + * list of names. + */ + if (local) { + item = string_list_append(names, line.buf); + item->util = populate_pack_exts(line.buf); + } + } + fclose(out); + + strbuf_release(&line); + + return finish_command(cmd); +} + +static int write_filtered_pack(const struct pack_objects_args *args, + const char *destination, + const char *pack_prefix, + struct existing_packs *existing, + struct string_list *names) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *caret; + const char *scratch; + int local = skip_prefix(destination, packdir, &scratch); + + prepare_pack_objects(&cmd, args, destination); + + strvec_push(&cmd.args, "--stdin-packs"); + + if (!pack_kept_objects) + strvec_push(&cmd.args, "--honor-pack-keep"); + for_each_string_list_item(item, &existing->kept_packs) + strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * Here 'names' contains only the pack(s) that were just + * written, which is exactly the packs we want to keep. Also + * 'existing_kept_packs' already contains the packs in + * 'keep_pack_list'. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "%s.pack\n", item->string); + caret = pack_kept_objects ? "" : "^"; + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s%s.pack\n", caret, item->string); + fclose(in); + + return finish_pack_objects_cmd(&cmd, names, local); +} + + static int existing_cruft_pack_cmp(const void *va, const void *vb) + { + struct packed_git *a = *(struct packed_git **)va; + struct packed_git *b = *(struct packed_git **)vb; + + if (a->pack_size < b->pack_size) + return -1; + if (a->pack_size > b->pack_size) + return 1; + return 0; + } + + static void collapse_small_cruft_packs(FILE *in, size_t max_size, + struct existing_packs *existing) + { + struct packed_git **existing_cruft, *p; + struct strbuf buf = STRBUF_INIT; + size_t total_size = 0; + size_t existing_cruft_nr = 0; + size_t i; + + ALLOC_ARRAY(existing_cruft, existing->cruft_packs.nr); + + for (p = get_all_packs(the_repository); p; p = p->next) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (existing_cruft_nr >= existing->cruft_packs.nr) + BUG("too many cruft packs (found %"PRIuMAX", but knew " + "of %"PRIuMAX")", + (uintmax_t)existing_cruft_nr + 1, + (uintmax_t)existing->cruft_packs.nr); + existing_cruft[existing_cruft_nr++] = p; + } + + QSORT(existing_cruft, existing_cruft_nr, existing_cruft_pack_cmp); + + for (i = 0; i < existing_cruft_nr; i++) { + size_t proposed; + + p = existing_cruft[i]; + proposed = st_add(total_size, p->pack_size); + + if (proposed <= max_size) { + total_size = proposed; + fprintf(in, "-%s\n", pack_basename(p)); + } else { + retain_cruft_pack(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); + free(existing_cruft); + } + static int write_cruft_pack(const struct pack_objects_args *args, const char *destination, const char *pack_prefix, @@@ -1017,9 -1104,8 +1175,9 @@@ int cmd_repack(int argc, const char **a N_("limits the maximum delta depth")), OPT_STRING(0, "threads", &po_args.threads, N_("n"), N_("limits the maximum number of threads")), - OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"), + OPT_MAGNITUDE(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), + OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),