" [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \
" [--refs-snapshot=<path>]")
+#define BUILTIN_MIDX_COMPACT_USAGE \
+ N_("git multi-pack-index [<options>] compact [--[no-]incremental]\n" \
+ " <from> <to>")
+
#define BUILTIN_MIDX_VERIFY_USAGE \
N_("git multi-pack-index [<options>] verify")
BUILTIN_MIDX_WRITE_USAGE,
NULL
};
+static char const * const builtin_multi_pack_index_compact_usage[] = {
+ BUILTIN_MIDX_COMPACT_USAGE,
+ NULL
+};
static char const * const builtin_multi_pack_index_verify_usage[] = {
BUILTIN_MIDX_VERIFY_USAGE,
NULL
};
static char const * const builtin_multi_pack_index_usage[] = {
BUILTIN_MIDX_WRITE_USAGE,
+ BUILTIN_MIDX_COMPACT_USAGE,
BUILTIN_MIDX_VERIFY_USAGE,
BUILTIN_MIDX_EXPIRE_USAGE,
BUILTIN_MIDX_REPACK_USAGE,
return ret;
}
+static int cmd_multi_pack_index_compact(int argc, const char **argv,
+ const char *prefix,
+ struct repository *repo)
+{
+ struct multi_pack_index *m, *cur;
+ struct multi_pack_index *from_midx = NULL;
+ struct multi_pack_index *to_midx = NULL;
+ struct odb_source *source;
+ int ret;
+
+ struct option *options;
+ static struct option builtin_multi_pack_index_compact_options[] = {
+ OPT_BIT(0, "incremental", &opts.flags,
+ N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL),
+ OPT_END(),
+ };
+
+ repo_config(repo, git_multi_pack_index_write_config, NULL);
+
+ options = add_common_options(builtin_multi_pack_index_compact_options);
+
+ trace2_cmd_mode(argv[0]);
+
+ if (isatty(2))
+ opts.flags |= MIDX_PROGRESS;
+ argc = parse_options(argc, argv, prefix,
+ options, builtin_multi_pack_index_compact_usage,
+ 0);
+
+ if (argc != 2)
+ usage_with_options(builtin_multi_pack_index_compact_usage,
+ options);
+ source = handle_object_dir_option(the_repository);
+
+ FREE_AND_NULL(options);
+
+ m = get_multi_pack_index(source);
+
+ for (cur = m; cur && !(from_midx && to_midx); cur = cur->base_midx) {
+ const char *midx_csum = get_midx_checksum(cur);
+
+ if (!from_midx && !strcmp(midx_csum, argv[0]))
+ from_midx = cur;
+ if (!to_midx && !strcmp(midx_csum, argv[1]))
+ to_midx = cur;
+ }
+
+ if (!from_midx)
+ die(_("could not find MIDX 'from': %s"), argv[0]);
+ if (!to_midx)
+ die(_("could not find MIDX 'to': %s"), argv[1]);
+
+ ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags);
+
+ return ret;
+}
+
static int cmd_multi_pack_index_verify(int argc, const char **argv,
const char *prefix,
struct repository *repo UNUSED)
struct option builtin_multi_pack_index_options[] = {
OPT_SUBCOMMAND("repack", &fn, cmd_multi_pack_index_repack),
OPT_SUBCOMMAND("write", &fn, cmd_multi_pack_index_write),
+ OPT_SUBCOMMAND("compact", &fn, cmd_multi_pack_index_compact),
OPT_SUBCOMMAND("verify", &fn, cmd_multi_pack_index_verify),
OPT_SUBCOMMAND("expire", &fn, cmd_multi_pack_index_expire),
OPT_END(),
int incremental;
uint32_t num_multi_pack_indexes_before;
+ struct multi_pack_index *compact_from;
+ struct multi_pack_index *compact_to;
+ int compact;
+
struct string_list *to_include;
struct repository *repo;
static uint32_t midx_pack_perm(struct write_midx_context *ctx,
uint32_t orig_pack_int_id)
{
+ if (ctx->compact)
+ orig_pack_int_id -= ctx->compact_from->num_packs_in_base;
return ctx->pack_perm[orig_pack_int_id];
}
cur_fanout);
}
+static void midx_fanout_add_compact(struct midx_fanout *fanout,
+ struct write_midx_context *ctx,
+ uint32_t cur_fanout)
+{
+ struct multi_pack_index *m = ctx->compact_to;
+
+ ASSERT(ctx->compact);
+
+ while (m && m != ctx->compact_from->base_midx) {
+ midx_fanout_add_midx_fanout(fanout, m, cur_fanout,
+ NO_PREFERRED_PACK);
+ m = m->base_midx;
+ }
+}
+
/*
* It is possible to artificially get into a state where there are many
* duplicate copies of objects. That can create high memory pressure if
size_t alloc_objects, total_objects = 0;
struct midx_fanout fanout = { 0 };
+ if (ctx->compact)
+ ASSERT(!start_pack);
+
for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++)
total_objects = st_add(total_objects,
ctx->info[cur_pack].p->num_objects);
for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
fanout.nr = 0;
- midx_fanout_add(&fanout, ctx, start_pack, cur_fanout);
+ if (ctx->compact)
+ midx_fanout_add_compact(&fanout, ctx, cur_fanout);
+ else
+ midx_fanout_add(&fanout, ctx, start_pack, cur_fanout);
midx_fanout_sort(&fanout);
/*
return 0;
}
+static uint32_t compactible_packs_between(const struct multi_pack_index *from,
+ const struct multi_pack_index *to)
+{
+ uint32_t nr;
+
+ ASSERT(from && to);
+
+ nr = u32_add(to->num_packs, to->num_packs_in_base);
+ if (nr < from->num_packs_in_base)
+ BUG("unexpected number of packs in base during compaction: "
+ "%"PRIu32" < %"PRIu32, nr, from->num_packs_in_base);
+
+ return nr - from->num_packs_in_base;
+}
+
+static int fill_packs_from_midx_range(struct write_midx_context *ctx,
+ int bitmap_order)
+{
+ struct multi_pack_index *m = ctx->compact_to;
+ uint32_t packs_nr;
+
+ ASSERT(ctx->compact && !ctx->nr);
+ ASSERT(ctx->compact_from);
+ ASSERT(ctx->compact_to);
+
+ packs_nr = compactible_packs_between(ctx->compact_from,
+ ctx->compact_to);
+
+ ALLOC_GROW(ctx->info, packs_nr, ctx->alloc);
+
+ while (m != ctx->compact_from->base_midx) {
+ uint32_t pack_int_id, preferred_pack_id;
+ uint32_t i;
+
+ if (bitmap_order) {
+ if (midx_preferred_pack(m, &preferred_pack_id) < 0)
+ die(_("could not determine preferred pack"));
+ } else {
+ preferred_pack_id = m->num_packs_in_base;
+ }
+
+ pack_int_id = m->num_packs_in_base - ctx->compact_from->num_packs_in_base;
+
+ if (fill_pack_from_midx(&ctx->info[pack_int_id++], m,
+ preferred_pack_id) < 0)
+ return -1;
+
+ for (i = m->num_packs_in_base;
+ i < m->num_packs_in_base + m->num_packs; i++) {
+ if (preferred_pack_id == i)
+ continue;
+
+ if (fill_pack_from_midx(&ctx->info[pack_int_id++], m,
+ i) < 0)
+ return -1;
+ }
+
+ ctx->nr += m->num_packs;
+ m = m->base_midx;
+ }
+
+ ASSERT(ctx->nr == packs_nr);
+
+ return 0;
+}
+
static struct {
const char *non_split;
const char *split;
strbuf_release(&buf);
}
+static int midx_hashcmp(const struct multi_pack_index *a,
+ const struct multi_pack_index *b,
+ const struct git_hash_algo *algop)
+{
+ return hashcmp(get_midx_hash(a), get_midx_hash(b), algop);
+}
+
struct write_midx_opts {
struct odb_source *source;
struct string_list *packs_to_include;
struct string_list *packs_to_drop;
+ struct multi_pack_index *compact_from;
+ struct multi_pack_index *compact_to;
+
const char *preferred_pack_name;
const char *refs_snapshot;
unsigned flags;
int dropped_packs = 0;
int result = -1;
const char **keep_hashes = NULL;
+ size_t keep_hashes_nr = 0;
struct chunkfile *cf;
trace2_region_enter("midx", "write_midx_internal", r);
ctx.source = opts->source;
ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL);
+ ctx.compact = !!(opts->flags & MIDX_WRITE_COMPACT);
+
+ if (ctx.compact) {
+ if (!opts->compact_from)
+ BUG("expected non-NULL 'from' MIDX during compaction");
+ if (!opts->compact_to)
+ BUG("expected non-NULL 'to' MIDX during compaction");
+
+ ctx.compact_from = opts->compact_from;
+ ctx.compact_to = opts->compact_to;
+ }
if (ctx.incremental)
strbuf_addf(&midx_name,
*/
if (ctx.incremental)
ctx.base_midx = m;
- else if (!opts->packs_to_include)
+ if (!opts->packs_to_include)
ctx.m = m;
}
}
+ /*
+ * If compacting MIDX layer(s) in the range [from, to], then the
+ * compacted MIDX will share the same base MIDX as 'from'.
+ */
+ if (ctx.compact)
+ ctx.base_midx = ctx.compact_from->base_midx;
+
ctx.nr = 0;
ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16;
ctx.info = NULL;
ctx.num_multi_pack_indexes_before++;
m = m->base_midx;
}
- } else if (ctx.m && fill_packs_from_midx(&ctx)) {
+ } else if (ctx.m && !ctx.compact && fill_packs_from_midx(&ctx)) {
goto cleanup;
}
else
ctx.progress = NULL;
- ctx.to_include = opts->packs_to_include;
+ if (ctx.compact) {
+ int bitmap_order = 0;
+ if (opts->preferred_pack_name)
+ bitmap_order |= 1;
+ else if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))
+ bitmap_order |= 1;
- for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx);
+ fill_packs_from_midx_range(&ctx, bitmap_order);
+ } else {
+ ctx.to_include = opts->packs_to_include;
+ for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx);
+ }
stop_progress(&ctx.progress);
if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) &&
!ctx.incremental &&
+ !ctx.compact &&
!(opts->packs_to_include || opts->packs_to_drop)) {
struct bitmap_index *bitmap_git;
int bitmap_exists;
ctx.large_offsets_needed = 1;
}
- QSORT(ctx.info, ctx.nr, pack_info_compare);
+ if (!ctx.compact)
+ QSORT(ctx.info, ctx.nr, pack_info_compare);
if (opts->packs_to_drop && opts->packs_to_drop->nr) {
size_t drop_index = 0;
int missing_drops = 0;
+ ASSERT(!ctx.compact);
+
for (size_t i = 0;
i < ctx.nr && drop_index < opts->packs_to_drop->nr; i++) {
int cmp = strcmp(ctx.info[i].pack_name,
*/
ALLOC_ARRAY(ctx.pack_perm, ctx.nr);
for (size_t i = 0; i < ctx.nr; i++) {
+ uint32_t from = ctx.info[i].orig_pack_int_id;
+ uint32_t to;
+
if (ctx.info[i].expired) {
+ to = PACK_EXPIRED;
dropped_packs++;
- ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED;
} else {
- ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs;
+ to = i - dropped_packs;
}
+
+ if (ctx.compact)
+ from -= ctx.compact_from->num_packs_in_base;
+
+ ctx.pack_perm[from] = to;
}
for (size_t i = 0; i < ctx.nr; i++) {
if (ctx.num_multi_pack_indexes_before == UINT32_MAX)
die(_("too many multi-pack-indexes"));
- CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1);
+ if (ctx.compact) {
+ struct multi_pack_index *m;
+
+ /*
+ * Keep all MIDX layers excluding those in the range [from, to].
+ */
+ for (m = ctx.base_midx; m; m = m->base_midx)
+ keep_hashes_nr++;
+ for (m = ctx.m;
+ m && midx_hashcmp(m, ctx.compact_to, r->hash_algo);
+ m = m->base_midx)
+ keep_hashes_nr++;
+
+ keep_hashes_nr++; /* include the compacted layer */
+ } else {
+ keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1;
+ }
+ CALLOC_ARRAY(keep_hashes, keep_hashes_nr);
if (ctx.incremental) {
FILE *chainf = fdopen_lock_file(&lk, "w");
strbuf_release(&final_midx_name);
- keep_hashes[ctx.num_multi_pack_indexes_before] =
- xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo));
+ if (ctx.compact) {
+ struct multi_pack_index *m;
+ uint32_t num_layers_before_from = 0;
+ uint32_t i;
- for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) {
- uint32_t j = ctx.num_multi_pack_indexes_before - i - 1;
+ for (m = ctx.base_midx; m; m = m->base_midx)
+ num_layers_before_from++;
- keep_hashes[j] = xstrdup(get_midx_checksum(m));
- m = m->base_midx;
+ m = ctx.base_midx;
+ for (i = 0; i < num_layers_before_from; i++) {
+ uint32_t j = num_layers_before_from - i - 1;
+
+ keep_hashes[j] = xstrdup(get_midx_checksum(m));
+ m = m->base_midx;
+ }
+
+ keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash,
+ r->hash_algo));
+
+ i = 0;
+ for (m = ctx.m;
+ m && midx_hashcmp(m, ctx.compact_to, r->hash_algo);
+ m = m->base_midx) {
+ keep_hashes[keep_hashes_nr - i - 1] =
+ xstrdup(get_midx_checksum(m));
+ i++;
+ }
+ } else {
+ keep_hashes[ctx.num_multi_pack_indexes_before] =
+ xstrdup(hash_to_hex_algop(midx_hash,
+ r->hash_algo));
+
+ for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) {
+ uint32_t j = ctx.num_multi_pack_indexes_before - i - 1;
+
+ keep_hashes[j] = xstrdup(get_midx_checksum(m));
+ m = m->base_midx;
+ }
}
- for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++)
+ for (uint32_t i = 0; i < keep_hashes_nr; i++)
fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]);
} else {
keep_hashes[ctx.num_multi_pack_indexes_before] =
if (commit_lock_file(&lk) < 0)
die_errno(_("could not write multi-pack-index"));
- clear_midx_files(opts->source, keep_hashes,
- ctx.num_multi_pack_indexes_before + 1,
+ clear_midx_files(opts->source, keep_hashes, keep_hashes_nr,
ctx.incremental);
result = 0;
free(ctx.pack_perm);
free(ctx.pack_order);
if (keep_hashes) {
- for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++)
+ for (uint32_t i = 0; i < keep_hashes_nr; i++)
free((char *)keep_hashes[i]);
free(keep_hashes);
}
return write_midx_internal(&opts);
}
+int write_midx_file_compact(struct odb_source *source,
+ struct multi_pack_index *from,
+ struct multi_pack_index *to,
+ unsigned flags)
+{
+ struct write_midx_opts opts = {
+ .source = source,
+ .compact_from = from,
+ .compact_to = to,
+ .flags = flags | MIDX_WRITE_COMPACT,
+ };
+
+ return write_midx_internal(&opts);
+}
+
int expire_midx_packs(struct odb_source *source, unsigned flags)
{
uint32_t i, *count, result = 0;
--- /dev/null
+#!/bin/sh
+
+test_description='multi-pack-index compaction'
+
+. ./test-lib.sh
+
+GIT_TEST_MULTI_PACK_INDEX=0
+GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
+GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0
+
+objdir=.git/objects
+packdir=$objdir/pack
+midxdir=$packdir/multi-pack-index.d
+midx_chain=$midxdir/multi-pack-index-chain
+
+nth_line() {
+ local n="$1"
+ shift
+ awk "NR==$n" "$@"
+}
+
+write_packs () {
+ for c in "$@"
+ do
+ test_commit "$c" &&
+
+ git pack-objects --all --unpacked $packdir/pack-$c &&
+ git prune-packed &&
+
+ git multi-pack-index write --incremental --bitmap || return 1
+ done
+}
+
+test_midx_layer_packs () {
+ local checksum="$1" &&
+ shift &&
+
+ test-tool read-midx $objdir "$checksum" >out &&
+
+ printf "%s\n" "$@" >expect &&
+ # NOTE: do *not* pipe through sort here, we want to ensure the
+ # order of packs is preserved during compaction.
+ grep "^pack-" out | cut -d"-" -f2 >actual &&
+
+ test_cmp expect actual
+}
+
+test_midx_layer_object_uniqueness () {
+ : >objs.all
+ while read layer
+ do
+ test-tool read-midx --show-objects $objdir "$layer" >out &&
+ grep "\.pack$" out | cut -d" " -f1 | sort >objs.layer &&
+ test_stdout_line_count = 0 comm -12 objs.all objs.layer &&
+ cat objs.all objs.layer | sort >objs.tmp &&
+ mv objs.tmp objs.all || return 1
+ done <$midx_chain
+}
+
+test_expect_success 'MIDX compaction with lex-ordered pack names' '
+ git init midx-compact-lex-order &&
+ (
+ cd midx-compact-lex-order &&
+
+ write_packs A B C D E &&
+ test_line_count = 5 $midx_chain &&
+
+ git multi-pack-index compact --incremental \
+ "$(nth_line 2 "$midx_chain")" \
+ "$(nth_line 4 "$midx_chain")" &&
+ test_line_count = 3 $midx_chain &&
+
+ test_midx_layer_packs "$(nth_line 1 "$midx_chain")" A &&
+ test_midx_layer_packs "$(nth_line 2 "$midx_chain")" B C D &&
+ test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E &&
+
+ test_midx_layer_object_uniqueness
+ )
+'
+
+test_expect_success 'MIDX compaction with non-lex-ordered pack names' '
+ git init midx-compact-non-lex-order &&
+ (
+ cd midx-compact-non-lex-order &&
+
+ write_packs D C A B E &&
+ test_line_count = 5 $midx_chain &&
+
+ git multi-pack-index compact --incremental \
+ "$(nth_line 2 "$midx_chain")" \
+ "$(nth_line 4 "$midx_chain")" &&
+ test_line_count = 3 $midx_chain &&
+
+ test_midx_layer_packs "$(nth_line 1 "$midx_chain")" D &&
+ test_midx_layer_packs "$(nth_line 2 "$midx_chain")" C A B &&
+ test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E &&
+
+ test_midx_layer_object_uniqueness
+ )
+'
+
+test_done