git.ipfire.org Git - thirdparty/git.git/commitdiff
test-tool: add pack-deltas helper
author Derrick Stolee <stolee@gmail.com>
Mon, 28 Apr 2025 20:24:43 +0000 (20:24 +0000)
committer Junio C Hamano <gitster@pobox.com>
Mon, 28 Apr 2025 22:37:25 +0000 (15:37 -0700)
When trying to demonstrate certain behavior in tests, it can be helpful
to create packfiles that have specific delta structures. 'git
pack-objects' uses various algorithms to select deltas based on their
compression rates, but that does not always demonstrate all possible
packfile shapes. This becomes especially important when wanting to test
'git index-pack' and its ability to parse certain pack shapes.

We have prior art in t/lib-pack.sh, where certain delta structures are
produced by manually writing certain opaque pack contents. However,
producing these script updates is cumbersome and difficult to do as a
contributor.

Instead, create a new test-tool, 'test-tool pack-deltas', that reads a
list of instructions for which objects to include in a packfile and how
those objects should be written in delta form.

At the moment, this only supports REF_DELTAs as those are the kinds of
deltas needed to exercise a bug in 'git index-pack'.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Makefile
t/helper/meson.build
t/helper/test-pack-deltas.c [new file with mode: 0644]
t/helper/test-tool.c
t/helper/test-tool.h

index 13f9062a056944621698682e12d871df2487cec2..c4d21ccd3d1b6b0092d463bd3a353563bff6408d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -821,6 +821,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o
 TEST_BUILTINS_OBJS += test-mktemp.o
 TEST_BUILTINS_OBJS += test-name-hash.o
 TEST_BUILTINS_OBJS += test-online-cpus.o
+TEST_BUILTINS_OBJS += test-pack-deltas.o
 TEST_BUILTINS_OBJS += test-pack-mtimes.o
 TEST_BUILTINS_OBJS += test-parse-options.o
 TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
index d2cabaa2bcfcc9e55f1e1bd10ee66498496303d4..d4e8b26df8d6de7d8a485405feb563aad513f10d 100644 (file)
@@ -36,6 +36,7 @@ test_tool_sources = [
   'test-mktemp.c',
   'test-name-hash.c',
   'test-online-cpus.c',
+  'test-pack-deltas.c',
   'test-pack-mtimes.c',
   'test-parse-options.c',
   'test-parse-pathspec-file.c',
diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c
new file mode 100644 (file)
index 0000000..4caa024
--- /dev/null
@@ -0,0 +1,148 @@
+#define USE_THE_REPOSITORY_VARIABLE
+
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "delta.h"
+#include "git-zlib.h"
+#include "hash.h"
+#include "hex.h"
+#include "pack.h"
+#include "pack-objects.h"
+#include "parse-options.h"
+#include "setup.h"
+#include "strbuf.h"
+#include "string-list.h"
+
+static const char *usage_str[] = {
+       "test-tool pack-deltas --num-objects <num-objects>", /* --num-objects is mandatory: cmd__pack_deltas() shows this usage when it is absent */
+       NULL /* the list is NULL-terminated */
+};
+
+/*
+ * Deflate the 'size' bytes found at '*pptr' into a freshly allocated
+ * buffer.
+ *
+ * On return '*pptr' points at the compressed bytes (obtained from
+ * xmalloc(), so the caller must free() them) and the original input
+ * buffer has been freed. The return value is the stream's 'total_out'
+ * counter, i.e. the number of compressed bytes produced.
+ */
+static unsigned long do_compress(void **pptr, unsigned long size)
+{
+       git_zstream stream;
+       void *src = *pptr;
+       void *dst;
+       unsigned long bound;
+       int status;
+
+       git_deflate_init(&stream, 1);
+
+       /* Size the output for the worst case reported by the deflater. */
+       bound = git_deflate_bound(&stream, size);
+       dst = xmalloc(bound);
+       *pptr = dst;
+
+       stream.next_in = src;
+       stream.avail_in = size;
+       stream.next_out = dst;
+       stream.avail_out = bound;
+
+       /* Keep going for as long as the deflater answers Z_OK. */
+       do {
+               status = git_deflate(&stream, Z_FINISH);
+       } while (status == Z_OK);
+       git_deflate_end(&stream);
+
+       free(src);
+       return stream.total_out;
+}
+
+/*
+ * Append one REF_DELTA entry to 'f': the object 'oid' stored as a delta
+ * against the object 'base'.
+ *
+ * Both objects are read from the_repository. The entry is written as:
+ *   1. the in-pack object header (type OBJ_REF_DELTA, carrying the
+ *      uncompressed size of the delta),
+ *   2. the raw hash of 'base',
+ *   3. the deflated delta data.
+ *
+ * Dies if either object cannot be read or if no delta can be generated.
+ */
+static void write_ref_delta(struct hashfile *f,
+                           struct object_id *oid,
+                           struct object_id *base)
+{
+       unsigned char header[MAX_PACK_OBJECT_HEADER];
+       unsigned long size, base_size, delta_size, compressed_size, hdrlen;
+       enum object_type type;
+       void *base_buf, *delta_buf;
+       void *buf = repo_read_object_file(the_repository,
+                                         oid, &type,
+                                         &size);
+
+       if (!buf)
+               die("unable to read %s", oid_to_hex(oid));
+
+       /* 'type' is only an out-parameter here; its value is not used. */
+       base_buf = repo_read_object_file(the_repository,
+                                        base, &type,
+                                        &base_size);
+
+       if (!base_buf)
+               die("unable to read %s", oid_to_hex(base));
+
+       delta_buf = diff_delta(base_buf, base_size,
+                              buf, size, &delta_size, 0);
+
+       /*
+        * diff_delta() returns NULL when it cannot produce a delta (for
+        * instance when the base or the target is empty); in that case
+        * 'delta_size' is not set and must not be used.
+        */
+       if (!delta_buf)
+               die("unable to create delta for %s against %s",
+                   oid_to_hex(oid), oid_to_hex(base));
+
+       /* do_compress() frees the raw delta and hands back the deflated one. */
+       compressed_size = do_compress(&delta_buf, delta_size);
+
+       hdrlen = encode_in_pack_object_header(header, sizeof(header),
+                                             OBJ_REF_DELTA, delta_size);
+       hashwrite(f, header, hdrlen);
+       hashwrite(f, base->hash, the_repository->hash_algo->rawsz);
+       hashwrite(f, delta_buf, compressed_size);
+
+       free(buf);
+       free(base_buf);
+       free(delta_buf);
+}
+
+/*
+ * Entry point for 'test-tool pack-deltas --num-objects <n>'.
+ *
+ * Writes a packfile to file descriptor 1. The pack header records the
+ * object count given with --num-objects; that count is not checked
+ * against the number of entries actually written. Each line read from
+ * stdin describes one entry:
+ *
+ *   <type> <content-oid> [<base-oid>]
+ *
+ * where <type> is REF_DELTA, OFS_DELTA or FULL. Only REF_DELTA is
+ * implemented, and it requires <base-oid>. Any malformed line, unknown
+ * type or unimplemented type is fatal.
+ *
+ * Returns 0 on success; shows the usage message if --num-objects is
+ * missing or negative, or if extra arguments are given.
+ */
+int cmd__pack_deltas(int argc, const char **argv)
+{
+       int num_objects = -1;
+       struct hashfile *f;
+       struct strbuf line = STRBUF_INIT;
+       struct option options[] = {
+               OPT_INTEGER('n', "num-objects", &num_objects, N_("the number of objects to write")),
+               OPT_END()
+       };
+
+       argc = parse_options(argc, argv, NULL,
+                            options, usage_str, 0);
+
+       if (argc || num_objects < 0)
+               usage_with_options(usage_str, options);
+
+       setup_git_directory();
+
+       f = hashfd(the_repository->hash_algo, 1, "<stdout>");
+       write_pack_header(f, num_objects);
+
+       /* Read each line from stdin into 'line' */
+       while (strbuf_getline_lf(&line, stdin) != EOF) {
+               const char *type_str, *content_oid_str, *base_oid_str = NULL;
+               struct object_id content_oid, base_oid;
+               struct string_list items = STRING_LIST_INIT_NODUP;
+               /*
+                * Tokenize into two or three parts:
+                * 1. REF_DELTA, OFS_DELTA, or FULL.
+                * 2. The object ID for the content object.
+                * 3. The object ID for the base object (optional).
+                *
+                * The list is NODUP and split in place, so the token
+                * strings point into 'line' and stay valid after the
+                * list itself is cleared below.
+                */
+               if (string_list_split_in_place(&items, line.buf, " ", 3) < 0)
+                       die("invalid input format: %s", line.buf);
+
+               if (items.nr < 2)
+                       die("invalid input format: %s", line.buf);
+
+               type_str = items.items[0].string;
+               content_oid_str = items.items[1].string;
+
+               if (get_oid_hex(content_oid_str, &content_oid))
+                       die("invalid object: %s", content_oid_str);
+               if (items.nr >= 3) {
+                       base_oid_str = items.items[2].string;
+                       if (get_oid_hex(base_oid_str, &base_oid))
+                               die("invalid object: %s", base_oid_str);
+               }
+               string_list_clear(&items, 0);
+
+               if (!strcmp(type_str, "REF_DELTA")) {
+                       /*
+                        * 'base_oid' is only filled in when a third
+                        * token was present; refuse to use it otherwise.
+                        */
+                       if (!base_oid_str)
+                               die("REF_DELTA requires a base object: %s",
+                                   content_oid_str);
+                       write_ref_delta(f, &content_oid, &base_oid);
+               } else if (!strcmp(type_str, "OFS_DELTA")) {
+                       die("OFS_DELTA not implemented");
+               } else if (!strcmp(type_str, "FULL")) {
+                       die("FULL not implemented");
+               } else {
+                       die("unknown pack type: %s", type_str);
+               }
+       }
+
+       finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK,
+                         CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+       strbuf_release(&line);
+       return 0;
+}
index 50dc4dac4ed625a3555e38a3d6b85525c2256694..74812ed86d385af013c53b88de0b3585120ef082 100644 (file)
@@ -46,6 +46,7 @@ static struct test_cmd cmds[] = {
        { "mktemp", cmd__mktemp },
        { "name-hash", cmd__name_hash },
        { "online-cpus", cmd__online_cpus },
+       { "pack-deltas", cmd__pack_deltas },
        { "pack-mtimes", cmd__pack_mtimes },
        { "parse-options", cmd__parse_options },
        { "parse-options-flags", cmd__parse_options_flags },
index 6d62a5b53d9596cc7280f2f5c57e54724914b28f..2571a3ccfe8991932de4df568bda1046af8793bf 100644 (file)
@@ -39,6 +39,7 @@ int cmd__mergesort(int argc, const char **argv);
 int cmd__mktemp(int argc, const char **argv);
 int cmd__name_hash(int argc, const char **argv);
 int cmd__online_cpus(int argc, const char **argv);
+int cmd__pack_deltas(int argc, const char **argv);
 int cmd__pack_mtimes(int argc, const char **argv);
 int cmd__parse_options(int argc, const char **argv);
 int cmd__parse_options_flags(int argc, const char **argv);