]> git.ipfire.org Git - thirdparty/git.git/commitdiff
repack-promisor add helper to fill promisor file after repack
authorLorenzoPegorari <lorenzo.pegorari2002@gmail.com>
Sat, 18 Apr 2026 14:17:01 +0000 (16:17 +0200)
committerJunio C Hamano <gitster@pobox.com>
Sat, 18 Apr 2026 19:38:16 +0000 (12:38 -0700)
A ".promisor" file may contain ref names (and their associated hashes)
that were fetched at the time the corresponding packfile was downloaded.
This information is used for debugging reasons. This information is
stored as lines structured like this: "<oid> <ref>".

Create a `write_promisor_file_after_repack()` helper function that allows
this debugging info to not be lost after a `repack`, by copying it inside
a new ".promisor" file.

The function logic is the following:
 * Take all ".promisor" files contained inside the given `repo`.
 * Ignore those whose name is contained inside the given `strset
   not_repacked_basenames`, which basically acts as a "promisor ignorelist"
   (intended to be used for packfiles that have not been repacked).
 * Read each line of the remaining ".promisor" files, which can be:
    * "<oid> <ref>" if the ".promisor" file was never repacked. If so,
      add the time (in Unix time) at which the ".promisor" file was last
      modified <time> to the line, to obtain: "<oid> <ref> <time>".
    * "<oid> <ref> <time>" if the ".promisor" file was repacked. If so,
      don't modify it.
 * Ignore the line if its <oid> is not present inside the
   "<packtmp>-<dest_hex>.idx" file.
 * If the destination file "<packtmp>-<dest_hex>.promisor" does not
   already contain the line, append it to the file.

The time of last data modification of a never-repacked ".promisor" file
can be used when comparing its entries with entries in another
".promisor" file that did get repacked. With these timestamps, the
debugger will be able to tell at which time the refs at the remote
repository pointed at what object. Also, when looking at already
repacked ".promisor" files, the same ref may appear multiple times, and
having timestamps will help in understanding what happened over time.

The function tries its best to deal with malformed ".promisor" files,
ignoring those lines:
 * That cannot be split into "<oid> <ref>" or "<oid> <ref> <time>".
 * Whose <oid> is not a sane hexadecimal string.
 * Whose <ref> does not have the correct format for a refname.

The function `parse_pack_index()`, which is loose in validation, can be
safely used to obtain the `packed_git` of the packs created during the
`repack` because, when `write_promisor_file_after_repack()` is called by
`finish_repacking_promisor_objects()`, we know for a fact that they were
just successfully created by `pack-objects` (also, these packs have not
yet been finalized, and so they are not part of the repository). Anyway,
if an error happens while trying to obtain the `packed_git`, the
".promisor" file will be created empty.

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Tian Yuchen <cat@malon.dev>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: LorenzoPegorari <lorenzo.pegorari2002@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
repack-promisor.c

index 90318ce15093f55a80c1eda1f0698bb15a413baf..8fc541d2cfb9d4f88c541460175ee82e6ff1301b 100644 (file)
@@ -4,6 +4,7 @@
 #include "pack.h"
 #include "packfile.h"
 #include "path.h"
+#include "refs.h"
 #include "repository.h"
 #include "run-command.h"
 
@@ -34,6 +35,154 @@ static int write_oid(const struct object_id *oid,
        return 0;
 }
 
+/*
+ * Go through all .promisor files contained in repo (excluding those whose name
+ * appears in not_repacked_basenames, which acts as an ignorelist; it may be
+ * NULL), and copy their content into "<packtmp>-<dest_hex>.promisor".
+ * Each line of a never repacked .promisor file is: "<oid> <ref>" (as described
+ * in the write_promisor_file() function).
+ * After a repack, the copied lines will be: "<oid> <ref> <time>", where <time>
+ * is the time (in Unix time) at which the .promisor file was last modified.
+ * Only the lines whose <oid> is present inside "<packtmp>-<dest_hex>.idx" will
+ * be copied, and each distinct line is written only once.
+ * Malformed lines (wrong number of fields, bad <oid>, bad <ref>) are skipped.
+ */
+static void write_promisor_file_after_repack(struct repository *repo,
+                                                               const char *dest_hex,
+                                                               const char *packtmp,
+                                                               struct strset *not_repacked_basenames)
+{
+       char *dest_promisor_name;
+       char *dest_idx_name;
+       FILE *dest;
+       struct object_id dest_oid;
+       struct packed_git *dest_pack, *p;
+       struct strbuf source_promisor_name = STRBUF_INIT;
+       struct strset seen_lines = STRSET_INIT;
+       struct strbuf line = STRBUF_INIT;
+       int err;
+
+       /* First of all, let's create and open the .promisor dest file */
+       dest_promisor_name = mkpathdup("%s-%s.promisor", packtmp, dest_hex);
+       dest = xfopen(dest_promisor_name, "w");
+
+       /*
+        * Now let's retrieve the destination pack.
+        * We use parse_pack_index() because dest_hex/packtmp point to the packfile
+        * that "pack-objects" just created, which is about to become part of this
+        * repository, but has not yet been finalized.
+        * If we are here, we know that "pack-objects" did not fail, so
+        * parse_pack_index() being loose in validation does not pose a problem.
+        * If an error happens, we simply leave the ".promisor" file empty.
+        */
+       if (get_oid_hex_algop(dest_hex, &dest_oid, repo->hash_algo)) {
+               warning(_("Promisor file left empty: '%s' not a hash"), dest_hex);
+               if (fclose(dest))
+                       die(_("Could not close '%s' promisor file"), dest_promisor_name);
+               free(dest_promisor_name);
+               return;
+       }
+       dest_idx_name = mkpathdup("%s-%s.idx", packtmp, dest_hex);
+       dest_pack = parse_pack_index(repo, dest_oid.hash, dest_idx_name);
+       if (!dest_pack) {
+               warning(_("Promisor file left empty: couldn't open packfile '%s'"), dest_idx_name);
+               if (fclose(dest))
+                       die(_("Could not close '%s' promisor file"), dest_promisor_name);
+               free(dest_promisor_name);
+               free(dest_idx_name);
+               return;
+       }
+
+       repo_for_each_pack(repo, p) {
+               FILE *source;
+               struct stat source_stat;
+
+               if (!p->pack_promisor)
+                       continue;
+
+               if (not_repacked_basenames &&
+                       strset_contains(not_repacked_basenames, pack_basename(p)))
+                       continue;
+
+               strbuf_reset(&source_promisor_name);
+               strbuf_addstr(&source_promisor_name, p->pack_name);
+               strbuf_strip_suffix(&source_promisor_name, ".pack");
+               strbuf_addstr(&source_promisor_name, ".promisor");
+
+               /* The mtime becomes <time> for the lines that lack one */
+               if (stat(source_promisor_name.buf, &source_stat))
+                       die(_("File not found: %s"), source_promisor_name.buf);
+
+               source = xfopen(source_promisor_name.buf, "r");
+
+               while (strbuf_getline(&line, source) != EOF) {
+                       struct string_list line_sections = STRING_LIST_INIT_DUP;
+                       struct object_id oid;
+
+                       /*
+                        * Split line into <oid>, <ref> and <time> (if <time> exists).
+                        * Check that it was actually split into 2 or 3 parts. If it was
+                        * not, then it is malformed, so skip it.
+                        */
+                       string_list_split(&line_sections, line.buf, " ", 3);
+                       if (line_sections.nr != 2 && line_sections.nr != 3) {
+                               string_list_clear(&line_sections, 0);
+                               continue;
+                       }
+
+                       /* Skip the lines where <oid> is not a sane hexadecimal string */
+                       if (get_oid_hex_algop(line_sections.items[0].string,
+                                               &oid, repo->hash_algo)) {
+                               string_list_clear(&line_sections, 0);
+                               continue;
+                       }
+                       /* Ignore the lines where <oid> doesn't appear in the dest_pack */
+                       if (!find_pack_entry_one(&oid, dest_pack)) {
+                               string_list_clear(&line_sections, 0);
+                               continue;
+                       }
+
+                       /* Skip the lines where <ref> is not a well-formed refname */
+                       if (check_refname_format(line_sections.items[1].string,
+                                                       REFNAME_ALLOW_ONELEVEL)) {
+                               string_list_clear(&line_sections, 0);
+                               continue;
+                       }
+
+                       /* If <time> doesn't exist, retrieve it and add it to line */
+                       if (line_sections.nr != 3)
+                               strbuf_addf(&line, " %" PRItime,
+                                                       (timestamp_t)source_stat.st_mtime);
+
+                       /* If the finalized line is new, append it to dest */
+                       if (strset_add(&seen_lines, line.buf))
+                               fprintf(dest, "%s\n", line.buf);
+
+                       string_list_clear(&line_sections, 0);
+               }
+
+               /* The loop also ends on a read error, so tell it apart from EOF */
+               err = ferror(source);
+               err |= fclose(source);
+               if (err)
+                       die(_("Could not read '%s' promisor file"), source_promisor_name.buf);
+       }
+
+       err = ferror(dest);
+       err |= fclose(dest);
+       if (err)
+               die(_("Could not write '%s' promisor file"), dest_promisor_name);
+
+       /* dest_pack came from parse_pack_index(), so it is released here */
+       close_pack_index(dest_pack);
+       free(dest_pack);
+       free(dest_promisor_name);
+       free(dest_idx_name);
+       strbuf_release(&source_promisor_name);
+       strbuf_release(&line);
+       strset_clear(&seen_lines);
+}
+
 static void finish_repacking_promisor_objects(struct repository *repo,
                                              struct child_process *cmd,
                                              struct string_list *names,