]> git.ipfire.org Git - thirdparty/git.git/commitdiff
pack-mtimes: support reading .mtimes files
authorTaylor Blau <me@ttaylorr.com>
Fri, 20 May 2022 23:17:35 +0000 (19:17 -0400)
committerJunio C Hamano <gitster@pobox.com>
Thu, 26 May 2022 22:48:26 +0000 (15:48 -0700)
To store the individual mtimes of objects in a cruft pack, introduce a
new `.mtimes` format that can optionally accompany a single pack in the
repository.

The format is defined in Documentation/technical/pack-format.txt, and
stores a 4-byte network order timestamp for each object in name (index)
order.

This patch prepares for cruft packs by defining the `.mtimes` format,
and introducing a basic API that callers can use to read out individual
mtimes.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/technical/pack-format.txt
Makefile
builtin/repack.c
object-store.h
pack-mtimes.c [new file with mode: 0644]
pack-mtimes.h [new file with mode: 0644]
packfile.c

index 6d3efb7d16e10683ab3ad8e6bf87217e443f350e..b520aa9c45bf6cf9b12d6ef798658c298b1888a2 100644 (file)
@@ -294,6 +294,25 @@ Pack file entry: <+
 
 All 4-byte numbers are in network order.
 
+== pack-*.mtimes files have the format:
+
+All 4-byte numbers are in network byte order.
+
+  - A 4-byte magic number '0x4d544d45' ('MTME').
+
+  - A 4-byte version identifier (= 1).
+
+  - A 4-byte hash function identifier (= 1 for SHA-1, 2 for SHA-256).
+
+  - A table of 4-byte unsigned integers. The ith value is the
+    modification time (mtime) of the ith object in the corresponding
+    pack by lexicographic (index) order. The mtimes count standard
+    epoch seconds.
+
+  - A trailer, containing a checksum of the corresponding packfile,
+    and a checksum of all of the above (each having length according
+    to the specified hash function).
+
 == multi-pack-index (MIDX) files have the following format:
 
 The multi-pack-index files refer to multiple pack-files and loose objects.
index f8bccfab5e9c46d39be6fe28b091330d5a3dd895..e59328ab7d103255d3e9fefeed302d7e3c1afb60 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -993,6 +993,7 @@ LIB_OBJS += oidtree.o
 LIB_OBJS += pack-bitmap-write.o
 LIB_OBJS += pack-bitmap.o
 LIB_OBJS += pack-check.o
+LIB_OBJS += pack-mtimes.o
 LIB_OBJS += pack-objects.o
 LIB_OBJS += pack-revindex.o
 LIB_OBJS += pack-write.o
index d1a563d5b65666b68596ad353a945e8e11c6ac70..e7a3920c6da0c36ca8c2357fc29eae88d871c14e 100644 (file)
@@ -217,6 +217,7 @@ static struct {
 } exts[] = {
        {".pack"},
        {".rev", 1},
+       {".mtimes", 1},
        {".bitmap", 1},
        {".promisor", 1},
        {".idx"},
index bd2322ed8ce3368d8b06ab570f2f1f4c2217d757..3c98028ce608cea5b83fa1ea667fb84bcafa5c55 100644 (file)
@@ -115,12 +115,20 @@ struct packed_git {
                 freshened:1,
                 do_not_close:1,
                 pack_promisor:1,
-                multi_pack_index:1;
+                multi_pack_index:1,
+                is_cruft:1;
        unsigned char hash[GIT_MAX_RAWSZ];
        struct revindex_entry *revindex;
        const uint32_t *revindex_data;
        const uint32_t *revindex_map;
        size_t revindex_size;
+       /*
+        * mtimes_map points at the beginning of the memory-mapped region of
+        * this pack's corresponding .mtimes file, and mtimes_size is the size
+        * of that .mtimes file in bytes.
+        */
+       const uint32_t *mtimes_map;
+       size_t mtimes_size;
        /* something like ".git/objects/pack/xxxxx.pack" */
        char pack_name[FLEX_ARRAY]; /* more */
 };
diff --git a/pack-mtimes.c b/pack-mtimes.c
new file mode 100644 (file)
index 0000000..0e0aafd
--- /dev/null
@@ -0,0 +1,129 @@
+#include "git-compat-util.h"
+#include "pack-mtimes.h"
+#include "object-store.h"
+#include "packfile.h"
+
+/*
+ * Builds the name of the .mtimes file that accompanies pack "p" by
+ * swapping the ".pack" suffix of p->pack_name for ".mtimes".
+ *
+ * The string comes from xstrfmt(); load_pack_mtimes() releases it with
+ * free(). It is a BUG() for p->pack_name not to end in ".pack".
+ */
+static char *pack_mtimes_filename(struct packed_git *p)
+{
+       const char *pack_name = p->pack_name;
+       size_t stem_len;
+
+       if (!strip_suffix(pack_name, ".pack", &stem_len))
+               BUG("pack_name does not end in .pack");
+
+       return xstrfmt("%.*s.mtimes", (int)stem_len, pack_name);
+}
+
+#define MTIMES_HEADER_SIZE (12) /* signature + version + hash_id, 4 bytes each */
+
+struct mtimes_header { /* host-order copy of the three leading .mtimes words */
+       uint32_t signature; /* compared against MTIMES_SIGNATURE */
+       uint32_t version; /* only version 1 is accepted by the loader */
+       uint32_t hash_id; /* 1 selects SHA-1 sizes, 2 selects SHA-256 sizes */
+};
+
+/*
+ * Opens and memory-maps the .mtimes file named "mtimes_file", then
+ * validates its header and its total size against "num_objects".
+ *
+ * A well-formed file is exactly MTIMES_HEADER_SIZE bytes of header, one
+ * 4-byte entry per object, and two trailing checksums whose length is
+ * chosen by the header's hash id.
+ *
+ * On success, returns zero and stores the mapping and its length in
+ * "*data_p" and "*len_p". On failure, both output parameters are left
+ * untouched, anything that was mapped is unmapped again, and the return
+ * value is the one produced by error()/error_errno(), or -1 (without a
+ * message) when the file could not be opened.
+ */
+static int load_pack_mtimes_file(char *mtimes_file,
+                                uint32_t num_objects,
+                                const uint32_t **data_p, size_t *len_p)
+{
+       int fd, ret = 0;
+       struct stat st;
+       uint32_t *data = NULL;
+       size_t mtimes_size, expected_size;
+       struct mtimes_header header;
+
+       fd = git_open(mtimes_file);
+       if (fd < 0) {
+               ret = -1;
+               goto cleanup;
+       }
+       if (fstat(fd, &st)) {
+               ret = error_errno(_("failed to read %s"), mtimes_file);
+               goto cleanup;
+       }
+
+       mtimes_size = xsize_t(st.st_size);
+
+       /* The header words are read below, so they must all be present. */
+       if (mtimes_size < MTIMES_HEADER_SIZE) {
+               ret = error(_("mtimes file %s is too small"), mtimes_file);
+               goto cleanup;
+       }
+
+       data = xmmap(NULL, mtimes_size, PROT_READ, MAP_PRIVATE, fd, 0);
+
+       header.signature = ntohl(data[0]);
+       header.version = ntohl(data[1]);
+       header.hash_id = ntohl(data[2]);
+
+       if (header.signature != MTIMES_SIGNATURE) {
+               ret = error(_("mtimes file %s has unknown signature"), mtimes_file);
+               goto cleanup;
+       }
+
+       if (header.version != MTIMES_VERSION) {
+               ret = error(_("mtimes file %s has unsupported version %"PRIu32),
+                           mtimes_file, header.version);
+               goto cleanup;
+       }
+
+       if (!(header.hash_id == 1 || header.hash_id == 2)) {
+               ret = error(_("mtimes file %s has unsupported hash id %"PRIu32),
+                           mtimes_file, header.hash_id);
+               goto cleanup;
+       }
+
+       /* header + one entry per object + (pack checksum, file checksum) */
+       expected_size = MTIMES_HEADER_SIZE;
+       expected_size = st_add(expected_size, st_mult(sizeof(uint32_t), num_objects));
+       expected_size = st_add(expected_size, 2 * (header.hash_id == 1 ? GIT_SHA1_RAWSZ : GIT_SHA256_RAWSZ));
+
+       if (mtimes_size != expected_size) {
+               ret = error(_("mtimes file %s is corrupt"), mtimes_file);
+               goto cleanup;
+       }
+
+cleanup:
+       if (ret) {
+               if (data)
+                       munmap(data, mtimes_size);
+       } else {
+               *len_p = mtimes_size;
+               *data_p = data;
+       }
+
+       /* git_open() may have failed; never hand a negative fd to close(). */
+       if (fd >= 0)
+               close(fd);
+       return ret;
+}
+
+/*
+ * Maps the .mtimes file of "p" into p->mtimes_map / p->mtimes_size.
+ *
+ * Returns zero without doing anything when "p" is not flagged as a
+ * cruft pack or when its .mtimes file is already mapped. Otherwise the
+ * pack index is opened first (the loader needs p->num_objects), and
+ * the result of that step or of loading the file is returned.
+ */
+int load_pack_mtimes(struct packed_git *p)
+{
+       char *path;
+       int status;
+
+       /* Not a cruft pack, or already loaded: nothing to do. */
+       if (!p->is_cruft || p->mtimes_map)
+               return 0;
+
+       status = open_pack_index(p);
+       if (status < 0)
+               return status;
+
+       path = pack_mtimes_filename(p);
+       status = load_pack_mtimes_file(path, p->num_objects,
+                                      &p->mtimes_map, &p->mtimes_size);
+       free(path);
+
+       return status;
+}
+
+/*
+ * Looks up the mtime stored for the object at index position "pos" in
+ * the already-loaded .mtimes file of "p".
+ *
+ * Calling this before the .mtimes file is mapped, or with a "pos" that
+ * is not below p->num_objects, is a BUG().
+ */
+uint32_t nth_packed_mtime(struct packed_git *p, uint32_t pos)
+{
+       const uint32_t *entry;
+
+       if (!p->mtimes_map)
+               BUG("pack .mtimes file not loaded for %s", p->pack_name);
+       if (pos >= p->num_objects)
+               BUG("pack .mtimes out-of-bounds (%"PRIu32" vs %"PRIu32")",
+                   pos, p->num_objects);
+
+       /* The table of mtimes starts after the three 4-byte header words. */
+       entry = p->mtimes_map + 3 + pos;
+
+       return get_be32(entry);
+}
diff --git a/pack-mtimes.h b/pack-mtimes.h
new file mode 100644 (file)
index 0000000..cc957b3
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef PACK_MTIMES_H
+#define PACK_MTIMES_H
+
+#include "git-compat-util.h"
+
+#define MTIMES_SIGNATURE 0x4d544d45 /* "MTME" */
+#define MTIMES_VERSION 1
+
+struct packed_git;
+
+/*
+ * Loads the .mtimes file corresponding to "p", if any, returning zero
+ * on success.
+ *
+ * A pack that is not marked as a cruft pack, or whose .mtimes file has
+ * already been loaded, is left alone and also yields zero. Any other
+ * return value comes from opening the pack index or from loading and
+ * validating the .mtimes file.
+ */
+int load_pack_mtimes(struct packed_git *p);
+
+/*
+ * Returns the mtime associated with the object at position "pos" (in
+ * lexicographic/index order) in pack "p".
+ *
+ * Note that it is a BUG() to call this function if either (a) "p" does
+ * not have a corresponding .mtimes file, or (b) it does, but it hasn't
+ * been loaded. It is likewise a BUG() to pass a "pos" that is not less
+ * than the number of objects in "p".
+ */
+uint32_t nth_packed_mtime(struct packed_git *p, uint32_t pos);
+
+#endif
index 835b2d271645ce08b7f98214748d3ffe4296edb8..fc0245fbab56fb7c47885a193e63466576cde712 100644 (file)
@@ -334,12 +334,22 @@ static void close_pack_revindex(struct packed_git *p)
        p->revindex_data = NULL;
 }
 
+/*
+ * Unmaps the .mtimes file of "p" if one is currently mapped, and clears
+ * p->mtimes_map so that a later load starts from scratch.
+ */
+static void close_pack_mtimes(struct packed_git *p)
+{
+       if (p->mtimes_map) {
+               munmap((void *)p->mtimes_map, p->mtimes_size);
+               p->mtimes_map = NULL;
+       }
+}
+
 void close_pack(struct packed_git *p)
 {
        close_pack_windows(p);
        close_pack_fd(p);
        close_pack_index(p);
        close_pack_revindex(p);
+       close_pack_mtimes(p); /* unmap the .mtimes file, if one was loaded */
        oidset_clear(&p->bad_objects);
 }
 
@@ -363,7 +373,7 @@ void close_object_store(struct raw_object_store *o)
 
 void unlink_pack_path(const char *pack_name, int force_delete)
 {
-       static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor"};
+       static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"};
        int i;
        struct strbuf buf = STRBUF_INIT;
        size_t plen;
@@ -718,6 +728,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
        if (!access(p->pack_name, F_OK))
                p->pack_promisor = 1;
 
+       xsnprintf(p->pack_name + path_len, alloc - path_len, ".mtimes");
+       if (!access(p->pack_name, F_OK))
+               p->is_cruft = 1;
+
        xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
        if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
                free(p);
@@ -869,7 +883,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
            ends_with(file_name, ".pack") ||
            ends_with(file_name, ".bitmap") ||
            ends_with(file_name, ".keep") ||
-           ends_with(file_name, ".promisor"))
+           ends_with(file_name, ".promisor") ||
+           ends_with(file_name, ".mtimes"))
                string_list_append(data->garbage, full_name);
        else
                report_garbage(PACKDIR_FILE_GARBAGE, full_name);