]> git.ipfire.org Git - thirdparty/git.git/commitdiff
loose: add a mapping between SHA-1 and SHA-256 for loose objects
author: brian m. carlson <sandals@crustytoothpaste.net>
Mon, 2 Oct 2023 02:40:09 +0000 (21:40 -0500)
committer: Junio C Hamano <gitster@pobox.com>
Mon, 2 Oct 2023 21:57:38 +0000 (14:57 -0700)
As part of the transition plan, we'd like to add a file in the .git
directory that maps loose objects between SHA-1 and SHA-256.  Let's
implement the specification in the transition plan and store this data
on a per-repository basis in struct repository.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Makefile
loose.c [new file with mode: 0644]
loose.h [new file with mode: 0644]
object-file-convert.c
object-store-ll.h
object.c
repository.c

index f7e824f25cda0c739762367c5fceff921cd9e089..3c18664def9a49663eee6616efededac8e49b3ff 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1053,6 +1053,7 @@ LIB_OBJS += list-objects-filter.o
 LIB_OBJS += list-objects.o
 LIB_OBJS += lockfile.o
 LIB_OBJS += log-tree.o
+LIB_OBJS += loose.o
 LIB_OBJS += ls-refs.o
 LIB_OBJS += mailinfo.o
 LIB_OBJS += mailmap.o
diff --git a/loose.c b/loose.c
new file mode 100644 (file)
index 0000000..6ba73cc
--- /dev/null
+++ b/loose.c
@@ -0,0 +1,246 @@
+#include "git-compat-util.h"
+#include "hash.h"
+#include "path.h"
+#include "object-store.h"
+#include "hex.h"
+#include "wrapper.h"
+#include "gettext.h"
+#include "loose.h"
+#include "lockfile.h"
+
+static const char *loose_object_header = "# loose-object-idx\n";
+
+/*
+ * Report whether the loose object map applies to this repository: it needs
+ * both a compatibility hash algorithm and a git directory.
+ */
+static inline int should_use_loose_object_map(struct repository *repo)
+{
+       if (!repo->compat_hash_algo)
+               return 0;
+       return repo->gitdir ? 1 : 0;
+}
+
+/*
+ * Allocate a loose object map containing two freshly initialized hash
+ * tables and hand it back through `map`.
+ */
+void loose_object_map_init(struct loose_object_map **map)
+{
+       struct loose_object_map *fresh = xmalloc(sizeof(*fresh));
+
+       fresh->to_compat = kh_init_oid_map();
+       fresh->to_storage = kh_init_oid_map();
+       *map = fresh;
+}
+
+/*
+ * Record key -> value in `map`, storing a heap-allocated copy of `value`.
+ *
+ * Returns 1 when a new entry was created and 0 when the key was already
+ * present, in which case the existing entry is left untouched.
+ */
+static int insert_oid_pair(kh_oid_map_t *map, const struct object_id *key, const struct object_id *value)
+{
+       int absent;
+       khiter_t slot = kh_put_oid_map(map, *key, &absent);
+       struct object_id *copy;
+
+       if (!absent)
+               return 0; /* key already present; keep the old value */
+
+       copy = xmalloc(sizeof(*copy));
+       oidcpy(copy, value);
+       kh_value(map, slot) = copy;
+       return 1;
+}
+
+/*
+ * Populate dir->loose_map, creating it on first use.
+ *
+ * The map is first seeded with the empty tree, empty blob and null object
+ * IDs of both algorithms, so those resolve even when no index file exists.
+ * Then "objects/loose-object-idx" is read from the repository's common
+ * directory: a header line followed by "<storage-oid> <compat-oid>" lines.
+ * Pairs whose key is already in the map are ignored by insert_oid_pair().
+ *
+ * NOTE(review): the path is derived from `repo`, not from `dir`, so every
+ * object directory loads the same file -- confirm this is intended.
+ *
+ * Returns 0 on success or when the index file cannot be opened, and -1 if
+ * the header or any line is malformed or errno is set after reading.
+ */
+static int load_one_loose_object_map(struct repository *repo, struct object_directory *dir)
+{
+       struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
+       FILE *fp;
+       int ret = -1;
+
+       if (!dir->loose_map)
+               loose_object_map_init(&dir->loose_map);
+
+       insert_oid_pair(dir->loose_map->to_compat, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree);
+       insert_oid_pair(dir->loose_map->to_storage, repo->compat_hash_algo->empty_tree, repo->hash_algo->empty_tree);
+
+       insert_oid_pair(dir->loose_map->to_compat, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob);
+       insert_oid_pair(dir->loose_map->to_storage, repo->compat_hash_algo->empty_blob, repo->hash_algo->empty_blob);
+
+       insert_oid_pair(dir->loose_map->to_compat, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid);
+       insert_oid_pair(dir->loose_map->to_storage, repo->compat_hash_algo->null_oid, repo->hash_algo->null_oid);
+
+       strbuf_git_common_path(&path, repo, "objects/loose-object-idx");
+       fp = fopen(path.buf, "rb");
+       if (!fp) {
+               /* A missing or unreadable index is not treated as an error. */
+               strbuf_release(&path);
+               return 0;
+       }
+
+       errno = 0;
+       if (strbuf_getwholeline(&buf, fp, '\n') || strcmp(buf.buf, loose_object_header))
+               goto out;
+       while (!strbuf_getline_lf(&buf, fp)) {
+               const char *p;
+               struct object_id oid, compat_oid;
+               if (parse_oid_hex_algop(buf.buf, &oid, &p, repo->hash_algo) ||
+                   *p++ != ' ' ||
+                   parse_oid_hex_algop(p, &compat_oid, &p, repo->compat_hash_algo) ||
+                   p != buf.buf + buf.len)
+                       goto out;
+               insert_oid_pair(dir->loose_map->to_compat, &oid, &compat_oid);
+               insert_oid_pair(dir->loose_map->to_storage, &compat_oid, &oid);
+       }
+
+       /* Sample errno before fclose() gets a chance to overwrite it. */
+       ret = errno ? -1 : 0;
+out:
+       /* The stream was previously leaked on both the success and error paths. */
+       fclose(fp);
+       strbuf_release(&buf);
+       strbuf_release(&path);
+       return ret;
+}
+
+/*
+ * Load the loose object map for every object directory of `repo`.
+ *
+ * Does nothing and returns 0 when the repository does not use a loose
+ * object map.  Returns -1 as soon as one directory fails to load, 0
+ * otherwise.
+ */
+int repo_read_loose_object_map(struct repository *repo)
+{
+       struct object_directory *odb;
+
+       if (!should_use_loose_object_map(repo))
+               return 0;
+
+       /* presumably makes alternates visible through ->next -- verify */
+       prepare_alt_odb(repo);
+
+       odb = repo->objects->odb;
+       while (odb) {
+               if (load_one_loose_object_map(repo, odb) < 0)
+                       return -1;
+               odb = odb->next;
+       }
+       return 0;
+}
+
+/*
+ * Rewrite "objects/loose-object-idx" from the in-memory storage -> compat
+ * map of the repository's first object directory.
+ *
+ * The file is written through a lock file and committed at the end.  The
+ * entries for the empty tree and empty blob are not written; they are
+ * seeded into the map whenever it is loaded.
+ *
+ * Returns 0 on success, or when the repository does not use a loose object
+ * map, or when no map has been loaded (nothing to persist).  Returns -1
+ * after reporting an error if writing or committing the file fails.
+ */
+int repo_write_loose_object_map(struct repository *repo)
+{
+       kh_oid_map_t *map;
+       struct lock_file lock;
+       int fd;
+       khiter_t iter;
+       struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
+
+       if (!should_use_loose_object_map(repo))
+               return 0;
+
+       /*
+        * Only look at the map after the guard above, and tolerate it never
+        * having been loaded: rewriting the file from a missing map would
+        * either crash or truncate the existing index.
+        */
+       if (!repo->objects->odb->loose_map)
+               return 0;
+       map = repo->objects->odb->loose_map->to_compat;
+
+       strbuf_git_common_path(&path, repo, "objects/loose-object-idx");
+       fd = hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1);
+       iter = kh_begin(map);
+       if (write_in_full(fd, loose_object_header, strlen(loose_object_header)) < 0)
+               goto errout;
+
+       for (; iter != kh_end(map); iter++) {
+               if (kh_exist(map, iter)) {
+                       /* Keys are in this repository's storage algorithm. */
+                       if (oideq(&kh_key(map, iter), repo->hash_algo->empty_tree) ||
+                           oideq(&kh_key(map, iter), repo->hash_algo->empty_blob))
+                               continue;
+                       strbuf_addf(&buf, "%s %s\n", oid_to_hex(&kh_key(map, iter)), oid_to_hex(kh_value(map, iter)));
+                       if (write_in_full(fd, buf.buf, buf.len) < 0)
+                               goto errout;
+                       strbuf_reset(&buf);
+               }
+       }
+       strbuf_release(&buf);
+       if (commit_lock_file(&lock) < 0) {
+               error_errno(_("could not write loose object index %s"), path.buf);
+               strbuf_release(&path);
+               return -1;
+       }
+       strbuf_release(&path);
+       return 0;
+errout:
+       /* Report first: rolling back the lock may overwrite errno. */
+       error_errno(_("failed to write loose object index %s"), path.buf);
+       rollback_lock_file(&lock);
+       strbuf_release(&buf);
+       strbuf_release(&path);
+       return -1;
+}
+
+/*
+ * Append one "<oid> <compat_oid>" line to "objects/loose-object-idx",
+ * creating the file (and writing its header) if it is empty.
+ *
+ * The lock file is held only to serialize writers; the index itself is
+ * opened in append mode and the lock is always rolled back, never
+ * committed.
+ *
+ * Returns 0 on success and -1 after reporting an error.
+ */
+static int write_one_object(struct repository *repo, const struct object_id *oid,
+                           const struct object_id *compat_oid)
+{
+       struct lock_file lock;
+       int fd;
+       int closed;
+       struct stat st;
+       struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
+
+       strbuf_git_common_path(&path, repo, "objects/loose-object-idx");
+       hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1);
+
+       fd = open(path.buf, O_WRONLY | O_CREAT | O_APPEND, 0666);
+       if (fd < 0)
+               goto errout;
+       if (fstat(fd, &st) < 0)
+               goto errout;
+       /* An empty file has just been created: start it with the header. */
+       if (!st.st_size && write_in_full(fd, loose_object_header, strlen(loose_object_header)) < 0)
+               goto errout;
+
+       strbuf_addf(&buf, "%s %s\n", oid_to_hex(oid), oid_to_hex(compat_oid));
+       if (write_in_full(fd, buf.buf, buf.len) < 0)
+               goto errout;
+
+       closed = close(fd);
+       /* The descriptor is gone whether or not close() succeeded. */
+       fd = -1;
+       if (closed)
+               goto errout;
+
+       adjust_shared_perm(path.buf);
+       rollback_lock_file(&lock);
+       strbuf_release(&buf);
+       strbuf_release(&path);
+       return 0;
+errout:
+       error_errno(_("failed to write loose object index %s"), path.buf);
+       /* Avoid close(-1) after a failed open() and a double close. */
+       if (fd >= 0)
+               close(fd);
+       rollback_lock_file(&lock);
+       strbuf_release(&buf);
+       strbuf_release(&path);
+       return -1;
+}
+
+/*
+ * Remember that `oid` (storage algorithm) and `compat_oid` (compatibility
+ * algorithm) name the same object, in the map of the repository's first
+ * object directory.
+ *
+ * If either direction of the mapping was new, the pair is also appended to
+ * the on-disk index and the result of that write is returned.  Returns 0
+ * when the repository does not use a loose object map or when both
+ * directions were already known.
+ *
+ * NOTE(review): assumes the first object directory's map has already been
+ * loaded -- confirm against callers.
+ */
+int repo_add_loose_object_map(struct repository *repo, const struct object_id *oid,
+                             const struct object_id *compat_oid)
+{
+       int added_forward, added_reverse;
+
+       if (!should_use_loose_object_map(repo))
+               return 0;
+
+       added_forward = insert_oid_pair(repo->objects->odb->loose_map->to_compat, oid, compat_oid);
+       added_reverse = insert_oid_pair(repo->objects->odb->loose_map->to_storage, compat_oid, oid);
+       if (!added_forward && !added_reverse)
+               return 0;
+
+       return write_one_object(repo, oid, compat_oid);
+}
+
+/*
+ * Translate `src` into algorithm `to` using the loose object maps of the
+ * repository's object directories, in list order.
+ *
+ * When `to` is the repository's compatibility algorithm the storage ->
+ * compat table is consulted; otherwise the compat -> storage table is.
+ * Directories without a loaded map are skipped.
+ *
+ * Returns 0 and fills `dest` on the first hit, -1 if no map knows `src`.
+ */
+int repo_loose_object_map_oid(struct repository *repo,
+                             const struct object_id *src,
+                             const struct git_hash_algo *to,
+                             struct object_id *dest)
+{
+       const int want_compat = (to == repo->compat_hash_algo);
+       struct object_directory *odb;
+
+       for (odb = repo->objects->odb; odb; odb = odb->next) {
+               kh_oid_map_t *table;
+               khiter_t hit;
+
+               if (!odb->loose_map)
+                       continue;
+
+               if (want_compat)
+                       table = odb->loose_map->to_compat;
+               else
+                       table = odb->loose_map->to_storage;
+
+               hit = kh_get_oid_map(table, *src);
+               if (hit >= kh_end(table))
+                       continue; /* not in this directory's map */
+
+               oidcpy(dest, kh_value(table, hit));
+               return 0;
+       }
+       return -1;
+}
+
+/*
+ * Release a loose object map: free every stored object ID, destroy both
+ * hash tables, free the map itself and reset `*map` to NULL.  A NULL
+ * `*map` is accepted and left alone.
+ */
+void loose_object_map_clear(struct loose_object_map **map)
+{
+       struct loose_object_map *doomed;
+       struct object_id *entry;
+
+       if (!*map)
+               return;
+       doomed = *map;
+
+       /* Values are heap copies owned by the tables; keys are stored inline. */
+       kh_foreach_value(doomed->to_compat, entry, free(entry));
+       kh_destroy_oid_map(doomed->to_compat);
+
+       kh_foreach_value(doomed->to_storage, entry, free(entry));
+       kh_destroy_oid_map(doomed->to_storage);
+
+       free(doomed);
+       *map = NULL;
+}
diff --git a/loose.h b/loose.h
new file mode 100644 (file)
index 0000000..2c29570
--- /dev/null
+++ b/loose.h
@@ -0,0 +1,22 @@
+#ifndef LOOSE_H
+#define LOOSE_H
+
+#include "khash.h"
+
+/*
+ * Bidirectional mapping between the object IDs of loose objects in the
+ * repository's storage hash algorithm and in its compatibility algorithm.
+ * Each table owns heap-allocated copies of its values.
+ */
+struct loose_object_map {
+       /* storage-algorithm object ID -> compatibility-algorithm object ID */
+       kh_oid_map_t *to_compat;
+       /* compatibility-algorithm object ID -> storage-algorithm object ID */
+       kh_oid_map_t *to_storage;
+};
+
+/* Allocate an empty map and store it in `*map`. */
+void loose_object_map_init(struct loose_object_map **map);
+/* Free `*map` and everything it owns, then set `*map` to NULL; NULL is a no-op. */
+void loose_object_map_clear(struct loose_object_map **map);
+/*
+ * Look up `src` in the loose object maps of the repository's object
+ * directories and write its counterpart in `dest_algo` to `dest`.
+ * Returns 0 on success and -1 if no mapping is known.
+ */
+int repo_loose_object_map_oid(struct repository *repo,
+                             const struct object_id *src,
+                             const struct git_hash_algo *dest_algo,
+                             struct object_id *dest);
+/*
+ * Record that `oid` and `compat_oid` name the same object; a new pair is
+ * also appended to the on-disk index.  Returns 0 on success (including
+ * when the map is not in use or the pair was already known), -1 if the
+ * index could not be written.
+ */
+int repo_add_loose_object_map(struct repository *repo, const struct object_id *oid,
+                             const struct object_id *compat_oid);
+/*
+ * Load the on-disk index into the map of every object directory.
+ * Returns 0 on success or when the map is not in use, -1 on failure.
+ */
+int repo_read_loose_object_map(struct repository *repo);
+/*
+ * Rewrite the on-disk index from the in-memory map of the first object
+ * directory, under a lock file.  Returns 0 on success or when the map is
+ * not in use, -1 on failure.
+ */
+int repo_write_loose_object_map(struct repository *repo);
+
+#endif
index 4777aba8363620433a4d1334408b4e2bf9efcfa0..1ec945eaa17faf6bf30e32924df8880c5f70ce15 100644 (file)
@@ -4,6 +4,7 @@
 #include "repository.h"
 #include "hash-ll.h"
 #include "object.h"
+#include "loose.h"
 #include "object-file-convert.h"
 
 int repo_oid_to_algop(struct repository *repo, const struct object_id *src,
@@ -21,7 +22,18 @@ int repo_oid_to_algop(struct repository *repo, const struct object_id *src,
                        oidcpy(dest, src);
                return 0;
        }
-       return -1;
+       if (repo_loose_object_map_oid(repo, src, to, dest)) {
+               /*
+                * We may have loaded the object map at repo initialization but
+                * another process (perhaps upstream of a pipe from us) may have
+                * written a new object into the map.  If the object is missing,
+                * let's reload the map to see if the object has appeared.
+                */
+               repo_read_loose_object_map(repo);
+               if (repo_loose_object_map_oid(repo, src, to, dest))
+                       return -1;
+       }
+       return 0;
 }
 
 int convert_object_file(struct strbuf *outbuf,
index 26a3895c821c61620d5988b2e62a77dc9513d025..bc76d6bec80d9cc81818ad475d0fcce1a964abca 100644 (file)
@@ -26,6 +26,9 @@ struct object_directory {
        uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
        struct oidtree *loose_objects_cache;
 
+       /* Map between object IDs for loose objects. */
+       struct loose_object_map *loose_map;
+
        /*
         * This is a temporary object store created by the tmp_objdir
         * facility. Disable ref updates since the objects in the store
index 2c61e4c86217e633d2e28acd0b3ae654584ede7d..186a0a47c0fbd28836552baab3440e2468879514 100644 (file)
--- a/object.c
+++ b/object.c
@@ -13,6 +13,7 @@
 #include "alloc.h"
 #include "packfile.h"
 #include "commit-graph.h"
+#include "loose.h"
 
 unsigned int get_max_object_index(void)
 {
@@ -540,6 +541,7 @@ void free_object_directory(struct object_directory *odb)
 {
        free(odb->path);
        odb_clear_loose_cache(odb);
+       loose_object_map_clear(&odb->loose_map);
        free(odb);
 }
 
index 80252b79e93eaa11aaa643b543a1bb3bfc2df53b..6214f61cf4e79559cfdb554329eb78d4a153537c 100644 (file)
@@ -14,6 +14,7 @@
 #include "read-cache-ll.h"
 #include "remote.h"
 #include "setup.h"
+#include "loose.h"
 #include "submodule-config.h"
 #include "sparse-index.h"
 #include "trace2.h"
@@ -109,6 +110,8 @@ void repo_set_compat_hash_algo(struct repository *repo, int algo)
        if (hash_algo_by_ptr(repo->hash_algo) == algo)
                BUG("hash_algo and compat_hash_algo match");
        repo->compat_hash_algo = algo ? &hash_algos[algo] : NULL;
+       if (repo->compat_hash_algo)
+               repo_read_loose_object_map(repo);
 }
 
 /*
@@ -201,6 +204,9 @@ int repo_init(struct repository *repo,
        if (worktree)
                repo_set_worktree(repo, worktree);
 
+       if (repo->compat_hash_algo)
+               repo_read_loose_object_map(repo);
+
        clear_repository_format(&format);
        return 0;