git.ipfire.org Git - thirdparty/git.git/commitdiff
odb: introduce generic `odb_find_abbrev_len()`
author: Patrick Steinhardt <ps@pks.im>
Fri, 20 Mar 2026 07:07:40 +0000 (08:07 +0100)
committer: Junio C Hamano <gitster@pobox.com>
Fri, 20 Mar 2026 20:16:42 +0000 (13:16 -0700)
Introduce a new generic `odb_find_abbrev_len()` function as well as
source-specific callback functions. This makes the logic to compute the
required prefix length to make a given object unique fully pluggable.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
object-name.c
odb.c
odb.h
odb/source-files.c
odb/source.h

index bb2294a1932a8dc6ad3a39fb5df84557cb01006f..f6e1f29e1fee65dd82facbafbfd46333eee35f8a 100644 (file)
 #include "refs.h"
 #include "remote.h"
 #include "dir.h"
+#include "odb.h"
 #include "oid-array.h"
-#include "packfile.h"
 #include "pretty.h"
-#include "object-file.h"
 #include "read-cache-ll.h"
 #include "repo-settings.h"
 #include "repository.h"
@@ -569,19 +568,6 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix,
        return ret;
 }
 
-/*
- * Return the slot of the most-significant bit set in "val". There are various
- * ways to do this quickly with fls() or __builtin_clzl(), but speed is
- * probably not a big deal here.
- */
-static unsigned msb(unsigned long val)
-{
-       unsigned r = 0;
-       while (val >>= 1)
-               r++;
-       return r;
-}
-
 void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo,
                                   const struct object_id *oid, int abbrev_len)
 {
@@ -602,49 +588,14 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
 {
        const struct git_hash_algo *algo =
                oid->algo ? &hash_algos[oid->algo] : r->hash_algo;
-       const unsigned hexsz = algo->hexsz;
        unsigned len;
 
-       if (min_len < 0) {
-               unsigned long count;
-
-               if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
-                       count = 0;
-
-               /*
-                * Add one because the MSB only tells us the highest bit set,
-                * not including the value of all the _other_ bits (so "15"
-                * is only one off of 2^4, but the MSB is the 3rd bit.
-                */
-               len = msb(count) + 1;
-               /*
-                * We now know we have on the order of 2^len objects, which
-                * expects a collision at 2^(len/2). But we also care about hex
-                * chars, not bits, and there are 4 bits per hex. So all
-                * together we need to divide by 2 and round up.
-                */
-               len = DIV_ROUND_UP(len, 2);
-               /*
-                * For very small repos, we stick with our regular fallback.
-                */
-               if (len < FALLBACK_DEFAULT_ABBREV)
-                       len = FALLBACK_DEFAULT_ABBREV;
-       } else {
-               len = min_len;
-       }
+       if (odb_find_abbrev_len(r->objects, oid, min_len, &len) < 0)
+               len = algo->hexsz;
 
        oid_to_hex_r(hex, oid);
-       if (len >= hexsz || !len)
-               return hexsz;
-
-       odb_prepare_alternates(r->objects);
-       for (struct odb_source *s = r->objects->sources; s; s = s->next) {
-               struct odb_source_files *files = odb_source_files_downcast(s);
-               packfile_store_find_abbrev_len(files->packed, oid, len, &len);
-               odb_source_loose_find_abbrev_len(s, oid, len, &len);
-       }
-
        hex[len] = 0;
+
        return len;
 }
 
diff --git a/odb.c b/odb.c
index 3019957b8747a93d5c1562f31f2e4e6ffccafe17..3f94a53df157e1292d46415b31328f07cbfb6c80 100644 (file)
--- a/odb.c
+++ b/odb.c
@@ -12,6 +12,7 @@
 #include "midx.h"
 #include "object-file-convert.h"
 #include "object-file.h"
+#include "object-name.h"
 #include "odb.h"
 #include "packfile.h"
 #include "path.h"
@@ -964,6 +965,78 @@ out:
        return ret;
 }
 
+/*
+ * Return the slot of the most-significant bit set in "val". There are various
+ * ways to do this quickly with fls() or __builtin_clzl(), but speed is
+ * probably not a big deal here.
+ *
+ * Slots are counted from zero, so both 0 and 1 yield slot 0 (the loop below
+ * never runs for them) and, e.g., 8 yields slot 3.
+ */
+static unsigned msb(unsigned long val)
+{
+       unsigned r = 0;
+       while (val >>= 1)
+               r++;
+       return r;
+}
+
+int odb_find_abbrev_len(struct object_database *odb,
+                       const struct object_id *oid,
+                       int min_length,
+                       unsigned *out)
+{      /*
+        * Compute the abbreviation length for `oid`. The starting length is
+        * `min_length`, or a default derived from the approximate object
+        * count when `min_length` is negative. Each source of `odb` is then
+        * asked in turn, receiving the length found so far as its minimum.
+        *
+        * On success the result is stored in `*out` and 0 is returned. If a
+        * source returns non-zero, that value is returned and `*out` is left
+        * unwritten.
+        */
+       const struct git_hash_algo *algo = /* repository default if oid->algo is 0 */
+               oid->algo ? &hash_algos[oid->algo] : odb->repo->hash_algo;
+       const unsigned hexsz = algo->hexsz;
+       unsigned len;
+       int ret;
+
+       if (min_length < 0) {
+               unsigned long count;
+
+               if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
+                       count = 0; /* on failure, proceed as if there were no objects */
+
+               /*
+                * Add one because the MSB only tells us the highest bit set,
+                * not including the value of all the _other_ bits (so "15"
+                * is only one off of 2^4, but the MSB is the 3rd bit).
+                */
+               len = msb(count) + 1;
+               /*
+                * We now know we have on the order of 2^len objects, which
+                * expects a collision at 2^(len/2). But we also care about hex
+                * chars, not bits, and there are 4 bits per hex. So all
+                * together we need to divide by 2 and round up.
+                */
+               len = DIV_ROUND_UP(len, 2);
+               /*
+                * For very small repos, we stick with our regular fallback.
+                */
+               if (len < FALLBACK_DEFAULT_ABBREV)
+                       len = FALLBACK_DEFAULT_ABBREV;
+       } else {
+               len = min_length;
+       }
+
+       if (len >= hexsz || !len) { /* 0 or >= full size: report the full hex size */
+               *out = hexsz;
+               ret = 0;
+               goto out;
+       }
+
+       odb_prepare_alternates(odb);
+       for (struct odb_source *source = odb->sources; source; source = source->next) {
+               ret = odb_source_find_abbrev_len(source, oid, len, &len);
+               if (ret) /* any non-zero result aborts the scan */
+                       goto out;
+       }
+
+       ret = 0;
+       *out = len;
+
+out:
+       return ret;
+}
+
 void odb_assert_oid_type(struct object_database *odb,
                         const struct object_id *oid, enum object_type expect)
 {
diff --git a/odb.h b/odb.h
index e80fd8f7abd534de757efcd1c272a486e9c8445d..984bafca9d652b94e1b1edd13a30f1c83ebb0aab 100644 (file)
--- a/odb.h
+++ b/odb.h
@@ -545,6 +545,22 @@ int odb_count_objects(struct object_database *odb,
                      enum odb_count_objects_flags flags,
                      unsigned long *out);
 
+/*
+ * Given an object ID, find the minimum length required to make the
+ * object ID unique across the whole object database.
+ *
+ * The `min_len` determines the minimum abbreviated length that'll be returned
+ * by this function. If `min_len < 0`, then the function will set a sensible
+ * default minimum abbreviation length.
+ *
+ * Returns 0 on success, a negative error code otherwise. The computed length
+ * will be assigned to `*out`.
+ */
+int odb_find_abbrev_len(struct object_database *odb,
+                       const struct object_id *oid,
+                       int min_len,
+                       unsigned *out);
+
 enum {
        /*
         * By default, `odb_write_object()` does not actually write anything
index e90bb689bb01e64b1336c6b99c33da3c31e2f0a7..76797569de428001aaec524c33ed2e9d4ccef883 100644 (file)
@@ -122,6 +122,30 @@ out:
        return ret;
 }
 
+static int odb_source_files_find_abbrev_len(struct odb_source *source,
+                                           const struct object_id *oid,
+                                           unsigned min_len,
+                                           unsigned *out)
+{      /*
+        * Determine the abbreviation length for `oid` within this source by
+        * consulting its packfile store first and its loose objects second;
+        * the length produced by the first lookup is the minimum passed to
+        * the second. `*out` is only written when neither lookup returns a
+        * negative value.
+        */
+       struct odb_source_files *files = odb_source_files_downcast(source);
+       unsigned len = min_len;
+       int ret;
+
+       ret = packfile_store_find_abbrev_len(files->packed, oid, len, &len);
+       if (ret < 0)
+               goto out;
+
+       ret = odb_source_loose_find_abbrev_len(source, oid, len, &len);
+       if (ret < 0)
+               goto out;
+
+       *out = len;
+       ret = 0; /* collapse any positive helper result into plain success */
+
+out:
+       return ret;
+}
+
 static int odb_source_files_freshen_object(struct odb_source *source,
                                           const struct object_id *oid)
 {
@@ -250,6 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
        files->base.read_object_stream = odb_source_files_read_object_stream;
        files->base.for_each_object = odb_source_files_for_each_object;
        files->base.count_objects = odb_source_files_count_objects;
+       files->base.find_abbrev_len = odb_source_files_find_abbrev_len;
        files->base.freshen_object = odb_source_files_freshen_object;
        files->base.write_object = odb_source_files_write_object;
        files->base.write_object_stream = odb_source_files_write_object_stream;
index ee5d6ed530d519a15d20382593691e1371772258..a9d7d0b96fde72d5257d7f18212417ccac273f4c 100644 (file)
@@ -157,6 +157,18 @@ struct odb_source {
                             enum odb_count_objects_flags flags,
                             unsigned long *out);
 
+       /*
+        * This callback is expected to find the minimum required length to
+        * make the given object ID unique.
+        *
+        * The callback is expected to return a negative error code in case it
+        * failed, 0 otherwise.
+        */
+       int (*find_abbrev_len)(struct odb_source *source,
+                              const struct object_id *oid,
+                              unsigned min_length,
+                              unsigned *out);
+
        /*
         * This callback is expected to freshen the given object so that its
         * last access time is set to the current time. This is used to ensure
@@ -360,6 +372,18 @@ static inline int odb_source_count_objects(struct odb_source *source,
        return source->count_objects(source, flags, out);
 }
 
+/*
+ * Determine the minimum required length to make the given object ID unique in
+ * the given source. Returns 0 on success, a negative error code otherwise.
+ *
+ * This is a thin wrapper that forwards all arguments to the source's
+ * `find_abbrev_len` callback and returns its result unchanged; the callback
+ * pointer is invoked without a NULL check.
+ */
+static inline int odb_source_find_abbrev_len(struct odb_source *source,
+                                            const struct object_id *oid,
+                                            unsigned min_len,
+                                            unsigned *out)
+{
+       return source->find_abbrev_len(source, oid, min_len, out);
+}
+
 /*
  * Freshen an object in the object database by updating its timestamp.
  * Returns 1 in case the object has been freshened, 0 in case the object does