]> git.ipfire.org Git - thirdparty/git.git/commitdiff
odb: introduce generic object counting
author: Patrick Steinhardt <ps@pks.im>
Thu, 12 Mar 2026 08:43:01 +0000 (09:43 +0100)
committer: Junio C Hamano <gitster@pobox.com>
Thu, 12 Mar 2026 15:38:43 +0000 (08:38 -0700)
Similar to the preceding commit, introduce counting of objects on the
object database level, replacing the logic that we have in
`repo_approximate_object_count()`.

Note that the function knows to cache the object count. It's unclear
whether this cache is really required as we shouldn't have that many
cases where we count objects repeatedly. But to be on the safe side the
caching mechanism is retained, with the only exception being that we
also have to use the passed flags as caching key.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/gc.c
commit-graph.c
object-name.c
odb.c
odb.h
packfile.c
packfile.h

index 3a64d28da81b611eb28b7b6030b6566fa6208732..cb9ca89a974819c054b09d6f72cb5bd67fd028e5 100644 (file)
@@ -574,9 +574,13 @@ static uint64_t total_ram(void)
 static uint64_t estimate_repack_memory(struct gc_config *cfg,
                                       struct packed_git *pack)
 {
-       unsigned long nr_objects = repo_approximate_object_count(the_repository);
+       unsigned long nr_objects;
        size_t os_cache, heap;
 
+       if (odb_count_objects(the_repository->objects,
+                             ODB_COUNT_OBJECTS_APPROXIMATE, &nr_objects) < 0)
+               return 0;
+
        if (!pack || !nr_objects)
                return 0;
 
index f8e24145a513b083052ea3bfc22372972f250e68..c03000333044042cdb87cc32987faba2d018dbc0 100644 (file)
@@ -2607,7 +2607,8 @@ int write_commit_graph(struct odb_source *source,
                        replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
        }
 
-       ctx.approx_nr_objects = repo_approximate_object_count(r);
+       if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &ctx.approx_nr_objects) < 0)
+               ctx.approx_nr_objects = 0;
 
        if (ctx.append && g) {
                for (i = 0; i < g->num_commits; i++) {
index 7b14c3bf9b9b48c587bfa8b7d5e28f4c82484724..e5adec4c9d5084d0a947322ba044ce7302b10cef 100644 (file)
@@ -837,7 +837,11 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
        const unsigned hexsz = algo->hexsz;
 
        if (len < 0) {
-               unsigned long count = repo_approximate_object_count(r);
+               unsigned long count;
+
+               if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
+                       count = 0;
+
                /*
                 * Add one because the MSB only tells us the highest bit set,
                 * not including the value of all the _other_ bits (so "15"
diff --git a/odb.c b/odb.c
index 84a31084d3884b01663c2b8f8967eb9397dc3b44..350e23f3c0798d35bbb1f378b23d45ca6bcfa2bc 100644 (file)
--- a/odb.c
+++ b/odb.c
@@ -917,6 +917,41 @@ int odb_for_each_object(struct object_database *odb,
        return 0;
 }
 
+int odb_count_objects(struct object_database *odb,
+                     enum odb_count_objects_flags flags,
+                     unsigned long *out)
+{
+       struct odb_source *source;
+       unsigned long count = 0;
+       int ret;
+
+       if (odb->object_count_valid && odb->object_count_flags == flags) {
+               *out = odb->object_count;
+               return 0;
+       }
+
+       odb_prepare_alternates(odb);
+       for (source = odb->sources; source; source = source->next) {
+               unsigned long c;
+
+               ret = odb_source_count_objects(source, flags, &c);
+               if (ret < 0)
+                       goto out;
+
+               count += c;
+       }
+
+       odb->object_count = count;
+       odb->object_count_valid = 1;
+       odb->object_count_flags = flags;
+
+       *out = count;
+       ret = 0;
+
+out:
+       return ret;
+}
+
 void odb_assert_oid_type(struct object_database *odb,
                         const struct object_id *oid, enum object_type expect)
 {
@@ -1030,7 +1065,7 @@ void odb_reprepare(struct object_database *o)
        for (source = o->sources; source; source = source->next)
                odb_source_reprepare(source);
 
-       o->approximate_object_count_valid = 0;
+       o->object_count_valid = 0;
 
        obj_read_unlock();
 }
diff --git a/odb.h b/odb.h
index e6057477f624cd1559ffcd8f25ac5e43dbab8d76..9aee260105ae5473ecc955b6f2d7102685bed190 100644 (file)
--- a/odb.h
+++ b/odb.h
@@ -110,10 +110,11 @@ struct object_database {
        /*
         * A fast, rough count of the number of objects in the repository.
         * These two fields are not meant for direct access. Use
-        * repo_approximate_object_count() instead.
+        * odb_count_objects() instead.
         */
-       unsigned long approximate_object_count;
-       unsigned approximate_object_count_valid : 1;
+       unsigned long object_count;
+       unsigned object_count_flags;
+       unsigned object_count_valid : 1;
 
        /*
         * Submodule source paths that will be added as additional sources to
@@ -509,6 +510,18 @@ enum odb_count_objects_flags {
        ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0),
 };
 
+/*
+ * Count the number of objects in the given object database. This object count
+ * may double-count objects that are stored in multiple backends, or which are
+ * stored multiple times in a single backend.
+ *
+ * Returns 0 on success, a negative error code otherwise. The number of objects
+ * will be assigned to the `out` pointer on success.
+ */
+int odb_count_objects(struct object_database *odb,
+                     enum odb_count_objects_flags flags,
+                     unsigned long *out);
+
 enum {
        /*
         * By default, `odb_write_object()` does not actually write anything
index 8ee462303afa79b382e80aa63f1eff5db87c90a6..d4de9f3ffe831eda9a2dead1516524a2647010fb 100644 (file)
@@ -1132,33 +1132,6 @@ out:
        return ret;
 }
 
-/*
- * Give a fast, rough count of the number of objects in the repository. This
- * ignores loose objects completely. If you have a lot of them, then either
- * you should repack because your performance will be awful, or they are
- * all unreachable objects about to be pruned, in which case they're not really
- * interesting as a measure of repo size in the first place.
- */
-unsigned long repo_approximate_object_count(struct repository *r)
-{
-       if (!r->objects->approximate_object_count_valid) {
-               struct odb_source *source;
-               unsigned long count = 0;
-
-               odb_prepare_alternates(r->objects);
-               for (source = r->objects->sources; source; source = source->next) {
-                       unsigned long c;
-
-                       if (!odb_source_count_objects(source, ODB_COUNT_OBJECTS_APPROXIMATE, &c))
-                               count += c;
-               }
-
-               r->objects->approximate_object_count = count;
-               r->objects->approximate_object_count_valid = 1;
-       }
-       return r->objects->approximate_object_count;
-}
-
 unsigned long unpack_object_header_buffer(const unsigned char *buf,
                unsigned long len, enum object_type *type, unsigned long *sizep)
 {
index 74b6bc58c5a6b7f0b741ae7b4f88c370c0d93227..a16ec3950d2507ef76896866c988b9a75eb35792 100644 (file)
@@ -375,12 +375,6 @@ int packfile_store_for_each_object(struct packfile_store *store,
 #define PACKDIR_FILE_GARBAGE 4
 extern void (*report_garbage)(unsigned seen_bits, const char *path);
 
-/*
- * Give a rough count of objects in the repository. This sacrifices accuracy
- * for speed.
- */
-unsigned long repo_approximate_object_count(struct repository *r);
-
 void pack_report(struct repository *repo);
 
 /*