odb: introduce generic object counting

author Patrick Steinhardt <ps@pks.im>

Thu, 12 Mar 2026 08:43:01 +0000 (09:43 +0100)

committer Junio C Hamano <gitster@pobox.com>

Thu, 12 Mar 2026 15:38:43 +0000 (08:38 -0700)
author Patrick Steinhardt <ps@pks.im>
Thu, 12 Mar 2026 08:43:01 +0000 (09:43 +0100)
committer Junio C Hamano <gitster@pobox.com>
Thu, 12 Mar 2026 15:38:43 +0000 (08:38 -0700)
diff --git a/builtin/gc.c b/builtin/gc.c

index 3a64d28da81b611eb28b7b6030b6566fa6208732..cb9ca89a974819c054b09d6f72cb5bd67fd028e5 100644 (file)
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -574,9 +574,13 @@ static uint64_t total_ram(void)
  static uint64_t estimate_repack_memory(struct gc_config *cfg,
                                        struct packed_git *pack)
  {
-       unsigned long nr_objects = repo_approximate_object_count(the_repository);
+       unsigned long nr_objects;
         size_t os_cache, heap;
  
+       if (odb_count_objects(the_repository->objects,
+                             ODB_COUNT_OBJECTS_APPROXIMATE, &nr_objects) < 0)
+               return 0;
+
         if (!pack || !nr_objects)
                 return 0;
  
diff --git a/commit-graph.c b/commit-graph.c

index f8e24145a513b083052ea3bfc22372972f250e68..c03000333044042cdb87cc32987faba2d018dbc0 100644 (file)
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -2607,7 +2607,8 @@ int write_commit_graph(struct odb_source *source,
                         replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
         }
  
-       ctx.approx_nr_objects = repo_approximate_object_count(r);
+       if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &ctx.approx_nr_objects) < 0)
+               ctx.approx_nr_objects = 0;
  
         if (ctx.append && g) {
                 for (i = 0; i < g->num_commits; i++) {
diff --git a/object-name.c b/object-name.c

index 7b14c3bf9b9b48c587bfa8b7d5e28f4c82484724..e5adec4c9d5084d0a947322ba044ce7302b10cef 100644 (file)
--- a/object-name.c
+++ b/object-name.c
@@ -837,7 +837,11 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
         const unsigned hexsz = algo->hexsz;
  
         if (len < 0) {
-               unsigned long count = repo_approximate_object_count(r);
+               unsigned long count;
+
+               if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
+                       count = 0;
+
                 /*
                  * Add one because the MSB only tells us the highest bit set,
                  * not including the value of all the _other_ bits (so "15"
diff --git a/odb.c b/odb.c

index 84a31084d3884b01663c2b8f8967eb9397dc3b44..350e23f3c0798d35bbb1f378b23d45ca6bcfa2bc 100644 (file)
--- a/odb.c
+++ b/odb.c
@@ -917,6 +917,41 @@ int odb_for_each_object(struct object_database *odb,
         return 0;
  }
  
+int odb_count_objects(struct object_database *odb,
+                     enum odb_count_objects_flags flags,
+                     unsigned long *out)
+{
+       struct odb_source *source;
+       unsigned long count = 0;
+       int ret;
+
+       if (odb->object_count_valid && odb->object_count_flags == flags) {
+               *out = odb->object_count;
+               return 0;
+       }
+
+       odb_prepare_alternates(odb);
+       for (source = odb->sources; source; source = source->next) {
+               unsigned long c;
+
+               ret = odb_source_count_objects(source, flags, &c);
+               if (ret < 0)
+                       goto out;
+
+               count += c;
+       }
+
+       odb->object_count = count;
+       odb->object_count_valid = 1;
+       odb->object_count_flags = flags;
+
+       *out = count;
+       ret = 0;
+
+out:
+       return ret;
+}
+
  void odb_assert_oid_type(struct object_database *odb,
                          const struct object_id *oid, enum object_type expect)
  {
@@ -1030,7 +1065,7 @@ void odb_reprepare(struct object_database *o)
         for (source = o->sources; source; source = source->next)
                 odb_source_reprepare(source);
  
-       o->approximate_object_count_valid = 0;
+       o->object_count_valid = 0;
  
         obj_read_unlock();
  }
diff --git a/odb.h b/odb.h

index e6057477f624cd1559ffcd8f25ac5e43dbab8d76..9aee260105ae5473ecc955b6f2d7102685bed190 100644 (file)
--- a/odb.h
+++ b/odb.h
@@ -110,10 +110,11 @@ struct object_database {
         /*
          * A fast, rough count of the number of objects in the repository.
-        * These two fields are not meant for direct access. Use
-        * repo_approximate_object_count() instead.
+        * These fields are not meant for direct access. Use
+        * odb_count_objects() instead.
          */
-       unsigned long approximate_object_count;
-       unsigned approximate_object_count_valid : 1;
+       unsigned long object_count;
+       unsigned object_count_flags;
+       unsigned object_count_valid : 1;
  
         /*
          * Submodule source paths that will be added as additional sources to
@@ -509,6 +510,18 @@ enum odb_count_objects_flags {
         ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0),
  };
  
+/*
+ * Count the number of objects in the given object database. This object count
+ * may double-count objects that are stored in multiple backends, or which are
+ * stored multiple times in a single backend.
+ *
+ * Returns 0 on success, a negative error code otherwise. On success the number
+ * of objects is stored in `*out`; on failure `*out` is left unmodified.
+ */
+int odb_count_objects(struct object_database *odb,
+                     enum odb_count_objects_flags flags,
+                     unsigned long *out);
+
  enum {
         /*
          * By default, `odb_write_object()` does not actually write anything
diff --git a/packfile.c b/packfile.c

index 8ee462303afa79b382e80aa63f1eff5db87c90a6..d4de9f3ffe831eda9a2dead1516524a2647010fb 100644 (file)
--- a/packfile.c
+++ b/packfile.c
@@ -1132,33 +1132,6 @@ out:
         return ret;
  }
  
-/*
- * Give a fast, rough count of the number of objects in the repository. This
- * ignores loose objects completely. If you have a lot of them, then either
- * you should repack because your performance will be awful, or they are
- * all unreachable objects about to be pruned, in which case they're not really
- * interesting as a measure of repo size in the first place.
- */
-unsigned long repo_approximate_object_count(struct repository *r)
-{
-       if (!r->objects->approximate_object_count_valid) {
-               struct odb_source *source;
-               unsigned long count = 0;
-
-               odb_prepare_alternates(r->objects);
-               for (source = r->objects->sources; source; source = source->next) {
-                       unsigned long c;
-
-                       if (!odb_source_count_objects(source, ODB_COUNT_OBJECTS_APPROXIMATE, &c))
-                               count += c;
-               }
-
-               r->objects->approximate_object_count = count;
-               r->objects->approximate_object_count_valid = 1;
-       }
-       return r->objects->approximate_object_count;
-}
-
  unsigned long unpack_object_header_buffer(const unsigned char *buf,
                 unsigned long len, enum object_type *type, unsigned long *sizep)
  {
diff --git a/packfile.h b/packfile.h

index 74b6bc58c5a6b7f0b741ae7b4f88c370c0d93227..a16ec3950d2507ef76896866c988b9a75eb35792 100644 (file)
--- a/packfile.h
+++ b/packfile.h
@@ -375,12 +375,6 @@ int packfile_store_for_each_object(struct packfile_store *store,
  #define PACKDIR_FILE_GARBAGE 4
  extern void (*report_garbage)(unsigned seen_bits, const char *path);
  
-/*
- * Give a rough count of objects in the repository. This sacrifices accuracy
- * for speed.
- */
-unsigned long repo_approximate_object_count(struct repository *r);
-
  void pack_report(struct repository *repo);
  
  /*
author	Patrick Steinhardt <ps@pks.im>
	Thu, 12 Mar 2026 08:43:01 +0000 (09:43 +0100)
committer	Junio C Hamano <gitster@pobox.com>
	Thu, 12 Mar 2026 15:38:43 +0000 (08:38 -0700)
builtin/gc.c		patch \| blob \| blame \| history
commit-graph.c		patch \| blob \| blame \| history
object-name.c		patch \| blob \| blame \| history
odb.c		patch \| blob \| blame \| history
odb.h		patch \| blob \| blame \| history
packfile.c		patch \| blob \| blame \| history
packfile.h		patch \| blob \| blame \| history