]> git.ipfire.org Git - thirdparty/git.git/commitdiff
odb/source: make `read_object_info()` function pluggable
authorPatrick Steinhardt <ps@pks.im>
Thu, 5 Mar 2026 14:19:49 +0000 (15:19 +0100)
committerJunio C Hamano <gitster@pobox.com>
Thu, 5 Mar 2026 19:45:15 +0000 (11:45 -0800)
Introduce a new `read_object_info()` callback function in `struct
odb_source` to make reading object info pluggable.

Note that this function is a bit less straightforward to convert
compared to the other functions. The reason here is that the logic to
read an object is:

  1. We try to read the object. If it exists we return it.

  2. If the object does not exist we reprepare the object database
     source.

  3. We then try reading the object info a second time in case the
     reprepare caused it to appear.

The second read is only supposed to happen for the packfile store
though, as reading loose objects is not impacted by repreparing the
object database.

Ideally, we'd just move this whole logic into the ODB source. But that's
not easily possible because we try to avoid the reprepare unless really
required, which is after we have found out that no other ODB source
contains the object, either. So the logic spans across multiple ODB
sources, and consequently we cannot move it into an individual source.

Instead, introduce a new flag `OBJECT_INFO_SECOND_READ` that tells the
backend that we already tried to look up the object once, and that this
time around the ODB source should try to find any new objects that may
have surfaced due to an on-disk change.

With this flag, the "files" backend can trivially skip trying to re-read
the object as a loose object. Furthermore, as we know that we only try
the second read via the packfile store, we can skip repreparing loose
objects and only reprepare the packfile store.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
object-file.c
odb.c
odb.h
odb/source-files.c
odb/source.h
packfile.c

index 7ef8291a486794c7a8972eb8a37c4b0b337895fe..eefde72c7da10303814ee52ce58e68a48672bf2b 100644 (file)
@@ -546,6 +546,16 @@ int odb_source_loose_read_object_info(struct odb_source *source,
                                      enum object_info_flags flags)
 {
        static struct strbuf buf = STRBUF_INIT;
+
+       /*
+        * The second read shouldn't cause new loose objects to show up, unless
+        * there was a race condition with a secondary process. We don't care
+        * about this case though, so we simply skip reading loose objects a
+        * second time.
+        */
+       if (flags & OBJECT_INFO_SECOND_READ)
+               return -1;
+
        odb_loose_path(source, &buf, oid);
        return read_object_info_from_path(source, buf.buf, oid, oi, flags);
 }
diff --git a/odb.c b/odb.c
index f7487eb0df93b9004d0ff18284d37254859880c3..c0b8cd062bf3b38e27b71174a19171e93efb4ca6 100644 (file)
--- a/odb.c
+++ b/odb.c
@@ -688,22 +688,20 @@ static int do_oid_object_info_extended(struct object_database *odb,
        while (1) {
                struct odb_source *source;
 
-               /* Most likely it's a loose object. */
-               for (source = odb->sources; source; source = source->next) {
-                       struct odb_source_files *files = odb_source_files_downcast(source);
-                       if (!packfile_store_read_object_info(files->packed, real, oi, flags) ||
-                           !odb_source_loose_read_object_info(source, real, oi, flags))
+               for (source = odb->sources; source; source = source->next)
+                       if (!odb_source_read_object_info(source, real, oi, flags))
                                return 0;
-               }
 
-               /* Not a loose object; someone else may have just packed it. */
+               /*
+                * When the object hasn't been found we try a second read and
+                * tell the sources so. This may cause them to invalidate
+                * caches or reload on-disk state.
+                */
                if (!(flags & OBJECT_INFO_QUICK)) {
-                       odb_reprepare(odb->repo->objects);
-                       for (source = odb->sources; source; source = source->next) {
-                               struct odb_source_files *files = odb_source_files_downcast(source);
-                               if (!packfile_store_read_object_info(files->packed, real, oi, flags))
+                       for (source = odb->sources; source; source = source->next)
+                               if (!odb_source_read_object_info(source, real, oi,
+                                                                flags | OBJECT_INFO_SECOND_READ))
                                        return 0;
-                       }
                }
 
                /*
diff --git a/odb.h b/odb.h
index e13b5b7c44120b527ec9a798f90a775ab659089a..70ffb033f936158caa41fd095945df9261e8e963 100644 (file)
--- a/odb.h
+++ b/odb.h
@@ -339,30 +339,6 @@ struct object_info {
  */
 #define OBJECT_INFO_INIT { 0 }
 
-/* Flags that can be passed to `odb_read_object_info_extended()`. */
-enum object_info_flags {
-       /* Invoke lookup_replace_object() on the given hash. */
-       OBJECT_INFO_LOOKUP_REPLACE = (1 << 0),
-
-       /* Do not reprepare object sources when the first lookup has failed. */
-       OBJECT_INFO_QUICK = (1 << 1),
-
-       /*
-        * Do not attempt to fetch the object if missing (even if fetch_is_missing is
-        * nonzero).
-        */
-       OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2),
-
-       /* Die if object corruption (not just an object being missing) was detected. */
-       OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3),
-
-       /*
-        * This is meant for bulk prefetching of missing blobs in a partial
-        * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK.
-        */
-       OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK),
-};
-
 /*
  * Read object info from the object database and populate the `object_info`
  * structure. Returns 0 on success, a negative error code otherwise.
index 20a24f524a46cd4772f1375d73ac8cf86852208c..f2969a1214c8f726db671bf262cd3c7a515ece4e 100644 (file)
@@ -41,6 +41,20 @@ static void odb_source_files_reprepare(struct odb_source *source)
        packfile_store_reprepare(files->packed);
 }
 
+static int odb_source_files_read_object_info(struct odb_source *source,
+                                            const struct object_id *oid,
+                                            struct object_info *oi,
+                                            enum object_info_flags flags)
+{
+       struct odb_source_files *files = odb_source_files_downcast(source);
+
+       if (!packfile_store_read_object_info(files->packed, oid, oi, flags) ||
+           !odb_source_loose_read_object_info(source, oid, oi, flags))
+               return 0;
+
+       return -1;
+}
+
 struct odb_source_files *odb_source_files_new(struct object_database *odb,
                                              const char *path,
                                              bool local)
@@ -55,6 +69,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
        files->base.free = odb_source_files_free;
        files->base.close = odb_source_files_close;
        files->base.reprepare = odb_source_files_reprepare;
+       files->base.read_object_info = odb_source_files_read_object_info;
 
        /*
         * Ideally, we would only ever store absolute paths in the source. This
index 0e6c6abdb1c19ad752d194a001c0f4bfb35f5b06..150becafe6ce379b79d514988accc7b098e32908 100644 (file)
@@ -12,6 +12,45 @@ enum odb_source_type {
        ODB_SOURCE_FILES,
 };
 
+/* Flags that can be passed to `odb_read_object_info_extended()`. */
+enum object_info_flags {
+       /* Invoke lookup_replace_object() on the given hash. */
+       OBJECT_INFO_LOOKUP_REPLACE = (1 << 0),
+
+       /* Do not reprepare object sources when the first lookup has failed. */
+       OBJECT_INFO_QUICK = (1 << 1),
+
+       /*
+        * Do not attempt to fetch the object if missing (even if fetch_is_missing is
+        * nonzero).
+        */
+       OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2),
+
+       /* Die if object corruption (not just an object being missing) was detected. */
+       OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3),
+
+       /*
+        * We have already tried reading the object, but it couldn't be found
+        * via any of the attached sources, and are now doing a second read.
+        * This second read asks the individual sources to also evaluate
+        * whether any on-disk state may have changed that may have caused the
+        * object to appear.
+        *
+        * This flag is for internal use only. The second read only occurs
+        * when `OBJECT_INFO_QUICK` was not passed.
+        */
+       OBJECT_INFO_SECOND_READ = (1 << 4),
+
+       /*
+        * This is meant for bulk prefetching of missing blobs in a partial
+        * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK.
+        */
+       OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK),
+};
+
+struct object_id;
+struct object_info;
+
 /*
  * The source is the part of the object database that stores the actual
  * objects. It thus encapsulates the logic to read and write the specific
@@ -72,6 +111,33 @@ struct odb_source {
         * example just been repacked so that new objects will become visible.
         */
        void (*reprepare)(struct odb_source *source);
+
+       /*
+        * This callback is expected to read object information from the object
+        * database source. The object info will be partially populated with
+        * pointers for each bit of information that was requested by the
+        * caller.
+        *
+        * The flags field is a combination of `OBJECT_INFO` flags. Only the
+        * following flags need to be handled by the backend:
+        *
+        *   - `OBJECT_INFO_QUICK` indicates it is fine to use caches without
+        *     re-verifying the data.
+        *
+        *   - `OBJECT_INFO_SECOND_READ` indicates that the initial object
+        *     lookup has failed and that the object source should check
+        *     whether any of its on-disk state has changed that may have
+        *     caused the object to appear. Sources are free to ignore the
+        *     second read in case they know that the first read would have
+        *     already surfaced the object without reloading any on-disk state.
+        *
+        * The callback is expected to return a negative error code in case
+        * reading the object has failed, 0 otherwise.
+        */
+       int (*read_object_info)(struct odb_source *source,
+                               const struct object_id *oid,
+                               struct object_info *oi,
+                               enum object_info_flags flags);
 };
 
 /*
@@ -131,4 +197,16 @@ static inline void odb_source_reprepare(struct odb_source *source)
        source->reprepare(source);
 }
 
+/*
+ * Read an object from the object database source identified by its object ID.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static inline int odb_source_read_object_info(struct odb_source *source,
+                                             const struct object_id *oid,
+                                             struct object_info *oi,
+                                             enum object_info_flags flags)
+{
+       return source->read_object_info(source, oid, oi, flags);
+}
+
 #endif
index da1c0dfa3995d02fa8c71d979c04559c830039c8..71db10e7c655615de111ac19789691e3f75f0cf9 100644 (file)
@@ -2181,11 +2181,19 @@ int packfile_store_freshen_object(struct packfile_store *store,
 int packfile_store_read_object_info(struct packfile_store *store,
                                    const struct object_id *oid,
                                    struct object_info *oi,
-                                   enum object_info_flags flags UNUSED)
+                                   enum object_info_flags flags)
 {
        struct pack_entry e;
        int ret;
 
+       /*
+        * In case the first read didn't surface the object, we have to reload
+        * packfiles. This may cause us to discover new packfiles that have
+        * been added since the last time we have prepared the packfile store.
+        */
+       if (flags & OBJECT_INFO_SECOND_READ)
+               packfile_store_reprepare(store);
+
        if (!find_pack_entry(store, oid, &e))
                return 1;