git.ipfire.org Git - thirdparty/git.git/commitdiff
odb/source-loose: wire up `read_object_stream()` callback
author: Patrick Steinhardt <ps@pks.im>
Thu, 21 May 2026 08:22:27 +0000 (10:22 +0200)
committer: Junio C Hamano <gitster@pobox.com>
Thu, 21 May 2026 13:35:19 +0000 (22:35 +0900)
Move `odb_source_loose_read_object_stream()` and its associated helpers
from "object-file.c" into "odb/source-loose.c" and wire it up as the
`read_object_stream()` callback of the loose source.

As part of the move we are also forced to expose a couple of functions
from "object-file.h" that parse object headers in a somewhat-generic
way, as those functions are now used by both subsystems.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
object-file.c
object-file.h
odb/source-files.c
odb/source-loose.c

diff --git a/object-file.c b/object-file.c
index fa174512a43c75994b9364347b8bace64ae69bb3..adfb67249364525f7dc1a30d38e889762ac5b14b 100644 (file)
@@ -164,28 +164,6 @@ int stream_object_signature(struct repository *r,
        return !oideq(oid, &real_oid) ? -1 : 0;
 }
 
-/*
- * Find "oid" as a loose object in given source, open the object and return its
- * file descriptor. Returns the file descriptor on success, negative on failure.
- *
- * The "path" out-parameter will give the path of the object we found (if any).
- * Note that it may point to static storage and is only valid until another
- * call to stat_loose_object().
- */
-static int open_loose_object(struct odb_source_loose *loose,
-                            const struct object_id *oid, const char **path)
-{
-       static struct strbuf buf = STRBUF_INIT;
-       int fd;
-
-       *path = odb_loose_path(&loose->files->base, &buf, oid);
-       fd = git_open(*path);
-       if (fd >= 0)
-               return fd;
-
-       return -1;
-}
-
 static int quick_has_loose(struct odb_source_loose *loose,
                           const struct object_id *oid)
 {
@@ -215,42 +193,11 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
        return map;
 }
 
-static void *odb_source_loose_map_object(struct odb_source *source,
-                                        const struct object_id *oid,
-                                        unsigned long *size)
-{
-       struct odb_source_files *files = odb_source_files_downcast(source);
-       const char *p;
-       int fd = open_loose_object(files->loose, oid, &p);
-
-       if (fd < 0)
-               return NULL;
-       return map_fd(fd, p, size);
-}
-
-enum unpack_loose_header_result {
-       ULHR_OK,
-       ULHR_BAD,
-       ULHR_TOO_LONG,
-};
-
-/**
- * unpack_loose_header() initializes the data stream needed to unpack
- * a loose object header.
- *
- * Returns:
- *
- * - ULHR_OK on success
- * - ULHR_BAD on error
- * - ULHR_TOO_LONG if the header was too long
- *
- * It will only parse up to MAX_HEADER_LEN bytes.
- */
-static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
-                                                          unsigned char *map,
-                                                          unsigned long mapsize,
-                                                          void *buffer,
-                                                          unsigned long bufsiz)
+enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
+                                                   unsigned char *map,
+                                                   unsigned long mapsize,
+                                                   void *buffer,
+                                                   unsigned long bufsiz)
 {
        int status;
 
@@ -340,7 +287,7 @@ static void *unpack_loose_rest(git_zstream *stream,
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-static int parse_loose_header(const char *hdr, struct object_info *oi)
+int parse_loose_header(const char *hdr, struct object_info *oi)
 {
        const char *type_buf = hdr;
        size_t size;
@@ -2170,138 +2117,3 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source)
 
        return &transaction->base;
 }
-
-struct odb_loose_read_stream {
-       struct odb_read_stream base;
-       git_zstream z;
-       enum {
-               ODB_LOOSE_READ_STREAM_INUSE,
-               ODB_LOOSE_READ_STREAM_DONE,
-               ODB_LOOSE_READ_STREAM_ERROR,
-       } z_state;
-       void *mapped;
-       unsigned long mapsize;
-       char hdr[32];
-       int hdr_avail;
-       int hdr_used;
-};
-
-static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
-{
-       struct odb_loose_read_stream *st =
-               container_of(_st, struct odb_loose_read_stream, base);
-       size_t total_read = 0;
-
-       switch (st->z_state) {
-       case ODB_LOOSE_READ_STREAM_DONE:
-               return 0;
-       case ODB_LOOSE_READ_STREAM_ERROR:
-               return -1;
-       default:
-               break;
-       }
-
-       if (st->hdr_used < st->hdr_avail) {
-               size_t to_copy = st->hdr_avail - st->hdr_used;
-               if (sz < to_copy)
-                       to_copy = sz;
-               memcpy(buf, st->hdr + st->hdr_used, to_copy);
-               st->hdr_used += to_copy;
-               total_read += to_copy;
-       }
-
-       while (total_read < sz) {
-               int status;
-
-               st->z.next_out = (unsigned char *)buf + total_read;
-               st->z.avail_out = sz - total_read;
-               status = git_inflate(&st->z, Z_FINISH);
-
-               total_read = st->z.next_out - (unsigned char *)buf;
-
-               if (status == Z_STREAM_END) {
-                       git_inflate_end(&st->z);
-                       st->z_state = ODB_LOOSE_READ_STREAM_DONE;
-                       break;
-               }
-               if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
-                       git_inflate_end(&st->z);
-                       st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
-                       return -1;
-               }
-       }
-       return total_read;
-}
-
-static int close_istream_loose(struct odb_read_stream *_st)
-{
-       struct odb_loose_read_stream *st =
-               container_of(_st, struct odb_loose_read_stream, base);
-
-       if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
-               git_inflate_end(&st->z);
-       munmap(st->mapped, st->mapsize);
-       return 0;
-}
-
-int odb_source_loose_read_object_stream(struct odb_read_stream **out,
-                                       struct odb_source *source,
-                                       const struct object_id *oid)
-{
-       struct object_info oi = OBJECT_INFO_INIT;
-       struct odb_loose_read_stream *st;
-       unsigned long mapsize;
-       unsigned long size_ul;
-       void *mapped;
-
-       mapped = odb_source_loose_map_object(source, oid, &mapsize);
-       if (!mapped)
-               return -1;
-
-       /*
-        * Note: we must allocate this structure early even though we may still
-        * fail. This is because we need to initialize the zlib stream, and it
-        * is not possible to copy the stream around after the fact because it
-        * has self-referencing pointers.
-        */
-       CALLOC_ARRAY(st, 1);
-
-       switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
-                                   sizeof(st->hdr))) {
-       case ULHR_OK:
-               break;
-       case ULHR_BAD:
-       case ULHR_TOO_LONG:
-               goto error;
-       }
-
-       /*
-        * object_info.sizep is unsigned long* (32-bit on Windows), but
-        * st->base.size is size_t (64-bit). Use temporary variable.
-        * Note: loose objects >4GB would still truncate here, but such
-        * large loose objects are uncommon (they'd normally be packed).
-        */
-       oi.sizep = &size_ul;
-       oi.typep = &st->base.type;
-
-       if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
-               goto error;
-       st->base.size = size_ul;
-
-       st->mapped = mapped;
-       st->mapsize = mapsize;
-       st->hdr_used = strlen(st->hdr) + 1;
-       st->hdr_avail = st->z.total_out;
-       st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
-       st->base.close = close_istream_loose;
-       st->base.read = read_istream_loose;
-
-       *out = &st->base;
-
-       return 0;
-error:
-       git_inflate_end(&st->z);
-       munmap(mapped, mapsize);
-       free(st);
-       return -1;
-}
diff --git a/object-file.h b/object-file.h
index 8ac2832dac34395a0fc6aa7014cd019bb7b17233..d93b7ffad704b0e5337c06d4a64d65648353826c 100644 (file)
@@ -18,13 +18,8 @@ int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct s
 int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
 
 struct object_info;
-struct odb_read_stream;
 struct odb_source;
 
-int odb_source_loose_read_object_stream(struct odb_read_stream **out,
-                                       struct odb_source *source,
-                                       const struct object_id *oid);
-
 /*
  * Return true iff an object database source has a loose object
  * with the specified name.  This function does not respect replace
@@ -199,6 +194,32 @@ int read_object_info_from_path(struct odb_source_loose *loose,
                               struct object_info *oi,
                               enum object_info_flags flags);
 
+enum unpack_loose_header_result {
+       ULHR_OK,
+       ULHR_BAD,
+       ULHR_TOO_LONG,
+};
+
+/**
+ * unpack_loose_header() initializes the data stream needed to unpack
+ * a loose object header.
+ *
+ * Returns:
+ *
+ * - ULHR_OK on success
+ * - ULHR_BAD on error
+ * - ULHR_TOO_LONG if the header was too long
+ *
+ * It will only parse up to MAX_HEADER_LEN bytes.
+ */
+enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
+                                                   unsigned char *map,
+                                                   unsigned long mapsize,
+                                                   void *buffer,
+                                                   unsigned long bufsiz);
+
+int parse_loose_header(const char *hdr, struct object_info *oi);
+
 struct odb_transaction;
 
 /*
diff --git a/odb/source-files.c b/odb/source-files.c
index 8d6924755ffb70dee49213c9a223f4331c10cecf..90806ddf86b662a7215f1e724b2e02f50f82380d 100644 (file)
@@ -67,7 +67,7 @@ static int odb_source_files_read_object_stream(struct odb_read_stream **out,
 {
        struct odb_source_files *files = odb_source_files_downcast(source);
        if (!packfile_store_read_object_stream(out, files->packed, oid) ||
-           !odb_source_loose_read_object_stream(out, source, oid))
+           !odb_source_read_object_stream(out, &files->loose->base, oid))
                return 0;
        return -1;
 }
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 50f387ecf31e385f8ad2e65e1743fd5d809e8ba1..4b82c6f316512eff360950ee34cab5b819d846f9 100644 (file)
@@ -1,11 +1,13 @@
 #include "git-compat-util.h"
 #include "abspath.h"
 #include "chdir-notify.h"
+#include "gettext.h"
 #include "loose.h"
 #include "object-file.h"
 #include "odb.h"
 #include "odb/source-files.h"
 #include "odb/source-loose.h"
+#include "odb/streaming.h"
 #include "oidtree.h"
 #include "strbuf.h"
 
@@ -30,6 +32,192 @@ static int odb_source_loose_read_object_info(struct odb_source *source,
        return read_object_info_from_path(loose, buf.buf, oid, oi, flags);
 }
 
+/*
+ * Find "oid" as a loose object in given source, open the object and return its
+ * file descriptor. Returns the file descriptor on success, negative on failure.
+ *
+ * The "path" out-parameter will give the path of the object we found (if any).
+ * Note that it may point to static storage and is only valid until another
+ * call to open_loose_object().
+ */
+static int open_loose_object(struct odb_source_loose *loose,
+                            const struct object_id *oid, const char **path)
+{
+       static struct strbuf buf = STRBUF_INIT;
+       int fd;
+
+       *path = odb_loose_path(&loose->base, &buf, oid);
+       fd = git_open(*path);
+       if (fd >= 0)
+               return fd;
+
+       return -1;
+}
+
+static void *odb_source_loose_map_object(struct odb_source_loose *loose,
+                                        const struct object_id *oid,
+                                        unsigned long *size)
+{
+       const char *p;
+       int fd = open_loose_object(loose, oid, &p);
+       void *map = NULL;
+       struct stat st;
+
+       if (fd < 0)
+               return NULL;
+
+       if (!fstat(fd, &st)) {
+               *size = xsize_t(st.st_size);
+               if (!*size) {
+                       /* mmap() is forbidden on empty files */
+                       error(_("object file %s is empty"), p);
+                       goto out;
+               }
+
+               map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
+       }
+
+out:
+       close(fd);
+       return map;
+}
+
+struct odb_loose_read_stream {
+       struct odb_read_stream base;
+       git_zstream z;
+       enum {
+               ODB_LOOSE_READ_STREAM_INUSE,
+               ODB_LOOSE_READ_STREAM_DONE,
+               ODB_LOOSE_READ_STREAM_ERROR,
+       } z_state;
+       void *mapped;
+       unsigned long mapsize;
+       char hdr[32];
+       int hdr_avail;
+       int hdr_used;
+};
+
+static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
+{
+       struct odb_loose_read_stream *st =
+               container_of(_st, struct odb_loose_read_stream, base);
+       size_t total_read = 0;
+
+       switch (st->z_state) {
+       case ODB_LOOSE_READ_STREAM_DONE:
+               return 0;
+       case ODB_LOOSE_READ_STREAM_ERROR:
+               return -1;
+       default:
+               break;
+       }
+
+       if (st->hdr_used < st->hdr_avail) {
+               size_t to_copy = st->hdr_avail - st->hdr_used;
+               if (sz < to_copy)
+                       to_copy = sz;
+               memcpy(buf, st->hdr + st->hdr_used, to_copy);
+               st->hdr_used += to_copy;
+               total_read += to_copy;
+       }
+
+       while (total_read < sz) {
+               int status;
+
+               st->z.next_out = (unsigned char *)buf + total_read;
+               st->z.avail_out = sz - total_read;
+               status = git_inflate(&st->z, Z_FINISH);
+
+               total_read = st->z.next_out - (unsigned char *)buf;
+
+               if (status == Z_STREAM_END) {
+                       git_inflate_end(&st->z);
+                       st->z_state = ODB_LOOSE_READ_STREAM_DONE;
+                       break;
+               }
+               if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
+                       git_inflate_end(&st->z);
+                       st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
+                       return -1;
+               }
+       }
+       return total_read;
+}
+
+static int close_istream_loose(struct odb_read_stream *_st)
+{
+       struct odb_loose_read_stream *st =
+               container_of(_st, struct odb_loose_read_stream, base);
+
+       if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
+               git_inflate_end(&st->z);
+       munmap(st->mapped, st->mapsize);
+       return 0;
+}
+
+static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+                                              struct odb_source *source,
+                                              const struct object_id *oid)
+{
+       struct odb_source_loose *loose = odb_source_loose_downcast(source);
+       struct object_info oi = OBJECT_INFO_INIT;
+       struct odb_loose_read_stream *st;
+       unsigned long mapsize;
+       unsigned long size_ul;
+       void *mapped;
+
+       mapped = odb_source_loose_map_object(loose, oid, &mapsize);
+       if (!mapped)
+               return -1;
+
+       /*
+        * Note: we must allocate this structure early even though we may still
+        * fail. This is because we need to initialize the zlib stream, and it
+        * is not possible to copy the stream around after the fact because it
+        * has self-referencing pointers.
+        */
+       CALLOC_ARRAY(st, 1);
+
+       switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
+                                   sizeof(st->hdr))) {
+       case ULHR_OK:
+               break;
+       case ULHR_BAD:
+       case ULHR_TOO_LONG:
+               goto error;
+       }
+
+       /*
+        * object_info.sizep is unsigned long* (32-bit on Windows), but
+        * st->base.size is size_t (64-bit). Use temporary variable.
+        * Note: loose objects >4GB would still truncate here, but such
+        * large loose objects are uncommon (they'd normally be packed).
+        */
+       oi.sizep = &size_ul;
+       oi.typep = &st->base.type;
+
+       if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
+               goto error;
+       st->base.size = size_ul;
+
+       st->mapped = mapped;
+       st->mapsize = mapsize;
+       st->hdr_used = strlen(st->hdr) + 1;
+       st->hdr_avail = st->z.total_out;
+       st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
+       st->base.close = close_istream_loose;
+       st->base.read = read_istream_loose;
+
+       *out = &st->base;
+
+       return 0;
+error:
+       git_inflate_end(&st->z);
+       munmap(mapped, mapsize);
+       free(st);
+       return -1;
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
        oidtree_clear(loose->cache);
@@ -84,6 +272,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
        loose->base.close = odb_source_loose_close;
        loose->base.reprepare = odb_source_loose_reprepare;
        loose->base.read_object_info = odb_source_loose_read_object_info;
+       loose->base.read_object_stream = odb_source_loose_read_object_stream;
 
        if (!is_absolute_path(loose->base.path))
                chdir_notify_register(NULL, odb_source_loose_reparent, loose);