git.ipfire.org Git - thirdparty/git.git/commitdiff
streaming: move logic to read loose objects streams into backend
author: Patrick Steinhardt <ps@pks.im>
Sun, 23 Nov 2025 18:59:40 +0000 (19:59 +0100)
committer: Junio C Hamano <gitster@pobox.com>
Sun, 23 Nov 2025 20:56:45 +0000 (12:56 -0800)
Move the logic to read loose object streams into the respective
subsystem. This allows us to make a couple of function declarations
private.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
object-file.c
object-file.h
streaming.c

index b62b21a45289fc1f229c4f318f826c9c27a7e015..8c67847feaceb641d2791c7d36a79d944416ac24 100644 (file)
@@ -234,9 +234,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
        return map;
 }
 
-void *odb_source_loose_map_object(struct odb_source *source,
-                                 const struct object_id *oid,
-                                 unsigned long *size)
+static void *odb_source_loose_map_object(struct odb_source *source,
+                                        const struct object_id *oid,
+                                        unsigned long *size)
 {
        const char *p;
        int fd = open_loose_object(source->loose, oid, &p);
@@ -246,11 +246,29 @@ void *odb_source_loose_map_object(struct odb_source *source,
        return map_fd(fd, p, size);
 }
 
-enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
-                                                   unsigned char *map,
-                                                   unsigned long mapsize,
-                                                   void *buffer,
-                                                   unsigned long bufsiz)
+enum unpack_loose_header_result {
+       ULHR_OK,
+       ULHR_BAD,
+       ULHR_TOO_LONG,
+};
+
+/**
+ * unpack_loose_header() initializes the data stream needed to unpack
+ * a loose object header.
+ *
+ * Returns:
+ *
+ * - ULHR_OK on success
+ * - ULHR_BAD on error
+ * - ULHR_TOO_LONG if the header was too long
+ *
+ * It will only parse up to MAX_HEADER_LEN bytes.
+ */
+static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
+                                                          unsigned char *map,
+                                                          unsigned long mapsize,
+                                                          void *buffer,
+                                                          unsigned long bufsiz)
 {
        int status;
 
@@ -329,11 +347,18 @@ static void *unpack_loose_rest(git_zstream *stream,
 }
 
 /*
+ * parse_loose_header() parses the starting "<type> <len>\0" of an
+ * object. If it doesn't follow that format -1 is returned. To check
+ * the validity of the <type> populate the "typep" in the "struct
+ * object_info". It will be OBJ_BAD if the object type is unknown. The
+ * parsed <len> can be retrieved via "oi->sizep", and from there
+ * passed to unpack_loose_rest().
+ *
  * We used to just use "sscanf()", but that's actually way
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-int parse_loose_header(const char *hdr, struct object_info *oi)
+static int parse_loose_header(const char *hdr, struct object_info *oi)
 {
        const char *type_buf = hdr;
        size_t size;
@@ -1976,3 +2001,127 @@ void odb_source_loose_free(struct odb_source_loose *loose)
        loose_object_map_clear(&loose->map);
        free(loose);
 }
+
+struct odb_loose_read_stream {
+       struct odb_read_stream base;
+       git_zstream z;
+       enum {
+               ODB_LOOSE_READ_STREAM_INUSE,
+               ODB_LOOSE_READ_STREAM_DONE,
+               ODB_LOOSE_READ_STREAM_ERROR,
+       } z_state;
+       void *mapped;
+       unsigned long mapsize;
+       char hdr[32];
+       int hdr_avail;
+       int hdr_used;
+};
+
+static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
+{
+       struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
+       size_t total_read = 0;
+
+       switch (st->z_state) {
+       case ODB_LOOSE_READ_STREAM_DONE:
+               return 0;
+       case ODB_LOOSE_READ_STREAM_ERROR:
+               return -1;
+       default:
+               break;
+       }
+
+       if (st->hdr_used < st->hdr_avail) {
+               size_t to_copy = st->hdr_avail - st->hdr_used;
+               if (sz < to_copy)
+                       to_copy = sz;
+               memcpy(buf, st->hdr + st->hdr_used, to_copy);
+               st->hdr_used += to_copy;
+               total_read += to_copy;
+       }
+
+       while (total_read < sz) {
+               int status;
+
+               st->z.next_out = (unsigned char *)buf + total_read;
+               st->z.avail_out = sz - total_read;
+               status = git_inflate(&st->z, Z_FINISH);
+
+               total_read = st->z.next_out - (unsigned char *)buf;
+
+               if (status == Z_STREAM_END) {
+                       git_inflate_end(&st->z);
+                       st->z_state = ODB_LOOSE_READ_STREAM_DONE;
+                       break;
+               }
+               if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
+                       git_inflate_end(&st->z);
+                       st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
+                       return -1;
+               }
+       }
+       return total_read;
+}
+
+static int close_istream_loose(struct odb_read_stream *_st)
+{
+       struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
+       if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
+               git_inflate_end(&st->z);
+       munmap(st->mapped, st->mapsize);
+       return 0;
+}
+
+int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+                                       struct odb_source *source,
+                                       const struct object_id *oid)
+{
+       struct object_info oi = OBJECT_INFO_INIT;
+       struct odb_loose_read_stream *st;
+       unsigned long mapsize;
+       void *mapped;
+
+       mapped = odb_source_loose_map_object(source, oid, &mapsize);
+       if (!mapped)
+               return -1;
+
+       /*
+        * Note: we must allocate this structure early even though we may still
+        * fail, because the zlib stream is initialized in place and cannot be
+        * copied afterwards (it has self-referencing pointers). `st->mapped`
+        * is only set on success, so the error path unmaps the local `mapped`.
+        */
+       CALLOC_ARRAY(st, 1);
+
+       switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
+                                   sizeof(st->hdr))) {
+       case ULHR_OK:
+               break;
+       case ULHR_BAD:
+       case ULHR_TOO_LONG:
+               goto error;
+       }
+
+       oi.sizep = &st->base.size;
+       oi.typep = &st->base.type;
+
+       if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
+               goto error;
+
+       st->mapped = mapped;
+       st->mapsize = mapsize;
+       st->hdr_used = strlen(st->hdr) + 1;
+       st->hdr_avail = st->z.total_out;
+       st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
+       st->base.close = close_istream_loose;
+       st->base.read = read_istream_loose;
+
+       *out = &st->base;
+
+       return 0;
+error:
+       git_inflate_end(&st->z);
+       munmap(mapped, mapsize);
+       free(st);
+       return -1;
+}
index eeffa67bbda63102e345c49e7bbf18871ed82123..1229d5f675b44aa002cb49d9cdafe6842405cf2c 100644 (file)
@@ -16,6 +16,8 @@ enum {
 int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
 int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
 
+struct object_info;
+struct odb_read_stream;
 struct odb_source;
 
 struct odb_source_loose {
@@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source,
                                      const struct object_id *oid,
                                      struct object_info *oi, int flags);
 
-void *odb_source_loose_map_object(struct odb_source *source,
-                                 const struct object_id *oid,
-                                 unsigned long *size);
+int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+                                       struct odb_source *source,
+                                       const struct object_id *oid);
 
 /*
  * Return true iff an object database source has a loose object
@@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb,
 int format_object_header(char *str, size_t size, enum object_type type,
                         size_t objsize);
 
-/**
- * unpack_loose_header() initializes the data stream needed to unpack
- * a loose object header.
- *
- * Returns:
- *
- * - ULHR_OK on success
- * - ULHR_BAD on error
- * - ULHR_TOO_LONG if the header was too long
- *
- * It will only parse up to MAX_HEADER_LEN bytes.
- */
-enum unpack_loose_header_result {
-       ULHR_OK,
-       ULHR_BAD,
-       ULHR_TOO_LONG,
-};
-enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
-                                                   unsigned char *map,
-                                                   unsigned long mapsize,
-                                                   void *buffer,
-                                                   unsigned long bufsiz);
-
-/**
- * parse_loose_header() parses the starting "<type> <len>\0" of an
- * object. If it doesn't follow that format -1 is returned. To check
- * the validity of the <type> populate the "typep" in the "struct
- * object_info". It will be OBJ_BAD if the object type is unknown. The
- * parsed <len> can be retrieved via "oi->sizep", and from there
- * passed to unpack_loose_rest().
- */
-struct object_info;
-int parse_loose_header(const char *hdr, struct object_info *oi);
-
 int force_object_loose(struct odb_source *source,
                       const struct object_id *oid, time_t mtime);
 
index 0635b7c12e223325b8ebb01702bc00f96d593022..d5acc1c39650e491c8041da904e8928f4114c372 100644 (file)
@@ -114,137 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
        return &fs->base;
 }
 
-/*****************************************************************
- *
- * Loose object stream
- *
- *****************************************************************/
-
-struct odb_loose_read_stream {
-       struct odb_read_stream base;
-       git_zstream z;
-       enum {
-               ODB_LOOSE_READ_STREAM_INUSE,
-               ODB_LOOSE_READ_STREAM_DONE,
-               ODB_LOOSE_READ_STREAM_ERROR,
-       } z_state;
-       void *mapped;
-       unsigned long mapsize;
-       char hdr[32];
-       int hdr_avail;
-       int hdr_used;
-};
-
-static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
-{
-       struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
-       size_t total_read = 0;
-
-       switch (st->z_state) {
-       case ODB_LOOSE_READ_STREAM_DONE:
-               return 0;
-       case ODB_LOOSE_READ_STREAM_ERROR:
-               return -1;
-       default:
-               break;
-       }
-
-       if (st->hdr_used < st->hdr_avail) {
-               size_t to_copy = st->hdr_avail - st->hdr_used;
-               if (sz < to_copy)
-                       to_copy = sz;
-               memcpy(buf, st->hdr + st->hdr_used, to_copy);
-               st->hdr_used += to_copy;
-               total_read += to_copy;
-       }
-
-       while (total_read < sz) {
-               int status;
-
-               st->z.next_out = (unsigned char *)buf + total_read;
-               st->z.avail_out = sz - total_read;
-               status = git_inflate(&st->z, Z_FINISH);
-
-               total_read = st->z.next_out - (unsigned char *)buf;
-
-               if (status == Z_STREAM_END) {
-                       git_inflate_end(&st->z);
-                       st->z_state = ODB_LOOSE_READ_STREAM_DONE;
-                       break;
-               }
-               if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
-                       git_inflate_end(&st->z);
-                       st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
-                       return -1;
-               }
-       }
-       return total_read;
-}
-
-static int close_istream_loose(struct odb_read_stream *_st)
-{
-       struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
-       if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
-               git_inflate_end(&st->z);
-       munmap(st->mapped, st->mapsize);
-       return 0;
-}
-
-static int open_istream_loose(struct odb_read_stream **out,
-                             struct odb_source *source,
-                             const struct object_id *oid)
-{
-       struct object_info oi = OBJECT_INFO_INIT;
-       struct odb_loose_read_stream *st;
-       unsigned long mapsize;
-       void *mapped;
-
-       mapped = odb_source_loose_map_object(source, oid, &mapsize);
-       if (!mapped)
-               return -1;
-
-       /*
-        * Note: we must allocate this structure early even though we may still
-        * fail. This is because we need to initialize the zlib stream, and it
-        * is not possible to copy the stream around after the fact because it
-        * has self-referencing pointers.
-        */
-       CALLOC_ARRAY(st, 1);
-
-       switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
-                                   sizeof(st->hdr))) {
-       case ULHR_OK:
-               break;
-       case ULHR_BAD:
-       case ULHR_TOO_LONG:
-               goto error;
-       }
-
-       oi.sizep = &st->base.size;
-       oi.typep = &st->base.type;
-
-       if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
-               goto error;
-
-       st->mapped = mapped;
-       st->mapsize = mapsize;
-       st->hdr_used = strlen(st->hdr) + 1;
-       st->hdr_avail = st->z.total_out;
-       st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
-       st->base.close = close_istream_loose;
-       st->base.read = read_istream_loose;
-
-       *out = &st->base;
-
-       return 0;
-error:
-       git_inflate_end(&st->z);
-       munmap(st->mapped, st->mapsize);
-       free(st);
-       return -1;
-}
-
-
 /*****************************************************************
  *
  * Non-delta packed object stream
@@ -455,7 +324,7 @@ static int istream_source(struct odb_read_stream **out,
 
        odb_prepare_alternates(r->objects);
        for (source = r->objects->sources; source; source = source->next)
-               if (!open_istream_loose(out, source, oid))
+               if (!odb_source_loose_read_object_stream(out, source, oid))
                        return 0;
 
        return open_istream_incore(out, r, oid);