git.ipfire.org Git - thirdparty/git.git/commitdiff
packfile: widen unpack_entry()'s size out-parameter to size_t
author: Johannes Schindelin <johannes.schindelin@gmx.de>
Mon, 15 Jun 2026 11:52:26 +0000 (11:52 +0000)
committer: Junio C Hamano <gitster@pobox.com>
Mon, 15 Jun 2026 14:45:40 +0000 (07:45 -0700)
The topic `js/objects-larger-than-4gb-on-windows` widened the streaming,
index-pack and unpack-objects paths to `size_t` but deliberately stopped
at the in-memory `unpack_entry()` cascade, which still hands back the
unpacked size through `unsigned long *`.  On Windows that boundary
truncates above 4 GiB because that data type is only 32 bits wide on
that platform.

Widen the code path, with one exception:
`packed_object_info_with_index_pos()` cannot yet pass `oi->sizep`
directly because the field is still `unsigned long *`; bridge it with a
`size_t` temporary that narrows back, and let a later commit drop the
bridge once the field is wide too. `gfi_unpack_entry()` keeps its narrow
signature because fast-import tracks sizes through `unsigned long`
everywhere it crosses subsystem boundaries; leaving that signature alone
also keeps the scope of this commit reasonable.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/fast-import.c
pack-check.c
packfile.c
packfile.h

index 82bc6dcc003723615af11e638deaea45278b6729..3dff898c434692db670fbffb1b1368454a4a58a2 100644 (file)
@@ -1239,6 +1239,8 @@ static void *gfi_unpack_entry(
        unsigned long *sizep)
 {
        enum object_type type;
+       size_t size_st = 0;
+       void *data;
        struct packed_git *p = all_packs[oe->pack_id];
        if (p == pack_data && p->pack_size < (pack_size + the_hash_algo->rawsz)) {
                /* The object is stored in the packfile we are writing to
@@ -1260,7 +1262,10 @@ static void *gfi_unpack_entry(
                 */
                p->pack_size = pack_size + the_hash_algo->rawsz;
        }
-       return unpack_entry(the_repository, p, oe->idx.offset, &type, sizep);
+       data = unpack_entry(the_repository, p, oe->idx.offset, &type, &size_st);
+       if (sizep)
+               *sizep = cast_size_t_to_ulong(size_st);
+       return data;
 }
 
 static void load_tree(struct tree_entry *root)
index 2792f34d2595bf486222f0ac4fbbeb40e61debc4..5adfb3f2726fb3a4a02174f0c6e8157f64375c63 100644 (file)
@@ -143,9 +143,8 @@ static int verify_packfile(struct repository *r,
                        data = NULL;
                        data_valid = 0;
                } else {
-                       unsigned long sz;
-                       data = unpack_entry(r, p, entries[i].offset, &type, &sz);
-                       size = sz;
+                       data = unpack_entry(r, p, entries[i].offset, &type,
+                                           &size);
                        data_valid = 1;
                }
 
index e202f488379e5f50c2c138d606f1e8640b0dbbf2..dab0a9b16d139fe3474b1c744f3af6455d597fa9 100644 (file)
@@ -1454,7 +1454,7 @@ struct delta_base_cache_entry {
        struct delta_base_cache_key key;
        struct list_head lru;
        void *data;
-       unsigned long size;
+       size_t size;
        enum object_type type;
 };
 
@@ -1525,7 +1525,7 @@ static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
 }
 
 static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p,
-                                  off_t base_offset, unsigned long *base_size,
+                                  off_t base_offset, size_t *base_size,
                                   enum object_type *type)
 {
        struct delta_base_cache_entry *ent;
@@ -1558,8 +1558,8 @@ void clear_delta_base_cache(void)
 }
 
 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
-                                void *base, unsigned long base_size,
-                                unsigned long delta_base_cache_limit,
+                                void *base, size_t base_size,
+                                size_t delta_base_cache_limit,
                                 enum object_type type)
 {
        struct delta_base_cache_entry *ent;
@@ -1614,10 +1614,13 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
         * a "real" type later if the caller is interested.
         */
        if (oi->contentp) {
-               *oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, oi->sizep,
-                                                     &type);
+               size_t size_st = 0;
+               *oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset,
+                                                     &size_st, &type);
                if (!*oi->contentp)
                        type = OBJ_BAD;
+               else if (oi->sizep)
+                       *oi->sizep = cast_size_t_to_ulong(size_st);
        } else if (oi->sizep || oi->typep || oi->delta_base_oid) {
                type = unpack_object_header(p, &w_curs, &curpos, &size);
        }
@@ -1735,7 +1738,7 @@ int packed_object_info(struct packed_git *p, off_t obj_offset,
 static void *unpack_compressed_entry(struct packed_git *p,
                                    struct pack_window **w_curs,
                                    off_t curpos,
-                                   unsigned long size)
+                                   size_t size)
 {
        int st;
        git_zstream stream;
@@ -1790,11 +1793,11 @@ int do_check_packed_object_crc;
 struct unpack_entry_stack_ent {
        off_t obj_offset;
        off_t curpos;
-       unsigned long size;
+       size_t size;
 };
 
 void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
-                  enum object_type *final_type, unsigned long *final_size)
+                  enum object_type *final_type, size_t *final_size)
 {
        struct pack_window *w_curs = NULL;
        off_t curpos = obj_offset;
@@ -1911,7 +1914,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
                void *delta_data;
                void *base = data;
                void *external_base = NULL;
-               unsigned long delta_size, base_size = size;
+               size_t delta_size, base_size = size;
                int i;
                off_t base_obj_offset = obj_offset;
 
@@ -1928,6 +1931,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
                        struct object_id base_oid;
                        if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
                                struct object_info oi = OBJECT_INFO_INIT;
+                               unsigned long bsz_ul = 0;
 
                                nth_packed_object_id(&base_oid, p,
                                                     pack_pos_to_index(p, pos));
@@ -1938,11 +1942,13 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
                                mark_bad_packed_object(p, &base_oid);
 
                                oi.typep = &type;
-                               oi.sizep = &base_size;
+                               oi.sizep = &bsz_ul;
                                oi.contentp = &base;
                                if (odb_read_object_info_extended(r->objects, &base_oid,
                                                                  &oi, 0) < 0)
                                        base = NULL;
+                               else
+                                       base_size = bsz_ul;
 
                                external_base = base;
                        }
index 49d6bdecf6ea185534419f3fe00fb9d20d894fbe..0b5ae3f9fcf3b36e68fb047b6ebcc53e43754595 100644 (file)
@@ -455,7 +455,8 @@ off_t nth_packed_object_offset(const struct packed_git *, uint32_t n);
 off_t find_pack_entry_one(const struct object_id *oid, struct packed_git *);
 
 int is_pack_valid(struct packed_git *);
-void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
+void *unpack_entry(struct repository *r, struct packed_git *, off_t,
+                  enum object_type *, size_t *);
 unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep);
 unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *);