From: Patrick Steinhardt Date: Sun, 23 Nov 2025 18:59:37 +0000 (+0100) Subject: streaming: rely on object sources to create object stream X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4c89d31494bff4bde6079a0e0821f1437e37d07b;p=thirdparty%2Fgit.git streaming: rely on object sources to create object stream When creating an object stream we first look up the object info and, if it's present, we call into the respective backend that contains the object to create a new stream for it. This has the consequence that, for the loose object source, we basically iterate through the object sources twice: we first discover that the file exists as a loose object in the first place by iterating through all sources. And, once we have discovered it, we again walk through all sources to try and map the object. The same issue will eventually also surface once the packfile store becomes per-object-source. Furthermore, it feels rather pointless to first look up the object only to then try and read it. Refactor the logic to be centered around sources instead. Instead of first reading the object, we immediately ask the source to create the object stream for us. If the object exists we get a stream, otherwise we'll try the next source. This way we only have to iterate through sources once. But even more importantly, this change also helps us to make the whole logic pluggable. The object read stream subsystem does not need to be aware of the different source backends anymore, but eventually it'll only have to call the source's callback function. Note that at the current point in time we aren't fully there yet: - The packfile store still sits on the object database level and is thus agnostic of the sources. - We still have to call into both the packfile store and the loose object source. But both of these issues will soon be addressed. 
This refactoring results in a slight change to semantics: previously, it was `odb_read_object_info_extended()` that picked the source for us, and it would have favored packed (non-deltified) objects over loose objects. And while we still favor packed over loose objects for a single source with the new logic, we'll now favor a loose object from an earlier source over a packed object from a later source. Ultimately this shouldn't matter though: the stream doesn't indicate to the caller which source it is from and whether it was created from a packed or loose object, so such details are opaque to the caller. And other than that we should be able to assume that two objects with the same object ID should refer to the same content, so the streamed data would be the same, too. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- diff --git a/streaming.c b/streaming.c index 46fddaf2ca..f0f7d31956 100644 --- a/streaming.c +++ b/streaming.c @@ -204,21 +204,15 @@ static int close_istream_loose(struct odb_read_stream *_st) } static int open_istream_loose(struct odb_read_stream **out, - struct repository *r, + struct odb_source *source, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; struct odb_loose_read_stream *st; - struct odb_source *source; unsigned long mapsize; void *mapped; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (mapped) - break; - } + mapped = odb_source_loose_map_object(source, oid, &mapsize); if (!mapped) return -1; @@ -352,21 +346,25 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st) } static int open_istream_pack_non_delta(struct odb_read_stream **out, - struct repository *r UNUSED, - const struct object_id *oid UNUSED, - struct packed_git *pack, - off_t offset) + struct object_database *odb, + const struct object_id *oid) { struct odb_packed_read_stream *stream; - 
struct pack_window *window; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; enum object_type in_pack_type; - size_t size; + unsigned long size; - window = NULL; + oi.sizep = &size; + + if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(the_repository) >= size) + return -1; - in_pack_type = unpack_object_header(pack, + in_pack_type = unpack_object_header(oi.u.packed.pack, &window, - &offset, + &oi.u.packed.offset, &size); unuse_pack(&window); switch (in_pack_type) { @@ -385,8 +383,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, stream->base.type = in_pack_type; stream->base.size = size; stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = pack; - stream->pos = offset; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; *out = &stream->base; @@ -463,30 +461,15 @@ static int istream_source(struct odb_read_stream **out, struct repository *r, const struct object_id *oid) { - unsigned long size; - int status; - struct object_info oi = OBJECT_INFO_INIT; - - oi.sizep = &size; - status = odb_read_object_info_extended(r->objects, oid, &oi, 0); - if (status < 0) - return status; + struct odb_source *source; - switch (oi.whence) { - case OI_LOOSE: - if (open_istream_loose(out, r, oid) < 0) - break; - return 0; - case OI_PACKED: - if (oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size || - open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack, - oi.u.packed.offset) < 0) - break; + if (!open_istream_pack_non_delta(out, r->objects, oid)) return 0; - default: - break; - } + + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) + if (!open_istream_loose(out, source, oid)) + return 0; return open_istream_incore(out, r, oid); }