From 7ec85185b197ce1cd28721a6f4415fb9db5cd42f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:20 +0200 Subject: [PATCH] object: add flag to `peel_object()` to verify object type When peeling a tag to a non-tag object we repeatedly call `parse_object()` on the tagged object until we find the first object that isn't a tag. While this feels sensible at first, there is a big catch here: `parse_object()` doesn't actually verify the type of the tagged object. The relevant code path here eventually ends up in `parse_tag_buffer()`. Here, we parse the various fields of the tag, including the "type". Once we've figured out the type and the tagged object ID, we call one of the `lookup_${type}()` functions for whatever type we have found. There is two possible outcomes in the successful case: 1. The object is already part of our cached objects. In that case we double-check whether the type we're trying to look up matches the type that was cached. 2. The object is _not_ part of our cached objects. In that case, we simply create a new object with the expected type, but we don't parse that object. In the first case we might notice type mismatches, but only in the case where our cache has the object with the correct type. In the second case, we'll blindly assume that the type is correct and then go with it. We'll only notice that the type might be wrong when we try to parse the object at a later point. Now arguably, we could change `parse_tag_buffer()` to verify the tagged object's type for us. But that would have the effect that such a tag cannot be parsed at all anymore, and we have a small bunch of tests for exactly this case that assert we still can open such tags. So this change does not feel like something we can retroactively tighten, even though one shouldn't ever hit such corrupted tags. Instead, add a new `flags` field to `peel_object()` that allows the caller to opt in to strict object verification. This will be wired up at a subset of callsites over the next few commits. Note that this change also inlines `deref_tag_noverify()`. There's only been two callsites of that function, the one we're changing and one in our test helpers. The latter callsite can trivially use `deref_tag()` instead, so by inlining the function we avoid having to pass down the flag. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object.c | 20 +++++++++++++++++--- object.h | 15 ++++++++++++++- ref-filter.c | 2 +- refs.c | 2 +- refs/packed-backend.c | 5 ++--- refs/reftable-backend.c | 4 ++-- t/helper/test-reach.c | 2 +- tag.c | 12 ------------ tag.h | 1 - 9 files changed, 38 insertions(+), 25 deletions(-) diff --git a/object.c b/object.c index 986114a6db..e72b0ed436 100644 --- a/object.c +++ b/object.c @@ -209,11 +209,12 @@ struct object *lookup_object_by_type(struct repository *r, enum peel_status peel_object(struct repository *r, const struct object_id *name, - struct object_id *oid) + struct object_id *oid, + unsigned flags) { struct object *o = lookup_unknown_object(r, name); - if (o->type == OBJ_NONE) { + if (o->type == OBJ_NONE || flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { int type = odb_read_object_info(r->objects, name, NULL); if (type < 0 || !object_as_type(o, type, 0)) return PEEL_INVALID; @@ -222,7 +223,20 @@ enum peel_status peel_object(struct repository *r, if (o->type != OBJ_TAG) return PEEL_NON_TAG; - o = deref_tag_noverify(r, o); + while (o && o->type == OBJ_TAG) { + o = parse_object(r, &o->oid); + if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged) { + o = ((struct tag *)o)->tagged; + + if (flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { + int type = odb_read_object_info(r->objects, &o->oid, NULL); + if (type < 0 || !object_as_type(o, type, 0)) + return PEEL_INVALID; + } + } else { + o = NULL; + } + } if (!o) return PEEL_INVALID; diff --git a/object.h b/object.h index 8c3c1c46e1..1499f63d50 100644 --- a/object.h +++ b/object.h @@ -287,6 +287,17 @@ enum peel_status { PEEL_BROKEN = -4 }; +enum peel_object_flags { + /* + * Always verify the object type, even in the case where the looked-up + * object already has an object type. This can be useful when the + * stored object type may be invalid. One such case is when looking up + * objects via tags, where we blindly trust the object type declared by + * the tag. + */ + PEEL_OBJECT_VERIFY_OBJECT_TYPE = (1 << 0), +}; + /* * Peel the named object; i.e., if the object is a tag, resolve the * tag recursively until a non-tag is found. If successful, store the @@ -295,7 +306,9 @@ enum peel_status { * and leave oid unchanged. */ enum peel_status peel_object(struct repository *r, - const struct object_id *name, struct object_id *oid); + const struct object_id *name, + struct object_id *oid, + unsigned flags); struct object_list *object_list_insert(struct object *item, struct object_list **list_p); diff --git a/ref-filter.c b/ref-filter.c index 7fd8babec8..9a8ed8c8fc 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2581,7 +2581,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) if (need_tagged) { if (!is_null_oid(&ref->peeled_oid)) { oidcpy(&oi_deref.oid, &ref->peeled_oid); - } else if (!peel_object(the_repository, &obj->oid, &oi_deref.oid)) { + } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, 0)) { /* We managed to peel the object ourselves. */ } else { die("bad tag"); diff --git a/refs.c b/refs.c index 9d8f0a9ca4..a41a94ae55 100644 --- a/refs.c +++ b/refs.c @@ -2333,7 +2333,7 @@ int reference_get_peeled_oid(struct repository *repo, return 0; } - return peel_object(repo, ref->oid, peeled_oid) ? -1 : 0; + return peel_object(repo, ref->oid, peeled_oid, 0) ? -1 : 0; } int refs_update_symref(struct ref_store *refs, const char *ref, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 6fa229edd0..4752d3f398 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1527,9 +1527,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re i++; } else { struct object_id peeled; - int peel_error = peel_object(refs->base.repo, - &update->new_oid, - &peeled); + int peel_error = peel_object(refs->base.repo, &update->new_oid, + &peeled, 0); if (write_packed_entry(out, update->refname, &update->new_oid, diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index e329d4a423..9febb2322c 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1632,7 +1632,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data ref.refname = (char *)u->refname; ref.update_index = ts; - peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled); + peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled, 0); if (!peel_error) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); @@ -2497,7 +2497,7 @@ static int write_reflog_expiry_table(struct reftable_writer *writer, void *cb_da ref.refname = (char *)arg->refname; ref.update_index = ts; - if (!peel_object(arg->refs->base.repo, &arg->update_oid, &peeled)) { + if (!peel_object(arg->refs->base.repo, &arg->update_oid, &peeled, 0)) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); memcpy(ref.value.val2.value, arg->update_oid.hash, GIT_MAX_RAWSZ); diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 028ec00306..c58c93800f 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -63,7 +63,7 @@ int cmd__reach(int ac, const char **av) die("failed to resolve %s", buf.buf + 2); orig = parse_object(r, &oid); - peeled = deref_tag_noverify(the_repository, orig); + peeled = deref_tag(the_repository, orig, NULL, 0); if (!peeled) die("failed to load commit for input %s resulting in oid %s", diff --git a/tag.c b/tag.c index 1d52686ee1..f5c232d2f1 100644 --- a/tag.c +++ b/tag.c @@ -94,18 +94,6 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war return o; } -struct object *deref_tag_noverify(struct repository *r, struct object *o) -{ - while (o && o->type == OBJ_TAG) { - o = parse_object(r, &o->oid); - if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged) - o = ((struct tag *)o)->tagged; - else - o = NULL; - } - return o; -} - struct tag *lookup_tag(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); diff --git a/tag.h b/tag.h index c49d7c19ad..ef12a61037 100644 --- a/tag.h +++ b/tag.h @@ -16,7 +16,6 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u int parse_tag(struct tag *item); void release_tag_memory(struct tag *t); struct object *deref_tag(struct repository *r, struct object *, const char *, int); -struct object *deref_tag_noverify(struct repository *r, struct object *); int gpg_verify_tag(const struct object_id *oid, const char *name_to_report, unsigned flags); struct object_id *get_tagged_oid(struct tag *tag); -- 2.47.3