]> git.ipfire.org Git - thirdparty/git.git/commitdiff
ref-filter: parse objects on demand
authorPatrick Steinhardt <ps@pks.im>
Thu, 23 Oct 2025 07:16:23 +0000 (09:16 +0200)
committerJunio C Hamano <gitster@pobox.com>
Tue, 4 Nov 2025 15:32:25 +0000 (07:32 -0800)
When formatting an arbitrary object we parse that object regardless of
whether or not we actually need any parsed data. In fact, many of the
atoms we have don't require any.

Refactor the code so that we parse the data on demand when we see an
atom that wants to access the objects. This leads to a small speedup,
for example in the Chromium repository with around 40000 refs:

    Benchmark 1: for-each-ref --format='%(raw)' (HEAD~)
      Time (mean ± σ):     388.7 ms ±   1.1 ms    [User: 322.2 ms, System: 65.0 ms]
      Range (min … max):   387.3 ms … 390.8 ms    10 runs

    Benchmark 2: for-each-ref --format='%(raw)' (HEAD)
      Time (mean ± σ):     344.7 ms ±   0.7 ms    [User: 287.8 ms, System: 55.1 ms]
      Range (min … max):   343.9 ms … 345.7 ms    10 runs

    Summary
      for-each-ref --format='%(raw)' (HEAD) ran
        1.13 ± 0.00 times faster than for-each-ref --format='%(raw)' (HEAD~)

With this change, we now spend ~90% of the time decompressing objects,
which is almost as good as it gets regarding git-for-each-ref(1)'s own
infrastructure.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
ref-filter.c

index c54025d6b4c4cd2fd227fb3b690854635ccea025..7cfcd5c35549308dcee9c3654c858f5ebce02a36 100644 (file)
@@ -91,6 +91,7 @@ static struct expand_data {
        struct object_id delta_base_oid;
        void *content;
 
+       struct object *maybe_object;
        struct object_info info;
 } oi, oi_deref;
 
@@ -1475,11 +1476,29 @@ static void grab_common_values(struct atom_value *val, int deref, struct expand_
        }
 }
 
+static struct object *get_or_parse_object(struct expand_data *data, const char *refname,
+                                         struct strbuf *err, int *eaten)
+{
+       if (!data->maybe_object) {
+               data->maybe_object = parse_object_buffer(the_repository, &data->oid, data->type,
+                                                        data->size, data->content, eaten);
+               if (!data->maybe_object) {
+                       strbuf_addf(err, _("parse_object_buffer failed on %s for %s"),
+                                   oid_to_hex(&data->oid), refname);
+                       return NULL;
+               }
+       }
+
+       return data->maybe_object;
+}
+
 /* See grab_values */
-static void grab_tag_values(struct atom_value *val, int deref, struct object *obj)
+static int grab_tag_values(struct atom_value *val, int deref,
+                          struct expand_data *data, const char *refname,
+                          struct strbuf *err, int *eaten)
 {
+       struct tag *tag = NULL;
        int i;
-       struct tag *tag = (struct tag *) obj;
 
        for (i = 0; i < used_atom_cnt; i++) {
                const char *name = used_atom[i].name;
@@ -1487,6 +1506,14 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob
                struct atom_value *v = &val[i];
                if (!!deref != (*name == '*'))
                        continue;
+
+               if (!tag) {
+                       tag = (struct tag *) get_or_parse_object(data, refname,
+                                                                err, eaten);
+                       if (!tag)
+                               return -1;
+               }
+
                if (deref)
                        name++;
                if (atom_type == ATOM_TAG)
@@ -1496,22 +1523,35 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob
                else if (atom_type == ATOM_OBJECT && tag->tagged)
                        v->s = xstrdup(oid_to_hex(&tag->tagged->oid));
        }
+
+       return 0;
 }
 
 /* See grab_values */
-static void grab_commit_values(struct atom_value *val, int deref, struct object *obj)
+static int grab_commit_values(struct atom_value *val, int deref,
+                             struct expand_data *data, const char *refname,
+                             struct strbuf *err, int *eaten)
 {
        int i;
-       struct commit *commit = (struct commit *) obj;
+       struct commit *commit = NULL;
 
        for (i = 0; i < used_atom_cnt; i++) {
                const char *name = used_atom[i].name;
                enum atom_type atom_type = used_atom[i].atom_type;
                struct atom_value *v = &val[i];
+
                if (!!deref != (*name == '*'))
                        continue;
                if (deref)
                        name++;
+
+               if (!commit) {
+                       commit = (struct commit *) get_or_parse_object(data, refname,
+                                                                      err, eaten);
+                       if (!commit)
+                               return -1;
+               }
+
                if (atom_type == ATOM_TREE &&
                    grab_oid(name, "tree", get_commit_tree_oid(commit), v, &used_atom[i]))
                        continue;
@@ -1531,6 +1571,8 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object
                        v->s = strbuf_detach(&s, NULL);
                }
        }
+
+       return 0;
 }
 
 static const char *find_wholine(const char *who, int wholen, const char *buf)
@@ -1759,10 +1801,12 @@ static void grab_person(const char *who, struct atom_value *val, int deref, void
        }
 }
 
-static void grab_signature(struct atom_value *val, int deref, struct object *obj)
+static int grab_signature(struct atom_value *val, int deref,
+                         struct expand_data *data, const char *refname,
+                         struct strbuf *err, int *eaten)
 {
        int i;
-       struct commit *commit = (struct commit *) obj;
+       struct commit *commit = NULL;
        struct signature_check sigc = { 0 };
        int signature_checked = 0;
 
@@ -1790,6 +1834,13 @@ static void grab_signature(struct atom_value *val, int deref, struct object *obj
                        continue;
 
                if (!signature_checked) {
+                       if (!commit) {
+                               commit = (struct commit *) get_or_parse_object(data, refname,
+                                                                              err, eaten);
+                               if (!commit)
+                                       return -1;
+                       }
+
                        check_commit_signature(commit, &sigc);
                        signature_checked = 1;
                }
@@ -1843,6 +1894,8 @@ static void grab_signature(struct atom_value *val, int deref, struct object *obj
 
        if (signature_checked)
                signature_check_clear(&sigc);
+
+       return 0;
 }
 
 static void find_subpos(const char *buf,
@@ -1920,9 +1973,8 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 static void grab_describe_values(struct atom_value *val, int deref,
-                                struct object *obj)
+                                struct expand_data *data)
 {
-       struct commit *commit = (struct commit *)obj;
        int i;
 
        for (i = 0; i < used_atom_cnt; i++) {
@@ -1944,7 +1996,7 @@ static void grab_describe_values(struct atom_value *val, int deref,
                cmd.git_cmd = 1;
                strvec_push(&cmd.args, "describe");
                strvec_pushv(&cmd.args, atom->u.describe_args.v);
-               strvec_push(&cmd.args, oid_to_hex(&commit->object.oid));
+               strvec_push(&cmd.args, oid_to_hex(&data->oid));
                if (pipe_command(&cmd, NULL, 0, &out, 0, &err, 0) < 0) {
                        error(_("failed to run 'describe'"));
                        v->s = xstrdup("");
@@ -2066,24 +2118,36 @@ static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
+static int grab_values(struct atom_value *val, int deref, struct expand_data *data,
+                      const char *refname, struct strbuf *err, int *eaten)
 {
        void *buf = data->content;
+       int ret;
 
-       switch (obj->type) {
+       switch (data->type) {
        case OBJ_TAG:
-               grab_tag_values(val, deref, obj);
+               ret = grab_tag_values(val, deref, data, refname, err, eaten);
+               if (ret < 0)
+                       goto out;
+
                grab_sub_body_contents(val, deref, data);
                grab_person("tagger", val, deref, buf);
-               grab_describe_values(val, deref, obj);
+               grab_describe_values(val, deref, data);
                break;
        case OBJ_COMMIT:
-               grab_commit_values(val, deref, obj);
+               ret = grab_commit_values(val, deref, data, refname, err, eaten);
+               if (ret < 0)
+                       goto out;
+
                grab_sub_body_contents(val, deref, data);
                grab_person("author", val, deref, buf);
                grab_person("committer", val, deref, buf);
-               grab_signature(val, deref, obj);
-               grab_describe_values(val, deref, obj);
+
+               ret = grab_signature(val, deref, data, refname, err, eaten);
+               if (ret < 0)
+                       goto out;
+
+               grab_describe_values(val, deref, data);
                break;
        case OBJ_TREE:
                /* grab_tree_values(val, deref, obj, buf, sz); */
@@ -2094,8 +2158,12 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s
                grab_sub_body_contents(val, deref, data);
                break;
        default:
-               die("Eh?  Object of type %d?", obj->type);
+               die("Eh?  Object of type %d?", data->type);
        }
+
+       ret = 0;
+out:
+       return ret;
 }
 
 static inline char *copy_advance(char *dst, const char *src)
@@ -2292,38 +2360,41 @@ static const char *get_refname(struct used_atom *atom, struct ref_array_item *re
        return show_ref(&atom->u.refname, ref->refname);
 }
 
-static int get_object(struct ref_array_item *ref, int deref, struct object **obj,
+static int get_object(struct ref_array_item *ref, int deref,
                      struct expand_data *oi, struct strbuf *err)
 {
-       /* parse_object_buffer() will set eaten to 0 if free() will be needed */
-       int eaten = 1;
+       /* parse_object_buffer() will set eaten to 1 if free() will be needed */
+       int eaten = 0;
+       int ret;
+
        if (oi->info.contentp) {
                /* We need to know that to use parse_object_buffer properly */
                oi->info.sizep = &oi->size;
                oi->info.typep = &oi->type;
        }
+
        if (odb_read_object_info_extended(the_repository->objects, &oi->oid, &oi->info,
-                                         OBJECT_INFO_LOOKUP_REPLACE))
-               return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
-                                      oid_to_hex(&oi->oid), ref->refname);
+                                         OBJECT_INFO_LOOKUP_REPLACE)) {
+               ret = strbuf_addf_ret(err, -1, _("missing object %s for %s"),
+                                     oid_to_hex(&oi->oid), ref->refname);
+               goto out;
+       }
        if (oi->info.disk_sizep && oi->disk_size < 0)
                BUG("Object size is less than zero.");
 
        if (oi->info.contentp) {
-               *obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
-               if (!*obj) {
-                       if (!eaten)
-                               free(oi->content);
-                       return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
-                                              oid_to_hex(&oi->oid), ref->refname);
-               }
-               grab_values(ref->value, deref, *obj, oi);
+               ret = grab_values(ref->value, deref, oi, ref->refname, err, &eaten);
+               if (ret < 0)
+                       goto out;
        }
 
        grab_common_values(ref->value, deref, oi);
+       ret = 0;
+
+out:
        if (!eaten)
                free(oi->content);
-       return 0;
+       return ret;
 }
 
 static void populate_worktree_map(struct hashmap *map, struct worktree **worktrees)
@@ -2376,7 +2447,6 @@ static char *get_worktree_path(const struct ref_array_item *ref)
  */
 static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 {
-       struct object *obj;
        int i;
        struct object_info empty = OBJECT_INFO_INIT;
        int ahead_behind_atoms = 0;
@@ -2564,14 +2634,14 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 
 
        oi.oid = ref->objectname;
-       if (get_object(ref, 0, &obj, &oi, err))
+       if (get_object(ref, 0, &oi, err))
                return -1;
 
        /*
         * If there is no atom that wants to know about tagged
         * object, we are done.
         */
-       if (!need_tagged || (obj->type != OBJ_TAG))
+       if (!need_tagged || (oi.type != OBJ_TAG))
                return 0;
 
        /*
@@ -2589,7 +2659,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
                }
        }
 
-       return get_object(ref, 1, &obj, &oi_deref, err);
+       return get_object(ref, 1, &oi_deref, err);
 }
 
 /*