]> git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'jk/shortlog-group-by-trailer'
authorJunio C Hamano <gitster@pobox.com>
Sun, 4 Oct 2020 19:49:14 +0000 (12:49 -0700)
committerJunio C Hamano <gitster@pobox.com>
Sun, 4 Oct 2020 19:49:14 +0000 (12:49 -0700)
"git shortlog" has been taught to group commits by the contents of
the trailer lines, like "Reviewed-by:", "Coauthored-by:", etc.

* jk/shortlog-group-by-trailer:
  shortlog: allow multiple groups to be specified
  shortlog: parse trailer idents
  shortlog: rename parse_stdin_ident()
  shortlog: de-duplicate trailer values
  shortlog: match commit trailers with --group
  trailer: add interface for iterating over commit trailers
  shortlog: add grouping option
  shortlog: change "author" variables to "ident"

Documentation/git-shortlog.txt
builtin/log.c
builtin/shortlog.c
shortlog.h
t/t4201-shortlog.sh
trailer.c
trailer.h

index a72ea7f7bafaabbc4ffc36e3fb89f1809917e94e..fd93cd41e90c7d7b6c5b9dcbc904681a0b247254 100644 (file)
@@ -47,9 +47,38 @@ OPTIONS
 
        Each pretty-printed commit will be rewrapped before it is shown.
 
+--group=<type>::
+       Group commits based on `<type>`. If no `--group` option is
+       specified, the default is `author`. `<type>` is one of:
++
+--
+ - `author`, commits are grouped by author
+ - `committer`, commits are grouped by committer (the same as `-c`)
+ - `trailer:<field>`, the `<field>` is interpreted as a case-insensitive
+   commit message trailer (see linkgit:git-interpret-trailers[1]). For
+   example, if your project uses `Reviewed-by` trailers, you might want
+   to see who has been reviewing with
+   `git shortlog -ns --group=trailer:reviewed-by`.
++
+Note that commits that do not include the trailer will not be counted.
+Likewise, commits with multiple trailers (e.g., multiple signoffs) may
+be counted more than once (but only once per unique trailer value in
+that commit).
++
+Shortlog will attempt to parse each trailer value as a `name <email>`
+identity. If successful, the mailmap is applied and the email is omitted
+unless the `--email` option is specified. If the value cannot be parsed
+as an identity, it will be taken literally and completely.
+--
++
+If `--group` is specified multiple times, commits are counted under each
+value (but again, only once per unique value in that commit). For
+example, `git shortlog --group=author --group=trailer:co-authored-by`
+counts both authors and co-authors.
+
 -c::
 --committer::
-       Collect and show committer identities instead of authors.
+       This is an alias for `--group=committer`.
 
 -w[<width>[,<indent1>[,<indent2>]]]::
        Linewrap the output by wrapping each line at `width`.  The first
index b8824d898f495e4f1bd48ddfeb4383ce8a205d90..7f27e9eca1f74cfa4f3d5b8c3cb4075b6178ca39 100644 (file)
@@ -1195,6 +1195,7 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout,
        log.in1 = 2;
        log.in2 = 4;
        log.file = rev->diffopt.file;
+       log.groups = SHORTLOG_GROUP_AUTHOR;
        for (i = 0; i < nr; i++)
                shortlog_add_commit(&log, list[i]);
 
index c856c58bb5a6cb4c867a2b61bc5355bc96ef915f..0a5c4968f64e1655b5cf16e3555209d52dae065c 100644 (file)
@@ -9,6 +9,7 @@
 #include "mailmap.h"
 #include "shortlog.h"
 #include "parse-options.h"
+#include "trailer.h"
 
 static char const * const shortlog_usage[] = {
        N_("git shortlog [<options>] [<revision-range>] [[--] <path>...]"),
@@ -49,12 +50,12 @@ static int compare_by_list(const void *a1, const void *a2)
 }
 
 static void insert_one_record(struct shortlog *log,
-                             const char *author,
+                             const char *ident,
                              const char *oneline)
 {
        struct string_list_item *item;
 
-       item = string_list_insert(&log->list, author);
+       item = string_list_insert(&log->list, ident);
 
        if (log->summary)
                item->util = (void *)(UTIL_TO_INT(item) + 1);
@@ -97,8 +98,8 @@ static void insert_one_record(struct shortlog *log,
        }
 }
 
-static int parse_stdin_author(struct shortlog *log,
-                              struct strbuf *out, const char *in)
+static int parse_ident(struct shortlog *log,
+                      struct strbuf *out, const char *in)
 {
        const char *mailbuf, *namebuf;
        size_t namelen, maillen;
@@ -122,18 +123,33 @@ static int parse_stdin_author(struct shortlog *log,
 
 static void read_from_stdin(struct shortlog *log)
 {
-       struct strbuf author = STRBUF_INIT;
-       struct strbuf mapped_author = STRBUF_INIT;
+       struct strbuf ident = STRBUF_INIT;
+       struct strbuf mapped_ident = STRBUF_INIT;
        struct strbuf oneline = STRBUF_INIT;
        static const char *author_match[2] = { "Author: ", "author " };
        static const char *committer_match[2] = { "Commit: ", "committer " };
        const char **match;
 
-       match = log->committer ? committer_match : author_match;
-       while (strbuf_getline_lf(&author, stdin) != EOF) {
+       if (HAS_MULTI_BITS(log->groups))
+               die(_("using multiple --group options with stdin is not supported"));
+
+       switch (log->groups) {
+       case SHORTLOG_GROUP_AUTHOR:
+               match = author_match;
+               break;
+       case SHORTLOG_GROUP_COMMITTER:
+               match = committer_match;
+               break;
+       case SHORTLOG_GROUP_TRAILER:
+               die(_("using --group=trailer with stdin is not supported"));
+       default:
+               BUG("unhandled shortlog group");
+       }
+
+       while (strbuf_getline_lf(&ident, stdin) != EOF) {
                const char *v;
-               if (!skip_prefix(author.buf, match[0], &v) &&
-                   !skip_prefix(author.buf, match[1], &v))
+               if (!skip_prefix(ident.buf, match[0], &v) &&
+                   !skip_prefix(ident.buf, match[1], &v))
                        continue;
                while (strbuf_getline_lf(&oneline, stdin) != EOF &&
                       oneline.len)
@@ -142,23 +158,118 @@ static void read_from_stdin(struct shortlog *log)
                       !oneline.len)
                        ; /* discard blanks */
 
-               strbuf_reset(&mapped_author);
-               if (parse_stdin_author(log, &mapped_author, v) < 0)
+               strbuf_reset(&mapped_ident);
+               if (parse_ident(log, &mapped_ident, v) < 0)
                        continue;
 
-               insert_one_record(log, mapped_author.buf, oneline.buf);
+               insert_one_record(log, mapped_ident.buf, oneline.buf);
        }
-       strbuf_release(&author);
-       strbuf_release(&mapped_author);
+       strbuf_release(&ident);
+       strbuf_release(&mapped_ident);
        strbuf_release(&oneline);
 }
 
+struct strset_item {
+       struct hashmap_entry ent;
+       char value[FLEX_ARRAY];
+};
+
+struct strset {
+       struct hashmap map;
+};
+
+#define STRSET_INIT { { NULL } }
+
+static int strset_item_hashcmp(const void *hash_data,
+                              const struct hashmap_entry *entry,
+                              const struct hashmap_entry *entry_or_key,
+                              const void *keydata)
+{
+       const struct strset_item *a, *b;
+
+       a = container_of(entry, const struct strset_item, ent);
+       if (keydata)
+               return strcmp(a->value, keydata);
+
+       b = container_of(entry_or_key, const struct strset_item, ent);
+       return strcmp(a->value, b->value);
+}
+
+/*
+ * Adds "str" to the set if it was not already present; returns true if it was
+ * already there.
+ */
+static int strset_check_and_add(struct strset *ss, const char *str)
+{
+       unsigned int hash = strhash(str);
+       struct strset_item *item;
+
+       if (!ss->map.table)
+               hashmap_init(&ss->map, strset_item_hashcmp, NULL, 0);
+
+       if (hashmap_get_from_hash(&ss->map, hash, str))
+               return 1;
+
+       FLEX_ALLOC_STR(item, value, str);
+       hashmap_entry_init(&item->ent, hash);
+       hashmap_add(&ss->map, &item->ent);
+       return 0;
+}
+
+static void strset_clear(struct strset *ss)
+{
+       if (!ss->map.table)
+               return;
+       hashmap_free_entries(&ss->map, struct strset_item, ent);
+}
+
+static void insert_records_from_trailers(struct shortlog *log,
+                                        struct strset *dups,
+                                        struct commit *commit,
+                                        struct pretty_print_context *ctx,
+                                        const char *oneline)
+{
+       struct trailer_iterator iter;
+       const char *commit_buffer, *body;
+       struct strbuf ident = STRBUF_INIT;
+
+       /*
+        * Using format_commit_message("%B") would be simpler here, but
+        * this saves us copying the message.
+        */
+       commit_buffer = logmsg_reencode(commit, NULL, ctx->output_encoding);
+       body = strstr(commit_buffer, "\n\n");
+       if (!body)
+               return;
+
+       trailer_iterator_init(&iter, body);
+       while (trailer_iterator_advance(&iter)) {
+               const char *value = iter.val.buf;
+
+               if (!string_list_has_string(&log->trailers, iter.key.buf))
+                       continue;
+
+               strbuf_reset(&ident);
+               if (!parse_ident(log, &ident, value))
+                       value = ident.buf;
+
+               if (strset_check_and_add(dups, value))
+                       continue;
+               insert_one_record(log, value, oneline);
+       }
+       trailer_iterator_release(&iter);
+
+       strbuf_release(&ident);
+       unuse_commit_buffer(commit, commit_buffer);
+}
+
 void shortlog_add_commit(struct shortlog *log, struct commit *commit)
 {
-       struct strbuf author = STRBUF_INIT;
+       struct strbuf ident = STRBUF_INIT;
        struct strbuf oneline = STRBUF_INIT;
+       struct strset dups = STRSET_INIT;
        struct pretty_print_context ctx = {0};
-       const char *fmt;
+       const char *oneline_str;
 
        ctx.fmt = CMIT_FMT_USERFORMAT;
        ctx.abbrev = log->abbrev;
@@ -166,21 +277,38 @@ void shortlog_add_commit(struct shortlog *log, struct commit *commit)
        ctx.date_mode.type = DATE_NORMAL;
        ctx.output_encoding = get_log_output_encoding();
 
-       fmt = log->committer ?
-               (log->email ? "%cN <%cE>" : "%cN") :
-               (log->email ? "%aN <%aE>" : "%aN");
-
-       format_commit_message(commit, fmt, &author, &ctx);
        if (!log->summary) {
                if (log->user_format)
                        pretty_print_commit(&ctx, commit, &oneline);
                else
                        format_commit_message(commit, "%s", &oneline, &ctx);
        }
+       oneline_str = oneline.len ? oneline.buf : "<none>";
+
+       if (log->groups & SHORTLOG_GROUP_AUTHOR) {
+               strbuf_reset(&ident);
+               format_commit_message(commit,
+                                     log->email ? "%aN <%aE>" : "%aN",
+                                     &ident, &ctx);
+               if (!HAS_MULTI_BITS(log->groups) ||
+                   !strset_check_and_add(&dups, ident.buf))
+                       insert_one_record(log, ident.buf, oneline_str);
+       }
+       if (log->groups & SHORTLOG_GROUP_COMMITTER) {
+               strbuf_reset(&ident);
+               format_commit_message(commit,
+                                     log->email ? "%cN <%cE>" : "%cN",
+                                     &ident, &ctx);
+               if (!HAS_MULTI_BITS(log->groups) ||
+                   !strset_check_and_add(&dups, ident.buf))
+                       insert_one_record(log, ident.buf, oneline_str);
+       }
+       if (log->groups & SHORTLOG_GROUP_TRAILER) {
+               insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str);
+       }
 
-       insert_one_record(log, author.buf, oneline.len ? oneline.buf : "<none>");
-
-       strbuf_release(&author);
+       strset_clear(&dups);
+       strbuf_release(&ident);
        strbuf_release(&oneline);
 }
 
@@ -241,6 +369,28 @@ static int parse_wrap_args(const struct option *opt, const char *arg, int unset)
        return 0;
 }
 
+static int parse_group_option(const struct option *opt, const char *arg, int unset)
+{
+       struct shortlog *log = opt->value;
+       const char *field;
+
+       if (unset) {
+               log->groups = 0;
+               string_list_clear(&log->trailers, 0);
+       } else if (!strcasecmp(arg, "author"))
+               log->groups |= SHORTLOG_GROUP_AUTHOR;
+       else if (!strcasecmp(arg, "committer"))
+               log->groups |= SHORTLOG_GROUP_COMMITTER;
+       else if (skip_prefix(arg, "trailer:", &field)) {
+               log->groups |= SHORTLOG_GROUP_TRAILER;
+               string_list_append(&log->trailers, field);
+       } else
+               return error(_("unknown group type: %s"), arg);
+
+       return 0;
+}
+
+
 void shortlog_init(struct shortlog *log)
 {
        memset(log, 0, sizeof(*log));
@@ -251,6 +401,8 @@ void shortlog_init(struct shortlog *log)
        log->wrap = DEFAULT_WRAPLEN;
        log->in1 = DEFAULT_INDENT1;
        log->in2 = DEFAULT_INDENT2;
+       log->trailers.strdup_strings = 1;
+       log->trailers.cmp = strcasecmp;
 }
 
 int cmd_shortlog(int argc, const char **argv, const char *prefix)
@@ -260,8 +412,9 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix)
        int nongit = !startup_info->have_repository;
 
        const struct option options[] = {
-               OPT_BOOL('c', "committer", &log.committer,
-                        N_("Group by committer rather than author")),
+               OPT_BIT('c', "committer", &log.groups,
+                       N_("Group by committer rather than author"),
+                       SHORTLOG_GROUP_COMMITTER),
                OPT_BOOL('n', "numbered", &log.sort_by_number,
                         N_("sort output according to the number of commits per author")),
                OPT_BOOL('s', "summary", &log.summary,
@@ -271,6 +424,8 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix)
                OPT_CALLBACK_F('w', NULL, &log, N_("<w>[,<i1>[,<i2>]]"),
                        N_("Linewrap output"), PARSE_OPT_OPTARG,
                        &parse_wrap_args),
+               OPT_CALLBACK(0, "group", &log, N_("field"),
+                       N_("Group by field"), parse_group_option),
                OPT_END(),
        };
 
@@ -311,6 +466,10 @@ parse_done:
        log.abbrev = rev.abbrev;
        log.file = rev.diffopt.file;
 
+       if (!log.groups)
+               log.groups = SHORTLOG_GROUP_AUTHOR;
+       string_list_sort(&log.trailers);
+
        /* assume HEAD if from a tty */
        if (!nongit && !rev.pending.nr && isatty(0))
                add_head_to_pending(&rev);
index 2fa61c42946262238ae2f1662ef3c2d1e981b274..64be879b241b1bd3daf8d5a30cf736f98211e745 100644 (file)
@@ -15,7 +15,13 @@ struct shortlog {
        int in2;
        int user_format;
        int abbrev;
-       int committer;
+
+       enum {
+               SHORTLOG_GROUP_AUTHOR = (1 << 0),
+               SHORTLOG_GROUP_COMMITTER = (1 << 1),
+               SHORTLOG_GROUP_TRAILER = (1 << 2),
+       } groups;
+       struct string_list trailers;
 
        char *common_repo_prefix;
        int email;
index d3a7ce6bbb2ca926d200f099d25d26570a3c1ce6..3d5c4a2086afbaa5f5ae275c957efb6ee809953b 100755 (executable)
@@ -215,4 +215,145 @@ test_expect_success 'shortlog --committer (external)' '
        test_cmp expect actual
 '
 
+test_expect_success '--group=committer is the same as --committer' '
+       git shortlog -ns --group=committer HEAD >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'shortlog --group=trailer:signed-off-by' '
+       git commit --allow-empty -m foo -s &&
+       GIT_COMMITTER_NAME="SOB One" \
+       GIT_COMMITTER_EMAIL=sob@example.com \
+               git commit --allow-empty -m foo -s &&
+       git commit --allow-empty --amend --no-edit -s &&
+       cat >expect <<-\EOF &&
+            2  C O Mitter <committer@example.com>
+            1  SOB One <sob@example.com>
+       EOF
+       git shortlog -nse --group=trailer:signed-off-by HEAD >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'trailer idents are split' '
+       cat >expect <<-\EOF &&
+            2  C O Mitter
+            1  SOB One
+       EOF
+       git shortlog -ns --group=trailer:signed-off-by HEAD >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'trailer idents are mailmapped' '
+       cat >expect <<-\EOF &&
+            2  C O Mitter
+            1  Another Name
+       EOF
+       echo "Another Name <sob@example.com>" >mail.map &&
+       git -c mailmap.file=mail.map shortlog -ns \
+               --group=trailer:signed-off-by HEAD >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'shortlog de-duplicates trailers in a single commit' '
+       git commit --allow-empty -F - <<-\EOF &&
+       subject one
+
+       this message has two distinct values, plus a repeat
+
+       Repeated-trailer: Foo
+       Repeated-trailer: Bar
+       Repeated-trailer: Foo
+       EOF
+
+       git commit --allow-empty -F - <<-\EOF &&
+       subject two
+
+       similar to the previous, but without the second distinct value
+
+       Repeated-trailer: Foo
+       Repeated-trailer: Foo
+       EOF
+
+       cat >expect <<-\EOF &&
+            2  Foo
+            1  Bar
+       EOF
+       git shortlog -ns --group=trailer:repeated-trailer -2 HEAD >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'shortlog can match multiple groups' '
+       git commit --allow-empty -F - <<-\EOF &&
+       subject one
+
+       this has two trailers that are distinct from the author; it will count
+       3 times in the output
+
+       Some-trailer: User A <a@example.com>
+       Another-trailer: User B <b@example.com>
+       EOF
+
+       git commit --allow-empty -F - <<-\EOF &&
+       subject two
+
+       this one has two trailers, one of which is a duplicate with the author;
+       it will only be counted once for them
+
+       Another-trailer: A U Thor <author@example.com>
+       Some-trailer: User B <b@example.com>
+       EOF
+
+       cat >expect <<-\EOF &&
+            2  A U Thor
+            2  User B
+            1  User A
+       EOF
+       git shortlog -ns \
+               --group=author \
+               --group=trailer:some-trailer \
+               --group=trailer:another-trailer \
+               -2 HEAD >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'set up option selection tests' '
+       git commit --allow-empty -F - <<-\EOF
+       subject
+
+       body
+
+       Trailer-one: value-one
+       Trailer-two: value-two
+       EOF
+'
+
+test_expect_success '--no-group resets group list to author' '
+       cat >expect <<-\EOF &&
+            1  A U Thor
+       EOF
+       git shortlog -ns \
+               --group=committer \
+               --group=trailer:trailer-one \
+               --no-group \
+               -1 HEAD >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success '--no-group resets trailer list' '
+       cat >expect <<-\EOF &&
+            1  value-two
+       EOF
+       git shortlog -ns \
+               --group=trailer:trailer-one \
+               --no-group \
+               --group=trailer:trailer-two \
+               -1 HEAD >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'stdin with multiple groups reports error' '
+       git log >log &&
+       test_must_fail git shortlog --group=author --group=committer <log
+'
+
 test_done
index 68dabc2556c8232e1df268ae9f82d539505541ef..3f7391d793c87b9ff9e40572e9c219f9377d9d6a 100644 (file)
--- a/trailer.c
+++ b/trailer.c
@@ -1183,3 +1183,39 @@ void format_trailers_from_commit(struct strbuf *out, const char *msg,
        format_trailer_info(out, &info, opts);
        trailer_info_release(&info);
 }
+
+void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
+{
+       struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
+       strbuf_init(&iter->key, 0);
+       strbuf_init(&iter->val, 0);
+       opts.no_divider = 1;
+       trailer_info_get(&iter->info, msg, &opts);
+       iter->cur = 0;
+}
+
+int trailer_iterator_advance(struct trailer_iterator *iter)
+{
+       while (iter->cur < iter->info.trailer_nr) {
+               char *trailer = iter->info.trailers[iter->cur++];
+               int separator_pos = find_separator(trailer, separators);
+
+               if (separator_pos < 1)
+                       continue; /* not a real trailer */
+
+               strbuf_reset(&iter->key);
+               strbuf_reset(&iter->val);
+               parse_trailer(&iter->key, &iter->val, NULL,
+                             trailer, separator_pos);
+               unfold_value(&iter->val);
+               return 1;
+       }
+       return 0;
+}
+
+void trailer_iterator_release(struct trailer_iterator *iter)
+{
+       trailer_info_release(&iter->info);
+       strbuf_release(&iter->val);
+       strbuf_release(&iter->key);
+}
index 203acf4ee1db99ea35a1d202010834330ce4af58..cd93e7ddea789cb6c320af2a7b984bd6f8a8215f 100644 (file)
--- a/trailer.h
+++ b/trailer.h
@@ -2,8 +2,7 @@
 #define TRAILER_H
 
 #include "list.h"
-
-struct strbuf;
+#include "strbuf.h"
 
 enum trailer_where {
        WHERE_DEFAULT,
@@ -103,4 +102,46 @@ void trailer_info_release(struct trailer_info *info);
 void format_trailers_from_commit(struct strbuf *out, const char *msg,
                                 const struct process_trailer_options *opts);
 
+/*
+ * An interface for iterating over the trailers found in a particular commit
+ * message. Use like:
+ *
+ *   struct trailer_iterator iter;
+ *   trailer_iterator_init(&iter, msg);
+ *   while (trailer_iterator_advance(&iter))
+ *      ... do something with iter.key and iter.val ...
+ *   trailer_iterator_release(&iter);
+ */
+struct trailer_iterator {
+       struct strbuf key;
+       struct strbuf val;
+
+       /* private */
+       struct trailer_info info;
+       size_t cur;
+};
+
+/*
+ * Initialize "iter" in preparation for walking over the trailers in the commit
+ * message "msg". The "msg" pointer must remain valid until the iterator is
+ * released.
+ *
+ * After initializing, note that key/val will not yet point to any trailer.
+ * Call advance() to parse the first one (if any).
+ */
+void trailer_iterator_init(struct trailer_iterator *iter, const char *msg);
+
+/*
+ * Advance to the next trailer of the iterator. Returns 0 if there is no such
+ * trailer, and 1 otherwise. The key and value of the trailer can be
+ * fetched from the iter->key and iter->value fields (which are valid
+ * only until the next advance).
+ */
+int trailer_iterator_advance(struct trailer_iterator *iter);
+
+/*
+ * Release all resources associated with the trailer iteration.
+ */
+void trailer_iterator_release(struct trailer_iterator *iter);
+
 #endif /* TRAILER_H */