From: Justin Tobler
Date: Sat, 27 Sep 2025 14:50:46 +0000 (-0500)
Subject: builtin/repo: introduce stats subcommand
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=57243d6030010e2b69dbb90a1abd173a4eb01677;p=thirdparty%2Fgit.git

builtin/repo: introduce stats subcommand

The shape of a repository's history can have huge impacts on the
performance and health of the repository itself. Currently, Git lacks a
means to surface key stats/information regarding the shape of a
repository via a single command. Acquiring this information requires
users to be fairly knowledgeable about the structure of a Git repository
and how to identify the relevant data points. To fill this gap,
supplemental tools such as git-sizer(1) have been developed.

To allow users to more readily identify potential issues for a
repository, introduce the "stats" subcommand in git-repo(1) to output
stats for the repository that may be of interest to users. The goal of
this subcommand is to eventually provide similar functionality to
git-sizer(1), but natively in Git.

The initial version of this command only iterates through all references
in the repository and tracks the count of branches, tags, remote refs,
and other reference types. The corresponding information is displayed in
a human-friendly table formatted in a very similar manner to
git-sizer(1). The width of each table column is adjusted automatically
to satisfy the requirements of the widest row contained.

Subsequent commits will surface additional relevant data points to
output.

Based-on-patch-by: Derrick Stolee
Signed-off-by: Justin Tobler
Signed-off-by: Junio C Hamano
---

diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc
index 209afd1b61..a009bf8cf1 100644
--- a/Documentation/git-repo.adoc
+++ b/Documentation/git-repo.adoc
@@ -9,6 +9,7 @@ SYNOPSIS
 --------
 [synopsis]
 git repo info [--format=(keyvalue|nul)] [-z] [...]
+git repo stats
 
 DESCRIPTION
 -----------
@@ -43,6 +44,15 @@ supported:
 +
 `-z` is an alias for `--format=nul`.
 
+`stats`::
+	Retrieve statistics about the current repository. The following kinds
+	of information are reported:
++
+* Reference counts categorized by type
+
++
+The table output format may change and is not intended for machine parsing.
+
 INFO KEYS
 ---------
 In order to obtain a set of values from `git repo info`, you should provide
diff --git a/builtin/repo.c b/builtin/repo.c
index eeeab8fbd2..889e344f15 100644
--- a/builtin/repo.c
+++ b/builtin/repo.c
@@ -4,12 +4,16 @@
 #include "environment.h"
 #include "parse-options.h"
 #include "quote.h"
+#include "ref-filter.h"
 #include "refs.h"
 #include "strbuf.h"
+#include "string-list.h"
 #include "shallow.h"
+#include "utf8.h"
 
 static const char *const repo_usage[] = {
 	"git repo info [--format=(keyvalue|nul)] [-z] [...]",
+	"git repo stats",
 	NULL
 };
 
@@ -156,12 +160,229 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix,
 	return print_fields(argc, argv, repo, format);
 }
 
+/* Tallies of the repository's references, split by kind. */
+struct ref_stats {
+	size_t branches;
+	size_t remotes;
+	size_t tags;
+	size_t others;
+};
+
+/*
+ * Table rows keyed by their formatted name, plus the display width of the
+ * widest name and the widest value added so far.
+ */
+struct stats_table {
+	struct string_list rows;
+
+	size_t name_col_width;
+	size_t value_col_width;
+};
+
+/*
+ * Holds column data that gets stored for each row.
+ */
+struct stats_table_entry {
+	char *value;
+};
+
+/*
+ * Append a row whose name is built from `format`/`ap`. `entry` is stored as
+ * the row's util pointer and may be NULL for rows that carry no value. The
+ * column widths recorded in `table` grow to fit the new row.
+ */
+static void stats_table_vaddf(struct stats_table *table,
+			      struct stats_table_entry *entry,
+			      const char *format, va_list ap)
+{
+	struct strbuf buf = STRBUF_INIT;
+	struct string_list_item *item;
+	char *formatted_name;
+	size_t name_width;
+
+	strbuf_vaddf(&buf, format, ap);
+	formatted_name = strbuf_detach(&buf, NULL);
+	name_width = utf8_strwidth(formatted_name);
+
+	item = string_list_append_nodup(&table->rows, formatted_name);
+	item->util = entry;
+
+	if (name_width > table->name_col_width)
+		table->name_col_width = name_width;
+	if (entry) {
+		size_t value_width = utf8_strwidth(entry->value);
+		if (value_width > table->value_col_width)
+			table->value_col_width = value_width;
+	}
+}
+
+/* Append a name-only row (no value column), e.g. a section heading. */
+static void stats_table_addf(struct stats_table *table, const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	stats_table_vaddf(table, NULL, format, ap);
+	va_end(ap);
+}
+
+/* Append a row whose value column shows `value` as a decimal count. */
+static void stats_table_count_addf(struct stats_table *table, size_t value,
+				   const char *format, ...)
+{
+	struct stats_table_entry *entry;
+	va_list ap;
+
+	CALLOC_ARRAY(entry, 1);
+	entry->value = xstrfmt("%" PRIuMAX, (uintmax_t)value);
+
+	va_start(ap, format);
+	stats_table_vaddf(table, entry, format, ap);
+	va_end(ap);
+}
+
+/* Fill `table` with the reference rows: the total and one row per kind. */
+static void stats_table_setup(struct stats_table *table, struct ref_stats *refs)
+{
+	size_t ref_total;
+
+	ref_total = refs->branches + refs->remotes + refs->tags + refs->others;
+	stats_table_addf(table, "* %s", _("References"));
+	stats_table_count_addf(table, ref_total, " * %s", _("Count"));
+	stats_table_count_addf(table, refs->branches, " * %s", _("Branches"));
+	stats_table_count_addf(table, refs->tags, " * %s", _("Tags"));
+	stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes"));
+	stats_table_count_addf(table, refs->others, " * %s", _("Others"));
+}
+
+static inline size_t max_size_t(size_t a, size_t b)
+{
+	return (a > b) ? a : b;
+}
+
+/*
+ * Print the table: a title row, a separator row, then one line per row.
+ *
+ * Padding is written by hand rather than with printf's "%-*s" because a
+ * printf field width counts bytes, whereas the column widths here are
+ * display widths from utf8_strwidth(). Translated (multi-byte) titles would
+ * otherwise leave the columns misaligned.
+ */
+static void stats_table_print(const struct stats_table *table)
+{
+	const char *name_col_title = _("Repository stats");
+	const char *value_col_title = _("Value");
+	size_t name_title_len = utf8_strwidth(name_col_title);
+	size_t value_title_len = utf8_strwidth(value_col_title);
+	struct string_list_item *item;
+	int name_col_width;
+	int value_col_width;
+
+	name_col_width = cast_size_t_to_int(
+		max_size_t(table->name_col_width, name_title_len));
+	value_col_width = cast_size_t_to_int(
+		max_size_t(table->value_col_width, value_title_len));
+
+	printf("| %s%*s | %s%*s |\n",
+	       name_col_title, name_col_width - (int)name_title_len, "",
+	       value_col_title, value_col_width - (int)value_title_len, "");
+	printf("| ");
+	for (int i = 0; i < name_col_width; i++)
+		putchar('-');
+	printf(" | ");
+	for (int i = 0; i < value_col_width; i++)
+		putchar('-');
+	printf(" |\n");
+
+	for_each_string_list_item(item, &table->rows) {
+		struct stats_table_entry *entry = item->util;
+		const char *value = entry ? entry->value : "";
+		int name_pad = name_col_width - utf8_strwidth(item->string);
+		int value_pad = value_col_width - utf8_strwidth(value);
+
+		/* Names are left-aligned, values right-aligned. */
+		printf("| %s%*s | %*s%s |\n", item->string, name_pad, "",
+		       value_pad, "", value);
+	}
+}
+
+/* Free each row's value string, then clear the rows and their util data. */
+static void stats_table_clear(struct stats_table *table)
+{
+	struct stats_table_entry *entry;
+	struct string_list_item *item;
+
+	for_each_string_list_item(item, &table->rows) {
+		entry = item->util;
+		if (entry)
+			free(entry->value);
+	}
+
+	string_list_clear(&table->rows, 1);
+}
+
+/* Tally each reference in `refs` under its kind in `stats`. */
+static void stats_count_references(struct ref_stats *stats, struct ref_array *refs)
+{
+	for (int i = 0; i < refs->nr; i++) {
+		struct ref_array_item *ref = refs->items[i];
+
+		switch (ref->kind) {
+		case FILTER_REFS_BRANCHES:
+			stats->branches++;
+			break;
+		case FILTER_REFS_REMOTES:
+			stats->remotes++;
+			break;
+		case FILTER_REFS_TAGS:
+			stats->tags++;
+			break;
+		case FILTER_REFS_OTHERS:
+			stats->others++;
+			break;
+		default:
+			BUG("unexpected reference type");
+		}
+	}
+}
+
+/* Implements "git repo stats": count the refs and print them as a table. */
+static int cmd_repo_stats(int argc, const char **argv, const char *prefix,
+			  struct repository *repo UNUSED)
+{
+	struct ref_filter filter = REF_FILTER_INIT;
+	struct stats_table table = {
+		.rows = STRING_LIST_INIT_DUP,
+	};
+	struct ref_stats stats = { 0 };
+	struct ref_array refs = { 0 };
+	struct option options[] = { 0 };
+
+	argc = parse_options(argc, argv, prefix, options, repo_usage, 0);
+	if (argc)
+		usage(_("too many arguments"));
+
+	if (filter_refs(&refs, &filter, FILTER_REFS_REGULAR))
+		die(_("unable to filter refs"));
+
+	stats_count_references(&stats, &refs);
+
+	stats_table_setup(&table, &stats);
+	stats_table_print(&table);
+
+	stats_table_clear(&table);
+	ref_array_clear(&refs);
+
+	return 0;
+}
+
 int cmd_repo(int argc, const char **argv, const char *prefix,
 	     struct repository *repo)
 {
 	parse_opt_subcommand_fn *fn = NULL;
 	struct option options[] = {
 		OPT_SUBCOMMAND("info", &fn, cmd_repo_info),
+		OPT_SUBCOMMAND("stats", &fn, cmd_repo_stats),
 		OPT_END()
 	};
 
diff --git a/t/meson.build b/t/meson.build
index 7974795fe4..071d4a5112 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -236,6 +236,7 @@ integration_tests = [
   't1701-racy-split-index.sh',
   't1800-hook.sh',
   't1900-repo.sh',
+  't1901-repo-stats.sh',
   't2000-conflict-when-checking-files-out.sh',
   't2002-checkout-cache-u.sh',
   't2003-checkout-cache-mkdir.sh',
diff --git a/t/t1901-repo-stats.sh b/t/t1901-repo-stats.sh
new file mode 100755
index 0000000000..535ac511dd
--- /dev/null
+++ b/t/t1901-repo-stats.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+test_description='test git repo stats'
+
+. ./test-lib.sh
+
+test_expect_success 'empty repository' '
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	(
+		cd repo &&
+		cat >expect <<-\EOF &&
+		| Repository stats | Value |
+		| ---------------- | ----- |
+		| * References     |       |
+		|  * Count         |     0 |
+		|  * Branches      |     0 |
+		|  * Tags          |     0 |
+		|  * Remotes       |     0 |
+		|  * Others        |     0 |
+		EOF
+
+		git repo stats >out 2>err &&
+
+		test_cmp expect out &&
+		test_line_count = 0 err
+	)
+'
+
+test_expect_success 'repository with references' '
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	(
+		cd repo &&
+		git commit --allow-empty -m init &&
+		git tag -a foo -m bar &&
+
+		oid="$(git rev-parse HEAD)" &&
+		git update-ref refs/remotes/origin/foo "$oid" &&
+
+		git notes add -m foo &&
+
+		cat >expect <<-\EOF &&
+		| Repository stats | Value |
+		| ---------------- | ----- |
+		| * References     |       |
+		|  * Count         |     4 |
+		|  * Branches      |     1 |
+		|  * Tags          |     1 |
+		|  * Remotes       |     1 |
+		|  * Others        |     1 |
+		EOF
+
+		git repo stats >out 2>err &&
+
+		test_cmp expect out &&
+		test_line_count = 0 err
+	)
+'
+
+test_done