From d4d7efc3087db32f739120c9f05b9d8fb82622c0 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 19 Aug 2025 00:33:40 +0000
Subject: [PATCH] v2+extindex: show commit time and indexing rate in progress

With the `-v' switch, we'll display these rates to track total
indexing rate and commit speeds throughout the indexing phase.
These numbers will help us monitor for slowdowns throughout the
entirety of a large indexing job taking several days.

This change may help us decide whether or not to start
implementing autodefrag for btrfs and similar CoW FSes prone to
performance degradation from fragmentation.
---
 lib/PublicInbox/ExtSearchIdx.pm |  1 +
 lib/PublicInbox/V2Writable.pm   | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 2bb12224b..3442c9012 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -1241,6 +1241,7 @@ sub symlink_packs ($$) {
 sub idx_init { # similar to V2Writable
 	my ($self, $opt) = @_;
 	return if $self->{idx_shards};
+	$self->{txn_t0} = now;
 
 	$self->git->cleanup;
 	my $mode = 0644;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 7186b2cce..9e60caa84 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -22,7 +22,7 @@ use PublicInbox::Spawn qw(spawn popen_rd run_die);
 use PublicInbox::Search;
 use PublicInbox::SearchIdx qw(log2stack is_ancestor check_size is_bad_blob
 	update_checkpoint);
-use PublicInbox::DS;
+use PublicInbox::DS qw(now);
 use IO::Handle; # ->autoflush
 use POSIX ();
 use Carp qw(confess);
@@ -256,6 +256,7 @@ sub parallel_init ($$) {
 sub idx_init {
 	my ($self, $opt) = @_;
 	return if $self->{idx_shards};
+	$self->{txn_t0} = now;
 	my $ibx = $self->{ibx};
 
 	# do not leak read-only FDs to child processes, we only have these
@@ -701,6 +702,8 @@ sub reindex_checkpoint ($) {
 	my $mm_tmp = $self->{mm_tmp};
 	$mm_tmp->atfork_prepare if $mm_tmp;
 	die 'BUG: {im} during reindex' if $self->{im};
+	my $t0 = now;
+	my $txn_bytes = $self->{transact_bytes};
 	if ($self->{ibx_map} && !$self->{checkpoint_unlocks}) {
 		checkpoint($self, 1); # no need to release lock on pure index
 	} else {
@@ -708,7 +711,14 @@ sub reindex_checkpoint ($) {
 	}
 
 	if (my $pr = $self->{-regen_fmt} ? $self->{-opt}->{-progress} : undef) {
-		$pr->(sprintf $self->{-regen_fmt}, $self->{nrec});
+		my $now = now;
+		chop(my $fmt = $self->{-regen_fmt}); # remove '\n';
+		$fmt .= " c:%ums all:%0.1fKB/s\n";
+		my $txn_kb = $txn_bytes / 1024;
+		$pr->(sprintf $fmt, $self->{nrec},
+			($now - $t0) * 1000,
+			$txn_kb / ($now - $self->{txn_t0}));
+		$self->{txn_t0} = $now;
 	}
 
 	# allow -watch or -mda to write...
@@ -811,6 +821,7 @@ sub index_oid { # cat_async callback
 }
 
 # only update last_commit for $i on reindex iff newer than current
+# Overridden for PublicInbox::ExtSearchIdx
 sub update_last_commit {
 	my ($self, $stk) = @_;
 	my $unit = $self->{unit} // return;
-- 
2.47.2