From 9011547f607e20aae22d84b710e4b83088a35223 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 12 Sep 2025 23:28:18 +0000 Subject: [PATCH] extindex: fix --reindex `public-inbox-extindex --reindex' deprioritizes itself in favor of public-inbox-extindex invocations without --reindex by shutting down shard processes to let other processes acquire the lock and process new messages first. Restarting shard processes during --reindex was causing new Xapian shards to be written to v2 inboxes instead of the extindex itself. This bug was introduced with the simplifications to internal data structures to eliminate the ad-hoc $sync structure. The local-ized use of ExtSearchIdx->{ibx} tricked PublicInbox::SearchIdxShard::new into using the standard v2 code path. So make SearchIdxShard->new check the `$v2w' object for the ability to call `eidx_sync' rather than the existence of the {ibx} field. I only noticed this bug while working on the --split-shards feature for performance. Fixes: 922b765d ((ext)index: move {max_size} and related bits to $self, 2025-01-10) --- lib/PublicInbox/SearchIdxShard.pm | 7 +++---- t/extsearch.t | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 8f340071f..8de79b782 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -10,10 +10,9 @@ use PublicInbox::OnDestroy; use PublicInbox::Syscall qw($F_SETPIPE_SZ); sub new { - my ($class, $v2w, $shard) = @_; # v2w may be ExtSearchIdx - my $ibx = $v2w->{ibx}; - my $self = $ibx ? $class->SUPER::new($ibx, $v2w->{-opt}, $shard) - : $class->eidx_shard_new($v2w, $shard); + my ($cls, $v2w, $shard) = @_; # v2w may be ExtSearchIdx + my $self = $v2w->can('eidx_sync') ? 
$cls->eidx_shard_new($v2w, $shard) + : $cls->SUPER::new(@$v2w{qw(ibx -opt)}, $shard); # create the DB before forking: $self->idx_acquire; $self->set_metadata_once; diff --git a/t/extsearch.t b/t/extsearch.t index 08fdc6148..aa9da3fae 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -752,4 +752,35 @@ SKIP: { unlike $lsa, qr/No_COW/i, '--cow respected'; } +{ + my $many = create_inbox 'many', version => 2, indexlevel => 'basic', + tmpdir => "$home/many", sub { + my $eml = PublicInbox::Eml->new(<<'EOM'); +From: a@example.com +To: b@example.com +Subject: s +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +EOM + my ($im, $ibx) = @_; + for my $i (0..6) { # >(PublicInbox::Git::MAX_INFLIGHT/3) + $eml->header_set('Message-ID', "<$i\@a>"); + $im->add($eml); + } + $im->done; + }; + my @before = glob("$many->{inboxdir}/xap*/?"); + is_deeply \@before, [], + 'no Xapian shards in v2 to be reindexed by -extindex'; + my $opt = { 2 => \(my $err = '') }; + ok run_script([qw(-extindex --reindex --batch-size=1), + "$home/fresh", $many->{inboxdir}], + undef, $opt), + '--reindex fresh on fresh directory'; + my @after = glob("$many->{inboxdir}/xap*/?"); + is_deeply \@after, [], + 'no Xapian shards in v2 after reindexed by -extindex'; + is $err, '', 'no warnings on --reindex'; +} + done_testing; -- 2.47.3