From: Eric Wong Date: Tue, 26 Aug 2025 19:50:40 +0000 (+0000) Subject: extindex: reduce IPC and Xapian updates on reindex X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7d4c3d8220df78434c3fc38754a403b12a1aa916;p=thirdparty%2Fpublic-inbox.git extindex: reduce IPC and Xapian updates on reindex Instead of updating the document and re-adding eidx keys + List-IDs repeatedly, we can do it all at once. Doing so reduces IPC traffic and ought to reduce FS traffic on the Xapian DB. --- diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 3442c9012..e3ac80910 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -620,14 +620,12 @@ sub _reindex_finalize ($$$) { my $ibx = _ibx_for $self, $smsg; $smsg->{eidx_key} = $ibx->eidx_key; if ($self->{-need_xapian}) { - my $idx = idx_shard($self, $docid); - $idx->index_eml($eml, $smsg); for my $x (reverse @$stable) { my $lid = delete $x->{lid} // die 'BUG: no {lid}'; - @$lid and $idx->ipc_do('add_eidx_info_raw', $docid, - _ibx_for($self, $x)->eidx_key, - @$lid); + @$lid and push @{$smsg->{-eidx_more}}, + [ _ibx_for($self, $x)->eidx_key, @$lid ] } + idx_shard($self, $docid)->index_eml($eml, $smsg); } return if $nr == 1; # likely, all good diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 8ddbc07a0..bbae2e015 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -522,7 +522,7 @@ sub eml2doc ($$$;$) { sub add_xapian ($$$$) { my ($self, $eml, $smsg, $mids) = @_; begin_txn_lazy($self); - my $merge_vmd = delete $smsg->{-merge_vmd}; + my ($merge_vmd, $eidx_more) = delete @$smsg{qw(-merge_vmd -eidx_more)}; my $doc = eml2doc($self, $eml, $smsg, $mids); if (my $old = $merge_vmd ? 
_get_doc($self, $smsg->{num}) : undef) { my @x = @VMD_MAP; @@ -532,6 +532,11 @@ sub add_xapian ($$$$) { } } } + for (@$eidx_more) { + my ($eidx_key, @list_ids) = @$_; + add_bool_term($doc, 'O'.$eidx_key) if $eidx_key ne '.'; + index_list_id_raw $self, $doc, @list_ids; + } $self->{xdb}->replace_document($smsg->{num}, $doc); }