From ff4ac066a7ff50ee136d918cfdd58bf43a09f083 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 15 Aug 2025 13:41:36 +0000
Subject: [PATCH] extindex: preserve indexlevel=basic on incremental update

-extindex needs to preserve indexlevel=basic when doing
incremental updates if the extindex was originally created with
indexlevel=basic.  Otherwise blindly upgrading somebody to
indexlevel=full would waste disk space and likely result in
inconsistent indexing on the Xapian side.

Fixes: bf2360b31 (extindex: support `-L basic' to avoid most Xapian space, 2025-08-13)
---
 lib/PublicInbox/ExtSearchIdx.pm | 38 +++++++++++++++++++++++++++++++++-----
 t/extsearch.t                   | 24 +++++++++++++++++++++---
 2 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index bc1e68d77..1fd86e059 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -46,11 +46,34 @@ use PublicInbox::Admin qw(fmt_localtime);
 use PublicInbox::Config qw(rel2abs_collapsed);
 use PublicInbox::IO qw(try_cat);
 
+# Returns the indexlevel to use when the caller requested none: 'medium'
+# if Xapian metadata says so, 'basic' if there is no Xapian DB but
+# over.sqlite3 has rows, 'full' otherwise.  Drops {xdb} and {over} on return.
+sub detect_indexlevel ($) {
+	my ($self) = @_;
+	my $l = 'full'; # the default for new extindices
+	if ($self->xdb) {
+		my $m = $self->xdb->get_metadata('indexlevel');
+		if ($m eq 'medium') {
+			$l = 'medium';
+		} elsif ($m ne '') {
+			warn <<EOM;
+$self->{topdir} has unexpected indexlevel in Xapian: $m
+EOM
+		}
+	# we're basic if over.sqlite3 is non-empty and there's nothing
+	# in Xapian
+	} elsif ($self->over && $self->over->dbh->selectrow_array(<<'') > 0) {
+SELECT COUNT(*) FROM over
+
+		$l = 'basic';
+	}
+	delete @$self{qw(xdb over)};
+	$l;
+}
+
 sub new {
 	my (undef, $dir, $opt) = @_;
-	my $l = $opt->{indexlevel} // 'full';
-	$l !~ $PublicInbox::SearchIdx::INDEXLEVELS and
-		die "invalid indexlevel=$l\n";
 	my $self = bless {
 		xpfx => "$dir/ei".PublicInbox::Search::SCHEMA_VERSION,
 		topdir => $dir,
@@ -58,7 +81,6 @@ sub new {
 		ibx_map => {}, # (newsgroup//inboxdir) => $ibx
 		ibx_active => [], # by config section order
 		ibx_known => [], # by config section order
-		indexlevel => $l,
 		transact_bytes => 0,
 		total_bytes => 0,
 		current_info => '',
@@ -67,7 +89,13 @@ sub new {
 	}, __PACKAGE__;
 	$self->{shards} = $self->count_shards ||
 		nproc_shards { nproc => $opt->{jobs} };
-	my $oidx = PublicInbox::OverIdx->new("$self->{xpfx}/over.sqlite3");
+	my $l = $opt->{indexlevel};
+	my $over_file = "$self->{xpfx}/over.sqlite3";
+	$l ||= detect_indexlevel $self;
+	$l !~ $PublicInbox::SearchIdx::INDEXLEVELS and
+		die "invalid indexlevel=$l\n";
+	$self->{indexlevel} = $l;
+	my $oidx = PublicInbox::OverIdx->new($over_file);
 	$oidx->{journal_mode} = 'wal' if $opt->{wal};
 	$self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync};
 	$self->{-dangerous} = 1 if $opt->{dangerous};
diff --git a/t/extsearch.t b/t/extsearch.t
index bb1fcfc50..553ff4056 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -718,10 +718,28 @@ if ('max-size') {
 }
 
 if ('basic') {
-	ok run_script([qw(-extindex -L basic --dangerous --all),
-			"$home/basic"]), 'extindex init basic';
-	my @shards = glob "$home/basic/ei*/[0123]/";
+	my $rdr = { 2 => \(my $err = '') };
+	my $dir = "$home/basic";
+	ok run_script([qw(-extindex -L basic --dangerous --all), $dir],
+		undef, $rdr), 'extindex init basic';
+	my @shards = glob "$dir/ei*/[0123]/";
 	is_deeply \@shards, [], 'no search shards created';
+
+	$env->{ORIGINAL_RECIPIENT} = $v2addr;
+	my $eml = eml_load('t/msg_iter-order.eml');
+	my $msgid = 'msg-iter-order@eml';
+	$eml->header_set('Message-ID', "<$msgid>");
+	my $in = \($eml->as_string);
+	run_script [qw(-mda --no-precheck)], $env, { 0 => $in } or
+		xbail '-mda';
+
+	ok run_script([qw(-extindex --all), $dir], undef, $rdr),
+		'extindex incremental basic';
+	@shards = glob "$dir/ei*/[0123]/";
+	is_deeply \@shards, [], 'no new search shards on incremental update';
+	my $es = PublicInbox::ExtSearch->new($dir);
+	my $smsg = $es->over->next_by_mid($msgid, \(my $id), \(my $prev));
+	ok $smsg, 'new message imported into over.sqlite3 w/ basic';
 }
 
 done_testing;
-- 
2.47.3