From: Eric Wong Date: Fri, 5 Dec 2025 07:44:26 +0000 (+0000) Subject: xcpdb: preserve has_threadid+skip_docdata across reshards X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d937b906500374083a6af84c6c6442f50225ab90;p=thirdparty%2Fpublic-inbox.git xcpdb: preserve has_threadid+skip_docdata across reshards Neither the `has_threadid' nor `skip_docdata' metadata elements ever got preserved properly across -xcpdb reshards. This is a long-standing bug which has existed since v1.6. While omitting `skip_docdata' only wasted space for v2, omitting `has_threadid' in copies causes missing search functionality as part of the problem reported for the lore.k.o upgrade to v2.0.0. Working around this bug after-the-fact requires the use of xapian-metadata(1) on shard 0 of v2 inboxes and extindices: xapian-metadata set /path/to/v2inbox/xap15/0 has_threadid 1 xapian-metadata set /path/to/extindex/ei15/0 has_threadid 1 Reported-by: Konstantin Ryabitsev Link: https://public-inbox.org/meta/20251204-modest-jaybird-of-will-9d55dc@lemur/ --- diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 2cc48c8e2..476c6c499 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -598,11 +598,15 @@ sub cpdb ($$$) { # cb_spawn callback my $lc = $src->get_metadata('last_commit'); $dst->set_metadata('last_commit', $lc) if $lc; - # only the first xapian shard (0) gets 'indexlevel' + # only the first xapian shard (0) gets metadata if ($new =~ m!/(?:xapian[0-9]+|(?:ei|xap)[0-9]+/0)\b!) 
{ my $l = $src->get_metadata('indexlevel'); $l eq 'medium' and $dst->set_metadata('indexlevel', $l); + for my $k (qw(has_threadid skip_docdata)) { + my $v = $src->get_metadata($k); + $dst->set_metadata($k, $v) if $v; + } } if ($pr_data) { my $tot = $src->get_doccount; diff --git a/t/xcpdb-reshard.t b/t/xcpdb-reshard.t index 7797aaaf2..3f9ae1173 100644 --- a/t/xcpdb-reshard.t +++ b/t/xcpdb-reshard.t @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2019-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; @@ -47,6 +47,10 @@ for my $R (qw(2 4 1 3 3)) { ok(run_script($cmd, $env), "xcpdb -R$R"); my @new_shards = grep(m!/\d+\z!, glob("$ibx->{inboxdir}/xap*/*")); is(scalar(@new_shards), $R, 'resharded to two shards'); + is $ibx->search->xdb->get_metadata('has_threadid'), + '1', 'has_threadid set'; + is $ibx->search->xdb->get_metadata('indexlevel'), + 'medium', 'indexlevel preserved'; my $mset = $ibx->search->mset('s:this'); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar(@$msgs), $ndoc, 'got expected docs after resharding');