From 0e63912e1b6daa712174cbd03fc9bb54d719e279 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 12 May 2025 20:44:59 +0000 Subject: [PATCH] xap_helper: mset supports multiple requests By supporting multiple queries in one IPC call, we can reduce IPC traffic for search endpoints which make multiple search requests but only use the result of the first one that returns any matches. This will be used for partial Message-ID matching for handling truncated URLs. --- lib/PublicInbox/XapHelper.pm | 9 +++++++-- lib/PublicInbox/xh_mset.h | 10 +++++++-- t/xap_helper.t | 39 ++++++++++++++++++++---------------- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm index 813d03c98..a36acf469 100644 --- a/lib/PublicInbox/XapHelper.pm +++ b/lib/PublicInbox/XapHelper.pm @@ -164,7 +164,7 @@ sub mset_iter ($$) { } sub cmd_mset { # to be used by WWW + IMAP - my ($req, $qry_str) = @_; + my ($req, $qry_str, @rest) = @_; $qry_str // die 'usage: mset [OPTIONS] QRY_STR'; my $opt = { limit => $req->{'m'}, offset => $req->{o} // 0 }; $opt->{relevance} = 1 if $req->{r}; @@ -177,7 +177,12 @@ sub cmd_mset { # to be used by WWW + IMAP $opt->{uid_range} = \@uid_range if grep(defined, @uid_range) == 2; $opt->{threadid} = $req->{T} if defined $req->{T}; my $mset = $req->{srch}->mset($qry_str, $opt); - say { $req->{0} } 'mset.size=', $mset->size, + my $size = $mset->size; + while ($size == 0 && @rest) { + $mset = $req->{srch}->mset(shift @rest, $opt); + $size = $mset->size; + } + say { $req->{0} } 'mset.size=', $size, ' .get_matches_estimated=', $mset->get_matches_estimated; for my $it ($mset->items) { for (my $t = 10; $t > 0; --$t) { diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h index 86996ca5c..7d0100932 100644 --- a/lib/PublicInbox/xh_mset.h +++ b/lib/PublicInbox/xh_mset.h @@ -18,10 +18,16 @@ static bool cmd_mset(struct req *req) CLEANUP_FBUF struct fbuf wbuf = {}; Xapian::MSet mset = req->code_search ? 
commit_mset(req, qry_str) : mail_mset(req, qry_str); + unsigned long long size = mset.size(); + while (size == 0 && ++optind < req->argc) { + qry_str = req->argv[optind]; + mset = req->code_search ? commit_mset(req, qry_str) : + mail_mset(req, qry_str); + size = mset.size(); + } fbuf_init(&wbuf); fprintf(wbuf.fp, "mset.size=%llu .get_matches_estimated=%llu\n", - (unsigned long long)mset.size(), - (unsigned long long)mset.get_matches_estimated()); + size, (unsigned long long)mset.get_matches_estimated()); int fd = fileno(req->fp[0]); for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) { off_t off = ftello(wbuf.fp); diff --git a/t/xap_helper.t b/t/xap_helper.t index 0db1bd8e6..41520866f 100644 --- a/t/xap_helper.t +++ b/t/xap_helper.t @@ -247,29 +247,34 @@ for my $n (@NO_CXX) { $err = do { local $/; <$err_r> }; is $err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})"; - pipe $r, $w; - $xhc->mkreq([$w], qw(mset), @ibx_shard_args, - 'dfn:lib/PublicInbox/Search.pm'); - close $w; - chomp((my $hdr, @res) = readline($r)); - like $hdr, qr/\bmset\.size=1\b/, - "got expected header via mset ($xhc->{impl}"; - is scalar(@res), 1, 'got one result'; - @res = split /\0/, $res[0]; - { - my $doc = $v2->search->xdb->get_document($res[0]); - ok $doc, 'valid document retrieved'; - my @q = PublicInbox::Search::xap_terms('Q', $doc); - is_deeply \@q, [ $mid ], 'docid usable'; + # ensure we can try multiple queries and return the first one + # with >0 matches + for my $try ([[], []], [['thisbetternotmatchanything'], ['z:0..']]) { + pipe $r, $w; + $xhc->mkreq([$w], qw(mset), @ibx_shard_args, @{$try->[0]}, + 'dfn:lib/PublicInbox/Search.pm', + @{$try->[1]}); + close $w; + chomp((my $hdr, @res) = readline($r)); + like $hdr, qr/\bmset\.size=1\b/, + "got expected header via mset ($xhc->{impl}"; + is scalar(@res), 1, 'got one result'; + @res = split /\0/, $res[0]; + { + my $doc = $v2->search->xdb->get_document($res[0]); + ok $doc, 'valid document retrieved'; + my @q = 
PublicInbox::Search::xap_terms('Q', $doc); + is_deeply \@q, [ $mid ], 'docid usable'; + } + ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100'; + is scalar(@res), 3, 'only 3 columns in result'; } - ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100'; - is scalar(@res), 3, 'only 3 columns in result'; pipe $r, $w; $xhc->mkreq([$w], qw(mset), @ibx_shard_args, 'dt:19700101'.'000000..'); close $w; - chomp(($hdr, @res) = readline($r)); + chomp((my $hdr, @res) = readline($r)); like $hdr, qr/\bmset\.size=6\b/, "got expected header via multi-result mset ($xhc->{impl}"; is(scalar(@res), 6, 'got 6 rows'); -- 2.47.3