From: Eric Wong Date: Mon, 12 May 2025 20:45:01 +0000 (+0000) Subject: www: extmsg: dedupe cross-posted Message-IDs X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=df0579765d8481607fcf580822f60b0224469ccb;p=thirdparty%2Fpublic-inbox.git www: extmsg: dedupe cross-posted Message-IDs Having redundant links to the same cross-posted message is unlikely to be of help, especially when using an /all/ extindex. So just grab the first matching inbox + Message-ID combo and ignore subsequent URLs for Message-IDs which exist in multiple inboxes/extindices. --- diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 7453df1ef..9982e6729 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -36,10 +36,14 @@ sub partial_cb { # async_mset cb ++$ctx->{ext_msg_partial_fail}; warn($msg); } else { - my $ibx = $ctx->{partial_ibx}; - my @mid = map { $_->{mid} } @{$srch->mset_to_smsg($ibx, $mset)}; + my $seen = $ctx->{partial_seen} //= {}; + my (@mid, $mid); + for (@{$srch->mset_to_smsg($ctx->{partial_ibx}, $mset)}) { + $mid = $_->{mid}; + $seen->{$mid} //= push @mid, $mid; + } if (scalar @mid) { - push @{$ctx->{partial}}, [ $ibx, \@mid ]; + push @{$ctx->{partial}}, [ $ctx->{partial_ibx}, \@mid ]; (($ctx->{n_partial} += scalar(@mid)) >= PARTIAL_MAX) and delete $ctx->{again}; # done }