git.ipfire.org Git - thirdparty/public-inbox.git/commitdiff
www: extmsg: dedupe cross-posted Message-IDs
author Eric Wong <e@80x24.org>
Mon, 12 May 2025 20:45:01 +0000 (20:45 +0000)
committer Eric Wong <e@80x24.org>
Thu, 15 May 2025 07:59:57 +0000 (07:59 +0000)
Having redundant links to the same cross-posted message is
unlikely to be of help, especially when using an /all/ extindex.
So just grab the first matching inbox + Message-ID combo and
ignore subsequent URLs for Message-IDs which exist in multiple
inboxes/extindices.

lib/PublicInbox/ExtMsg.pm

index 7453df1ef55ee29843681403094237b57c5eaf49..9982e67291a35103a3324975b5209fc327780aa9 100644 (file)
@@ -36,10 +36,14 @@ sub partial_cb { # async_mset cb
                ++$ctx->{ext_msg_partial_fail};
                warn($msg);
        } else {
-               my $ibx = $ctx->{partial_ibx};
-               my @mid = map { $_->{mid} } @{$srch->mset_to_smsg($ibx, $mset)};
+               my $seen = $ctx->{partial_seen} //= {};
+               my (@mid, $mid);
+               for (@{$srch->mset_to_smsg($ctx->{partial_ibx}, $mset)}) {
+                       $mid = $_->{mid};
+                       $seen->{$mid} //= push @mid, $mid;
+               }
                if (scalar @mid) {
-                       push @{$ctx->{partial}}, [ $ibx, \@mid ];
+                       push @{$ctx->{partial}}, [ $ctx->{partial_ibx}, \@mid ];
                        (($ctx->{n_partial} += scalar(@mid)) >= PARTIAL_MAX) and
                                delete $ctx->{again}; # done
                }