From: Eric Wong
Date: Mon, 10 Jun 2024 11:34:27 +0000 (+0000)
Subject: www: deduplicate Message-ID in threading + skeleton
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=15470b46d7add99d2f4925ce5b0e45a818e00bc6;p=thirdparty%2Fpublic-inbox.git

www: deduplicate Message-ID in threading + skeleton

xt/perf-threading.t reports a small 0.5-1.0% memory reduction in
non-ancient Perls with CoW strings for threading alone
(w/o rendering the View.pm stuff).

On informal tests using -httpd and giant Linux stable patch
set threads (700+ messages), this ends up being roughly 5MB
saved in /T/ rendering since we use the {mid} field again
in the $ctx->{mapping} table.

This becomes even more beneficial if handling parallel HTTP
requests for messages in the same message thread, even across
different endpoints.
---

diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 00ae9facc..672c53ad9 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -33,19 +33,24 @@ sub thread {
 	# can be shakier if somebody used In-Reply-To with multiple, disparate
 	# messages. So, take the client Date: into account since we can't
 	# always determine ordering when somebody uses multiple In-Reply-To.
+ my (%dedupe, $mid); my @kids = sort { $a->{ds} <=> $b->{ds} } grep { # this delete saves around 4K across 1K messages # TODO: move this to a more appropriate place, breaks tests # if we do it during psgi_cull delete $_->{num}; bless $_, 'PublicInbox::SearchThread::Msg'; - if (exists $id_table{$_->{mid}}) { + $mid = $_->{mid}; + if (exists $id_table{$mid}) { $_->{children} = []; push @imposters, $_; # we'll deal with them later undef; } else { $_->{children} = {}; # will become arrayref later - $id_table{$_->{mid}} = $_; + %dedupe = ($mid => undef); + ($mid) = keys %dedupe; + $_->{mid} = $mid; + $id_table{$mid} = $_; defined($_->{references}); } } @$msgs; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 958efa417..dcceb3112 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -432,6 +432,7 @@ sub walk_thread ($$$) { sub pre_thread { # walk_thread callback my ($ctx, $level, $node, $idx) = @_; + # node->{mid} is deduplicated in PublicInbox::SearchThread::thread $ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ]; skel_dump($ctx, $level, $node); }