From: Eric Wong Date: Thu, 6 Jun 2024 07:44:16 +0000 (+0000) Subject: www: reduce fragmentation in /t/ and /T/ endpoints X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d116a7856c8c3d00af2924ba536afc76f6874062;p=thirdparty%2Fpublic-inbox.git www: reduce fragmentation in /t/ and /T/ endpoints For giant threads with /t/ and /T/ endpoints, avoid generating a large string with a medium lifetime for the thread skeleton ($ctx->{skel}). Instead, make $ctx->{skel} an arrayref and use it to store a bunch of smaller strings. While keeping many small strings is inefficient due to pointer chasing, forcing a smaller distribution of sizes makes it easier for the malloc implementation to organize and find small chunks of memory instead of having to find (and hold) larger contiguous chunks. When a large string is created now, its lifetime is kept as short as possible to decrease its likelihood of causing fragmentation. Preliminary testing shows this appears to reduce RSS by roughly 20-40% under both glibc malloc (using a tiny MALLOC_MMAP_THRESHOLD_=67000) on 32-bit and jemalloc 5.2.1 on 64-bit with standard settings. --- diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 9ce1b1eed..f056dddfa 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -316,13 +316,12 @@ sub mset_thread { my $rootset = PublicInbox::SearchThread::thread($msgs, $r ? \&sort_relevance : \&PublicInbox::View::sort_ds, $ctx); - my $skel = search_nav_bot($ctx, $mset, $q).'
'. <{skel} = [ search_nav_bot($ctx, $mset, $q).'
'. <{-upfx} = '';
 	$ctx->{anchor_idx} = 1;
 	$ctx->{cur_level} = 0;
-	$ctx->{skel} = \$skel;
 	$ctx->{mapping} = {};
 	$ctx->{searchview} = 1;
 	$ctx->{prev_attr} = '';
@@ -332,7 +331,7 @@ EOM
 	# reduce hash lookups in skel_dump
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
 	PublicInbox::View::walk_thread($rootset, $ctx,
-		\&PublicInbox::View::pre_thread);
+		\&PublicInbox::View::pre_thread); # pushes to ctx->{skel}
 
 	# link $INBOX_DIR/description text to "recent" view around
 	# the newest message in this result set:
@@ -349,7 +348,7 @@ sub mset_thread_i {
 	print { $ctx->zfh } $ctx->html_top if exists $ctx->{-html_tip};
 	$eml and return PublicInbox::View::eml_entry($ctx, $eml);
 	my $smsg = shift @{$ctx->{msgs}} or
-		print { $ctx->zfh } ${delete($ctx->{skel})};
+		print { $ctx->zfh } @{delete($ctx->{skel})};
 	$smsg;
 }
 
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 44e1f2a8b..958efa417 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -476,7 +476,7 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
 			print { $ctx->zfh } ghost_index_entry($ctx, $lvl, $smsg)
 		} else { # all done
 			print { $ctx->zfh } thread_adj_level($ctx, 0),
-						${delete($ctx->{skel})};
+						@{delete($ctx->{skel})};
 			return;
 		}
 	}
@@ -513,11 +513,13 @@ href="../../">newest]
 EOF
 	$skel .= "Thread overview: ";
 	$skel .= $nr == 1 ? '(only message)' : "$nr+ messages";
-	$skel .= " (download: mbox.gz";
-	$skel .= " / follow: Atom feed)\n";
-	$skel .= "-- links below jump to the message on this page --\n";
+	$skel .= <mbox.gz follow: Atom feed
+-- links below jump to the message on this page --
+EOM
 	$ctx->{cur_level} = 0;
-	$ctx->{skel} = \$skel;
+	$ctx->{skel} = [ $skel ];
 	$ctx->{prev_attr} = '';
 	$ctx->{prev_level} = 0;
 	$ctx->{root_anchor} = 'm' . id_compress($mid, 1);
@@ -529,9 +531,9 @@ EOF
 
 	# reduce hash lookups in pre_thread->skel_dump
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-	walk_thread($rootset, $ctx, \&pre_thread);
+	walk_thread($rootset, $ctx, \&pre_thread); # pushes to ctx->{skel}
 
-	$skel .= '
'; + push @{$ctx->{skel}}, '
'; return stream_thread($rootset, $ctx) unless $ctx->{flat}; # flat display: lazy load the full message from smsg @@ -553,8 +555,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback while (my $smsg = shift @{$ctx->{msgs}}) { return $smsg if exists($smsg->{blob}); } - my $skel = delete($ctx->{skel}) or return; # all done - print { $ctx->zfh } $$skel; + print { $ctx->zfh } @{delete $ctx->{skel} // []}; undef; } } @@ -778,13 +779,13 @@ sub thread_skel ($$$) { my $ibx = $ctx->{ibx}; my ($nr, $msgs) = $ibx->over->get_thread($mid); my $parent = in_reply_to($hdr); - $$skel .= "\nThread overview: "; + $skel->[-1] .= "\nThread overview: "; if ($nr <= 1) { if (defined $parent) { - $$skel .= SKEL_EXPAND."\n "; - $$skel .= ghost_parent('../', $parent) . "\n"; + $skel->[-1] .= SKEL_EXPAND."\n "; + $skel->[-1] .= ghost_parent('../', $parent) . "\n"; } else { - $$skel .= "[no followups] ". + $skel->[-1] .= "[no followups] ". SKEL_EXPAND."\n"; } $ctx->{next_msg} = undef; @@ -792,8 +793,9 @@ sub thread_skel ($$$) { return; } - $$skel .= $nr; - $$skel .= '+ messages / '.SKEL_EXPAND.qq! top\n!; + $skel->[-1] .= $nr; + $skel->[-1] .= '+ messages / '.SKEL_EXPAND. + qq! top\n!; # nb: mutt only shows the first Subject in the index pane # when multiple Subject: headers are present, so we follow suit: @@ -815,7 +817,7 @@ sub thread_skel ($$$) { sub html_footer { my ($ctx, $hdr) = @_; my $upfx = '../'; - my (@related, $skel); + my (@related, @skel); my $foot = '
';
 	my $qry = delete $ctx->{-qry};
 	if ($qry && $ctx->{ibx}->isrch) {
@@ -847,12 +849,12 @@ EOM
 		my $t = ts2str($ctx->{-t_max});
 		my $t_fmt = fmt_ts($ctx->{-t_max});
 		my $fallback = @related ? "\t" : "\t";
-		$skel = <~$t_fmt UTC|newest]
 EOF
-		thread_skel(\$skel, $ctx, $hdr);
+		thread_skel(\@skel, $ctx, $hdr);
 		my ($next, $prev);
 		my $parent = '       ';
 		$next = $prev = '    ';
@@ -879,11 +881,11 @@ EOF
 		}
 		$foot .= "$next $prev$parent ";
 	} else { # unindexed inboxes w/o over
-		$skel = qq( latest);
+		$skel[0] = qq( latest);
 	}
-	# $skel may be big for big threads, don't append it to $foot
+	# @skel may be big for big threads, don't push to it
 	print { $ctx->zfh } $foot, qq(reply),
-				$skel, '
', @related, + @skel, '', @related, msg_reply($ctx, $hdr); } @@ -985,7 +987,8 @@ sub skel_dump { # walk_thread callback my $mid = $smsg->{mid}; if ($level == 0 && $ctx->{skel_dump_roots}++) { - $$skel .= delete($ctx->{sl_note}) || ''; + my $note = delete $ctx->{sl_note}; + push @$skel, $note if $note; } my $f = ascii_html(delete $smsg->{from_name}); @@ -1014,7 +1017,7 @@ sub skel_dump { # walk_thread callback if ($cur) { if ($cur eq $mid) { delete $ctx->{cur}; - $$skel .= "$d". + push @$skel, "$d". "$attr [this message]\n"; return 1; } else { @@ -1054,8 +1057,7 @@ sub skel_dump { # walk_thread callback } else { $m = $ctx->{-upfx}.mid_href($mid).'/'; } - $$skel .= $d . "" . $end; - 1; + push @$skel, qq($d$end); } sub _skel_ghost { @@ -1078,8 +1080,7 @@ sub _skel_ghost { } else { $d .= qq{<$html>\n}; } - ${$ctx->{skel}} .= $d; - 1; + push @{$ctx->{skel}}, $d; } # note: we favor Date: here because git-send-email increments it