From: Eric Wong Date: Thu, 20 Mar 2025 00:05:35 +0000 (+0000) Subject: plack_limiter: PSGI middleware to limit concurrency X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=76a1b52e35a592f76035f31ad5a8b7dde04be725;p=thirdparty%2Fpublic-inbox.git plack_limiter: PSGI middleware to limit concurrency While processing several concurrent requests within the same worker process is helpful to exploit parallelism in git blob lookups and smooth out delays, excessive parallelism is harmful since it allows too much memory to be allocated at once for zlib buffers and such. While PublicInbox::WWW already uses the limiter for certain expensive endpoints (e.g. /s/ and anything using Qspawn), some long-running endpoints with many inexpensive steps (e.g. /T/, /t/, /d/, *.atom, *.mbox.gz, etc.) can end up using a large amount of memory for gzip buffers despite being fair to other responses and being able to stream >500 messages/sec on 2010-era hardware. So give sysadmins an option to balance between smoothing out delays in blob retrieval and memory usage required to compress and spew out chunks of potentially large multi-email responses. 
---
diff --git a/MANIFEST b/MANIFEST
index 93407a46b..5e599990d 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -328,6 +328,7 @@ lib/PublicInbox/OverIdx.pm
 lib/PublicInbox/POP3.pm
 lib/PublicInbox/POP3D.pm
 lib/PublicInbox/PktOp.pm
+lib/PublicInbox/PlackLimiter.pm
 lib/PublicInbox/Qspawn.pm
 lib/PublicInbox/Reply.pm
 lib/PublicInbox/RepoAtom.pm
diff --git a/lib/PublicInbox/PlackLimiter.pm b/lib/PublicInbox/PlackLimiter.pm
new file mode 100644
index 000000000..a1cc51dcf
--- /dev/null
+++ b/lib/PublicInbox/PlackLimiter.pm
@@ -0,0 +1,117 @@
+# Copyright (C) all contributors
+# License: GPL-3.0+
+# generic Plack/PSGI middleware to expose PublicInbox::Limiter, (see __END__)
+package PublicInbox::PlackLimiter;
+use v5.12;
+use parent qw(Plack::Middleware);
+use PublicInbox::OnDestroy;
+
+sub prepare_app { # called via Plack::Component (used by Plack::Middleware)
+	my ($self) = @_;
+	$self->{match_cb} //= sub { 1 }; # default: limit every request
+	$self->{max} //= 2; # concurrent requests allowed through
+	$self->{run_queue} = [];
+	$self->{running} = 0;
+	$self->{rejected} = 0;
+	$self->{message} //= "too busy\n"; # body of the 503 response
+}
+
+sub r503 ($) { # build the 503 response and count the rejection
+	my @body = ($_[0]->{message});
+	++$_[0]->{rejected};
+	[ 503, [ 'Content-Type' => 'text/plain',
+		'Content-Length' => length($body[0]) ], \@body ]
+}
+
+sub next_req { # on_destroy cb
+	my ($self) = @_;
+	--$self->{running}; # one running slot is free again
+	my $env = shift @{$self->{run_queue}} or return; # nothing queued
+	my $wcb = delete $env->{'p-i.limiter.wcb'} // die 'BUG: no wcb';
+	my $res = eval { call($self, $env) };
+	return warn("W: $@") if $@;
+	ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res);
+}
+
+sub stats ($) { # plain-text counters for the stats endpoint
+	my ($self) = @_;
+	my $nq = scalar @{$self->{run_queue}};
+	my $res = <<EOM;
+running: $self->{running}
+queued: $nq
+rejected: $self->{rejected}
+max: $self->{max}
+EOM
+	[ 200, [ 'Content-Type' => 'text/plain',
+		'Content-Length' => length($res) ], [ $res ] ]
+}
+
+sub call {
+	my ($self, $env) = @_;
+	if (defined $self->{stats_match_cb}) {
+		return stats $self if $self->{stats_match_cb}->($env);
+	}
+	return $self->app->($env) if !$self->{match_cb}->($env); # unlimited
+	return r503($self) if @{$self->{run_queue}} > ($self->{depth} // 32);
+	if ($self->{running} < $self->{max}) {
+		++$self->{running};
+		$env->{'p-i.limiter.next'} = on_destroy \&next_req, $self;
+		$self->app->($env);
+	} else { # capture write cb from PSGI server and queue up
+		sub {
+			$env->{'p-i.limiter.wcb'} = $_[0];
+			push @{$self->{run_queue}}, $env;
+		};
+	}
+}
+
+1;
+__END__
+
+=head1 NAME
+
+PublicInbox::PlackLimiter - limit concurrency to parts of a PSGI app
+
+=head1 SYNOPSIS
+
+	# In your .psgi file
+	use Plack::Builder;
+	builder {
+
+		# by default, only 2 requests may be processed at once:
+		enable '+PublicInbox::PlackLimiter';
+
+		# You will likely only want to limit certain expensive endpoints,
+		# while allowing maximum concurrency for inexpensive endpoints.
+		# You can do that by passing a `match_cb' parameter:
+		enable '+PublicInbox::PlackLimiter',
+			# some expensive endpoints for my public-inbox instance, YMMV
+			match_cb => sub {
+				my ($env) = @_;
+				$env->{PATH_INFO} =~ m!/(?:[Ttd]/|.+\.
+					(?:mbox\.gz|atom|html))\z!x ||
+				$env->{QUERY_STRING} =~ /\bx=[tA]\b/
+			},
+			# You can increase `max' and `depth' to higher numbers
+			max => 3, # maximum concurrent requests
+			depth => 128, # maximum queue depth (size)
+			# You can also enable a stats endpoint if you wish (optional):
+			stats_match_cb => sub {
+				my ($env) = @_;
+				$env->{REQUEST_URI} eq '/stats' &&
+					$env->{REMOTE_ADDR} eq '127.0.0.1'
+			};
+		# ...
+	}; # /builder
+
+=head1 DESCRIPTION
+
+PublicInbox::PlackLimiter lets a sysadmin limit concurrency to certain
+expensive endpoints while allowing the normal concurrency level of the
+server to run inexpensive requests.
+
+=head1 SEE ALSO
+
+L L L
+
+=cut