]> git.ipfire.org Git - thirdparty/public-inbox.git/commitdiff
plack_limiter: PSGI middleware to limit concurrency
author Eric Wong <e@80x24.org>
Thu, 20 Mar 2025 00:05:35 +0000 (00:05 +0000)
committer Eric Wong <e@80x24.org>
Fri, 21 Mar 2025 22:01:41 +0000 (22:01 +0000)
While processing several concurrent requests within the same
worker process is helpful to exploit parallelism in git blob
lookups and smooth out delays, excessive parallelism is harmful
since it allows too much memory to be allocated at once for zlib
buffers and such.

While PublicInbox::WWW already uses the limiter for certain
expensive endpoints (e.g. /s/ and anything using Qspawn), some
long-running endpoints with many inexpensive steps (e.g. /T/,
/t/, /d/, *.atom, *.mbox.gz, etc.) can end up using a large
amount of memory for gzip buffers despite being fair to other
responses and being able to stream >500 messages/sec on 2010-era
hardware.

So give sysadmins an option to balance between smoothing out
delays in blob retrieval and memory usage required to compress
and spew out chunks of potentially large multi-email responses.

MANIFEST
lib/PublicInbox/PlackLimiter.pm [new file with mode: 0644]

index 93407a46bc7d06c4ca47c0e6ce8636ce67baa733..5e599990deb36eec9f988158fe38f15486a16808 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -328,6 +328,7 @@ lib/PublicInbox/OverIdx.pm
 lib/PublicInbox/POP3.pm
 lib/PublicInbox/POP3D.pm
 lib/PublicInbox/PktOp.pm
+lib/PublicInbox/PlackLimiter.pm
 lib/PublicInbox/Qspawn.pm
 lib/PublicInbox/Reply.pm
 lib/PublicInbox/RepoAtom.pm
diff --git a/lib/PublicInbox/PlackLimiter.pm b/lib/PublicInbox/PlackLimiter.pm
new file mode 100644 (file)
index 0000000..a1cc51d
--- /dev/null
@@ -0,0 +1,117 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+# generic Plack/PSGI middleware to limit concurrency, akin to
+# PublicInbox::Limiter (see __END__ for documentation)
+package PublicInbox::PlackLimiter;
+use v5.12;
+use parent qw(Plack::Middleware);
+use PublicInbox::OnDestroy;
+
+# Plack::Component hook (reached via Plack::Middleware): fill in defaults
+# for unset options and reset the per-instance bookkeeping.
+# Options left alone when already defined: `match_cb' (default: a sub
+# that always returns 1), `max' (default: 2) and `message' (default:
+# "too busy\n").  `run_queue', `running' and `rejected' are always reset.
+sub prepare_app {
+       my ($self) = @_;
+
+       # runtime state always starts from scratch
+       @$self{qw(run_queue running rejected)} = ([], 0, 0);
+
+       # user-supplied knobs win over the built-in defaults
+       $self->{match_cb} //= sub { 1 };
+       $self->{max} //= 2;
+       $self->{message} //= "too busy\n";
+}
+
+# Build the PSGI 503 response used when a request is turned away, and
+# bump the `rejected' counter.  The body is the configured `message'.
+# Returns the usual PSGI triple: [ status, \@headers, \@body ].
+sub r503 ($) {
+       my ($self) = @_;
+       my $msg = $self->{message};
+       ++$self->{rejected};
+       my @hdr = ('Content-Type' => 'text/plain',
+               'Content-Length' => length($msg));
+       [ 503, \@hdr, [ $msg ] ]
+}
+
+# on_destroy callback registered by call(); presumably fires once the
+# `p-i.limiter.next' guard stored in a running request's $env is released.
+# Frees that request's slot and, if another request is waiting in the
+# run queue, dispatches it through call() and relays the result via the
+# PSGI write callback that was captured when it was queued.
+sub next_req { # on_destroy cb
+       my ($self) = @_;
+       --$self->{running};
+       my $env = shift @{$self->{run_queue}} or return;
+       my $wcb = delete $env->{'p-i.limiter.wcb'} // die 'BUG: no wcb';
+       my $res = eval { call($self, $env) };
+       if ($@) {
+               # the queued client is still waiting on $wcb and nothing
+               # has been written to it yet; answer with a 500 instead
+               # of only logging, else it would never get a response
+               warn("W: $@");
+               my $msg = "Internal Server Error\n";
+               $res = [ 500, [ 'Content-Type' => 'text/plain',
+                       'Content-Length' => length($msg) ], [ $msg ] ];
+       }
+       # delayed/streaming responses get the write cb; plain ones are
+       # handed to it directly
+       ref($res) eq 'CODE' ? $res->($wcb) : $wcb->($res);
+}
+
+# Render the limiter's counters as a plain-text PSGI 200 response:
+# requests running, queued and rejected, followed by the `max' setting,
+# one "label: value" pair per line.
+sub stats ($) {
+       my ($self) = @_;
+       my @rows = (
+               [ running => $self->{running} ],
+               [ queued => scalar @{$self->{run_queue}} ],
+               [ rejected => $self->{rejected} ],
+               [ max => $self->{max} ],
+       );
+       my $txt = join '', map { "$_->[0]: $_->[1]\n" } @rows;
+       my @hdr = ('Content-Type' => 'text/plain',
+               'Content-Length' => length($txt));
+       [ 200, \@hdr, [ $txt ] ]
+}
+
+# PSGI entry point (Plack::Middleware).  For each request:
+# - answer with stats() if `stats_match_cb' is set and matches $env
+# - pass straight through to the wrapped app if `match_cb' returns false
+# - run the wrapped app right away while fewer than `max' are running
+# - otherwise queue the request, holding at most `depth' (default: 32)
+#   waiting requests; once the queue is full, answer 503 via r503()
+sub call {
+       my ($self, $env) = @_;
+       if (defined $self->{stats_match_cb}) {
+               return stats $self if $self->{stats_match_cb}->($env);
+       }
+       return $self->app->($env) if !$self->{match_cb}->($env);
+       if ($self->{running} < $self->{max}) {
+               ++$self->{running};
+               # guard object: next_req is registered via on_destroy,
+               # presumably to run once this $env entry is released
+               $env->{'p-i.limiter.next'} = on_destroy \&next_req, $self;
+               return $self->app->($env);
+       }
+       # All slots are busy.  `depth' is the maximum queue size, so refuse
+       # as soon as the queue already holds that many requests; checking
+       # here (not earlier) lets `depth => 0' mean "never queue" while
+       # still running requests whenever a slot is free.
+       return r503($self) if @{$self->{run_queue}} >= ($self->{depth} // 32);
+       sub { # capture write cb from PSGI server and queue up
+               $env->{'p-i.limiter.wcb'} = $_[0];
+               push @{$self->{run_queue}}, $env;
+       };
+}
+
+1;
+__END__
+
+=head1 NAME
+
+PublicInbox::PlackLimiter - limit concurrency to parts of a PSGI app
+
+=head1 SYNOPSIS
+
+       # In your .psgi file
+       use Plack::Builder;
+       builder {
+
+       # by default, only 2 requests may be processed at once:
+       enable '+PublicInbox::PlackLimiter';
+
+       # You will likely only want to limit certain expensive endpoints,
+       # while allowing maximum concurrency for inexpensive endpoints.
+       # You can do that by passing a `match_cb' parameter:
+       enable '+PublicInbox::PlackLimiter',
+               # some expensive endpoints for my public-inbox instance, YMMV
+               match_cb => sub {
+                       my ($env) = @_;
+                       $env->{PATH_INFO} =~ m!/(?:[Ttd]/|.+\.
+                                               (?:mbox\.gz|atom|html))\z!x ||
+                               $env->{QUERY_STRING} =~ /\bx=[tA]\b/
+               },
+               # You can increase `max' and `depth' to higher numbers
+               max => 3, # maximum concurrent requests
+               depth => 128, # maximum queue depth (size)
+               # You can also enable a stats endpoint if you wish (optional):
+               stats_match_cb => sub {
+                       my ($env) = @_;
+                       $env->{REQUEST_URI} eq '/stats' &&
+                               $env->{REMOTE_ADDR} eq '127.0.0.1'
+               };
+       # ...
+       }; # /builder
+
+=head1 DESCRIPTION
+
+PublicInbox::PlackLimiter lets a sysadmin limit concurrency to certain
+expensive endpoints while allowing the normal concurrency level of the
+server to run inexpensive requests.
+
+=head1 SEE ALSO
+
+L<Plack> L<Plack::Builder> L<Plack::Middleware>
+
+=cut