From: Eric Wong
Date: Wed, 2 Apr 2025 19:00:15 +0000 (+0000)
Subject: http: support trailers for chunked requests
X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=e476b2653836afa627b64b97fbae4855ccb8ed60;p=thirdparty%2Fpublic-inbox.git

http: support trailers for chunked requests

While HTTP/1.1 trailers are not widely supported by other software at
the moment, they may be someday.  Trailers are useful for things such
as calculating checksums and signatures on streaming uploads of
unseekable data.  So add support for them in case they're needed
someday, perhaps to allow uploading mail or ActivityPub messages over
HTTP.
---

diff --git a/MANIFEST b/MANIFEST
index c61423982..7602c39df 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -436,6 +436,7 @@ scripts/slrnspool2maildir
 scripts/ssoma-replay
 scripts/xhdr-num2mid
 t/.gitconfig
+t/.gitignore
 t/address.t
 t/admin.t
 t/alt.psgi
@@ -630,6 +631,7 @@ t/tail_notify.t
 t/thread-cycle.t
 t/thread-index-gap.t
 t/time.t
+t/trailer-up.h
 t/uri_imap.t
 t/uri_nntps.t
 t/utf8.eml
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index 69cba8a63..fd9021d61 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -34,11 +34,22 @@ use PublicInbox::Tmpfile;
 use constant {
 	CHUNK_START => -1,	# [a-f0-9]+\r\n
 	CHUNK_END => -2,	# \r\n
-	CHUNK_ZEND => -3,	# \r\n
+	CHUNK_TLR_END => -3,	# (trailers*)?\r\n
 	CHUNK_MAX_HDR => 256,
 };
 use Errno qw(EAGAIN);
 use PublicInbox::Compat qw(sum0);
+my $NOT_TRAILER = join '|', (qw(Content-Length Content-Type),
+	qw(Trailer Transfer-Encoding Content-Encoding Content-Range),
+	# RFC 7231 5.1 controls:
+	qw(Cache-Control Expect Host Max-Forwards Pragma Range TE),
+	# RFC 7231 5.2 conditionals
+	qw(If-Match If-None-Match
+		If-Modified-Since If-Unmodified-Since If-Range),
+	qw(Authorization Proxy-Authorization), # RFC 7235
+	'Cookie' # RFC 6265
+	# ignoring 7231 7.1 control data since those are for responses
+);
 
 # Use the same configuration parameter as git since this is primarily
 # a slow-client sponge for git-http-backend
@@ -96,10 +107,7 @@ sub event_step { # called by PublicInbox::DS
 				return quit($self, 400);
 			$self->do_read($rbuf, 8192, length($$rbuf)) or return;
 		}
-	# We do not support Trailers in chunked requests, for now.
-	# They're rarely-used and git (as of 2.7.2) does not use them.
-	return quit($self, 400) if exists($env{HTTP_TRAILER}) ||
-			grep(/\s/, keys %env); # stop smugglers
+	return quit($self, 400) if grep(/\s/, keys %env); # stop smugglers
 	$$rbuf = substr($$rbuf, $r);
 	my $len = input_prepare($self, \%env) //
 		return write_err($self, undef); # EMFILE/ENFILE
@@ -315,17 +323,23 @@ sub input_prepare {
 	# rfc 7230 3.3.2, 3.3.3,: favor Transfer-Encoding over Content-Length
 	my $hte = $env->{HTTP_TRANSFER_ENCODING};
+	my $tlr = $env->{HTTP_TRAILER};
 	if (defined $hte) {
 		# rfc7230 3.3.3, point 3 says only chunked is accepted
 		# as the final encoding.  Since neither public-inbox-httpd,
 		# git-http-backend, or our WWW-related code uses "gzip",
 		# "deflate" or "compress" as the Transfer-Encoding, we'll
 		# reject them:
-		return quit($self, 400) if $hte !~ /\Achunked\z/i;
-
+		return quit($self, 400) if $hte !~ /\Achunked\z/i ||
+			(defined($tlr) && grep(/\A($NOT_TRAILER)\z/,
+				split /\s*,\s*/s, $tlr));
 		$len = CHUNK_START;
 		$input = tmpfile('http.input', $self->{sock});
 	} else {
+		# while (AFAIK) no RFC says Trailer: is explicitly disallowed
+		# w/o `Transfer-Encoding: chunked', allowing it makes no sense
+		# and it could be a confusion attack to downstream proxies
+		return quit($self, 400) if defined $tlr;
 		$len = $env->{CONTENT_LENGTH};
 		if (defined $len) {
 			# rfc7230 3.3.3.4
@@ -363,6 +377,33 @@ sub recv_err {
 	}
 }
 
+sub merge_trailers ($$) {
+	my ($self, $tlr_buf) = @_;
+	my $env = $self->{env};
+	my $exp_tlr = $env->{HTTP_TRAILER};
+	return quit($self, 400) if !!$tlr_buf ne !!$exp_tlr;
+	$exp_tlr // return 1;
+	# copy expected entries from existing $env for append, if any
+	my @k = map { tr/-/_/; "HTTP_\U$_" } split /\s*,\s*/s, $exp_tlr;
+	# there's no public API in Plack to parse w/o the request line,
+	# so we make it look like a new request and avoid clobbering $env
+	my %tenv = map { defined($env->{$_}) ? ($_ => $env->{$_}) : () } @k;
+	substr($tlr_buf, 0, 0) = "GET / HTTP/1.0\r\n";
+	my $r = parse_http_request($tlr_buf .= "\r\n", \%tenv);
+	if ($r <= 0 || scalar @tenv{qw(CONTENT_TYPE CONTENT_LENGTH)}) {
+		warn 'BUG: incomplete trailer (non-fatal)' if $r == -2;
+		return quit($self, 400);
+	}
+	my %need = map { $_ => 1 } @k;
+	for my $k (grep /^HTTP_/, keys %tenv) {
+		# maybe the client sent more than promised:
+		delete $need{$k} // return quit($self, 400);
+		$env->{$k} = delete $tenv{$k};
+	}
+	# maybe the client sent less than promised...
+	keys %need ? quit($self, 400) : 1;
+}
+
 sub read_input_chunked { # unlikely...
 	my ($self, $rbuf) = @_;
 	$rbuf //= $self->{rbuf} // (\(my $x = ''));
@@ -370,11 +411,17 @@ sub read_input_chunked { # unlikely...
 	my $len = delete $self->{input_left};
 	while (1) { # chunk start
-		if ($len == CHUNK_ZEND) {
-			$$rbuf =~ s/\A\r\n//s and
+		if ($len == CHUNK_TLR_END) {
+			# $1: all trailers minus final CRLF
+			if ($$rbuf =~ s/\A((?:
+				(?:[a-z][a-z0-9\-]*:[ \t]* # key: LWS
+				| [ \t]+ # continuation LWS
+				)[^\n]* # trailer value
+				\n)* )\r\n//ismx) {
+				return if !merge_trailers($self, $1);
 				return app_dispatch($self, $input, $rbuf);
-
-			return quit($self, 400) if length($$rbuf) > 2;
+			}
+			return quit($self, 400) if length($$rbuf) > 0x4000;
 		}
 		if ($len == CHUNK_END) {
 			if ($$rbuf =~ s/\A\r\n//s) {
@@ -400,7 +447,7 @@ sub read_input_chunked { # unlikely...
 				return recv_err($self, $len);
 			# (implicit) goto chunk_start if $r > 0;
 		}
-		$len = CHUNK_ZEND if $len == 0;
+		$len = CHUNK_TLR_END if $len == 0;
 
 		# drain the current chunk
 		until ($len <= 0) {
diff --git a/t/.gitignore b/t/.gitignore
new file mode 100644
index 000000000..369abbdc6
--- /dev/null
+++ b/t/.gitignore
@@ -0,0 +1 @@
+trailer-up-*
diff --git a/t/httpd-corner.psgi b/t/httpd-corner.psgi
index 6d3ee9a2f..f4c8c51f1 100644
--- a/t/httpd-corner.psgi
+++ b/t/httpd-corner.psgi
@@ -40,6 +40,24 @@ my $app = sub {
 		}
 		$code = 200;
 		push @$body, $sha1->hexdigest;
+	} elsif ($path eq '/env') {
+		$code = 200;
+		require Data::Dumper;
+		my %env;
+		while (my ($k, $v) = each %$env) {
+			$env{$k} = $v if !ref $v;
+		}
+		my $in = $env->{'psgi.input'};
+		my $buf = '';
+		while (1) {
+			my $r = $in->read($buf, 0x4000, length($buf)) //
+				die "psgi read: $!";
+			last if $r == 0;
+		}
+		$env{'test.input_data'} = $buf;
+		push @$body, Data::Dumper->new([\%env])->
+			Useqq(1)->Terse(1)->Sortkeys(1)->Dump;
+		push @$body, "\n.\n"; # for readline w/ $/;
 	} elsif (my $fifo = $env->{HTTP_X_CHECK_FIFO}) {
 		if ($path eq '/slow-header') {
 			return sub {
diff --git a/t/httpd-corner.t b/t/httpd-corner.t
index c57bc39fa..18518b38f 100644
--- a/t/httpd-corner.t
+++ b/t/httpd-corner.t
@@ -4,9 +4,10 @@
 # note: our HTTP server should be standalone and capable of running
 # generic PSGI/Plack apps.
 use v5.12; use PublicInbox::TestCommon;
-use autodie qw(close getsockopt open pipe read seek setsockopt sysread
+use autodie qw(close getsockopt open pipe read rename seek setsockopt sysread
 	syswrite truncate unlink);
 use PublicInbox::DS qw(now);
+use PublicInbox::IO qw(poll_in write_file);
 use PublicInbox::Spawn qw(spawn popen_rd);
 require_mods '-httpd';
 use PublicInbox::SHA qw(sha1_hex);
@@ -24,6 +25,8 @@ my $out = "$tmpdir/stdout.log";
 my $psgi = "./t/httpd-corner.psgi";
 my $sock = tcp_server();
 my @zmods = qw(PublicInbox::GzipFilter IO::Uncompress::Gunzip);
+my $base_url = 'http://'.tcp_host_port($sock);
+use Config;
 
 # Make sure we don't clobber socket options set by systemd or similar
 # using socket activation:
@@ -129,20 +132,137 @@ if ('test worker death') {
 	ok(-e $err, 'stderr recreated after USR1');
 	ok(-e "$tmpdir/alt.err", 'alt.err recreated after USR1');
 }
+
+my $ck_env = sub {
+	my ($env, $exp, $only) = @_;
+	for my $k (sort keys %$exp) {
+		my $v = $exp->{$k};
+		is $env->{$k}, $v, "`$k' matches expected ($v)";
+	}
+	return if !$only;
+	for (sort grep { !defined($exp->{$_}) } grep /^HTTP_/, keys %$env) {
+		ok !(defined $env->{$_}), "`$_' defined unexpectedly";
+	}
+};
+my $put = < 'z', b => 'y' }],
+['1 good trailer', { a => 'b' }],
+['1 multi-line trailer', { a => "multi\r\n line" }],
+['2 multi-line trailers', { two => "m\r\n ul", ti => "li\r\n e" }],
+	) {
+	my ($d, $tlr) = @$t;
+	my ($hdr, $end) = ('Trailer: ', '');
+	my %exp = (
+		HTTP_TRAILER => join(', ', keys %$tlr),
+		HTTP_HOST => 'example.com',
+		HTTP_TRANSFER_ENCODING => 'chunked',
+		'test.input_data' => '0',
+	);
+	$hdr .= $exp{HTTP_TRAILER} . "\r\n";
+	for my $k (keys %$tlr) {
+		my $v = $tlr->{$k};
+		$end .= "$k: $v\r\n";
+		$v =~ s/\r\n[ \t]+/ /sg;
+		$exp{"HTTP_\U$k"} = $v;
+	}
+	my $c = $mkreq->($sock, $d, "$put$hdr$chunk_body$end\r\n");
+	my $buf = do { local $/ = "\r\n\r\n"; <$c> };
+	like $buf, qr!^HTTP/1\.. 200\b!, "request w/ $d" or next;
200\b!, "request w/ $d" or next; + { local $/ = "\n.\n"; chomp($buf = <$c>) } + my $env = eval $buf // die "eval $@ ($buf)"; # Perl hashref + $ck_env->($env, \%exp, 1); + } +} + +if ('test rejected trailers') { + for my $t ( +['unexpected trailer only', $put.$chunk_body."unexpected: bye\r\n"], +['expected + unexpected trailer', + $put."Trailer: a\r\n".$chunk_body."unexpected: !\r\na: b\r\n"], +['missing expected trailer', $put."Trailer: a\r\n".$chunk_body."\r\n"], +['Content-Length in trailer', + $put."Trailer: Content-Length\r\n".$chunk_body."Content-Length: 1\r\n"], +['Host in trailer', + $put."Trailer: Host\r\n".$chunk_body."Host: example.com\r\n"], +['long trailer', + $put."Trailer: long\r\n".$chunk_body.'Long: '.('a' x 0x8000)."\r\n"], +['trailer w/ Content-Length header', + "PUT /env HTTP/1.1\r\nHost: example.com\r\n". + "Content-Length: 11\r\nTrailer: a\r\n".$chunk_body."a: b\r\n"] + ) { + my ($d, $req) = @$t; + my $c = $mkreq->($sock, $d, $req."\r\n"); + poll_in $c, 30_000 or Carp::croak "timeout"; + my $buf = do { local $/ = "\r\n\r\n"; <$c> }; + like $buf, qr!^HTTP/1\.. 400\b!, "$d rejected"; + } +} + +{ + my $d = 'trailer appends to header'; + my $hdr = "Trailer: c\r\nc: a\r\n"; + my $end = "C: b\r\n"; + my $c = $mkreq->($sock, $d, "$put$hdr$chunk_body$end\r\n"); + poll_in $c, 30_000 or Carp::croak "timeout"; + my $buf = do { local $/ = "\r\n\r\n"; <$c> }; + like $buf, qr!^HTTP/1\.. 200\b!, "request w/ $d" or next; + { local $/ = "\n.\n"; chomp($buf = <$c>) } + my $env = eval $buf // die "eval $@ ($buf)"; # Perl hashref + my %exp = ( + HTTP_TRAILER => 'c', + HTTP_HOST => 'example.com', + HTTP_TRANSFER_ENCODING => 'chunked', + HTTP_C => 'a, b', + 'test.input_data' => '0', + ); + $ck_env->($env, \%exp, 1); +} + +# I don't trust myself to read RFCs properly and need a 3rd-party client: +my $tup = "t/trailer-up-$Config{archname}"; +my @tup_h_st = stat 't/trailer-up.h' or die "stat('t/trailer-up.h'): $!"; +SKIP: if (!-e $tup || (stat(_))[10] < $tup_h_st[10]) { + my $curl_config = require_cmd 'curl-config', 1; + my %ccfg; + for my $f (qw(version cc cflags libs)) { + chomp($ccfg{$f} = xqx [ $curl_config, "--$f" ]); + skip "$curl_config --$f \$?=$?", 1 if $?; + } + $ccfg{version} =~ /([0-9]+\.[0-9\.]+)/ or + skip "can't parse `$curl_config --version`: $ccfg{version}", 1; + my $curl_ver = eval 'v'.$1; + skip "libcurl $ccfg{version} <7.64.0 for CURLOPT_TRAILERFUNCTION", 1 + if $curl_ver lt v7.64.0; + write_file '>', "$tmpdir/trailer-up.c", qq{#include \n}; + my $cc = require_cmd $ccfg{cc}, 1; + my @build = split ' ', + "$cc $ccfg{cflags} -I t -o $tup.$<.$$.tmp ". 
+ "$tmpdir/trailer-up.c $ccfg{libs}"; + xsys(\@build) and skip "@build failed: \$?=$?", 1; + rename "$tup.$<.$$.tmp", $tup; + stat $tup; # for _ below: +} # SKIP +if (-x _) { + my %opt = (0 => \'i', 1 => \(my $o = ''), 2 => \(my $e = '')); + xsys [ $tup, "$base_url/env" ], undef, \%opt; + is $?, 0, 'trailer-up using libcurl'; + my ($buf) = split /\n\.\n/, $o; + my $env = eval $buf // die "eval $@ ($buf)"; # Perl hashref + $ck_env->($env, { 'test.input_data' => 'i', HTTP_A => 'b', + HTTP_TRAILER => 'a' }); +} + { my $conn = $mkreq->($sock, 'Header spaces bogus', "GET /empty HTTP/1.1\r\nSpaced-Out : 3\r\n\r\n"); sysread $conn, my $buf, 4096; like($buf, qr!\AHTTP/1\.[0-9] 400 !, 'got 400 response on bad request'); } -{ - my $conn = $mkreq->($sock, 'Trailer rejected (for now)', <($sock, 'streaming callback', "GET /callback HTTP/1.0\r\n\r\n"); @@ -340,8 +460,7 @@ my $len = length $str; is($len, 26, 'got the alphabet'); my $check_self = sub { my ($conn) = @_; - vec(my $rbits = '', fileno($conn), 1) = 1; - select($rbits, undef, undef, 30) or Carp::confess('timed out'); + poll_in $conn, 30_000 or Carp::croak "timeout"; read $conn, my $buf, 4096; my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); @@ -350,8 +469,7 @@ my $check_self = sub { SKIP: { my $curl = require_cmd('curl', 1) or skip('curl(1) missing', 4); - my $base = 'http://'.tcp_host_port($sock); - my $url = "$base/sha1"; + my $url = "$base_url/sha1"; my ($r, $w); pipe $r, $w; my $cmd = [$curl, qw(--tcp-nodelay -T- -HExpect: -gsSN), $url]; @@ -371,7 +489,7 @@ SKIP: { seek($cout, 0, SEEK_SET); is(<$cout>, sha1_hex($str), 'read expected body'); - my $fh = popen_rd([$curl, '-gsS', "$base/async-big"]); + my $fh = popen_rd([$curl, '-gsS', "$base_url/async-big"]); my $n = 0; my $non_zero = 0; while (1) { @@ -385,13 +503,13 @@ SKIP: { is($non_zero, 0, 'read all zeros'); require_mods(@zmods, 4); - my $buf = xqx([$curl, '-gsS', "$base/psgi-yield-gzip"]); + my $buf = xqx([$curl, '-gsS', "$base_url/psgi-yield-gzip"]); is($?, 0, 'curl succesful'); IO::Uncompress::Gunzip::gunzip(\$buf => \(my $out)); is($out, "hello world\n"); my $curl_rdr = { 2 => \(my $curl_err = '') }; $buf = xqx([$curl, qw(-gsSv --compressed), - "$base/psgi-yield-compressible"], undef, $curl_rdr); + "$base_url/psgi-yield-compressible"], undef, $curl_rdr); is($?, 0, 'curl --compressed successful'); is($buf, "goodbye world\n", 'gzipped response as expected'); like($curl_err, qr/\bContent-Encoding: gzip\b/, diff --git a/t/trailer-up.h b/t/trailer-up.h new file mode 100644 index 000000000..1423e0f06 --- /dev/null +++ b/t/trailer-up.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) all contributors + * License: GPL-3.0+ + * + * Trailer uploader using libcurl since we could've been reading RFCs + * wrong the whole time and no other client seems to support sending + * Trailers. 
+ *
+ * Built and used by t/httpd-corner.t
+ *
+ * .h suffix (not .c) to avoid MakeMaker trying to build this for XS
+ */
+#include <curl/curl.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define MY_SETOPT(hnd, opt, param) do { \
+	CURLcode ret = curl_easy_setopt(hnd, opt, param); \
+	if (ret != CURLE_OK) { \
+		fprintf(stderr, "curl_easy_setopt: %s\n", \
+			curl_easy_strerror(ret)); \
+		abort(); \
+	} \
+} while (0)
+
+static int trailer_cb(struct curl_slist **tr, void *data)
+{
+	/* libcurl frees the list */
+	*tr = curl_slist_append(*tr, "a: b");
+	return CURL_TRAILERFUNC_OK;
+}
+
+int main(int argc, char *argv[])
+{
+	CURLcode ret;
+	CURL *hnd;
+	char ebuf[CURL_ERROR_SIZE];
+	struct curl_slist *hdr = NULL;
+
+	if (argc < 2) {
+		fprintf(stderr, "%s URL\n", argv[0]);
+		return 1;
+	}
+	hnd = curl_easy_init();
+	if (!hnd)
+		abort();
+	if (!(hdr = curl_slist_append(hdr, "Expect:"))) // clobber
+		abort();
+	if (!(hdr = curl_slist_append(hdr, "Trailer: a")))
+		abort();
+	MY_SETOPT(hnd, CURLOPT_URL, argv[1]);
+	MY_SETOPT(hnd, CURLOPT_HTTPHEADER, hdr);
+	MY_SETOPT(hnd, CURLOPT_NOPROGRESS, 1L);
+	MY_SETOPT(hnd, CURLOPT_FAILONERROR, 1L);
+	MY_SETOPT(hnd, CURLOPT_UPLOAD, 1L);
+	MY_SETOPT(hnd, CURLOPT_HTTP_VERSION,
+		(long)CURL_HTTP_VERSION_1_1);
+	MY_SETOPT(hnd, CURLOPT_TRAILERFUNCTION, trailer_cb);
+	MY_SETOPT(hnd, CURLOPT_ERRORBUFFER, ebuf);
+	MY_SETOPT(hnd, CURLOPT_VERBOSE, 1L);
+
+	MY_SETOPT(hnd, CURLOPT_READFUNCTION, fread);
+	MY_SETOPT(hnd, CURLOPT_READDATA, stdin);
+
+	MY_SETOPT(hnd, CURLOPT_WRITEFUNCTION, fwrite);
+	MY_SETOPT(hnd, CURLOPT_WRITEDATA, stdout);
+
+	ret = curl_easy_perform(hnd);
+	if (ret != CURLE_OK)
+		fprintf(stderr, "curl_easy_perform: %s\n",
+			curl_easy_strerror(ret));
+
+	curl_slist_free_all(hdr);
+	curl_easy_cleanup(hnd);
+
+	return (int)ret;
+}
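
For reference, the wire format the new code accepts looks like the
sketch below: a chunked PUT announces its trailer field(s) up front
via `Trailer:', sends the body as chunks, and appends the promised
field(s) after the final zero-length chunk.  This is only an
illustration and not part of the patch or test suite; the host, port,
path and the `X-Checksum' field are placeholders, and it assumes an
instance of t/httpd-corner.psgi listening locally:

	#!/usr/bin/env perl
	# sketch only: chunked upload w/ a trailer against a local
	# server running t/httpd-corner.psgi (addr/port are made up)
	use v5.12;
	use IO::Socket::INET;
	my $c = IO::Socket::INET->new(PeerAddr => '127.0.0.1',
		PeerPort => 8080, Proto => 'tcp') or die "connect: $!";
	my $body = 'hello';
	print $c "PUT /env HTTP/1.1\r\n",
		"Host: example.com\r\n",
		"Transfer-Encoding: chunked\r\n",
		"Trailer: X-Checksum\r\n", # promise the field up front
		"\r\n",
		sprintf("%x\r\n%s\r\n", length($body), $body), # one chunk
		"0\r\n",		# zero-length chunk ends the body
		"X-Checksum: abc123\r\n", # the promised trailer field
		"\r\n";			# final CRLF ends the trailer section
	local $/ = "\r\n\r\n";
	print scalar <$c>;	# response status line + headers

Note the `Trailer:' header is required here: input_prepare() rejects
a Trailer: header which names blacklisted fields (or appears without
chunked encoding), and merge_trailers() enforces that exactly the
announced fields arrive.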