From 25b49fc37121d8584b84b44b20c910ef43c44950 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 4 Apr 2016 21:09:19 +0000 Subject: [PATCH] repobrowse: snapshot support for cgit compatibility We currently do not display links to snapshots, but may in the future (optionally, like cgit). However, support snapshots for compatibility reasons in case people had cached URLs or auto-generated them somewhere. We won't natively support xz and bzip2 just yet, but will document (at least in comments) how to enable these expensive compression schemes via git-config(1). Also, support disabling certain archive types to thwart URL guessing or old cached links from spiders burning bandwidth. In retrospect, enabling snapshots for my own cgit views was a bad idea since it wastes bandwidth from crawlers and is often not useful for users with maintainer-built files (e.g. "configure" from "configure.ac" for autoconf, where only the latter is stored in git and the former is generated in release tarballs). 
--- lib/PublicInbox/Repobrowse.pm | 2 +- lib/PublicInbox/RepobrowseBase.pm | 4 +- lib/PublicInbox/RepobrowseConfig.pm | 21 +++- lib/PublicInbox/RepobrowseGitSnapshot.pm | 131 +++++++++++++++++++++++ t/repobrowse_git_httpd.t | 18 ++++ t/repobrowse_git_snapshot.t | 46 ++++++++ 6 files changed, 218 insertions(+), 4 deletions(-) create mode 100644 lib/PublicInbox/RepobrowseGitSnapshot.pm create mode 100644 t/repobrowse_git_snapshot.t diff --git a/lib/PublicInbox/Repobrowse.pm b/lib/PublicInbox/Repobrowse.pm index 0c4cf144e..0a812f72d 100644 --- a/lib/PublicInbox/Repobrowse.pm +++ b/lib/PublicInbox/Repobrowse.pm @@ -25,7 +25,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape); use PublicInbox::RepobrowseConfig; my %CMD = map { lc($_) => $_ } qw(Log Commit Tree Patch Blob Plain Tag Atom - Diff); + Diff Snapshot); my %VCS = (git => 'Git'); my %LOADED; diff --git a/lib/PublicInbox/RepobrowseBase.pm b/lib/PublicInbox/RepobrowseBase.pm index 7863d6ce2..33647fca5 100644 --- a/lib/PublicInbox/RepobrowseBase.pm +++ b/lib/PublicInbox/RepobrowseBase.pm @@ -5,7 +5,7 @@ use strict; use warnings; require PublicInbox::RepobrowseGitQuery; use PublicInbox::Hval; -our %MIME_TYPE_WHITELIST = ( 'application/pdf' => 1 ); +our %MIME_TYPE_WHITELIST = ('application/pdf' => 1); sub new { bless {}, shift } @@ -107,7 +107,7 @@ sub r { # mainly for curl (no-'-L') users: $body = "Redirecting to $redir\n"; } else { - die "not implemented, yet: $status"; + push @h, qw(Content-Type text/plain); } [ $status, \@h, [ $body ] ] diff --git a/lib/PublicInbox/RepobrowseConfig.pm b/lib/PublicInbox/RepobrowseConfig.pm index 703212d9d..77ef46bb2 100644 --- a/lib/PublicInbox/RepobrowseConfig.pm +++ b/lib/PublicInbox/RepobrowseConfig.pm @@ -11,6 +11,10 @@ sub new { $file = default_file() unless defined($file); my $self = bless PublicInbox::Config::git_config_dump($file), $class; $self->{-cache} = {}; + + # hard disable these with '-' prefix by default: + $self->{'repobrowse.snapshots'} ||= '-tar.bz2 -tar.xz'; 
+ # for root $self->{-groups} = { -hidden => [], -none => [] }; $self; @@ -40,6 +44,12 @@ sub lookup { $rv->{path} = $path; $rv->{repo} = $repo_path; + # snapshots: + my $snap = (split('/', $repo_path))[-1]; + $snap =~ s/\.git\z//; # seems common for git URLs to end in ".git" + $rv->{snapshot_re} = qr/\A\Q$snap\E[-_]/; + $rv->{snapshot_pfx} = $snap; + # gitweb compatibility foreach my $key (qw(description cloneurl)) { $rv->{$key} = try_cat("$path/$key"); @@ -48,9 +58,18 @@ sub lookup { $rv->{desc_html} = PublicInbox::Hval->new_oneline($rv->{description})->as_html; - foreach my $key (qw(publicinbox vcs readme group)) { + foreach my $key (qw(publicinbox vcs readme group snapshots)) { $rv->{$key} = $self->{"repo.$repo_path.$key"}; } + unless (defined $rv->{snapshots}) { + $rv->{snapshots} = $self->{'repobrowse.snapshots'} || ''; + } + + my %disabled; + foreach (split(/\s+/, $rv->{snapshots})) { + s/\A-// and $disabled{$_} = 1; + } + $rv->{snapshots_disabled} = \%disabled; my $g = $rv->{group}; defined $g or $g = '-none'; diff --git a/lib/PublicInbox/RepobrowseGitSnapshot.pm b/lib/PublicInbox/RepobrowseGitSnapshot.pm new file mode 100644 index 000000000..106d5651e --- /dev/null +++ b/lib/PublicInbox/RepobrowseGitSnapshot.pm @@ -0,0 +1,131 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ + +# shows the /snapshot/ endpoint for git repositories +# Mainly for compatibility reasons with cgit, I'm unsure if +# showing this in a repository viewer is a good idea. 
+ +package PublicInbox::RepobrowseGitSnapshot; +use strict; +use warnings; +use base qw(PublicInbox::RepobrowseBase); +use PublicInbox::Git; +our $SUFFIX; +BEGIN { + # as described in git-archive(1), users may add support for + # other compression schemes such as xz or bz2 via git-config(1): + # git config tar.tar.xz.command "xz -c" + # git config tar.tar.bz2.command "bzip2 -c" + chomp(my @l = `git archive --list`); + $SUFFIX = join('|', map { quotemeta $_ } @l); +} + +# Not using standard mime types since the compressed tarballs are +# special or do not match my /etc/mime.types. Choose what gitweb +# and cgit agree on for compatibility. +our %FMT_TYPES = ( + 'tar' => 'application/x-tar', + 'tar.bz2' => 'application/x-bzip2', + 'tar.gz' => 'application/x-gzip', + 'tar.xz' => 'application/x-xz', + 'tgz' => 'application/x-gzip', + 'zip' => 'application/x-zip', +); + +sub call_git_snapshot ($$) { # invoked by PublicInbox::RepobrowseBase::call + my ($self, $req) = @_; + + my @extra = @{$req->{extra}}; + my $ref = shift @extra; + return $self->r(404) if (!defined $ref) || scalar(@extra); + my $orig_fn = $ref; + + # just in case git changes refname rules, don't allow wonky filenames + # to break the Content-Disposition header, either. + return $self->r(404) if $orig_fn =~ /["\s]/s; + return $self->r(404) unless ($ref =~ s/\.($SUFFIX)\z//o); + my $fmt = $1; + + my $repo_info = $req->{repo_info}; + + # support disabling certain snapshot types entirely to thwart + # URL guessing since it could burn server resources. 
+ return $self->r(404) if $repo_info->{snapshots_disabled}->{$fmt}; + + # strip optional basename (may not exist) + $ref =~ s/$repo_info->{snapshot_re}//; + + # don't allow option/command injection, git refs do not start with '-' + return $self->r(404) if $ref =~ /\A-/; + + my $git = $repo_info->{git}; + my $tree; + + # try prefixing "v" or "V" for tag names + foreach my $r ($ref, "v$ref", "V$ref") { + $tree = $git->qx([qw(rev-parse --verify --revs-only), $r], + undef, { 2 => $git->err_begin }); + if (defined $tree) { + chomp $tree; + last if $tree ne ''; + } + } + return $self->r(404) if (!defined $tree || $tree eq ''); + + my $pfx = "$repo_info->{snapshot_pfx}-$ref/"; + my @cmd = ('archive', "--prefix=$pfx", "--format=$fmt", $tree); + $req->{rpipe} = $git->popen(\@cmd, undef, { 2 => $git->err_begin }); + + my $env = $req->{cgi}->env; + my $vin; + my $end = sub { + my ($n) = @_; + if (my $fh = delete $req->{fh}) { + $fh->close; + } elsif (my $res = delete $req->{res}) { + $res->($self->r(500)); + } + if (my $rpipe = delete $req->{rpipe}) { + $rpipe->close; # _may_ be Danga::Socket::close + } + }; + my $fail = sub { + if ($!{EAGAIN} || $!{EINTR}) { + select($vin, undef, undef, undef) if $vin; + # $vin is undef on async, so this is a noop + return; + } + my $e = $!; + $end->(); + my $err = $env->{'psgi.errors'}; + $err->print("git archive ($git->{git_dir}): $e\n"); + }; + my $cb = sub { + my $n = $req->{rpipe}->sysread(my $buf, 65536); + return $fail->() unless defined $n; + return $end->() if $n == 0; + if (my $res = delete $req->{res}) { + my $h = [ 'Content-Type', + $FMT_TYPES{$fmt} || 'application/octet-stream', + 'Content-Disposition', + qq(inline; filename="$orig_fn"), + 'ETag', qq("$tree") ]; + $req->{fh} = $res->([200, $h]); + } + my $fh = $req->{fh} or return; + $fh->write($buf); + }; + if (my $async = $env->{'pi-httpd.async'}) { + $req->{rpipe} = $async->($req->{rpipe}, $cb); + sub { $req->{res} = $_[0] } # let Danga::Socket handle the rest. 
+ } else { # synchronous loop for other PSGI servers + $vin = ''; + vec($vin, fileno($req->{rpipe}), 1) = 1; + sub { + $req->{res} = $_[0]; # Plack response callback + while ($req->{rpipe}) { $cb->() } + } + } +} + +1; diff --git a/t/repobrowse_git_httpd.t b/t/repobrowse_git_httpd.t index baa7be593..3e52b1b5c 100644 --- a/t/repobrowse_git_httpd.t +++ b/t/repobrowse_git_httpd.t @@ -115,5 +115,23 @@ test_psgi(uri => $uri, client => sub { is($b2, substr($body, 5), 'substring matches on 206'); } +test_psgi(uri => $uri, client => sub { + my ($cb) = @_; + my $res = $cb->(GET($uri . 'test.git/snapshot/test-master.tar.gz')); + is(200, $res->code, 'got gzipped tarball'); + my $got = "$tmpdir/got.tar.gz"; + my $exp = "$tmpdir/exp.tar.gz"; + open my $fh, '>', $got or die "open got.tar.gz: $!"; + print $fh $res->content; + close $fh or die "close failed: $!"; + $res = undef; + my $rc = system('git', "--git-dir=$test->{git_dir}", + qw(archive --prefix=test-master/ --format=tar.gz), + '-o', $exp, 'master'); + is(0, $rc, 'git-archive generated check correctly'); + is(0, system('cmp', $got, $exp), 'got expected gzipped tarball'); + +}); + done_testing(); 1; diff --git a/t/repobrowse_git_snapshot.t b/t/repobrowse_git_snapshot.t new file mode 100644 index 000000000..b608459ee --- /dev/null +++ b/t/repobrowse_git_snapshot.t @@ -0,0 +1,46 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +my $test = require './t/repobrowse_common_git.perl'; + +test_psgi($test->{app}, sub { + my ($cb) = @_; + my ($req, $rc, $res); + + $req = 'http://example.com/test.git/snapshot/test-master.tar.gz'; + $res = $cb->(GET($req)); + is($res->code, 200, 'got 200 response from $NAME-master-tar.gz'); + is($res->header('Content-Type'), 'application/x-gzip', + 'Content-Type is as expected'); + + $req = 'http://example.com/test.git/snapshot/test-nonexistent.tar.gz'; + $res = $cb->(GET($req)); + is($res->code, 404, 'got 404 for non-existent'); + + $rc = system('git', 
"--git-dir=$test->{git_dir}", 'tag', '-a', + '-m', 'annotated tag!', 'v1.0.0'); + is($rc, 0, 'created annotated 1.0.0 tag'); + $req = 'http://example.com/test.git/snapshot/test-1.0.0.tar.gz'; + $res = $cb->(GET($req)); + is($res->code, 200, 'got 200 response for tag'); + is($res->header('Content-Type'), 'application/x-gzip', + 'Content-Type is as expected'); + is($res->header('Content-Disposition'), + 'inline; filename="test-1.0.0.tar.gz"', + 'Content-Disposition is as expected'); + + $rc = system('git', "--git-dir=$test->{git_dir}", 'tag', + '-m', 'lightweight tag!', 'v2.0.0'); + is($rc, 0, 'created lightweight 2.0.0 tag'); + $req = 'http://example.com/test.git/snapshot/test-2.0.0.tar.gz'; + $res = $cb->(GET($req)); + is($res->code, 200, 'got 200 response for tag'); + is($res->header('Content-Type'), 'application/x-gzip', + 'Content-Type is as expected'); + is($res->header('Content-Disposition'), + 'inline; filename="test-2.0.0.tar.gz"', + 'Content-Disposition is as expected'); +}); + +done_testing(); -- 2.47.3