From: Eric Wong
Date: Tue, 13 Dec 2016 21:56:39 +0000 (+0000)
Subject: Merge remote-tracking branch 'origin/repobrowse' into repobrowse
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=00488f0cfe9f81d04cd65d09ea783e860c937401;p=thirdparty%2Fpublic-inbox.git

Merge remote-tracking branch 'origin/repobrowse' into repobrowse

* origin/repobrowse: (98 commits)
  t/repobrowse_git_httpd.t: ensure signature exists for split
  t/repobrowse_git_tree.t: fix test for lack of bold
  repobrowse: fix alignment of gitlink entries
  repobrowse: show invalid type for tree views
  repobrowse: do not bold directory names in tree view
  repobrowse: reduce checks for response fh
  repobrowse: larger, short-lived buffer for reading patches
  repobrowse: reduce risk of callback reference cycles
  repobrowse: snapshot support for cgit compatibility
  test: disable warning for Plack::Test::Impl
  repobrowse: avoid confusing linkification for "diff"
  repobrowse: git commit view uses pi-httpd.async
  repobrowse: more consistent variable naming for /commit/
  repobrowse: show roughly equivalent "diff-tree" invocation
  repobrowse: reduce local variables for state management
  repobrowse: summary handles multiple README types
  repobrowse: remove bold decorations from diff view
  repobrowse: common git diff parsing code
  repobrowse: implement diff view for compatibility
  examples/repobrowse.psgi: disable Chunked response by default
  ...
--- 00488f0cfe9f81d04cd65d09ea783e860c937401 diff --cc MANIFEST index 3388b1a19,259f42ce0..8e0681a25 --- a/MANIFEST +++ b/MANIFEST @@@ -27,32 -19,13 +27,33 @@@ examples/apache2_perl.con examples/apache2_perl_old.conf examples/cgi-webrick.rb examples/cgit-commit-filter.lua +examples/logrotate.conf examples/public-inbox-config +examples/public-inbox-httpd.socket +examples/public-inbox-httpd@.service +examples/public-inbox-nntpd.socket +examples/public-inbox-nntpd@.service examples/public-inbox.psgi ++examples/repobrowse.psgi +examples/unsubscribe-milter.socket +examples/unsubscribe-milter@.service +examples/unsubscribe-psgi.socket +examples/unsubscribe-psgi@.service +examples/unsubscribe.milter +examples/unsubscribe.psgi +examples/varnish-4.vcl +lib/PublicInbox/Address.pm +lib/PublicInbox/AltId.pm lib/PublicInbox/Config.pm lib/PublicInbox/Daemon.pm +lib/PublicInbox/Emergency.pm +lib/PublicInbox/EvCleanup.pm lib/PublicInbox/ExtMsg.pm lib/PublicInbox/Feed.pm -lib/PublicInbox/Filter.pm +lib/PublicInbox/Filter/Base.pm +lib/PublicInbox/Filter/Mirror.pm +lib/PublicInbox/Filter/Vger.pm +lib/PublicInbox/GetlineBody.pm lib/PublicInbox/Git.pm lib/PublicInbox/GitHTTPBackend.pm lib/PublicInbox/HTTP.pm @@@ -66,15 -35,11 +67,34 @@@ lib/PublicInbox/Listener.p lib/PublicInbox/MDA.pm lib/PublicInbox/MID.pm lib/PublicInbox/Mbox.pm +lib/PublicInbox/MsgIter.pm lib/PublicInbox/Msgmap.pm lib/PublicInbox/NNTP.pm -lib/PublicInbox/NewsGroup.pm +lib/PublicInbox/NNTPD.pm lib/PublicInbox/NewsWWW.pm +lib/PublicInbox/ParentPipe.pm lib/PublicInbox/ProcessPipe.pm +lib/PublicInbox/Qspawn.pm ++lib/PublicInbox/Repobrowse.pm ++lib/PublicInbox/RepobrowseBase.pm ++lib/PublicInbox/RepobrowseConfig.pm ++lib/PublicInbox/RepobrowseGit.pm ++lib/PublicInbox/RepobrowseGitAtom.pm ++lib/PublicInbox/RepobrowseGitBlob.pm ++lib/PublicInbox/RepobrowseGitCommit.pm ++lib/PublicInbox/RepobrowseGitDiff.pm ++lib/PublicInbox/RepobrowseGitDiffCommon.pm ++lib/PublicInbox/RepobrowseGitFallback.pm 
++lib/PublicInbox/RepobrowseGitLog.pm ++lib/PublicInbox/RepobrowseGitPatch.pm ++lib/PublicInbox/RepobrowseGitPlain.pm ++lib/PublicInbox/RepobrowseGitQuery.pm ++lib/PublicInbox/RepobrowseGitSnapshot.pm ++lib/PublicInbox/RepobrowseGitSummary.pm ++lib/PublicInbox/RepobrowseGitTag.pm ++lib/PublicInbox/RepobrowseGitTree.pm ++lib/PublicInbox/RepobrowseRoot.pm +lib/PublicInbox/SaPlugin/ListMirror.pm lib/PublicInbox/Search.pm lib/PublicInbox/SearchIdx.pm lib/PublicInbox/SearchMsg.pm @@@ -134,10 -78,7 +154,11 @@@ t/git. t/html_index.t t/httpd-corner.psgi t/httpd-corner.t +t/httpd-unix.t t/httpd.t ++t/hval.t +t/import.t +t/inbox.t t/init.t t/linkify.t t/main-bin/spamc @@@ -149,14 -88,7 +170,23 @@@ t/nntp. t/nntpd.t t/plack.t t/precheck.t +t/psgi_attach.t +t/psgi_mount.t +t/psgi_text.t +t/qspawn.t ++t/repobrowse.t ++t/repobrowse_common_git.perl ++t/repobrowse_git.t ++t/repobrowse_git_atom.t ++t/repobrowse_git_commit.t ++t/repobrowse_git_httpd.t ++t/repobrowse_git_plain.t ++t/repobrowse_git_snapshot.t ++t/repobrowse_git_tree.t t/search.t +t/spamcheck_spamc.t t/spawn.t +t/thread-cycle.t t/utf8.mbox t/view.t +t/watch_maildir.t diff --cc lib/PublicInbox/GitHTTPBackend.pm index 1987a013e,d0ce80bc5..0275a2a01 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@@ -44,37 -29,14 +44,37 @@@ sub r ($;$) } sub serve { - my ($cgi, $git, $path) = @_; - my $service = $cgi->param('service') || ''; - if ($service =~ /\Agit-\w+-pack\z/ || $path =~ /\Agit-\w+-pack\z/) { - my $ok = serve_smart($cgi, $git, $path); + my ($env, $git, $path) = @_; + + # Documentation/technical/http-protocol.txt in git.git + # requires one and exactly one query parameter: + if ($env->{QUERY_STRING} =~ /\Aservice=git-\w+-pack\z/ || + $path =~ /\Agit-\w+-pack\z/) { + my $ok = serve_smart($env, $git, $path); return $ok if $ok; + # fall through to dumb HTTP... 
} - - serve_dumb($cgi, $git, $path); + serve_dumb($env, $git, $path); +} + +sub err ($@) { + my ($env, @msg) = @_; + $env->{'psgi.errors'}->print(@msg, "\n"); +} + +sub drop_client ($) { + if (my $io = $_[0]->{'psgix.io'}) { + $io->close; # this is Danga::Socket::close + } +} + +my $prev = 0; +my $exp; +sub cache_one_year { + my ($h) = @_; + my $t = time + 31536000; + push @$h, 'Expires', $t == $prev ? $exp : ($exp = time2str($prev = $t)), + 'Cache-Control', 'public, max-age=31536000'; } sub serve_dumb { diff --cc lib/PublicInbox/Hval.pm index 77acecda0,c0db56677..15b5fd3ec --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@@ -7,9 -7,10 +7,10 @@@ package PublicInbox::Hval use strict; use warnings; use Encode qw(find_encoding); +use PublicInbox::MID qw/mid_clean mid_escape/; + use URI::Escape qw(uri_escape_utf8); -use PublicInbox::MID qw/mid_clean/; use base qw/Exporter/; - our @EXPORT_OK = qw/ascii_html/; + our @EXPORT_OK = qw/ascii_html utf8_html to_attr from_attr/; # for user-generated content (UGC) which may have excessively long lines # and screw up rendering on some browsers. This is the only CSS style diff --cc lib/PublicInbox/Repobrowse.pm index 000000000,0a812f72d..cdd708e96 mode 000000,100644..100644 --- a/lib/PublicInbox/Repobrowse.pm +++ b/lib/PublicInbox/Repobrowse.pm @@@ -1,0 -1,150 +1,151 @@@ + # Copyright (C) 2015 all contributors + # License: AGPL-3.0+ + + # Version control system (VCS) repository viewer like cgit or gitweb, + # but with optional public-inbox archive integration. + # This uses cgit-compatible PATH_INFO URLs. + # This may be expanded to support other Free Software VCSes such as + # Subversion and Mercurial, so not just git + # + # Same web design principles as PublicInbox::WWW for supporting the + # lowest common denominators (see bottom of Documentation/design_www.txt) + # + # This allows an M:N relationship between "normal" repos for project + # and public-inbox (ssoma) git repositories where N may be zero. 
+ # In other words, repobrowse must work for repositories without
+ # any public-inbox at all; or with multiple public-inboxes.
+ # And the rest of public-inbox will always work without a "normal"
+ # code repo for the project.
+
+ package PublicInbox::Repobrowse;
+ use strict;
+ use warnings;
+ use Plack::Request;
+ use URI::Escape qw(uri_escape_utf8 uri_unescape);
+ use PublicInbox::RepobrowseConfig;
+
+ my %CMD = map { lc($_) => $_ } qw(Log Commit Tree Patch Blob Plain Tag Atom
+ 		Diff Snapshot);
+ my %VCS = (git => 'Git');
+ my %LOADED;
+
+ sub new {
+ 	my ($class, $rconfig) = @_;
+ 	$rconfig ||= PublicInbox::RepobrowseConfig->new;
+ 	bless { rconfig => $rconfig }, $class;
+ }
+
+ # simple text/plain response for errors; the body repeats the status code
+ sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
+
+ # Remove trailing slash in URLs which regular humans are likely to read
+ # in an attempt to improve cache hit ratios.  Do not redirect
+ # plain|patch|blob|fallback endpoints since those could be using
+ # automated tools which may not follow redirects automatically
+ # (e.g. curl does not follow 301 unless given "-L")
+ my %NO_TSLASH = map { $_ => 1 } qw(Log Commit Tree Summary Tag);
+ sub no_tslash {
+ 	my ($cgi) = @_; # Plack::Request
+ 	my ($base, $uri);
+ 	$base = $cgi->base;
+ 	$base =~ s!/+\z!!;
+ 	$uri = $cgi->request_uri;
+ 	my $qs = '';
+ 	if ($uri =~ s/(\?.+)\z//) { # keep the query string for the redirect
+ 		$qs = $1;
+ 	}
+ 	if ($uri !~ s!/+\z!!) {
+ 		warn "W: buggy redirect? base=$base request_uri=$uri\n";
+ 	}
+ 	my $url = $base . $uri . $qs;
+ 	[ 301,
+ 	  [ Location => $url, 'Content-Type' => 'text/plain' ],
+ 	  [ "Redirecting to $url\n" ] ]
+ }
+
+ sub root_index {
+ 	my ($self) = @_;
+ 	my $mod = load_once('PublicInbox::RepobrowseRoot');
+ 	$mod->new->call($self->{rconfig}); # RepobrowseRoot::call
+ }
+
+ sub call {
+ 	my ($self, $env) = @_;
+ 	my $cgi = Plack::Request->new($env);
+ 	my $method = $cgi->method; # must be exactly one of the verbs below
+ 	return r(405, 'Method Not Allowed') if ($method !~ /\A(?:GET|HEAD|POST)\z/);
+
+ 	# URL syntax: / repo [ / cmd [ / path ] ]
+ 	# cmd: a lowercased key of %CMD (log, commit, tree, diff, ...)
+ 	# repo and path (@extra) may both contain '/'
+ 	my $path_info = uri_unescape($cgi->path_info);
+ 	my (undef, $repo_path, @extra) = split(m{/+}, $path_info, -1);
+
+ 	return $self->root_index unless length($repo_path);
+
+ 	my $rconfig = $self->{rconfig}; # RepobrowseConfig
+ 	my $repo_info;
+ 	until ($repo_info = $rconfig->lookup($repo_path)) {
+ 		my $p = shift @extra or last;
+ 		$repo_path .= "/$p"; # repo names may contain '/', try a longer one
+ 	}
+ 	return r404() unless $repo_info;
+
+ 	my $req = {
+ 		repo_info => $repo_info,
+ 		extra => \@extra, # path
+ 		cgi => $cgi,
+ 		rconfig => $rconfig,
++		env => $env,
+ 	};
+ 	my $tslash = 0;
+ 	my $cmd = shift @extra;
+ 	my $vcs_lc = $repo_info->{vcs};
+ 	my $vcs = $VCS{$vcs_lc} or return r404();
+ 	my $mod;
+ 	if (defined $cmd && length $cmd) {
+ 		$mod = $CMD{$cmd};
+ 		unless ($mod) { # unknown cmd: treat it as part of the path
+ 			unshift @extra, $cmd;
+ 			$mod = 'Fallback';
+ 		}
+ 		$req->{relcmd} = '../' x scalar(@extra);
+ 	} else {
+ 		$mod = 'Summary';
+ 		$cmd = 'summary';
+ 		if ($path_info =~ m!/\z!) {
+ 			$tslash = $path_info =~ tr!/!!;
+ 		} else {
+ 			my @x = split('/', $repo_path);
+ 			$req->{relcmd} = @x > 1 ? "./$x[-1]/" : "/$x[-1]/";
+ 		}
+ 	}
+ 	while (@extra && $extra[-1] eq '') {
+ 		pop @extra;
+ 		++$tslash;
+ 	}
+
+ 	return no_tslash($cgi) if ($tslash && $NO_TSLASH{$mod});
+
+ 	$req->{tslash} = $tslash;
+ 	$mod = load_once("PublicInbox::Repobrowse$vcs$mod");
+ 	$vcs = load_once("PublicInbox::$vcs");
+ 	$repo_info->{$vcs_lc} ||= $vcs->new($repo_info->{path});
+
+ 	$req->{expath} = join('/', @extra);
+ 	my $rv = eval { $mod->new->call($cmd, $req) }; # RepobrowseBase::call
+ 	$rv || r404(); # a die or a false return both end up as 404
+ }
+
+ sub r404 { r(404, 'Not Found') }
+
+ sub load_once {
+ 	my ($mod) = @_;
+
+ 	return $mod if $LOADED{$mod};
+ 	eval "require $mod";
+ 	$LOADED{$mod} = 1 unless $@; # failures are not cached
+ 	$mod;
+ }
+
+ 1;
diff --cc lib/PublicInbox/RepobrowseConfig.pm
index 000000000,77ef46bb2..a08c6cecd
mode 000000,100644..100644
--- a/lib/PublicInbox/RepobrowseConfig.pm
+++ b/lib/PublicInbox/RepobrowseConfig.pm
@@@ -1,0 -1,87 +1,88 @@@
+ # Copyright (C) 2015 all contributors
+ # License: AGPL-3.0+
+ package PublicInbox::RepobrowseConfig;
+ use strict;
+ use warnings;
 -use PublicInbox::Config qw/try_cat/;
++use PublicInbox::Inbox;
++use PublicInbox::Config;
+ require PublicInbox::Hval;
+
+ sub new {
+ 	my ($class, $file) = @_;
+ 	$file = default_file() unless defined($file);
+ 	my $self = bless PublicInbox::Config::git_config_dump($file), $class;
+ 	$self->{-cache} = {};
+
+ 	# hard disable these with '-' prefix by default:
+ 	$self->{'repobrowse.snapshots'} ||= '-tar.bz2 -tar.xz';
+
+ 	# for root
+ 	$self->{-groups} = { -hidden => [], -none => [] };
+ 	$self;
+ }
+
+ sub default_file {
+ 	my $f = $ENV{REPOBROWSE_CONFIG};
+ 	return $f if defined $f;
+ 	PublicInbox::Config::config_dir() . '/repobrowse_config';
+ }
+
+ # Returns something like:
+ # {
+ #	path => '/home/git/foo.git',
+ #	description => 'foo repo',
+ #	cloneurl => "git://example.com/foo.git\nhttp://example.com/foo.git",
+ #	publicinbox => '/home/pub/foo-public.git',
+ # }
+ sub lookup {
+ 	my ($self, $repo_path) = @_; # "git.git"
+ 	my $rv;
+
+ 	$rv = $self->{-cache}->{$repo_path} and return $rv;
+
+ 	my $path = $self->{"repo.$repo_path.path"};
+ 	(defined $path && -d $path) or return; # not configured or no such dir
+ 	$rv->{path} = $path;
+ 	$rv->{repo} = $repo_path;
+
+ 	# snapshots:
+ 	my $snap = (split('/', $repo_path))[-1];
+ 	$snap =~ s/\.git\z//; # seems common for git URLs to end in ".git"
+ 	$rv->{snapshot_re} = qr/\A\Q$snap\E[-_]/;
+ 	$rv->{snapshot_pfx} = $snap;
+
+ 	# gitweb compatibility
+ 	foreach my $key (qw(description cloneurl)) {
 -		$rv->{$key} = try_cat("$path/$key");
++		$rv->{$key} = PublicInbox::Inbox::try_cat("$path/$key");
+ 	}
+
+ 	$rv->{desc_html} =
+ 		PublicInbox::Hval->new_oneline($rv->{description})->as_html;
+
+ 	foreach my $key (qw(publicinbox vcs readme group snapshots)) {
+ 		$rv->{$key} = $self->{"repo.$repo_path.$key"};
+ 	}
+ 	unless (defined $rv->{snapshots}) {
+ 		$rv->{snapshots} = $self->{'repobrowse.snapshots'} || '';
+ 	}
+
+ 	my %disabled;
+ 	foreach (split(/\s+/, $rv->{snapshots})) {
+ 		s/\A-// and $disabled{$_} = 1;
+ 	}
+ 	$rv->{snapshots_disabled} = \%disabled;
+
+ 	my $g = $rv->{group};
+ 	defined $g or $g = '-none';
+ 	if (ref($g) eq 'ARRAY') {
+ 		push @{$self->{-groups}->{$_} ||= []}, $repo_path foreach @$g;
+ 	} else {
+ 		push @{$self->{-groups}->{$g} ||= []}, $repo_path;
+ 	}
+
+ 	# of course git is the default VCS
+ 	$rv->{vcs} ||= 'git';
+ 	$self->{-cache}->{$repo_path} = $rv;
+ }
+
+ 1;
diff --cc lib/PublicInbox/RepobrowseGitFallback.pm
index 000000000,696e5b944..386401392
mode 000000,100644..100644
--- a/lib/PublicInbox/RepobrowseGitFallback.pm
+++ b/lib/PublicInbox/RepobrowseGitFallback.pm
@@@ -1,0 -1,22 +1,21 @@@
+ # Copyright (C) 2015 all contributors
+ # License: AGPL-3.0+
(https://www.gnu.org/licenses/agpl-3.0.txt)
+
+ # when no endpoints match, fallback to this and serve a static file
+ # GitHTTPBackend::serve handles Smart HTTP requests too (via serve_smart).
+ package PublicInbox::RepobrowseGitFallback;
+ use strict;
+ use warnings;
+ use base qw(PublicInbox::RepobrowseBase);
+ use PublicInbox::GitHTTPBackend;
+
+ # overrides PublicInbox::RepobrowseBase::call
+ sub call {
+ 	my ($self, undef, $req) = @_; # the cmd argument is unused here
+ 	my $expath = $req->{expath};
+ 	return if index($expath, '..') >= 0; # prevent path traversal
+ 	my $git = $req->{repo_info}->{git};
 -	my $cgi = $req->{cgi};
 -	PublicInbox::GitHTTPBackend::serve($cgi, $git, $expath);
++	PublicInbox::GitHTTPBackend::serve($req->{env}, $git, $expath);
+ }
+
+ 1;