From: Eric Wong Date: Sat, 21 Jan 2017 11:34:31 +0000 (+0000) Subject: repobrowse: simplify git log parsing implementation X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5027b5fad0aa4a448e53eeba4027328dd528c918;p=thirdparty%2Fpublic-inbox.git repobrowse: simplify git log parsing implementation Based on what was done for the Atom feed, this will allow us to simplify state management through metaprogramming and avoid placeholder characters ('D' for decoration) for empty fields. --- diff --git a/MANIFEST b/MANIFEST index 789ed68ca..29b98e902 100644 --- a/MANIFEST +++ b/MANIFEST @@ -183,6 +183,7 @@ t/repobrowse_git.t t/repobrowse_git_atom.t t/repobrowse_git_commit.t t/repobrowse_git_httpd.t +t/repobrowse_git_log.t t/repobrowse_git_plain.t t/repobrowse_git_snapshot.t t/repobrowse_git_tree.t diff --git a/lib/PublicInbox/RepobrowseGitLog.pm b/lib/PublicInbox/RepobrowseGitLog.pm index e62486ba1..21c23fd32 100644 --- a/lib/PublicInbox/RepobrowseGitLog.pm +++ b/lib/PublicInbox/RepobrowseGitLog.pm @@ -10,8 +10,9 @@ use base qw(PublicInbox::RepobrowseBase); use PublicInbox::RepobrowseGit qw(git_dec_links git_commit_title); use PublicInbox::Qspawn; # cannot rely on --date=format-local:... yet, it is too new (September 2015) -my $LOG_FMT = '--pretty=tformat:'. - join('%x00', qw(%h %p %s D%D %ai a%an b%b), '', ''); +use constant STATES => qw(h p D ai an s b); +use constant STATE_BODY => (scalar(STATES) - 1); +my $LOG_FMT = '--pretty=tformat:'. join('%n', map { "%$_" } STATES).'%x00'; sub parent_links { if (@_ == 1) { # typical, single-parent commit @@ -24,11 +25,33 @@ sub parent_links { } } +sub flush_log_hdr ($$$) { + my ($req, $dst, $hdr) = @_; + my $rel = $req->{relcmd}; + my $seen = $req->{seen}; + $$dst .= '
' if scalar keys %$seen;
+	my $id = $hdr->{h};
+	$seen->{$id} = 1;
+	$$dst .= qq();
+	$$dst .= utf8_html($hdr->{'s'}); # FIXME may still OOM
+	$$dst .= '';
+	my $D = $hdr->{D}; # FIXME: thousands of decorations may OOM us
+	if ($D ne '') {
+		$$dst .= ' (' . join(', ', git_dec_links($rel, $D)) . ')';
+	}
+	my @p = split(/ /, $hdr->{p});
+	push @{$req->{parents}}, @p;
+	my $plinks = parent_links(@p);
+	$$dst .= "\n- ";
+	$$dst .= utf8_html($hdr->{an});
+	$$dst .= " @ $hdr->{ai}\n  commit $id$plinks\n";
+	undef
+}
+
 sub git_log_sed_end ($$) {
-	my $req = $_[0];
-	my $dst = delete $req->{lhtml} || '';
-	$dst .= utf8_html($_[1]); # existing buffer
-	$dst .= '

';
+	my ($req, $dst) = @_;
+	$$dst .= '
';
 	my $m = '';
 	my $np = 0;
 	my $seen = $req->{seen};
@@ -43,106 +66,55 @@ sub git_log_sed_end ($$) {
 		$m .= qq($s);
 	}
 	if ($np == 0) {
-		$dst .= "No commits follow";
+		$$dst .= "No commits follow";
 	} elsif ($np > 1) {
-		$dst .= "Unseen parent commits to follow (multiple choice):\n";
+		$$dst .= "Unseen parent commits to follow (multiple choice):\n";
 	} else {
-		$dst .= "Next parent to follow:\n";
+		$$dst .= "Next parent to follow:\n";
 	}
-	$dst .= $m;
-	$dst .= '
'; + $$dst .= $m; + $$dst .= '
'; } sub git_log_sed ($$) { my ($self, $req) = @_; my $buf = ''; - my $state = 'h'; - my %acache; - my $rel = $req->{relcmd}; - my $seen = $req->{seen} = {}; - my $parents = $req->{parents} = []; - my ($plinks, $id, $ai); + my $state = 0; + $req->{seen} = {}; + $req->{parents} = []; + my $hdr = {}; sub { my $dst; # $_[0] == scalar buffer, undef means EOF from "git log" - return git_log_sed_end($req, $buf) unless defined $_[0]; $dst = delete $req->{lhtml} || ''; my @tmp; - $buf .= $_[0]; - @tmp = split(/\0/, $buf, -1); - $buf = @tmp ? pop(@tmp) : ''; + if (defined $_[0]) { + $buf .= $_[0]; + @tmp = split(/\n/, $buf, -1); + $buf = @tmp ? pop(@tmp) : ''; + } else { + @tmp = split(/\n/, $buf, -1); + $buf = undef; + } - while (@tmp) { - if ($state eq 'b') { - my $bb = shift @tmp; - $state = 'B' if $bb =~ s/\Ab/\n/; - my @lines = split(/\n/, $bb); - $bb = utf8_html(pop @lines); - $dst .= utf8_html($_)."\n" for @lines; - $dst .= $bb; - } elsif ($state eq 'B') { - my $bb = shift @tmp; - if ($bb eq '') { - $state = 'BB'; - } else { - my @lines = split(/\n/, $bb); - $bb = undef; - my $last = utf8_html(pop @lines); - $dst .= utf8_html($_)."\n" for @lines; - $dst .= $last; - } - } elsif ($state eq 'BB') { - if ($tmp[0] =~ s/\A\n//s) { - $state = 'h'; - } else { - @tmp = (); - warn 'Bad state BB in log parser: ', - $req->{-debug}; - } - } elsif ($state eq 'h') { - if (scalar keys %$seen) { - $dst .= '
';
+		foreach my $l (@tmp) {
+			if ($state != STATE_BODY) {
+				$hdr->{((STATES)[$state])} = $l;
+				if (++$state == STATE_BODY) {
+					flush_log_hdr($req, \$dst, $hdr);
+					$hdr = {};
 				}
-				$id = shift @tmp;
-				$seen->{$id} = 1;
-				$state = 'p'
-			} elsif ($state eq 'p') {
-				my @p = split(/ /, shift @tmp);
-				push @$parents, @p;
-				$plinks = parent_links(@p);
-				$state = 's'
-			} elsif ($state eq 's') {
-				# FIXME: excessively long subjects OOM us
-				my $s = shift @tmp;
-				$dst .= qq();
-				$dst .= utf8_html($s);
-				$dst .= '';
-				$state = 'D'
-			} elsif ($state eq 'D') {
-				# FIXME: thousands of decorations may OOM us
-				my $D = shift @tmp;
-				if ($D =~ /\AD(.+)/) {
-					$dst .= ' (';
-					$dst .= join(', ',
-						git_dec_links($rel, $1));
-					$dst .= ')';
-				}
-				$state = 'ai';
-			} elsif ($state eq 'ai') {
-				$ai = shift @tmp;
-				$state = 'an';
-			} elsif ($state eq 'an') {
-				my $an = shift @tmp;
-				$an =~ s/\Aa// or
-					die "missing 'a' from author: $an";
-				my $ah = $acache{$an} ||= utf8_html($an);
-				$dst .= "\n- $ah @ $ai\n  commit $id$plinks\n";
-				$id = $plinks = $ai = '';
-				$state = 'b';
+				next;
+			}
+			if ($l eq "\0") {
+				$dst .= qq(
); + $state = 0; + } else { + $dst .= "\n"; + $dst .= utf8_html($l); } } - + git_log_sed_end($req, \$dst) unless defined $buf; $dst; }; } diff --git a/t/repobrowse_git_log.t b/t/repobrowse_git_log.t new file mode 100644 index 000000000..86338698a --- /dev/null +++ b/t/repobrowse_git_log.t @@ -0,0 +1,19 @@ +# Copyright (C) 2017 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +my $test = require './t/repobrowse_common_git.perl'; +use Test::More; + +test_psgi($test->{app}, sub { + my ($cb) = @_; + my $req = 'http://example.com/test.git/log'; + my $res = $cb->(GET($req)); + is($res->code, 200, 'got 200'); + is($res->header('Content-Type'), 'text/html', + 'got correct Content-Type'); + my $body = dechunk($res); + like($body, qr!!, 'valid HTML :)'); +}); + +done_testing();