From: Eric Wong Date: Mon, 14 Mar 2016 18:20:16 +0000 (+0000) Subject: repobrowse: common git diff parsing code X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2a89856c74a27eafa24ef3bd718455ea60581eb6;p=thirdparty%2Fpublic-inbox.git repobrowse: common git diff parsing code This reduces the amount of duplicated code between the "diff" and "commit" views of repobrowse. --- diff --git a/lib/PublicInbox/RepobrowseGitCommit.pm b/lib/PublicInbox/RepobrowseGitCommit.pm index 7d8a08439..ea119fb28 100644 --- a/lib/PublicInbox/RepobrowseGitCommit.pm +++ b/lib/PublicInbox/RepobrowseGitCommit.pm @@ -17,6 +17,8 @@ use warnings; use base qw(PublicInbox::RepobrowseBase); use PublicInbox::Hval qw(utf8_html to_attr); use PublicInbox::RepobrowseGit qw(git_unquote git_commit_title); +use PublicInbox::RepobrowseGitDiffCommon qw/git_diffstat_emit + git_diff_ab_index git_diff_ab_hdr git_diff_ab_hunk/; use constant GIT_FMT => '--pretty=format:'.join('%n', '%H', '%h', '%s', '%an <%ae>', '%ai', '%cn <%ce>', '%ci', @@ -89,8 +91,16 @@ sub git_commit_stream { $l = <$log>; chomp $l; $fh->write(utf8_html($l)."---\n"); - my $diff = { anchors => {}, h => $h, p => \@p, rel => $rel }; - git_show_diffstat($diff, $req, $fh, $log); + $req->{anchors} = {}; + $req->{h} = $h; + $req->{p} = \@p; + $req->{rel} = $rel; + { + local $/ = "\0\0"; + my $l = <$log>; + chomp $l; + git_diffstat_emit($req, $fh, $l); + } my $help; $help = " This is a merge, showing combined diff:\n\n" if ($np > 1); @@ -104,25 +114,24 @@ sub git_commit_stream { $help = undef; } if ($l =~ m{^diff --git ("?a/.+) ("?b/.+)$}) { # regular - $l = git_diff_ab_hdr($diff, $1, $2) . "\n"; + $l = git_diff_ab_hdr($req, $1, $2) . "\n"; } elsif ($l =~ m{^diff --(cc|combined) (.+)$}) { - $l = git_diff_cc_hdr($diff, $1, $2) . "\n"; + $l = git_diff_cc_hdr($req, $1, $2) . "\n"; } elsif ($l =~ /^index ($cmt)\.\.($cmt)(.*)$/o) { # regular - $l = git_diff_ab_index($diff, $1, $2, $3) . "\n"; + $l = git_diff_ab_index($1, $2, $3) . "\n"; } elsif ($l =~ /^@@ (\S+) (\S+) @@(.*)$/) { # regular - $l = git_diff_ab_hunk($diff, $1, $2, $3) . "\n"; - + $l = git_diff_ab_hunk($req, $1, $2, $3) . "\n"; } elsif ($l =~ /^\+{1,3}\s*/ || ($cc_ins && $l =~ $cc_ins)) { $l = git_diff_ins($l) . "\n"; } elsif ($l =~ s/^(\-{1,3}\s*)// || ($cc_del && $l =~ s/$cc_del//)) { $l = git_diff_del($1, $l) . "\n"; } elsif ($l =~ /^index ($cmt,[^\.]+)\.\.($cmt)(.*)$/o) { # --cc - $l = git_diff_cc_index($diff, $1, $2, $3) . "\n"; - $cc_ins ||= $diff->{cc_ins}; - $cc_del ||= $diff->{cc_del}; + $l = git_diff_cc_index($req, $1, $2, $3) . "\n"; + $cc_ins ||= $req->{cc_ins}; + $cc_del ||= $req->{cc_del}; } elsif ($l =~ /^(@@@+) (\S+.*\S+) @@@+(.*)$/) { # --cc - $l = git_diff_cc_hunk($diff, $1, $2, $3) . "\n"; + $l = git_diff_cc_hunk($req, $1, $2, $3) . "\n"; } else { $l = utf8_html($l); } @@ -133,7 +142,7 @@ sub git_commit_stream { $fh->write(" This is a merge, combined diff is empty.\n"); } - show_unchanged($fh, $diff, $qs); + show_unchanged($fh, $req, $qs); $fh->write(''); } @@ -195,104 +204,6 @@ sub git_commit_404 { delete($req->{res})->([404, ['Content-Type'=>'text/html'], [ $x ]]); } -sub git_show_diffstat { - my ($diff, $req, $fh, $log) = @_; - local $/ = "\0\0"; - my $l = <$log>; - chomp $l; - my @stat = split("\0", $l); - my $nr = 0; - my ($nadd, $ndel) = (0, 0); - while (defined($l = shift @stat)) { - $l =~ s/\n?(\S+)\t+(\S+)\t+// or next; - my ($add, $del) = ($1, $2); - if ($add =~ /\A\d+\z/) { - $nadd += $add; - $ndel += $del; - $add = "+$add"; - $del = "-$del"; - } - my $num = sprintf('% 6s/%-6s', $del, $add); - if (length $l) { - my $anchor = to_attr(git_unquote($l)); - $diff->{anchors}->{$anchor} = $l; - $l = utf8_html($l); - $l = qq($l); - } else { - my $from = shift @stat; - my $to = shift @stat; - $l = git_diffstat_rename($diff, $from, $to); - } - ++$nr; - $fh->write(' '.$num."\t".$l."\n"); - } - $l = "\n $nr "; - $l .= $nr == 1 ? 'file changed, ' : 'files changed, '; - $l .= $nadd; - $l .= $nadd == 1 ? ' insertion(+), ' : ' insertions(+), '; - $l .= $ndel; - $l .= $ndel == 1 ? " deletion(-)\n\n" : " deletions(-)\n\n"; - $fh->write($l); -} - -# index abcdef89..01234567 -sub git_diff_ab_index { - my ($diff, $xa, $xb, $end) = @_; - # not wasting bandwidth on links here, yet - # links in hunk headers are far more useful with line offsets - $end = utf8_html($end); - "index $xa..$xb$end"; -} - -# diff --git a/foo.c b/bar.c -sub git_diff_ab_hdr { - my ($diff, $fa, $fb) = @_; - my $html_a = utf8_html($fa); - my $html_b = utf8_html($fb); - $fa = git_unquote($fa); - $fb = git_unquote($fb); - $fa =~ s!\Aa/!!; - $fb =~ s!\Ab/!!; - my $anchor = to_attr($fb); - delete $diff->{anchors}->{$anchor}; - $fa = $diff->{fa} = PublicInbox::Hval->utf8($fa); - $fb = $diff->{fb} = PublicInbox::Hval->utf8($fb); - $diff->{path_a} = $fa->as_path; - $diff->{path_b} = $fb->as_path; - - # not wasting bandwidth on links here, yet - # links in hunk headers are far more useful with line offsets - qq(diff --git $html_a $html_b); -} - -# @@ -1,2 +3,4 @@ (regular diff) -sub git_diff_ab_hunk { - my ($diff, $ca, $cb, $ctx) = @_; - my ($na) = ($ca =~ /\A-(\d+)/); - my ($nb) = ($cb =~ /\A\+(\d+)/); - - my $rel = $diff->{rel}; - my $rv = '@@ '; - if ($na == 0) { # new file - $rv .= $ca; - } else { - my $p = $diff->{p}->[0]; - $rv .= qq({path_a}?id=$p#n$na">); - $rv .= "$ca"; - } - $rv .= ' '; - if ($nb == 0) { # deleted file - $rv .= $cb; - } else { - my $h = $diff->{h}; - $rv .= qq({path_b}?id=$h#n$nb">); - $rv .= "$cb"; - } - $rv . ' @@' . utf8_html($ctx); -} - sub git_diff_cc_hdr { my ($diff, $combined, $path) = @_; my $html_path = utf8_html($path); @@ -360,28 +271,6 @@ sub git_diff_cc_hunk { $rv .= " $at" . utf8_html($ctx); } -sub git_diffstat_rename { - my ($diff, $from, $to) = @_; - my $anchor = to_attr(git_unquote($to)); - $diff->{anchors}->{$anchor} = $to; - my @from = split('/', $from); - my @to = split('/', $to); - my $orig_to = $to; - my ($base, @base); - while (@to && @from && $to[0] eq $from[0]) { - push @base, shift(@to); - shift @from; - } - - $base = utf8_html(join('/', @base)) if @base; - $from = utf8_html(join('/', @from)); - $to = PublicInbox::Hval->utf8(join('/', @to), $orig_to); - my $tp = $to->as_path; - my $th = $to->as_html; - $to = qq($th); - @base ? "$base/{$from => $to}" : "$from => $to"; -} - # It would be nice to be able to use colors for showing diff hunks. # Unfortunately, the default green+red colors in common web viewers # (gitweb, cgit, etc) are difficult to read for some people, myself diff --git a/lib/PublicInbox/RepobrowseGitDiff.pm b/lib/PublicInbox/RepobrowseGitDiff.pm index 7e137adb3..3459ec5fe 100644 --- a/lib/PublicInbox/RepobrowseGitDiff.pm +++ b/lib/PublicInbox/RepobrowseGitDiff.pm @@ -14,6 +14,8 @@ use warnings; use base qw(PublicInbox::RepobrowseBase); use PublicInbox::Hval qw(utf8_html to_attr); use PublicInbox::RepobrowseGit qw(git_unquote git_commit_title); +use PublicInbox::RepobrowseGitDiffCommon qw/git_diffstat_emit + git_diff_ab_index git_diff_ab_hdr git_diff_ab_hunk/; sub call_git_diff { my ($self, $req) = @_; @@ -92,44 +94,6 @@ sub call_git_diff { } } -sub git_diffstat_to_html ($$$) { - my ($req, $fh, undef) = @_; - my @stat = split("\0", $_[2]); # avoiding copy for $_[2] - my $nr = 0; - my ($nadd, $ndel) = (0, 0); - my $s = ''; - while (defined(my $l = shift @stat)) { - $l =~ s/\n?(\S+)\t+(\S+)\t+// or next; - my ($add, $del) = ($1, $2); - if ($add =~ /\A\d+\z/) { - $nadd += $add; - $ndel += $del; - $add = "+$add"; - $del = "-$del"; - } - my $num = sprintf('% 6s/%-6s', $del, $add); - if (length $l) { - my $anchor = to_attr(git_unquote($l)); - $req->{anchors}->{$anchor} = $l; - $l = utf8_html($l); - $l = qq($l); - } else { - my $from = shift @stat; - my $to = shift @stat; - $l = git_diffstat_rename($req, $from, $to); - } - ++$nr; - $s .= ' '.$num."\t".$l."\n"; - } - $s .= "\n $nr "; - $s .= $nr == 1 ? 'file changed, ' : 'files changed, '; - $s .= $nadd; - $s .= $nadd == 1 ? ' insertion(+), ' : ' insertions(+), '; - $s .= $ndel; - $s .= $ndel == 1 ? " deletion(-)\n\n" : " deletions(-)\n\n"; - $fh->write($s); -} - sub git_diff_line_i { my ($req, $l) = @_; my $cmt = '[a-f0-9]+'; @@ -137,11 +101,9 @@ sub git_diff_line_i { if ($l =~ m{^diff --git ("?a/.+) ("?b/.+)$}) { # regular $l = git_diff_ab_hdr($req, $1, $2); } elsif ($l =~ /^index ($cmt)\.\.($cmt)(.*)$/o) { # regular - $l = git_diff_ab_index($req, $1, $2, $3); + $l = git_diff_ab_index($1, $2, $3); } elsif ($l =~ /^@@ (\S+) (\S+) @@(.*)$/) { # regular $l = git_diff_ab_hunk($req, $1, $2, $3); - } elsif ($l =~ /^index ($cmt,[^\.]+)\.\.($cmt)(.*)$/o) { # --cc - $l = git_diff_cc_index($req, $1, $2, $3); } else { $l = utf8_html($l); } @@ -154,7 +116,7 @@ sub git_diff_to_html { my ($stat, $buf) = split(/\0\0/, $req->{dbuf}, 2); return unless defined $buf; $req->{dbuf} = $buf; - git_diffstat_to_html($req, $fh, $stat); + git_diffstat_emit($req, $fh, $stat); $req->{diff_state} = 1; } my @buf = split(/\n/, $req->{dbuf}, -1); @@ -166,87 +128,4 @@ sub git_diff_to_html { } } -sub git_diffstat_rename { - my ($req, $from, $to) = @_; - my $anchor = to_attr(git_unquote($to)); - $req->{anchors}->{$anchor} = $to; - my @from = split('/', $from); - my @to = split('/', $to); - my $orig_to = $to; - my ($base, @base); - while (@to && @from && $to[0] eq $from[0]) { - push @base, shift(@to); - shift @from; - } - - $base = utf8_html(join('/', @base)) if @base; - $from = utf8_html(join('/', @from)); - $to = PublicInbox::Hval->utf8(join('/', @to), $orig_to); - my $tp = $to->as_path; - my $th = $to->as_html; - $to = qq($th); - @base ? "$base/{$from => $to}" : "$from => $to"; -} - -# index abcdef89..01234567 -sub git_diff_ab_index { - my ($req, $xa, $xb, $end) = @_; - # not wasting bandwidth on links here, yet - # links in hunk headers are far more useful with line offsets - $end = utf8_html($end); - "index $xa..$xb$end"; -} - -# diff --git a/foo.c b/bar.c -sub git_diff_ab_hdr { - my ($req, $fa, $fb) = @_; - my $html_a = utf8_html($fa); - my $html_b = utf8_html($fb); - $fa = git_unquote($fa); - $fb = git_unquote($fb); - $fa =~ s!\Aa/!!; - $fb =~ s!\Ab/!!; - my $anchor = to_attr($fb); - delete $req->{anchors}->{$anchor}; - $fa = $req->{fa} = PublicInbox::Hval->utf8($fa); - $fb = $req->{fb} = PublicInbox::Hval->utf8($fb); - $req->{path_a} = $fa->as_path; - $req->{path_b} = $fb->as_path; - - # not wasting bandwidth on links here - # links in hunk headers are far more useful with line offsets - qq(diff --git $html_a $html_b); -} - -# @@ -1,2 +3,4 @@ (regular diff) -sub git_diff_ab_hunk { - my ($req, $ca, $cb, $ctx) = @_; - my ($na) = ($ca =~ /\A-(\d+)/); - my ($nb) = ($cb =~ /\A\+(\d+)/); - - # we add "rel=nofollow" here to reduce load on search engines, here - my $rel = $req->{relcmd}; - my $rv = '@@ '; - if (defined($na) && $na == 0) { # new file - $rv .= $ca; - } else { - $na = defined $na ? "#n$na" : ''; - my $p = $req->{p}->[0]; - $rv .= qq({path_a}?id=$p$na">); - $rv .= "$ca"; - } - $rv .= ' '; - if (defined($nb) && $nb == 0) { # deleted file - $rv .= $cb; - } else { - my $h = $req->{h}; - $nb = defined $nb ? "#n$nb" : ''; - $rv .= qq({path_b}?id=$h$nb">); - $rv .= "$cb"; - } - $rv . ' @@' . utf8_html($ctx); -} - 1; diff --git a/lib/PublicInbox/RepobrowseGitDiffCommon.pm b/lib/PublicInbox/RepobrowseGitDiffCommon.pm new file mode 100644 index 000000000..3b2cec6c5 --- /dev/null +++ b/lib/PublicInbox/RepobrowseGitDiffCommon.pm @@ -0,0 +1,135 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ + +# common git diff-related code +package PublicInbox::RepobrowseGitDiffCommon; +use strict; +use warnings; +use PublicInbox::RepobrowseGit qw/git_unquote git_commit_title/; +use PublicInbox::Hval qw/utf8_html to_attr/; +use base qw/Exporter/; +our @EXPORT_OK = qw/git_diffstat_emit + git_diff_ab_index git_diff_ab_hdr git_diff_ab_hunk/; + +# index abcdef89..01234567 +sub git_diff_ab_index ($$$) { + my ($xa, $xb, $end) = @_; + # not wasting bandwidth on links here, yet + # links in hunk headers are far more useful with line offsets + $end = utf8_html($end); + "index $xa..$xb$end"; +} + +# diff --git a/foo.c b/bar.c +sub git_diff_ab_hdr ($$$) { + my ($req, $fa, $fb) = @_; + my $html_a = utf8_html($fa); + my $html_b = utf8_html($fb); + $fa = git_unquote($fa); + $fb = git_unquote($fb); + $fa =~ s!\Aa/!!; + $fb =~ s!\Ab/!!; + my $anchor = to_attr($fb); + delete $req->{anchors}->{$anchor}; + $fa = $req->{fa} = PublicInbox::Hval->utf8($fa); + $fb = $req->{fb} = PublicInbox::Hval->utf8($fb); + $req->{path_a} = $fa->as_path; + $req->{path_b} = $fb->as_path; + + # not wasting bandwidth on links here + # links in hunk headers are far more useful with line offsets + qq(diff --git $html_a $html_b); +} + +# @@ -1,2 +3,4 @@ (regular diff) +sub git_diff_ab_hunk ($$$$) { + my ($req, $ca, $cb, $ctx) = @_; + my ($na) = ($ca =~ /\A-(\d+)/); + my ($nb) = ($cb =~ /\A\+(\d+)/); + + # we add "rel=nofollow" here to reduce load on search engines, here + my $rel = $req->{relcmd}; + my $rv = '@@ '; + if (defined($na) && $na == 0) { # new file + $rv .= $ca; + } else { + $na = defined $na ? "#n$na" : ''; + my $p = $req->{p}->[0]; + $rv .= qq({path_a}?id=$p$na">); + $rv .= "$ca"; + } + $rv .= ' '; + if (defined($nb) && $nb == 0) { # deleted file + $rv .= $cb; + } else { + my $h = $req->{h}; + $nb = defined $nb ? "#n$nb" : ''; + $rv .= qq({path_b}?id=$h$nb">); + $rv .= "$cb"; + } + $rv . ' @@' . utf8_html($ctx); +} + +sub git_diffstat_rename ($$$) { + my ($req, $from, $to) = @_; + my $anchor = to_attr(git_unquote($to)); + $req->{anchors}->{$anchor} = $to; + my @from = split('/', $from); + my @to = split('/', $to); + my $orig_to = $to; + my ($base, @base); + while (@to && @from && $to[0] eq $from[0]) { + push @base, shift(@to); + shift @from; + } + + $base = utf8_html(join('/', @base)) if @base; + $from = utf8_html(join('/', @from)); + $to = PublicInbox::Hval->utf8(join('/', @to), $orig_to); + my $tp = $to->as_path; + my $th = $to->as_html; + $to = qq($th); + @base ? "$base/{$from => $to}" : "$from => $to"; +} + +sub git_diffstat_emit ($$$) { + my ($req, $fh, undef) = @_; + my @stat = split("\0", $_[2]); # avoiding copy for $_[2] + my $nr = 0; + my ($nadd, $ndel) = (0, 0); + my $s = ''; + while (defined(my $l = shift @stat)) { + $l =~ s/\n?(\S+)\t+(\S+)\t+// or next; + my ($add, $del) = ($1, $2); + if ($add =~ /\A\d+\z/) { + $nadd += $add; + $ndel += $del; + $add = "+$add"; + $del = "-$del"; + } + my $num = sprintf('% 6s/%-6s', $del, $add); + if (length $l) { + my $anchor = to_attr(git_unquote($l)); + $req->{anchors}->{$anchor} = $l; + $l = utf8_html($l); + $l = qq($l); + } else { + my $from = shift @stat; + my $to = shift @stat; + $l = git_diffstat_rename($req, $from, $to); + } + ++$nr; + $s .= ' '.$num."\t".$l."\n"; + } + $s .= "\n $nr "; + $s .= $nr == 1 ? 'file changed, ' : 'files changed, '; + $s .= $nadd; + $s .= $nadd == 1 ? ' insertion(+), ' : ' insertions(+), '; + $s .= $ndel; + $s .= $ndel == 1 ? " deletion(-)\n\n" : " deletions(-)\n\n"; + $fh->write($s); +} + +1;