From b00cfa89a6e0a9fc89aedf1a66172c4ad0566ece Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 25 Apr 2023 10:50:50 +0000
Subject: [PATCH] mail_diff: match ContentHash EOL and EOM behavior more closely

ContentHash currently doesn't convert CRCRLF to LF. Perhaps it
should, but for now, have diff behavior match the actual
comparison behavior used for dedupe and omit all trailing
whitespace for diff.
---
 lib/PublicInbox/ContentHash.pm | 2 +-
 lib/PublicInbox/MailDiff.pm    | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm
index a4f6196f8..fc94257c7 100644
--- a/lib/PublicInbox/ContentHash.pm
+++ b/lib/PublicInbox/ContentHash.pm
@@ -45,7 +45,7 @@ sub content_dig_i {
 	my $ct = $part->content_type || 'text/plain';
 	my ($s, undef) = msg_part_text($part, $ct);
 	if (defined $s) {
-		$s =~ s/\r\n/\n/gs;
+		$s =~ s/\r\n/\n/gs; # TODO: consider \r+\n to match View
 		$s =~ s/\s*\z//s;
 		utf8::encode($s);
 	} else {
diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm
index 7511144c6..d9733ed40 100644
--- a/lib/PublicInbox/MailDiff.pm
+++ b/lib/PublicInbox/MailDiff.pm
@@ -11,7 +11,7 @@ use PublicInbox::GitAsyncCat;
 sub write_part { # Eml->each_part callback
 	my ($ary, $self) = @_;
 	my ($part, $depth, $idx) = @$ary;
-	if ($idx ne '1' || $self->{-raw_hdr}) {
+	if ($idx ne '1' || $self->{-raw_hdr}) { # lei mail-diff --raw-header
 		open my $fh, '>', "$self->{curdir}/$idx.hdr" or die "open: $!";
 		print $fh ${$part->{hdr}} or die "print $!";
 		close $fh or die "close $!";
@@ -20,7 +20,8 @@ sub write_part { # Eml->each_part callback
 	my ($s, $err) = msg_part_text($part, $ct);
 	my $sfx = defined($s) ? 'txt' : 'bin';
 	$s //= $part->body;
-	$s =~ s/\r+\n/\n/sg;
+	$s =~ s/\r\n/\n/gs; # TODO: consider \r+\n to match View
+	$s =~ s/\s*\z//s;
 	open my $fh, '>:utf8', "$self->{curdir}/$idx.$sfx" or die "open: $!";
 	print $fh $s or die "print $!";
 	close $fh or die "close $!";
-- 
2.47.3