Bug 410902: Some characters are mangled in diff and interdiff modes when viewing...

author lpsolit%gmail.com <>

Thu, 3 Apr 2008 21:58:28 +0000 (21:58 +0000)

committer lpsolit%gmail.com <>

Thu, 3 Apr 2008 21:58:28 +0000 (21:58 +0000)
author lpsolit%gmail.com <>
Thu, 3 Apr 2008 21:58:28 +0000 (21:58 +0000)
committer lpsolit%gmail.com <>
Thu, 3 Apr 2008 21:58:28 +0000 (21:58 +0000)
diff --git a/Bugzilla/Attachment/PatchReader.pm b/Bugzilla/Attachment/PatchReader.pm

index 5dbffb5c9dd9f82c65c4f0d47b4aa1e14effbc3a..44193ed860d84f444ba4003703dc601b7b01d5a5 100644 (file)
--- a/Bugzilla/Attachment/PatchReader.pm
+++ b/Bugzilla/Attachment/PatchReader.pm
@@ -21,7 +21,9 @@ package Bugzilla::Attachment::PatchReader;
  
  use Bugzilla::Error;
  use Bugzilla::Attachment;
+use Bugzilla::Util;
  
+use Encode;
  
  sub process_diff {
      my ($attachment, $format, $context) = @_;
@@ -38,7 +40,7 @@ sub process_diff {
          # Actually print out the patch.
          print $cgi->header(-type => 'text/plain',
                             -expires => '+3M');
-
+        disable_utf8();
          $reader->iterate_string('Attachment ' . $attachment->id, $attachment->data);
      }
      else {
@@ -74,7 +76,12 @@ sub process_diff {
          $vars->{'other_patches'} = \@other_patches;
  
          setup_template_patch_reader($last_reader, $format, $context, $vars);
-        # Actually print out the patch.
+        # The patch is going to be displayed in an HTML page, so if the utf8
+        # param is enabled, we have to decode the attachment data from UTF-8.
+        # Encode::decode_utf8() knows what to do with invalid characters.
+        if (Bugzilla->params->{'utf8'}) {
+            $attachment->{data} = Encode::decode_utf8($attachment->data);
+        }
          $reader->iterate_string('Attachment ' . $attachment->id, $attachment->data);
      }
  }
@@ -85,10 +92,18 @@ sub process_interdiff {
      my $lc  = Bugzilla->localconfig;
      my $vars = {};
  
+    # Decode attachment data from UTF-8 if it's going to be displayed in an
+    # HTML page using the UTF-8 encoding.
+    # Encode::decode_utf8() knows what to do with invalid characters.
+    if ($format ne 'raw' && Bugzilla->params->{'utf8'}) {
+        $old_attachment->{data} = Encode::decode_utf8($old_attachment->data);
+        $new_attachment->{data} = Encode::decode_utf8($new_attachment->data);
+    }
+
      # Get old patch data.
-    my ($old_filename, $old_file_list) = get_unified_diff($old_attachment);
+    my ($old_filename, $old_file_list) = get_unified_diff($old_attachment, $format);
      # Get new patch data.
-    my ($new_filename, $new_file_list) = get_unified_diff($new_attachment);
+    my ($new_filename, $new_file_list) = get_unified_diff($new_attachment, $format);
  
      my $warning = warn_if_interdiff_might_fail($old_file_list, $new_file_list);
  
@@ -105,8 +120,12 @@ sub process_interdiff {
          # Actually print out the patch.
          print $cgi->header(-type => 'text/plain',
                             -expires => '+3M');
+        disable_utf8();
      }
      else {
+        # In case the HTML page is displayed with the UTF-8 encoding.
+        binmode $interdiff_fh, ':utf8' if Bugzilla->params->{'utf8'};
+
          $vars->{'warning'} = $warning if $warning;
          $vars->{'bugid'} = $new_attachment->bug_id;
          $vars->{'oldid'} = $old_attachment->id;
@@ -131,7 +150,7 @@ sub process_interdiff {
  ######################
  
  sub get_unified_diff {
-    my $attachment = shift;
+    my ($attachment, $format) = @_;
  
      # Bring in the modules we need.
      require PatchReader::Raw;
@@ -162,6 +181,10 @@ sub get_unified_diff {
  
      # Prints out to temporary file.
      my ($fh, $filename) = File::Temp::tempfile();
+    if ($format ne 'raw' && Bugzilla->params->{'utf8'}) {
+        # The HTML page will be displayed with the UTF-8 encoding.
+        binmode $fh, ':utf8';
+    }
      my $raw_printer = new PatchReader::DiffPrinter::raw($fh);
      $last_reader->sends_data_to($raw_printer);
      $last_reader = $raw_printer;
@@ -245,7 +268,7 @@ sub setup_template_patch_reader {
          $vars->{'headers'} = $cgi->param('headers');
      }
      else {
-        $vars->{'headers'} = 1 if !defined $cgi->param('headers');
+        $vars->{'headers'} = 1;
      }
  
      $vars->{'collapsed'} = $cgi->param('collapsed');
diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm

index da41008e151e3cbf3bb6d8b30806df45e87db05e..e7a76e21d1c8a2b5d10e9e1510233c6dd10c0c5b 100644 (file)
--- a/Bugzilla/Util.pm
+++ b/Bugzilla/Util.pm
@@ -44,7 +44,7 @@ use base qw(Exporter);
                               file_mod_time is_7bit_clean
                               bz_crypt generate_random_password
                               validate_email_syntax clean_text
-                             get_text);
+                             get_text disable_utf8);
  
  use Bugzilla::Constants;
  
@@ -534,6 +534,12 @@ sub get_netaddr {
      return join(".", unpack("CCCC", pack("N", $addr)));
  }
  
+sub disable_utf8 {
+    if (Bugzilla->params->{'utf8'}) {
+        binmode STDOUT, ':raw'; # Turn off UTF8 encoding.
+    }
+}
+
  1;
  
  __END__
@@ -781,6 +787,10 @@ The search starts at $maxpos and goes back to the beginning of the string.
  Returns true is the string contains only 7-bit characters (ASCII 32 through 126,
  ASCII 10 (LineFeed) and ASCII 13 (Carrage Return).
  
+=item C<disable_utf8()>
+
+Disable utf8 on STDOUT (and display raw data instead).
+
  =item C<clean_text($str)>
  Returns the parameter "cleaned" by exchanging non-printable characters with spaces.
  Specifically characters (ASCII 0 through 31) and (ASCII 127) will become ASCII 32 (Space).
diff --git a/attachment.cgi b/attachment.cgi

index 5f29649f3bd1cce606e850a02fe63ffb13f85b01..937087a519d85395883d83467680eb90802c4ca4 100755 (executable)
--- a/attachment.cgi
+++ b/attachment.cgi
@@ -247,9 +247,7 @@ sub view {
      print $cgi->header(-type=>"$contenttype; name=\"$filename\"",
                         -content_disposition=> "inline; filename=\"$filename\"",
                         -content_length => $attachment->datasize);
-    if (Bugzilla->params->{'utf8'}) {
-        binmode STDOUT, ':raw'; # Turn off UTF8 encoding.
-    }
+    disable_utf8();
      print $attachment->data;
  }
author	lpsolit%gmail.com <>
	Thu, 3 Apr 2008 21:58:28 +0000 (21:58 +0000)
committer	lpsolit%gmail.com <>
	Thu, 3 Apr 2008 21:58:28 +0000 (21:58 +0000)
Bugzilla/Attachment/PatchReader.pm		patch \| blob \| blame \| history
Bugzilla/Util.pm		patch \| blob \| blame \| history
attachment.cgi		patch \| blob \| blame \| history