Bug 405011: Text is cut off when containing Unicode supplementary characters (outside the BMP)

author Frédéric Buclin <LpSolit@gmail.com>

Tue, 25 Feb 2014 20:42:13 +0000 (21:42 +0100)

committer Frédéric Buclin <LpSolit@gmail.com>

Tue, 25 Feb 2014 20:42:13 +0000 (21:42 +0100)
author Frédéric Buclin <LpSolit@gmail.com>
Tue, 25 Feb 2014 20:42:13 +0000 (21:42 +0100)
committer Frédéric Buclin <LpSolit@gmail.com>
Tue, 25 Feb 2014 20:42:13 +0000 (21:42 +0100)
diff --git a/Bugzilla/Bug.pm b/Bugzilla/Bug.pm

index 367804862217a3ffe2d93daceb95cf1ebf09936d..73b50018a0725da9413b0d4095fa1784b68531f6 100644 (file)
--- a/Bugzilla/Bug.pm
+++ b/Bugzilla/Bug.pm
@@ -3412,9 +3412,15 @@ sub comments {
      if (!defined $self->{'comments'}) {
          $self->{'comments'} = Bugzilla::Comment->match({ bug_id => $self->id });
          my $count = 0;
+        my $is_mysql = Bugzilla->dbh->isa('Bugzilla::DB::Mysql') ? 1 : 0;
          foreach my $comment (@{ $self->{'comments'} }) {
              $comment->{count} = $count++;
              $comment->{bug} = $self;
+            # XXX - hack for MySQL. Convert [U+....] back into its Unicode
+            # equivalent for characters above U+FFFF as MySQL older than 5.5.3
+            # cannot store them, see Bugzilla::Comment::_check_thetext().
+            $comment->{thetext} =~ s/\x{FDD0}\[U\+((?:[1-9A-F]|10)[0-9A-F]{4})\]\x{FDD1}/chr(hex $1)/eg
+              if $is_mysql;
          }
          # Some bugs may have no comments when upgrading old installations.
          Bugzilla::Comment->preload($self->{'comments'}) if $count;
diff --git a/Bugzilla/Comment.pm b/Bugzilla/Comment.pm

index d1e1e253017a47ee1514da7f05dfae6505436649..0dada24cfa353eeedc822834b003923ae85f6562 100644 (file)
--- a/Bugzilla/Comment.pm
+++ b/Bugzilla/Comment.pm
@@ -424,6 +424,15 @@ sub _check_thetext {
      $thetext =~ s/\s*$//s;
      $thetext =~ s/\r\n?/\n/g; # Get rid of \r.
  
+    # Characters above U+FFFF cannot be stored by MySQL older than 5.5.3 as they
+    # require the new utf8mb4 character set. Other DB servers are handling them
+    # without any problem. So we need to replace these characters if we use MySQL,
+    # else the comment is truncated.
+    # XXX - Once we use utf8mb4 for comments, this hack for MySQL can go away.
+    if (Bugzilla->dbh->isa('Bugzilla::DB::Mysql')) {
+        $thetext =~ s/([\x{10000}-\x{10FFFF}])/"\x{FDD0}[" . uc(sprintf('U+%04x', ord($1))) . "]\x{FDD1}"/eg;
+    }
+
      ThrowUserError('comment_too_long') if length($thetext) > MAX_COMMENT_LENGTH;
      return $thetext;
  }
diff --git a/Bugzilla/Search.pm b/Bugzilla/Search.pm

index b9946889d0dc4386882c2d21a3faee986b4dbe5e..399a5b20234b2dd6af7712e0496cf24240ac2aa4 100644 (file)
--- a/Bugzilla/Search.pm
+++ b/Bugzilla/Search.pm
@@ -1787,17 +1787,23 @@ sub _handle_chart {
      my ($field, $operator, $value) = $condition->fov;
      return if (!defined $field or !defined $operator or !defined $value);
      $field = FIELD_MAP->{$field} || $field;
-    
-    my $string_value;
+
+    my ($string_value, $orig_value);
      if (ref $value eq 'ARRAY') {
          # Trim input and ignore blank values.
          @$value = map { trim($_) } @$value;
          @$value = grep { defined $_ and $_ ne '' } @$value;
          return if !@$value;
+        $orig_value = join(',', @$value);
+        if ($field eq 'longdesc') {
+            @$value = map { _convert_unicode_characters($_) } @$value;
+        }
          $string_value = join(',', @$value);
      }
      else {
          return if $value eq '';
+        $orig_value = $value;
+        $value = _convert_unicode_characters($value) if $field eq 'longdesc';
          $string_value = $value;
      }
      
@@ -1844,7 +1850,7 @@ sub _handle_chart {
      # do_search_function modified them.   
      $self->search_description({
          field => $field, type => $operator,
-        value => $string_value, term => $search_args{term},
+        value => $orig_value, term => $search_args{term},
      });
  
      foreach my $join (@{ $search_args{joins} }) {
@@ -1855,6 +1861,18 @@ sub _handle_chart {
      $condition->translated(\%search_args);
  }
  
+# XXX - MySQL hack: rewrite each character above U+FFFF as "\x{FDD0}[U+<HEX>]\x{FDD1}",
+# the same form Bugzilla::Comment::_check_thetext() produces; other DBs get the string
+# back unchanged. This hack can go away once we require MySQL 5.5.3 and use utf8mb4.
+sub _convert_unicode_characters {
+    my $string = shift;
+
+    if (Bugzilla->dbh->isa('Bugzilla::DB::Mysql')) {  # only MySQL needs the rewrite
+        $string =~ s/([\x{10000}-\x{10FFFF}])/"\x{FDD0}[" . uc(sprintf('U+%04x', ord($1))) . "]\x{FDD1}"/eg;
+    }
+    return $string;  # untouched when the DB is not MySQL
+}
+
  ##################################
  # do_search_function And Helpers #
  ##################################
author	Frédéric Buclin <LpSolit@gmail.com>
	Tue, 25 Feb 2014 20:42:13 +0000 (21:42 +0100)
committer	Frédéric Buclin <LpSolit@gmail.com>
	Tue, 25 Feb 2014 20:42:13 +0000 (21:42 +0100)
Bugzilla/Bug.pm		patch \| blob \| blame \| history
Bugzilla/Comment.pm		patch \| blob \| blame \| history
Bugzilla/Search.pm		patch \| blob \| blame \| history