From: Dylan William Hardison Date: Sun, 7 Jul 2019 23:10:59 +0000 (-0400) Subject: Bug 1328659 - Add support for utf8=utf8mb4 (switches to dynamic/compressed row format... X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=171f86b113f1dffa76aca1053accc2c9d7fe606a;p=thirdparty%2Fbugzilla.git Bug 1328659 - Add support for utf8=utf8mb4 (switches to dynamic/compressed row format, and changes charset to utf8mb4) --- diff --git a/Bugzilla/Config/Common.pm b/Bugzilla/Config/Common.pm index 529bf95280..4b0c9db642 100644 --- a/Bugzilla/Config/Common.pm +++ b/Bugzilla/Config/Common.pm @@ -123,15 +123,16 @@ sub check_ip { } sub check_utf8 { - my $utf8 = shift; + my ($utf8, $entry) = @_; - # You cannot turn off the UTF-8 parameter if you've already converted - # your tables to utf-8. - my $dbh = Bugzilla->dbh; - if ($dbh->isa('Bugzilla::DB::Mysql') && $dbh->bz_db_is_utf8 && !$utf8) { - return "You cannot disable UTF-8 support, because your MySQL database" - . " is encoded in UTF-8"; + # You cannot turn off the UTF-8 parameter. + if (!$utf8) { + return "You cannot disable UTF-8 support."; } + elsif ($entry eq 'utf8mb4' && $utf8 ne 'utf8mb4') { + return "You cannot disable UTF8-MB4 support."; + } + return ""; } diff --git a/Bugzilla/Config/General.pm b/Bugzilla/Config/General.pm index 322275aa0a..37c648d3b7 100644 --- a/Bugzilla/Config/General.pm +++ b/Bugzilla/Config/General.pm @@ -24,7 +24,13 @@ use constant get_param_list => ( checker => \&check_email }, - {name => 'utf8', type => 'b', default => '0', checker => \&check_utf8}, + { + name => 'utf8', + type => 's', + choices => ['1', 'utf8', 'utf8mb4'], + default => 'utf8', + checker => \&check_utf8 + }, {name => 'shutdownhtml', type => 'l', default => ''}, diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm index 1cf0ee5d7e..b97db1fbc8 100644 --- a/Bugzilla/DB/Mysql.pm +++ b/Bugzilla/DB/Mysql.pm @@ -32,8 +32,9 @@ use Bugzilla::Util; use Bugzilla::Error; use Bugzilla::DB::Schema::Mysql; -use List::Util qw(max); +use List::Util qw(max any); use Text::ParseWords; +use Carp; # This is how many comments of MAX_COMMENT_LENGTH we expect on a single bug. # In reality, you could have a LOT more comments than this, because @@ -52,7 +53,7 @@ sub BUILDARGS { $dsn .= ";port=$port" if $port; $dsn .= ";mysql_socket=$sock" if $sock; - my %attrs = (mysql_enable_utf8 => Bugzilla->params->{'utf8'},); + my %attrs = (mysql_enable_utf8 => 1); # MySQL SSL options my ($ssl_ca_file, $ssl_ca_path, $ssl_cert, $ssl_key) = @$params{ @@ -75,7 +76,9 @@ sub on_dbi_connected { # This makes sure that if the tables are encoded as UTF-8, we # return their data correctly. - $dbh->do("SET NAMES utf8") if Bugzilla->params->{'utf8'}; + my $charset = $class->utf8_charset; + my $collate = $class->utf8_collate; + $dbh->do("SET NAMES $charset COLLATE $collate"); # Check for MySQL modes. my ($var, $sql_mode) @@ -332,6 +335,30 @@ sub bz_setup_database { die install_string('mysql_innodb_disabled'); } + if ($self->utf8_charset eq 'utf8mb4') { + my %global = map {@$_} + @{$self->selectall_arrayref(q(SHOW GLOBAL VARIABLES LIKE 'innodb_%'))}; + my $utf8mb4_supported + = $global{innodb_file_format} eq 'Barracuda' + && $global{innodb_file_per_table} eq 'ON' + && $global{innodb_large_prefix} eq 'ON'; + + die install_string('mysql_innodb_settings') unless $utf8mb4_supported; + + my $tables = $self->selectall_arrayref('SHOW TABLE STATUS'); + foreach my $table (@$tables) { + my ($table, undef, undef, $row_format) = @$table; + my $new_row_format = $self->default_row_format($table); + next if $new_row_format =~ /compact/i; + if (lc($new_row_format) ne lc($row_format)) { + print install_string( + 'mysql_row_format_conversion', {table => $table, format => $new_row_format} + ), + "\n"; + $self->do(sprintf 'ALTER TABLE %s ROW_FORMAT=%s', $table, $new_row_format); + } + } + } my ($sd_index_deleted, $longdescs_index_deleted); my @tables = $self->bz_table_list_real(); @@ -365,9 +392,6 @@ sub bz_setup_database { 'SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM' ); - foreach my $should_be_myisam (Bugzilla::DB::Schema::Mysql::MYISAM_TABLES) { - @$myisam_tables = grep { $_ ne $should_be_myisam } @$myisam_tables; - } if (scalar @$myisam_tables) { print "Bugzilla now uses the InnoDB storage engine in MySQL for", @@ -550,10 +574,7 @@ sub bz_setup_database { # This kind of situation happens when people create the database # themselves, and if we don't do this they will get the big # scary WARNING statement about conversion to UTF8. - if ( !$self->bz_db_is_utf8 - && !@tables - && (Bugzilla->params->{'utf8'} || !scalar keys %{Bugzilla->params})) - { + unless ($self->bz_db_is_utf8) { $self->_alter_db_charset_to_utf8(); } @@ -664,11 +685,13 @@ sub bz_setup_database { # the table charsets. # # TABLE_COLLATION IS NOT NULL prevents us from trying to convert views. + my $charset = $self->utf8_charset; + my $collate = $self->utf8_collate; my $non_utf8_tables = $self->selectrow_array( "SELECT 1 FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL - AND TABLE_COLLATION NOT LIKE 'utf8%' - LIMIT 1", undef, $db_name + AND TABLE_COLLATION != ? + LIMIT 1", undef, $db_name, $collate ); if (Bugzilla->params->{'utf8'} && $non_utf8_tables) { @@ -684,7 +707,8 @@ sub bz_setup_database { } } - print "Converting table storage format to UTF-8. This may take a", " while.\n"; + print + "Converting table storage format to $charset (collate $collate). This may take a while.\n"; foreach my $table ($self->bz_table_list_real) { my $info_sth = $self->prepare("SHOW FULL COLUMNS FROM $table"); $info_sth->execute(); @@ -698,11 +722,11 @@ sub bz_setup_database { # If this particular column isn't stored in utf-8 if ( $column->{Collation} && $column->{Collation} ne 'NULL' - && $column->{Collation} !~ /utf8/) + && $column->{Collation} ne $collate) { my $name = $column->{Field}; - print "$table.$name needs to be converted to UTF-8...\n"; + print "$table.$name needs to be converted to $charset (collate $collate)...\n"; # These will be automatically re-created at the end # of checksetup. @@ -723,7 +747,7 @@ sub bz_setup_database { my ($binary, $utf8) = ($sql_def, $sql_def); my $type = $self->_bz_schema->convert_type($col_info->{TYPE}); $binary =~ s/(\Q$type\E)/$1 CHARACTER SET binary/; - $utf8 =~ s/(\Q$type\E)/$1 CHARACTER SET utf8/; + $utf8 =~ s/(\Q$type\E)/$1 CHARACTER SET $charset COLLATE $collate/; push(@binary_sql, "MODIFY COLUMN $name $binary"); push(@utf8_sql, "MODIFY COLUMN $name $utf8"); } @@ -743,8 +767,8 @@ sub bz_setup_database { print "Converting the $table table to UTF-8...\n"; my $bin = "ALTER TABLE $table " . join(', ', @binary_sql); - my $utf - = "ALTER TABLE $table " . join(', ', @utf8_sql, 'DEFAULT CHARACTER SET utf8'); + my $utf = "ALTER TABLE $table " + . join(', ', @utf8_sql, "DEFAULT CHARACTER SET $charset COLLATE $collate"); $self->do($bin); $self->do($utf); @@ -754,7 +778,7 @@ sub bz_setup_database { } } else { - $self->do("ALTER TABLE $table DEFAULT CHARACTER SET utf8"); + $self->do("ALTER TABLE $table DEFAULT CHARACTER SET $charset COLLATE $collate"); } } # foreach my $table (@tables) @@ -765,7 +789,7 @@ sub bz_setup_database { # a mysqldump.) So we have this change outside of the above block, # so that it just happens silently if no actual *table* conversion # needs to happen. - if (Bugzilla->params->{'utf8'} && !$self->bz_db_is_utf8) { + unless ($self->bz_db_is_utf8) { $self->_alter_db_charset_to_utf8(); } @@ -847,19 +871,70 @@ sub _fix_defaults { } } +sub utf8_charset { + return 'utf8' unless Bugzilla->params->{'utf8'}; + return Bugzilla->params->{'utf8'} eq 'utf8mb4' ? 'utf8mb4' : 'utf8'; +} + +sub utf8_collate { + my $charset = utf8_charset(); + if ($charset eq 'utf8') { + return 'utf8_general_ci'; + } + elsif ($charset eq 'utf8mb4') { + return 'utf8mb4_unicode_520_ci'; + } + else { + croak "invalid charset: $charset"; + } +} + +sub default_row_format { + my ($class, $table) = @_; + my $charset = utf8_charset(); + if ($charset eq 'utf8') { + return 'Compact'; + } + elsif ($charset eq 'utf8mb4') { + my @no_compress = qw( + bug_user_last_visit + cc + email_rates + logincookies + token_data + tokens + ts_error + ts_exitstatus + ts_funcmap + ts_job + ts_note + user_request_log + votes + ); + return 'Dynamic' if any { $table eq $_ } @no_compress; + return 'Compressed'; + } + else { + croak "invalid charset: $charset"; + } +} + sub _alter_db_charset_to_utf8 { my $self = shift; my $db_name = Bugzilla->localconfig->{db_name}; - $self->do("ALTER DATABASE $db_name CHARACTER SET utf8"); + my $charset = $self->utf8_charset; + my $collate = $self->utf8_collate; + $self->do("ALTER DATABASE $db_name CHARACTER SET $charset COLLATE $collate"); } sub bz_db_is_utf8 { my $self = shift; - my $db_collation + my $db_charset = $self->selectrow_arrayref("SHOW VARIABLES LIKE 'character_set_database'"); # First column holds the variable name, second column holds the value. - return $db_collation->[1] =~ /utf8/ ? 1 : 0; + my $charset = $self->utf8_charset; + return $db_charset->[1] eq $charset ? 1 : 0; } diff --git a/Bugzilla/DB/Schema/Mysql.pm b/Bugzilla/DB/Schema/Mysql.pm index a56763e74e..3ca54549d3 100644 --- a/Bugzilla/DB/Schema/Mysql.pm +++ b/Bugzilla/DB/Schema/Mysql.pm @@ -85,8 +85,6 @@ use constant REVERSE_MAPPING => { # as in their db-specific version, so no reverse mapping is needed. }; -use constant MYISAM_TABLES => qw(); - #------------------------------------------------------------------------------ sub _initialize { @@ -128,16 +126,16 @@ sub _initialize { #------------------------------------------------------------------------------ sub _get_create_table_ddl { - # Extend superclass method to specify the MYISAM storage engine. # Returns a "create table" SQL statement. - my ($self, $table) = @_; - - my $charset = Bugzilla->dbh->bz_db_is_utf8 ? "CHARACTER SET utf8" : ''; - my $type = grep($_ eq $table, MYISAM_TABLES) ? 'MYISAM' : 'InnoDB'; - return ( - $self->SUPER::_get_create_table_ddl($table) . " ENGINE = $type $charset"); - + my $charset = Bugzilla::DB::Mysql->utf8_charset; + my $collate = Bugzilla::DB::Mysql->utf8_collate; + my $row_format = Bugzilla::DB::Mysql->default_row_format($table); + my @parts = ( + $self->SUPER::_get_create_table_ddl($table), 'ENGINE = InnoDB', + "CHARACTER SET $charset COLLATE $collate", "ROW_FORMAT=$row_format", + ); + return join(' ', @parts); } #eosub--_get_create_table_ddl #------------------------------------------------------------------------------ @@ -165,10 +163,9 @@ sub get_create_database_sql { # We only create as utf8 if we have no params (meaning we're doing # a new installation) or if the utf8 param is on. - my $create_utf8 - = Bugzilla->params->{'utf8'} || !defined Bugzilla->params->{'utf8'}; - my $charset = $create_utf8 ? "CHARACTER SET utf8" : ''; - return ("CREATE DATABASE $name $charset"); + my $charset = Bugzilla::DB::Mysql->utf8_charset; + my $collate = Bugzilla::DB::Mysql->utf8_collate; + return ("CREATE DATABASE $name CHARACTER SET $charset COLLATE $collate"); } # MySQL has a simpler ALTER TABLE syntax than ANSI. diff --git a/template/en/default/setup/strings.txt.pl b/template/en/default/setup/strings.txt.pl index 0efc378bbd..94e9a5c35d 100644 --- a/template/en/default/setup/strings.txt.pl +++ b/template/en/default/setup/strings.txt.pl @@ -335,6 +335,12 @@ EOT InnoDB is disabled in your MySQL installation. Bugzilla requires InnoDB to be enabled. Please enable it and then re-run checksetup.pl. +END + mysql_innodb_settings => <<'END', +Bugzilla requires the following MySQL InnoDB settings: +innodb_file_format = Barracuda +innodb_file_per_table = 1 +innodb_large_prefix = 1 END mysql_index_renaming => <<'END', We are about to rename old indexes. The estimated time to complete @@ -342,7 +348,8 @@ renaming is ##minutes## minutes. You cannot interrupt this action once it has begun. If you would like to cancel, press Ctrl-C now... (Waiting 45 seconds...) END - mysql_utf8_conversion => <<'END', + mysql_row_format_conversion => "Converting ##table## to row format ##format##.", + mysql_utf8_conversion => <<'END', WARNING: We are about to convert your table storage format to UTF-8. This allows Bugzilla to correctly store and sort international characters. However, if you have any non-UTF-8 data in your database,