From: Dave Miller Date: Mon, 26 Aug 2024 01:46:06 +0000 (-0400) Subject: Bug 1898882: detect UTF8 naming in DB, default to utf8mb4 X-Git-Tag: bugzilla-5.2~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=99d2e18c069347c022c67170c8f94bb15000bcb1;p=thirdparty%2Fbugzilla.git Bug 1898882: detect UTF8 naming in DB, default to utf8mb4 * Detect at installation/upgrade time how the database refers to the utf8 variant Bugzilla wants to use so that we don't accidentally reconvert the database to the same encoding it's already using on every run of checksetup.pl * Change the default charset on new installs to utf8mb4. a=dylan --- diff --git a/Bugzilla/Config/Common.pm b/Bugzilla/Config/Common.pm index 268333e4ee..86103c8f18 100644 --- a/Bugzilla/Config/Common.pm +++ b/Bugzilla/Config/Common.pm @@ -125,12 +125,15 @@ sub check_ip { sub check_utf8 { my ($utf8, $entry) = @_; - # You cannot turn off the UTF-8 parameter. + my $current_utf8 = Bugzilla->params->{'utf8'}; if (!$utf8) { return "You cannot disable UTF-8 support."; } - elsif ($entry eq 'utf8mb4' && $utf8 ne 'utf8mb4') { + elsif ($current_utf8 eq 'utf8mb3' && $utf8 ne 'utf8mb3' && $utf8 ne 'utf8mb4') { + return "You cannot downgrade from utf8mb3 support, only keep it or change to utf8mb4."; + } + elsif ($current_utf8 eq 'utf8mb4' && $utf8 ne 'utf8mb4') { return "You cannot disable UTF8-MB4 support."; } diff --git a/Bugzilla/Config/General.pm b/Bugzilla/Config/General.pm index 968517d937..d1acadbf3b 100644 --- a/Bugzilla/Config/General.pm +++ b/Bugzilla/Config/General.pm @@ -27,11 +27,19 @@ use constant get_param_list => ( { name => 'utf8', type => 's', - choices => ['1', 'utf8', 'utf8mb4'], - default => 'utf8', + choices => ['1', 'utf8', 'utf8mb3', 'utf8mb4'], + default => 'utf8mb4', checker => \&check_utf8 }, + { + name => 'utf8_collate', + type => 'r', + no_reset => '1', + default => 'utf8mb4_unicode_520_ci', + }, + + {name => 'shutdownhtml', type => 'l', default => ''}, {name => 'announcehtml', type => 'l', 
default => ''}, diff --git a/Bugzilla/DB/MariaDB.pm b/Bugzilla/DB/MariaDB.pm index b600e84d21..c57ebd0234 100644 --- a/Bugzilla/DB/MariaDB.pm +++ b/Bugzilla/DB/MariaDB.pm @@ -28,6 +28,7 @@ extends qw(Bugzilla::DB); use Bugzilla::Constants; use Bugzilla::Install::Util qw(install_string); +use Bugzilla::Config; use Bugzilla::Util; use Bugzilla::Error; use Bugzilla::DB::Schema::MariaDB; @@ -306,6 +307,24 @@ sub bz_check_server_version { sub bz_setup_database { my ($self) = @_; + # Before touching anything else, find out whether this database server does + # any aliasing of the character set we plan to use so we can check for + # already converted tables properly. We do this by creating a table as our + # intended charset and then test how it reads back. + my $db_name = Bugzilla->localconfig->{db_name}; + my $charset = $self->utf8_charset; + my $collate = $self->utf8_collate; + $self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? COLLATE ?", undef, $charset, $collate); + my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? AND TABLE_NAME='utf8_test'", undef, $db_name); + $self->do("DROP TABLE `utf8_test`"); + my ($found_charset) = ($found_collate =~ m/^([a-z0-9]+)_/); + Bugzilla->params->{'utf8'} = $found_charset; + Bugzilla->params->{'utf8_collate'} = $found_collate; + Bugzilla::Config::write_params(); + # reload these because they get used later. + $charset = $self->utf8_charset; + $collate = $self->utf8_collate; + # The "comments" field of the bugs_fulltext table could easily exceed # MySQL's default max_allowed_packet. Also, MySQL should never have # a max_allowed_packet smaller than our max_attachment_size. So, we @@ -402,7 +421,6 @@ sub bz_setup_database { } # Upgrade tables from MyISAM to InnoDB - my $db_name = Bugzilla->localconfig->{db_name}; my $myisam_tables = $self->selectcol_arrayref( 'SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? 
AND ENGINE = ?', undef, $db_name, 'MyISAM' @@ -700,8 +718,6 @@ sub bz_setup_database { # the table charsets. # # TABLE_COLLATION IS NOT NULL prevents us from trying to convert views. - my $charset = $self->utf8_charset; - my $collate = $self->utf8_collate; my $non_utf8_tables = $self->selectrow_array( "SELECT 1 FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL @@ -895,24 +911,16 @@ sub _fix_defaults { } sub utf8_charset { - return 'utf8' unless Bugzilla->params->{'utf8'}; - return Bugzilla->params->{'utf8'} eq 'utf8mb4' ? 'utf8mb4' : 'utf8mb3'; + return 'utf8mb4' unless Bugzilla->params->{'utf8'}; + return 'utf8mb4' if Bugzilla->params->{'utf8'} eq '1'; + return Bugzilla->params->{'utf8'}; } sub utf8_collate { my $charset = utf8_charset(); - if ($charset eq 'utf8') { - return 'utf8_general_ci'; - } - elsif ($charset eq 'utf8mb3') { - return 'utf8mb3_general_ci'; - } - elsif ($charset eq 'utf8mb4') { - return 'utf8mb4_unicode_520_ci'; - } - else { - croak "invalid charset: $charset"; - } + return $charset . '_unicode_520_ci' unless Bugzilla->params->{'utf8_collate'}; + return $charset . '_unicode_520_ci' unless (Bugzilla->params->{'utf8_collate'} =~ /^${charset}_/); + return Bugzilla->params->{'utf8_collate'}; } sub default_row_format { diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm index 96a64ee803..826cd62723 100644 --- a/Bugzilla/DB/Mysql.pm +++ b/Bugzilla/DB/Mysql.pm @@ -28,6 +28,7 @@ extends qw(Bugzilla::DB); use Bugzilla::Constants; use Bugzilla::Install::Util qw(install_string); +use Bugzilla::Config; use Bugzilla::Util; use Bugzilla::Error; use Bugzilla::DB::Schema::Mysql; @@ -306,6 +307,24 @@ sub bz_check_server_version { sub bz_setup_database { my ($self) = @_; + # Before touching anything else, find out whether this database server does + # any aliasing of the character set we plan to use so we can check for + # already converted tables properly. 
We do this by creating a table as our + # intended charset and then test how it reads back. + my $db_name = Bugzilla->localconfig->{db_name}; + my $charset = $self->utf8_charset; + my $collate = $self->utf8_collate; + $self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? COLLATE ?", undef, $charset, $collate); + my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? AND TABLE_NAME='utf8_test'", undef, $db_name); + $self->do("DROP TABLE `utf8_test`"); + my ($found_charset) = ($found_collate =~ m/^([a-z0-9]+)_/); + Bugzilla->params->{'utf8'} = $found_charset; + Bugzilla->params->{'utf8_collate'} = $found_collate; + Bugzilla::Config::write_params(); + # reload these because they get used later. + $charset = $self->utf8_charset; + $collate = $self->utf8_collate; + # The "comments" field of the bugs_fulltext table could easily exceed # MySQL's default max_allowed_packet. Also, MySQL should never have # a max_allowed_packet smaller than our max_attachment_size. So, we @@ -402,7 +421,6 @@ sub bz_setup_database { } # Upgrade tables from MyISAM to InnoDB - my $db_name = Bugzilla->localconfig->{db_name}; my $myisam_tables = $self->selectcol_arrayref( 'SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM' @@ -700,8 +718,6 @@ sub bz_setup_database { # the table charsets. # # TABLE_COLLATION IS NOT NULL prevents us from trying to convert views. - my $charset = $self->utf8_charset; - my $collate = $self->utf8_collate; my $non_utf8_tables = $self->selectrow_array( "SELECT 1 FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL @@ -895,21 +911,16 @@ sub _fix_defaults { } sub utf8_charset { - return 'utf8' unless Bugzilla->params->{'utf8'}; - return Bugzilla->params->{'utf8'} eq 'utf8mb4' ? 
'utf8mb4' : 'utf8'; + return 'utf8mb4' unless Bugzilla->params->{'utf8'}; + return 'utf8mb4' if Bugzilla->params->{'utf8'} eq '1'; + return Bugzilla->params->{'utf8'}; } sub utf8_collate { my $charset = utf8_charset(); - if ($charset eq 'utf8') { - return 'utf8_general_ci'; - } - elsif ($charset eq 'utf8mb4') { - return 'utf8mb4_unicode_520_ci'; - } - else { - croak "invalid charset: $charset"; - } + return $charset . '_unicode_520_ci' unless Bugzilla->params->{'utf8_collate'}; + return $charset . '_unicode_520_ci' unless (Bugzilla->params->{'utf8_collate'} =~ /^${charset}_/); + return Bugzilla->params->{'utf8_collate'}; } sub default_row_format { diff --git a/Bugzilla/DB/Schema/MariaDB.pm b/Bugzilla/DB/Schema/MariaDB.pm index c3e795a89f..6438aea450 100644 --- a/Bugzilla/DB/Schema/MariaDB.pm +++ b/Bugzilla/DB/Schema/MariaDB.pm @@ -310,7 +310,7 @@ sub column_info_to_column { ($column->{NOTNULL} = 1) if $column_info->{NULLABLE} == 0; - if ($column_info->{mysql_is_pri_key}) { + if ($column_info->{mariadb_is_pri_key}) { # In MySQL, if a table has no PK, but it has a UNIQUE index, # that index will show up as the PK. So we have to eliminate @@ -370,7 +370,7 @@ sub column_info_to_column { if ($type =~ /CHAR$/ || $type eq 'DECIMAL') { # This is nicely lowercase and has the size/precision appended. - $type = $column_info->{mysql_type_name}; + $type = $column_info->{mariadb_type_name}; } # If we're a tinyint, we could be either a BOOLEAN or an INT1. @@ -393,7 +393,7 @@ sub column_info_to_column { # doesn't touch the live DB. 
my $ref_sth = $dbh->prepare("SELECT $col_name FROM $table LIMIT 1"); $ref_sth->execute; - if ($ref_sth->{mysql_is_auto_increment}->[0]) { + if ($ref_sth->{mariadb_is_auto_increment}->[0]) { if ($type eq 'MEDIUMINT') { $type = 'MEDIUMSERIAL'; } diff --git a/template/en/default/admin/params/common.html.tmpl b/template/en/default/admin/params/common.html.tmpl index fd9fd3ed12..7be8262dff 100644 --- a/template/en/default/admin/params/common.html.tmpl +++ b/template/en/default/admin/params/common.html.tmpl @@ -16,6 +16,11 @@ [% IF param.type == "t" %] + [% ELSIF param.type == "r" %] + +
+ This value is read-only and you can't change it. [% ELSIF param.type == "p" %] contrib/recode.pl" _ " script." - _ "

Note that if you turn this parameter from "off" to" - _ " "on", you must re-run checksetup.pl immediately" - _ " afterward.

", + _ "

Note that if you change this parameter you must re-run" + _ " checksetup.pl immediately afterward.

", + + utf8_collate => + "The collation to use in database tables. This parameter is" + _ " automatically set by checksetup.pl.", shutdownhtml => "If this field is non-empty, then Bugzilla will be completely"