[Rt-commit] rt branch, 4.6/mysql-utf8mb4-no-collate, created. rt-4.4.4-743-gfe95cfe732

Jim Brandt jbrandt at bestpractical.com
Wed Feb 12 13:28:44 EST 2020


The branch, 4.6/mysql-utf8mb4-no-collate has been created
        at  fe95cfe7329b0a3b92c9c9c8e6d15978fd0563b7 (commit)

- Log -----------------------------------------------------------------
commit 27794d9c2bec7a1a08704e6185bf23b0018fdadd
Author: michel <michel at bestpractical.com>
Date:   Thu Nov 21 15:39:20 2019 +0100

    Migrate charset to utf8mb4 in mysql
    
    This allows 4-byte characters, such as emojis, in fields.
    This feature is supported properly in mysql 5.7.7 and later.
    
    It requires the database to be converted to utf8mb4 to
    support those characters.
    
    MySQL version should be >= 5.7.7
    MariaDB version should be >= 10.2.5
    
    https://dev.mysql.com/doc/relnotes/mysql/5.7/en/news-5-7-7.html#mysqld-5-7-7-feature
    https://mariadb.com/kb/en/changes-improvements-in-mariadb-102/
    https://mariadb.com/kb/en/mariadb-1025-changelog/ (search for utf8)
    
    Before this version utf8mb4 tables could not have indexes with type VARCHAR(255):
    the default size for index entries was 767 bytes, which is enough for 255 chars
    stored as at most 3 chars (the utf8 format), but not as 4 bytes (utf8mb4).
    5.7.7 sets the default index size to 3072, so all of RT indexes are now OK.
    
    Detection of MariaDB is done on the version string (it must match "mariadb"),
    which is the case in both Debian and RedHat based distributions, and likely
    everywhere else.
    
    The innodb_log_file_size check is removed as it's not an issue any more in
    5.7, see also https://dev.mysql.com/doc/relnotes/mysql/5.6/en/news-5-6-20.html

diff --git a/README b/README
index 2f417b8fe8..1e765a36ee 100644
--- a/README
+++ b/README
@@ -20,8 +20,8 @@ o   Perl 5.10.1 or later (http://www.perl.org).
 
 o   A supported SQL database
 
-        Currently supported:  MySQL 5.1 - 5.7 with InnoDB support
-                              MariaDB 10.0 - 10.1 with InnoDB support
+        Currently supported:  MySQL 5.7 with InnoDB support
+                              MariaDB 10.2 or later with InnoDB support
                               Postgres 9.0 or later
                               Oracle 11g or later
                               SQLite 3.0 or later; for testing only, no
diff --git a/etc/schema.mysql b/etc/schema.mysql
index 1d6da82c59..b0b374c736 100644
--- a/etc/schema.mysql
+++ b/etc/schema.mysql
@@ -13,7 +13,7 @@ CREATE TABLE Attachments (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX Attachments2 ON Attachments (TransactionId) ;
 CREATE INDEX Attachments3 ON Attachments (Parent, TransactionId) ;
@@ -35,7 +35,7 @@ CREATE TABLE Queues (
   SLADisabled int2 NOT NULL DEFAULT 1 ,
   Disabled int2 NOT NULL DEFAULT 0 ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE UNIQUE INDEX Queues1 ON Queues (Name) ;
 CREATE INDEX Queues2 ON Queues (Disabled) ;
@@ -54,7 +54,7 @@ CREATE TABLE Links (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX Links2 ON Links (Base,  Type) ;
 CREATE INDEX Links3 ON Links (Target,  Type) ;
@@ -67,7 +67,7 @@ CREATE TABLE Principals (
         PrincipalType VARCHAR(16) not null,
         Disabled int2 NOT NULL DEFAULT 0 ,
         PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET ascii;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 
 CREATE TABLE Groups (
@@ -81,7 +81,7 @@ CREATE TABLE Groups (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX Groups1 ON Groups (Domain, Name, Instance);
 CREATE INDEX Groups2 On Groups (Instance);
@@ -99,7 +99,7 @@ CREATE TABLE ScripConditions (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 
 CREATE TABLE Transactions (
@@ -119,7 +119,7 @@ CREATE TABLE Transactions (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX Transactions1 ON Transactions (ObjectType, ObjectId);
 
@@ -140,7 +140,7 @@ CREATE TABLE Scrips (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE TABLE ObjectScrips (
   id INTEGER NOT NULL  AUTO_INCREMENT,
@@ -154,7 +154,7 @@ CREATE TABLE ObjectScrips (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE UNIQUE INDEX ObjectScrips1 ON ObjectScrips (ObjectId, Scrip);
 
@@ -171,7 +171,7 @@ CREATE TABLE ACL (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX  ACL1 on ACL(RightName, ObjectType, ObjectId,PrincipalType,PrincipalId);
 
@@ -186,7 +186,7 @@ CREATE TABLE GroupMembers (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 CREATE UNIQUE INDEX GroupMembers1 on GroupMembers (GroupId, MemberId);
 
 
@@ -206,7 +206,7 @@ create table CachedGroupMembers (
                                            # group members recursively.
                                            # Also, this allows us to have the ACL system elide members of disabled groups
         PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX DisGrouMem  on CachedGroupMembers (GroupId,MemberId,Disabled);
 CREATE INDEX CachedGroupMembers2 on CachedGroupMembers (MemberId, GroupId, Disabled);
@@ -246,7 +246,7 @@ CREATE TABLE Users (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 
 CREATE UNIQUE INDEX Users1 ON Users (Name) ;
@@ -283,7 +283,7 @@ CREATE TABLE Tickets (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX Tickets1 ON Tickets (Queue, Status) ;
 CREATE INDEX Tickets2 ON Tickets (Owner) ;
@@ -302,7 +302,7 @@ CREATE TABLE ScripActions (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 
 
@@ -318,7 +318,7 @@ CREATE TABLE Templates (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 
 
@@ -340,7 +340,7 @@ CREATE TABLE ObjectCustomFieldValues (
   LastUpdated DATETIME NULL  ,
   Disabled int2 NOT NULL DEFAULT 0 ,        # New -- whether the value was current
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX ObjectCustomFieldValues1 ON ObjectCustomFieldValues (Content);
 CREATE INDEX ObjectCustomFieldValues2 ON ObjectCustomFieldValues (CustomField,ObjectType,ObjectId);
@@ -369,7 +369,7 @@ CREATE TABLE CustomFields (
   LastUpdated DATETIME NULL  ,
   Disabled int2 NOT NULL DEFAULT 0 ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 
 
@@ -384,7 +384,7 @@ CREATE TABLE ObjectCustomFields (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 
 
@@ -401,7 +401,7 @@ CREATE TABLE CustomFieldValues (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX CustomFieldValues1 ON CustomFieldValues (CustomField);
 
@@ -421,7 +421,7 @@ CREATE TABLE Attributes (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX Attributes1 on Attributes(Name);
 CREATE INDEX Attributes2 on Attributes(ObjectType, ObjectId);
@@ -436,7 +436,7 @@ CREATE TABLE sessions (
     a_session LONGBLOB,
     LastUpdated TIMESTAMP,
     PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET ascii;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE TABLE Classes (
   id int(11) NOT NULL auto_increment,
@@ -449,7 +449,7 @@ CREATE TABLE Classes (
   LastUpdatedBy int(11) NOT NULL default '0',
   LastUpdated datetime default NULL,
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE TABLE Articles (
   id int(11) NOT NULL auto_increment,
@@ -465,7 +465,7 @@ CREATE TABLE Articles (
   LastUpdatedBy int(11) NOT NULL default '0',
   LastUpdated datetime default NULL,
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE TABLE Topics (
   id int(11) NOT NULL auto_increment,
@@ -475,7 +475,7 @@ CREATE TABLE Topics (
   ObjectType varchar(64) character set ascii NOT NULL default '',
   ObjectId int(11) NOT NULL default '0',
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE TABLE ObjectTopics (
   id int(11) NOT NULL auto_increment,
@@ -483,7 +483,7 @@ CREATE TABLE ObjectTopics (
   ObjectType varchar(64) character set ascii NOT NULL default '',
   ObjectId int(11) NOT NULL default '0',
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE TABLE ObjectClasses (
   id int(11) NOT NULL auto_increment,
@@ -495,7 +495,7 @@ CREATE TABLE ObjectClasses (
   LastUpdatedBy int(11) NOT NULL default '0',
   LastUpdated datetime default NULL,
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE TABLE Assets (
     id                int(11)         NOT NULL AUTO_INCREMENT,
@@ -508,7 +508,7 @@ CREATE TABLE Assets (
     LastUpdatedBy     int(11)         NOT NULL DEFAULT 0,
     LastUpdated       datetime                 DEFAULT NULL,
     PRIMARY KEY (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX AssetsName ON Assets (Name);
 CREATE INDEX AssetsStatus ON Assets (Status);
@@ -525,7 +525,7 @@ CREATE TABLE Catalogs (
     LastUpdatedBy     int(11)         NOT NULL DEFAULT 0,
     LastUpdated       datetime                 DEFAULT NULL,
     PRIMARY KEY (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE INDEX CatalogsName ON Catalogs (Name);
 CREATE INDEX CatalogsDisabled ON Catalogs (Disabled);
@@ -543,7 +543,7 @@ CREATE TABLE CustomRoles (
   LastUpdated DATETIME NULL  ,
   Disabled int2 NOT NULL DEFAULT 0 ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE TABLE ObjectCustomRoles (
   id INTEGER NOT NULL  AUTO_INCREMENT,
@@ -556,7 +556,7 @@ CREATE TABLE ObjectCustomRoles (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE UNIQUE INDEX ObjectCustomRoles1 ON ObjectCustomRoles (ObjectId, CustomRole);
 
@@ -571,7 +571,7 @@ CREATE TABLE Configurations (
     LastUpdatedBy     int(11)         NOT NULL DEFAULT 0,
     LastUpdated       datetime                 DEFAULT NULL,
     PRIMARY KEY (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4;
 
 CREATE UNIQUE INDEX Configurations1 ON Configurations (Name);
 CREATE INDEX Configurations2 ON Configurations (Disabled);
diff --git a/lib/RT/Handle.pm b/lib/RT/Handle.pm
index 29e929e7b7..ba2848d8a1 100644
--- a/lib/RT/Handle.pm
+++ b/lib/RT/Handle.pm
@@ -128,9 +128,8 @@ sub Connect {
     );
 
     if ( $db_type eq 'mysql' ) {
-        my $version = $self->DatabaseVersion;
-        ($version) = $version =~ /^(\d+\.\d+)/;
-        $self->dbh->do("SET NAMES 'utf8'") if $version >= 4.1;
+        # set the character set
+        $self->dbh->do("SET NAMES 'utf8mb4'");
     }
     elsif ( $db_type eq 'Pg' ) {
         my $version = $self->DatabaseVersion;
@@ -271,9 +270,17 @@ sub CheckCompatibility {
         return (0, "couldn't get version of the mysql server")
             unless $version;
 
-        ($version) = $version =~ /^(\d+\.\d+)/;
-        return (0, "RT is unsupported on MySQL versions before 4.1.  Your version is $version.")
-            if $version < 4.1;
+        # MySQL and MariaDB are both 'mysql' type.
+        # the minimum version supported is MySQL 5.7.7 / MariaDB 10.2.5
+        # the version string for MariaDB includes "MariaDB" in Debian/RedHat
+        my $is_mariadb        = $version =~ m{mariadb}i ? 1 : 0;
+        my $mysql_min_version = '5.7.7';    # so index sizes allow for VARCHAR(255) fields
+        my $mariadb_min_version = '10.2.5'; # uses innodb by default
+
+        return ( 0, "RT is unsupported on MySQL versions before $mysql_min_version.  Your version is $version.")
+            if !$is_mariadb && cmp_version( $version, $mysql_min_version ) < 0;
+        return ( 0, "RT is unsupported on MariaDB versions before $mariadb_min_version.  Your version is $version.")
+            if $is_mariadb && cmp_version( $version, $mariadb_min_version ) < 0;
 
         # MySQL must have InnoDB support
         local $dbh->{FetchHashKeyName} = 'NAME_lc';
@@ -307,18 +314,6 @@ sub CheckCompatibility {
                 $max_packet = sprintf("%.1fM", $max_packet/1024/1024);
                 warn "max_allowed_packet is set to $max_packet, which limits the maximum attachment or email size that RT can process.  Consider adjusting MySQL's max_allowed_packet setting.\n";
             }
-
-            my $full_version = $show_var->("version");
-            if ($full_version =~ /^5\.(\d+)\.(\d+)$/ and (($1 == 6 and $2 >= 20) or $1 > 6)) {
-                my $redo_log_size = $show_var->("innodb_log_file_size");
-                $redo_log_size *= $show_var->("innodb_log_files_in_group")
-                    if $full_version =~ /^5\.(\d+)\.(\d+)$/ and (($1 == 6 and $2 >= 22) or $1 > 6);
-
-                if ($redo_log_size / 10 < 5 * 1024 * 1024) {
-                    $redo_log_size = sprintf("%.1fM",$redo_log_size/1024/1024);
-                    warn "innodb_log_file_size is set to $redo_log_size; attachments can only be 10% of this value on MySQL 5.6.  Consider adjusting MySQL's innodb_log_file_size setting.\n";
-                }
-            }
         }
     }
     return (1)
diff --git a/sbin/rt-setup-fulltext-index.in b/sbin/rt-setup-fulltext-index.in
index ed19bad668..f342fda2cf 100644
--- a/sbin/rt-setup-fulltext-index.in
+++ b/sbin/rt-setup-fulltext-index.in
@@ -179,7 +179,7 @@ if ( $DB{'type'} eq 'mysql' ) {
     my $engine = $RT::Handle->dbh->{mysql_serverversion} < 50600 ? "MyISAM" : "InnoDB";
     my $schema = "CREATE TABLE $table ( "
         ."id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,"
-        ."Content LONGTEXT ) ENGINE=$engine CHARACTER SET utf8";
+        ."Content LONGTEXT ) ENGINE=$engine CHARACTER SET utf8mb4";
     insert_schema( $schema );
 
     insert_data( Table => $table, Engine => $engine );

commit ea7172902dd02e9162561fe1363cb6a65691ab2e
Author: michel <michel at bestpractical.com>
Date:   Tue Nov 26 16:26:07 2019 +0100

    Explain utf8mb4 character set updates

diff --git a/README.MariaDB b/README.MariaDB
new file mode 100644
index 0000000000..149f1ffa23
--- /dev/null
+++ b/README.MariaDB
@@ -0,0 +1,58 @@
+Starting with RT 5.0.0, the minimum supported MariaDB version is 10.2.5
+because this is the first version to provide full support for 4 byte
+utf8 characters in tables and indexes. Read on for details on this
+change.
+
+RT 5.0.0 now defaults MariaDB tables to utf8mb4, which is available in
+versions before 10.2.5. However, before MariaDB version 10.2.5, utf8mb4
+tables could not have indexes with type VARCHAR(255): the default size
+for index entries was 767 bytes, which is enough for 255 chars stored
+as at most 3 chars (the utf8 format), but not as 4 bytes (utf8mb4).
+10.2.5 sets the default index size to 3072 for InnoDB tables, resolving
+that issue.
+
+https://mariadb.com/kb/en/changes-improvements-in-mariadb-102/
+https://mariadb.com/kb/en/mariadb-1025-changelog/ (search for utf8)
+
+In MariaDB, RT uses the utf8mb4 character set to support all
+unicode characters, including the ones that are encoded with 4 bytes in
+utf8 (some Kanji characters and a good number of emojis). The DB tables
+and RT are both set to this character set.
+
+If your MariaDB database is used only for RT, you can consider
+setting the default character set to utf8mb4. This will
+ensure that backups and other database access outside of RT have the
+correct character set.
+
+This is done by adding the following lines to the MariaDB configuration:
+
+[client-server]
+character-set-server = utf8mb4
+
+You can check the values your server is using by running this command:
+    mysqladmin variables | grep -i character_set
+
+Setting the default is particularly important for mysqldump, to avoid
+backups to be silently corrupted.
+
+If the MySQL DB is shared with other applications and the default
+character set cannot be set to utf8mb4, the command to backup the
+database must set it explicitly:
+
+    ( mysqldump --default-character-set=utf8mb4 rt5 --tables sessions --no-data --single-transaction; \
+      mysqldump --default-character-set=utf8mb4 rt5 --ignore-table rt5.sessions --single-transaction ) \
+        | gzip > rt-`date +%Y%m%d`.sql.gz
+
+Restoring a backup is done the usual way, since the character set for
+all tables is set to utf8mb4, there is no further need to tell MariaDB
+about it:
+
+    gunzip -c rt-20191125.sql.gz | mysql -uroot -p rt5
+
+These character set updates now allow RT on MariaDB to accept and store 4-byte
+characters like emojis. However, searches can still be inconsistent. You may be
+able to get different or better results by experimenting with different collation
+settings. For more information:
+
+https://stackoverflow.com/a/41148052
+https://mariadb.com/kb/en/character-sets/
diff --git a/README.MySQL b/README.MySQL
new file mode 100644
index 0000000000..1ac84c83ba
--- /dev/null
+++ b/README.MySQL
@@ -0,0 +1,67 @@
+Starting with RT 5.0.0, the minimum supported MySQL version is 5.7.7
+because this is the first version to provide full support for 4 byte
+utf8 characters in tables and indexes. Read on for details on this
+change.
+
+Note that MySQL 8 is not yet supported because of changes to the group
+keyword.
+
+RT 5.0.0 now defaults MySQL tables to utf8mb4, which is available in
+versions before 5.7.7. However, before MySQL version 5.7.7, utf8mb4
+tables could not have indexes with type VARCHAR(255): the default size
+for index entries was 767 bytes, which is enough for 255 chars stored
+as at most 3 chars (the utf8 format), but not as 4 bytes (utf8mb4).
+5.7.7 sets the default index size to 3072 for InnoDB tables, resolving
+that issue.
+
+https://dev.mysql.com/doc/relnotes/mysql/5.7/en/news-5-7-7.html#mysqld-5-7-7-feature
+
+In MySQL, RT uses the utf8mb4 character set to support all
+unicode characters, including the ones that are encoded with 4 bytes in
+utf8 (some Kanji characters and a good number of emojis). The DB tables
+and the RT are both set to this character set.
+
+If your MySQL database is used only for RT, you can consider
+setting the default character set to utf8mb4. This will
+ensure that backups and other database access outside of RT have the
+correct character set.
+
+This is done by adding the following lines to the MySQL configuration:
+
+[mysqld]
+character-set-server = utf8mb4
+
+[client]
+default-character-set = utf8mb4
+
+[mysqldump]
+default-character-set = utf8mb4
+
+You can check the values your server is using by running this command:
+
+    mysqladmin variables | grep -i character_set
+
+Setting the default is particularly important for mysqldump, to avoid
+backups to be silently corrupted.
+
+If the MySQL DB is shared with other applications and the default
+character set cannot be set to utf8mb4, the command to backup the
+database can be set explicitly:
+
+    ( mysqldump --default-character-set=utf8mb4 rt5 --tables sessions --no-data --single-transaction; \
+      mysqldump --default-character-set=utf8mb4 rt5 --ignore-table rt5.sessions --single-transaction ) \
+        | gzip > rt-`date +%Y%m%d`.sql.gz
+
+Restoring a backup is done the usual way, since the character set for
+all tables is set to utf8mb4, there is no further need to tell MySQL
+about it:
+
+    gunzip -c rt-20191125.sql.gz | mysql -uroot -p rt5
+
+These character set updates now allow RT on MySQL to accept and store 4-byte
+characters like emojis. However, searches can still be inconsistent. You may be
+able to get different or better results by experimenting with different collation
+settings. For more information:
+
+https://stackoverflow.com/a/41148052
+https://dev.mysql.com/doc/refman/5.7/en/charset-unicode-sets.html
diff --git a/docs/UPGRADING-4.6 b/docs/UPGRADING-4.6
index c357291a4a..62535b56d4 100644
--- a/docs/UPGRADING-4.6
+++ b/docs/UPGRADING-4.6
@@ -14,6 +14,13 @@ extension writers.
 
 =item *
 
+On MySQL and MariaDB, the default character set has been changed from
+utf8 to utf8mb4. This provides support for 4-byte characters such as
+emojis on tickets. Postgres and Oracle already had support for these
+characters and are not effected by this update.
+
+=item *
+
 The variables which alter the set of HTML elements allowed in HTML
 scrubbing have moved; they have been renamed, and are now found under
 L<RT::Interface::Web::Scrubber>.
diff --git a/docs/backups.pod b/docs/backups.pod
index 88e0e63022..9f86711b0d 100644
--- a/docs/backups.pod
+++ b/docs/backups.pod
@@ -39,10 +39,15 @@ RT. :)
 
 =head3 MySQL and MariaDB
 
-    ( mysqldump rt5 --tables sessions --no-data --single-transaction; \
-      mysqldump rt5 --ignore-table rt5.sessions --single-transaction ) \
+    ( mysqldump --default-character-set=utf8mb4 rt5 --tables sessions --no-data --single-transaction; \
+      mysqldump --default-character-set=utf8mb4 rt5 --ignore-table rt5.sessions --single-transaction ) \
         | gzip > rt-`date +%Y%m%d`.sql.gz
 
+If the default character set for the entire database is not set to
+utf8mb4, it is especially important to set the character set in the
+mysqldump command to avoid corrupted backups. As always, it's important
+to test your backups to confirm they restore successfully.
+
 The dump will be much faster if you can connect to the MySQL or
 MariaDB server over localhost.  This will use a local socket instead
 of the network.

commit 23a435381b0a6fd8902ed1b8dbe831bed02e7851
Author: michel <michel at bestpractical.com>
Date:   Wed Dec 4 21:03:17 2019 +0100

    Add upgrade step to migrate charset to utf8mb4
    
    The data itself should not change: it doesn't include any 4 byte utf8
    character.

diff --git a/etc/upgrade/4.5.2/schema.mysql b/etc/upgrade/4.5.2/schema.mysql
new file mode 100644
index 0000000000..df78a7cba3
--- /dev/null
+++ b/etc/upgrade/4.5.2/schema.mysql
@@ -0,0 +1,32 @@
+ ALTER TABLE `Attachments` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Queues` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Links` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Principals` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Groups` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `ScripConditions` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Transactions` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Scrips` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `ObjectScrips` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `ACL` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `GroupMembers` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `CachedGroupMembers` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Users` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Tickets` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `ScripActions` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Templates` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `ObjectCustomFieldValues` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `CustomFields` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `ObjectCustomFields` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `CustomFieldValues` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Attributes` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Classes` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Articles` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Topics` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `ObjectTopics` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `ObjectClasses` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Assets` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Catalogs` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `CustomRoles` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `ObjectCustomRoles` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `Configurations` CONVERT TO CHARACTER SET utf8mb4;
+ ALTER TABLE `sessions` CONVERT TO CHARACTER SET utf8mb4;

commit fe95cfe7329b0a3b92c9c9c8e6d15978fd0563b7
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Feb 7 21:56:00 2020 +0800

    Add ticket search tests mainly for 4-byte UTF-8 characters
    
    This is initially for MySQL's utf8mb4 charset change, sadly that its
    emoji search is not accurate right now, so we have to skip them.

diff --git a/t/ticket/search_utf8.t b/t/ticket/search_utf8.t
new file mode 100644
index 0000000000..ca2b720b9a
--- /dev/null
+++ b/t/ticket/search_utf8.t
@@ -0,0 +1,47 @@
+use strict;
+use warnings;
+
+use RT::Test tests => undef;
+
+use utf8;
+my @tickets = (
+    'ñèñ',         # accent
+    '你好',         # chinese
+    "\x{20779}",    # 4 bytes han
+    "\x{1F36A}",    # cookie
+    "\x{1F4A9}",    # pile of poo
+    "\x{1F32E}",    # taco
+    "\x{1F336}",    # pepper
+);
+
+RT::Test->load_or_create_custom_field(
+    Name  => 'foo',
+    Type  => 'Freeform',
+    Queue => 'General',
+);
+
+for my $str (@tickets) {
+    RT::Test->create_ticket(
+        Queue        => 'General',
+        Subject      => "Help: $str",
+        Content      => "Content is $str",
+        CustomFields => { foo => $str },
+    );
+}
+
+SKIP: for my $str (@tickets) {
+    skip "MySQL's 4-byte char search is inaccurate", 20
+        if length $str == 1 && RT->Config->Get('DatabaseType') eq 'mysql';
+    my $tickets = RT::Tickets->new( RT->SystemUser );
+    $tickets->FromSQL("Subject LIKE '$str'");
+    diag "Search $str in subject";
+    is( $tickets->Count, 1, 'Found 1 ticket' );
+    like( $tickets->First->Subject, qr/$str/, 'Found the ticket' );
+
+    diag "Search $str in custom field";
+    $tickets->FromSQL("CustomField.foo = '$str'");
+    is( $tickets->Count,                               1,    'Found 1 ticket' );
+    is( $tickets->First->FirstCustomFieldValue('foo'), $str, 'Found the ticket' );
+}
+
+done_testing;

-----------------------------------------------------------------------


More information about the rt-commit mailing list