[Rt-commit] rt branch, 4.6/mysql-utf8mb4, created. rt-4.4.4-537-g1d8d4df791

Michel Rodriguez michel at bestpractical.com
Wed Dec 4 15:23:35 EST 2019


The branch, 4.6/mysql-utf8mb4 has been created
        at  1d8d4df7914fe7c791b4cf199764ad552e580da0 (commit)

- Log -----------------------------------------------------------------
commit 09ab7e589190b3e97829a9e5c9ae9bc97090f296
Author: michel <michel at bestpractical.com>
Date:   Thu Nov 21 15:39:20 2019 +0100

    Adds support for utf8mb4 in mysql.
    
    This allows 4-byte characters, such as emojis, in fields.
    This feature is supported in mysql 5.5.3 and later.
    
    It requires the database to be converted to utf8mb4 to
    support those characters.

diff --git a/README b/README
index 6ec2810146..e963a6203b 100644
--- a/README
+++ b/README
@@ -20,7 +20,7 @@ o   Perl 5.10.1 or later (http://www.perl.org).
 
 o   A supported SQL database
 
-        Currently supported:  MySQL 5.1 - 5.7 with InnoDB support
+        Currently supported:  MySQL 5.5.3 - 5.7 with InnoDB support
                               MariaDB 10.0 - 10.1 with InnoDB support
                               Postgres 9.0 or later
                               Oracle 11g or later
diff --git a/etc/schema.mysql b/etc/schema.mysql
index cb87d86a3a..151fc3eed6 100644
--- a/etc/schema.mysql
+++ b/etc/schema.mysql
@@ -13,7 +13,7 @@ CREATE TABLE Attachments (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET utf8;
+) ENGINE=InnoDB   CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Attachments2 ON Attachments (TransactionId) ;
 CREATE INDEX Attachments3 ON Attachments (Parent, TransactionId) ;
@@ -35,7 +35,7 @@ CREATE TABLE Queues (
   SLADisabled int2 NOT NULL DEFAULT 1 ,
   Disabled int2 NOT NULL DEFAULT 0 ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE UNIQUE INDEX Queues1 ON Queues (Name) ;
 CREATE INDEX Queues2 ON Queues (Disabled) ;
@@ -54,7 +54,7 @@ CREATE TABLE Links (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Links2 ON Links (Base,  Type) ;
 CREATE INDEX Links3 ON Links (Target,  Type) ;
@@ -81,7 +81,7 @@ CREATE TABLE Groups (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Groups1 ON Groups (Domain, Name, Instance);
 CREATE INDEX Groups2 On Groups (Instance);
@@ -99,7 +99,7 @@ CREATE TABLE ScripConditions (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 CREATE TABLE Transactions (
@@ -119,7 +119,7 @@ CREATE TABLE Transactions (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Transactions1 ON Transactions (ObjectType, ObjectId);
 
@@ -140,7 +140,7 @@ CREATE TABLE Scrips (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE ObjectScrips (
   id INTEGER NOT NULL  AUTO_INCREMENT,
@@ -154,7 +154,7 @@ CREATE TABLE ObjectScrips (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE UNIQUE INDEX ObjectScrips1 ON ObjectScrips (ObjectId, Scrip);
 
@@ -171,7 +171,7 @@ CREATE TABLE ACL (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX  ACL1 on ACL(RightName, ObjectType, ObjectId,PrincipalType,PrincipalId);
 
@@ -186,7 +186,7 @@ CREATE TABLE GroupMembers (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 CREATE UNIQUE INDEX GroupMembers1 on GroupMembers (GroupId, MemberId);
 
 
@@ -206,7 +206,7 @@ create table CachedGroupMembers (
                                            # group members recursively.
                                            # Also, this allows us to have the ACL system elide members of disabled groups
         PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX DisGrouMem  on CachedGroupMembers (GroupId,MemberId,Disabled);
 CREATE INDEX CachedGroupMembers2 on CachedGroupMembers (MemberId, GroupId, Disabled);
@@ -246,7 +246,7 @@ CREATE TABLE Users (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 CREATE UNIQUE INDEX Users1 ON Users (Name) ;
@@ -283,7 +283,7 @@ CREATE TABLE Tickets (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Tickets1 ON Tickets (Queue, Status) ;
 CREATE INDEX Tickets2 ON Tickets (Owner) ;
@@ -302,7 +302,7 @@ CREATE TABLE ScripActions (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 
@@ -318,7 +318,7 @@ CREATE TABLE Templates (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 
@@ -340,7 +340,7 @@ CREATE TABLE ObjectCustomFieldValues (
   LastUpdated DATETIME NULL  ,
   Disabled int2 NOT NULL DEFAULT 0 ,        # New -- whether the value was current
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX ObjectCustomFieldValues1 ON ObjectCustomFieldValues (Content);
 CREATE INDEX ObjectCustomFieldValues2 ON ObjectCustomFieldValues (CustomField,ObjectType,ObjectId);
@@ -369,7 +369,7 @@ CREATE TABLE CustomFields (
   LastUpdated DATETIME NULL  ,
   Disabled int2 NOT NULL DEFAULT 0 ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 
@@ -384,7 +384,7 @@ CREATE TABLE ObjectCustomFields (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 
@@ -401,7 +401,7 @@ CREATE TABLE CustomFieldValues (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX CustomFieldValues1 ON CustomFieldValues (CustomField);
 
@@ -421,7 +421,7 @@ CREATE TABLE Attributes (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Attributes1 on Attributes(Name);
 CREATE INDEX Attributes2 on Attributes(ObjectType, ObjectId);
@@ -449,7 +449,7 @@ CREATE TABLE Classes (
   LastUpdatedBy int(11) NOT NULL default '0',
   LastUpdated datetime default NULL,
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE Articles (
   id int(11) NOT NULL auto_increment,
@@ -465,7 +465,7 @@ CREATE TABLE Articles (
   LastUpdatedBy int(11) NOT NULL default '0',
   LastUpdated datetime default NULL,
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE Topics (
   id int(11) NOT NULL auto_increment,
@@ -475,7 +475,7 @@ CREATE TABLE Topics (
   ObjectType varchar(64) character set ascii NOT NULL default '',
   ObjectId int(11) NOT NULL default '0',
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE ObjectTopics (
   id int(11) NOT NULL auto_increment,
@@ -483,7 +483,7 @@ CREATE TABLE ObjectTopics (
   ObjectType varchar(64) character set ascii NOT NULL default '',
   ObjectId int(11) NOT NULL default '0',
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE ObjectClasses (
   id int(11) NOT NULL auto_increment,
@@ -495,7 +495,7 @@ CREATE TABLE ObjectClasses (
   LastUpdatedBy int(11) NOT NULL default '0',
   LastUpdated datetime default NULL,
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE Assets (
     id                int(11)         NOT NULL AUTO_INCREMENT,
@@ -508,7 +508,7 @@ CREATE TABLE Assets (
     LastUpdatedBy     int(11)         NOT NULL DEFAULT 0,
     LastUpdated       datetime                 DEFAULT NULL,
     PRIMARY KEY (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX AssetsName ON Assets (Name);
 CREATE INDEX AssetsStatus ON Assets (Status);
@@ -525,7 +525,7 @@ CREATE TABLE Catalogs (
     LastUpdatedBy     int(11)         NOT NULL DEFAULT 0,
     LastUpdated       datetime                 DEFAULT NULL,
     PRIMARY KEY (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX CatalogsName ON Catalogs (Name);
 CREATE INDEX CatalogsDisabled ON Catalogs (Disabled);
@@ -543,7 +543,7 @@ CREATE TABLE CustomRoles (
   LastUpdated DATETIME NULL  ,
   Disabled int2 NOT NULL DEFAULT 0 ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE ObjectCustomRoles (
   id INTEGER NOT NULL  AUTO_INCREMENT,
@@ -556,6 +556,6 @@ CREATE TABLE ObjectCustomRoles (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8;
+) ENGINE=InnoDB CHARACTER SET utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE UNIQUE INDEX ObjectCustomRoles1 ON ObjectCustomRoles (ObjectId, CustomRole);
diff --git a/lib/RT/Handle.pm b/lib/RT/Handle.pm
index 29e929e7b7..31403c4a66 100644
--- a/lib/RT/Handle.pm
+++ b/lib/RT/Handle.pm
@@ -129,8 +129,13 @@ sub Connect {
 
     if ( $db_type eq 'mysql' ) {
         my $version = $self->DatabaseVersion;
-        ($version) = $version =~ /^(\d+\.\d+)/;
-        $self->dbh->do("SET NAMES 'utf8'") if $version >= 4.1;
+
+        if( cmp_version( $version, '5.5.3') > 0 ) {
+            $self->dbh->do("SET NAMES 'utf8mb4'");
+        }
+        elsif( cmp_version( $version, '4.1') >= 0 ) {
+            $self->dbh->do("SET NAMES 'utf8'");
+        }
     }
     elsif ( $db_type eq 'Pg' ) {
         my $version = $self->DatabaseVersion;
@@ -141,6 +146,7 @@ sub Connect {
     $self->dbh->{'LongReadLen'} = RT->Config->Get('MaxAttachmentSize');
 }
 
+
 =head2 BuildDSN
 
 Build the DSN for the RT database. Doesn't take any parameters, draws all that
@@ -271,9 +277,9 @@ sub CheckCompatibility {
         return (0, "couldn't get version of the mysql server")
             unless $version;
 
-        ($version) = $version =~ /^(\d+\.\d+)/;
-        return (0, "RT is unsupported on MySQL versions before 4.1.  Your version is $version.")
-            if $version < 4.1;
+        my $min_version = '5.6';
+        return (0, "RT is unsupported on MySQL versions before $min_version.  Your version is $version.")
+            if cmp_version( $version, $min_version) < 0;
 
         # MySQL must have InnoDB support
         local $dbh->{FetchHashKeyName} = 'NAME_lc';
@@ -320,7 +326,6 @@ sub CheckCompatibility {
                 }
             }
         }
-    }
     return (1)
 }
 

commit e53879581d06dd9012f779798e988feb16e52e8b
Author: michel <michel at bestpractical.com>
Date:   Tue Nov 26 16:27:02 2019 +0100

    Adds check on the database encoding. It should be utf8mb4.
    
    We check one column (Tickets.Subject) for practical reasons.

diff --git a/lib/RT/Handle.pm b/lib/RT/Handle.pm
index 31403c4a66..685dd5c46e 100644
--- a/lib/RT/Handle.pm
+++ b/lib/RT/Handle.pm
@@ -326,6 +326,25 @@ sub CheckCompatibility {
                 }
             }
         }
+
+        # check that the DB encoding is utf8mb4
+        # we check one specific column (Tickets.Subject):
+        #  - checking the DB default may be misleading (what's relevant is wheter RT tables have teh proper charset)
+        #  - checking all columns would be expensive
+        my $allowed_charset = 'utf8mb4';
+        my $column_info = $dbh->selectrow_hashref( "show full columns from Tickets where Field = 'Subject'" );
+        if( $column_info) {
+            # $column_info is only defined if the table exists, skip the check it it doesn't (during make initdb)
+            # we get the charset from the collation on the field
+            my $collation = $column_info->{collation};
+            my $collation_info =  $dbh->selectrow_hashref( "SHOW COLLATION WHERE Collation = ?", {}, $collation);
+            my $charset =  $collation_info->{charset} || '';
+            if( $charset ne $allowed_charset) {
+                warn "table encoding set to $charset, it should be $allowed_charset\n";
+            }
+        }
+    }
+
     return (1)
 }
 

commit 46baf3d45cc8b6f7b848688961215dd18ac25700
Author: michel <michel at bestpractical.com>
Date:   Tue Nov 26 16:26:07 2019 +0100

    Adds README.MySQL to explain utf8mb4 character set options.

diff --git a/README.MySQL b/README.MySQL
new file mode 100644
index 0000000000..5740a805e8
--- /dev/null
+++ b/README.MySQL
@@ -0,0 +1,46 @@
+In MySQL, RT uses the utf8mb4 character set, in order to support all
+unicode characters, including the ones that are encoded on 4 bytes in
+utf8 (some Kanji characters and a good number of emojis).
+
+The DB tables and the RT client are both set to this character set.
+
+Unless the DB is shared with other applications that do not use this
+character set, it should be configured to use it by default. This
+should be done in the server, client and mysqldump sections of the 
+mysql config files.
+
+This is done by adding the following lines to the MySQL configuration:
+
+[mysqld]
+character-set-server = utf8mb4
+collation-server = utf8mb4_unicode_ci
+
+[client]
+default-character-set = utf8mb4
+
+[mysqldump]
+default-character-set = utf8mb4
+
+The configuration should be in /etc/my.cnf (RedHat) or /etc/mysql/
+(Debian, split between various files).
+
+You can check the values your server is using by running this command:
+  mysqladmin variables | grep -i character_set
+
+Setting the default is particularly important for mysqldump, to avoid
+backups to be silently corrupted.
+
+
+If the MySQL DB is shared with other applications and the default 
+character set cannot be set to utf8mb4, the command to backup the
+database must set it explicitely:
+
+    ( mysqldump --default-character-set=utf8mb4 rt4 --tables sessions --no-data --single-transaction; \
+      mysqldump --default-character-set=utf8mb4 rt4 --ignore-table rt4.sessions --single-transaction ) \
+        | gzip > rt-`date +%Y%m%d`.sql.gz
+
+Restoring a backup is done the usual way, since the character set for
+all tables is set to utf8mb4, there is no further need to tell MySQL 
+about it:
+
+    gunzip -c rt-20191125.sql.gz | mysql -uroot -p rt4

commit f9c6adedadfd296e67e1238b74892d90cd54be14
Author: michel <michel at bestpractical.com>
Date:   Tue Nov 26 18:45:07 2019 +0100

    Normalizes character set/collation declaration for all tables.
    
    to "CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci

diff --git a/etc/schema.mysql b/etc/schema.mysql
index 151fc3eed6..9c4cee953b 100644
--- a/etc/schema.mysql
+++ b/etc/schema.mysql
@@ -13,7 +13,7 @@ CREATE TABLE Attachments (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB   CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Attachments2 ON Attachments (TransactionId) ;
 CREATE INDEX Attachments3 ON Attachments (Parent, TransactionId) ;
@@ -35,7 +35,7 @@ CREATE TABLE Queues (
   SLADisabled int2 NOT NULL DEFAULT 1 ,
   Disabled int2 NOT NULL DEFAULT 0 ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE UNIQUE INDEX Queues1 ON Queues (Name) ;
 CREATE INDEX Queues2 ON Queues (Disabled) ;
@@ -54,7 +54,7 @@ CREATE TABLE Links (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Links2 ON Links (Base,  Type) ;
 CREATE INDEX Links3 ON Links (Target,  Type) ;
@@ -81,7 +81,7 @@ CREATE TABLE Groups (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Groups1 ON Groups (Domain, Name, Instance);
 CREATE INDEX Groups2 On Groups (Instance);
@@ -99,7 +99,7 @@ CREATE TABLE ScripConditions (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 CREATE TABLE Transactions (
@@ -119,7 +119,7 @@ CREATE TABLE Transactions (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Transactions1 ON Transactions (ObjectType, ObjectId);
 
@@ -140,7 +140,7 @@ CREATE TABLE Scrips (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE ObjectScrips (
   id INTEGER NOT NULL  AUTO_INCREMENT,
@@ -154,7 +154,7 @@ CREATE TABLE ObjectScrips (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE UNIQUE INDEX ObjectScrips1 ON ObjectScrips (ObjectId, Scrip);
 
@@ -171,7 +171,7 @@ CREATE TABLE ACL (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX  ACL1 on ACL(RightName, ObjectType, ObjectId,PrincipalType,PrincipalId);
 
@@ -186,7 +186,7 @@ CREATE TABLE GroupMembers (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 CREATE UNIQUE INDEX GroupMembers1 on GroupMembers (GroupId, MemberId);
 
 
@@ -206,7 +206,7 @@ create table CachedGroupMembers (
                                            # group members recursively.
                                            # Also, this allows us to have the ACL system elide members of disabled groups
         PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX DisGrouMem  on CachedGroupMembers (GroupId,MemberId,Disabled);
 CREATE INDEX CachedGroupMembers2 on CachedGroupMembers (MemberId, GroupId, Disabled);
@@ -283,7 +283,7 @@ CREATE TABLE Tickets (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Tickets1 ON Tickets (Queue, Status) ;
 CREATE INDEX Tickets2 ON Tickets (Owner) ;
@@ -302,7 +302,7 @@ CREATE TABLE ScripActions (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 
@@ -318,7 +318,7 @@ CREATE TABLE Templates (
   Creator integer NOT NULL DEFAULT 0  ,
   Created DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 
@@ -340,7 +340,7 @@ CREATE TABLE ObjectCustomFieldValues (
   LastUpdated DATETIME NULL  ,
   Disabled int2 NOT NULL DEFAULT 0 ,        # New -- whether the value was current
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX ObjectCustomFieldValues1 ON ObjectCustomFieldValues (Content);
 CREATE INDEX ObjectCustomFieldValues2 ON ObjectCustomFieldValues (CustomField,ObjectType,ObjectId);
@@ -369,7 +369,7 @@ CREATE TABLE CustomFields (
   LastUpdated DATETIME NULL  ,
   Disabled int2 NOT NULL DEFAULT 0 ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 
@@ -384,7 +384,7 @@ CREATE TABLE ObjectCustomFields (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 
 
@@ -401,7 +401,7 @@ CREATE TABLE CustomFieldValues (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX CustomFieldValues1 ON CustomFieldValues (CustomField);
 
@@ -421,7 +421,7 @@ CREATE TABLE Attributes (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB  CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX Attributes1 on Attributes(Name);
 CREATE INDEX Attributes2 on Attributes(ObjectType, ObjectId);
@@ -449,7 +449,7 @@ CREATE TABLE Classes (
   LastUpdatedBy int(11) NOT NULL default '0',
   LastUpdated datetime default NULL,
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE Articles (
   id int(11) NOT NULL auto_increment,
@@ -465,7 +465,7 @@ CREATE TABLE Articles (
   LastUpdatedBy int(11) NOT NULL default '0',
   LastUpdated datetime default NULL,
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE Topics (
   id int(11) NOT NULL auto_increment,
@@ -475,7 +475,7 @@ CREATE TABLE Topics (
   ObjectType varchar(64) character set ascii NOT NULL default '',
   ObjectId int(11) NOT NULL default '0',
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE ObjectTopics (
   id int(11) NOT NULL auto_increment,
@@ -483,7 +483,7 @@ CREATE TABLE ObjectTopics (
   ObjectType varchar(64) character set ascii NOT NULL default '',
   ObjectId int(11) NOT NULL default '0',
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE ObjectClasses (
   id int(11) NOT NULL auto_increment,
@@ -495,7 +495,7 @@ CREATE TABLE ObjectClasses (
   LastUpdatedBy int(11) NOT NULL default '0',
   LastUpdated datetime default NULL,
   PRIMARY KEY  (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE Assets (
     id                int(11)         NOT NULL AUTO_INCREMENT,
@@ -508,7 +508,7 @@ CREATE TABLE Assets (
     LastUpdatedBy     int(11)         NOT NULL DEFAULT 0,
     LastUpdated       datetime                 DEFAULT NULL,
     PRIMARY KEY (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX AssetsName ON Assets (Name);
 CREATE INDEX AssetsStatus ON Assets (Status);
@@ -525,7 +525,7 @@ CREATE TABLE Catalogs (
     LastUpdatedBy     int(11)         NOT NULL DEFAULT 0,
     LastUpdated       datetime                 DEFAULT NULL,
     PRIMARY KEY (id)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE INDEX CatalogsName ON Catalogs (Name);
 CREATE INDEX CatalogsDisabled ON Catalogs (Disabled);
@@ -543,7 +543,7 @@ CREATE TABLE CustomRoles (
   LastUpdated DATETIME NULL  ,
   Disabled int2 NOT NULL DEFAULT 0 ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE TABLE ObjectCustomRoles (
   id INTEGER NOT NULL  AUTO_INCREMENT,
@@ -556,6 +556,6 @@ CREATE TABLE ObjectCustomRoles (
   LastUpdatedBy integer NOT NULL DEFAULT 0  ,
   LastUpdated DATETIME NULL  ,
   PRIMARY KEY (id)
-) ENGINE=InnoDB CHARACTER SET utf8mb4 COLLATE = utf8mb4_unicode_ci;
+) ENGINE=InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
 
 CREATE UNIQUE INDEX ObjectCustomRoles1 ON ObjectCustomRoles (ObjectId, CustomRole);

commit b9e5c51ea3f5d2c6bfa77aea6e0d1b1e2fd7be49
Author: michel <michel at bestpractical.com>
Date:   Wed Nov 27 22:26:00 2019 +0100

    Adds test for 4-byte utf-8 character searches.
    
    Mostly to check that MySQL does the right thing with them, using
    the utf8mb4 character set.

diff --git a/t/ticket/utf8mb4.t b/t/ticket/utf8mb4.t
new file mode 100644
index 0000000000..d7a4fa5448
--- /dev/null
+++ b/t/ticket/utf8mb4.t
@@ -0,0 +1,97 @@
+
+use strict;
+use warnings;
+
+use utf8;
+
+use RT::Test nodata => 1, tests => undef;
+
+my %tickets = ( 'ascii'  => '#',
+             'accent' => 'ñèñ',
+              ck      =>"\x{1F36A}",
+              kanji   =>"\x{20779}",
+              pop     =>"\x{1F4A9}",
+              taco    =>"\x{1F32E}",
+              pepper  =>"\x{1F336}",
+);
+
+# setup the queue
+
+my $q = RT::Queue->new(RT->SystemUser);
+my $queue = 'queue';  # used globaly
+$q->Create(Name => $queue);
+ok ($q->id, "Created the queue");
+
+my %cf = ( desc => create_cf( $q, 'desc'), char => create_cf( $q, 'char'), cat => create_cf( $q, 'cat') );
+my %cats = ( regular => 0, emoji => 0);
+
+while( my( $desc, $char)= each %tickets) {
+
+    my $cat = ord( $char ) > 100000 ? 'emoji' : 'regular';
+    $cats{$cat}++;
+
+    my $ticket = RT::Ticket->new(RT->SystemUser);
+    my ( $id, undef, $msg ) = $ticket->Create(
+        Queue      => $q->id,
+        Requestor  => ['test at example.com'],
+        Subject    => subject( $desc, $char ),
+        Content    => "Content is $char$char, still $desc",
+        $cf{desc}->{create} => $desc, # a unique text description
+        $cf{char}->{create} => $char, # 1, possibly 4-byte, character
+        $cf{cat}->{create} =>  $cat,  # category (emoji/regular)
+    );
+    ok( $id, $msg );
+}
+
+my $tix = RT::Tickets->new(RT->SystemUser);
+$tix->FromSQL("Queue = '$queue'");
+is($tix->Count, scalar( keys %tickets), "found all the tickets")
+    or diag "wrong results from SQL:\n". $tix->BuildSelectCountQuery;
+
+while( my( $cat, $nb) = each %cats) {
+    my $tix_cat = RT::Tickets->new(RT->SystemUser);
+    $tix_cat->FromSQL("Queue = '$queue' AND $cf{cat}->{search} = '$cat'");
+    is($tix_cat->Count, $nb, "found $nb $cat ticket (from cat CF)")
+        or diag "wrong results from SQL:\n". $tix_cat->BuildSelectCountQuery;
+}
+
+while( my( $desc, $char)= each %tickets) {
+    my $subject = subject( $desc, $char );
+    test_search( $desc, $char, 'Subject', $char);
+    test_search( $desc, $char, $cf{desc}->{search}, $desc);
+    test_search( $desc, $char, $cf{char}->{search}, $char);
+}
+
+done_testing;
+
+sub subject {
+    my( $desc, $char)= @_;
+    return "$char ($desc)";
+}
+
+sub create_cf {
+    my( $q, $name)= @_;
+    my $cf = RT::CustomField->new(RT->SystemUser);
+    $cf->Create(Name => $name, Type => 'Freeform', MaxValues => 0, Queue => $q->id);
+    ok($cf->id, "Created the $name CF");
+    my $create = "CustomField-" . $cf->id; # key used to create the CF
+    my $search = "CF.$name";           # field in search
+    return { name => $name, create => $create, search => $search };
+}
+
+sub test_search {
+    my( $desc, $char, $field, $to_search)= @_;
+
+    my $tix = RT::Tickets->new(RT->SystemUser);
+    $tix->FromSQL("Queue = '$queue' AND $field LIKE '$to_search'");
+
+    is($tix->Count, 1, "found $to_search ticket (from $field)")
+        or diag "wrong results from SQL:\n". $tix->BuildSelectCountQuery;
+
+    my $ticket = $tix->First;
+    return if ! $ticket;
+    my $got =  $ticket->Subject;
+    my $expected = subject( $desc, $char);
+    is( $got, $expected, "$char, $field: Subject is right");
+}
+

commit 82ce30b66d009747a6e7965565dbe66393fd8311
Author: michel <michel at bestpractical.com>
Date:   Mon Dec 2 21:08:35 2019 +0100

    Adds the utf8mb4 character set to the full text indexer
    
    Without this the indexer fails on 4-byte utf8 characters
    Note that full test searches do not find emojis, since they are not words.

diff --git a/sbin/rt-setup-fulltext-index.in b/sbin/rt-setup-fulltext-index.in
index ed19bad668..e31e58880c 100644
--- a/sbin/rt-setup-fulltext-index.in
+++ b/sbin/rt-setup-fulltext-index.in
@@ -179,7 +179,7 @@ if ( $DB{'type'} eq 'mysql' ) {
     my $engine = $RT::Handle->dbh->{mysql_serverversion} < 50600 ? "MyISAM" : "InnoDB";
     my $schema = "CREATE TABLE $table ( "
         ."id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,"
-        ."Content LONGTEXT ) ENGINE=$engine CHARACTER SET utf8";
+        ."Content LONGTEXT ) ENGINE=$engine CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci";
     insert_schema( $schema );
 
     insert_data( Table => $table, Engine => $engine );

commit 1d8d4df7914fe7c791b4cf199764ad552e580da0
Author: michel <michel at bestpractical.com>
Date:   Wed Dec 4 21:03:17 2019 +0100

    Adds the upgrade script: alters all relevant tables to use the utf8mb4 charset.
    
    The data itself should not change: it doesn't include any 4 byte utf8 character.

diff --git a/etc/upgrade/4.5.2/schema.mysql b/etc/upgrade/4.5.2/schema.mysql
new file mode 100644
index 0000000000..34812d2f28
--- /dev/null
+++ b/etc/upgrade/4.5.2/schema.mysql
@@ -0,0 +1,22 @@
+ ALTER TABLE `Attachments` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Queues` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Links` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Groups` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `ScripConditions` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Transactions` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Scrips` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `ObjectScrips` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Users` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Tickets` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `ScripActions` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Templates` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `ObjectCustomFieldValues` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `CustomFields` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `CustomFieldValues` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Attributes` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Classes` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Articles` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Topics` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Assets` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `Catalogs` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+ ALTER TABLE `CustomRoles` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;

-----------------------------------------------------------------------


More information about the rt-commit mailing list