[Rt-commit] rt branch, 4.0/make-sure-utf8-is-utf8, created. rt-4.0.6-250-gf04f561

Ruslan Zakirov ruz at bestpractical.com
Wed Jul 18 17:23:31 EDT 2012


The branch, 4.0/make-sure-utf8-is-utf8 has been created
        at  f04f561fbeca6990ceb773934443c1745917752d (commit)

- Log -----------------------------------------------------------------
commit f04f561fbeca6990ceb773934443c1745917752d
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Thu Jul 19 01:15:18 2012 +0400

    don't skip conversion from utf8 to utf8
    
    An email may say that it's UTF-8, but it
    can actually contain anything. We were trusting the mail,
    but there are too many MUAs, MDAs and MTAs that do
    things wrong.
    
    If we don't do this conversion, then at least on Pg RT fails
    to create tickets with the error 'invalid byte sequence for encoding
    "UTF8"'.

diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index cadf7cc..e453cfa 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -227,7 +227,7 @@ sub SetMIMEEntityToEncoding {
 
     my $body = $entity->bodyhandle;
 
-    if ( $enc ne $charset && $body ) {
+    if ( $body && ($enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i) ) {
         my $string = $body->as_string or return;
 
         $RT::Logger->debug( "Converting '$charset' to '$enc' for "
@@ -335,7 +335,7 @@ sub DecodeMIMEWordsToEncoding {
             }
 
             # now we have got a decoded subject, try to convert into the encoding
-            unless ( $charset eq $to_charset ) {
+            if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) {
                 Encode::from_to( $enc_str, $charset, $to_charset );
             }
 
@@ -537,7 +537,7 @@ sub SetMIMEHeadToEncoding {
         my @values = $head->get_all($tag);
         $head->delete($tag);
         foreach my $value (@values) {
-            if ( $charset ne $enc ) {
+            if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) {
                 Encode::_utf8_off($value);
                 Encode::from_to( $value, $charset => $enc );
             }
diff --git a/t/mail/gateway.t b/t/mail/gateway.t
index 9f0e669..f44d143 100644
--- a/t/mail/gateway.t
+++ b/t/mail/gateway.t
@@ -608,6 +608,36 @@ EOF
     $m->no_warnings_ok;
 }
 
+diag "make sure we check that UTF-8 is really UTF-8";
+{
+    my $text = <<EOF;
+From: root\@localhost
+To: rtemail\@@{[RT->Config->Get('rtname')]}
+Subject: This is test wrong utf-8 chars
+Content-Type: text/plain; charset="utf-8"
+
+utf-8: informaci\303\263n confidencial
+latin1: informaci\363n confidencial
+
+bye
+EOF
+    my ($status, $id) = RT::Test->send_via_mailgate_and_http($text);
+    is ($status >> 8, 0, "The mail gateway exited normally");
+    ok ($id, "created ticket");
+
+    my $tick = RT::Test->last_ticket;
+    is ($tick->Id, $id, "correct ticket");
+
+    my $content = $tick->Transactions->First->Content;
+    Encode::_utf8_off($content);
+
+    like $content, qr{informaci\303\263n confidencial};
+    like $content, qr{informaci\357\277\275n confidencial};
+
+    $m->no_warnings_ok;
+}
+exit;
+
 diag "check that mailgate doesn't suffer from empty Reply-To:";
 {
     my $text = <<EOF;

-----------------------------------------------------------------------


More information about the Rt-commit mailing list