[Rt-commit] rt branch, 4.2/set-mime-encoding-refactor, created. rt-4.1.8-374-gf627f62

? sunnavy sunnavy at bestpractical.com
Mon May 20 12:16:29 EDT 2013


The branch, 4.2/set-mime-encoding-refactor has been created
        at  f627f62ac54bb7e4640d7a5b5e88a584e466a2ed (commit)

- Log -----------------------------------------------------------------
commit 874779b3c1f6ead9a9b92e0ec1fa108e8bdba1e4
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue May 21 00:09:42 2013 +0800

    refactor SetMIMEEncoding functionality( see also #17680 )
    
    incoming mail:
    
    1) find encoding
    2) if found then try to convert to utf-8 in croak mode, return if success
    3) guess encoding
    4) if guessed differently then try to convert to utf-8 in croak mode, return if success
    5) mark part as application/octet-stream instead of falling back to any encoding
    
    outgoing mail:
    
    1) find encoding
    2) if didn't find then do nothing, send as is, let MUA deal with it
    3) if found then try to convert it to outgoing encoding in croak mode, return if success
    4) do nothing otherwise, keep original encoding

diff --git a/lib/RT/Action/SendEmail.pm b/lib/RT/Action/SendEmail.pm
index 65f99fd..a06a8a6 100644
--- a/lib/RT/Action/SendEmail.pm
+++ b/lib/RT/Action/SendEmail.pm
@@ -197,7 +197,7 @@ sub Prepare {
 
     RT::I18N::SetMIMEEntityToEncoding( $MIMEObj,
         RT->Config->Get('EmailOutputEncoding'),
-        'mime_words_ok', );
+        'mime_words_ok', 'is_out' );
 
     # Build up a MIME::Entity that looks like the original message.
     $self->AddAttachments if ( $MIMEObj->head->get('RT-Attach-Message')
diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index f4592af..684044a 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -192,22 +192,40 @@ sub IsTextualContentType {
 }
 
 
-=head2 SetMIMEEntityToEncoding $entity, $encoding
+=head2 SetMIMEEntityToEncoding $entity, $encoding, $preserve_words, $is_out
 
 An utility function which will try to convert entity body into specified
 charset encoding (encoded as octets, *not* unicode-strings).  It will
 iterate all the entities in $entity, and try to convert each one into
 specified charset if whose Content-Type is 'text/plain'.
 
+incoming mail:
+1) find encoding
+2) if found then try to convert to utf-8 in croak mode, return if success
+3) guess encoding
+4) if guessed differently then try to convert to utf-8 in croak mode, return
+   if success
+5) mark part as application/octet-stream instead of falling back to any
+   encoding
+
+outgoing mail:
+1) find encoding
+2) if didn't find then do nothing, send as is, let MUA deal with it
+3) if found then try to convert it to outgoing encoding in croak mode, return
+   if success
+4) do nothing otherwise, keep original encoding
+
 This function doesn't return anything meaningful.
 
 =cut
 
 sub SetMIMEEntityToEncoding {
-    my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
+    my ( $entity, $enc, $preserve_words, $is_out ) =
+      ( shift, shift, shift, shift );
 
     # do the same for parts first of all
-    SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
+    SetMIMEEntityToEncoding( $_, $enc, $preserve_words, $is_out )
+      foreach $entity->parts;
 
     my $head = $entity->head;
 
@@ -227,7 +245,8 @@ sub SetMIMEEntityToEncoding {
     SetMIMEHeadToEncoding(
         $head,
         _FindOrGuessCharset($entity, 1) => $enc,
-        $preserve_words
+        $preserve_words,
+        $is_out,
     );
 
     # If this is a textual entity, we'd need to preserve its original encoding
@@ -247,7 +266,48 @@ sub SetMIMEEntityToEncoding {
 
         # NOTE:: see the comments at the end of the sub.
         Encode::_utf8_off($string);
-        Encode::from_to( $string, $charset => $enc );
+        my $orig_string = $string;
+        eval { Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK ); };
+        if ($@) {
+            if ($is_out) {
+                return;
+            }
+            else {
+                my $success;
+
+                my $err = $@;    # _GuessCharset may override $@
+                my $guess = _GuessCharset($string);
+                if ($guess) {
+                    $RT::Logger->error( "Encoding error: "
+                          . $err
+                          . " falling back to $guess => $enc" );
+                    $string = $orig_string;
+
+                    if ( $guess eq $enc ) {
+                        $success = 1;
+                    }
+                    else {
+                        eval {
+                            Encode::from_to(
+                                $string,
+                                $guess => $enc,
+                                Encode::FB_CROAK
+                            );
+                        };
+                        $success = !$@;
+                    }
+                }
+
+                if ( !$success ) {
+                    $RT::Logger->error( "Encoding error: "
+                          . $@
+                          . " falling back to application/octet-stream" );
+                    $head->mime_attr(
+                        "content-type" => 'application/octet-stream' );
+                    return;
+                }
+            }
+        }
 
         my $new_body = MIME::Body::InCore->new($string);
 
@@ -574,7 +634,7 @@ sub _CanonicalizeCharset {
 }
 
 
-=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
+=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET PRESERVE_WORDS IS_OUT
 
 Converts a MIME Head from one encoding to another. This totally violates the RFC.
 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
@@ -584,7 +644,8 @@ all the time
 =cut
 
 sub SetMIMEHeadToEncoding {
-    my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
+    my ( $head, $charset, $enc, $preserve_words, $is_out ) =
+      ( shift, shift, shift, shift, shift );
 
     $charset = _CanonicalizeCharset($charset);
     $enc     = _CanonicalizeCharset($enc);
@@ -598,8 +659,57 @@ sub SetMIMEHeadToEncoding {
         foreach my $value (@values) {
             if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) {
                 Encode::_utf8_off($value);
-                Encode::from_to( $value, $charset => $enc );
+                my $orig_value = $value;
+                eval {
+                    Encode::from_to(
+                        $value,
+                        $charset => $enc,
+                        Encode::FB_CROAK
+                    );
+                };
+                if ($@) {
+                    if ($is_out) {
+                        $value = $orig_value;
+                        $head->add( $tag, $value );
+                        next;
+                    }
+                    else {
+                        my $err = $@;    # _GuessCharset may override $@
+                        my $guess = _GuessCharset($orig_value);
+                        my $success;
+
+                        if ($guess) {
+                            $RT::Logger->error( "Encoding error: "
+                                  . $err
+                                  . " falling back to $guess => $enc" );
+                            $value = $orig_value;
+
+                            if ( $guess eq $enc ) {
+                                $success = 1;
+                            }
+                            else {
+                                eval {
+                                    Encode::from_to(
+                                        $value,
+                                        $guess => $enc,
+                                        Encode::FB_CROAK
+                                    );
+                                };
+                                $success = !$@;
+                            }
+                        }
+
+                        if ( !$success ) {
+                            $RT::Logger->error( "Encoding error: "
+                                  . $@
+                                  . " forcing conversion to $charset => $enc" );
+                            $value = $orig_value;
+                            Encode::from_to( $value, $charset => $enc );
+                        }
+                    }
+                }
             }
+
             $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )
                 unless $preserve_words;
 
diff --git a/t/mail/sendmail.t b/t/mail/sendmail.t
index 327d2ad..e4aaa9c 100644
--- a/t/mail/sendmail.t
+++ b/t/mail/sendmail.t
@@ -2,7 +2,7 @@ use strict;
 use warnings;
 use File::Spec ();
 
-use RT::Test tests => 174;
+use RT::Test tests => undef;
 
 use RT::EmailParser;
 use RT::Tickets;
@@ -323,31 +323,8 @@ $parser->ParseMIMEEntityFromScalar($content);
 
  %args =        (message => $content, queue => 1, action => 'correspond');
 
-{
-
-my @warnings;
-local $SIG{__WARN__} = sub {
-    push @warnings, "@_";
-};
-
 RT::Interface::Email::Gateway(\%args);
 
-TODO: {
-        local $TODO =
-'need a better approach of encoding converter, should be fixed in 4.2';
-ok( @warnings == 1 || @warnings == 2, "1 or 2 warnings are ok" );
-ok( @warnings == 1 || ( @warnings == 2 && $warnings[1] eq $warnings[0] ),
-    'if there are 2 warnings, they should be same' );
-
-like(
-    $warnings[0],
-    qr/\QEncoding error: "\x{041f}" does not map to iso-8859-1/,
-"The badly formed Russian spam we have isn't actually well-formed UTF8, which makes Encode (correctly) warn",
-);
-
-}
-}
-
  $tickets = RT::Tickets->new(RT->SystemUser);
 $tickets->OrderBy(FIELD => 'id', ORDER => 'DESC');
 $tickets->Limit(FIELD => 'id' ,OPERATOR => '>', VALUE => '0');
@@ -556,3 +533,5 @@ diag q{regression test for #5248 from rt3.fsck.com};
 
 # Don't taint the environment
 $everyone->PrincipalObj->RevokeRight(Right =>'SuperUser');
+
+done_testing();
diff --git a/t/mail/wrong_mime_charset.t b/t/mail/wrong_mime_charset.t
index 530b5f3..5ca6dce 100644
--- a/t/mail/wrong_mime_charset.t
+++ b/t/mail/wrong_mime_charset.t
@@ -1,6 +1,6 @@
 use strict;
 use warnings;
-use RT::Test nodb => 1, tests => 6;
+use RT::Test nodb => 1, tests => undef;
 
 use_ok('RT::I18N');
 use utf8;
@@ -22,10 +22,6 @@ local $SIG{__WARN__} = sub {
 
 RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1' );
 
-TODO: {
-        local $TODO =
-'need a better approach of encoding converter, should be fixed in 4.2';
-
 # this is a weird behavior for different perl versions, 5.12 warns twice,
 # which is correct since we do the encoding thing twice, for Subject
 # and Data respectively.
@@ -46,4 +42,5 @@ is( $subject, $test_string, 'subject is set to iso-8859-1' );
 my $body = decode( 'iso-8859-1', $mime->stringify_body );
 chomp $body;
 is( $body, $test_string, 'body is set to iso-8859-1' );
-}
+
+done_testing;

commit f627f62ac54bb7e4640d7a5b5e88a584e466a2ed
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Jun 16 15:35:57 2011 +0800

    more tests for RT::I18N::SetMIMEEntityToEncoding

diff --git a/t/api/i18n_mime_encoding.t b/t/api/i18n_mime_encoding.t
new file mode 100644
index 0000000..3991ed4
--- /dev/null
+++ b/t/api/i18n_mime_encoding.t
@@ -0,0 +1,41 @@
+use warnings;
+use strict;
+
+use RT::Test nodata => 1, tests => 5;
+use RT::I18N;
+use Encode;
+
+my @warnings;
+local $SIG{__WARN__} = sub {
+    push @warnings, "@_";
+};
+
+diag "normal mime encoding conversion: utf8 => iso-8859-1";
+{
+    my $mime = MIME::Entity->build(
+        Type => 'text/plain; charset=utf-8',
+        Data => ['À中文'],
+    );
+
+    RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', );
+    like(
+        join( '', @warnings ),
+        qr/does not map to iso-8859-1/,
+        'get no-map warning'
+    );
+    is( $mime->stringify_body, 'À中文', 'body is not changed' );
+    is( $mime->head->mime_attr('Content-Type'), 'application/octet-stream' );
+    @warnings = ();
+}
+
+diag "mime encoding conversion: utf8 => iso-8859-1";
+{
+    my $mime = MIME::Entity->build(
+        Type => 'text/plain; charset=utf-8',
+        Data => ['À中文'],
+    );
+    RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', '', 1 );
+    is( scalar @warnings, 0, 'no warnings with force' );
+    is( $mime->stringify_body, 'À中文', 'body is not changed' );
+}
+

-----------------------------------------------------------------------


More information about the Rt-commit mailing list