[Rt-commit] rt branch, 4.2/set-mime-encoding-refactor, created. rt-4.0.0rc7-254-g601f7cd

? sunnavy sunnavy at bestpractical.com
Thu Jun 16 04:15:53 EDT 2011


The branch, 4.2/set-mime-encoding-refactor has been created
        at  601f7cd13f1e874012ac39900254f37bf514f9c5 (commit)

- Log -----------------------------------------------------------------
commit cb6018b8bd8edd1d1c3802149c787385c360f534
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Wed Jun 15 01:05:34 2011 +0800

    refactor SetMIMEEncoding functionality( see also #17680 )
    
    incoming mail:
    
    1) find encoding
    2) if found then try to convert to utf-8 in croak mode, return if success
    3) guess encoding
    4) if guessed differently then try to convert to utf-8 in croak mode, return
    if success
    5) mark part as application/octet-stream instead of falling back to any
    encoding
    
    outgoing mail:
    
    1) find encoding
    2) if didn't find then do nothing, send as is, let MUA deal with it
    3) if found then try to convert it to outgoing encoding in croak mode, return
    if success
    4) do nothing otherwise, keep original encoding

diff --git a/lib/RT/Action/SendEmail.pm b/lib/RT/Action/SendEmail.pm
index cc63106..a0bfbf1 100644
--- a/lib/RT/Action/SendEmail.pm
+++ b/lib/RT/Action/SendEmail.pm
@@ -213,7 +213,7 @@ sub Prepare {
 
     RT::I18N::SetMIMEEntityToEncoding( $MIMEObj,
         RT->Config->Get('EmailOutputEncoding'),
-        'mime_words_ok', );
+        'mime_words_ok', 'is_out' );
 
     # Build up a MIME::Entity that looks like the original message.
     $self->AddAttachments if ( $MIMEObj->head->get('RT-Attach-Message')
diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index a1f2af5..a20569e 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -192,34 +192,48 @@ sub IsTextualContentType {
 }
 
 
-=head2 SetMIMEEntityToEncoding $entity, $encoding
+=head2 SetMIMEEntityToEncoding $entity, $encoding, $preserve_words, $is_out
 
 An utility function which will try to convert entity body into specified
 charset encoding (encoded as octets, *not* unicode-strings).  It will
 iterate all the entities in $entity, and try to convert each one into
 specified charset if whose Content-Type is 'text/plain'.
 
-the methods are tries in order:
-1) to convert the entity to $encoding, 
-2) to interpret the entity as iso-8859-1 and then convert it to $encoding,
-3) forcibly convert it to $encoding.
+incoming mail:
+1) find encoding
+2) if found then try to convert to utf-8 in croak mode, return if success
+3) guess encoding
+4) if guessed differently then try to convert to utf-8 in croak mode, return
+if success
+5) mark part as application/octet-stream instead of falling back to any
+encoding
+
+outgoing mail:
+1) find encoding
+2) if didn't find then do nothing, send as is, let MUA deal with it
+3) if found then try to convert it to outgoing encoding in croak mode, return
+if success
+4) do nothing otherwise, keep original encoding
 
 This function doesn't return anything meaningful.
 
 =cut
 
 sub SetMIMEEntityToEncoding {
-    my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
+    my ( $entity, $enc, $preserve_words, $is_out ) =
+      ( shift, shift, shift, shift );
 
     # do the same for parts first of all
-    SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
+    SetMIMEEntityToEncoding( $_, $enc, $preserve_words, $is_out )
+      foreach $entity->parts;
 
     my $charset = _FindOrGuessCharset($entity) or return;
 
     SetMIMEHeadToEncoding(
 	$entity->head,
 	_FindOrGuessCharset($entity, 1) => $enc,
-	$preserve_words
+	$preserve_words,
+    $is_out,
     );
 
     my $head = $entity->head;
@@ -246,35 +260,51 @@ sub SetMIMEEntityToEncoding {
         my $orig_string = $string;
 
         # Convert the body
-        eval {
-            $RT::Logger->debug( "Converting '$charset' to '$enc' for "
-                  . $head->mime_type . " - "
-                  . ( $head->get('subject') || 'Subjectless message' ) );
-            Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
-        };
+        $RT::Logger->debug( "Converting '$charset' to '$enc' for "
+              . $head->mime_type . " - "
+              . ( $head->get('subject') || 'Subjectless message' ) );
 
+        eval { Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK ); };
         if ($@) {
-            $RT::Logger->error( "Encoding error: " 
-                  . $@
-                  . " falling back to iso-8859-1 => $enc" );
-            $string = $orig_string;
-            eval {
-                Encode::from_to(
-                    $string,
-                    'iso-8859-1' => $enc,
-                    Encode::FB_CROAK
-                );
-            };
-            if ($@) {
-                $RT::Logger->error( "Encoding error: " 
-                      . $@
-                      . " forcing conversion to $charset => $enc" );
-                $string = $orig_string;
-                Encode::from_to( $string, $charset => $enc );
+            if ($is_out) {
+                return;
             }
-        }
+            else {
+                my $success;
 
-        # }}}
+                my $err = $@; # _GuessCharset may override $@
+                my $guess = _GuessCharset($string);
+                if ($guess) {
+                    $RT::Logger->error( "Encoding error: " 
+                          . $err
+                          . " falling back to $guess => $enc" );
+                    $string = $orig_string;
+
+                    if ( $guess eq $enc ) {
+                        $success = 1;
+                    }
+                    else {
+                        eval {
+                            Encode::from_to(
+                                $string,
+                                $guess => $enc,
+                                Encode::FB_CROAK
+                            );
+                        };
+                        $success = !$@;
+                    }
+                }
+
+                if ( !$success ) {
+                    $RT::Logger->error( "Encoding error: " 
+                          . $@
+                          . " falling back to application/octet-stream" );
+                    $head->mime_attr(
+                        "content-type" => 'application/octet-stream' );
+                    return;
+                }
+            }
+        }
 
         my $new_body = MIME::Body::InCore->new($string);
 
@@ -560,7 +590,7 @@ sub _CanonicalizeCharset {
 }
 
 
-=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
+=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET PRESERVE_WORDS IS_OUT
 
 Converts a MIME Head from one encoding to another. This totally violates the RFC.
 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
@@ -570,7 +600,8 @@ all the time
 =cut
 
 sub SetMIMEHeadToEncoding {
-    my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
+    my ( $head, $charset, $enc, $preserve_words, $is_out ) =
+      ( shift, shift, shift, shift, shift );
 
     $charset = _CanonicalizeCharset($charset);
     $enc     = _CanonicalizeCharset($enc);
@@ -586,26 +617,51 @@ sub SetMIMEHeadToEncoding {
             my $orig_value = $value;
             if ( $charset ne $enc ) {
                 eval {
-                    Encode::from_to( $value, $charset => $enc, Encode::FB_CROAK );
+                    Encode::from_to(
+                        $value,
+                        $charset => $enc,
+                        Encode::FB_CROAK
+                    );
                 };
                 if ($@) {
-                    $RT::Logger->error( "Encoding error: " 
-                          . $@
-                          . " falling back to iso-8859-1 => $enc" );
-                    $value = $orig_value;
-                    eval {
-                        Encode::from_to(
-                            $value,
-                            'iso-8859-1' => $enc,
-                            Encode::FB_CROAK
-                        );
-                    };
-                    if ($@) {
-                        $RT::Logger->error( "Encoding error: " 
-                              . $@
-                              . " forcing conversion to $charset => $enc" );
+                    if ($is_out) {
                         $value = $orig_value;
-                        Encode::from_to( $value, $charset => $enc );
+                        $head->add( $tag, $value );
+                        next;
+                    }
+                    else {
+                        my $err = $@; # _GuessCharset may override $@
+                        my $guess = _GuessCharset($orig_value);
+                        my $success;
+
+                        if ($guess) {
+                            $RT::Logger->error( "Encoding error: " 
+                                  . $err
+                                  . " falling back to $guess => $enc" );
+                            $value = $orig_value;
+
+                            if ( $guess eq $enc ) {
+                                $success = 1;
+                            }
+                            else {
+                                eval {
+                                    Encode::from_to(
+                                        $value,
+                                        $guess => $enc,
+                                        Encode::FB_CROAK
+                                    );
+                                };
+                                $success = !$@;
+                            }
+                        }
+
+                        if ( !$success ) {
+                            $RT::Logger->error( "Encoding error: " 
+                                  . $@
+                                  . " forcing conversion to $charset => $enc" );
+                            $value = $orig_value;
+                            Encode::from_to( $value, $charset => $enc );
+                        }
                     }
                 }
             }
diff --git a/t/mail/sendmail.t b/t/mail/sendmail.t
index f78ca0a..6eaa760 100644
--- a/t/mail/sendmail.t
+++ b/t/mail/sendmail.t
@@ -2,7 +2,7 @@
 use strict;
 use File::Spec ();
 
-use RT::Test tests => 139;
+use RT::Test tests => 138;
 use Test::Warn;
 
 use RT::EmailParser;
@@ -315,15 +315,7 @@ $parser->ParseMIMEEntityFromScalar($content);
 
  %args =        (message => $content, queue => 1, action => 'correspond');
 
-warnings_like {
  RT::Interface::Email::Gateway(\%args);
-}
-[
-    qr/Encoding error: "\\x\{041f\}" does not map to iso-8859-1 .*/,
-    qr/Encoding error: "\\x\{041f\}" does not map to iso-8859-1 .*/
-    ],
-"The badly formed Russian spam we have isn't actually well-formed UTF8, which makes Encode (correctly) warn";
-
 
  $tickets = RT::Tickets->new(RT->SystemUser);
 $tickets->OrderBy(FIELD => 'id', ORDER => 'DESC');
diff --git a/t/mail/wrong_mime_charset.t b/t/mail/wrong_mime_charset.t
index 25bb07c..5821170 100644
--- a/t/mail/wrong_mime_charset.t
+++ b/t/mail/wrong_mime_charset.t
@@ -1,6 +1,6 @@
 use strict;
 use warnings;
-use RT::Test nodb => 1, tests => 6;
+use RT::Test nodb => 1, tests => 4;
 
 use_ok('RT::I18N');
 use utf8;
@@ -22,18 +22,8 @@ local $SIG{__WARN__} = sub {
 
 RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1' );
 
-# this is a weird behavior for different perl versions, 5.12 warns twice,
-# which is correct since we do the encoding thing twice, for Subject
-# and Data respectively.
-# but 5.8 and 5.10 warns only once.
-ok( @warnings == 1 || @warnings == 2, "1 or 2 warnings are ok" );
-ok(
-    @warnings == 1 || $warnings[1] eq $warnings[0],
-    'if there are 2 warnings, they should be same'
-);
-
 like(
-    $warnings[0],
+    join( '', @warnings ),
     qr/\QEncoding error: "\x{fffd}" does not map to iso-8859-1/,
 "We can't encode something into the wrong encoding without Encode complaining"
 );

commit 601f7cd13f1e874012ac39900254f37bf514f9c5
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Jun 16 15:35:57 2011 +0800

    more tests for RT::I18N::SetMIMEEntityToEncoding

diff --git a/t/api/i18n_mime_encoding.t b/t/api/i18n_mime_encoding.t
new file mode 100644
index 0000000..3991ed4
--- /dev/null
+++ b/t/api/i18n_mime_encoding.t
@@ -0,0 +1,41 @@
+use warnings;
+use strict;
+
+use RT::Test nodata => 1, tests => 5;
+use RT::I18N;
+use Encode;
+
+my @warnings;
+local $SIG{__WARN__} = sub {
+    push @warnings, "@_";
+};
+
+diag "normal mime encoding conversion: utf8 => iso-8859-1";
+{
+    my $mime = MIME::Entity->build(
+        Type => 'text/plain; charset=utf-8',
+        Data => ['À中文'],
+    );
+
+    RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', );
+    like(
+        join( '', @warnings ),
+        qr/does not map to iso-8859-1/,
+        'get no-map warning'
+    );
+    is( $mime->stringify_body, 'À中文', 'body is not changed' );
+    is( $mime->head->mime_attr('Content-Type'), 'application/octet-stream' );
+    @warnings = ();
+}
+
+diag "mime encoding conversion: utf8 => iso-8859-1";
+{
+    my $mime = MIME::Entity->build(
+        Type => 'text/plain; charset=utf-8',
+        Data => ['À中文'],
+    );
+    RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', '', 1 );
+    is( scalar @warnings, 0, 'no warnings with force' );
+    is( $mime->stringify_body, 'À中文', 'body is not changed' );
+}
+

-----------------------------------------------------------------------


More information about the Rt-commit mailing list