[Rt-commit] rt branch, 3.8/set-mime-encoding-fix, created. rt-3.8.10-13-g10d5b18

? sunnavy sunnavy at bestpractical.com
Wed Jun 8 17:48:05 EDT 2011


The branch, 3.8/set-mime-encoding-fix has been created
        at  10d5b18b6e9a5cd24d53129549d3e36ca214536e (commit)

- Log -----------------------------------------------------------------
commit be7dfadeb396824c046186eb5ec3c8e6b61ac664
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Jun 9 05:39:28 2011 +0800

    refactor SetMIMEHeaderToEncoding and SetMIMEEntityToEncoding
    
    Two important changes:
    
    1. Add a $force argument that converts strings without the strict
    (FB_CROAK) check and its fallbacks. This is useful in the SendEmail
    action: the original MIME object is always utf-8 there, so the
    fallback steps are unnecessary.
    
    2. Fall back to the guessed charset instead of the hardcoded iso-8859-1.

diff --git a/lib/RT/Action/SendEmail.pm b/lib/RT/Action/SendEmail.pm
index 9e93e4a..be3bd3a 100755
--- a/lib/RT/Action/SendEmail.pm
+++ b/lib/RT/Action/SendEmail.pm
@@ -212,9 +212,11 @@ sub Prepare {
         $part->head->mime_attr( "Content-Type.charset" => 'utf-8' );
     }
 
+    # $MIMEObj is utf-8 encoded here, so it is ok to force the conversion
+    # (the trailing 1 in the arguments is the $force flag)
     RT::I18N::SetMIMEEntityToEncoding( $MIMEObj,
         RT->Config->Get('EmailOutputEncoding'),
-        'mime_words_ok', );
+        'mime_words_ok', 1 );
 
     # Build up a MIME::Entity that looks like the original message.
     $self->AddAttachments if ( $MIMEObj->head->get('RT-Attach-Message')
diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index 2056b3e..dbc0e2d 100755
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -196,7 +196,7 @@ sub IsTextualContentType {
 
 # {{{ SetMIMEEntityToEncoding
 
-=head2 SetMIMEEntityToEncoding $entity, $encoding
+=head2 SetMIMEEntityToEncoding $entity, $encoding, $preserve_words, $force
 
 An utility function which will try to convert entity body into specified
 charset encoding (encoded as octets, *not* unicode-strings).  It will
@@ -204,19 +204,23 @@ iterate all the entities in $entity, and try to convert each one into
 specified charset if whose Content-Type is 'text/plain'.
 
 the methods are tries in order:
-1) to convert the entity to $encoding, 
-2) to interpret the entity as iso-8859-1 and then convert it to $encoding,
+1) convert the entity to $encoding, 
+2) guess the entity's encoding then convert it to $encoding,
 3) forcibly convert it to $encoding.
+ 
+If $force is true, steps 1 and 2 are skipped and only step 3 is performed.
+
 
 This function doesn't return anything meaningful.
 
 =cut
 
 sub SetMIMEEntityToEncoding {
-    my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
+    my ( $entity, $enc, $preserve_words, $force ) =
+      ( shift, shift, shift, shift );
 
     # do the same for parts first of all
-    SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
+    SetMIMEEntityToEncoding( $_, $enc, $preserve_words, $force ) foreach $entity->parts;
 
     my $charset = _FindOrGuessCharset($entity) or return;
     # one and only normalization
@@ -226,7 +230,8 @@ sub SetMIMEEntityToEncoding {
     SetMIMEHeadToEncoding(
 	$entity->head,
 	_FindOrGuessCharset($entity, 1) => $enc,
-	$preserve_words
+	$preserve_words,
+    $force,
     );
 
     my $head = $entity->head;
@@ -253,31 +258,46 @@ sub SetMIMEEntityToEncoding {
         my $orig_string = $string;
 
         # {{{ Convert the body
-        eval {
-            $RT::Logger->debug( "Converting '$charset' to '$enc' for "
-                  . $head->mime_type . " - "
-                  . ( $head->get('subject') || 'Subjectless message' ) );
-            Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
-        };
-
-        if ($@) {
-            $RT::Logger->error( "Encoding error: " 
-                  . $@
-                  . " falling back to iso-8859-1 => $enc" );
-            $string = $orig_string;
+        $RT::Logger->debug( "Converting '$charset' to '$enc' for "
+              . $head->mime_type . " - "
+              . ( $head->get('subject') || 'Subjectless message' ) );
+        if ( $force ) {
+            Encode::from_to( $string, $charset => $enc );
+        }
+        else {
             eval {
-                Encode::from_to(
-                    $string,
-                    'iso-8859-1' => $enc,
-                    Encode::FB_CROAK
-                );
+                Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
             };
+
             if ($@) {
-                $RT::Logger->error( "Encoding error: " 
-                      . $@
-                      . " forcing conversion to $charset => $enc" );
                 $string = $orig_string;
-                Encode::from_to( $string, $charset => $enc );
+                my $guess = _GuessCharset($orig_string);
+                $RT::Logger->error( "Encoding error: "
+                      . $@
+                      . " falling back to $guess => $enc" );
+
+                my $success;
+                if ( $guess eq $enc ) {
+                    $success = 1;
+                }
+                else {
+                    eval {
+                        Encode::from_to(
+                            $string,
+                            $guess => $enc,
+                            Encode::FB_CROAK
+                        );
+                    };
+                    $success = !$@;
+                }
+
+                if ( !$success ) {
+                    $RT::Logger->error( "Encoding error: "
+                          . $@
+                          . " forcing conversion to $charset => $enc" );
+                    $string = $orig_string;
+                    Encode::from_to( $string, $charset => $enc );
+                }
             }
         }
 
@@ -492,7 +512,7 @@ sub _GuessCharset {
 
 # {{{ SetMIMEHeadToEncoding
 
-=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
+=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET PRESERVE_WORDS FORCE
 
 Converts a MIME Head from one encoding to another. This totally violates the RFC.
 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
@@ -502,7 +522,8 @@ all the time
 =cut
 
 sub SetMIMEHeadToEncoding {
-    my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
+    my ( $head, $charset, $enc, $preserve_words, $force ) =
+      ( shift, shift, shift, shift, shift );
 
     $charset = 'utf-8' if $charset eq 'utf8';
     $enc     = 'utf-8' if $enc     eq 'utf8';
@@ -517,27 +538,47 @@ sub SetMIMEHeadToEncoding {
             Encode::_utf8_off($value);
             my $orig_value = $value;
             if ( $charset ne $enc ) {
-                eval {
-                    Encode::from_to( $value, $charset => $enc, Encode::FB_CROAK );
-                };
-                if ($@) {
-                    $RT::Logger->error( "Encoding error: " 
-                          . $@
-                          . " falling back to iso-8859-1 => $enc" );
-                    $value = $orig_value;
+                if ( $force ) {
+                    Encode::from_to( $value, $charset => $enc );
+                }
+                else {
                     eval {
                         Encode::from_to(
                             $value,
-                            'iso-8859-1' => $enc,
+                            $charset => $enc,
                             Encode::FB_CROAK
                         );
                     };
                     if ($@) {
-                        $RT::Logger->error( "Encoding error: " 
+
+                        my $guess = _GuessCharset( $orig_value );
+                        $RT::Logger->error( "Encoding error: "
                               . $@
-                              . " forcing conversion to $charset => $enc" );
+                              . " falling back to $guess => $enc" );
                         $value = $orig_value;
-                        Encode::from_to( $value, $charset => $enc );
+
+                        my $success;
+                        if ( $guess eq $enc ) {
+                            $success = 1;
+                        }
+                        else {
+                            eval {
+                                Encode::from_to(
+                                    $value,
+                                    $guess => $enc,
+                                    Encode::FB_CROAK
+                                );
+                            };
+                            $success = !$@;
+                        }
+
+                        if ( !$success ) {
+                            $RT::Logger->error( "Encoding error: "
+                                  . $@
+                                  . " forcing conversion to $charset => $enc" );
+                            $value = $orig_value;
+                            Encode::from_to( $value, $charset => $enc );
+                        }
                     }
                 }
             }

commit 10d5b18b6e9a5cd24d53129549d3e36ca214536e
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Jun 9 05:40:18 2011 +0800

    more tests for RT::I18N::SetMIMEEntityToEncoding

diff --git a/t/api/i18n_mime_encoding.t b/t/api/i18n_mime_encoding.t
new file mode 100644
index 0000000..725854c
--- /dev/null
+++ b/t/api/i18n_mime_encoding.t
@@ -0,0 +1,46 @@
+use warnings;
+use strict;
+
+use RT::Test nodata => 1, tests => 4;
+use RT::I18N;
+use Encode;
+
+my @warnings;
+local $SIG{__WARN__} = sub {
+    push @warnings, "@_";
+};
+
+my $result = encode( 'iso-8859-1', decode_utf8('À??') );
+
+diag "normal mime encoding conversion: utf8 => iso-8859-1"
+  if $ENV{TEST_VERBOSE};
+{
+    my $mime = MIME::Entity->build(
+        Type => 'text/plain; charset=utf-8',
+        Data => ['À中文'],
+    );
+
+    RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', );
+    like(
+        join( '', @warnings ),
+        qr/does not map to iso-8859-1/,
+        'get no-map warning'
+    );
+    is( $mime->stringify_body, $result,
+        'invalid chars in mail are replaced by ?' );
+    @warnings = ();
+}
+
+diag "force mime encoding conversion: utf8 => iso-8859-1"
+  if $ENV{TEST_VERBOSE};
+{
+    my $mime     = MIME::Entity->build(
+        Type => 'text/plain; charset=utf-8',
+        Data => ['À中文'],
+    );
+    RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', '', 1 );
+    is( scalar @warnings, 0, 'no warnings with force' );
+    is( $mime->stringify_body, $result,
+        'invalid chars in mail are replaced by ?' );
+}
+

-----------------------------------------------------------------------


More information about the Rt-commit mailing list