[Rt-commit] rt branch, 4.0/set-mime-encoding-fallback, created. rt-4.0.1rc1-20-g5763052

Mon Jun 13 22:15:35 EDT 2011

The branch, 4.0/set-mime-encoding-fallback has been created
        at  5763052672fec129df3eb4246e556db9eb5e287d (commit)

- Log -----------------------------------------------------------------
commit 9848b179000e61b80f883b7de6de7c4c292c2463
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Jun 14 08:29:19 2011 +0800

    Revert "Encode::from_to don't die unless we tell it to. see also #7126"
    
    This reverts commit 49fa6755dbbf2097646a906e244d1f503be17ddc.
    
    Conflicts:
    
    	lib/RT/I18N.pm

diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index a1f2af5..eda0ff3 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -199,11 +199,6 @@ charset encoding (encoded as octets, *not* unicode-strings).  It will
 iterate all the entities in $entity, and try to convert each one into
 specified charset if whose Content-Type is 'text/plain'.
 
-the methods are tries in order:
-1) to convert the entity to $encoding, 
-2) to interpret the entity as iso-8859-1 and then convert it to $encoding,
-3) forcibly convert it to $encoding.
-
 This function doesn't return anything meaningful.
 
 =cut
@@ -241,46 +236,31 @@ sub SetMIMEEntityToEncoding {
 
     if ( $enc ne $charset && $body ) {
         my $string = $body->as_string or return;
-        # NOTE:: see the comments at the end of the sub.
-        Encode::_utf8_off($string);
-        my $orig_string = $string;
 
         # Convert the body
         eval {
-            $RT::Logger->debug( "Converting '$charset' to '$enc' for "
-                  . $head->mime_type . " - "
-                  . ( $head->get('subject') || 'Subjectless message' ) );
-            Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
+            $RT::Logger->debug( "Converting '$charset' to '$enc' for " . $head->mime_type . " - " . ( $head->get('subject') || 'Subjectless message' ) );
+
+            # NOTE:: see the comments at the end of the sub.
+            Encode::_utf8_off( $string);
+            Encode::from_to( $string, $charset => $enc );
         };
 
         if ($@) {
-            $RT::Logger->error( "Encoding error: " 
-                  . $@
-                  . " falling back to iso-8859-1 => $enc" );
-            $string = $orig_string;
-            eval {
-                Encode::from_to(
-                    $string,
-                    'iso-8859-1' => $enc,
-                    Encode::FB_CROAK
-                );
-            };
+            $RT::Logger->error( "Encoding error: " . $@ . " defaulting to ISO-8859-1 -> UTF-8" );
+            eval { Encode::from_to( $string, 'iso-8859-1' => $enc ) };
             if ($@) {
-                $RT::Logger->error( "Encoding error: " 
-                      . $@
-                      . " forcing conversion to $charset => $enc" );
-                $string = $orig_string;
-                Encode::from_to( $string, $charset => $enc );
+                $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
             }
         }
 
         # }}}
 
-        my $new_body = MIME::Body::InCore->new($string);
+        my $new_body = MIME::Body::InCore->new( $string);
 
         # set up the new entity
         $head->mime_attr( "content-type" => 'text/plain' )
-          unless ( $head->mime_attr("content-type") );
+            unless ( $head->mime_attr("content-type") );
         $head->mime_attr( "content-type.charset" => $enc );
         $entity->bodyhandle($new_body);
     }
@@ -350,16 +330,10 @@ sub DecodeMIMEWordsToEncoding {
             ."only Q(uoted-printable) and B(ase64) are supported");
 	}
 
-        # now we have got a decoded subject, try to convert into the encoding
-        unless ( $charset eq $to_charset ) {
-            my $orig_str = $enc_str;
-            eval { Encode::from_to( $enc_str, $charset, $to_charset, Encode::FB_CROAK ) };
-            if ($@) {
-                $enc_str = $orig_str;
-                $charset = _GuessCharset( $enc_str );
-                Encode::from_to( $enc_str, $charset, $to_charset );
-            }
-        }
+	    # now we have got a decoded subject, try to convert into the encoding
+	    unless ($charset eq $to_charset) {
+	        Encode::from_to($enc_str, $charset,  $to_charset);
+	    }
 
         # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
         # We _should_ be preserving them encoded until after parsing is completed and
@@ -397,16 +371,7 @@ sub DecodeMIMEWordsToEncoding {
             $enc_str =~ s/%(\w{2})/chr hex $1/eg;
             unless ( $charset eq $to_charset ) {
                 my $orig_str = $enc_str;
-                local $@;
-                eval {
-                    Encode::from_to( $enc_str, $charset, $to_charset,
-                        Encode::FB_CROAK );
-                };
-                if ($@) {
-                    $enc_str = $orig_str;
-                    $charset = _GuessCharset($enc_str);
-                    Encode::from_to( $enc_str, $charset, $to_charset );
-                }
+                Encode::from_to( $enc_str, $charset, $to_charset );
             }
             $enc_str = qq{"$enc_str"}
               if $enc_str =~ /[,;]/
@@ -582,30 +547,18 @@ sub SetMIMEHeadToEncoding {
         my @values = $head->get_all($tag);
         $head->delete($tag);
         foreach my $value (@values) {
-            Encode::_utf8_off($value);
-            my $orig_value = $value;
             if ( $charset ne $enc ) {
+
                 eval {
-                    Encode::from_to( $value, $charset => $enc, Encode::FB_CROAK );
+                    Encode::_utf8_off($value);
+                    Encode::from_to( $value, $charset => $enc );
                 };
                 if ($@) {
-                    $RT::Logger->error( "Encoding error: " 
-                          . $@
-                          . " falling back to iso-8859-1 => $enc" );
-                    $value = $orig_value;
-                    eval {
-                        Encode::from_to(
-                            $value,
-                            'iso-8859-1' => $enc,
-                            Encode::FB_CROAK
-                        );
-                    };
+                    $RT::Logger->error( "Encoding error: " . $@
+                                       . " defaulting to ISO-8859-1 -> UTF-8" );
+                    eval { Encode::from_to( $value, 'iso-8859-1' => $enc ) };
                     if ($@) {
-                        $RT::Logger->error( "Encoding error: " 
-                              . $@
-                              . " forcing conversion to $charset => $enc" );
-                        $value = $orig_value;
-                        Encode::from_to( $value, $charset => $enc );
+                        $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
                     }
                 }
             }

commit 9931d5ad700795a5794568d78406f71180111f21
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Jun 14 09:13:35 2011 +0800

    TODO relevant tests as we reverted 49fa675

diff --git a/t/mail/sendmail.t b/t/mail/sendmail.t
index 05a59bd..2883141 100644
--- a/t/mail/sendmail.t
+++ b/t/mail/sendmail.t
@@ -316,6 +316,10 @@ $parser->ParseMIMEEntityFromScalar($content);
 
  %args =        (message => $content, queue => 1, action => 'correspond');
 
+TODO: {
+        local $TODO =
+'need a better approach of encoding converter, should be fixed in 4.2';
+
 warnings_like {
  RT::Interface::Email::Gateway(\%args);
 }
@@ -325,6 +329,7 @@ warnings_like {
     ],
 "The badly formed Russian spam we have isn't actually well-formed UTF8, which makes Encode (correctly) warn";
 
+}
 
  $tickets = RT::Tickets->new(RT->SystemUser);
 $tickets->OrderBy(FIELD => 'id', ORDER => 'DESC');
diff --git a/t/mail/wrong_mime_charset.t b/t/mail/wrong_mime_charset.t
index 7636de4..4dd0051 100644
--- a/t/mail/wrong_mime_charset.t
+++ b/t/mail/wrong_mime_charset.t
@@ -23,6 +23,10 @@ local $SIG{__WARN__} = sub {
 
 RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1' );
 
+TODO: {
+        local $TODO =
+'need a better approach of encoding converter, should be fixed in 4.2';
+
 # this is a weird behavior for different perl versions, 5.12 warns twice,
 # which is correct since we do the encoding thing twice, for Subject
 # and Data respectively.
@@ -45,3 +49,4 @@ is( $subject, $test_string, 'subject is set to iso-8859-1' );
 my $body = decode( 'iso-8859-1', $mime->stringify_body );
 chomp $body;
 is( $body, $test_string, 'body is set to iso-8859-1' );
+}

commit 5763052672fec129df3eb4246e556db9eb5e287d
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Jun 14 10:08:43 2011 +0800

    removed the eval and the unreachable if ($@) code.
    
    note: a more correct solution should be implemented in 4.2:
    ( see also #17680 )
    
    incoming mail:
    1) find encoding
    2) if found then try to convert to utf-8 in croak mode, return on success
    3) guess encoding
    4) if guessed differently then try to convert to utf-8 in croak mode, return
    on success
    5) mark part as application/octet-stream instead of falling back to any
    encoding
    
    outgoing mail:
    1) find encoding
    2) if not found then do nothing, send as is, let MUA deal with it
    3) if found then try to convert it to outgoing encoding in croak mode, return
    on success
    4) do nothing otherwise, keep original encoding

diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index eda0ff3..8e08b98 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -238,23 +238,11 @@ sub SetMIMEEntityToEncoding {
         my $string = $body->as_string or return;
 
         # Convert the body
-        eval {
-            $RT::Logger->debug( "Converting '$charset' to '$enc' for " . $head->mime_type . " - " . ( $head->get('subject') || 'Subjectless message' ) );
-
-            # NOTE:: see the comments at the end of the sub.
-            Encode::_utf8_off( $string);
-            Encode::from_to( $string, $charset => $enc );
-        };
-
-        if ($@) {
-            $RT::Logger->error( "Encoding error: " . $@ . " defaulting to ISO-8859-1 -> UTF-8" );
-            eval { Encode::from_to( $string, 'iso-8859-1' => $enc ) };
-            if ($@) {
-                $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
-            }
-        }
+        $RT::Logger->debug( "Converting '$charset' to '$enc' for " . $head->mime_type . " - " . ( $head->get('subject') || 'Subjectless message' ) );
 
-        # }}}
+        # NOTE:: see the comments at the end of the sub.
+        Encode::_utf8_off( $string);
+        Encode::from_to( $string, $charset => $enc );
 
         my $new_body = MIME::Body::InCore->new( $string);
 
@@ -548,19 +536,8 @@ sub SetMIMEHeadToEncoding {
         $head->delete($tag);
         foreach my $value (@values) {
             if ( $charset ne $enc ) {
-
-                eval {
-                    Encode::_utf8_off($value);
-                    Encode::from_to( $value, $charset => $enc );
-                };
-                if ($@) {
-                    $RT::Logger->error( "Encoding error: " . $@
-                                       . " defaulting to ISO-8859-1 -> UTF-8" );
-                    eval { Encode::from_to( $value, 'iso-8859-1' => $enc ) };
-                    if ($@) {
-                        $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
-                    }
-                }
+                Encode::_utf8_off($value);
+                Encode::from_to( $value, $charset => $enc );
             }
             $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )
                 unless $preserve_words;

-----------------------------------------------------------------------