[Rt-commit] rt branch, 4.0/set-mime-encoding-fallback, created. rt-4.0.1rc1-20-g8287caf

Mon Jun 13 21:42:47 EDT 2011

The branch, 4.0/set-mime-encoding-fallback has been created
        at  8287caff1bc684121e32e8f68f36ed2b7e51b4d1 (commit)

- Log -----------------------------------------------------------------
commit 30f0088c6f9807e79692e5b7ebd725d0a952728d
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Jun 14 08:29:19 2011 +0800

    Revert "Encode::from_to don't die unless we tell it to. see also #7126"
    
    This reverts commit 49fa6755dbbf2097646a906e244d1f503be17ddc.
    
    Conflicts:
    
    	lib/RT/I18N.pm

diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index a1f2af5..91a5657 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -199,11 +199,6 @@ charset encoding (encoded as octets, *not* unicode-strings).  It will
 iterate all the entities in $entity, and try to convert each one into
 specified charset if whose Content-Type is 'text/plain'.
 
-the methods are tries in order:
-1) to convert the entity to $encoding, 
-2) to interpret the entity as iso-8859-1 and then convert it to $encoding,
-3) forcibly convert it to $encoding.
-
 This function doesn't return anything meaningful.
 
 =cut
@@ -241,46 +236,31 @@ sub SetMIMEEntityToEncoding {
 
     if ( $enc ne $charset && $body ) {
         my $string = $body->as_string or return;
-        # NOTE:: see the comments at the end of the sub.
-        Encode::_utf8_off($string);
-        my $orig_string = $string;
 
         # Convert the body
         eval {
-            $RT::Logger->debug( "Converting '$charset' to '$enc' for "
-                  . $head->mime_type . " - "
-                  . ( $head->get('subject') || 'Subjectless message' ) );
-            Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
+            $RT::Logger->debug( "Converting '$charset' to '$enc' for " . $head->mime_type . " - " . ( $head->get('subject') || 'Subjectless message' ) );
+
+            # NOTE:: see the comments at the end of the sub.
+            Encode::_utf8_off( $string);
+            Encode::from_to( $string, $charset => $enc );
         };
 
         if ($@) {
-            $RT::Logger->error( "Encoding error: " 
-                  . $@
-                  . " falling back to iso-8859-1 => $enc" );
-            $string = $orig_string;
-            eval {
-                Encode::from_to(
-                    $string,
-                    'iso-8859-1' => $enc,
-                    Encode::FB_CROAK
-                );
-            };
+            $RT::Logger->error( "Encoding error: " . $@ . " defaulting to ISO-8859-1 -> UTF-8" );
+            eval { Encode::from_to( $string, 'iso-8859-1' => $enc ) };
             if ($@) {
-                $RT::Logger->error( "Encoding error: " 
-                      . $@
-                      . " forcing conversion to $charset => $enc" );
-                $string = $orig_string;
-                Encode::from_to( $string, $charset => $enc );
+                $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
             }
         }
 
         # }}}
 
-        my $new_body = MIME::Body::InCore->new($string);
+        my $new_body = MIME::Body::InCore->new( $string);
 
         # set up the new entity
         $head->mime_attr( "content-type" => 'text/plain' )
-          unless ( $head->mime_attr("content-type") );
+            unless ( $head->mime_attr("content-type") );
         $head->mime_attr( "content-type.charset" => $enc );
         $entity->bodyhandle($new_body);
     }
@@ -350,16 +330,10 @@ sub DecodeMIMEWordsToEncoding {
             ."only Q(uoted-printable) and B(ase64) are supported");
 	}
 
-        # now we have got a decoded subject, try to convert into the encoding
-        unless ( $charset eq $to_charset ) {
-            my $orig_str = $enc_str;
-            eval { Encode::from_to( $enc_str, $charset, $to_charset, Encode::FB_CROAK ) };
-            if ($@) {
-                $enc_str = $orig_str;
-                $charset = _GuessCharset( $enc_str );
-                Encode::from_to( $enc_str, $charset, $to_charset );
-            }
-        }
+	    # now we have got a decoded subject, try to convert into the encoding
+	    unless ($charset eq $to_charset) {
+	        Encode::from_to($enc_str, $charset,  $to_charset);
+	    }
 
         # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
         # We _should_ be preserving them encoded until after parsing is completed and
@@ -397,6 +371,7 @@ sub DecodeMIMEWordsToEncoding {
             $enc_str =~ s/%(\w{2})/chr hex $1/eg;
             unless ( $charset eq $to_charset ) {
                 my $orig_str = $enc_str;
+<<<<<<< HEAD
                 local $@;
                 eval {
                     Encode::from_to( $enc_str, $charset, $to_charset,
@@ -407,6 +382,9 @@ sub DecodeMIMEWordsToEncoding {
                     $charset = _GuessCharset($enc_str);
                     Encode::from_to( $enc_str, $charset, $to_charset );
                 }
+=======
+                Encode::from_to( $enc_str, $charset, $to_charset );
+>>>>>>> ced8b87... typo fix
             }
             $enc_str = qq{"$enc_str"}
               if $enc_str =~ /[,;]/
@@ -582,30 +560,18 @@ sub SetMIMEHeadToEncoding {
         my @values = $head->get_all($tag);
         $head->delete($tag);
         foreach my $value (@values) {
-            Encode::_utf8_off($value);
-            my $orig_value = $value;
             if ( $charset ne $enc ) {
+
                 eval {
-                    Encode::from_to( $value, $charset => $enc, Encode::FB_CROAK );
+                    Encode::_utf8_off($value);
+                    Encode::from_to( $value, $charset => $enc );
                 };
                 if ($@) {
-                    $RT::Logger->error( "Encoding error: " 
-                          . $@
-                          . " falling back to iso-8859-1 => $enc" );
-                    $value = $orig_value;
-                    eval {
-                        Encode::from_to(
-                            $value,
-                            'iso-8859-1' => $enc,
-                            Encode::FB_CROAK
-                        );
-                    };
+                    $RT::Logger->error( "Encoding error: " . $@
+                                       . " defaulting to ISO-8859-1 -> UTF-8" );
+                    eval { Encode::from_to( $value, 'iso-8859-1' => $enc ) };
                     if ($@) {
-                        $RT::Logger->error( "Encoding error: " 
-                              . $@
-                              . " forcing conversion to $charset => $enc" );
-                        $value = $orig_value;
-                        Encode::from_to( $value, $charset => $enc );
+                        $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
                     }
                 }
             }
diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm.orig
similarity index 100%
copy from lib/RT/I18N.pm
copy to lib/RT/I18N.pm.orig

commit 990e03e48f26e2fce785fcdd794ff40d19127032
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Jun 14 09:13:35 2011 +0800

    Mark relevant tests as TODO, as we reverted 49fa675

diff --git a/t/mail/sendmail.t b/t/mail/sendmail.t
index 05a59bd..2883141 100644
--- a/t/mail/sendmail.t
+++ b/t/mail/sendmail.t
@@ -316,6 +316,10 @@ $parser->ParseMIMEEntityFromScalar($content);
 
  %args =        (message => $content, queue => 1, action => 'correspond');
 
+TODO: {
+        local $TODO =
+'need a better approach of encoding converter, should be fixed in 4.2';
+
 warnings_like {
  RT::Interface::Email::Gateway(\%args);
 }
@@ -325,6 +329,7 @@ warnings_like {
     ],
 "The badly formed Russian spam we have isn't actually well-formed UTF8, which makes Encode (correctly) warn";
 
+}
 
  $tickets = RT::Tickets->new(RT->SystemUser);
 $tickets->OrderBy(FIELD => 'id', ORDER => 'DESC');
diff --git a/t/mail/wrong_mime_charset.t b/t/mail/wrong_mime_charset.t
index 7636de4..4dd0051 100644
--- a/t/mail/wrong_mime_charset.t
+++ b/t/mail/wrong_mime_charset.t
@@ -23,6 +23,10 @@ local $SIG{__WARN__} = sub {
 
 RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1' );
 
+TODO: {
+        local $TODO =
+'need a better approach of encoding converter, should be fixed in 4.2';
+
 # this is a weird behavior for different perl versions, 5.12 warns twice,
 # which is correct since we do the encoding thing twice, for Subject
 # and Data respectively.
@@ -45,3 +49,4 @@ is( $subject, $test_string, 'subject is set to iso-8859-1' );
 my $body = decode( 'iso-8859-1', $mime->stringify_body );
 chomp $body;
 is( $body, $test_string, 'body is set to iso-8859-1' );
+}

commit 8287caff1bc684121e32e8f68f36ed2b7e51b4d1
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Jun 14 08:34:23 2011 +0800

    remove the eval and the unreachable "if ($@)" code.
    
    note: a more correct solution should be implemented in 4.2:
    ( see also #17680 )
    
    incoming mail:
    1) find encoding
    2) if found then try to convert to utf-8 in croak mode, return if success
    3) guess encoding
    4) if guessed differently then try to convert to utf-8 in croak mode, return
    if success
    5) mark part as application/octet-stream instead of falling back to any
    encoding
    
    outgoing mail:
    1) find encoding
    2) if not found then do nothing, send as is, let MUA deal with it
    3) if found then try to convert it to outgoing encoding in croak mode, return
    if success
    4) do nothing otherwise, keep original encoding

diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index 91a5657..f2ea5dc 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -238,23 +238,11 @@ sub SetMIMEEntityToEncoding {
         my $string = $body->as_string or return;
 
         # Convert the body
-        eval {
-            $RT::Logger->debug( "Converting '$charset' to '$enc' for " . $head->mime_type . " - " . ( $head->get('subject') || 'Subjectless message' ) );
-
-            # NOTE:: see the comments at the end of the sub.
-            Encode::_utf8_off( $string);
-            Encode::from_to( $string, $charset => $enc );
-        };
-
-        if ($@) {
-            $RT::Logger->error( "Encoding error: " . $@ . " defaulting to ISO-8859-1 -> UTF-8" );
-            eval { Encode::from_to( $string, 'iso-8859-1' => $enc ) };
-            if ($@) {
-                $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
-            }
-        }
+        $RT::Logger->debug( "Converting '$charset' to '$enc' for " . $head->mime_type . " - " . ( $head->get('subject') || 'Subjectless message' ) );
 
-        # }}}
+        # NOTE:: see the comments at the end of the sub.
+        Encode::_utf8_off( $string);
+        Encode::from_to( $string, $charset => $enc );
 
         my $new_body = MIME::Body::InCore->new( $string);
 
@@ -371,20 +359,7 @@ sub DecodeMIMEWordsToEncoding {
             $enc_str =~ s/%(\w{2})/chr hex $1/eg;
             unless ( $charset eq $to_charset ) {
                 my $orig_str = $enc_str;
-<<<<<<< HEAD
-                local $@;
-                eval {
-                    Encode::from_to( $enc_str, $charset, $to_charset,
-                        Encode::FB_CROAK );
-                };
-                if ($@) {
-                    $enc_str = $orig_str;
-                    $charset = _GuessCharset($enc_str);
-                    Encode::from_to( $enc_str, $charset, $to_charset );
-                }
-=======
-                Encode::from_to( $enc_str, $charset, $to_charset );
->>>>>>> ced8b87... typo fix
+                Encode::from_to( $enc_str, $charset, $to_charset,);
             }
             $enc_str = qq{"$enc_str"}
               if $enc_str =~ /[,;]/
@@ -561,19 +536,8 @@ sub SetMIMEHeadToEncoding {
         $head->delete($tag);
         foreach my $value (@values) {
             if ( $charset ne $enc ) {
-
-                eval {
-                    Encode::_utf8_off($value);
-                    Encode::from_to( $value, $charset => $enc );
-                };
-                if ($@) {
-                    $RT::Logger->error( "Encoding error: " . $@
-                                       . " defaulting to ISO-8859-1 -> UTF-8" );
-                    eval { Encode::from_to( $value, 'iso-8859-1' => $enc ) };
-                    if ($@) {
-                        $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
-                    }
-                }
+                Encode::_utf8_off($value);
+                Encode::from_to( $value, $charset => $enc );
             }
             $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )
                 unless $preserve_words;

-----------------------------------------------------------------------