[Rt-commit] rt branch, 4.0/set-mime-encoding-fix, created. rt-4.0.1rc1-9-gd1730ac
? sunnavy
sunnavy at bestpractical.com
Wed Jun 8 17:19:56 EDT 2011
The branch, 4.0/set-mime-encoding-fix has been created
at d1730ac6d33f3ba39c09e2f3a6134eda4ec0e37b (commit)
- Log -----------------------------------------------------------------
commit d18e7f38733163d0eea68025dcc6c314a8e45f05
Author: sunnavy <sunnavy at bestpractical.com>
Date: Thu Jun 9 04:54:23 2011 +0800
refactor SetMIMEHeaderToEncoding and SetMIMEEntityToEncoding
2 important changes:
1. add $force argument to convert strings without check.
this is useful in SendEmail action as the original MIME object
is always utf-8, the fallback stuff is useless there.
2. fallback to the guessed charset instead of hardcoded iso-8859-1
diff --git a/lib/RT/Action/SendEmail.pm b/lib/RT/Action/SendEmail.pm
index cc63106..066e84d 100644
--- a/lib/RT/Action/SendEmail.pm
+++ b/lib/RT/Action/SendEmail.pm
@@ -211,9 +211,11 @@ sub Prepare {
$part->head->mime_attr( "Content-Type.charset" => 'utf-8' );
}
+ # $MIMEObj is utf-8 encoded, so it's ok to force( 1 in the arguments )
+ # the conversion
RT::I18N::SetMIMEEntityToEncoding( $MIMEObj,
RT->Config->Get('EmailOutputEncoding'),
- 'mime_words_ok', );
+ 'mime_words_ok', 1 );
# Build up a MIME::Entity that looks like the original message.
$self->AddAttachments if ( $MIMEObj->head->get('RT-Attach-Message')
diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index a1f2af5..9887123 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -192,7 +192,7 @@ sub IsTextualContentType {
}
-=head2 SetMIMEEntityToEncoding $entity, $encoding
+=head2 SetMIMEEntityToEncoding $entity, $encoding, $preserve_words, $force
An utility function which will try to convert entity body into specified
charset encoding (encoded as octets, *not* unicode-strings). It will
@@ -200,26 +200,31 @@ iterate all the entities in $entity, and try to convert each one into
specified charset if whose Content-Type is 'text/plain'.
the methods are tries in order:
-1) to convert the entity to $encoding,
-2) to interpret the entity as iso-8859-1 and then convert it to $encoding,
+1) convert the entity to $encoding,
+2) guess the entity's encoding then convert it to $encoding,
3) forcibly convert it to $encoding.
+if $force is true, 1 and 2 will be skipped.
+
This function doesn't return anything meaningful.
=cut
sub SetMIMEEntityToEncoding {
- my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
+ my ( $entity, $enc, $preserve_words, $force ) =
+ ( shift, shift, shift, shift );
# do the same for parts first of all
- SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
+ SetMIMEEntityToEncoding( $_, $enc, $preserve_words, $force )
+ foreach $entity->parts;
my $charset = _FindOrGuessCharset($entity) or return;
SetMIMEHeadToEncoding(
$entity->head,
_FindOrGuessCharset($entity, 1) => $enc,
- $preserve_words
+ $preserve_words,
+ $force,
);
my $head = $entity->head;
@@ -246,31 +251,48 @@ sub SetMIMEEntityToEncoding {
my $orig_string = $string;
# Convert the body
- eval {
- $RT::Logger->debug( "Converting '$charset' to '$enc' for "
- . $head->mime_type . " - "
- . ( $head->get('subject') || 'Subjectless message' ) );
- Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
- };
-
- if ($@) {
- $RT::Logger->error( "Encoding error: "
- . $@
- . " falling back to iso-8859-1 => $enc" );
- $string = $orig_string;
+ $RT::Logger->debug( "Converting '$charset' to '$enc' for "
+ . $head->mime_type . " - "
+ . ( $head->get('subject') || 'Subjectless message' ) );
+
+ if ( $force ) {
+ Encode::from_to( $string, $charset => $enc );
+ }
+ else {
eval {
- Encode::from_to(
- $string,
- 'iso-8859-1' => $enc,
- Encode::FB_CROAK
- );
+ Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
};
+
if ($@) {
- $RT::Logger->error( "Encoding error: "
+ $RT::Logger->error( "Encoding error: "
. $@
- . " forcing conversion to $charset => $enc" );
+ . " falling back to iso-8859-1 => $enc" );
$string = $orig_string;
- Encode::from_to( $string, $charset => $enc );
+
+ my $guess = _GuessCharset( $string );
+ my $success;
+
+ if ( $guess eq $enc ) {
+ $success = 1;
+ }
+ else {
+ eval {
+ Encode::from_to(
+ $string,
+ $guess => $enc,
+ Encode::FB_CROAK
+ );
+ };
+ $success = !$@;
+ }
+
+ if ( !$success ) {
+ $RT::Logger->error( "Encoding error: "
+ . $@
+ . " forcing conversion to $charset => $enc" );
+ $string = $orig_string;
+ Encode::from_to( $string, $charset => $enc );
+ }
}
}
@@ -560,7 +582,7 @@ sub _CanonicalizeCharset {
}
-=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
+=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET PRESERVE_WORDS FORCE
Converts a MIME Head from one encoding to another. This totally violates the RFC.
We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
@@ -570,7 +592,8 @@ all the time
=cut
sub SetMIMEHeadToEncoding {
- my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
+ my ( $head, $charset, $enc, $preserve_words, $force ) =
+ ( shift, shift, shift, shift, shift );
$charset = _CanonicalizeCharset($charset);
$enc = _CanonicalizeCharset($enc);
@@ -585,27 +608,47 @@ sub SetMIMEHeadToEncoding {
Encode::_utf8_off($value);
my $orig_value = $value;
if ( $charset ne $enc ) {
- eval {
- Encode::from_to( $value, $charset => $enc, Encode::FB_CROAK );
- };
- if ($@) {
- $RT::Logger->error( "Encoding error: "
- . $@
- . " falling back to iso-8859-1 => $enc" );
- $value = $orig_value;
+ if ( $force ) {
+ Encode::from_to( $value, $charset => $enc );
+ }
+ else {
eval {
Encode::from_to(
$value,
- 'iso-8859-1' => $enc,
+ $charset => $enc,
Encode::FB_CROAK
);
};
if ($@) {
- $RT::Logger->error( "Encoding error: "
+
+ my $guess = _GuessCharset( $orig_value );
+ $RT::Logger->error( "Encoding error: "
. $@
- . " forcing conversion to $charset => $enc" );
+ . " falling back to $guess => $enc" );
$value = $orig_value;
- Encode::from_to( $value, $charset => $enc );
+
+ my $success;
+ if ( $guess eq $enc ) {
+ $success = 1;
+ }
+ else {
+ eval {
+ Encode::from_to(
+ $value,
+ $guess => $enc,
+ Encode::FB_CROAK
+ );
+ };
+ $success = !$@;
+ }
+
+ if ( !$success ) {
+ $RT::Logger->error( "Encoding error: "
+ . $@
+ . " forcing conversion to $charset => $enc" );
+ $value = $orig_value;
+ Encode::from_to( $value, $charset => $enc );
+ }
}
}
}
diff --git a/t/mail/sendmail.t b/t/mail/sendmail.t
index 05a59bd..05d900d 100644
--- a/t/mail/sendmail.t
+++ b/t/mail/sendmail.t
@@ -3,7 +3,7 @@
use strict;
use File::Spec ();
-use RT::Test tests => 139;
+use RT::Test tests => 138;
use Test::Warn;
use RT::EmailParser;
@@ -316,15 +316,7 @@ $parser->ParseMIMEEntityFromScalar($content);
%args = (message => $content, queue => 1, action => 'correspond');
-warnings_like {
RT::Interface::Email::Gateway(\%args);
-}
-[
- qr/Encoding error: "\\x\{041f\}" does not map to iso-8859-1 .*/,
- qr/Encoding error: "\\x\{041f\}" does not map to iso-8859-1 .*/
- ],
-"The badly formed Russian spam we have isn't actually well-formed UTF8, which makes Encode (correctly) warn";
-
$tickets = RT::Tickets->new(RT->SystemUser);
$tickets->OrderBy(FIELD => 'id', ORDER => 'DESC');
diff --git a/t/mail/wrong_mime_charset.t b/t/mail/wrong_mime_charset.t
index 7636de4..4739240 100644
--- a/t/mail/wrong_mime_charset.t
+++ b/t/mail/wrong_mime_charset.t
@@ -1,7 +1,7 @@
#!/usr/bin/perl
use strict;
use warnings;
-use RT::Test nodb => 1, tests => 6;
+use RT::Test nodb => 1, tests => 4;
use_ok('RT::I18N');
use utf8;
@@ -23,18 +23,8 @@ local $SIG{__WARN__} = sub {
RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1' );
-# this is a weird behavior for different perl versions, 5.12 warns twice,
-# which is correct since we do the encoding thing twice, for Subject
-# and Data respectively.
-# but 5.8 and 5.10 warns only once.
-ok( @warnings == 1 || @warnings == 2, "1 or 2 warnings are ok" );
-ok(
- @warnings == 1 || $warnings[1] eq $warnings[0],
- 'if there are 2 warnings, they should be same'
-);
-
like(
- $warnings[0],
+ join( '', @warnings ),
qr/\QEncoding error: "\x{fffd}" does not map to iso-8859-1/,
"We can't encode something into the wrong encoding without Encode complaining"
);
commit d1730ac6d33f3ba39c09e2f3a6134eda4ec0e37b
Author: sunnavy <sunnavy at bestpractical.com>
Date: Thu Jun 9 05:16:59 2011 +0800
more tests for RT::I18N::SetMIMEEntityToEncoding
diff --git a/t/api/i18n_mime_encoding.t b/t/api/i18n_mime_encoding.t
new file mode 100644
index 0000000..5109633
--- /dev/null
+++ b/t/api/i18n_mime_encoding.t
@@ -0,0 +1,44 @@
+use warnings;
+use strict;
+
+use RT::Test nodata => 1, tests => 4;
+use RT::I18N;
+use Encode;
+
+my @warnings;
+local $SIG{__WARN__} = sub {
+ push @warnings, "@_";
+};
+
+my $result = encode( 'iso-8859-1', decode_utf8('Ã??') );
+
+diag "normal mime encoding conversion: utf8 => iso-8859-1";
+{
+ my $mime = MIME::Entity->build(
+ Type => 'text/plain; charset=utf-8',
+ Data => ['Ãä¸æ'],
+ );
+
+ RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', );
+ like(
+ join( '', @warnings ),
+ qr/does not map to iso-8859-1/,
+ 'get no-map warning'
+ );
+ is( $mime->stringify_body, $result,
+ 'invalid chars in mail are replaced by ?' );
+ @warnings = ();
+}
+
+diag "force mime encoding conversion: utf8 => iso-8859-1";
+{
+ my $mime = MIME::Entity->build(
+ Type => 'text/plain; charset=utf-8',
+ Data => ['Ãä¸æ'],
+ );
+ RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', '', 1 );
+ is( scalar @warnings, 0, 'no warnings with force' );
+ is( $mime->stringify_body, $result,
+ 'invalid chars in mail are replaced by ?' );
+}
+
-----------------------------------------------------------------------
More information about the Rt-commit
mailing list