[Rt-commit] rt branch, 3.8/set-mime-encoding-fix, created. rt-3.8.10-13-g10d5b18
? sunnavy
sunnavy at bestpractical.com
Wed Jun 8 17:48:05 EDT 2011
The branch, 3.8/set-mime-encoding-fix has been created
at 10d5b18b6e9a5cd24d53129549d3e36ca214536e (commit)
- Log -----------------------------------------------------------------
commit be7dfadeb396824c046186eb5ec3c8e6b61ac664
Author: sunnavy <sunnavy at bestpractical.com>
Date: Thu Jun 9 05:39:28 2011 +0800
refactor SetMIMEHeaderToEncoding and SetMIMEEntityToEncoding
Two important changes:
1. Add a $force argument that converts strings without checking them first.
This is useful in the SendEmail action: the original MIME object there
is always utf-8, so the fallback logic is unnecessary.
2. Fall back to the guessed charset instead of the hardcoded iso-8859-1.
diff --git a/lib/RT/Action/SendEmail.pm b/lib/RT/Action/SendEmail.pm
index 9e93e4a..be3bd3a 100755
--- a/lib/RT/Action/SendEmail.pm
+++ b/lib/RT/Action/SendEmail.pm
@@ -212,9 +212,11 @@ sub Prepare {
$part->head->mime_attr( "Content-Type.charset" => 'utf-8' );
}
+ # $MIMEObj is utf-8 encoded, so it is safe to force the conversion
+ # (the trailing 1 in the arguments is the $force flag)
RT::I18N::SetMIMEEntityToEncoding( $MIMEObj,
RT->Config->Get('EmailOutputEncoding'),
- 'mime_words_ok', );
+ 'mime_words_ok', 1 );
# Build up a MIME::Entity that looks like the original message.
$self->AddAttachments if ( $MIMEObj->head->get('RT-Attach-Message')
diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index 2056b3e..dbc0e2d 100755
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -196,7 +196,7 @@ sub IsTextualContentType {
# {{{ SetMIMEEntityToEncoding
-=head2 SetMIMEEntityToEncoding $entity, $encoding
+=head2 SetMIMEEntityToEncoding $entity, $encoding, $preserve_words, $force
A utility function which will try to convert the entity body into the specified
charset encoding (encoded as octets, *not* unicode-strings). It will
@@ -204,19 +204,23 @@ iterate all the entities in $entity, and try to convert each one into
specified charset if whose Content-Type is 'text/plain'.
The methods are tried in order:
-1) to convert the entity to $encoding,
-2) to interpret the entity as iso-8859-1 and then convert it to $encoding,
+1) convert the entity to $encoding,
+2) guess the entity's encoding then convert it to $encoding,
3) forcibly convert it to $encoding.
+
+If $force is true, steps 1 and 2 are skipped.
+
This function doesn't return anything meaningful.
=cut
sub SetMIMEEntityToEncoding {
- my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
+ my ( $entity, $enc, $preserve_words, $force ) =
+ ( shift, shift, shift, shift );
# do the same for parts first of all
- SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
+ SetMIMEEntityToEncoding( $_, $enc, $preserve_words, $force ) foreach $entity->parts;
my $charset = _FindOrGuessCharset($entity) or return;
# one and only normalization
@@ -226,7 +230,8 @@ sub SetMIMEEntityToEncoding {
SetMIMEHeadToEncoding(
$entity->head,
_FindOrGuessCharset($entity, 1) => $enc,
- $preserve_words
+ $preserve_words,
+ $force,
);
my $head = $entity->head;
@@ -253,31 +258,46 @@ sub SetMIMEEntityToEncoding {
my $orig_string = $string;
# {{{ Convert the body
- eval {
- $RT::Logger->debug( "Converting '$charset' to '$enc' for "
- . $head->mime_type . " - "
- . ( $head->get('subject') || 'Subjectless message' ) );
- Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
- };
-
- if ($@) {
- $RT::Logger->error( "Encoding error: "
- . $@
- . " falling back to iso-8859-1 => $enc" );
- $string = $orig_string;
+ $RT::Logger->debug( "Converting '$charset' to '$enc' for "
+ . $head->mime_type . " - "
+ . ( $head->get('subject') || 'Subjectless message' ) );
+ if ( $force ) {
+ Encode::from_to( $string, $charset => $enc );
+ }
+ else {
eval {
- Encode::from_to(
- $string,
- 'iso-8859-1' => $enc,
- Encode::FB_CROAK
- );
+ Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
};
+
if ($@) {
- $RT::Logger->error( "Encoding error: "
- . $@
- . " forcing conversion to $charset => $enc" );
$string = $orig_string;
- Encode::from_to( $string, $charset => $enc );
+ my $guess = _GuessCharset($orig_string);
+ $RT::Logger->error( "Encoding error: "
+ . $@
+ . " falling back to $guess => $enc" );
+
+ my $success;
+ if ( $guess eq $enc ) {
+ $success = 1;
+ }
+ else {
+ eval {
+ Encode::from_to(
+ $string,
+ $guess => $enc,
+ Encode::FB_CROAK
+ );
+ };
+ $success = !$@;
+ }
+
+ if ( !$success ) {
+ $RT::Logger->error( "Encoding error: "
+ . $@
+ . " forcing conversion to $charset => $enc" );
+ $string = $orig_string;
+ Encode::from_to( $string, $charset => $enc );
+ }
}
}
@@ -492,7 +512,7 @@ sub _GuessCharset {
# {{{ SetMIMEHeadToEncoding
-=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
+=head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET PRESERVE_WORDS FORCE
Converts a MIME Head from one encoding to another. This totally violates the RFC.
We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
@@ -502,7 +522,8 @@ all the time
=cut
sub SetMIMEHeadToEncoding {
- my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
+ my ( $head, $charset, $enc, $preserve_words, $force ) =
+ ( shift, shift, shift, shift, shift );
$charset = 'utf-8' if $charset eq 'utf8';
$enc = 'utf-8' if $enc eq 'utf8';
@@ -517,27 +538,47 @@ sub SetMIMEHeadToEncoding {
Encode::_utf8_off($value);
my $orig_value = $value;
if ( $charset ne $enc ) {
- eval {
- Encode::from_to( $value, $charset => $enc, Encode::FB_CROAK );
- };
- if ($@) {
- $RT::Logger->error( "Encoding error: "
- . $@
- . " falling back to iso-8859-1 => $enc" );
- $value = $orig_value;
+ if ( $force ) {
+ Encode::from_to( $value, $charset => $enc );
+ }
+ else {
eval {
Encode::from_to(
$value,
- 'iso-8859-1' => $enc,
+ $charset => $enc,
Encode::FB_CROAK
);
};
if ($@) {
- $RT::Logger->error( "Encoding error: "
+
+ my $guess = _GuessCharset( $orig_value );
+ $RT::Logger->error( "Encoding error: "
. $@
- . " forcing conversion to $charset => $enc" );
+ . " falling back to $guess => $enc" );
$value = $orig_value;
- Encode::from_to( $value, $charset => $enc );
+
+ my $success;
+ if ( $guess eq $enc ) {
+ $success = 1;
+ }
+ else {
+ eval {
+ Encode::from_to(
+ $value,
+ $guess => $enc,
+ Encode::FB_CROAK
+ );
+ };
+ $success = !$@;
+ }
+
+ if ( !$success ) {
+ $RT::Logger->error( "Encoding error: "
+ . $@
+ . " forcing conversion to $charset => $enc" );
+ $value = $orig_value;
+ Encode::from_to( $value, $charset => $enc );
+ }
}
}
}
commit 10d5b18b6e9a5cd24d53129549d3e36ca214536e
Author: sunnavy <sunnavy at bestpractical.com>
Date: Thu Jun 9 05:40:18 2011 +0800
more tests for RT::I18N::SetMIMEEntityToEncoding
diff --git a/t/api/i18n_mime_encoding.t b/t/api/i18n_mime_encoding.t
new file mode 100644
index 0000000..725854c
--- /dev/null
+++ b/t/api/i18n_mime_encoding.t
@@ -0,0 +1,46 @@
+use warnings;
+use strict;
+
+use RT::Test nodata => 1, tests => 4;
+use RT::I18N;
+use Encode;
+
+my @warnings;
+local $SIG{__WARN__} = sub {
+ push @warnings, "@_";
+};
+
+my $result = encode( 'iso-8859-1', decode_utf8('À??') );
+
+diag "normal mime encoding conversion: utf8 => iso-8859-1"
+ if $ENV{TEST_VERBOSE};
+{
+ my $mime = MIME::Entity->build(
+ Type => 'text/plain; charset=utf-8',
+ Data => ['À中文'],
+ );
+
+ RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', );
+ like(
+ join( '', @warnings ),
+ qr/does not map to iso-8859-1/,
+ 'get no-map warning'
+ );
+ is( $mime->stringify_body, $result,
+ 'invalid chars in mail are replaced by ?' );
+ @warnings = ();
+}
+
+diag "force mime encoding conversion: utf8 => iso-8859-1"
+ if $ENV{TEST_VERBOSE};
+{
+ my $mime = MIME::Entity->build(
+ Type => 'text/plain; charset=utf-8',
+ Data => ['À中文'],
+ );
+ RT::I18N::SetMIMEEntityToEncoding( $mime, 'iso-8859-1', '', 1 );
+ is( scalar @warnings, 0, 'no warnings with force' );
+ is( $mime->stringify_body, $result,
+ 'invalid chars in mail are replaced by ?' );
+}
+
-----------------------------------------------------------------------
More information about the Rt-commit
mailing list