[Rt-commit] rt branch, 4.0/rfc2231-param-continuations, updated. rt-4.0.4-119-g06b0d12
Thomas Sibley
trs at bestpractical.com
Fri Dec 9 17:29:37 EST 2011
The branch, 4.0/rfc2231-param-continuations has been updated
via 06b0d1273c3b87a979fc10cbdd0d5ad8edfadd00 (commit)
via 87d42d49a8f5696144fe275fff80815985a1f82e (commit)
via 3d7eb03e26f579bff3e9f0eb29be72df2c081e21 (commit)
from 44a4386d75edb98b1f5c591107eee21c41f4f3d0 (commit)
Summary of changes:
lib/RT/I18N.pm | 43 +++++++++++++++++++++----------------------
t/mail/mime_decoding.t | 14 +++++++-------
2 files changed, 28 insertions(+), 29 deletions(-)
- Log -----------------------------------------------------------------
commit 3d7eb03e26f579bff3e9f0eb29be72df2c081e21
Author: Thomas Sibley <trs at bestpractical.com>
Date: Fri Dec 9 16:57:22 2011 -0500
DecodeMIMEWordsToEncoding takes a header value, not the full header
It also really wants to be passed the header name, otherwise it can only
assume so much.
diff --git a/t/mail/mime_decoding.t b/t/mail/mime_decoding.t
index ca25836..30bb233 100644
--- a/t/mail/mime_decoding.t
+++ b/t/mail/mime_decoding.t
@@ -59,11 +59,11 @@ diag q{newline and encoded file name};
diag q{rfc2231};
{
my $str =
-"filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74 filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
+"attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74 filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
is(
- RT::I18N::DecodeMIMEWordsToEncoding( $str, 'utf-8' ),
- 'filename=tést.txt filename=tést.txt',
- 'right decodig'
+ RT::I18N::DecodeMIMEWordsToEncoding( $str, 'utf-8', 'Content-Disposition' ),
+ 'attachment; filename=tést.txt filename=tést.txt',
+ 'right decoding'
);
}
@@ -72,15 +72,15 @@ diag q{rfc2231 param continuations};
# XXX TODO: test various forms of the continuation stuff
# quotes around the values
my $hdr = <<'.';
-Content-Disposition: inline;
+inline;
filename*0*=ISO-2022-JP'ja'%1b$B%3f7$7$$%25F%25%2d%259%25H%1b%28B;
filename*1*=%20;
filename*2*=%1b$B%25I%25%2d%25e%25a%25s%25H%1b%28B;
filename*3=.txt
.
is(
- RT::I18N::DecodeMIMEWordsToEncoding( $hdr, 'utf-8' ),
- 'Content-Disposition: inline; filename*0="新しいテキスト"; filename*1=" "; filename*2="ドキュメント"; filename*3=".txt"',
+ RT::I18N::DecodeMIMEWordsToEncoding( $hdr, 'utf-8', 'Content-Disposition' ),
+ 'inline; filename*0="新しいテキスト"; filename*1=" "; filename*2="ドキュメント"; filename*3=".txt"',
'decoded, but continuations preserved'
);
}
commit 87d42d49a8f5696144fe275fff80815985a1f82e
Author: Thomas Sibley <trs at bestpractical.com>
Date: Fri Dec 9 17:03:25 2011 -0500
Cleanup a bogus duplicate parameter in tests
It wasn't even separated with a semicolon as required.
diff --git a/t/mail/mime_decoding.t b/t/mail/mime_decoding.t
index 30bb233..cfbd283 100644
--- a/t/mail/mime_decoding.t
+++ b/t/mail/mime_decoding.t
@@ -59,10 +59,10 @@ diag q{newline and encoded file name};
diag q{rfc2231};
{
my $str =
-"attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74 filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
+"attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
is(
RT::I18N::DecodeMIMEWordsToEncoding( $str, 'utf-8', 'Content-Disposition' ),
- 'attachment; filename=tést.txt filename=tést.txt',
+ 'attachment; filename=tést.txt',
'right decoding'
);
}
commit 06b0d1273c3b87a979fc10cbdd0d5ad8edfadd00
Author: Thomas Sibley <trs at bestpractical.com>
Date: Fri Dec 9 17:06:20 2011 -0500
Replace our broken custom RFC 2231 support with MIME::Field::ParamVal
This makes parameter continuations work. As ParamVal sometimes produces
QB encoded words with the optional language component after the charset,
our regex is extended to optionally match it (and ignore it).
diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index 60a588e..e1953a9 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -285,7 +285,27 @@ sub DecodeMIMEWordsToEncoding {
my $to_charset = _CanonicalizeCharset(shift);
my $field = shift || '';
- my @list = $str =~ m/(.*?)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/gcs;
+ # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value
+ # continuations, and similar syntax from RFC 2231
+ if ($field =~ /^Content-(Type|Disposition)/i) {
+ # This concatenates continued parameters and normalizes encoded params
+ # to QB encoded-words which we handle below
+ $str = MIME::Field::ParamVal->parse($str)->stringify;
+ }
+
+ # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
+ # custom MIME word decoding and charset canonicalization
+ my @list = $str =~ m/(.*?) # prefix
+ =\? # =?
+ ([^?]+?) # charset
+ (?:\*[^?]+)? # optional '*language'
+ \? # ?
+ ([QqBb]) # encoding
+ \? # ?
+ ([^?]+) # encoded string
+ \?= # ?=
+ ([^=]*) # trailing
+ /xgcs;
if ( @list ) {
# add everything that hasn't matched to the end of the latest
@@ -343,27 +363,6 @@ sub DecodeMIMEWordsToEncoding {
}
}
-# handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, see also rfc 2231
- @list = $str =~ m/(.*?\*=)([^']*?)'([^']*?)'(\S+)(.*?)(?=(?:\*=|$))/gcs;
- if (@list) {
- $str = '';
- while (@list) {
- my ( $prefix, $charset, $language, $enc_str, $trailing ) =
- splice @list, 0, 5;
- $prefix =~ s/\*=$/=/; # remove the *
- $charset = _CanonicalizeCharset($charset);
- $enc_str =~ s/%(\w{2})/chr hex $1/eg;
- unless ( $charset eq $to_charset ) {
- Encode::from_to( $enc_str, $charset, $to_charset );
- }
- $enc_str = qq{"$enc_str"}
- if $enc_str =~ /[,;]/
- and $enc_str !~ /^".*"$/
- and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i);
- $str .= $prefix . $enc_str . $trailing;
- }
- }
-
# We might have \n without trailing whitespace, which will result in
# invalid headers.
$str =~ s/\n//g;
diff --git a/t/mail/mime_decoding.t b/t/mail/mime_decoding.t
index cfbd283..0a3610c 100644
--- a/t/mail/mime_decoding.t
+++ b/t/mail/mime_decoding.t
@@ -62,7 +62,7 @@ diag q{rfc2231};
"attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
is(
RT::I18N::DecodeMIMEWordsToEncoding( $str, 'utf-8', 'Content-Disposition' ),
- 'attachment; filename=tést.txt',
+ 'attachment; filename="tést.txt"',
'right decoding'
);
}
@@ -80,7 +80,7 @@ inline;
.
is(
RT::I18N::DecodeMIMEWordsToEncoding( $hdr, 'utf-8', 'Content-Disposition' ),
- 'inline; filename*0="新しいテキスト"; filename*1=" "; filename*2="ドキュメント"; filename*3=".txt"',
+ 'inline; filename="新しいテキスト ドキュメント.txt"',
'decoded, but continuations preserved'
);
}
-----------------------------------------------------------------------
More information about the Rt-commit
mailing list