[Rt-commit] rt branch, 4.0/rfc2231-param-continuations, updated. rt-4.0.4-119-g06b0d12

Fri Dec 9 17:29:37 EST 2011

The branch, 4.0/rfc2231-param-continuations has been updated
       via  06b0d1273c3b87a979fc10cbdd0d5ad8edfadd00 (commit)
       via  87d42d49a8f5696144fe275fff80815985a1f82e (commit)
       via  3d7eb03e26f579bff3e9f0eb29be72df2c081e21 (commit)
      from  44a4386d75edb98b1f5c591107eee21c41f4f3d0 (commit)

Summary of changes:
 lib/RT/I18N.pm         |   43 +++++++++++++++++++++----------------------
 t/mail/mime_decoding.t |   14 +++++++-------
 2 files changed, 28 insertions(+), 29 deletions(-)

- Log -----------------------------------------------------------------
commit 3d7eb03e26f579bff3e9f0eb29be72df2c081e21
Author: Thomas Sibley <trs at bestpractical.com>
Date:   Fri Dec 9 16:57:22 2011 -0500

    DecodeMIMEWordsToEncoding takes a header value, not the full header
    
    It also really wants to be passed the header name, otherwise it can only
    assume so much.

diff --git a/t/mail/mime_decoding.t b/t/mail/mime_decoding.t
index ca25836..30bb233 100644
--- a/t/mail/mime_decoding.t
+++ b/t/mail/mime_decoding.t
@@ -59,11 +59,11 @@ diag q{newline and encoded file name};
 diag q{rfc2231};
 {
     my $str =
-"filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74 filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
+"attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74 filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
     is(
-        RT::I18N::DecodeMIMEWordsToEncoding( $str, 'utf-8' ),
-        'filename=tést.txt filename=tést.txt',
-        'right decodig'
+        RT::I18N::DecodeMIMEWordsToEncoding( $str, 'utf-8', 'Content-Disposition' ),
+        'attachment; filename=tést.txt filename=tést.txt',
+        'right decoding'
     );
 }
 
@@ -72,15 +72,15 @@ diag q{rfc2231 param continuations};
     # XXX TODO: test various forms of the continuation stuff
     #       quotes around the values
     my $hdr = <<'.';
-Content-Disposition: inline;
+inline;
  filename*0*=ISO-2022-JP'ja'%1b$B%3f7$7$$%25F%25%2d%259%25H%1b%28B;
  filename*1*=%20;
  filename*2*=%1b$B%25I%25%2d%25e%25a%25s%25H%1b%28B;
  filename*3=.txt
 .
     is(
-        RT::I18N::DecodeMIMEWordsToEncoding( $hdr, 'utf-8' ),
-        'Content-Disposition: inline; filename*0="新しいテキスト"; filename*1=" "; filename*2="ドキュメント"; filename*3=".txt"',
+        RT::I18N::DecodeMIMEWordsToEncoding( $hdr, 'utf-8', 'Content-Disposition' ),
+        'inline; filename*0="新しいテキスト"; filename*1=" "; filename*2="ドキュメント"; filename*3=".txt"',
         'decoded, but continuations preserved'
     );
 }

commit 87d42d49a8f5696144fe275fff80815985a1f82e
Author: Thomas Sibley <trs at bestpractical.com>
Date:   Fri Dec 9 17:03:25 2011 -0500

    Cleanup a bogus duplicate parameter in tests
    
    It wasn't even separated with a semicolon as required.

diff --git a/t/mail/mime_decoding.t b/t/mail/mime_decoding.t
index 30bb233..cfbd283 100644
--- a/t/mail/mime_decoding.t
+++ b/t/mail/mime_decoding.t
@@ -59,10 +59,10 @@ diag q{newline and encoded file name};
 diag q{rfc2231};
 {
     my $str =
-"attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74 filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
+"attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
     is(
         RT::I18N::DecodeMIMEWordsToEncoding( $str, 'utf-8', 'Content-Disposition' ),
-        'attachment; filename=tést.txt filename=tést.txt',
+        'attachment; filename=tést.txt',
         'right decoding'
     );
 }

commit 06b0d1273c3b87a979fc10cbdd0d5ad8edfadd00
Author: Thomas Sibley <trs at bestpractical.com>
Date:   Fri Dec 9 17:06:20 2011 -0500

    Replace our broken custom RFC 2231 support with MIME::Field::ParamVal
    
    This makes parameter continuations work.  As ParamVal sometimes produces
    QB encoded words with the optional language component after the charset,
    our regex is extended to optionally match it (and ignore it).

diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index 60a588e..e1953a9 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -285,7 +285,27 @@ sub DecodeMIMEWordsToEncoding {
     my $to_charset = _CanonicalizeCharset(shift);
     my $field = shift || '';
 
-    my @list = $str =~ m/(.*?)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/gcs;
+    # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value
+    # continuations, and similar syntax from RFC 2231
+    if ($field =~ /^Content-(Type|Disposition)/i) {
+        # This concatenates continued parameters and normalizes encoded params
+        # to QB encoded-words which we handle below
+        $str = MIME::Field::ParamVal->parse($str)->stringify;
+    }
+
+    # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
+    # custom MIME word decoding and charset canonicalization
+    my @list = $str =~ m/(.*?)          # prefix
+                         =\?            # =?
+                         ([^?]+?)       # charset
+                         (?:\*[^?]+)?   # optional '*language'
+                         \?             # ?
+                         ([QqBb])       # encoding
+                         \?             # ?
+                         ([^?]+)        # encoded string
+                         \?=            # ?=
+                         ([^=]*)        # trailing
+                        /xgcs;
 
     if ( @list ) {
         # add everything that hasn't matched to the end of the latest
@@ -343,27 +363,6 @@ sub DecodeMIMEWordsToEncoding {
         }
     }
 
-# handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, see also rfc 2231
-    @list = $str =~ m/(.*?\*=)([^']*?)'([^']*?)'(\S+)(.*?)(?=(?:\*=|$))/gcs;
-    if (@list) {
-        $str = '';
-        while (@list) {
-            my ( $prefix, $charset, $language, $enc_str, $trailing ) =
-              splice @list, 0, 5;
-            $prefix =~ s/\*=$/=/; # remove the *
-            $charset = _CanonicalizeCharset($charset);
-            $enc_str =~ s/%(\w{2})/chr hex $1/eg;
-            unless ( $charset eq $to_charset ) {
-                Encode::from_to( $enc_str, $charset, $to_charset );
-            }
-            $enc_str = qq{"$enc_str"}
-              if $enc_str =~ /[,;]/
-              and $enc_str !~ /^".*"$/
-              and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i);
-            $str .= $prefix . $enc_str . $trailing;
-        }
-     }
-
     # We might have \n without trailing whitespace, which will result in
     # invalid headers.
     $str =~ s/\n//g;
diff --git a/t/mail/mime_decoding.t b/t/mail/mime_decoding.t
index cfbd283..0a3610c 100644
--- a/t/mail/mime_decoding.t
+++ b/t/mail/mime_decoding.t
@@ -62,7 +62,7 @@ diag q{rfc2231};
 "attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
     is(
         RT::I18N::DecodeMIMEWordsToEncoding( $str, 'utf-8', 'Content-Disposition' ),
-        'attachment; filename=tést.txt',
+        'attachment; filename="tést.txt"',
         'right decoding'
     );
 }
@@ -80,7 +80,7 @@ inline;
 .
     is(
         RT::I18N::DecodeMIMEWordsToEncoding( $hdr, 'utf-8', 'Content-Disposition' ),
-        'inline; filename*0="新しいテキスト"; filename*1=" "; filename*2="ドキュメント"; filename*3=".txt"',
+        'inline; filename="新しいテキスト ドキュメント.txt"',
         'decoded, but continuations preserved'
     );
 }

-----------------------------------------------------------------------