[Rt-commit] rt branch, 4.0/protect-more-chars-while-decoding-headers, updated. rt-4.0.6-224-ga17d4eb

Mon Nov 5 09:43:27 EST 2012

The branch, 4.0/protect-more-chars-while-decoding-headers has been updated
       via  a17d4eb7da820da20f18dc953737e68991e317e9 (commit)
      from  298eec23c1858227c7537bed125c5b9178725790 (commit)

Summary of changes:
 lib/RT/I18N.pm      | 147 +++++++++++++++++++++++++++++++---------------------
 t/mail/dashboards.t |   2 +-
 2 files changed, 88 insertions(+), 61 deletions(-)

- Log -----------------------------------------------------------------
commit a17d4eb7da820da20f18dc953737e68991e317e9
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Mon Nov 5 18:20:07 2012 +0400

    change how we deal with decoding structured fields
    
    we use MIME::Field::ParamVal already to deal with
    continued params (RFC2231). This module allows us
    to deal with separate parameters, decode them and
    deal with quotes inside or absence of those without
    guessing.
    
    There is a bug in the module [1] - wrong parsing
    of quoted strings and wrong quoting on stringify.
    It was fixed, patch is on rt.cpan.org waiting for
    merge and release of the module.
    
    Similar situation with From, Cc, Bcc fields. Use
    Email::Address to parse and split, then decode
    and format.

diff --git a/lib/RT/I18N.pm b/lib/RT/I18N.pm
index af6f4df..b91a370 100644
--- a/lib/RT/I18N.pm
+++ b/lib/RT/I18N.pm
@@ -284,14 +284,69 @@ sub DecodeMIMEWordsToEncoding {
     my $str = shift;
     my $to_charset = _CanonicalizeCharset(shift);
     my $field = shift || '';
+    $RT::Logger->warning(
+        "DecodeMIMEWordsToEncoding was called without field name."
+        ."It's known to cause troubles with decoding fields properly."
+    ) unless $field;
+
+    # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
+    # We _should_ be preserving them encoded until after parsing is completed and
+    # THEN undo the mime-encoding.
+    #
+    # This routine should be translating the existing mimeencoding to utf8 but leaving
+    # things encoded.
+    #
+    # It's legal for headers to contain mime-encoded commas and semicolons which
+    # should not be treated as address separators. (Encoding == quoting here)
+    #
+    # until this is fixed, we must escape any string containing a comma or semicolon
+    # this is only a bandaid
+
+    # Some _other_ MUAs encode quotes _already_, and double quotes
+    # confuse us a lot, so only quote it if it isn't quoted
+    # already.
 
     # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value
     # continuations, and similar syntax from RFC 2231
-    if ($field =~ /^Content-(Type|Disposition)/i) {
+    if ($field =~ /^Content-/i) {
         # This concatenates continued parameters and normalizes encoded params
         # to QB encoded-words which we handle below
-        $str = MIME::Field::ParamVal->parse($str)->stringify;
+        my $params = MIME::Field::ParamVal->parse_params($str);
+        foreach my $v ( values %$params ) {
+            $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
+            # drop quotes in case those were hidden inside encoded part
+            $v =~ s/^"(.*)"$/$1/;
+        }
+        $str = bless({}, 'MIME::Field::ParamVal')->set($params)->stringify;
+    }
+    elsif ( $field =~ /^(?:To|From|B?Cc)$/i ) {
+        my @addresses = RT::EmailParser->ParseEmailAddress( $str );
+        foreach my $address ( @addresses ) {
+            foreach my $field (qw(phrase comment)) {
+                my $v = $address->$field() or next;
+                $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
+                # drop quotes in case those were hidden inside encoded part
+                $v =~ s/^"(.*)"$/$1/;
+                $address->$field($v);
+            }
+        }
+        $str = join ', ', map $_->format, @addresses;
     }
+    else {
+        $str = _DecodeMIMEWordsToEncoding( $str, $to_charset );
+    }
+
+
+    # We might have \n without trailing whitespace, which will result in
+    # invalid headers.
+    $str =~ s/\n//g;
+
+    return ($str)
+}
+
+sub _DecodeMIMEWordsToEncoding {
+    my $str = shift;
+    my $to_charset = shift;
 
     # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
     # custom MIME word decoding and charset canonicalization.  We can't do this
@@ -307,72 +362,44 @@ sub DecodeMIMEWordsToEncoding {
                          \?=            # ?=
                          ([^=]*)        # trailing
                         /xgcs;
+    return $str unless @list;
+
+    # add everything that hasn't matched to the end of the latest
+    # string in array this happen when we have 'key="=?encoded?="; key="plain"'
+    $list[-1] .= substr($str, pos $str);
+
+    $str = '';
+    while (@list) {
+        my ($prefix, $charset, $encoding, $enc_str, $trailing) =
+                splice @list, 0, 5;
+        $charset  = _CanonicalizeCharset($charset);
+        $encoding = lc $encoding;
+
+        $trailing =~ s/\s?\t?$//;               # Observed from Outlook Express
+
+        if ( $encoding eq 'q' ) {
+            use MIME::QuotedPrint;
+            $enc_str =~ tr/_/ /;		# Observed from Outlook Express
+            $enc_str = decode_qp($enc_str);
+        } elsif ( $encoding eq 'b' ) {
+            use MIME::Base64;
+            $enc_str = decode_base64($enc_str);
+        } else {
+            $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
+                ."only Q(uoted-printable) and B(ase64) are supported");
+        }
 
-    if ( @list ) {
-        # add everything that hasn't matched to the end of the latest
-        # string in array this happen when we have 'key="=?encoded?="; key="plain"'
-        $list[-1] .= substr($str, pos $str);
-
-        $str = "";
-        while (@list) {
-            my ($prefix, $charset, $encoding, $enc_str, $trailing) =
-                    splice @list, 0, 5;
-            $charset  = _CanonicalizeCharset($charset);
-            $encoding = lc $encoding;
-
-            $trailing =~ s/\s?\t?$//;               # Observed from Outlook Express
-
-            if ( $encoding eq 'q' ) {
-                use MIME::QuotedPrint;
-                $enc_str =~ tr/_/ /;		# Observed from Outlook Express
-                $enc_str = decode_qp($enc_str);
-            } elsif ( $encoding eq 'b' ) {
-                use MIME::Base64;
-                $enc_str = decode_base64($enc_str);
-            } else {
-                $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
-                    ."only Q(uoted-printable) and B(ase64) are supported");
-            }
-
-            # now we have got a decoded subject, try to convert into the encoding
-            unless ( $charset eq $to_charset ) {
-                Encode::from_to( $enc_str, $charset, $to_charset );
-            }
-
-            # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
-            # We _should_ be preserving them encoded until after parsing is completed and
-            # THEN undo the mime-encoding.
-            #
-            # This routine should be translating the existing mimeencoding to utf8 but leaving
-            # things encoded.
-            #
-            # It's legal for headers to contain mime-encoded commas and semicolons which
-            # should not be treated as address separators. (Encoding == quoting here)
-            #
-            # until this is fixed, we must escape any string containing a comma or semicolon
-            # this is only a bandaid
-
-            # Some _other_ MUAs encode quotes _already_, and double quotes
-            # confuse us a lot, so only quote it if it isn't quoted
-            # already.
-            $enc_str = qq{"$enc_str"}
-                if $enc_str =~ /[()<>\[\]:;@\\,.]/
-                and not (($enc_str =~ /^"/ or $prefix =~ /"$/) and ($enc_str =~ /"$/ or $trailing =~ /^"/))
-                and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i);
-
-            $str .= $prefix . $enc_str . $trailing;
+        # now we have got a decoded subject, try to convert into the encoding
+        unless ( $charset eq $to_charset ) {
+            Encode::from_to( $enc_str, $charset, $to_charset );
         }
+        $str .= $prefix . $enc_str . $trailing;
     }
 
-    # We might have \n without trailing whitespace, which will result in
-    # invalid headers.
-    $str =~ s/\n//g;
-
     return ($str)
 }
 
 
-
 =head2 _FindOrGuessCharset MIME::Entity, $head_only
 
 When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that, will use Encode::Guess to try to figure it out
diff --git a/t/mail/dashboards.t b/t/mail/dashboards.t
index 00cfc6a..1535595 100644
--- a/t/mail/dashboards.t
+++ b/t/mail/dashboards.t
@@ -102,7 +102,7 @@ sub produces_dashboard_mail_ok { # {{{
 
     my $mail = parse_mail( $mails[0] );
     is($mail->head->get('Subject'), $subject);
-    is($mail->head->get('From'), "root\n");
+    is($mail->head->get('From'), qq{"root" <root\@localhost>\n});
     is($mail->head->get('X-RT-Dashboard-Id'), "$dashboard_id\n");
     is($mail->head->get('X-RT-Dashboard-Subscription-Id'), "$subscription_id\n");
 

-----------------------------------------------------------------------