[Rt-commit] rt branch, 4.0/base64-in-rescue-outlook, created. rt-4.0.8-89-g916395a

Jim Brandt jbrandt at bestpractical.com
Mon Dec 17 16:08:53 EST 2012


The branch, 4.0/base64-in-rescue-outlook, has been created
        at  916395abb812a27f60b65feb1b0a00413e09e82e (commit)

- Log -----------------------------------------------------------------
commit 3f61580d3404c2892e6b08252df5d5f22cb515d8
Author: Jim Brandt <jbrandt at bestpractical.com>
Date:   Fri Nov 2 13:41:54 2012 -0400

    Factor out Outlook/Exchange detection

diff --git a/lib/RT/EmailParser.pm b/lib/RT/EmailParser.pm
index 4cf4184..b6921e0 100644
--- a/lib/RT/EmailParser.pm
+++ b/lib/RT/EmailParser.pm
@@ -568,50 +568,67 @@ return 1 if it does find the problem in the entity and get it fixed.
 sub RescueOutlook {
     my $self = shift;
     my $mime = $self->Entity();
-    return unless $mime;
 
-    my $mailer = $mime->head->get('X-Mailer');
-    # 12.0 is outlook 2007, 14.0 is 2010
-    if ( $mailer && $mailer =~ /Microsoft(?:.*?)Outlook 1[2-4]\./ ) {
-        my $text_part;
-        if ( $mime->head->get('Content-Type') =~ m{multipart/mixed} ) {
-            my $first = $mime->parts(0);
-            if ( $first->head->get('Content-Type') =~ m{multipart/alternative} )
+    return unless $mime && $self->LooksLikeMSEmail($mime);
+
+    my $text_part;
+    if ( $mime->head->get('Content-Type') =~ m{multipart/mixed} ) {
+        my $first = $mime->parts(0);
+        if ( $first->head->get('Content-Type') =~ m{multipart/alternative} )
+        {
+            my $inner_first = $first->parts(0);
+            if ( $inner_first->head->get('Content-Type') =~ m{text/plain} )
             {
-                my $inner_first = $first->parts(0);
-                if ( $inner_first->head->get('Content-Type') =~ m{text/plain} )
-                {
-                    $text_part = $inner_first;
-                }
+                $text_part = $inner_first;
             }
         }
-        elsif ( $mime->head->get('Content-Type') =~ m{multipart/alternative} ) {
-            my $first = $mime->parts(0);
-            if ( $first->head->get('Content-Type') =~ m{text/plain} ) {
-                $text_part = $first;
-            }
+    }
+    elsif ( $mime->head->get('Content-Type') =~ m{multipart/alternative} ) {
+        my $first = $mime->parts(0);
+        if ( $first->head->get('Content-Type') =~ m{text/plain} ) {
+            $text_part = $first;
         }
+    }
+
+    if ($text_part) {
 
-        if ($text_part) {
-
-            # use the unencoded string
-            my $content = $text_part->bodyhandle->as_string;
-            if ( $content =~ s/\n\n/\n/g ) {
-                # only write only if we did change the content
-                if ( my $io = $text_part->open("w") ) {
-                    $io->print($content);
-                    $io->close;
-                    return 1;
-                }
-                else {
-                    $RT::Logger->error("can't write to body");
-                }
+        # use the unencoded string
+        my $content = $text_part->bodyhandle->as_string;
+        if ( $content =~ s/\n\n/\n/g ) {
+            # only write only if we did change the content
+            if ( my $io = $text_part->open("w") ) {
+                $io->print($content);
+                $io->close;
+                return 1;
+            }
+            else {
+                $RT::Logger->error("can't write to body");
             }
         }
     }
+
     return;
 }
 
+=head1 LooksLikeMSEmail
+
+Try to determine if the current email may have
+come from MS Outlook or gone through Exchange, and therefore
+may have extra newlines added.
+
+=cut
+
+sub LooksLikeMSEmail {
+    my $self = shift;
+    my $mime = shift;
+
+    my $mailer = $mime->head->get('X-Mailer');
+
+    # 12.0 is outlook 2007, 14.0 is 2010
+    return 1 if ( $mailer && $mailer =~ /Microsoft(?:.*?)Outlook 1[2-4]\./ );
+
+    return 0;    # Doesn't look like MS email.
+}
 
 sub DESTROY {
     my $self = shift;

commit d4532b2785a158610443c332f5752d7d481f7f60
Author: Jim Brandt <jbrandt at bestpractical.com>
Date:   Fri Nov 2 13:43:43 2012 -0400

    Improve logging of Outlook cleanup slightly

diff --git a/lib/RT/EmailParser.pm b/lib/RT/EmailParser.pm
index b6921e0..b1c7246 100644
--- a/lib/RT/EmailParser.pm
+++ b/lib/RT/EmailParser.pm
@@ -599,10 +599,12 @@ sub RescueOutlook {
             if ( my $io = $text_part->open("w") ) {
                 $io->print($content);
                 $io->close;
+                $RT::Logger->debug(
+                    "Removed extra newlines from MS Outlook message.");
                 return 1;
             }
             else {
-                $RT::Logger->error("can't write to body");
+                $RT::Logger->error("Can't write to body to fix newlines");
             }
         }
     }

commit f9abd6e37b9253c841ae2f59d6a53636ff5b813d
Author: Jim Brandt <jbrandt at bestpractical.com>
Date:   Fri Nov 2 13:48:43 2012 -0400

    Outlook mails with doubled newlines append whitespace as well; trim it

diff --git a/lib/RT/EmailParser.pm b/lib/RT/EmailParser.pm
index b1c7246..9fb529b 100644
--- a/lib/RT/EmailParser.pm
+++ b/lib/RT/EmailParser.pm
@@ -595,6 +595,10 @@ sub RescueOutlook {
         # use the unencoded string
         my $content = $text_part->bodyhandle->as_string;
         if ( $content =~ s/\n\n/\n/g ) {
+
+            # Outlook puts a space on extra newlines, remove it
+            $content =~ s/\ +$//mg;
+
             # only write only if we did change the content
             if ( my $io = $text_part->open("w") ) {
                 $io->print($content);

commit cec97f29a5ccc3df884097cdfea2d3a777833123
Author: Jim Brandt <jbrandt at bestpractical.com>
Date:   Fri Nov 2 13:46:56 2012 -0400

    Move outlook detection to after decoding, for base64'd content
    
    Single-part messages in base64 encoding have been observed in the wild
    with this failure mode, in addition to the multipart/alternative
    messages already dealt with.  Enable double-newline-stripping on all
    Outlook/Exchange single-part base64'd mail; this requires moving the
    cleanup stage to after decoding has happened, so that the decoded
    bodies can be altered.

diff --git a/lib/RT/EmailParser.pm b/lib/RT/EmailParser.pm
index 9fb529b..605d18f 100644
--- a/lib/RT/EmailParser.pm
+++ b/lib/RT/EmailParser.pm
@@ -131,8 +131,6 @@ sub SmartParseMIMEEntityFromScalar {
         }
     };
 
-    $self->RescueOutlook;
-
     #If for some reason we weren't able to parse the message using a temp file
     # try it with a scalar
     if ( $@ || !$self->Entity ) {
@@ -590,6 +588,13 @@ sub RescueOutlook {
         }
     }
 
+    # Add base64 since we've seen examples of double newlines with
+    # this type too. Need an example of a multi-part base64 to
+    # handle that permutation if it exists.
+    elsif ( $mime->head->get('Content-Transfer-Encoding') =~ m{base64} ) {
+        $text_part = $mime;    # Assuming single part, already decoded.
+    }
+
     if ($text_part) {
 
         # use the unencoded string
diff --git a/lib/RT/Interface/Email.pm b/lib/RT/Interface/Email.pm
index 38157c2..68e9a5d 100644
--- a/lib/RT/Interface/Email.pm
+++ b/lib/RT/Interface/Email.pm
@@ -1430,6 +1430,7 @@ sub Gateway {
     }
     @mail_plugins = grep !$skip_plugin{"$_"}, @mail_plugins;
     $parser->_DecodeBodies;
+    $parser->RescueOutlook;
     $parser->_PostProcessNewEntity;
 
     my $head = $Message->head;

commit 916395abb812a27f60b65feb1b0a00413e09e82e
Author: Jim Brandt <jbrandt at bestpractical.com>
Date:   Fri Nov 2 13:49:30 2012 -0400

    Add CheckMoreMSMailHeaders config option
    
    Add a CheckMoreMSMailHeaders option, off by default, that enables
    additional MS Outlook email detection. Users can activate it if
    they have problems with double newlines from MS Outlook-based
    email in RT. When enabled, RT also checks for MS mail headers and,
    if any are found, removes double newlines from mail bodies.

diff --git a/etc/RT_Config.pm.in b/etc/RT_Config.pm.in
index 0df3e2b..be7bb1d 100755
--- a/etc/RT_Config.pm.in
+++ b/etc/RT_Config.pm.in
@@ -460,6 +460,23 @@ Set($ExtractSubjectTagNoMatch, ( ${RT::EmailSubjectTagRegex}
        ? qr/\[(?:${RT::EmailSubjectTagRegex}) #\d+\]/
        : qr/\[\Q$RT::rtname\E #\d+\]/));
 
+=item C<$CheckMoreMSMailHeaders>
+
+Some email clients create a plain text version of HTML-formatted
+email to help other clients that read only plain text.
+Unfortunately, the plain text parts sometimes end up with
+doubled newlines and these can then end up in RT. This
+is most often seen in MS Outlook.
+
+Enable this option to have RT check for additional mail headers
+and attempt to identify email from MS Outlook. When detected,
+RT will then clean up double newlines. Note that it may
+clean up intentional double newlines as well.
+
+=cut
+
+Set( $CheckMoreMSMailHeaders, 0);
+
 =back
 
 
diff --git a/lib/RT/EmailParser.pm b/lib/RT/EmailParser.pm
index 605d18f..97e9ab8 100644
--- a/lib/RT/EmailParser.pm
+++ b/lib/RT/EmailParser.pm
@@ -638,6 +638,17 @@ sub LooksLikeMSEmail {
     # 12.0 is outlook 2007, 14.0 is 2010
     return 1 if ( $mailer && $mailer =~ /Microsoft(?:.*?)Outlook 1[2-4]\./ );
 
+    if ( RT->Config->Get('CheckMoreMSMailHeaders') ) {
+
+        # Check for additional headers that might
+        # indicate this came from Outlook or through Exchange.
+        # A sample we received had the headers X-MS-Has-Attach: and
+        # X-MS-Tnef-Correlator: and both had no value.
+
+        my @tags = $mime->head->tags();
+        return 1 if grep { /^X-MS-/ } @tags;
+    }
+
     return 0;    # Doesn't look like MS email.
 }
 
diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index c953a54..752a91f 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -1,7 +1,9 @@
 use strict;
 use warnings;
 
-use RT::Test tests => 42;
+use RT::Test tests => 66;
+
+RT->Config->Set('CheckMoreMSMailHeaders', 1);
 
 # 12.0 is outlook 2007, 14.0 is 2010
 for my $mailer ( 'Microsoft Office Outlook 12.0', 'Microsoft Outlook 14.0' ) {
@@ -144,8 +146,75 @@ EOF
         test_email( $text, $content,
             $mailer . ' with only text/plain, \n\n are not replaced' );
     }
+
+    diag "Test mail with with outlook, content type is base64";
+    {
+        my $text = <<EOF;
+From: root\@localhost
+X-Mailer: $mailer
+To: rt\@@{[RT->Config->Get('rtname')]}
+Subject: outlook basic test
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+VGhpcyBpcyB0aGUgYm9keSBvZiBhbiBlbWFpbC4KCkl0IGhhcyBtdWx0aXBs
+ZSBleHRyYSBuZXdsaW5lcy4KCgoKTGlrZSBhIG1hbmdsZWQgT3V0bG9vayBt
+ZXNzYWdlIG1pZ2h0LgoKCgpKb2huIFNtaXRoCgpTb21lIENvbXBhbnkKCmVt
+YWlsQHNvbWVjby5jb20KCg==
+EOF
+
+        my $content = <<EOF;
+This is the body of an email.
+It has multiple extra newlines.
+
+Like a mangled Outlook message might.
+
+John Smith
+Some Company
+email\@someco.com
+EOF
+        test_email( $text, $content,
+            $mailer . ' with base64, \n\n are replaced' );
+    }
+}
+
+# In a sample we received, the two X-MS- headers included
+# below were both present and had no values. For now, using
+# the existence of these headers as evidence of MS Outlook
+# or Exchange.
+
+diag "Test mail with with outlook, no X-Mailer, content type is base64";
+{
+        my $text = <<EOF;
+From: root\@localhost
+To: rt\@@{[RT->Config->Get('rtname')]}
+Subject: outlook basic test
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+X-MS-Has-Attach:
+X-MS-Tnef-Correlator:
+
+VGhpcyBpcyB0aGUgYm9keSBvZiBhbiBlbWFpbC4KCkl0IGhhcyBtdWx0aXBs
+ZSBleHRyYSBuZXdsaW5lcy4KCgoKTGlrZSBhIG1hbmdsZWQgT3V0bG9vayBt
+ZXNzYWdlIG1pZ2h0LgoKCgpKb2huIFNtaXRoCgpTb21lIENvbXBhbnkKCmVt
+YWlsQHNvbWVjby5jb20KCg==
+EOF
+
+        my $content = <<EOF;
+This is the body of an email.
+It has multiple extra newlines.
+
+Like a mangled Outlook message might.
+
+John Smith
+Some Company
+email\@someco.com
+EOF
+	test_email( $text, $content,
+		    ' with base64, no X-Mailer, \n\n are replaced' );
 }
 
+
 diag "Test mail with with multipart/alternative but x-mailer is not outlook ";
 {
     my $text = <<EOF;
@@ -179,7 +248,6 @@ Content-Transfer-Encoding: quoted-printable
 
 
 ------=_NextPart_000_0004_01CB045C.A5A075D0--
-
 EOF
 
     my $content = <<EOF;
@@ -195,6 +263,129 @@ EOF
     test_email( $text, $content, 'without outlook, \n\n are not replaced' );
 }
 
+diag "Sample multipart email with Exchange headers";
+{
+        my $text = <<EOF;
+X-MimeOLE: Produced By Microsoft Exchange V6.5
+Received: by example.com 
+        id <01CD63FC.33F4C15C\@example.com>; Tue, 17 Jul 2012 10:11:51 +0100
+MIME-Version: 1.0
+Content-Type: multipart/alternative;
+        boundary="----_=_NextPart_001_01CD63FC.33F4C15C"
+Content-class: urn:content-classes:message
+Subject: outlook basic test
+Date: Tue, 17 Jul 2012 10:11:50 +0100
+Message-ID: <AA6CEAFB02FF244999046B2A6B6B9D6F05FF9D12\@example.com>
+X-MS-Has-Attach: 
+X-MS-TNEF-Correlator: 
+Thread-Topic: Testing Outlook HTML
+Thread-Index: Ac1j/DNs7ly963bnRt63SJw9DkGwyw==
+From: root\@localhost
+To: rt\@@{[RT->Config->Get('rtname')]}
+
+This is a multi-part message in MIME format.
+
+------_=_NextPart_001_01CD63FC.33F4C15C
+Content-Type: text/plain;
+        charset="us-ascii"
+Content-Transfer-Encoding: quoted-printable
+
+This email contains a line of text containing multiple sentences.  Where
+will RT wrap this when the text is quoted?  What about the footer below?
+
+=20
+
+This is a different line, with a blank line (paragraph) above.  Will
+there be additional blank lines when the text is quoted?
+
+=20
+
+This isthesig
+
+=20
+
+
+------_=_NextPart_001_01CD63FC.33F4C15C
+Content-Type: text/html;
+        charset="us-ascii"
+Content-Transfer-Encoding: quoted-printable
+
+<html xmlns:v=3D"urn:schemas-microsoft-com:vml" =
+xmlns:o=3D"urn:schemas-microsoft-com:office:office" =
+xmlns:w=3D"urn:schemas-microsoft-com:office:word" =
+xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml" =
+xmlns=3D"http://www.w3.org/TR/REC-html40"><head><META =
+HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html; =
+charset=3Dus-ascii"><meta name=3DGenerator content=3D"Microsoft Word 12 =
+(filtered medium)"><style><!--
+/* Font Definitions */
+\@font-face
+        {font-family:"Cambria Math";
+        panose-1:2 4 5 3 5 4 6 3 2 4;}
+\@font-face
+        {font-family:Calibri;
+        panose-1:2 15 5 2 2 2 4 3 2 4;}
+/* Style Definitions */
+p.MsoNormal, li.MsoNormal, div.MsoNormal
+        {margin:0in;
+        margin-bottom:.0001pt;
+        font-size:11.0pt;
+        font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+        {mso-style-priority:99;
+        color:blue;
+        text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+        {mso-style-priority:99;
+        color:purple;
+        text-decoration:underline;}
+span.EmailStyle17
+        {mso-style-type:personal-compose;
+        font-family:"Calibri","sans-serif";
+        color:windowtext;}
+.MsoChpDefault
+        {mso-style-type:export-only;}
+\@page WordSection1
+        {size:8.5in 11.0in;
+        margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+        {page:WordSection1;}
+--></style><!--[if gte mso 9]><xml>
+<o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" />
+</xml><![endif]--><!--[if gte mso 9]><xml>
+<o:shapelayout v:ext=3D"edit">
+<o:idmap v:ext=3D"edit" data=3D"1" />
+</o:shapelayout></xml><![endif]--></head><body lang=3DEN-US link=3Dblue =
+vlink=3Dpurple><div class=3DWordSection1><p class=3DMsoNormal>This email =
+contains a line of text containing multiple sentences.  Where will =
+RT wrap this when the text is quoted?  What about the footer =
+below?<o:p></o:p></p><p class=3DMsoNormal><o:p> </o:p></p><p =
+class=3DMsoNormal>This is a different line, with a blank line =
+(paragraph) above.  Will there be additional blank lines when the =
+text is quoted?<o:p></o:p></p><p =
+class=3DMsoNormal><o:p> </o:p></p><p class=3DMsoNormal><span =
+lang=3DEN-GB =
+style=3D'font-size:7.5pt;font-family:"Arial","sans-serif"'>This isthesig =
+</span><o:p></o:p></p><p =
+class=3DMsoNormal><o:p> </o:p></p></div></body></html>
+------_=_NextPart_001_01CD63FC.33F4C15C--
+EOF
+
+        my $content = <<EOF;
+This email contains a line of text containing multiple sentences.  Where
+will RT wrap this when the text is quoted?  What about the footer below?
+
+This is a different line, with a blank line (paragraph) above.  Will
+there be additional blank lines when the text is quoted?
+
+This isthesig
+
+EOF
+
+	test_email( $text, $content,
+		    'Another sample multipart message with Exchange headers' );
+}
+
 sub test_email {
     my ( $text, $content, $msg ) = @_;
     my ( $status, $id ) = RT::Test->send_via_mailgate($text);

-----------------------------------------------------------------------


More information about the Rt-commit mailing list