[Rt-commit] rt branch, 4.4/rescue-outlook-html, created. rt-4.4.3-55-gaa608d138

Gergely Nagy algernon at bestpractical.com
Mon Oct 22 09:17:11 EDT 2018


The branch, 4.4/rescue-outlook-html has been created
        at  aa608d1380b2e444831e453620230267e04a36a6 (commit)

- Log -----------------------------------------------------------------
commit a4aab176f141c0d47e5ccad123b6960bbcf28401
Author: Gergely Nagy <algernon at bestpractical.com>
Date:   Thu Oct 4 21:46:17 2018 +0200

    Make it possible to select the part type in the Outlook mail tests
    
    Normally, we want to test against the 'text/plain' part of a multipart email,
    but there are cases where we'd rather test against the 'text/html' part. To make
    this possible, 'test_email' takes an additional argument:
    '$check_type' (defaulting to 'text/plain' if unspecified).

diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 8f3b71bc8..62a6fbb34 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -387,7 +387,7 @@ EOF
 }
 
 sub test_email {
-    my ( $text, $content, $msg ) = @_;
+    my ( $text, $content, $msg, $check_type ) = @_;
     my ( $status, $id ) = RT::Test->send_via_mailgate($text);
     is( $status >> 8, 0, "The mail gateway exited normally" );
     ok( $id, "Created ticket" );
@@ -400,6 +400,5 @@ sub test_email {
     $txns->Limit( FIELD => 'Type', VALUE => 'Create' );
     my $txn     = $txns->First;
 
-    is( $txn->Content, $content, $msg );
+    is( $txn->Content(Type => $check_type || "text/plain"), $content, $msg );
 }
-

commit 5e8bc0cf515bce743556ef9e45aa5946abc228df
Author: Gergely Nagy <algernon at bestpractical.com>
Date:   Thu Oct 4 21:42:53 2018 +0200

    Add a new Outlook mail test case
    
    The new test case has the HTML part first, the text part second. The goal is to
    test that the text part gets cleaned up anyway.

diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 62a6fbb34..9d49f55d5 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -1,7 +1,7 @@
 use strict;
 use warnings;
 
-use RT::Test tests => 66;
+use RT::Test tests => 78;
 
 RT->Config->Set('CheckMoreMSMailHeaders', 1);
 
@@ -39,6 +39,52 @@ Content-Transfer-Encoding: quoted-printable
 <html>this is fake</html>
 
 
+------=_NextPart_000_0004_01CB045C.A5A075D0--
+
+EOF
+
+        my $content = <<EOF;
+here is the content
+
+blahmm
+another line
+EOF
+        test_email( $text, $content,
+            $mailer . ' with multipart/alternative, \n\n are replaced' );
+    }
+
+    diag "Test mail with multipart/alternative";
+    {
+        my $text = <<EOF;
+From: root\@localhost
+X-Mailer: $mailer
+To: rt\@@{[RT->Config->Get('rtname')]}
+Subject: outlook basic test
+Content-Type: multipart/alternative;
+\tboundary="----=_NextPart_000_0004_01CB045C.A5A075D0"
+
+------=_NextPart_000_0004_01CB045C.A5A075D0
+Content-Type: text/html;
+\tcharset="us-ascii"
+Content-Transfer-Encoding: quoted-printable
+
+<html>this is fake</html>
+
+
+------=_NextPart_000_0004_01CB045C.A5A075D0
+Content-Type: text/plain;
+\tcharset="us-ascii"
+Content-Transfer-Encoding: 7bit
+
+here is the content
+
+
+
+blahmm
+
+another line
+
+
 ------=_NextPart_000_0004_01CB045C.A5A075D0--
 
 EOF

commit af941fb1489b5046e58acd9396cfd7e6ba7355fa
Author: Gergely Nagy <algernon at bestpractical.com>
Date:   Wed Oct 3 19:14:09 2018 +0200

    Teach RT::EmailParser::RescueOutlook how to clean up HTML mail too
    
    'RescueOutlook' cleaned up the 'text/plain' parts of a message, but over the
    years, HTML mail became much more common, and the HTML parts produced by Outlook
    have similar issues. This change attempts to clean up those parts too.
    
    As a side-effect of the change, the way 'RescueOutlook' finds the e-mail parts
    to clean up has been improved: now it iterates through all parts, and cleans up
    the first text, and the first HTML part, regardless of order.

diff --git a/lib/RT/EmailParser.pm b/lib/RT/EmailParser.pm
index ad26a291b..285798cd8 100644
--- a/lib/RT/EmailParser.pm
+++ b/lib/RT/EmailParser.pm
@@ -634,6 +634,11 @@ in it.  it's cool to have a 'text/plain' part, but the problem is the part is
 not so right: all the "\n" in your main message will become "\n\n" :/
 
 this method will fix this bug, i.e. replaces "\n\n" to "\n".
+
+Similarly, if the message is HTML-only, the same problem is present there:
+between each paragraph, there will be an empty one in between with only a line
+break. This method removes those line break-only paragraphs too.
+
 return 1 if it does find the problem in the entity and get it fixed.
 
 =cut
@@ -645,25 +650,32 @@ sub RescueOutlook {
 
     return unless $mime && $self->LooksLikeMSEmail($mime);
 
-    my $text_part;
+    my ($text_part, $html_part);
+
     if ( $mime->head->get('Content-Type') =~ m{multipart/mixed} ) {
         my $first = $mime->parts(0);
         if ( $first->head->get('Content-Type') =~ m{multipart/alternative} )
         {
-            my $inner_first = $first->parts(0);
-            if ( $inner_first->head->get('Content-Type') =~ m{text/plain} )
-            {
-                $text_part = $inner_first;
+            foreach my $part ($first->parts) {
+                if ( $part->head->get('Content-Type') =~ m{text/plain} && !$text_part) {
+                    $text_part = $part;
+                }
+                if ( $part->head->get('Content-Type') =~ m{text/html} && !$html_part) {
+                    $html_part = $part;
+                }
             }
         }
     }
     elsif ( $mime->head->get('Content-Type') =~ m{multipart/alternative} ) {
-        my $first = $mime->parts(0);
-        if ( $first->head->get('Content-Type') =~ m{text/plain} ) {
-            $text_part = $first;
+        foreach my $part ($mime->parts) {
+            if ( $part->head->get('Content-Type') =~ m{text/plain} && !$text_part) {
+                $text_part = $part;
+            }
+            if ( $part->head->get('Content-Type') =~ m{text/html} && !$html_part) {
+                $html_part = $part;
+            }
         }
     }
-
     # Add base64 since we've seen examples of double newlines with
     # this type too. Need an example of a multi-part base64 to
     # handle that permutation if it exists.
@@ -671,22 +683,25 @@ sub RescueOutlook {
         $text_part = $mime;    # Assuming single part, already decoded.
     }
 
+    my $ret;
+
     if ($text_part) {
 
         # use the unencoded string
-        my $content = $text_part->bodyhandle->as_string;
-        if ( $content =~ s/\n\n/\n/g ) {
+        my $text_content = $text_part->bodyhandle->as_string;
+
+        if ( $text_content =~ s/\n\n/\n/mg ) {
 
             # Outlook puts a space on extra newlines, remove it
-            $content =~ s/\ +$//mg;
+            $text_content =~ s/\ +$//mg;
 
             # only write only if we did change the content
             if ( my $io = $text_part->open("w") ) {
-                $io->print($content);
+                $io->print($text_content);
                 $io->close;
                 $RT::Logger->debug(
                     "Removed extra newlines from MS Outlook message.");
-                return 1;
+                $ret = 1;
             }
             else {
                 $RT::Logger->error("Can't write to body to fix newlines");
@@ -694,7 +709,28 @@ sub RescueOutlook {
         }
     }
 
-    return;
+    if ($html_part) {
+
+        # use the unencoded string
+        my $html_content = $html_part->bodyhandle->as_string;
+
+        if ( $html_content =~ s{<p(\s+style="[^"]*")?><br>\n?</p>}{}mg ) {
+
+            # only write only if we did change the content
+            if ( my $io = $html_part->open("w") ) {
+                $io->print($html_content);
+                $io->close;
+                $RT::Logger->debug(
+                    "Removed extra newlines from MS Outlook message.");
+                $ret = 1;
+            }
+            else {
+                $RT::Logger->error("Can't write to body to fix newlines");
+            }
+        }
+    }
+
+    return $ret;
 }
 
 =head1 LooksLikeMSEmail
diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 9d49f55d5..5b659efa2 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -1,7 +1,7 @@
 use strict;
 use warnings;
 
-use RT::Test tests => 78;
+use RT::Test tests => 90;
 
 RT->Config->Set('CheckMoreMSMailHeaders', 1);
 
@@ -36,21 +36,25 @@ Content-Type: text/html;
 \tcharset="us-ascii"
 Content-Transfer-Encoding: quoted-printable
 
-<html>this is fake</html>
-
+<html><body><p>here is the content</p><p><br></p><p>another paragraph</p></body></html>
 
 ------=_NextPart_000_0004_01CB045C.A5A075D0--
 
 EOF
 
-        my $content = <<EOF;
+        my $text_content = <<EOF;
 here is the content
 
 blahmm
 another line
 EOF
-        test_email( $text, $content,
-            $mailer . ' with multipart/alternative, \n\n are replaced' );
+        my $html_content = <<EOF;
+<p>here is the content</p><p>another paragraph</p>
+EOF
+        test_email( $text, $text_content,
+                    $mailer . ' with multipart/alternative, \n\n are replaced' );
+        test_email( $text, $html_content,
+                    $mailer . ' with multipart/alternative, line-break-only paragraphs removed from the HTML part', "text/html" );
     }
 
     diag "Test mail with multipart/alternative";

commit aa608d1380b2e444831e453620230267e04a36a6
Author: Gergely Nagy <algernon at bestpractical.com>
Date:   Mon Oct 22 15:15:18 2018 +0200

    Migrate the Outlook email tests away from manual test counting
    
    Instead of manually keeping the number of tests up to date, use 'undef' and
    'done-testing;' at the end.

diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 5b659efa2..b6538d877 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -1,7 +1,7 @@
 use strict;
 use warnings;
 
-use RT::Test tests => 90;
+use RT::Test tests => undef;
 
 RT->Config->Set('CheckMoreMSMailHeaders', 1);
 
@@ -317,7 +317,7 @@ diag "Sample multipart email with Exchange headers";
 {
         my $text = <<EOF;
 X-MimeOLE: Produced By Microsoft Exchange V6.5
-Received: by example.com 
+Received: by example.com
         id <01CD63FC.33F4C15C\@example.com>; Tue, 17 Jul 2012 10:11:51 +0100
 MIME-Version: 1.0
 Content-Type: multipart/alternative;
@@ -326,8 +326,8 @@ Content-class: urn:content-classes:message
 Subject: outlook basic test
 Date: Tue, 17 Jul 2012 10:11:50 +0100
 Message-ID: <AA6CEAFB02FF244999046B2A6B6B9D6F05FF9D12\@example.com>
-X-MS-Has-Attach: 
-X-MS-TNEF-Correlator: 
+X-MS-Has-Attach:
+X-MS-TNEF-Correlator:
 Thread-Topic: Testing Outlook HTML
 Thread-Index: Ac1j/DNs7ly963bnRt63SJw9DkGwyw==
 From: root\@localhost
@@ -452,3 +452,5 @@ sub test_email {
 
     is( $txn->Content(Type => $check_type || "text/plain"), $content, $msg );
 }
+
+done_testing;
\ No newline at end of file

-----------------------------------------------------------------------


More information about the rt-commit mailing list