[Rt-commit] rt branch, 4.4/rescue-outlook-html, created. rt-4.4.3-54-gdc6144d49

Gergely Nagy algernon at bestpractical.com
Thu Oct 4 16:49:08 EDT 2018


The branch, 4.4/rescue-outlook-html has been created
        at  dc6144d493c40e25f66d270dacd74ce9b8d88c47 (commit)

- Log -----------------------------------------------------------------
commit 1b5024ff48825b1a600d6643c53ea87d177ea262
Author: Gergely Nagy <algernon at bestpractical.com>
Date:   Thu Oct 4 21:46:17 2018 +0200

    t/mail/outlook.t: Make it possible to select the part type with test_email
    
    Normally, we want to test against the `text/plain` part of a multipart email,
    but there are cases where we'd rather test against the `text/html` part. To make
    this possible, `test_email` takes an additional argument:
    `$check_type` (defaulting to `text/plain` if unspecified).
    
    Signed-off-by: Gergely Nagy <algernon at bestpractical.com>

diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 8f3b71bc8..62a6fbb34 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -387,7 +387,7 @@ EOF
 }
 
 sub test_email {
-    my ( $text, $content, $msg ) = @_;
+    my ( $text, $content, $msg, $check_type ) = @_;
     my ( $status, $id ) = RT::Test->send_via_mailgate($text);
     is( $status >> 8, 0, "The mail gateway exited normally" );
     ok( $id, "Created ticket" );
@@ -400,6 +400,5 @@ sub test_email {
     $txns->Limit( FIELD => 'Type', VALUE => 'Create' );
     my $txn     = $txns->First;
 
-    is( $txn->Content, $content, $msg );
+    is( $txn->Content(Type => $check_type || "text/plain"), $content, $msg );
 }
-

commit a92b600d5312f931dc5e21df319571ea7b7070c0
Author: Gergely Nagy <algernon at bestpractical.com>
Date:   Thu Oct 4 21:42:53 2018 +0200

    t/mail/outlook.t: Add a new test case
    
    The new test case has the HTML part first, the text part second. The goal is to
    test that the text part gets cleaned up anyway.
    
    Signed-off-by: Gergely Nagy <algernon at bestpractical.com>

diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 62a6fbb34..9d49f55d5 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -1,7 +1,7 @@
 use strict;
 use warnings;
 
-use RT::Test tests => 66;
+use RT::Test tests => 78;
 
 RT->Config->Set('CheckMoreMSMailHeaders', 1);
 
@@ -39,6 +39,52 @@ Content-Transfer-Encoding: quoted-printable
 <html>this is fake</html>
 
 
+------=_NextPart_000_0004_01CB045C.A5A075D0--
+
+EOF
+
+        my $content = <<EOF;
+here is the content
+
+blahmm
+another line
+EOF
+        test_email( $text, $content,
+            $mailer . ' with multipart/alternative, \n\n are replaced' );
+    }
+
+    diag "Test mail with multipart/alternative";
+    {
+        my $text = <<EOF;
+From: root\@localhost
+X-Mailer: $mailer
+To: rt\@@{[RT->Config->Get('rtname')]}
+Subject: outlook basic test
+Content-Type: multipart/alternative;
+\tboundary="----=_NextPart_000_0004_01CB045C.A5A075D0"
+
+------=_NextPart_000_0004_01CB045C.A5A075D0
+Content-Type: text/html;
+\tcharset="us-ascii"
+Content-Transfer-Encoding: quoted-printable
+
+<html>this is fake</html>
+
+
+------=_NextPart_000_0004_01CB045C.A5A075D0
+Content-Type: text/plain;
+\tcharset="us-ascii"
+Content-Transfer-Encoding: 7bit
+
+here is the content
+
+
+
+blahmm
+
+another line
+
+
 ------=_NextPart_000_0004_01CB045C.A5A075D0--
 
 EOF

commit dc6144d493c40e25f66d270dacd74ce9b8d88c47
Author: Gergely Nagy <algernon at bestpractical.com>
Date:   Wed Oct 3 19:14:09 2018 +0200

    Teach RT::EmailParser::RescueOutlook how to clean up HTML mail too
    
    `RescueOutlook` cleaned up the `text/plain` parts of a message, but over the
    years, HTML mail became much more common, and the HTML parts produced by Outlook
    have similar issues. This change attempts to clean up those parts too.
    
    As a side-effect of the change, the way `RescueOutlook` finds the e-mail parts
    to clean up has been improved: now it iterates through all parts, and cleans up
    the first text, and the first HTML part, regardless of order.
    
    Signed-off-by: Gergely Nagy <algernon at bestpractical.com>

diff --git a/lib/RT/EmailParser.pm b/lib/RT/EmailParser.pm
index ad26a291b..285798cd8 100644
--- a/lib/RT/EmailParser.pm
+++ b/lib/RT/EmailParser.pm
@@ -634,6 +634,11 @@ in it.  it's cool to have a 'text/plain' part, but the problem is the part is
 not so right: all the "\n" in your main message will become "\n\n" :/
 
 this method will fix this bug, i.e. replaces "\n\n" to "\n".
+
+Similarly, if the message is HTML-only, the same problem is present there:
+between each paragraph, there will be an empty one in between with only a line
+break. This method removes those line break-only paragraphs too.
+
 return 1 if it does find the problem in the entity and get it fixed.
 
 =cut
@@ -645,25 +650,32 @@ sub RescueOutlook {
 
     return unless $mime && $self->LooksLikeMSEmail($mime);
 
-    my $text_part;
+    my ($text_part, $html_part);
+
     if ( $mime->head->get('Content-Type') =~ m{multipart/mixed} ) {
         my $first = $mime->parts(0);
         if ( $first->head->get('Content-Type') =~ m{multipart/alternative} )
         {
-            my $inner_first = $first->parts(0);
-            if ( $inner_first->head->get('Content-Type') =~ m{text/plain} )
-            {
-                $text_part = $inner_first;
+            foreach my $part ($first->parts) {
+                if ( $part->head->get('Content-Type') =~ m{text/plain} && !$text_part) {
+                    $text_part = $part;
+                }
+                if ( $part->head->get('Content-Type') =~ m{text/html} && !$html_part) {
+                    $html_part = $part;
+                }
             }
         }
     }
     elsif ( $mime->head->get('Content-Type') =~ m{multipart/alternative} ) {
-        my $first = $mime->parts(0);
-        if ( $first->head->get('Content-Type') =~ m{text/plain} ) {
-            $text_part = $first;
+        foreach my $part ($mime->parts) {
+            if ( $part->head->get('Content-Type') =~ m{text/plain} && !$text_part) {
+                $text_part = $part;
+            }
+            if ( $part->head->get('Content-Type') =~ m{text/html} && !$html_part) {
+                $html_part = $part;
+            }
         }
     }
-
     # Add base64 since we've seen examples of double newlines with
     # this type too. Need an example of a multi-part base64 to
     # handle that permutation if it exists.
@@ -671,22 +683,25 @@ sub RescueOutlook {
         $text_part = $mime;    # Assuming single part, already decoded.
     }
 
+    my $ret;
+
     if ($text_part) {
 
         # use the unencoded string
-        my $content = $text_part->bodyhandle->as_string;
-        if ( $content =~ s/\n\n/\n/g ) {
+        my $text_content = $text_part->bodyhandle->as_string;
+
+        if ( $text_content =~ s/\n\n/\n/mg ) {
 
             # Outlook puts a space on extra newlines, remove it
-            $content =~ s/\ +$//mg;
+            $text_content =~ s/\ +$//mg;
 
             # only write only if we did change the content
             if ( my $io = $text_part->open("w") ) {
-                $io->print($content);
+                $io->print($text_content);
                 $io->close;
                 $RT::Logger->debug(
                     "Removed extra newlines from MS Outlook message.");
-                return 1;
+                $ret = 1;
             }
             else {
                 $RT::Logger->error("Can't write to body to fix newlines");
@@ -694,7 +709,28 @@ sub RescueOutlook {
         }
     }
 
-    return;
+    if ($html_part) {
+
+        # use the unencoded string
+        my $html_content = $html_part->bodyhandle->as_string;
+
+        if ( $html_content =~ s{<p(\s+style="[^"]*")?><br>\n?</p>}{}mg ) {
+
+            # only write only if we did change the content
+            if ( my $io = $html_part->open("w") ) {
+                $io->print($html_content);
+                $io->close;
+                $RT::Logger->debug(
+                    "Removed extra newlines from MS Outlook message.");
+                $ret = 1;
+            }
+            else {
+                $RT::Logger->error("Can't write to body to fix newlines");
+            }
+        }
+    }
+
+    return $ret;
 }
 
 =head1 LooksLikeMSEmail
diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 9d49f55d5..5b659efa2 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -1,7 +1,7 @@
 use strict;
 use warnings;
 
-use RT::Test tests => 78;
+use RT::Test tests => 90;
 
 RT->Config->Set('CheckMoreMSMailHeaders', 1);
 
@@ -36,21 +36,25 @@ Content-Type: text/html;
 \tcharset="us-ascii"
 Content-Transfer-Encoding: quoted-printable
 
-<html>this is fake</html>
-
+<html><body><p>here is the content</p><p><br></p><p>another paragraph</p></body></html>
 
 ------=_NextPart_000_0004_01CB045C.A5A075D0--
 
 EOF
 
-        my $content = <<EOF;
+        my $text_content = <<EOF;
 here is the content
 
 blahmm
 another line
 EOF
-        test_email( $text, $content,
-            $mailer . ' with multipart/alternative, \n\n are replaced' );
+        my $html_content = <<EOF;
+<p>here is the content</p><p>another paragraph</p>
+EOF
+        test_email( $text, $text_content,
+                    $mailer . ' with multipart/alternative, \n\n are replaced' );
+        test_email( $text, $html_content,
+                    $mailer . ' with multipart/alternative, line-break-only paragraphs removed from the HTML part', "text/html" );
     }
 
     diag "Test mail with multipart/alternative";

-----------------------------------------------------------------------


More information about the rt-commit mailing list