[Rt-commit] rt branch, 4.4/rescue-outlook-html, updated. rt-4.4.3-58-g99405a545

Gergely Nagy algernon at bestpractical.com
Tue Nov 6 06:45:33 EST 2018


The branch, 4.4/rescue-outlook-html has been updated
       via  99405a54527089c93f9da084cf989a468be09cde (commit)
      from  d4481468c75387e8fc554d83777209e45dbc88c3 (commit)

Summary of changes:
 lib/RT/EmailParser.pm |  8 +++++-
 t/mail/outlook.t      | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 1 deletion(-)

- Log -----------------------------------------------------------------
commit 99405a54527089c93f9da084cf989a468be09cde
Author: Gergely Nagy <algernon at bestpractical.com>
Date:   Tue Nov 6 12:43:04 2018 +0100

    Make RescueOutlook handle Office HTML mails better
    
    Some Office-generated HTML mail will have extra newlines added the same way
    Outlook adds them, and we should clean these up too. The lines are slightly
    different from those in the Outlook case, so we do three substitutions, one
    for each case seen in the wild. If any of them changes the mail body, we'll
    save the cleaned-up version of the mail.
    
    Added a test case based on an Office-generated mail found in the wild.

diff --git a/lib/RT/EmailParser.pm b/lib/RT/EmailParser.pm
index 796e89c68..5d9ae4c56 100644
--- a/lib/RT/EmailParser.pm
+++ b/lib/RT/EmailParser.pm
@@ -709,7 +709,13 @@ sub RescueOutlook {
         # use the unencoded string
         my $html_content = $html_part->bodyhandle->as_string;
 
-        if ( $html_content =~ s{<p(\s+style="[^"]*")?>(<br>)?\n?</p>}{}mg ) {
+        my $changed;
+
+        $changed = $html_content =~ s{<p(\s+style="[^"]*")?>(<br>)?\n?</p>}{}mg;
+        $changed |= $html_content =~ s{<div><br>\n?</div>}{}mg;
+        $changed |= $html_content =~ s{<p(\s+[^>]+)?><span(\s+[^>]+)?><o:p> </o:p></span></p>}{}mg;
+
+        if ( $changed ) {
             # write only if we did change the content
             if ( my $io = $html_part->open("w") ) {
                 $io->print($html_content);
diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 888634e3e..6d6b9e5c4 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -7,6 +7,75 @@ RT->Config->Set('CheckMoreMSMailHeaders', 1);
 
 # 12.0 is outlook 2007, 14.0 is 2010
 for my $mailer ( 'Microsoft Office Outlook 12.0', 'Microsoft Outlook 14.0' ) {
+    diag "Test mail with HTML content, office-style";
+    {
+        my $text = <<EOF;
+From: root\@localhost
+X-Mailer: $mailer
+To: rt\@@{[RT->Config->Get('rtname')]}
+Subject: outlook basic test
+Content-Type: multipart/alternative;
+\tboundary="----=_NextPart_000_0004_01CB045C.A5A075D0"
+
+------=_NextPart_000_0004_01CB045C.A5A075D0
+content-type: text/html; charset="utf-8"
+Content-Transfer-Encoding: quoted-printable
+
+<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
+<head>
+<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
+</head>
+<body lang="EN-US" link="blue" vlink="purple">
+<div><font color="#ff0000" size="2"><b>[External Email]</b></font></div>
+<div><br>
+</div>
+<div><br>
+</div>
+<div>
+<div class="WordSection1">
+<p class="MsoNormal"><span style="font-size:10.0pt;font-family:DengXian;color:#002060">Hi,<o:p></o:p></span></p>
+<p class="MsoNormal"><span style="font-size:10.0pt;font-family:DengXian;color:#002060"><o:p> </o:p></span></p>
+<p class="MsoNormal"><span style="font-size:10.0pt;font-family:DengXian;color:#002060">A normal line<o:p></o:p></span></p>
+<p class="MsoNormal"><span style="font-size:10.0pt;font-family:DengXian;color:#002060"><o:p> </o:p></span></p>
+<p class="MsoNormal"><span style="font-size:10.0pt;font-family:DengXian;color:#002060"><o:p> </o:p></span></p>
+<p class="MsoNormal"><span lang="EN-GB" style="font-size:8.0pt;font-family:"Calibri Light",sans-serif;color:#002060">Regards,<o:p></o:p></span></p>
+<p class="MsoNormal"><span lang="EN-GB" style="font-size:8.0pt;font-family:"Calibri Light",sans-serif;color:#002060"><o:p> </o:p></span></p>
+<p class="MsoNormal"><span lang="EN-GB" style="font-size:8.0pt;font-family:"Calibri Light",sans-serif;color:#002060"><o:p> </o:p></span></p>
+</div>
+</div>
+</body>
+</html>
+------=_NextPart_000_0004_01CB045C.A5A075D0--
+
+EOF
+
+        my $html_content = <<EOF;
+
+
+
+
+
+<div><font color="#ff0000" size="2"><b>[External Email]</b></font></div>
+
+
+<div>
+<div class="WordSection1">
+<p class="MsoNormal"><span style="font-size:10.0pt;font-family:DengXian;color:#002060">Hi,<o:p></o:p></span></p>
+
+<p class="MsoNormal"><span style="font-size:10.0pt;font-family:DengXian;color:#002060">A normal line<o:p></o:p></span></p>
+
+
+<p class="MsoNormal"><span lang="EN-GB" style="font-size:8.0pt;font-family:"Calibri Light",sans-serif;color:#002060">Regards,<o:p></o:p></span></p>
+
+
+</div>
+</div>
+
+EOF
+        test_email( $text, $html_content,
+                    $mailer . ' with multipart/alternative, line-break-only paragraphs removed from the HTML part', "text/html" );
+    }
+
     diag "Test mail with multipart/alternative (in-the-wild case)";
     {
         my $text = <<EOF;

-----------------------------------------------------------------------


More information about the rt-commit mailing list