[Rt-commit] rt branch, 5.0/clean-up-email-for-replying, created. rt-5.0.0-226-g65f716e01c

Dianne Skoll dianne at bestpractical.com
Tue Jan 19 16:17:07 EST 2021


The branch, 5.0/clean-up-email-for-replying has been created
        at  65f716e01c4ac1edc930cf0b0e4819e5588539cb (commit)

- Log -----------------------------------------------------------------
commit 2f658f3e4090dbf619885d8954962855d6d20163
Author: Dianne Skoll <dianne at bestpractical.com>
Date:   Tue Jan 19 16:03:30 2021 -0500

    Do MS Outlook email cleanup when getting quoted content
    
    This is preferable to mangling the email when it enters RT; it's
    much better to store the original, unmodified email in the database
    and do any fixups when we generate the quoted content for a reply.

diff --git a/lib/RT/Transaction.pm b/lib/RT/Transaction.pm
index d5231b0a6e..76f1083fed 100644
--- a/lib/RT/Transaction.pm
+++ b/lib/RT/Transaction.pm
@@ -360,7 +360,10 @@ sub Content {
         $content = $content_obj->Content ||'';
 
         if ( lc $content_obj->ContentType eq 'text/html' ) {
-            $content =~ s/(?:(<\/div>)|<p>|<br\s*\/?>|<div(\s+class="[^"]+")?>)\s*--\s+<br\s*\/?>.*?$/$1/s if $args{'Quote'};
+            if ($args{Quote}) {
+                $content = $self->CleanupContentForReply($content_obj, $content);
+                $content =~ s/(?:(<\/div>)|<p>|<br\s*\/?>|<div(\s+class="[^"]+")?>)\s*--\s+<br\s*\/?>.*?$/$1/s;
+            }
 
             if ($args{Type} ne 'text/html') {
                 $content = RT::Interface::Email::ConvertHTMLToText($content);
@@ -379,7 +382,10 @@ sub Content {
             }
         }
         else {
-            $content =~ s/\n-- \n.*?$//s if $args{'Quote'};
+            if ($args{Quote}) {
+                $content = $self->CleanupContentForReply($content_obj, $content);
+                $content =~ s/\n-- \n.*?$//s;
+            }
             if ($args{Type} eq 'text/html') {
                 # Extremely simple text->html converter
                 $content =~ s/&/&/g;
@@ -413,6 +419,84 @@ sub Content {
     return ($content);
 }
 
+=head2 _LooksLikeMSEmail
+
+Similar to RT::EmailParser->LooksLikeMSEmail, but works on the first
+RT::Attachment of this transaction rather than on a MIME::Entity.  Returns 1
+if the mail looks like it came from MS Outlook/Exchange, false otherwise.
+
+=cut
+sub _LooksLikeMSEmail
+{
+    my ($self) = @_;
+    my $attachment = $self->Attachments->First;
+    return unless $attachment;    # No attachment, so nothing to inspect.
+
+    my $mailer = $attachment->GetHeader('X-Mailer');
+    # Matches Outlook 12.x through 14.x (12.0 is Outlook 2007, 14.0 is 2010)
+    return 1 if ( $mailer && $mailer =~ /Microsoft.*?Outlook 1[2-4]\./ );
+
+    if ( RT->Config->Get('CheckMoreMSMailHeaders') ) {
+        # Check for additional headers that might
+        # indicate this came from Outlook or through Exchange.
+        # A sample we received had the headers X-MS-Has-Attach: and
+        # X-MS-Tnef-Correlator: and both had no value.
+        return 1 if $attachment->Headers =~ /\bX-MS-.{0,50}:/;
+    }
+
+    return 0;    # Doesn't look like MS email.
+}
+
+=head2 CleanupContentForReply $ATTACHMENT, $CONTENT
+
+Various email clients do silly things with content that
+can result in messy-looking replies.  This subroutine
+cleans up the known silly things that email clients
+do.
+
+$ATTACHMENT is the RT::Attachment whose content is to be
+cleaned, and $CONTENT is the raw content (a string).
+
+Returns a cleaned-up version of $CONTENT, or $CONTENT itself for non-MS mail.
+
+=cut
+sub CleanupContentForReply
+{
+    my ($self, $attachment, $content) = @_;
+
+    # Right now, we only clean up mail that looks like MS Outlook or
+    # MS Exchange email.  Bail out if that's not the case
+    return $content unless $self->_LooksLikeMSEmail;
+
+    if (lc($attachment->ContentType) eq 'text/html') {
+        # Remove the empty paragraphs Outlook adds to HTML content.  Under /x
+        # literal whitespace in a pattern is ignored, so the blank inside
+        # <o:p></o:p> is matched explicitly (&nbsp;, U+00A0 or a space).
+        $content =~ s{
+                (<p(\s+style="[^"]*")?>(<br>)?\n?</p>)|
+                (<div><br>\n?</div>)|
+                (<p(\s+[^>]+)?><span(\s+[^>]+)?><o:p>(?:&nbsp;|\xA0|[ ])?</o:p></span></p>)
+        } {}xmg;
+        return $content;
+    } else {
+        # Assume text/plain.
+        # Remove spaces at end of lines
+        $content =~ s/\ +$//mg;
+
+        # If any run of consecutive newlines has an odd length, assume
+        # the content has already been cleaned up by RescueOutlook in
+        # RT::EmailParser and do not touch it.
+        if ($content =~ /(^|[^\n])\n(\n\n)*[^\n]/) {
+            # Odd-length run of newlines found... don't touch
+            return $content;
+        }
+
+        # Otherwise, replace double-newlines with single-newlines
+        $content =~ s/\n\n/\n/g;
+    }
+    return $content;
+}
+
 =head2 QuoteHeader
 
 Returns text prepended to content when transaction is quoted

commit 65f716e01c4ac1edc930cf0b0e4819e5588539cb
Author: Dianne Skoll <dianne at bestpractical.com>
Date:   Tue Jan 19 16:04:36 2021 -0500

    Add tests for text/plain MS Outlook cleanup of quoted material.

diff --git a/t/api/transaction-quoting.t b/t/api/transaction-quoting.t
index 243da615e6..bf42c1be99 100644
--- a/t/api/transaction-quoting.t
+++ b/t/api/transaction-quoting.t
@@ -2,7 +2,7 @@
 use strict;
 use warnings;
 use RT;
-use RT::Test tests => 19;
+use RT::Test tests => 28;
 
 use_ok('RT::Transaction');
 
@@ -248,3 +248,121 @@ EXPECTED
 
     is( $result, $expected, 'Text quoted properly after five quotings');
 }
+
+diag "Test cleanup of MS Outlook mail";
+{
+    my $mail = <<'.';
+From: root at localhost
+Subject: Testing spurious newline removal
+MIME-Version: 1.0
+Content-Type: text/plain
+X-Mailer: Microsoft Office Outlook 12.0
+
+Hello.
+
+
+
+This email has spurious newlines.  Every
+
+newline is doubled, leading to excessive spacing.
+
+
+
+Will it be cleaned up?
+
+.
+    # Expected quoted body: every doubled newline collapsed to a single one.
+    my $expected = <<'QUOTED';
+> Hello.
+> 
+> This email has spurious newlines.  Every
+> newline is doubled, leading to excessive spacing.
+> 
+> Will it be cleaned up?
+QUOTED
+
+    my ( $status, $id ) = RT::Test->send_via_mailgate($mail);
+    is( $status >> 8, 0, "The mail gateway exited normally" );
+    ok( $id, "Created ticket $id" );
+    my $ticket = RT::Ticket->new( RT->SystemUser );
+    $ticket->Load( $id );
+    my $txns = $ticket->Transactions;
+    my $txn = $txns->Next;
+    my $content = $txn->Content(Quote => 1);
+    like($content, qr/\Q$expected/, 'Spurious newlines were removed');
+
+    # Try an email that has *already* been cleaned up; it must quote to the same $expected
+    $mail = <<'.';
+From: root at localhost
+Subject: Testing spurious newline removal
+MIME-Version: 1.0
+Content-Type: text/plain
+X-Mailer: Microsoft Office Outlook 12.0
+
+Hello.
+
+This email has spurious newlines.  Every
+newline is doubled, leading to excessive spacing.
+
+Will it be cleaned up?
+.
+
+    ( $status, $id ) = RT::Test->send_via_mailgate($mail);
+    is( $status >> 8, 0, "The mail gateway exited normally" );
+    ok( $id, "Created ticket $id" );
+    $ticket = RT::Ticket->new( RT->SystemUser );
+    $ticket->Load( $id );
+    $txns = $ticket->Transactions;
+    $txn = $txns->Next;
+    $content = $txn->Content(Quote => 1);
+    like($content, qr/\Q$expected/, 'Spurious newlines were not removed twice');
+
+    # Try an email that isn't marked as being from Outlook (no X-Mailer header)
+    $mail = <<'.';
+From: root at localhost
+Subject: Testing spurious newline removal
+MIME-Version: 1.0
+Content-Type: text/plain
+
+Hello.
+
+
+
+This email has spurious newlines.  Every
+
+newline is doubled, leading to excessive spacing.
+
+
+
+Will it be cleaned up?
+
+.
+    # Without the Outlook marker the doubled newlines must survive quoting.
+    $expected = <<'QUOTED';
+> Hello.
+> 
+> 
+> 
+> This email has spurious newlines.  Every
+> 
+> newline is doubled, leading to excessive spacing.
+> 
+> 
+> 
+> Will it be cleaned up?
+> 
+QUOTED
+
+    ( $status, $id ) = RT::Test->send_via_mailgate($mail);
+    is( $status >> 8, 0, "The mail gateway exited normally" );
+    ok( $id, "Created ticket $id" );
+    $ticket = RT::Ticket->new( RT->SystemUser );
+    $ticket->Load( $id );
+    $txns = $ticket->Transactions;
+    $txn = $txns->Next;
+    $content = $txn->Content(Quote => 1);
+    like($content, qr/\Q$expected/, 'Spurious newlines were not removed from non-Outlook email');
+
+
+}
+

-----------------------------------------------------------------------


More information about the rt-commit mailing list