[Rt-commit] rt branch, 5.0/clean-up-email-for-replying, created. rt-5.0.0-226-g65f716e01c
Dianne Skoll
dianne at bestpractical.com
Tue Jan 19 16:17:07 EST 2021
The branch, 5.0/clean-up-email-for-replying has been created
at 65f716e01c4ac1edc930cf0b0e4819e5588539cb (commit)
- Log -----------------------------------------------------------------
commit 2f658f3e4090dbf619885d8954962855d6d20163
Author: Dianne Skoll <dianne at bestpractical.com>
Date: Tue Jan 19 16:03:30 2021 -0500
Do MS Outlook email cleanup when getting quoted content
This is preferable to mangling the email when it enters RT; it's
much better to store the original, unmodified email in the database
and do any fixups when we generate the quoted content for a reply.
diff --git a/lib/RT/Transaction.pm b/lib/RT/Transaction.pm
index d5231b0a6e..76f1083fed 100644
--- a/lib/RT/Transaction.pm
+++ b/lib/RT/Transaction.pm
@@ -360,7 +360,10 @@ sub Content {
$content = $content_obj->Content ||'';
if ( lc $content_obj->ContentType eq 'text/html' ) {
- $content =~ s/(?:(<\/div>)|<p>|<br\s*\/?>|<div(\s+class="[^"]+")?>)\s*--\s+<br\s*\/?>.*?$/$1/s if $args{'Quote'};
+ if ($args{Quote}) {
+ $content = $self->CleanupContentForReply($content_obj, $content);
+ $content =~ s/(?:(<\/div>)|<p>|<br\s*\/?>|<div(\s+class="[^"]+")?>)\s*--\s+<br\s*\/?>.*?$/$1/s;
+ }
if ($args{Type} ne 'text/html') {
$content = RT::Interface::Email::ConvertHTMLToText($content);
@@ -379,7 +382,10 @@ sub Content {
}
}
else {
- $content =~ s/\n-- \n.*?$//s if $args{'Quote'};
+ if ($args{Quote}) {
+ $content = $self->CleanupContentForReply($content_obj, $content);
+ $content =~ s/\n-- \n.*?$//s;
+ }
if ($args{Type} eq 'text/html') {
# Extremely simple text->html converter
$content =~ s/&/&amp;/g;
@@ -413,6 +419,84 @@ sub Content {
return ($content);
}
+=head2 _LooksLikeMSEmail
+
+Similar to RT::EmailParser->LooksLikeMSEmail, but works on the first RT::Attachment of this transaction rather than on a MIME::Entity.
+Returns 1 if the headers look like MS Outlook or Exchange mail, 0 if they do not, and nothing (undef in scalar context) if there is no attachment.
+
+=cut
+sub _LooksLikeMSEmail
+{
+    my ($self) = @_;
+    my $attachment = $self->Attachments->First;
+    return unless $attachment;    # bare return: false in scalar AND list context
+
+    my $mailer = $attachment->GetHeader('X-Mailer');
+    # 12.0 is outlook 2007, 14.0 is 2010
+    return 1 if ( $mailer && $mailer =~ /Microsoft(?:.*?)Outlook 1[2-4]\./ );
+
+    if ( RT->Config->Get('CheckMoreMSMailHeaders') ) {
+        # Check for additional headers that might
+        # indicate this came from Outlook or through Exchange.
+        # A sample we received had the headers X-MS-Has-Attach: and
+        # X-MS-Tnef-Correlator: and both had no value.
+
+        return 1 if $attachment->Headers =~ /\bX-MS-.{0,50}:/;
+    }
+
+    return 0; # Doesn't look like MS email.
+}
+
+=head2 CleanupContentForReply $ATTACHMENT, $CONTENT
+
+Various email clients do silly things with content that
+can result in messy-looking replies. This subroutine
+cleans up the known silly things that email clients
+do. Currently only MS Outlook/Exchange mail is touched.
+
+$ATTACHMENT is the RT::Attachment whose content is to be
+cleaned, and $CONTENT is the raw content (a string).
+
+Returns a cleaned-up version of $CONTENT
+
+=cut
+sub CleanupContentForReply
+{
+    my ($self, $attachment, $content) = @_;
+
+    # Right now, we only clean up mail that looks like MS Outlook or
+    # MS Exchange email. Bail out if that's not the case
+    return $content unless $self->_LooksLikeMSEmail;
+
+    if (lc($attachment->ContentType) eq 'text/html') {
+        # Remove extra newlines from HTML content. Whitespace in the
+        # pattern is ignored under /x, so the <o:p> filler is spelled out.
+        $content =~ s{
+            (<p(\s+style="[^"]*")?>(<br>)?\n?</p>)|
+            (<div><br>\n?</div>)|
+            (<p(\s+[^>]+)?><span(\s+[^>]+)?><o:p>(?:&nbsp;|\s)*</o:p></span></p>)
+        } {}xmg;
+    } else {
+        # Assume text/plain.
+
+        # Remove spaces at end of lines, but leave the "-- " signature
+        # delimiter alone so the caller can still strip the signature.
+        $content =~ s/(?<!^--)\ +$//mg;
+
+        # If there are an odd number of newlines anywhere in the
+        # content, assume it has already been cleaned up by
+        # RescueOutlook in RT::EmailParser and do not touch it.
+        # Otherwise, replace double-newlines with single-newlines
+        if ($content =~ /(^|[^\n])\n(\n\n)*[^\n]/) {
+            # Odd number of newlines found... don't touch
+            return $content;
+        }
+
+        $content =~ s/\n\n/\n/g;
+    }
+    return $content;
+}
+
=head2 QuoteHeader
Returns text prepended to content when transaction is quoted
commit 65f716e01c4ac1edc930cf0b0e4819e5588539cb
Author: Dianne Skoll <dianne at bestpractical.com>
Date: Tue Jan 19 16:04:36 2021 -0500
Add tests for text/plain MS Outlook cleanup of quoted material.
diff --git a/t/api/transaction-quoting.t b/t/api/transaction-quoting.t
index 243da615e6..bf42c1be99 100644
--- a/t/api/transaction-quoting.t
+++ b/t/api/transaction-quoting.t
@@ -2,7 +2,7 @@
use strict;
use warnings;
use RT;
-use RT::Test tests => 19;
+use RT::Test tests => 28;
use_ok('RT::Transaction');
@@ -248,3 +248,121 @@ EXPECTED
is( $result, $expected, 'Text quoted properly after five quotings');
}
+
+diag "Test cleanup of MS Outlook mail";
+{
+ my $mail = <<'.'; # Outlook-flagged (X-Mailer) mail in which every newline is doubled
+From: root at localhost
+Subject: Testing spurious newline removal
+MIME-Version: 1.0
+Content-Type: text/plain
+X-Mailer: Microsoft Office Outlook 12.0
+
+Hello.
+
+
+
+This email has spurious newlines. Every
+
+newline is doubled, leading to excessive spacing.
+
+
+
+Will it be cleaned up?
+
+.
+
+ my $expected = <<'QUOTED'; # quoted reply text with the doubled newlines collapsed
+> Hello.
+>
+> This email has spurious newlines. Every
+> newline is doubled, leading to excessive spacing.
+>
+> Will it be cleaned up?
+QUOTED
+
+ my ( $status, $id ) = RT::Test->send_via_mailgate($mail);
+ is( $status >> 8, 0, "The mail gateway exited normally" );
+ ok( $id, "Created ticket $id" );
+ my $ticket = RT::Ticket->new( RT->SystemUser );
+ $ticket->Load( $id );
+ my $txns = $ticket->Transactions;
+ my $txn = $txns->Next;
+ my $content = $txn->Content(Quote => 1);
+ like($content, qr/\Q$expected/, 'Spurious newlines were removed');
+
+ # Try an email that has *already* been cleaned up (single newlines); it must quote to the same $expected
+ $mail = <<'.';
+From: root at localhost
+Subject: Testing spurious newline removal
+MIME-Version: 1.0
+Content-Type: text/plain
+X-Mailer: Microsoft Office Outlook 12.0
+
+Hello.
+
+This email has spurious newlines. Every
+newline is doubled, leading to excessive spacing.
+
+Will it be cleaned up?
+.
+
+ ( $status, $id ) = RT::Test->send_via_mailgate($mail);
+ is( $status >> 8, 0, "The mail gateway exited normally" );
+ ok( $id, "Created ticket $id" );
+ $ticket = RT::Ticket->new( RT->SystemUser );
+ $ticket->Load( $id );
+ $txns = $ticket->Transactions;
+ $txn = $txns->Next;
+ $content = $txn->Content(Quote => 1);
+ like($content, qr/\Q$expected/, 'Spurious newlines were not removed twice');
+
+ # Try an email that isn't marked as being from Outlook (no X-Mailer header); it must be left alone
+ $mail = <<'.';
+From: root at localhost
+Subject: Testing spurious newline removal
+MIME-Version: 1.0
+Content-Type: text/plain
+
+Hello.
+
+
+
+This email has spurious newlines. Every
+
+newline is doubled, leading to excessive spacing.
+
+
+
+Will it be cleaned up?
+
+.
+
+ $expected = <<'QUOTED'; # every original line, blank ones included, quoted as-is
+> Hello.
+>
+>
+>
+> This email has spurious newlines. Every
+>
+> newline is doubled, leading to excessive spacing.
+>
+>
+>
+> Will it be cleaned up?
+>
+QUOTED
+
+ ( $status, $id ) = RT::Test->send_via_mailgate($mail);
+ is( $status >> 8, 0, "The mail gateway exited normally" );
+ ok( $id, "Created ticket $id" );
+ $ticket = RT::Ticket->new( RT->SystemUser );
+ $ticket->Load( $id );
+ $txns = $ticket->Transactions;
+ $txn = $txns->Next;
+ $content = $txn->Content(Quote => 1);
+ like($content, qr/\Q$expected/, 'Spurious newlines were not removed from non-Outlook email');
+
+
+}
+
-----------------------------------------------------------------------
More information about the rt-commit
mailing list