[Rt-commit] rt branch, 4.4/rescue-outlook-html, created. rt-4.4.3-54-gdc6144d49
Gergely Nagy
algernon at bestpractical.com
Thu Oct 4 16:49:08 EDT 2018
The branch, 4.4/rescue-outlook-html has been created
at dc6144d493c40e25f66d270dacd74ce9b8d88c47 (commit)
- Log -----------------------------------------------------------------
commit 1b5024ff48825b1a600d6643c53ea87d177ea262
Author: Gergely Nagy <algernon at bestpractical.com>
Date: Thu Oct 4 21:46:17 2018 +0200
t/mail/outlook.t: Make it possible to select the part type with test_email
Normally, we want to test against the `text/plain` part of a multipart email,
but there are cases where we'd rather test against the `text/html` part. To make
this possible, `test_email` takes an additional argument:
`$check_type` (defaulting to `text/plain` if unspecified).
Signed-off-by: Gergely Nagy <algernon at bestpractical.com>
diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 8f3b71bc8..62a6fbb34 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -387,7 +387,7 @@ EOF
}
sub test_email {
- my ( $text, $content, $msg ) = @_;
+ my ( $text, $content, $msg, $check_type ) = @_;
my ( $status, $id ) = RT::Test->send_via_mailgate($text);
is( $status >> 8, 0, "The mail gateway exited normally" );
ok( $id, "Created ticket" );
@@ -400,6 +400,5 @@ sub test_email {
$txns->Limit( FIELD => 'Type', VALUE => 'Create' );
my $txn = $txns->First;
- is( $txn->Content, $content, $msg );
+ is( $txn->Content(Type => $check_type || "text/plain"), $content, $msg );
}
-
commit a92b600d5312f931dc5e21df319571ea7b7070c0
Author: Gergely Nagy <algernon at bestpractical.com>
Date: Thu Oct 4 21:42:53 2018 +0200
t/mail/outlook.t: Add a new test case
The new test case has the HTML part first, the text part second. The goal is to
test that the text part gets cleaned up anyway.
Signed-off-by: Gergely Nagy <algernon at bestpractical.com>
diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 62a6fbb34..9d49f55d5 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -1,7 +1,7 @@
use strict;
use warnings;
-use RT::Test tests => 66;
+use RT::Test tests => 78;
RT->Config->Set('CheckMoreMSMailHeaders', 1);
@@ -39,6 +39,52 @@ Content-Transfer-Encoding: quoted-printable
<html>this is fake</html>
+------=_NextPart_000_0004_01CB045C.A5A075D0--
+
+EOF
+
+ my $content = <<EOF;
+here is the content
+
+blahmm
+another line
+EOF
+ test_email( $text, $content,
+ $mailer . ' with multipart/alternative, \n\n are replaced' );
+ }
+
+ diag "Test mail with multipart/alternative";
+ {
+ my $text = <<EOF;
+From: root\@localhost
+X-Mailer: $mailer
+To: rt\@@{[RT->Config->Get('rtname')]}
+Subject: outlook basic test
+Content-Type: multipart/alternative;
+\tboundary="----=_NextPart_000_0004_01CB045C.A5A075D0"
+
+------=_NextPart_000_0004_01CB045C.A5A075D0
+Content-Type: text/html;
+\tcharset="us-ascii"
+Content-Transfer-Encoding: quoted-printable
+
+<html>this is fake</html>
+
+
+------=_NextPart_000_0004_01CB045C.A5A075D0
+Content-Type: text/plain;
+\tcharset="us-ascii"
+Content-Transfer-Encoding: 7bit
+
+here is the content
+
+
+
+blahmm
+
+another line
+
+
------=_NextPart_000_0004_01CB045C.A5A075D0--
EOF
commit dc6144d493c40e25f66d270dacd74ce9b8d88c47
Author: Gergely Nagy <algernon at bestpractical.com>
Date: Wed Oct 3 19:14:09 2018 +0200
Teach RT::EmailParser::RescueOutlook how to clean up HTML mail too
`RescueOutlook` cleaned up the `text/plain` parts of a message, but over the
years, HTML mail became much more common, and the HTML parts produced by Outlook
have similar issues. This change attempts to clean up those parts too.
As a side-effect of the change, the way `RescueOutlook` finds the e-mail parts
to clean up has been improved: now it iterates through all parts, and cleans up
the first text, and the first HTML part, regardless of order.
Signed-off-by: Gergely Nagy <algernon at bestpractical.com>
diff --git a/lib/RT/EmailParser.pm b/lib/RT/EmailParser.pm
index ad26a291b..285798cd8 100644
--- a/lib/RT/EmailParser.pm
+++ b/lib/RT/EmailParser.pm
@@ -634,6 +634,11 @@ in it. it's cool to have a 'text/plain' part, but the problem is the part is
not so right: all the "\n" in your main message will become "\n\n" :/
this method will fix this bug, i.e. replaces "\n\n" to "\n".
+
+Similarly, if the message is HTML-only, the same problem is present there:
+between each paragraph, there will be an empty one in between with only a line
+break. This method removes those line break-only paragraphs too.
+
return 1 if it does find the problem in the entity and get it fixed.
=cut
@@ -645,25 +650,32 @@ sub RescueOutlook {
return unless $mime && $self->LooksLikeMSEmail($mime);
- my $text_part;
+ my ($text_part, $html_part);
+
if ( $mime->head->get('Content-Type') =~ m{multipart/mixed} ) {
my $first = $mime->parts(0);
if ( $first->head->get('Content-Type') =~ m{multipart/alternative} )
{
- my $inner_first = $first->parts(0);
- if ( $inner_first->head->get('Content-Type') =~ m{text/plain} )
- {
- $text_part = $inner_first;
+ foreach my $part ($first->parts) {
+ if ( $part->head->get('Content-Type') =~ m{text/plain} && !$text_part) {
+ $text_part = $part;
+ }
+ if ( $part->head->get('Content-Type') =~ m{text/html} && !$html_part) {
+ $html_part = $part;
+ }
}
}
}
elsif ( $mime->head->get('Content-Type') =~ m{multipart/alternative} ) {
- my $first = $mime->parts(0);
- if ( $first->head->get('Content-Type') =~ m{text/plain} ) {
- $text_part = $first;
+ foreach my $part ($mime->parts) {
+ if ( $part->head->get('Content-Type') =~ m{text/plain} && !$text_part) {
+ $text_part = $part;
+ }
+ if ( $part->head->get('Content-Type') =~ m{text/html} && !$html_part) {
+ $html_part = $part;
+ }
}
}
-
# Add base64 since we've seen examples of double newlines with
# this type too. Need an example of a multi-part base64 to
# handle that permutation if it exists.
@@ -671,22 +683,25 @@ sub RescueOutlook {
$text_part = $mime; # Assuming single part, already decoded.
}
+ my $ret;
+
if ($text_part) {
# use the unencoded string
- my $content = $text_part->bodyhandle->as_string;
- if ( $content =~ s/\n\n/\n/g ) {
+ my $text_content = $text_part->bodyhandle->as_string;
+
+ if ( $text_content =~ s/\n\n/\n/mg ) {
# Outlook puts a space on extra newlines, remove it
- $content =~ s/\ +$//mg;
+ $text_content =~ s/\ +$//mg;
# only write only if we did change the content
if ( my $io = $text_part->open("w") ) {
- $io->print($content);
+ $io->print($text_content);
$io->close;
$RT::Logger->debug(
"Removed extra newlines from MS Outlook message.");
- return 1;
+ $ret = 1;
}
else {
$RT::Logger->error("Can't write to body to fix newlines");
@@ -694,7 +709,28 @@ sub RescueOutlook {
}
}
- return;
+ if ($html_part) {
+
+ # use the unencoded string
+ my $html_content = $html_part->bodyhandle->as_string;
+
+ if ( $html_content =~ s{<p(\s+style="[^"]*")?><br>\n?</p>}{}mg ) {
+
+ # only write only if we did change the content
+ if ( my $io = $html_part->open("w") ) {
+ $io->print($html_content);
+ $io->close;
+ $RT::Logger->debug(
+ "Removed extra newlines from MS Outlook message.");
+ $ret = 1;
+ }
+ else {
+ $RT::Logger->error("Can't write to body to fix newlines");
+ }
+ }
+ }
+
+ return $ret;
}
=head1 LooksLikeMSEmail
diff --git a/t/mail/outlook.t b/t/mail/outlook.t
index 9d49f55d5..5b659efa2 100644
--- a/t/mail/outlook.t
+++ b/t/mail/outlook.t
@@ -1,7 +1,7 @@
use strict;
use warnings;
-use RT::Test tests => 78;
+use RT::Test tests => 90;
RT->Config->Set('CheckMoreMSMailHeaders', 1);
@@ -36,21 +36,25 @@ Content-Type: text/html;
\tcharset="us-ascii"
Content-Transfer-Encoding: quoted-printable
-<html>this is fake</html>
-
+<html><body><p>here is the content</p><p><br></p><p>another paragraph</p></body></html>
------=_NextPart_000_0004_01CB045C.A5A075D0--
EOF
- my $content = <<EOF;
+ my $text_content = <<EOF;
here is the content
blahmm
another line
EOF
- test_email( $text, $content,
- $mailer . ' with multipart/alternative, \n\n are replaced' );
+ my $html_content = <<EOF;
+<p>here is the content</p><p>another paragraph</p>
+EOF
+ test_email( $text, $text_content,
+ $mailer . ' with multipart/alternative, \n\n are replaced' );
+ test_email( $text, $html_content,
+ $mailer . ' with multipart/alternative, line-break-only paragraphs removed from the HTML part', "text/html" );
}
diag "Test mail with multipart/alternative";
-----------------------------------------------------------------------
More information about the rt-commit
mailing list