[Rt-commit] rt branch, 4.4/munge-more-attachments, updated. rt-4.4.3-203-ge8b802731

? sunnavy sunnavy at bestpractical.com
Fri Jan 25 07:54:08 EST 2019


The branch, 4.4/munge-more-attachments has been updated
       via  e8b802731b5433018a17bbaa6bc2b879c9a631b0 (commit)
       via  cfe755fbee53ba6b67eda32a40f3d1e27a24db32 (commit)
       via  45df2c8776f95c57d6833c8eeafc2e75abf9d8d8 (commit)
       via  1ab8d8ce8d1d0db8a8b0a8cec9de2d281bf9633a (commit)
      from  d62e36c75ee6d3037b37255c8820d5828a5f23a1 (commit)

Summary of changes:
 lib/RT/Attachment.pm         | 21 +++++++++++---
 lib/RT/Attachments.pm        | 67 +++++++++++++++++++++++++-------------------
 sbin/rt-munge-attachments.in | 43 ++++++++++++++++++++++++++--
 t/api/attachment.t           | 52 ++++++++++++++++++++++++++++++++++
 4 files changed, 147 insertions(+), 36 deletions(-)

- Log -----------------------------------------------------------------
commit 1ab8d8ce8d1d0db8a8b0a8cec9de2d281bf9633a
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Jan 25 03:35:13 2019 +0800

    Make ReplaceHeaders/ReplaceContent return true only if replacement happens
    
    We filter attachments in RT::Attachments::ReplaceAttachments by the
    search string in advance, so replacement is supposed to always happen in
    the inner calls of ReplaceHeaders/ReplaceContent, which makes the
    statement of
    
            return ( 1, $self->loc('Headers cleared') );
    
    always be called and the statement of
    
            return ( 1, $self->loc('No content matches found') );
    
    never be called.
    
    Logically both statements above are not quite correct considering
    attachments that replacement methods are called on might not have
    matches.
    
    This commit fixes this, so we can always use the return values to
    indicate if there are replacements or not.

diff --git a/lib/RT/Attachment.pm b/lib/RT/Attachment.pm
index e443020d2..66a0e4e6d 100644
--- a/lib/RT/Attachment.pm
+++ b/lib/RT/Attachment.pm
@@ -769,14 +769,27 @@ sub ReplaceHeaders {
 
     return ( 0, $self->loc('No Search string provided') ) unless $args{Search};
 
+    my $updated;
     foreach my $header ( $self->SplitHeaders ) {
         my ( $tag, $value ) = split /:/, $header, 2;
         if ( $value =~ s/\Q$args{Search}\E/$args{Replacement}/ig ) {
-            my $ret = $self->SetHeader( $tag, $value );
-            RT::Logger->error("Could not set header: $tag to $value") unless $ret;
+            my ( $ret, $msg ) = $self->SetHeader( $tag, $value );
+            if ( $ret ) {
+                $updated ||= 1;
+            }
+            else {
+                RT::Logger->error("Could not set header: $tag to $value: $msg");
+                return ( $ret, $msg );
+            }
         }
     }
-    return ( 1, $self->loc('Headers cleared') );
+
+    if ( $updated ) {
+        return ( 1, $self->loc('Headers cleared') );
+    }
+    else {
+        return ( 0, $self->loc('No header matches found') );
+    }
 }
 
 =head2 ReplaceContent ( Search => 'SEARCH', Replacement => 'Replacement' )
@@ -824,7 +837,7 @@ sub ReplaceContent {
         $RT::Handle->Commit;
         return ( $ret, 'Content replaced' );
     }
-    return ( 1, $self->loc('No content matches found') );
+    return ( 0, $self->loc('No content matches found') );
 }
 
 

commit 45df2c8776f95c57d6833c8eeafc2e75abf9d8d8
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Jan 25 05:52:24 2019 +0800

    Add --tickets to munge more attachments that have encoded content
    
    Previously attachments were pre-filtered by the search string at SQL
    level, to limit the to-be-processed attachments to a reasonable small
    set. The drawback is it bypassed attachments that have encodings like
    base64, etc. even if the decoded content matches and should be replaced.
    Because of this, we couldn't munge non-ascii strings as they are quite
    probably encoded in the database.
    
    To get around the pre-filter behavior and still keep to-be-processed
    attachments being a small set, --tickets is added. Besides, in real
    life, we believe it'll be the most common usage to munge attachments for
    only a few tickets.

diff --git a/lib/RT/Attachments.pm b/lib/RT/Attachments.pm
index b385e9962..effdaf749 100644
--- a/lib/RT/Attachments.pm
+++ b/lib/RT/Attachments.pm
@@ -266,6 +266,7 @@ sub ReplaceAttachments {
         Replacement => '',
         Headers     => 1,
         Content     => 1,
+        FilterBySearchString => 1,
         @_,
     );
 
@@ -287,34 +288,38 @@ sub ReplaceAttachments {
     };
 
     my $attachments = $self->Clone;
-    $attachments->Limit(
-        FIELD     => 'ContentEncoding',
-        VALUE     => 'none',
-        SUBCLAUSE => 'Encoding',
-    );
-    $attachments->Limit(
-        FIELD           => 'ContentEncoding',
-        OPERATOR        => 'IS',
-        VALUE           => 'NULL',
-        SUBCLAUSE       => 'Encoding',
-    );
-
-    # Adding "\n" is to avoid trailing "=" in QP encoding
-    if ( MIME::QuotedPrint::encode("$args{Search}\n") eq "$args{Search}\n" ) {
+    if ( $args{FilterBySearchString} ) {
         $attachments->Limit(
             FIELD     => 'ContentEncoding',
-            VALUE     => 'quoted-printable',
+            VALUE     => 'none',
             SUBCLAUSE => 'Encoding',
         );
+        $attachments->Limit(
+            FIELD     => 'ContentEncoding',
+            OPERATOR  => 'IS',
+            VALUE     => 'NULL',
+            SUBCLAUSE => 'Encoding',
+        );
+
+        # Adding "\n" is to avoid trailing "=" in QP encoding
+        if ( MIME::QuotedPrint::encode("$args{Search}\n") eq "$args{Search}\n" ) {
+            $attachments->Limit(
+                FIELD     => 'ContentEncoding',
+                VALUE     => 'quoted-printable',
+                SUBCLAUSE => 'Encoding',
+            );
+        }
     }
 
     if ( $args{Headers} ) {
         my $atts = $attachments->Clone;
-        $atts->Limit(
-            FIELD    => 'Headers',
-            OPERATOR => 'LIKE',
-            VALUE    => $args{Search},
-        );
+        if ( $args{FilterBySearchString} ) {
+            $atts->Limit(
+                FIELD    => 'Headers',
+                OPERATOR => 'LIKE',
+                VALUE    => $args{Search},
+            );
+        }
         $atts->Limit(
             FIELD     => 'ContentType',
             OPERATOR  => 'IN',
@@ -339,19 +344,21 @@ sub ReplaceAttachments {
                 $create_munge_txn->( $att->TransactionObj->TicketObj );
             }
             else {
-                RT::Logger->error($msg);
+                RT::Logger->debug($msg);
             }
         }
     }
 
     if ( $args{Content} ) {
         my $atts = $attachments->Clone;
-        $atts->Limit(
-            FIELD     => 'Content',
-            OPERATOR  => 'LIKE',
-            VALUE     => $args{Search},
-            SUBCLAUSE => 'Content',
-        );
+        if ( $args{FilterBySearchString} ) {
+            $atts->Limit(
+                FIELD     => 'Content',
+                OPERATOR  => 'LIKE',
+                VALUE     => $args{Search},
+                SUBCLAUSE => 'Content',
+            );
+        }
         $atts->Limit(
             FIELD    => 'ContentType',
             OPERATOR => 'IN',
@@ -368,13 +375,13 @@ sub ReplaceAttachments {
                 $create_munge_txn->( $att->TransactionObj->TicketObj );
             }
             else {
-                RT::Logger->error($msg);
+                RT::Logger->debug($msg);
             }
         }
     }
 
     my $count = scalar keys %munged;
-    return ( 1, $self->loc( "Updated [quant,_1,ticket's,tickets'] attachment content", $count ) );
+    return wantarray ? ( 1, $self->loc( "Updated [quant,_1,ticket's,tickets'] attachment content", $count ) ) : $count;
 }
 
 RT::Base->_ImportOverlays();
diff --git a/sbin/rt-munge-attachments.in b/sbin/rt-munge-attachments.in
index 3b4717b2f..137db957e 100644
--- a/sbin/rt-munge-attachments.in
+++ b/sbin/rt-munge-attachments.in
@@ -70,7 +70,7 @@ BEGIN {    # BEGIN RT CMD BOILERPLATE
 # Read in the options
 my %opts;
 use Getopt::Long;
-GetOptions( \%opts, "help|h", "search=s", "replacement=s", );
+GetOptions( \%opts, "help|h", "search=s", "replacement=s", 'tickets=s' );
 
 if ( $opts{'help'} || !$opts{'search'} ) {
     require Pod::Usage;
@@ -84,9 +84,39 @@ my $replacement = $opts{'replacement'} || '';
 
 my $search = $opts{'search'};
 
-my $attachments = RT::Attachments->new( RT->SystemUser );
-my ($ret, $msg) = $attachments->ReplaceAttachments(Search => $search, Replacement => $replacement);
+my $attachments;
+if ( $opts{tickets} ) {
+    my @tickets = split /\s*,\s*/, $opts{tickets};
+
+    $attachments = RT::Attachments->new( RT->SystemUser );
+    my $txn_alias   = $attachments->TransactionAlias;
+    $attachments->Limit(
+        ALIAS => $txn_alias,
+        FIELD => 'ObjectType',
+        VALUE => 'RT::Ticket',
+    );
+    my $ticket_alias = $attachments->Join(
+        ALIAS1 => $txn_alias,
+        FIELD1 => 'ObjectId',
+        TABLE2 => 'Tickets',
+        FIELD2 => 'id',
+    );
+    $attachments->Limit(
+        ALIAS    => $ticket_alias,
+        FIELD    => 'EffectiveId',
+        VALUE    => \@tickets,
+        OPERATOR => 'IN',
+    );
+}
+else {
+    $attachments = RT::Attachments->new( RT->SystemUser );
+}
 
+my ( $ret, $msg ) = $attachments->ReplaceAttachments(
+    Search      => $search,
+    Replacement => $replacement,
+    $opts{tickets} ? ( FilterBySearchString => 0 ) : (),
+);
 print STDERR $msg . "\n";
 
 
@@ -130,4 +160,11 @@ If a match is found the content will be removed unless a replacement is provided
 
 Provide a string to replace the value matched by the search.
 
+=item --tickets=1,2,3
+
+Limit attachments to the specified tickets. Note that if tickets are not
+specified, RT will pre-filter attachments by the search string use SQL,
+which might bypass attachments of which contents are encoded(like
+base64). Use this option to prevent the pre-filter behavior.
+
 =back

commit cfe755fbee53ba6b67eda32a40f3d1e27a24db32
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Jan 25 20:31:13 2019 +0800

    Make sure search/replacement strings to munge are decoded ones
    
    We use decoded characters in RT code consistently, and this commit makes
    non-ascii search strings work too.

diff --git a/lib/RT/Attachments.pm b/lib/RT/Attachments.pm
index effdaf749..056bcb854 100644
--- a/lib/RT/Attachments.pm
+++ b/lib/RT/Attachments.pm
@@ -302,7 +302,9 @@ sub ReplaceAttachments {
         );
 
         # Adding "\n" is to avoid trailing "=" in QP encoding
-        if ( MIME::QuotedPrint::encode("$args{Search}\n") eq "$args{Search}\n" ) {
+        if ( MIME::QuotedPrint::encode( Encode::encode( 'UTF-8', "$args{Search}\n" ) ) eq
+            Encode::encode( 'UTF-8', "$args{Search}\n" ) )
+        {
             $attachments->Limit(
                 FIELD     => 'ContentEncoding',
                 VALUE     => 'quoted-printable',
diff --git a/sbin/rt-munge-attachments.in b/sbin/rt-munge-attachments.in
index 137db957e..667672341 100644
--- a/sbin/rt-munge-attachments.in
+++ b/sbin/rt-munge-attachments.in
@@ -113,8 +113,8 @@ else {
 }
 
 my ( $ret, $msg ) = $attachments->ReplaceAttachments(
-    Search      => $search,
-    Replacement => $replacement,
+    Search      => Encode::decode( 'UTF-8', $search ),
+    Replacement => Encode::decode( 'UTF-8', $replacement ),
     $opts{tickets} ? ( FilterBySearchString => 0 ) : (),
 );
 print STDERR $msg . "\n";

commit e8b802731b5433018a17bbaa6bc2b879c9a631b0
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Jan 25 20:42:38 2019 +0800

    Add tests for rt-munge-attachments with --tickets

diff --git a/t/api/attachment.t b/t/api/attachment.t
index 25d736b8b..35906bbc0 100644
--- a/t/api/attachment.t
+++ b/t/api/attachment.t
@@ -155,4 +155,56 @@ diag 'Test clearing and replacing header and content in attachments table';
     is $attachments->Count, 1, 'Headers are not replaced when flagged as false';
 }
 
+diag 'Test clearing and replacing header and content in attachments from example emails';
+{
+    my $email_file =
+      RT::Test::get_relocatable_file( 'multipart-alternative-with-umlaut',
+        ( File::Spec->updir(), 'data', 'emails' ) );
+    my $content = RT::Test->file_content($email_file);
+
+    my $parser = RT::EmailParser->new;
+    $parser->ParseMIMEEntityFromScalar($content);
+    my $ticket = RT::Test->create_ticket( Queue => 'General', Subject => 'test munge', MIMEObj => $parser->Entity );
+    my $decoded_umlaut = Encode::decode( 'UTF-8', 'Grüßen' );
+
+    my $attachments = $ticket->Attachments( WithHeaders => 1, WithContent => 1 );
+    while ( my $att = $attachments->Next ) {
+        if ( $att->Content ) {
+            like( $att->Content, qr/$decoded_umlaut/, "Content contains $decoded_umlaut" );
+            unlike( $att->Content, qr/anonymous/, 'Content lacks anonymous' );
+        }
+        else {
+            like( $att->Headers, qr/"Stever, Gregor" <gst\@example.com>/, 'Headers contain gst at example.com' );
+            unlike( $att->Headers, qr/anon\@example.com/, 'Headers lack anon at example.com' );
+        }
+    }
+
+    RT::Test->run_and_capture(
+        command     => $RT::SbinPath . '/rt-munge-attachments',
+        tickets     => $ticket->id,
+        search      => 'Grüßen',
+        replacement => 'anonymous',
+    );
+
+    RT::Test->run_and_capture(
+        command     => $RT::SbinPath . '/rt-munge-attachments',
+        tickets     => $ticket->id,
+        search      => '"Stever, Gregor" <gst at example.com>',
+        replacement => 'anon at example.com',
+    );
+
+    $attachments = $ticket->Attachments( WithHeaders => 1, WithContent => 1 );
+    while ( my $att = $attachments->Next ) {
+        my $decoded_umlaut = Encode::decode( 'UTF-8', 'Grüßen' );
+        if ( $att->Content ) {
+            unlike( $att->Content, qr/$decoded_umlaut/, "Content lacks $decoded_umlaut" );
+            like( $att->Content, qr/anonymous/, 'Content contains anonymous' );
+        }
+        else {
+            unlike( $att->Headers, qr/"Stever, Gregor" <gst\@example.com>/, 'Headers lack gst at example.com' );
+            like( $att->Headers, qr/anon\@example.com/, 'Headers contain anon at example.com' );
+        }
+    }
+}
+
 done_testing();

-----------------------------------------------------------------------


More information about the rt-commit mailing list