[Rt-commit] rt branch, 4.2/max-fulltext-attachment-size, created. rt-4.2.12-99-ge59b586

Jim Brandt jbrandt at bestpractical.com
Tue Feb 16 11:34:17 EST 2016


The branch, 4.2/max-fulltext-attachment-size has been created
        at  e59b58660c5e0c8a371375e76da208eacf890d97 (commit)

- Log -----------------------------------------------------------------
commit e59b58660c5e0c8a371375e76da208eacf890d97
Author: Jim Brandt <jbrandt at bestpractical.com>
Date:   Tue Feb 16 16:32:37 2016 +0000

    Add max attachment size option for indexer
    
    We received reports of very large attachments causing
    performance problems on constrained systems. Add an
    option to limit the maximum size of attachments the
    indexer will try to index.

diff --git a/docs/full_text_indexing.pod b/docs/full_text_indexing.pod
index 24169cb..189c346 100644
--- a/docs/full_text_indexing.pod
+++ b/docs/full_text_indexing.pod
@@ -273,4 +273,11 @@ option to 1, while leaving C<Indexed> set to 0:
 This is not generally suggested, as unindexed full-text searching can
 cause severe performance problems.
 
+=head1 LIMIT ATTACHMENT SIZE
+
+On some systems, very large attachments can cause memory and other
+performance issues for the indexer, making it unable to complete
+indexing. See L<RT_Config.pm/"$MaxFulltextAttachmentSize"> for
+details on setting a maximum size for attachments to index.
+
 =cut
diff --git a/etc/RT_Config.pm.in b/etc/RT_Config.pm.in
index fd9e971..a2817a7 100644
--- a/etc/RT_Config.pm.in
+++ b/etc/RT_Config.pm.in
@@ -1365,6 +1365,21 @@ Set(%FullTextSearch,
     Indexed => 0,
 );
 
+=item C<$MaxFulltextAttachmentSize>
+
+On some systems, very large attachments can cause memory and other
+performance issues for the indexer, making it unable to complete
+indexing. Adding resources like memory and CPU will solve this
+issue, but in cases where that isn't possible, this option sets a
+maximum size, in bytes, for attachments to index. Attachments larger
+than this limit are skipped and will not be available to full text
+searches. The default, 0, means no limit.
+
+=cut
+
+# Default 0 means no limit
+Set($MaxFulltextAttachmentSize, 0);
+
 =item C<$DontSearchFileAttachments>
 
 If C<$DontSearchFileAttachments> is set to 1, then uploaded files
diff --git a/sbin/rt-fulltext-indexer.in b/sbin/rt-fulltext-indexer.in
index bd55adb..85b8bd8 100644
--- a/sbin/rt-fulltext-indexer.in
+++ b/sbin/rt-fulltext-indexer.in
@@ -99,6 +99,10 @@ if ( !flock main::DATA, LOCK_EX | LOCK_NB ) {
     }
 }
 
+my $max_size = 0; # No max size
+$max_size = RT->Config->Get('MaxFulltextAttachmentSize')
+    if RT->Config->Get('MaxFulltextAttachmentSize');
+
 my $db_type = RT->Config->Get('DatabaseType');
 my $fts_config = $ENV{RT_FTS_CONFIG} ? JSON::from_json($ENV{RT_FTS_CONFIG})
     : RT->Config->Get('FullTextSearch') || {};
@@ -201,8 +205,16 @@ sub process_bulk_insert {
         my ($attachments) = @_;
         my @insert;
         my $found = 0;
+
         while ( my $a = $attachments->Next ) {
             debug("Found attachment #". $a->id );
+            if ( $max_size and $a->ContentLength > $max_size ){
+                debug("Attachment #" . $a->id . " is " . $a->ContentLength .
+                      " bytes which is larger than configured MaxFulltextAttachmentSize " .
+                      " of " . $max_size . ", skipping");
+                next;
+            }
+
             my $text = $a->Content // "";
             HTML::Entities::decode_entities($text) if $a->ContentType eq "text/html";
             push @insert, $text, $a->id;
@@ -304,9 +316,17 @@ sub process_pg_update {
     attachment_loop( sub {
         my ($attachments) = @_;
         my @insert;
+
         while ( my $a = $attachments->Next ) {
             debug("Found attachment #". $a->id );
 
+            if ( $max_size and $a->ContentLength > $max_size ){
+                debug("Attachment #" . $a->id . " is " . $a->ContentLength .
+                      " bytes which is larger than configured MaxFulltextAttachmentSize " .
+                      " of " . $max_size . ", skipping");
+                next;
+            }
+
             my $text = $a->Content // "";
             HTML::Entities::decode_entities($text) if $a->ContentType eq "text/html";
 

-----------------------------------------------------------------------


More information about the rt-commit mailing list