[Rt-commit] rt branch, 4.4/merged-fulltext, created. rt-4.4.1-325-ge6d6184

Aaron Kondziela aaron at bestpractical.com
Thu Apr 20 23:14:00 EDT 2017


The branch, 4.4/merged-fulltext has been created
        at  e6d618436fcb25dbf7b4b5c53df640ca2e9e2da3 (commit)

- Log -----------------------------------------------------------------
commit e6d618436fcb25dbf7b4b5c53df640ca2e9e2da3
Author: Aaron Kondziela <aaron at bestpractical.com>
Date:   Thu Apr 20 23:08:57 2017 -0400

    Fix searching for merged tickets when using MySQL full text index
    
    Fulltext search results would not show tickets that have been merged when searching on a keyword in Content. This makes search work as expected, showing the merge target ticket. The schema for the MySQL full text index table was altered to denormalize and add the tickets' id and EffectiveId. The fulltext tools in sbin are updated to create and maintain the new index.
    
    Fixes T#170768

diff --git a/docs/UPGRADING-4.4 b/docs/UPGRADING-4.4
index 184c250..e725572 100644
--- a/docs/UPGRADING-4.4
+++ b/docs/UPGRADING-4.4
@@ -517,5 +517,28 @@ AfterUpdateCustomFieldValue. It will be removed in RT 4.6.
 
 =back
 
+=head1 UPGRADING FROM 4.4.1 AND EARLIER
+
+=over 4
+
+=item *
+
+A fix for using the MySQL full text index (F<docs/full_text_indexing.pod>) with
+merged tickets requires changes to the database schema and processing to update
+the data. After you upgrade the RT code, you must drop and re-create the index.
+To do this, run the setup command as you would during initial configuration of
+the fulltext index feature:
+
+    ./sbin/rt-setup-fulltext-index
+
+Please be aware that the F<sbin/rt-fulltext-indexer> tool which keeps the index
+updated requires the new schema. If you have that tool set to run automatically
+via cron or another means, it will fail after the RT upgrade until you have the
+database updated as instructed above. We recommend that you temporarily disable
+automatic running of F<rt-fulltext-indexer> until after you complete this
+upgrade.
+
+=back
+
 =cut
 
diff --git a/lib/RT/Tickets.pm b/lib/RT/Tickets.pm
index 73bf7f5..4ac6800 100644
--- a/lib/RT/Tickets.pm
+++ b/lib/RT/Tickets.pm
@@ -936,13 +936,34 @@ sub _TransContentLimit {
 
         my $alias;
         if ( $config->{'Table'} and $config->{'Table'} ne "Attachments") {
-            $alias = $self->{'_sql_aliases'}{'full_text'} ||= $self->Join(
-                TYPE   => 'LEFT',
-                ALIAS1 => $self->{'_sql_trattachalias'},
-                FIELD1 => 'id',
-                TABLE2 => $config->{'Table'},
-                FIELD2 => 'id',
-            );
+
+            if ($db_type eq 'mysql') {
+                my $attidx_alias = $self->NewAlias($config->{'Table'});
+
+                $alias = $self->Join(
+                    ALIAS1 => 'main',
+                    FIELD1 => 'id',
+                    ALIAS2 => $attidx_alias,
+                    FIELD2 => 'TicketId',
+                );
+                $self->Limit(
+                    LEFTJOIN => $alias,
+                    FIELD => 'EffectiveId',
+                    VALUE => 'main.id',
+                    QUOTEVALUE => 0,
+                    ENTRYAGGREGATOR => 'OR',
+                );
+
+                $self->{_sql_looking_at}{effectiveid} = 0;
+            } else { # not mysql
+                $alias = $self->{'_sql_aliases'}{'full_text'} ||= $self->Join(
+                    TYPE   => 'LEFT',
+                    ALIAS1 => $self->{'_sql_trattachalias'},
+                    FIELD1 => 'id',
+                    TABLE2 => $config->{'Table'},
+                    FIELD2 => 'id',
+                );
+            }
         } else {
             $alias = $self->{'_sql_trattachalias'};
         }
diff --git a/sbin/rt-fulltext-indexer.in b/sbin/rt-fulltext-indexer.in
old mode 100644
new mode 100755
index fb708ee..19200f4
--- a/sbin/rt-fulltext-indexer.in
+++ b/sbin/rt-fulltext-indexer.in
@@ -70,6 +70,7 @@ BEGIN { # BEGIN RT CMD BOILERPLATE
 use RT -init;
 use RT::Interface::CLI ();
 use HTML::Entities;
+use RT::System;
 
 use Getopt::Long qw(GetOptions);
 my %OPT = ( memory => '2M', limit => 0 );
@@ -161,7 +162,7 @@ if ($db_type eq 'mysql') {
 }
 
 sub attachment_loop {
-    my $subref = shift;
+    my ($contentindexer, $denormalizer) = @_;
     my $table = $fts_config->{'Table'};
     $LAST //= 0;
 
@@ -190,18 +191,61 @@ sub attachment_loop {
         $attachments->OrderBy( FIELD => 'id', ORDER => 'asc' );
         $attachments->RowsPerPage( $OPT{'limit'} );
 
-        # Call back to the DB-specific part
-        $subref->($attachments);
+        # Call back to the DB-specific part for content
+        $contentindexer->($attachments);
 
         $LAST = $attachments->Last->id if $attachments->Count;
 
+        my $db_type = RT->Config->Get('DatabaseType');
+        my $fts_config = $ENV{RT_FTS_CONFIG} ? JSON::from_json($ENV{RT_FTS_CONFIG})
+           : RT->Config->Get('FullTextSearch') || {};
+
+        if ($fts_config->{'Enable'} and $db_type eq 'mysql') {
+            # Narrow the query already used above to find only attachments that
+            # go with tickets.
+            # We use this to populate the denormalized ticket id and effective id
+            # in the fulltext-indexed table.
+
+            my $tik_alias = $attachments->Join(
+                ALIAS1 => $txn_alias,
+                FIELD1 => 'ObjectId',
+                TABLE2 => 'Tickets',
+                FIELD2 => 'id',
+            );
+
+            $attachments->Limit(
+                LEFTJOIN => $txn_alias,
+                FIELD => 'ObjectType',
+                OPERATOR => '=',
+                VALUE => 'RT::Ticket',
+                ENTRYAGGREGATOR => 'AND',
+            );
+
+            $attachments->AdditionalColumn(
+                ALIAS => $tik_alias,
+                FIELD => 'id',
+                AS => 'TicketId',
+            );
+
+            $attachments->AdditionalColumn(
+                ALIAS => $tik_alias,
+                FIELD => 'EffectiveId',
+                AS => 'EffectiveId',
+            );
+
+            $attachments->RedoSearch;
+
+            # Call back to the ticket ID denormalizer
+            $denormalizer->($attachments);
+        }
+
         redo if $OPT{'all'} and $attachments->Count == $OPT{'limit'};
     }
 }
 
 sub process_bulk_insert {
     my $dbh = $RT::Handle->dbh;
-    my ($statement, $error) = @_;
+    my ($statement, $update_st, $error) = @_;
 
     # Doing large inserts is faster than individual statements, but
     # comes at a parsing cost; cache the statement handles (99% of which
@@ -250,16 +294,101 @@ sub process_bulk_insert {
             eval { $sthandles{1}->execute( "", $id ) }
                 or die "Failed to insert empty row for attachment $id: " . $dbh->errstr;
         }
+    },
+    sub {
+        # Process denormalized ticket IDs and effective IDs
+        my ($attachments) = @_;
+        my @updates;
+        my $found = 0;
+
+        my $select_st = $attachments->BuildSelectQuery;
+        my $select_sth = $dbh->prepare($select_st);
+
+        $select_sth->execute() or die("Could not execute denormalizer select query: " . $dbh->errstr);
+
+        while (my $row = $select_sth->fetchrow_hashref) {
+            debug("Denormalizing attachment #" . $row->{id});
+            push @updates, ($row->{ticketid}, $row->{effectiveid}, $row->{id});
+            $found++;
+        }
+
+        return unless $found;
+
+        my $update_sth = $dbh->prepare($update_st->());
+
+        while (@updates) {
+            my ($ticketid, $effectiveid, $attachmentid) = splice(@updates,0,3);
+            next if eval { $update_sth->execute($ticketid, $effectiveid, $attachmentid); };
+            $error->($attachmentid, "TicketId:$ticketid EffectiveId:$effectiveid");
+        }
     });
 }
 
+sub process_merged_fulltext {
+    my $update = shift;
+    my $dbh = $RT::Handle->dbh;
+    my $table = $fts_config->{'Table'};
+
+    my $mark = RT->System->FirstAttribute( 'MergeTransactionMark' );
+    my $LAST = defined $mark ? $mark->Content : 0;
+
+    debug("Processing merged tickets for fulltext index - transaction mark is $LAST");
+
+    my $select_st = "SELECT t.id AS TicketId, t.EffectiveId AS EffectiveId, tn.id AS TransactionId "
+        ."FROM Transactions tn JOIN Tickets t ON tn.ObjectId=t.id "
+        ."WHERE tn.Type='AddLink' "
+        ."AND tn.Field='MergedInto' "
+        ."AND tn.ObjectType='RT::Ticket' "
+        ."AND t.id != t.EffectiveId "
+        ."AND tn.id > ? "
+        ."LIMIT ?";
+    my $select_sth = $dbh->prepare($select_st);
+
+    {
+        $select_sth->execute($LAST, $OPT{'limit'})
+            or die("Could not select merges for fulltext update: ".$dbh->errstr);
+
+        my $rows = $select_sth->rows;
+        debug("Found $rows merges to update in $table");
+        return unless $rows;
+
+        my $update_st = "$update $table SET EffectiveId=? WHERE TicketId=?";
+        my $update_sth = $dbh->prepare($update_st);
+
+        while (my $r = $select_sth->fetchrow_hashref) {
+            debug("Updating index with TicketId:".$r->{ticketid}." EffectiveId:".$r->{effectiveid});
+            $update_sth->execute($r->{effectiveid}, $r->{ticketid})
+                or die("Could not update $table for merged tickets: ".$dbh->errstr);
+            $LAST = $r->{transactionid} if $LAST < $r->{transactionid};
+        }
+
+        redo if $OPT{'all'} and $rows == $OPT{'limit'};
+    }
+
+    RT->System->SetAttribute( Name => 'MergeTransactionMark', Content => $LAST );
+    debug("Setting last processed transaction mark $LAST");
+}
+
 sub process_mysql {
     my $dbh = $RT::Handle->dbh;
     my $table = $fts_config->{'Table'};
 
+    {
+        # Check DB schema for merged ticket support before running
+        my $db_is_updated;
+        my $sth = $dbh->prepare("SHOW COLUMNS FROM $table");
+        $sth->execute();
+        while (my $row = $sth->fetchrow_hashref() ) {
+            $db_is_updated = 1 if $row->{field} eq 'EffectiveId';
+        }
+        die("Error: You must update your MySQL schema to use the full text indexer. See docs/UPGRADING-4.4 for details.")
+            unless $db_is_updated;
+    }
+
     ($LAST) = $dbh->selectrow_array("SELECT MAX(id) FROM $table");
 
     my $insert = $fts_config->{Engine} eq "MyISAM" ? "INSERT DELAYED" : "INSERT";
+    my $update = $fts_config->{Engine} eq "MyISAM" ? "UPDATE LOW_PRIORITY" : "UPDATE";
 
     process_bulk_insert(
         sub {
@@ -268,6 +397,9 @@ sub process_mysql {
                 . join(", ", ("(?,?)") x $n);
         },
         sub {
+            return "$update $table SET TicketId=?, EffectiveId=? WHERE id=?";
+        },
+        sub {
             my ($id) = @_;
             if ($dbh->err == 1366 and $dbh->state eq "HY000") {
                 warn "Attachment $id cannot be indexed. Most probably it contains invalid UTF8 bytes. ".
@@ -277,6 +409,10 @@ sub process_mysql {
             }
         }
     );
+
+    my $fulltext = $ENV{RT_FTS_CONFIG} ? JSON::from_json($ENV{RT_FTS_CONFIG})
+        : RT->Config->Get('FullTextSearch') || {};
+    process_merged_fulltext($update) if ($fulltext->{'Enable'});
 }
 
 
@@ -300,6 +436,7 @@ sub process_pg_insert {
             return "INSERT INTO $table($column, id) VALUES "
                 . join(", ", ("(TO_TSVECTOR(?),?)") x $n);
         },
+        sub { return; },
         sub {
             my ($id) = @_;
             if ( $dbh->err == 7 && $dbh->state eq '54000' ) {
diff --git a/sbin/rt-setup-fulltext-index.in b/sbin/rt-setup-fulltext-index.in
index 616cf40..82797c3 100644
--- a/sbin/rt-setup-fulltext-index.in
+++ b/sbin/rt-setup-fulltext-index.in
@@ -176,7 +176,12 @@ if ( $DB{'type'} eq 'mysql' ) {
     my $engine = $RT::Handle->dbh->{mysql_serverversion} < 50600 ? "MyISAM" : "InnoDB";
     my $schema = "CREATE TABLE $table ( "
         ."id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,"
-        ."Content LONGTEXT ) ENGINE=$engine CHARACTER SET utf8";
+        ."Content LONGTEXT,"
+        ."TicketId INT UNSIGNED,"
+        ."EffectiveId INT UNSIGNED,"
+        ."KEY TicketIdKey (TicketId),"
+        ."KEY EffectiveIdKey (EffectiveId)"
+        .") ENGINE=$engine CHARACTER SET utf8";
     insert_schema( $schema );
 
     insert_data( Table => $table, Engine => $engine );
diff --git a/t/fts/indexed_mysql.t b/t/fts/indexed_mysql.t
index 672b220..c239d9c 100644
--- a/t/fts/indexed_mysql.t
+++ b/t/fts/indexed_mysql.t
@@ -44,6 +44,9 @@ sub run_test {
     my ($query, %checks) = @_;
     my $query_prefix = join ' OR ', map 'id = '. $_->id, @tickets;
 
+    # Assumes we are called by run_tests() only
+    local $Test::Builder::Level = $Test::Builder::Level + 2;
+
     my $tix = RT::Tickets->new(RT->SystemUser);
     $tix->FromSQL( "( $query_prefix ) AND ( $query )" );
 
@@ -78,6 +81,64 @@ run_tests(
     "Content LIKE 'french'" => { first => 0, second => 1, third => 0, fourth => 0 },
 );
 
+# Check fulltext results for merged tickets
+# note: merge code itself is tested by ticket/merge.t
+{
+    $q = RT::Test->load_or_create_queue( Name => 'Mergers' );
+    ok ($q && $q->id, 'loaded or created queue Mergers');
+
+    # Create two tickets for merging, overwriting the @tickets used before to
+    # limit the subsequent search to just these ones
+    @tickets = RT::Test->create_tickets(
+        { Queue => $q->id },
+        { Subject => 'MergeSource1',     Content => 'pirates' },
+        { Subject => 'MergeSource2',     Content => 'ninjas'  },
+        { Subject => 'MergeDestination', Content => 'robots'  },
+    );
+
+    sync_index();
+
+    # Make sure we can see tickets as expected before the merge happens
+    run_tests(
+        "Content LIKE 'pirates'" => { MergeSource1 => 1, MergeSource2 => 0, MergeDestination => 0 },
+        "Content LIKE 'ninjas'"  => { MergeSource1 => 0, MergeSource2 => 1, MergeDestination => 0 },
+        "Content LIKE 'robots'"  => { MergeSource1 => 0, MergeSource2 => 0, MergeDestination => 1 },
+    );
+
+    my $source1 = $tickets[0];
+    my $source2 = $tickets[1];
+    my $destination = $tickets[2];
+
+    # Sanity check the array indices mean what we think
+    ok ($source1->Subject eq 'MergeSource1', 'Subject of tickets[0] should be MergeSource1');
+    ok ($source2->Subject eq 'MergeSource2', 'Subject of tickets[1] should be MergeSource2');
+    ok ($destination->Subject eq 'MergeDestination', 'Subject of tickets[2] should be MergeDestination');
+
+    # First merge
+    my ($id,$m) = $source1->MergeInto( $destination->id );
+    ok ($id,$m);
+
+    sync_index();
+
+    run_tests(
+        "Content LIKE 'pirates'" => { MergeSource1 => 0, MergeSource2 => 0, MergeDestination => 1 },
+        "Content LIKE 'ninjas'"  => { MergeSource1 => 0, MergeSource2 => 1, MergeDestination => 0 },
+        "Content LIKE 'robots'"  => { MergeSource1 => 0, MergeSource2 => 0, MergeDestination => 1 },
+    );
+
+    # Second merge
+    ($id,$m) = $source2->MergeInto( $destination->id );
+    ok ($id,$m);
+
+    sync_index();
+
+    run_tests(
+        "Content LIKE 'pirates'" => { MergeSource1 => 0, MergeSource2 => 0, MergeDestination => 1 },
+        "Content LIKE 'ninjas'"  => { MergeSource1 => 0, MergeSource2 => 0, MergeDestination => 1 },
+        "Content LIKE 'robots'"  => { MergeSource1 => 0, MergeSource2 => 0, MergeDestination => 1 },
+    );
+}
+
 @tickets = ();
 
 done_testing;
diff --git a/t/ticket/merge.t b/t/ticket/merge.t
index 99c723b..ce52739 100644
--- a/t/ticket/merge.t
+++ b/t/ticket/merge.t
@@ -6,6 +6,7 @@ use warnings;
 use RT;
 use RT::Test tests => '44';
 
+# note: merge code also exercised in fts/index_mysql.t
 
 # validate that when merging two tickets, the comments from both tickets
 # are integrated into the new ticket

-----------------------------------------------------------------------


More information about the rt-commit mailing list