[Rt-commit] rt branch 5.0/split-fulltext-searches created. rt-5.0.5-147-g8788a9923d

BPS Git Server git at git.bestpractical.com
Tue Jan 23 22:23:22 UTC 2024


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "rt".

The branch, 5.0/split-fulltext-searches has been created
        at  8788a9923d69c0906b8b5aea6c2721031ef5dad7 (commit)

- Log -----------------------------------------------------------------
commit 8788a9923d69c0906b8b5aea6c2721031ef5dad7
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Sat Jan 20 03:52:11 2024 -0500

    Split fulltext search terms for both correctness and performance
    
    Previously for searches like:
    
        Content LIKE "foo" AND Content LIKE "bar"
    
    If a ticket has "foo" saved in one attachment and "bar" in another, it
    wouldn't match because these fulltext criteria needed to match on a single
    attachment. What we want is to match on ticket level, i.e. as long as the
    ticket has "foo" and "bar" saved in some attachments of the given ticket, it
    should match.
    
    Thus we split the query into 2 and intersect them:
    
        (Content LIKE "foo") INTERSECT (Content LIKE "bar")
    
    For searches like:
    
        Content LIKE "foo" OR Subject LIKE "bar"
    
    We split the query and unite them accordingly:
    
        (Content LIKE "foo") UNION (Subject LIKE "bar")
    
    The latter version has much better performance as it makes use of fulltext
    indexes, unfortunately the former version can not right now.
    
    See also 8450f0a9f2

diff --git a/lib/RT/Interface/Web/QueryBuilder/Tree.pm b/lib/RT/Interface/Web/QueryBuilder/Tree.pm
index 2b1f8cb5c1..8b1d4b7564 100644
--- a/lib/RT/Interface/Web/QueryBuilder/Tree.pm
+++ b/lib/RT/Interface/Web/QueryBuilder/Tree.pm
@@ -404,6 +404,147 @@ sub ParseSQL {
     return @results;
 }
 
+
+=head2 Split Type => intersect|union, Fields => [FIELD1, FIELD2, ...]
+
+E.g. to split "AND" Content terms: Type => 'insersect', Fields => ['Content']
+
+  Status = "open" AND Content LIKE "foo" AND Content LIKE "bar"
+
+will be split into 2 subqueries:
+
+   Status = "open" AND Content LIKE "foo"
+   Status = "open" AND Content LIKE "bar"
+
+then they can be joined via "INTERSECT".
+
+To split "OR" Content terms: Type => 'union', Fields => ['Content']
+
+    Content LIKE "foo" OR Subject LIKE "foo"
+
+will be split into 2 subqueries:
+
+    Content LIKE "foo"
+    Subject LIKE "foo"
+
+then they can be joined via "UNION". Unlike the original version, the new SQL
+can make use of fulltext indexes.
+
+Note that queries like:
+
+    Content LIKE "foo" AND Subject LIKE "foo"
+
+will not be split as there are no benifits, unlike the C<OR> example above.
+
+=cut
+
+sub Split {
+    my $self = shift;
+    my %args = (
+        Type   => undef,
+        Fields => undef,
+        @_,
+    );
+
+    if ( !$args{Type} ) {
+        RT->Logger->warning("Missing Type, skipping");
+        return $self;
+    }
+
+    if ( $args{Type} !~ /^(?:intersect|union)$/i ) {
+        RT->Logger->warning("Unsupported type $args{Type}, should be 'intersect' or 'union', skipping");
+        return $self;
+    }
+
+    if ( !$args{Fields} || @{ $args{Fields} } == 0 ) {
+        RT->Logger->warning("Missing Fields, skipping");
+        return $self;
+    }
+
+    my @items;
+
+    my $relation = lc $args{Type} eq 'intersect' ? 'and' : 'or';
+
+    $self->traverse(
+        sub {
+            my $node = shift;
+            return unless $node->isLeaf;
+
+            if ( grep { lc $node->getNodeValue->{Key} eq lc $_ } @{ $args{Fields} } ) {
+                $node = $node->getParent;
+                if ( lc( $node->getNodeValue // '' ) eq $relation ) {
+                    my @children = $node->getAllChildren;
+
+                    my @splits;
+                    my @others;
+                    for my $child (@children) {
+                        if ( $child->isLeaf && grep { lc $child->getNodeValue->{Key} eq lc $_ } @{ $args{Fields} } )
+                        {
+                            push @splits, $child;
+                        }
+                        else {
+                            push @others, $child;
+                        }
+                    }
+
+                    # Split others from split fields only if it's "OR" like "Content LIKE 'foo' OR Subject LIKE 'foo'"
+                    return unless @splits > 1 || ( $relation eq 'or' && @splits + @others > 1 );
+
+                    my $parent = $node->getParent;
+
+                    my @list;
+
+                    if ( $relation eq 'and' ) {
+                        if ( @others ) {
+                            for my $item ( @splits ) {
+                                my $new = RT::Interface::Web::QueryBuilder::Tree->new( $relation, 'root');
+                                $new->addChild($item);
+                                $new->addChild($_->clone) for @others;
+                                push @list, $new;
+                            }
+                        }
+                        else {
+                            @list = @splits;
+                        }
+                    }
+                    else {
+                        @list = @splits;
+                        if (@others) {
+                            my $others = RT::Interface::Web::QueryBuilder::Tree->new( $relation, 'root' );
+                            $others->addChild( $_->clone ) for @others;
+                            push @list, $others;
+                        }
+                    }
+
+                    if ( $parent eq 'root' ) {
+                        for my $item ( @list ) {
+                            my $new = RT::Interface::Web::QueryBuilder::Tree->new( $relation, 'root');
+                            $new->addChild($item->clone);
+                            push @items, $new->clone->Split(%args);
+                        }
+                    }
+                    else {
+                        my $index = $node->getIndex;
+                        $parent->removeChild($node);
+
+                        for my $item ( @list ) {
+                            $parent->insertChild( $index, $item );
+                            push @items, $self->clone->Split(%args);
+                            $parent->removeChild($item);
+                        }
+                    }
+
+                    return 'ABORT';
+                }
+            }
+        }
+    );
+
+    return @items ? @items : $self;
+}
+
+
+
 RT::Base->_ImportOverlays();
 
 1;
diff --git a/lib/RT/Report.pm b/lib/RT/Report.pm
index 5e76fa29ed..eb11f7aa68 100644
--- a/lib/RT/Report.pm
+++ b/lib/RT/Report.pm
@@ -583,7 +583,7 @@ sub SetupGroupings {
         # within the matching tickets grouped by what is wanted.
         $self->Columns( 'id' );
         if ( RT->Config->Get('UseSQLForACLChecks') ) {
-            my $query = $self->BuildSelectQuery( PreferBind => 0 );
+            my $query = $self->{_split_query} || $self->BuildSelectQuery( PreferBind => 0 );
             $self->CleanSlate;
             $self->Limit( FIELD => 'Id', OPERATOR => 'IN', VALUE => "($query)", QUOTEVALUE => 0 );
         }
diff --git a/lib/RT/SearchBuilder.pm b/lib/RT/SearchBuilder.pm
index 6a707ace01..78844ccdfe 100644
--- a/lib/RT/SearchBuilder.pm
+++ b/lib/RT/SearchBuilder.pm
@@ -96,6 +96,8 @@ sub CleanSlate {
     $self->{'_sql_aliases'} = {};
     delete $self->{'handled_disabled_column'};
     delete $self->{'find_disabled_rows'};
+    delete $self->{'_split_query'};
+    delete $self->{'_untamed_order_by'};
     return $self->SUPER::CleanSlate(@_);
 }
 
@@ -1190,6 +1192,106 @@ sub CurrentUserCanSeeAll {
     return $self->CurrentUser->HasRight( Right => 'SuperUser', Object => RT->System ) ? 1 : 0;
 }
 
+sub BuildSelectQuery {
+    my $self = shift;
+    return $self->_BuildQuery('BuildSelectQuery', @_);
+}
+
+sub BuildSelectCountQuery {
+    my $self = shift;
+    return $self->_BuildQuery('BuildSelectCountQuery', @_);
+}
+
+sub BuildSelectAndCountQuery {
+    my $self = shift;
+    return $self->_BuildQuery('BuildSelectAndCountQuery', @_);
+}
+
+sub _BuildQuery {
+    my $self   = shift;
+    my $method = shift;
+    if ( my $query = $self->{_split_query} ) {
+        my $objects = $self->new( $self->CurrentUser );
+        $objects->Limit( FIELD => 'id', VALUE => "($query)", OPERATOR => 'IN', QUOTEVALUE => 0 );
+
+        # Sync page and columns related info
+        $objects->{$_} = $self->{$_} for qw/first_row show_rows columns/;
+
+        # Sync order by
+        if ( $self->{_untamed_order_by} ) {
+            $objects->OrderByCols( @{ $self->{_untamed_order_by} } );
+        }
+        elsif ( $self->{_order_by} ) {
+            $objects->OrderByCols( @{ $self->{_order_by} } );
+        }
+
+        my $query = $objects->$method(@_);
+        $self->{_bind_values} = $objects->{_bind_values};
+        return $query;
+    }
+    my $super = "SUPER::$method";
+    return $self->$super(@_);
+}
+
+sub _SplitQuery {
+    my $self = shift;
+
+    # SQLite doesn't support user defined precedences via parens for INTERSECT/UNION operations.
+    return if RT->Config->Get('DatabaseType') eq 'SQLite';
+
+    my $query = shift;
+    return unless $query;
+
+    return unless $self->can('SplitFields');
+    my @fields = $self->SplitFields;
+    return unless @fields;
+
+    my $tree = RT::Interface::Web::QueryBuilder::Tree->new;
+    $tree->ParseSQL(
+        Query       => $query,
+        CurrentUser => $self->CurrentUser,
+        Class       => ref $self,
+    );
+
+    my $split;
+    my %intersect_exists;
+    my @union_queries;
+    for my $intersect ( $tree->Split( Type => 'intersect', Fields => \@fields ) ) {
+        my $query = join ' ', map { $_->{TEXT} } @{ $intersect->__LinearizeTree };
+        next if $intersect_exists{$query}++;
+
+        my @union_selects;
+        my %union_exists;
+        for my $union ( $intersect->Split( Type => 'union', Fields => \@fields ) ) {
+            my $query = join ' ', map { $_->{TEXT} } @{ $union->__LinearizeTree };
+            next if $union_exists{$query}++;
+
+            my @intersects = $union->Split( Type => 'intersect', Fields => \@fields );
+            if ( @intersects > 1 ) {
+                push @union_selects, $self->_SplitQuery($query);
+            }
+            else {
+                my $collection = $self->new( $self->CurrentUser );
+                $collection->{_no_split} = 1;
+
+                $collection->FromSQL($query);
+                $collection->OrderByCols();
+                $collection->CurrentUserCanSee
+                    if RT->Config->Get('UseSQLForACLChecks') && $collection->can('CurrentUserCanSee');
+                $collection->Columns('id');
+                push @union_selects, $collection->BuildSelectQuery( PreferBind => 0 );
+            }
+        }
+        push @union_queries, '( ' . join( ' UNION ', @union_selects ) . ' )';
+        $split = 1 if @union_selects > 1;
+    }
+
+    $split = 1 if @union_queries > 1;
+    return unless $split; # Return empty if the query is not split.
+
+    return '(' . join( ' INTERSECT ', @union_queries ) . ')';
+}
+
 RT::Base->_ImportOverlays();
 
 1;
diff --git a/lib/RT/Tickets.pm b/lib/RT/Tickets.pm
index 275fe6653a..4300a4de57 100644
--- a/lib/RT/Tickets.pm
+++ b/lib/RT/Tickets.pm
@@ -271,6 +271,23 @@ sub SortFields {
     return (@SORTFIELDS);
 }
 
+=head2 SplitFields
+
+Returns the list of fields that are supposed to be split into individual
+subqueries and then combined later.
+
+If fulltext search is enabled, it returns C<Content>, otherwise it returns
+an empty list.
+
+=cut
+
+sub SplitFields {
+    my $self = shift;
+    my $config = RT->Config->Get('FullTextSearch') || {};
+    return 'Content' if $config->{Enable};
+    return;
+}
+
 
 # BEGIN SQL STUFF *********************************
 
@@ -1617,6 +1634,9 @@ sub OrderByCols {
     my @res   = ();
     my $order = 0;
 
+    # Save original args so we can redo OrderByCols later for split queries, especially to joins tables if needed.
+    $self->{_untamed_order_by} = \@_;
+
     foreach my $row (@args) {
         if ( $row->{ALIAS} ) {
             push @res, $row;
@@ -3719,6 +3739,12 @@ sub FromSQL {
         );
     }
 
+    if ( !$self->{_no_split} ) {
+        if ( my $split_query = $self->_SplitQuery($query) ) {
+            $self->{_split_query} = $split_query;
+        }
+    }
+
     # set SB's dirty flag
     $self->{'must_redo_search'} = 1;
     $self->{'RecalcTicketLimits'} = 0;
diff --git a/lib/RT/Transactions.pm b/lib/RT/Transactions.pm
index e03d2995a8..4ca82abad6 100644
--- a/lib/RT/Transactions.pm
+++ b/lib/RT/Transactions.pm
@@ -253,6 +253,23 @@ sub SortFields {
     return (@SORTFIELDS);
 }
 
+=head2 SplitFields
+
+Returns the list of fields that are supposed to be split into individual
+subqueries and then combined later.
+
+If fulltext search is enabled, it returns C<Content>, otherwise it returns
+an empty list.
+
+=cut
+
+sub SplitFields {
+    my $self = shift;
+    my $config = RT->Config->Get('FullTextSearch') || {};
+    return 'Content' if $config->{Enable};
+    return;
+}
+
 =head1 Limit Helper Routines
 
 These routines are the targets of a dispatch table depending on the
@@ -1179,6 +1196,12 @@ sub FromSQL {
         return (0, $error);
     }
 
+    if ( !$self->{_no_split} ) {
+        if ( my $split_query = $self->_SplitQuery($query) ) {
+            $self->{_split_query} = $split_query;
+        }
+    }
+
     # set SB's dirty flag
     $self->{'must_redo_search'} = 1;
 

-----------------------------------------------------------------------


hooks/post-receive
-- 
rt


More information about the rt-commit mailing list