[Rt-commit] r19117 - in rt/3.999/trunk: . etc lib/RT lib/RT/Model sbin

ruz at bestpractical.com ruz at bestpractical.com
Thu Apr 9 13:29:07 EDT 2009


Author: ruz
Date: Thu Apr  9 13:29:07 2009
New Revision: 19117

Added:
   rt/3.999/trunk/sbin/rt-fulltext-indexer
   rt/3.999/trunk/sbin/rt-setup-fulltext-index
Modified:
   rt/3.999/trunk/   (props changed)
   rt/3.999/trunk/TODO
   rt/3.999/trunk/etc/RT_Config.pm
   rt/3.999/trunk/lib/RT/Collection.pm
   rt/3.999/trunk/lib/RT/Model/AttachmentCollection.pm
   rt/3.999/trunk/lib/RT/Model/TicketCollection.pm
   rt/3.999/trunk/lib/RT/ScripAction/SendEmail.pm

Log:
* merge Full Text Search branch
 r19072 at Macintosh (orig r19063):  ruz | 2009-04-07 00:09:15 +0400
 * full text search branch
 r19074 at Macintosh (orig r19065):  ruz | 2009-04-07 00:54:04 +0400
 * move caching of some aliases into RT::Collection
 * add join_transactions method
 r19075 at Macintosh (orig r19066):  ruz | 2009-04-07 00:56:57 +0400
 * use ->{_sql_aliases} for all cached aliases
 * factor out _trans_content_limit from _trans_limit
 r19076 at Macintosh (orig r19067):  ruz | 2009-04-07 01:10:32 +0400
 * switch to TRANSCONTENT type
 r19094 at Macintosh (orig r19085):  ruz | 2009-04-07 23:32:32 +0400
 * implement FTS for mysql and Pg
 r19095 at Macintosh (orig r19086):  ruz | 2009-04-08 00:08:21 +0400
 * add sbin/rt-setup-fulltext-index script - quick help to setup
   required database objects for full text searching
 r19096 at Macintosh (orig r19087):  ruz | 2009-04-08 00:39:01 +0400
 * update todo
 r19108 at Macintosh (orig r19099):  ruz | 2009-04-08 18:45:36 +0400
 * refactor new tool a little
 * generate simple sphinx config for standalone RT's sphinx server
 r19109 at Macintosh (orig r19100):  ruz | 2009-04-08 18:46:27 +0400
 * add FullTextSearch to the RT main config
 r19110 at Macintosh (orig r19101):  ruz | 2009-04-08 18:47:11 +0400
 * use one column name for different DBs
 r19111 at Macintosh (orig r19102):  ruz | 2009-04-08 18:58:49 +0400
 * first incomplete version of indexer
 r19120 at Macintosh (orig r19111):  ruz | 2009-04-09 18:13:05 +0400
 * it's very unlikely that attachments will ever have their own transactions,
   so make the join_transactions method (every collection has it now) join
   the transactions these attachments belong to
 r19121 at Macintosh (orig r19112):  ruz | 2009-04-09 18:18:27 +0400
 * extend select a little bit to search only attachments of tickets
   and only of those that are not deleted
 r19122 at Macintosh (orig r19113):  ruz | 2009-04-09 18:19:40 +0400
 * complete indexer API, now works on mysql and Pg, needs docs and update_last_indexed


Modified: rt/3.999/trunk/TODO
==============================================================================
--- rt/3.999/trunk/TODO	(original)
+++ rt/3.999/trunk/TODO	Thu Apr  9 13:29:07 2009
@@ -11,4 +11,5 @@
 (Type = Set, Field = Status). I think it should be fixed
 to the latter.
 
+RT::load_config and other in RT package should be replaced with RT->load_config
 

Modified: rt/3.999/trunk/etc/RT_Config.pm
==============================================================================
--- rt/3.999/trunk/etc/RT_Config.pm	(original)
+++ rt/3.999/trunk/etc/RT_Config.pm	Thu Apr  9 13:29:07 2009
@@ -174,6 +174,25 @@
 
 set($UseSQLForACLChecks, undef);
 
+=item C<%FullTextSearch>
+
+Full text search (FTS) without indexes is a slow operation and is disabled
+by default. To enable FTS set the 'Enable' key to a true value.
+
+Setup of indexes and filling them with data requires different steps for
+different database back-ends. Use F<sbin/rt-setup-fulltext-index> helper
+for quick start. This script creates required structures in the DB and
+gives some ideas on next steps.
+
+=cut
+
+set(%FullTextSearch,
+    Enable  => 0,
+    Indexed => 0,
+#    Table   => 'AttachmentsIndex',
+#    Column  => 'ftsindex',
+);
+
 =back
 
 =head1 Incoming Mail Gateway Configuration

Modified: rt/3.999/trunk/lib/RT/Collection.pm
==============================================================================
--- rt/3.999/trunk/lib/RT/Collection.pm	(original)
+++ rt/3.999/trunk/lib/RT/Collection.pm	Thu Apr  9 13:29:07 2009
@@ -305,5 +305,35 @@
     return $self->items_order_by( $self->SUPER::items_array_ref() );
 }
 
+# Drops every cached SQL alias, then hands over to the parent's clean_slate.
+sub clean_slate {
+    my ( $self, @args ) = @_;
+    $self->{'_sql_aliases'} = {};
+    return $self->SUPER::clean_slate(@args);
+}
+
+# Joins transactions to the main table (main.id = object_id) with the
+# object_type condition set to the class of this collection's items, and
+# returns the alias of the join.
+# The alias is cached in $self->{'_sql_aliases'}{'transactions'} and reused
+# on later calls; pass new => 1 to get a fresh join that is not cached.
+sub join_transactions {
+    my $self  = shift;
+    my %args  = ( new => 0, @_ );
+    my $reuse = !$args{'new'};
+
+    if ( $reuse && $self->{'_sql_aliases'}{'transactions'} ) {
+        return $self->{'_sql_aliases'}{'transactions'};
+    }
+
+    my $txn_alias = $self->join(
+        alias1  => 'main',
+        column1 => 'id',
+        table2  => RT::Model::TransactionCollection->new,
+        column2 => 'object_id',
+    );
+    $self->limit(
+        leftjoin => $txn_alias,
+        alias    => $txn_alias,
+        column   => 'object_type',
+        value    => ref( $self->new_item ),
+    );
+
+    $self->{'_sql_aliases'}{'transactions'} = $txn_alias if $reuse;
+    return $txn_alias;
+}
 
 1;

Modified: rt/3.999/trunk/lib/RT/Model/AttachmentCollection.pm
==============================================================================
--- rt/3.999/trunk/lib/RT/Model/AttachmentCollection.pm	(original)
+++ rt/3.999/trunk/lib/RT/Model/AttachmentCollection.pm	Thu Apr  9 13:29:07 2009
@@ -85,35 +85,6 @@
     return $self->SUPER::_init(@_);
 }
 
-sub clean_slate {
-    my $self = shift;
-    delete $self->{_sql_transaction_alias};
-    return $self->SUPER::clean_slate(@_);
-}
-
-=head2 transaction_alias
-
-Returns alias for transactions table with applied join condition.
-Always return the same alias, so if you want to build some complex
-or recursive joining then you have to create new alias youself.
-
-=cut
-
-sub transaction_alias {
-    my $self = shift;
-    return $self->{'_sql_transaction_alias'}
-        if $self->{'_sql_transaction_alias'};
-
-    my $res = $self->new_alias('Transactions');
-    $self->limit(
-        entry_aggregator => 'AND',
-        column           => 'transaction_id',
-        value            => $res . '.id',
-        quote_value      => 0,
-    );
-    return $self->{'_sql_transaction_alias'} = $res;
-}
-
 =head2 content_type (value => 'text/plain', entry_aggregator => 'OR', operator => '=' ) 
 
 Limit result set to attachments of content_type 'TYPE'...
@@ -185,7 +156,7 @@
     my $self = shift;
     my $tid  = shift;
 
-    my $transactions = $self->transaction_alias;
+    my $transactions = $self->join_transactions;
     $self->limit(
         entry_aggregator => 'AND',
         alias            => $transactions,
@@ -234,5 +205,32 @@
     return $self->next;
 }
 
+=head2 join_transactions
+
+Returns alias for transactions table with applied join condition.
+Always returns the same (cached) alias unless called with C<< new => 1 >>;
+pass that when you need a separate alias for complex or recursive joins.
+
+=cut
+
+sub join_transactions {
+    my $self  = shift;
+    my %args  = ( new => 0, @_ );
+    my $reuse = !$args{'new'};
+
+    # hand back the cached alias unless the caller asked for a new join
+    if ( $reuse && $self->{'_sql_aliases'}{'transactions'} ) {
+        return $self->{'_sql_aliases'}{'transactions'};
+    }
+
+    # attachments point at their transaction through transaction_id
+    my $txn_alias = $self->join(
+        alias1  => 'main',
+        column1 => 'transaction_id',
+        table2  => RT::Model::TransactionCollection->new,
+        column2 => 'id',
+    );
+
+    $self->{'_sql_aliases'}{'transactions'} = $txn_alias if $reuse;
+    return $txn_alias;
+}
+
 
 1;

Modified: rt/3.999/trunk/lib/RT/Model/TicketCollection.pm
==============================================================================
--- rt/3.999/trunk/lib/RT/Model/TicketCollection.pm	(original)
+++ rt/3.999/trunk/lib/RT/Model/TicketCollection.pm	Thu Apr  9 13:29:07 2009
@@ -128,7 +128,7 @@
     LastUpdated    => [ 'DATE'         => 'last_updated', ],    #loc_left_pair
     Created         => [ 'DATE'         => 'Created', ],        #loc_left_pair
     Subject         => [ 'STRING', ],                           #loc_left_pair
-    Content         => [ 'TRANSFIELD', ],                       #loc_left_pair
+    Content         => [ 'TRANSCONTENT', ],                     #loc_left_pair
     ContentType    => [ 'TRANSFIELD', ],                        #loc_left_pair
     Filename        => [ 'TRANSFIELD', ],                       #loc_left_pair
     TransactionDate => [ 'TRANSDATE', ],                        #loc_left_pair
@@ -166,6 +166,7 @@
     DATE            => \&_date_limit,
     STRING          => \&_string_limit,
     TRANSFIELD      => \&_trans_limit,
+    TRANSCONTENT    => \&_trans_content_limit,
     TRANSDATE       => \&_trans_date_limit,
     WATCHERFIELD    => \&_watcher_limit,
     MEMBERSHIPFIELD => \&_watcher_membership_limit,
@@ -241,18 +242,8 @@
 
 sub clean_slate {
     my $self = shift;
-    $self->SUPER::clean_slate(@_);
-    delete $self->{$_} foreach qw(
-        _sql_cf_alias
-        _sql_group_members_aliases
-        _sql_object_cfv_alias
-        _sql_role_group_aliases
-        _sql_transalias
-        _sql_trattachalias
-        _sql_u_watchers_alias_for_sort
-        _sql_u_watchers_aliases
-        _sql_current_user_can_see_applied
-    );
+    delete $self->{'_sql_current_user_can_see_applied'};
+    return $self->SUPER::clean_slate(@_);
 }
 
 =head1 Limit Helper Routines
@@ -604,28 +595,14 @@
 
 # This routine should really be factored into translimit.
 sub _trans_date_limit {
-    my ( $sb, $field, $op, $value, @rest ) = @_;
+    my ( $self, $field, $op, $value, @rest ) = @_;
 
     # See the comments for TransLimit, they apply here too
-
-    unless ( $sb->{_sql_transalias} ) {
-        $sb->{_sql_transalias} = $sb->join(
-            alias1  => 'main',
-            column1 => 'id',
-            table2  => RT::Model::TransactionCollection->new,
-            column2 => 'object_id',
-        );
-        $sb->SUPER::limit(
-            alias            => $sb->{_sql_transalias},
-            column           => 'object_type',
-            value            => 'RT::Model::Ticket',
-            entry_aggregator => 'AND',
-        );
-    }
+    my $txn_alias = $self->join_transactions;
 
     my $date = RT::DateTime->new_from_string($value);
 
-    $sb->open_paren;
+    $self->open_paren;
     if ( $op eq "=" ) {
 
         # if we're specifying =, that means we want everything on a
@@ -636,16 +613,16 @@
         my $daystart = $date->iso;
         my $dayend = $date->add(days => 1)->iso;
 
-        $sb->_sql_limit(
-            alias          => $sb->{_sql_transalias},
+        $self->_sql_limit(
+            alias          => $txn_alias,
             column         => 'created',
             operator       => ">=",
             value          => $daystart,
             case_sensitive => 0,
             @rest
         );
-        $sb->_sql_limit(
-            alias          => $sb->{_sql_transalias},
+        $self->_sql_limit(
+            alias          => $txn_alias,
             column         => 'created',
             operator       => "<=",
             value          => $dayend,
@@ -660,8 +637,8 @@
     else {
 
         #Search for the right field
-        $sb->_sql_limit(
-            alias          => $sb->{_sql_transalias},
+        $self->_sql_limit(
+            alias          => $txn_alias,
             column         => 'created',
             operator       => $op,
             value          => $date->iso,
@@ -670,7 +647,7 @@
         );
     }
 
-    $sb->close_paren;
+    $self->close_paren;
 }
 
 =head2 _trans_limit
@@ -717,25 +694,45 @@
     # way they get parsed in the tree they're in different subclauses.
 
     my ( $self, $field, $op, $value, @rest ) = @_;
+    my $txn_alias = $self->join_transactions;
 
-    unless ( $self->{_sql_transalias} ) {
-        $self->{_sql_transalias} = $self->join(
-            alias1  => 'main',
+    unless ( defined $self->{'_sql_aliases'}{'attachments'} ) {
+        $self->{'_sql_aliases'}{'attachments'} = $self->_sql_join(
+            type    => 'left',                                 # not all txns have an attachment
+            alias1  => $txn_alias,
             column1 => 'id',
-            table2  => RT::Model::TransactionCollection->new,
-            column2 => 'object_id',
-        );
-        $self->SUPER::limit(
-            alias            => $self->{_sql_transalias},
-            column           => 'object_type',
-            value            => 'RT::Model::Ticket',
-            entry_aggregator => 'AND',
+            table2  => RT::Model::AttachmentCollection->new,
+            column2 => 'transaction_id',
         );
     }
-    unless ( defined $self->{_sql_trattachalias} ) {
-        $self->{_sql_trattachalias} = $self->_sql_join(
-            type    => 'left',                                 # not all txns have an attachment
-            alias1  => $self->{_sql_transalias},
+
+    $self->_sql_limit(
+        alias            => $self->{'_sql_aliases'}{'attachments'},
+        column           => $field,
+        operator         => $op,
+        value            => $value,
+        case_sensitive   => 0,
+        @rest
+    );
+}
+
+=head2 _trans_content_limit
+
+Limit based on the content of a transaction.
+
+=cut
+
+sub _trans_content_limit {
+    my ( $self, $field, $op, $value, @rest ) = @_;
+
+    my $config = RT->config->get('FullTextSearch') || {};
+    return unless $config->{'Enable'};
+
+    my $txn_alias = $self->join_transactions;
+    unless ( defined $self->{'_sql_aliases'}{'attachments'} ) {
+        $self->{'_sql_aliases'}{'attachments'} = $self->_sql_join(
+            type    => 'left',
+            alias1  => $txn_alias,
             column1 => 'id',
             table2  => RT::Model::AttachmentCollection->new,
             column2 => 'transaction_id',
@@ -744,42 +741,68 @@
 
     $self->open_paren;
 
-    #Search for the right field
-    if ( $field eq 'content'
-        and RT->config->get('DontSearchFileAttachments') )
-    {
-        $self->_sql_limit(
-            alias            => $self->{_sql_trattachalias},
-            column           => 'filename',
-            operator         => 'IS',
-            value            => 'NULL',
-            subclause        => 'contentquery',
-            entry_aggregator => 'AND',
-        );
-        $self->_sql_limit(
-            alias          => $self->{_sql_trattachalias},
-            column         => $field,
-            operator       => $op,
-            value          => $value,
-            case_sensitive => 0,
-            @rest,
-            entry_aggregator => 'AND',
-            subclause        => 'contentquery',
-        );
+    if ( $config->{'Indexed'} ) {
+        my $db_type = RT->config->get('DatabaseType');
+        my $alias;
+        if ( $config->{'Table'} ) {
+            $alias = $self->{'_sql_aliases'}{'full_text'} ||= $self->_sql_join(
+                type    => 'left',
+                alias1  => $self->{'_sql_aliases'}{'attachments'},
+                column1 => 'id',
+                table2  => $config->{'Table'},
+                column2 => 'id',
+            );
+        } else {
+            $alias = $self->{'_sql_aliases'}{'attachments'};
+        }
+        my $column = $config->{'Column'} || 'fts_index';
+        if ( $db_type eq 'mysql' ) {
+            $self->_sql_limit(
+                alias            => $alias,
+                column           => $column,
+                operator         => '=',
+                value            => $value,
+                @rest
+            );
+        }
+        elsif ( $db_type eq 'Pg' ) {
+            my $dbh = $self->_handle->dbh;
+            #XXX: handle negative searches
+            $self->_sql_limit(
+                alias       => $alias,
+                column      => $column,
+                operator    => '@@',
+                value       => 'plainto_tsquery('. $dbh->quote($value) .')',
+                quote_value => 0,
+                @rest
+            );
+        }
+        else {
+            die "Indexed full text search is not supported for $db_type";
+        }
     } else {
         $self->_sql_limit(
-            alias            => $self->{_sql_trattachalias},
+            alias            => $self->{'_sql_aliases'}{'attachments'},
             column           => $field,
             operator         => $op,
             value            => $value,
             case_sensitive   => 0,
-            entry_aggregator => 'AND',
             @rest
         );
     }
 
-    $self->close_paren;
+    if ( RT->config->get('DontSearchFileAttachments') ) {
+        $self->_sql_limit(
+            alias            => $self->{'_sql_aliases'}{'attachments'},
+            column           => 'filename',
+            operator         => 'IS',
+            value            => 'NULL',
+            @rest,
+            entry_aggregator => 'AND',
+        );
+    }
 
+    $self->close_paren;
 }
 
 =head2 _watcher_limit
@@ -915,9 +938,9 @@
             new          => 0,
         );
 
-        my $users = $self->{'_sql_u_watchers_aliases'}{$group_members};
+        my $users = $self->{'_sql_aliases'}{'u_watchers'}{$group_members};
         unless ($users) {
-            $users = $self->{'_sql_u_watchers_aliases'}{$group_members} = $self->new_alias( RT::Model::UserCollection->new );
+            $users = $self->{'_sql_aliases'}{'u_watchers'}{$group_members} = $self->new_alias( RT::Model::UserCollection->new );
             $self->SUPER::limit(
                 leftjoin    => $group_members,
                 alias       => $group_members,
@@ -960,9 +983,9 @@
 sub _role_groupsjoin {
     my $self = shift;
     my %args = ( new => 0, class => 'ticket', type => '', @_ );
-    return $self->{'_sql_role_group_aliases'}
+    return $self->{'_sql_aliases'}{'role_group'}
       { $args{'class'} . '-' . $args{'type'} }
-      if $self->{'_sql_role_group_aliases'}
+      if $self->{'_sql_aliases'}{'role_group'}
           { $args{'class'} . '-' . $args{'type'} }
           && !$args{'new'};
     
@@ -991,7 +1014,7 @@
         value    => $args{'type'},
     ) if $args{'type'};
 
-    $self->{'_sql_role_group_aliases'}{ $args{'class'} . '-' . $args{'type'} } =
+    $self->{'_sql_aliases'}{'role_group'}{ $args{'class'} . '-' . $args{'type'} } =
       $groups
         unless $args{'new'};
 
@@ -1002,8 +1025,8 @@
     my $self = shift;
     my %args = ( new => 1, groups_alias => undef, @_ );
 
-    return $self->{'_sql_group_members_aliases'}{ $args{'groups_alias'} }
-        if $self->{'_sql_group_members_aliases'}{ $args{'groups_alias'} }
+    return $self->{'_sql_aliases'}{'group_members'}{ $args{'groups_alias'} }
+        if $self->{'_sql_aliases'}{'group_members'}{ $args{'groups_alias'} }
             && !$args{'new'};
 
     my $alias = $self->join(
@@ -1015,7 +1038,7 @@
         entry_aggregator => 'AND',
     );
 
-    $self->{'_sql_group_members_aliases'}{ $args{'groups_alias'} } = $alias
+    $self->{'_sql_aliases'}{'group_members'}{ $args{'groups_alias'} } = $alias
         unless $args{'new'};
 
     return $alias;
@@ -1243,15 +1266,15 @@
     my ( $self, $cfkey, $cfid, $field ) = @_;
 
     # Perform one join per CustomField
-    if (   $self->{_sql_object_cfv_alias}{$cfkey}
-        || $self->{_sql_cf_alias}{$cfkey} )
+    if (   $self->{'_sql_aliases'}{'OCFV'}{$cfkey}
+        || $self->{'_sql_aliases'}{'cf'}{$cfkey} )
     {
-        return ( $self->{_sql_object_cfv_alias}{$cfkey}, $self->{_sql_cf_alias}{$cfkey} );
+        return ( $self->{'_sql_aliases'}{'OCFV'}{$cfkey}, $self->{'_sql_aliases'}{'cf'}{$cfkey} );
     }
 
     my ( $TicketCFs, $CFs );
     if ($cfid) {
-        $TicketCFs = $self->{_sql_object_cfv_alias}{$cfkey} = $self->join(
+        $TicketCFs = $self->{'_sql_aliases'}{'OCFV'}{$cfkey} = $self->join(
             type    => 'left',
             alias1  => 'main',
             column1 => 'id',
@@ -1279,7 +1302,7 @@
             value    => '0',
         );
 
-        $CFs = $self->{_sql_cf_alias}{$cfkey} = $self->join(
+        $CFs = $self->{'_sql_aliases'}{'cf'}{$cfkey} = $self->join(
             type    => 'left',
             alias1  => $ocfalias,
             column1 => 'custom_field',
@@ -1301,7 +1324,7 @@
 #            value           => $field,
 #        );
 
-        $TicketCFs = $self->{_sql_object_cfv_alias}{$cfkey} = $self->join(
+        $TicketCFs = $self->{'_sql_aliases'}{'OCFV'}{$cfkey} = $self->join(
             type    => 'left',
             alias1  => $CFs,
             column1 => 'id',
@@ -1547,9 +1570,9 @@
         if ( defined $meta->[0] && $meta->[0] eq 'WATCHERFIELD' ) {
 
             # cache alias as we want to use one alias per watcher type for sorting
-            my $users = $self->{_sql_u_watchers_alias_for_sort}{ $meta->[1] };
+            my $users = $self->{'_sql_aliases'}{'u_watchers_sort'}{ $meta->[1] };
             unless ($users) {
-                $self->{_sql_u_watchers_alias_for_sort}{ $meta->[1] } = $users = ( $self->_watcherjoin( $meta->[1] ) )[2];
+                $self->{'_sql_aliases'}{'u_watchers_sort'}{ $meta->[1] } = $users = ( $self->_watcherjoin( $meta->[1] ) )[2];
             }
             push @res, { %$row, alias => $users, column => $subkey };
         } elsif ( defined $meta->[0] && $meta->[0] =~ /CUSTOMFIELD/i ) {
@@ -2501,13 +2524,9 @@
     my $self = shift;
 
     # Private Member Variables (which should get cleaned)
-    $self->{'_sql_transalias'}               = undef;
-    $self->{'_sql_trattachalias'}            = undef;
-    $self->{'_sql_cf_alias'}                 = undef;
-    $self->{'_sql_object_cfv_alias'}         = undef;
-    $self->{'_sql_watcher_join_users_alias'} = undef;
-    $self->{'_sql_query'}                    = '';
-    $self->{'_sql_looking_at'}               = {};
+    $self->{'_sql_aliases'}    = {};
+    $self->{'_sql_query'}      = '';
+    $self->{'_sql_looking_at'} = {};
 }
 
 sub _sql_limit {

Modified: rt/3.999/trunk/lib/RT/ScripAction/SendEmail.pm
==============================================================================
--- rt/3.999/trunk/lib/RT/ScripAction/SendEmail.pm	(original)
+++ rt/3.999/trunk/lib/RT/ScripAction/SendEmail.pm	Thu Apr  9 13:29:07 2009
@@ -480,7 +480,7 @@
 
     # XXX: we need a current user here, but who is current user?
     my $attachs = RT::Model::AttachmentCollection->new( current_user => RT->system_user );
-    my $txn_alias = $attachs->transaction_alias;
+    my $txn_alias = $attachs->join_transactions;
     $attachs->limit(
         alias  => $txn_alias,
         column => 'type',

Added: rt/3.999/trunk/sbin/rt-fulltext-indexer
==============================================================================
--- (empty file)
+++ rt/3.999/trunk/sbin/rt-fulltext-indexer	Thu Apr  9 13:29:07 2009
@@ -0,0 +1,346 @@
+#!/usr/bin/env perl
+# BEGIN BPS TAGGED BLOCK {{{
+# 
+# COPYRIGHT:
+# 
+# This software is Copyright (c) 1996-2008 Best Practical Solutions, LLC
+#                                          <jesse at bestpractical.com>
+# 
+# (Except where explicitly superseded by other copyright notices)
+# 
+# 
+# LICENSE:
+# 
+# This work is made available to you under the terms of Version 2 of
+# the GNU General Public License. A copy of that license should have
+# been provided with this software, but in any event can be snarfed
+# from www.gnu.org.
+# 
+# This work is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 or visit their web page on the internet at
+# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
+# 
+# 
+# CONTRIBUTION SUBMISSION POLICY:
+# 
+# (The following paragraph is not intended to limit the rights granted
+# to you to modify and distribute this software under the terms of
+# the GNU General Public License and is only of importance to you if
+# you choose to contribute your changes and enhancements to the
+# community by submitting them to Best Practical Solutions, LLC.)
+# 
+# By intentionally submitting any modifications, corrections or
+# derivatives to this work, or any other work intended for use with
+# Request Tracker, to Best Practical Solutions, LLC, you confirm that
+# you are the copyright holder for those contributions and you grant
+# Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
+# royalty-free, perpetual, license to use, copy, create derivative
+# works based on those contributions, and sublicense and distribute
+# those contributions and any derivatives thereof.
+# 
+# END BPS TAGGED BLOCK }}}
+use strict;
+use warnings;
+
+use RT;
+BEGIN {RT->init_jifty};
+use RT::Interface::CLI qw{ clean_env };
+
+use Getopt::Long;
+
+clean_env();
+RT::load_config();
+RT::init();
+
+no warnings 'once';
+
+# Read in the options.
+#   --limit N   page size for each attachment type (integer, default 100)
+#   --verbose   make verbose() print its messages
+my %opts;
+GetOptions( \%opts, 'help', 'debug', 'verbose', 'limit=i', 'skip' );
+if ( $opts{'help'} ) {
+    require Pod::Usage;
+    Pod::Usage->import;
+    pod2usage(-message => "RT Full Text Indexer\n", -verbose => 1);
+    exit 1;
+}
+
+# Refuse to run unless full text search is enabled and set up to use an
+# index; each message ends with a newline so the shell prompt is not glued
+# to it.
+my $fts_config = RT->config->get('FullTextSearch') || {};
+unless ( $fts_config->{'Enable'} ) {
+    print STDERR "Full text search disabled in the RT config."
+        ." Read documentation for %FullTextSearch config option.\n";
+    exit 1;
+}
+unless ( $fts_config->{'Indexed'} ) {
+    print STDERR "Full text search is enabled in the RT config,"
+        ." however full text search works without special index,"
+        ." so this tool is not required."
+        ." Read documentation for %FullTextSearch config option.\n";
+    exit 1;
+}
+
+my $db_type = RT->config->get('DatabaseType');
+
+# Index each supported attachment type: take attachments with an id above
+# the last indexed one, skip filtered ones, extract their text and hand it
+# to the DB-specific processor.
+my @types = qw(text html);
+foreach my $type ( @types ) {
+    my $attaches = attachments($type);
+    $attaches->limit( column => 'id', operator => '>', value => last_indexed($type) );
+    $attaches->order_by( column => 'id', order => 'asc' );
+    $attaches->rows_per_page( $opts{'limit'} || 100 );
+
+    my $found = 0;
+    # named $attachment rather than $a, which sort() uses
+    while ( my $attachment = $attaches->next ) {
+        debug("Found attachment #". $attachment->id );
+        next if filter( $type, $attachment );
+        debug("Attachment #". $attachment->id ." hasn't been filtered" );
+        my $txt = extract($type, $attachment) or next;
+        debug("Extracted text from attachment #". $attachment->id );
+        $found++;
+        process( $type, $attachment, $txt );
+        debug("Processed attachment #". $attachment->id );
+    }
+    # only finalize when at least one attachment was processed
+    finalize( $type, $attaches ) if $found;
+    clean( $type );
+}
+
+# Builds the collection of attachments that are candidates for indexing:
+# attachments whose transaction belongs to a ticket that is not deleted.
+# The collection is then passed to attachments_<type> for type-specific
+# narrowing; croaks when no such function exists for $type.
+sub attachments {
+    my $type = shift;
+    my $res = RT::Model::AttachmentCollection->new( current_user => RT->system_user );
+
+    my $txn_alias = $res->join_transactions;
+    $res->limit( alias => $txn_alias, column => 'object_type', value => 'RT::Model::Ticket' );
+    my $ticket_alias = $res->join(
+        alias1 => $txn_alias, column1 => 'object_id',
+        table2 => RT::Model::TicketCollection->new, column2 => 'id'
+    );
+    $res->limit( alias => $ticket_alias, column => 'status', operator => '!=', value => 'deleted' );
+
+    return goto_specific(
+        suffix => $type,
+        error => "Don't know how to find $type attachments",
+        arguments => [$res],
+    );
+}
+
+# Dispatches to last_indexed_<db type> with the original arguments;
+# croaks when no such function exists for the configured database.
+sub last_indexed {
+    my $type = $_[0];
+    my %dispatch = (
+        suffix    => $db_type,
+        error     => "Don't know how to find last indexed $type attachment for $db_type DB",
+        arguments => \@_,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatches to filter_<type> with the remaining arguments. No error text
+# is given, so a type without a filter function yields undef.
+sub filter {
+    my $type = shift @_;
+    my %dispatch = (
+        suffix    => $type,
+        arguments => \@_,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatches to extract_<type> with the remaining arguments; croaks when
+# there is no extractor for the type.
+sub extract {
+    my $type = shift @_;
+    my %dispatch = (
+        suffix    => $type,
+        error     => "No way to convert $type attachment into text",
+        arguments => \@_,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatches to process_<db type> with the original arguments; croaks when
+# the configured database has no such function.
+sub process {
+    my %dispatch = (
+        suffix    => $db_type,
+        error     => "No processer for $db_type DB",
+        arguments => \@_,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatches to finalize_<db type> with the original arguments. No error
+# text is given, so a database without such a function yields undef.
+sub finalize {
+    my %dispatch = (
+        suffix    => $db_type,
+        arguments => \@_,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatches to clean_<db type> with the original arguments so DB-specific
+# state can be dropped once a type has been handled. No error text is
+# given, so a database without such a function yields undef.
+sub clean {
+    return goto_specific(
+        suffix    => $db_type,    # goto_specific() reads 'suffix', not 'prefix'
+        arguments => \@_,
+    );
+}
+
+{
+# Looks up the 'LastIndexedAttachments' system attribute and returns the
+# value stored under $type, or 0 when there is none.
+# NOTE(review): reads $attr->{$type} straight from whatever first_attribute
+# returns -- confirm the per-type value really lives there.
+sub last_indexed_mysql {
+    my ($type) = @_;
+    my $attr = RT->system->first_attribute('LastIndexedAttachments');
+    if ( !$attr || !exists $attr->{ $type } ) {
+        return 0;
+    }
+    return $attr->{ $type } || 0;
+}
+
+# Appends one <sphinx:document> element to the shared Sphinx document: its
+# id attribute is the attachment id and its <content> child holds the text.
+# $text_ref is a reference to the extracted text.
+sub process_mysql {
+    my ( undef, $attachment, $text_ref ) = @_;
+
+    my $docset = sphinx_template();
+
+    my $node = $docset->createElement('sphinx:document');
+    $node->setAttribute( id => $attachment->id );
+    $node->appendTextChild( content => ${ $text_ref } );
+
+    $docset->documentElement->appendChild( $node );
+}
+
+# The Sphinx XML document being built; shared by the *_mysql functions.
+my $doc;
+
+# Returns the shared document, creating it on first use: a <sphinx:docset>
+# root with a <sphinx:schema> that declares the "content" field.
+sub sphinx_template {
+    if ( $doc ) {
+        return $doc;
+    }
+
+    require XML::LibXML;
+    $doc = XML::LibXML::Document->new('1.0', 'UTF-8');
+    my $docset = $doc->createElement('sphinx:docset');
+    $doc->setDocumentElement( $docset );
+
+    my $schema = $doc->createElement('sphinx:schema');
+    $docset->appendChild( $schema );
+    for my $name ( 'content' ) {
+        my $field = $doc->createElement('sphinx:field');
+        $field->setAttribute( name => $name );
+        $schema->appendChild( $field );
+    }
+
+    return $doc;
+}
+
+# Writes the shared Sphinx document to STDOUT (toFH with format flag 1).
+sub finalize_mysql {
+    my ( $type, $attachments ) = @_;
+    my $docset = sphinx_template();
+    return $docset->toFH( *STDOUT, 1 );
+}
+
+# Forgets the shared Sphinx document so the next sphinx_template() call
+# starts a new one.
+sub clean_mysql {
+    undef $doc;
+}
+
+}
+
+# Returns the highest id among attachments of the given type whose full
+# text column is already filled in, or 0 when there is none.
+# The column is looked up in the separate index table when
+# %FullTextSearch has 'Table', otherwise in the attachments table itself.
+sub last_indexed_pg {
+    my $type = shift;
+    my $attachments = attachments( $type );
+    my $alias = 'main';
+    if ( $fts_config->{'Table'} ) {
+        $alias = $attachments->join(
+            type    => 'left',
+            column1 => 'id',
+            table2  => $fts_config->{'Table'},
+            column2 => 'id',
+        );
+    }
+    # same fallback column name as the search code in RT::Model::TicketCollection
+    my $column = $fts_config->{'Column'} || 'fts_index';
+    $attachments->limit( alias => $alias, column => $column, operator => 'IS NOT', value => 'NULL' );
+    $attachments->order_by( column => 'id', order => 'desc' );
+    my $res = $attachments->first;
+    return 0 unless $res;
+    return $res->id;
+}
+
+# Stores the tsvector of the extracted text for one attachment.
+# With a separate index table ('Table' in %FullTextSearch) the row is
+# updated if it exists and inserted otherwise; without one the column on
+# the Attachments table is updated. $text is a reference to the text.
+# Dies with the DBI error string when the statement fails.
+sub process_pg {
+    my ($type, $attachment, $text) = (@_);
+
+    my $dbh = Jifty->handle->dbh;
+    my $table = $fts_config->{'Table'};
+    # same fallback column name as the search code in RT::Model::TicketCollection
+    my $column = $fts_config->{'Column'} || 'fts_index';
+
+    my $query;
+    if ( $table ) {
+        if ( my ($id) = $dbh->selectrow_array("SELECT id FROM $table WHERE id = ?", undef, $attachment->id) ) {
+            $query = "UPDATE $table SET $column = to_tsvector(?) WHERE id = ?";
+        } else {
+            $query = "INSERT INTO $table($column, id) VALUES(to_tsvector(?), ?)";
+        }
+    } else {
+        $query = "UPDATE Attachments SET $column = to_tsvector(?) WHERE id = ?";
+    }
+
+    my $status = $dbh->do( $query, undef, $$text, $attachment->id );
+    unless ( $status ) {
+        die "error: ". $dbh->errstr;
+    }
+}
+
+# Narrows the given collection to text/plain attachments and returns it.
+sub attachments_text {
+    my ($collection) = @_;
+    $collection->limit( column => 'content_type', value => 'text/plain' );
+    return $collection;
+}
+
+# Returns a reference to the attachment's content, or undef when the
+# content is undefined or empty.
+sub extract_text {
+    my ($attachment) = @_;
+    my $content = $attachment->content;
+    if ( defined $content && length($content) ) {
+        return \$content;
+    }
+    return undef;
+}
+
+# Narrows the given collection to text/html attachments and returns it.
+sub attachments_html {
+    my ($collection) = @_;
+    $collection->limit( column => 'content_type', value => 'text/html' );
+    return $collection;
+}
+
+# Tells whether an HTML attachment must be skipped: returns 1 when its
+# parent has an id and is a multipart/alternative entity, 0 otherwise.
+sub filter_html {
+    my $attachment = shift;
+    if ( my $parent = $attachment->parent ) {
+# skip html parts that are alternatives
+        return 1 if $parent->id
+            && $parent->content_type eq 'multipart/alternative';
+    }
+    return 0;
+}
+
+# Returns a reference to the attachment's content as is, or undef when the
+# content is undefined or empty.
+sub extract_html {
+    my ($attachment) = @_;
+    my $content = $attachment->content;
+    if ( !defined $content || !length($content) ) {
+        return undef;
+    }
+# TODO: html -> text
+    return \$content;
+}
+
+# Tail-calls "<name of the calling sub>_<lowercased suffix>" with the list
+# in 'arguments'. When that function is not defined: croaks with 'error'
+# if one was given, otherwise returns undef.
+# Must be called directly from the dispatching sub, since the name is
+# taken from caller(1).
+sub goto_specific {
+    my %args = (@_);
+
+    ( my $base = (caller(1))[3] ) =~ s/.*:://;
+    my $target = $base ."_". lc $args{'suffix'};
+    if ( !defined &$target ) {
+        if ( $args{'error'} ) {
+            require Carp;
+            Carp::croak( $args{'error'} );
+        }
+        return undef;
+    }
+    @_ = @{ $args{'arguments'} };
+    goto &$target;
+}
+
+
+# Helper functions for reporting; every one of them returns 1.
+# verbose() prints only when $opts{verbose} is true, debug() only when
+# $opts{debug} is true; error() and warning() write to the Jifty log and
+# then call verbose().
+# NOTE(review): the test used to read "$opts{verbose} || $opts{verbose}";
+# the second operand may have been meant to be $opts{debug} -- confirm.
+sub verbose  { print _(@_), "\n" if $opts{verbose}; 1 }
+sub debug    { print _(@_), "\n" if $opts{debug}; 1 }
+sub error    { Jifty->log->error(_(@_)); verbose(@_); 1 }
+sub warning  { Jifty->log->warn(_(@_)); verbose(@_); 1 }
+
+=head1 NAME
+
+rt-fulltext-indexer - Indexer for full text search
+
+=head1 SYNOPSIS
+
+    /opt/rt3/local/sbin/rt-fulltext-indexer --help
+
+    /opt/rt3/local/sbin/rt-fulltext-indexer --limit 100
+
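+=head1 DESCRIPTION
+
+Feeds the full text index with the content of text/plain and text/html
+attachments of tickets that are not deleted. Only attachments with an id
+greater than the last indexed one are looked at. On Pg the tsvector column
+is filled in directly; on mysql a Sphinx XML document (sphinx:docset) is
+printed to STDOUT.
+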
+=cut
+

Added: rt/3.999/trunk/sbin/rt-setup-fulltext-index
==============================================================================
--- (empty file)
+++ rt/3.999/trunk/sbin/rt-setup-fulltext-index	Thu Apr  9 13:29:07 2009
@@ -0,0 +1,274 @@
+#!/usr/bin/env perl
+# BEGIN BPS TAGGED BLOCK {{{
+# 
+# COPYRIGHT:
+# 
+# This software is Copyright (c) 1996-2008 Best Practical Solutions, LLC
+#                                          <jesse at bestpractical.com>
+# 
+# (Except where explicitly superseded by other copyright notices)
+# 
+# 
+# LICENSE:
+# 
+# This work is made available to you under the terms of Version 2 of
+# the GNU General Public License. A copy of that license should have
+# been provided with this software, but in any event can be snarfed
+# from www.gnu.org.
+# 
+# This work is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 or visit their web page on the internet at
+# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
+# 
+# 
+# CONTRIBUTION SUBMISSION POLICY:
+# 
+# (The following paragraph is not intended to limit the rights granted
+# to you to modify and distribute this software under the terms of
+# the GNU General Public License and is only of importance to you if
+# you choose to contribute your changes and enhancements to the
+# community by submitting them to Best Practical Solutions, LLC.)
+# 
+# By intentionally submitting any modifications, corrections or
+# derivatives to this work, or any other work intended for use with
+# Request Tracker, to Best Practical Solutions, LLC, you confirm that
+# you are the copyright holder for those contributions and you grant
+# Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
+# royalty-free, perpetual, license to use, copy, create derivative
+# works based on those contributions, and sublicense and distribute
+# those contributions and any derivatives thereof.
+# 
+# END BPS TAGGED BLOCK }}}
+use strict;
+use warnings;
+
+use RT;
+BEGIN {RT->init_jifty};
+use RT::Interface::CLI qw{ clean_env };
+
+use Getopt::Long;
+
+clean_env();
+RT::load_config();
+RT::init();
+
+no warnings 'once';
+
+# Read in the options
+#   --help     show usage and exit
+#   --dryrun   print the schema change without applying it
+#   --verbose  enable output from the verbose()/error()/warning() helpers
+#   --debug    enable output from the debug() helper
+my %opts;
+GetOptions( \%opts, "help", "dryrun", "verbose", "debug" );
+
+if ($opts{'help'}) {
+    require Pod::Usage;
+    # Call the function by its full name: the module is loaded at run time,
+    # so nothing needs to be imported into this package.
+    Pod::Usage::pod2usage(
+        -message => "RT full text search index setup\n",
+        -verbose => 1,
+    );
+    exit 1;
+}
+
+# helper functions
+#
+# All four pass their arguments through _() (presumably the localization
+# helper -- confirm) and always return 1.
+
+# Print the message when either --debug or --verbose is in effect.
+sub verbose  { print _(@_), "\n" if $opts{debug} || $opts{verbose}; 1 }
+# Print the message only when --debug is in effect.
+sub debug    { print _(@_), "\n" if $opts{debug}; 1 }
+# Log at error level, then echo through verbose().
+sub error    { Jifty->log->error(_(@_)); verbose(@_); 1 }
+# Log at warn level, then echo through verbose().
+sub warning  { Jifty->log->warn(_(@_)); verbose(@_); 1 }
+
+# Names offered as defaults for the index table and its indexed column.
+my %default = (
+    Table  => 'AttachmentsIndex',
+    Column => 'fts_index',
+);
+
+
+my $db_type = RT->config->get('DatabaseType');
+if ( $db_type eq 'mysql' ) {
+    # MySQL: full text search goes through the Sphinx storage engine, so
+    # create a SPHINX-engine table and print a starter sphinx config.
+    my $dbh = Jifty->handle->dbh;
+
+    # The query yields a (name, value) row; the value is undef when the
+    # server does not report the variable at all.
+    my $sphinx = ($dbh->selectrow_array("show variables like 'have_sphinx'"))[1];
+    unless ( lc( defined $sphinx ? $sphinx : '' ) eq 'yes' ) {
+        print STDERR "Mysql server you have compiled without sphinx storage engine (sphinxse).\n";
+        print STDERR "Either use system packages with sphinxse, binaries from Sphinx site
+                      or compile mysql according to instructions in Sphinx's docs.\n";
+        exit 1;
+    }
+
+    my $table = prompt(
+        message => 'Enter name of a DB table that will be used to connect to the sphinx server',
+        default => $default{'Table'},
+    );
+    my $url = prompt(
+        message => 'Enter URL of the sphinx search server, it should be sphinx://<server>:<port>/<index name>. Simple config for this sphinx instance will be generated for you.',
+        default => 'sphinx://localhost:3312/rt',
+    );
+
+    # Take the URL apart before touching the database, so a malformed URL
+    # does not leave a half-finished setup behind.
+    # URI has no scheme-specific class for "sphinx", which leaves only the
+    # generic accessors (authority, path); the port has to be split out of
+    # the authority by hand.
+    require URI;
+    my $urlo = URI->new( $url );
+    my $authority = $urlo->authority;
+    $authority = '' unless defined $authority;
+    my $port = ( split /:/, $authority, 2 )[1] || 3312;
+    my $index = $urlo->path;
+    $index = '' unless defined $index;
+    $index =~ s{^/+}{};    # path is "/<index name>", the config wants the bare name
+    die "Couldn't find an index name in the URL '$url'\n"
+        unless length $index;
+
+    my $schema = <<END;
+CREATE TABLE $table (
+    id     INTEGER NOT NULL,
+    weight INTEGER NOT NULL,
+    $default{'Column'}  VARCHAR(3072) NOT NULL,
+    INDEX($default{'Column'})
+) ENGINE=SPHINX CONNECTION="$url"
+END
+
+    print_rt_config( Table => $table, Column => $default{'Column'} );
+    insert_schema( $schema );
+
+    my %sphinx_conf = ();
+    $sphinx_conf{'host'} = RT->config->get('DatabaseHost');
+    $sphinx_conf{'db'}   = RT->config->get('DatabaseName');
+    $sphinx_conf{'user'} = RT->config->get('DatabaseUser');
+    $sphinx_conf{'pass'} = RT->config->get('DatabasePassword');
+
+    print "Here is simple sphinx config, you can use it to index text/plain attachments in your DB."
+        ." This config is not ideal. You should read Sphinx docs to get better ideas.";
+
+    # The heredoc interpolates, so the line-continuation backslashes and
+    # sphinx's own \$id macro must be escaped to reach the output intact.
+    print <<END;
+
+source rt {
+    type            = mysql
+
+    sql_host        = $sphinx_conf{'host'}
+    sql_db          = $sphinx_conf{'db'}
+    sql_user        = $sphinx_conf{'user'}
+    sql_pass        = $sphinx_conf{'pass'}
+
+    sql_query       = \\
+        SELECT a.id, a.content FROM Attachments a \\
+        JOIN Transactions txn ON a.transaction_id = txn.id AND txn.object_type = 'RT::Model::Ticket' \\
+        JOIN Tickets t ON txn.object_id = t.id \\
+        WHERE a.content_type = 'text/plain' AND t.Status != 'deleted'
+
+    sql_query_info  = SELECT * FROM Attachments WHERE id=\$id
+}
+
+index $index {
+    source                  = rt
+    path                    = $RT::VarPath/sphinx/index
+    docinfo                 = extern
+    charset_type            = utf-8
+}
+
+indexer {
+    mem_limit               = 32M
+}
+
+searchd {
+    port                    = $port
+    log                     = $RT::VarPath/sphinx/searchd.log
+    query_log               = $RT::VarPath/sphinx/query.log
+    read_timeout            = 5
+    max_children            = 30
+    pid_file                = $RT::VarPath/sphinx/searchd.pid
+    max_matches             = 1000
+    seamless_rotate         = 1
+    preopen_indexes         = 0
+    unlink_old              = 1
+}
+
+END
+
+}
+elsif ( $db_type eq 'Pg' ) {
+    # PostgreSQL: the index lives in an ordinary table with a tsvector
+    # column; creating the GIN/GiST index itself is left to the admin.
+    my $table = prompt(
+        message => 'Enter name of a DB table that will be used to store the full text index',
+        default => $default{'Table'},
+    );
+    my $column = $default{'Column'};
+
+    my $schema = <<END;
+CREATE TABLE $table (
+    id      INTEGER NOT NULL,
+    $column tsvector
+)
+END
+
+    print_rt_config( Table => $table, Column => $column );
+
+    insert_schema( $schema );
+
+    print <<END;
+Now you have to create an index on the column. You have a choice
+between GiST and GIN: GiST is several times slower to search, but
+it takes less space and is faster to update. Either way, both are
+faster than searching without an index.
+
+Either run:
+
+    CREATE INDEX ${column}_idx ON $table USING gin($column);
+
+or
+
+    CREATE INDEX ${column}_idx ON $table USING gist($column);
+
+END
+}
+else {
+    die "Not yet supported"; 
+}
+
+# Ask a question on STDOUT and read one line of answer from STDIN.
+#
+# Named arguments:
+#   message - text of the question
+#   default - optional value shown in brackets and returned when the
+#             answer is empty
+#
+# Returns the answer with surrounding whitespace removed, or the default
+# when nothing was entered (including end of input) and a default is set.
+sub prompt {
+    my %args = @_;
+
+    # unbuffered, so the question shows up before we block on STDIN
+    local $| = 1;
+    print $args{'message'};
+    if ( $args{'default'} ) {
+        print "\n[". $args{'default'} .']: ';
+    } else {
+        print ":\n";
+    }
+
+    my $res = <STDIN>;
+    $res = '' unless defined $res;    # end of input counts as an empty answer
+    chomp $res;
+    $res =~ s/^\s+//;
+    $res =~ s/\s+$//;
+
+    # test the length rather than truth, so an answer of "0" is kept
+    return $args{'default'} if !length($res) && $args{'default'};
+    return $res;
+}
+
+# Print the %FullTextSearch snippet the admin should add to the RT site
+# config. Expects the named arguments Table and Column.
+sub print_rt_config {
+    my %names = @_;
+    my ( $table, $column ) = @names{ 'Table', 'Column' };
+
+    # leading and trailing empty strings produce the surrounding newlines
+    print join "\n",
+        '',
+        'Configure your RT via site config:',
+        'set( %FullTextSearch,',
+        '    Enable  => 1,',
+        '    Indexed => 1,',
+        "    Table   => '$table',",
+        "    Column  => '$column',",
+        ');',
+        '';
+}
+
+# Show the given SQL and, unless --dryrun was requested, run it against
+# the RT database.
+#
+# Returns nothing on a dry run and 1 after a successful change; dies with
+# the database error message when the statement fails.
+sub insert_schema {
+    my $schema = shift;
+    print "Going to do the following change in the DB:\n";
+    print $schema;
+    return if $opts{'dryrun'};
+
+    # Fetch the handle here: the \$dbh variables of the per-database
+    # branches are lexical to those branches and not visible in this sub.
+    my $dbh = Jifty->handle->dbh;
+    my $res = $dbh->do( $schema );
+    unless ( $res ) {
+        die "Couldn't create the table: ". $dbh->errstr;
+    }
+    return 1;
+}
+
+=head1 NAME
+
+rt-setup-fulltext-index - Helps create indexes for full text search
+
+=head1 SYNOPSIS
+
+    /opt/rt3/local/sbin/rt-setup-fulltext-index
+
+=head1 DESCRIPTION
+
+=cut


More information about the Rt-commit mailing list