[Rt-commit] r19117 - in rt/3.999/trunk: . etc lib/RT lib/RT/Model sbin
ruz at bestpractical.com
ruz at bestpractical.com
Thu Apr 9 13:29:07 EDT 2009
Author: ruz
Date: Thu Apr 9 13:29:07 2009
New Revision: 19117
Added:
rt/3.999/trunk/sbin/rt-fulltext-indexer
rt/3.999/trunk/sbin/rt-setup-fulltext-index
Modified:
rt/3.999/trunk/ (props changed)
rt/3.999/trunk/TODO
rt/3.999/trunk/etc/RT_Config.pm
rt/3.999/trunk/lib/RT/Collection.pm
rt/3.999/trunk/lib/RT/Model/AttachmentCollection.pm
rt/3.999/trunk/lib/RT/Model/TicketCollection.pm
rt/3.999/trunk/lib/RT/ScripAction/SendEmail.pm
Log:
* merge Full Text Search branch
r19072 at Macintosh (orig r19063): ruz | 2009-04-07 00:09:15 +0400
* full text search branch
r19074 at Macintosh (orig r19065): ruz | 2009-04-07 00:54:04 +0400
* move caching of some aliases into RT::Collection
* add join_transactions method
r19075 at Macintosh (orig r19066): ruz | 2009-04-07 00:56:57 +0400
* use ->{_sql_aliases} for all cached aliases
* factor out _trans_content_limit from _trans_limit
r19076 at Macintosh (orig r19067): ruz | 2009-04-07 01:10:32 +0400
* switch to TRANSCONTENT type
r19094 at Macintosh (orig r19085): ruz | 2009-04-07 23:32:32 +0400
* implement FTS for mysql and Pg
r19095 at Macintosh (orig r19086): ruz | 2009-04-08 00:08:21 +0400
* add sbin/rt-setup-fulltext-index script - quick help to setup
required database objects for full text searching
r19096 at Macintosh (orig r19087): ruz | 2009-04-08 00:39:01 +0400
* update todo
r19108 at Macintosh (orig r19099): ruz | 2009-04-08 18:45:36 +0400
* refactor new tool a little
* generate simple sphinx config for standalone RT's sphinx server
r19109 at Macintosh (orig r19100): ruz | 2009-04-08 18:46:27 +0400
* add FullTextSearch to the RT main config
r19110 at Macintosh (orig r19101): ruz | 2009-04-08 18:47:11 +0400
* use one column name for different DBs
r19111 at Macintosh (orig r19102): ruz | 2009-04-08 18:58:49 +0400
* first incomplete version of indexer
r19120 at Macintosh (orig r19111): ruz | 2009-04-09 18:13:05 +0400
* it's very unlikely that attachments will ever have their own transactions,
so make join_transactions method (every collection has it now) join
transactions these attachments belong to
r19121 at Macintosh (orig r19112): ruz | 2009-04-09 18:18:27 +0400
* extend select a little bit to search only attachments of tickets
and only of those that are not deleted
r19122 at Macintosh (orig r19113): ruz | 2009-04-09 18:19:40 +0400
* complete indexer API, now works on mysql and Pg, needs docs and update_last_indexed
Modified: rt/3.999/trunk/TODO
==============================================================================
--- rt/3.999/trunk/TODO (original)
+++ rt/3.999/trunk/TODO Thu Apr 9 13:29:07 2009
@@ -11,4 +11,5 @@
(Type = Set, Field = Status). I think it should be fixed
to the latter.
+RT::load_config and other in RT package should be replaced with RT->load_config
Modified: rt/3.999/trunk/etc/RT_Config.pm
==============================================================================
--- rt/3.999/trunk/etc/RT_Config.pm (original)
+++ rt/3.999/trunk/etc/RT_Config.pm Thu Apr 9 13:29:07 2009
@@ -174,6 +174,25 @@
set($UseSQLForACLChecks, undef);
+=item C<%FullTextSearch>
+
+Full text search (FTS) without indexes is a slow operation and is disabled
+by default. To enable FTS set the 'Enable' key to a true value.
+
+Setup of indexes and filling them with data requires different steps for
+different database back-ends. Use F<sbin/rt-setup-fulltext-index> helper
+for quick start. This script creates required structures in the DB and
+gives some ideas on next steps.
+
+=cut
+
+# Defaults for full text search: both the feature ('Enable') and the use of
+# a dedicated index ('Indexed') are off. 'Table' and 'Column' are optional;
+# the values shown are the names the setup script and search code default to.
+set(%FullTextSearch,
+ Enable => 0,
+ Indexed => 0,
+# Table => 'AttachmentsIndex',
+# Column => 'fts_index',
+);
+
=back
=head1 Incoming Mail Gateway Configuration
Modified: rt/3.999/trunk/lib/RT/Collection.pm
==============================================================================
--- rt/3.999/trunk/lib/RT/Collection.pm (original)
+++ rt/3.999/trunk/lib/RT/Collection.pm Thu Apr 9 13:29:07 2009
@@ -305,5 +305,35 @@
return $self->items_order_by( $self->SUPER::items_array_ref() );
}
+# Forget every cached SQL join alias, then let the parent class finish
+# resetting the collection.
+sub clean_slate {
+    my ( $self, @args ) = @_;
+
+    $self->{'_sql_aliases'} = {};
+
+    return $self->SUPER::clean_slate(@args);
+}
+
+# Join the transactions table to this collection (main.id = object_id,
+# object_type = class of this collection's records) and return the alias.
+# The alias is cached and reused unless the caller passes new => 1.
+sub join_transactions {
+    my $self = shift;
+    my %args = ( new => 0, @_ );
+    my $want_fresh = $args{'new'};
+
+    unless ($want_fresh) {
+        my $cached = $self->{'_sql_aliases'}{'transactions'};
+        return $cached if $cached;
+    }
+
+    my $alias = $self->join(
+        alias1  => 'main',
+        column1 => 'id',
+        table2  => RT::Model::TransactionCollection->new,
+        column2 => 'object_id',
+    );
+
+    # only transactions that belong to this kind of record
+    my $record_class = ref $self->new_item;
+    $self->limit(
+        leftjoin => $alias,
+        alias    => $alias,
+        column   => 'object_type',
+        value    => $record_class,
+    );
+
+    $self->{'_sql_aliases'}{'transactions'} = $alias unless $want_fresh;
+
+    return $alias;
+}
1;
Modified: rt/3.999/trunk/lib/RT/Model/AttachmentCollection.pm
==============================================================================
--- rt/3.999/trunk/lib/RT/Model/AttachmentCollection.pm (original)
+++ rt/3.999/trunk/lib/RT/Model/AttachmentCollection.pm Thu Apr 9 13:29:07 2009
@@ -85,35 +85,6 @@
return $self->SUPER::_init(@_);
}
-sub clean_slate {
- my $self = shift;
- delete $self->{_sql_transaction_alias};
- return $self->SUPER::clean_slate(@_);
-}
-
-=head2 transaction_alias
-
-Returns alias for transactions table with applied join condition.
-Always return the same alias, so if you want to build some complex
-or recursive joining then you have to create new alias youself.
-
-=cut
-
-sub transaction_alias {
- my $self = shift;
- return $self->{'_sql_transaction_alias'}
- if $self->{'_sql_transaction_alias'};
-
- my $res = $self->new_alias('Transactions');
- $self->limit(
- entry_aggregator => 'AND',
- column => 'transaction_id',
- value => $res . '.id',
- quote_value => 0,
- );
- return $self->{'_sql_transaction_alias'} = $res;
-}
-
=head2 content_type (value => 'text/plain', entry_aggregator => 'OR', operator => '=' )
Limit result set to attachments of content_type 'TYPE'...
@@ -185,7 +156,7 @@
my $self = shift;
my $tid = shift;
- my $transactions = $self->transaction_alias;
+ my $transactions = $self->join_transactions;
$self->limit(
entry_aggregator => 'AND',
alias => $transactions,
@@ -234,5 +205,32 @@
return $self->next;
}
+=head2 join_transactions
+
+Returns alias for transactions table with applied join condition
+(attachment's C<transaction_id> equals transaction's C<id>).
+
+By default the alias is created once, cached and returned again on every
+subsequent call. Pass C<< new => 1 >> to get a fresh alias that is not
+cached, for example when building complex or recursive joins.
+
+=cut
+
+sub join_transactions {
+ my $self = shift;
+ my %args = ( new => 0, @_ );
+
+ return $self->{'_sql_aliases'}{'transactions'}
+ if !$args{'new'} && $self->{'_sql_aliases'}{'transactions'};
+
+ my $alias = $self->join(
+ alias1 => 'main',
+ column1 => 'transaction_id',
+ table2 => RT::Model::TransactionCollection->new,
+ column2 => 'id',
+ );
+ $self->{'_sql_aliases'}{'transactions'} = $alias
+ unless $args{'new'};
+
+ return $alias;
+}
+
+
1;
Modified: rt/3.999/trunk/lib/RT/Model/TicketCollection.pm
==============================================================================
--- rt/3.999/trunk/lib/RT/Model/TicketCollection.pm (original)
+++ rt/3.999/trunk/lib/RT/Model/TicketCollection.pm Thu Apr 9 13:29:07 2009
@@ -128,7 +128,7 @@
LastUpdated => [ 'DATE' => 'last_updated', ], #loc_left_pair
Created => [ 'DATE' => 'Created', ], #loc_left_pair
Subject => [ 'STRING', ], #loc_left_pair
- Content => [ 'TRANSFIELD', ], #loc_left_pair
+ Content => [ 'TRANSCONTENT', ], #loc_left_pair
ContentType => [ 'TRANSFIELD', ], #loc_left_pair
Filename => [ 'TRANSFIELD', ], #loc_left_pair
TransactionDate => [ 'TRANSDATE', ], #loc_left_pair
@@ -166,6 +166,7 @@
DATE => \&_date_limit,
STRING => \&_string_limit,
TRANSFIELD => \&_trans_limit,
+ TRANSCONTENT => \&_trans_content_limit,
TRANSDATE => \&_trans_date_limit,
WATCHERFIELD => \&_watcher_limit,
MEMBERSHIPFIELD => \&_watcher_membership_limit,
@@ -241,18 +242,8 @@
sub clean_slate {
my $self = shift;
- $self->SUPER::clean_slate(@_);
- delete $self->{$_} foreach qw(
- _sql_cf_alias
- _sql_group_members_aliases
- _sql_object_cfv_alias
- _sql_role_group_aliases
- _sql_transalias
- _sql_trattachalias
- _sql_u_watchers_alias_for_sort
- _sql_u_watchers_aliases
- _sql_current_user_can_see_applied
- );
+ delete $self->{'_sql_current_user_can_see_applied'};
+ return $self->SUPER::clean_slate(@_);
}
=head1 Limit Helper Routines
@@ -604,28 +595,14 @@
# This routine should really be factored into translimit.
sub _trans_date_limit {
- my ( $sb, $field, $op, $value, @rest ) = @_;
+ my ( $self, $field, $op, $value, @rest ) = @_;
# See the comments for TransLimit, they apply here too
-
- unless ( $sb->{_sql_transalias} ) {
- $sb->{_sql_transalias} = $sb->join(
- alias1 => 'main',
- column1 => 'id',
- table2 => RT::Model::TransactionCollection->new,
- column2 => 'object_id',
- );
- $sb->SUPER::limit(
- alias => $sb->{_sql_transalias},
- column => 'object_type',
- value => 'RT::Model::Ticket',
- entry_aggregator => 'AND',
- );
- }
+ my $txn_alias = $self->join_transactions;
my $date = RT::DateTime->new_from_string($value);
- $sb->open_paren;
+ $self->open_paren;
if ( $op eq "=" ) {
# if we're specifying =, that means we want everything on a
@@ -636,16 +613,16 @@
my $daystart = $date->iso;
my $dayend = $date->add(days => 1)->iso;
- $sb->_sql_limit(
- alias => $sb->{_sql_transalias},
+ $self->_sql_limit(
+ alias => $txn_alias,
column => 'created',
operator => ">=",
value => $daystart,
case_sensitive => 0,
@rest
);
- $sb->_sql_limit(
- alias => $sb->{_sql_transalias},
+ $self->_sql_limit(
+ alias => $txn_alias,
column => 'created',
operator => "<=",
value => $dayend,
@@ -660,8 +637,8 @@
else {
#Search for the right field
- $sb->_sql_limit(
- alias => $sb->{_sql_transalias},
+ $self->_sql_limit(
+ alias => $txn_alias,
column => 'created',
operator => $op,
value => $date->iso,
@@ -670,7 +647,7 @@
);
}
- $sb->close_paren;
+ $self->close_paren;
}
=head2 _trans_limit
@@ -717,25 +694,45 @@
# way they get parsed in the tree they're in different subclauses.
my ( $self, $field, $op, $value, @rest ) = @_;
+ my $txn_alias = $self->join_transactions;
- unless ( $self->{_sql_transalias} ) {
- $self->{_sql_transalias} = $self->join(
- alias1 => 'main',
+ unless ( defined $self->{'_sql_aliases'}{'attachments'} ) {
+ $self->{'_sql_aliases'}{'attachments'} = $self->_sql_join(
+ type => 'left', # not all txns have an attachment
+ alias1 => $txn_alias,
column1 => 'id',
- table2 => RT::Model::TransactionCollection->new,
- column2 => 'object_id',
- );
- $self->SUPER::limit(
- alias => $self->{_sql_transalias},
- column => 'object_type',
- value => 'RT::Model::Ticket',
- entry_aggregator => 'AND',
+ table2 => RT::Model::AttachmentCollection->new,
+ column2 => 'transaction_id',
);
}
- unless ( defined $self->{_sql_trattachalias} ) {
- $self->{_sql_trattachalias} = $self->_sql_join(
- type => 'left', # not all txns have an attachment
- alias1 => $self->{_sql_transalias},
+
+ $self->_sql_limit(
+ alias => $self->{'_sql_aliases'}{'attachments'},
+ column => $field,
+ operator => $op,
+ value => $value,
+ case_sensitive => 0,
+ @rest
+ );
+}
+
+=head2 _trans_content_limit
+
+Limit based on the content of a transaction.
+
+=cut
+
+sub _trans_content_limit {
+ my ( $self, $field, $op, $value, @rest ) = @_;
+
+ my $config = RT->config->get('FullTextSearch') || {};
+ return unless $config->{'Enable'};
+
+ my $txn_alias = $self->join_transactions;
+ unless ( defined $self->{'_sql_aliases'}{'attachments'} ) {
+ $self->{'_sql_aliases'}{'attachments'} = $self->_sql_join(
+ type => 'left',
+ alias1 => $txn_alias,
column1 => 'id',
table2 => RT::Model::AttachmentCollection->new,
column2 => 'transaction_id',
@@ -744,42 +741,68 @@
$self->open_paren;
- #Search for the right field
- if ( $field eq 'content'
- and RT->config->get('DontSearchFileAttachments') )
- {
- $self->_sql_limit(
- alias => $self->{_sql_trattachalias},
- column => 'filename',
- operator => 'IS',
- value => 'NULL',
- subclause => 'contentquery',
- entry_aggregator => 'AND',
- );
- $self->_sql_limit(
- alias => $self->{_sql_trattachalias},
- column => $field,
- operator => $op,
- value => $value,
- case_sensitive => 0,
- @rest,
- entry_aggregator => 'AND',
- subclause => 'contentquery',
- );
+ if ( $config->{'Indexed'} ) {
+ my $db_type = RT->config->get('DatabaseType');
+ my $alias;
+ if ( $config->{'Table'} ) {
+ $alias = $self->{'_sql_aliases'}{'full_text'} ||= $self->_sql_join(
+ type => 'left',
+ alias1 => $self->{'_sql_aliases'}{'attachments'},
+ column1 => 'id',
+ table2 => $config->{'Table'},
+ column2 => 'id',
+ );
+ } else {
+ $alias = $self->{'_sql_aliases'}{'attachments'};
+ }
+ my $column = $config->{'Column'} || 'fts_index';
+ if ( $db_type eq 'mysql' ) {
+ $self->_sql_limit(
+ alias => $alias,
+ column => $column,
+ operator => '=',
+ value => $value,
+ @rest
+ );
+ }
+ elsif ( $db_type eq 'Pg' ) {
+ my $dbh = $self->_handle->dbh;
+ #XXX: handle negative searches
+ $self->_sql_limit(
+ alias => $alias,
+ column => $column,
+ operator => '@@',
+ value => 'plainto_tsquery('. $dbh->quote($value) .')',
+ quote_value => 0,
+ @rest
+ );
+ }
+ else {
+ die "Indexed full text search is not supported for $db_type";
+ }
} else {
$self->_sql_limit(
- alias => $self->{_sql_trattachalias},
+ alias => $self->{'_sql_aliases'}{'attachments'},
column => $field,
operator => $op,
value => $value,
case_sensitive => 0,
- entry_aggregator => 'AND',
@rest
);
}
- $self->close_paren;
+ if ( RT->config->get('DontSearchFileAttachments') ) {
+ $self->_sql_limit(
+ alias => $self->{'_sql_aliases'}{'attachments'},
+ column => 'filename',
+ operator => 'IS',
+ value => 'NULL',
+ @rest,
+ entry_aggregator => 'AND',
+ );
+ }
+ $self->close_paren;
}
=head2 _watcher_limit
@@ -915,9 +938,9 @@
new => 0,
);
- my $users = $self->{'_sql_u_watchers_aliases'}{$group_members};
+ my $users = $self->{'_sql_aliases'}{'u_watchers'}{$group_members};
unless ($users) {
- $users = $self->{'_sql_u_watchers_aliases'}{$group_members} = $self->new_alias( RT::Model::UserCollection->new );
+ $users = $self->{'_sql_aliases'}{'u_watchers'}{$group_members} = $self->new_alias( RT::Model::UserCollection->new );
$self->SUPER::limit(
leftjoin => $group_members,
alias => $group_members,
@@ -960,9 +983,9 @@
sub _role_groupsjoin {
my $self = shift;
my %args = ( new => 0, class => 'ticket', type => '', @_ );
- return $self->{'_sql_role_group_aliases'}
+ return $self->{'_sql_aliases'}{'role_group'}
{ $args{'class'} . '-' . $args{'type'} }
- if $self->{'_sql_role_group_aliases'}
+ if $self->{'_sql_aliases'}{'role_group'}
{ $args{'class'} . '-' . $args{'type'} }
&& !$args{'new'};
@@ -991,7 +1014,7 @@
value => $args{'type'},
) if $args{'type'};
- $self->{'_sql_role_group_aliases'}{ $args{'class'} . '-' . $args{'type'} } =
+ $self->{'_sql_aliases'}{'role_group'}{ $args{'class'} . '-' . $args{'type'} } =
$groups
unless $args{'new'};
@@ -1002,8 +1025,8 @@
my $self = shift;
my %args = ( new => 1, groups_alias => undef, @_ );
- return $self->{'_sql_group_members_aliases'}{ $args{'groups_alias'} }
- if $self->{'_sql_group_members_aliases'}{ $args{'groups_alias'} }
+ return $self->{'_sql_aliases'}{'group_members'}{ $args{'groups_alias'} }
+ if $self->{'_sql_aliases'}{'group_members'}{ $args{'groups_alias'} }
&& !$args{'new'};
my $alias = $self->join(
@@ -1015,7 +1038,7 @@
entry_aggregator => 'AND',
);
- $self->{'_sql_group_members_aliases'}{ $args{'groups_alias'} } = $alias
+ $self->{'_sql_aliases'}{'group_members'}{ $args{'groups_alias'} } = $alias
unless $args{'new'};
return $alias;
@@ -1243,15 +1266,15 @@
my ( $self, $cfkey, $cfid, $field ) = @_;
# Perform one join per CustomField
- if ( $self->{_sql_object_cfv_alias}{$cfkey}
- || $self->{_sql_cf_alias}{$cfkey} )
+ if ( $self->{'_sql_aliases'}{'OCFV'}{$cfkey}
+ || $self->{'_sql_aliases'}{'cf'}{$cfkey} )
{
- return ( $self->{_sql_object_cfv_alias}{$cfkey}, $self->{_sql_cf_alias}{$cfkey} );
+ return ( $self->{'_sql_aliases'}{'OCFV'}{$cfkey}, $self->{'_sql_aliases'}{'cf'}{$cfkey} );
}
my ( $TicketCFs, $CFs );
if ($cfid) {
- $TicketCFs = $self->{_sql_object_cfv_alias}{$cfkey} = $self->join(
+ $TicketCFs = $self->{'_sql_aliases'}{'OCFV'}{$cfkey} = $self->join(
type => 'left',
alias1 => 'main',
column1 => 'id',
@@ -1279,7 +1302,7 @@
value => '0',
);
- $CFs = $self->{_sql_cf_alias}{$cfkey} = $self->join(
+ $CFs = $self->{'_sql_aliases'}{'cf'}{$cfkey} = $self->join(
type => 'left',
alias1 => $ocfalias,
column1 => 'custom_field',
@@ -1301,7 +1324,7 @@
# value => $field,
# );
- $TicketCFs = $self->{_sql_object_cfv_alias}{$cfkey} = $self->join(
+ $TicketCFs = $self->{'_sql_aliases'}{'OCFV'}{$cfkey} = $self->join(
type => 'left',
alias1 => $CFs,
column1 => 'id',
@@ -1547,9 +1570,9 @@
if ( defined $meta->[0] && $meta->[0] eq 'WATCHERFIELD' ) {
# cache alias as we want to use one alias per watcher type for sorting
- my $users = $self->{_sql_u_watchers_alias_for_sort}{ $meta->[1] };
+ my $users = $self->{'_sql_aliases'}{'u_watchers_sort'}{ $meta->[1] };
unless ($users) {
- $self->{_sql_u_watchers_alias_for_sort}{ $meta->[1] } = $users = ( $self->_watcherjoin( $meta->[1] ) )[2];
+ $self->{'_sql_aliases'}{'u_watchers_sort'}{ $meta->[1] } = $users = ( $self->_watcherjoin( $meta->[1] ) )[2];
}
push @res, { %$row, alias => $users, column => $subkey };
} elsif ( defined $meta->[0] && $meta->[0] =~ /CUSTOMFIELD/i ) {
@@ -2501,13 +2524,9 @@
my $self = shift;
# Private Member Variables (which should get cleaned)
- $self->{'_sql_transalias'} = undef;
- $self->{'_sql_trattachalias'} = undef;
- $self->{'_sql_cf_alias'} = undef;
- $self->{'_sql_object_cfv_alias'} = undef;
- $self->{'_sql_watcher_join_users_alias'} = undef;
- $self->{'_sql_query'} = '';
- $self->{'_sql_looking_at'} = {};
+ $self->{'_sql_aliases'} = {};
+ $self->{'_sql_query'} = '';
+ $self->{'_sql_looking_at'} = {};
}
sub _sql_limit {
Modified: rt/3.999/trunk/lib/RT/ScripAction/SendEmail.pm
==============================================================================
--- rt/3.999/trunk/lib/RT/ScripAction/SendEmail.pm (original)
+++ rt/3.999/trunk/lib/RT/ScripAction/SendEmail.pm Thu Apr 9 13:29:07 2009
@@ -480,7 +480,7 @@
# XXX: we need a current user here, but who is current user?
my $attachs = RT::Model::AttachmentCollection->new( current_user => RT->system_user );
- my $txn_alias = $attachs->transaction_alias;
+ my $txn_alias = $attachs->join_transactions;
$attachs->limit(
alias => $txn_alias,
column => 'type',
Added: rt/3.999/trunk/sbin/rt-fulltext-indexer
==============================================================================
--- (empty file)
+++ rt/3.999/trunk/sbin/rt-fulltext-indexer Thu Apr 9 13:29:07 2009
@@ -0,0 +1,346 @@
+#!/usr/bin/env perl
+# BEGIN BPS TAGGED BLOCK {{{
+#
+# COPYRIGHT:
+#
+# This software is Copyright (c) 1996-2008 Best Practical Solutions, LLC
+# <jesse at bestpractical.com>
+#
+# (Except where explicitly superseded by other copyright notices)
+#
+#
+# LICENSE:
+#
+# This work is made available to you under the terms of Version 2 of
+# the GNU General Public License. A copy of that license should have
+# been provided with this software, but in any event can be snarfed
+# from www.gnu.org.
+#
+# This work is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 or visit their web page on the internet at
+# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
+#
+#
+# CONTRIBUTION SUBMISSION POLICY:
+#
+# (The following paragraph is not intended to limit the rights granted
+# to you to modify and distribute this software under the terms of
+# the GNU General Public License and is only of importance to you if
+# you choose to contribute your changes and enhancements to the
+# community by submitting them to Best Practical Solutions, LLC.)
+#
+# By intentionally submitting any modifications, corrections or
+# derivatives to this work, or any other work intended for use with
+# Request Tracker, to Best Practical Solutions, LLC, you confirm that
+# you are the copyright holder for those contributions and you grant
+# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
+# royalty-free, perpetual, license to use, copy, create derivative
+# works based on those contributions, and sublicense and distribute
+# those contributions and any derivatives thereof.
+#
+# END BPS TAGGED BLOCK }}}
+use strict;
+use warnings;
+
+use RT;
+BEGIN {RT->init_jifty};
+use RT::Interface::CLI qw{ clean_env };
+
+use Getopt::Long;
+
+clean_env();
+RT::load_config();
+RT::init();
+
+no warnings 'once';
+
+# Read in the options.
+#   --limit N   number of attachments fetched per type in one run (default 100)
+#   --verbose   enables output of the verbose() helper
+#   --debug     enables output of the debug() helper
+#   --skip      accepted, but not used by the code visible in this script
+my %opts;
+GetOptions( \%opts, 'help', 'debug', 'verbose', 'limit=i', 'skip' );
+if ( $opts{'help'} ) {
+    require Pod::Usage;
+    Pod::Usage->import;
+    pod2usage(-message => "RT full text indexer\n", -verbose => 1);
+    exit 1;
+}
+
+# Refuse to run unless full text search is enabled AND configured to use an
+# index: without 'Indexed' there is nothing for this tool to fill.
+my $fts_config = RT->config->get('FullTextSearch') || {};
+unless ( $fts_config->{'Enable'} ) {
+    print STDERR "Full text search disabled in the RT config."
+        ." Read documentation for %FullTextSearch config option.\n";
+    exit 1;
+}
+unless ( $fts_config->{'Indexed'} ) {
+    print STDERR "Full text search is enabled in the RT config,"
+        ." however full text search works without special index,"
+        ." so this tool is not required."
+        ." Read documentation for %FullTextSearch config option.\n";
+    exit 1;
+}
+
+my $db_type = RT->config->get('DatabaseType');
+
+# For every supported attachment type: fetch the attachments with an id
+# above the last indexed one, drop the filtered ones, extract their text
+# and hand it to the DB specific processor. finalize() runs only when at
+# least one attachment has been processed; clean() runs always.
+my @types = qw(text html);
+foreach my $type ( @types ) {
+    my $attaches = attachments($type);
+    $attaches->limit( column => 'id', operator => '>', value => last_indexed($type) );
+    $attaches->order_by( column => 'id', order => 'asc' );
+    $attaches->rows_per_page( $opts{'limit'} || 100 );
+
+    my $found = 0;
+    # NOTE: nothing but debug() may print here, STDOUT is also where
+    # finalize_mysql writes the generated XML document.
+    while ( my $attachment = $attaches->next ) {
+        debug("Found attachment #". $attachment->id );
+        next if filter( $type, $attachment );
+        debug("Attachment #". $attachment->id ." hasn't been filtered" );
+        my $txt = extract($type, $attachment) or next;
+        debug("Extracted text from attachment #". $attachment->id );
+        $found++;
+        process( $type, $attachment, $txt );
+        debug("Processed attachment #". $attachment->id );
+    }
+    finalize( $type, $attaches ) if $found;
+    clean( $type );
+}
+
+# Build a collection of attachments that belong to transactions of tickets
+# that are not deleted, then let attachments_<type> narrow it down to the
+# requested type. Croaks when no such function exists for $type.
+sub attachments {
+    my $type = shift;
+    my $res = RT::Model::AttachmentCollection->new( current_user => RT->system_user );
+
+    my $txn_alias = $res->join_transactions;
+    $res->limit( alias => $txn_alias, column => 'object_type', value => 'RT::Model::Ticket' );
+    my $ticket_alias = $res->join(
+        alias1 => $txn_alias, column1 => 'object_id',
+        table2 => RT::Model::TicketCollection->new, column2 => 'id'
+    );
+    $res->limit( alias => $ticket_alias, column => 'status', operator => '!=', value => 'deleted' );
+
+    return goto_specific(
+        suffix    => $type,
+        error     => "Don't know how to find $type attachments",
+        arguments => [$res],
+    );
+}
+
+# Dispatch to last_indexed_<db type>; the argument list (starting with the
+# attachment type) is forwarded untouched. Croaks when the DB type has no
+# implementation.
+sub last_indexed {
+    my $type = $_[0];
+
+    my %dispatch = (
+        arguments => \@_,
+        error     => "Don't know how to find last indexed $type attachment for $db_type DB",
+        suffix    => $db_type,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatch to filter_<type> with the remaining arguments. A type without a
+# filter function yields undef, i.e. nothing is filtered out.
+sub filter {
+    my ( $type, @rest ) = @_;
+
+    my %dispatch = (
+        arguments => \@rest,
+        suffix    => $type,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatch to extract_<type> with the remaining arguments. Croaks when
+# there is no extractor for the type.
+sub extract {
+    my ( $type, @rest ) = @_;
+
+    my %dispatch = (
+        arguments => \@rest,
+        error     => "No way to convert $type attachment into text",
+        suffix    => $type,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatch to process_<db type> with all arguments. Croaks when the DB
+# type has no processor.
+sub process {
+    my %dispatch = (
+        arguments => \@_,
+        error     => "No processer for $db_type DB",
+        suffix    => $db_type,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatch to finalize_<db type> with all arguments; a DB type without a
+# finalizer is silently skipped (undef is returned).
+sub finalize {
+    my %dispatch = (
+        arguments => \@_,
+        suffix    => $db_type,
+    );
+    return goto_specific(%dispatch);
+}
+
+# Dispatch to clean_<db type> with all arguments; a DB type without a
+# cleaner is silently skipped (undef is returned).
+# goto_specific only understands 'suffix', so the DB type has to be passed
+# under that key or the cleaner is never found.
+sub clean {
+    return goto_specific(
+        suffix    => $db_type,
+        arguments => \@_,
+    );
+}
+
+# mysql specific functions. The bare block keeps the lexical $doc, the XML
+# document being built, private to these functions.
+{
+# Returns the value stored under $type in the 'LastIndexedAttachments'
+# system attribute, or 0 when there is no such attribute or value.
+# NOTE(review): $attr->{ $type } reads the attribute object's own hash, not
+# a content accessor - confirm this is where the value lives.
+sub last_indexed_mysql {
+ my $type = shift;
+ my $attr = RT->system->first_attribute('LastIndexedAttachments');
+ return 0 unless $attr;
+ return 0 unless exists $attr->{ $type };
+ return $attr->{ $type } || 0;
+}
+
+# Appends a <sphinx:document> element for the attachment to the shared
+# document: the id attribute is the attachment id, the 'content' child
+# holds the extracted text ($text is a reference to a string).
+sub process_mysql {
+ my ($type, $attachment, $text) = (@_);
+
+ my $doc = sphinx_template();
+
+ my $element = $doc->createElement('sphinx:document');
+ $element->setAttribute( id => $attachment->id );
+ $element->appendTextChild( content => $$text );
+
+ $doc->documentElement->appendChild( $element );
+}
+
+# Shared XML document; created lazily by sphinx_template, dropped by
+# clean_mysql.
+my $doc = undef;
+# Returns the shared document, creating it on first use with a
+# <sphinx:docset> root and a <sphinx:schema> declaring the 'content' field.
+sub sphinx_template {
+ return $doc if $doc;
+
+ require XML::LibXML;
+ $doc = XML::LibXML::Document->new('1.0', 'UTF-8');
+ my $root = $doc->createElement('sphinx:docset');
+ $doc->setDocumentElement( $root );
+
+ my $schema = $doc->createElement('sphinx:schema');
+ $root->appendChild( $schema );
+ foreach ( qw(content) ) {
+ my $field = $doc->createElement('sphinx:field');
+ $field->setAttribute( name => $_ );
+ $schema->appendChild( $field );
+ }
+
+ return $doc;
+}
+
+# Writes the shared document to STDOUT; the arguments are not used.
+sub finalize_mysql {
+ my ($type, $attachments) = @_;
+ sphinx_template()->toFH(*STDOUT, 1);
+}
+
+# Drops the shared document so the next sphinx_template call starts anew.
+sub clean_mysql {
+ $doc = undef;
+}
+
+}
+
+# Returns id of the last attachment of the given type that already has a
+# value in the full text index column, or 0 when nothing is indexed yet.
+# The index column is looked up in the separate index table when
+# 'Table' is configured, in the attachments table otherwise.
+sub last_indexed_pg {
+    my $type = shift;
+    my $attachments = attachments( $type );
+
+    # same fallback as the search code uses when 'Column' is not configured
+    my $column = $fts_config->{'Column'} || 'fts_index';
+
+    my $alias = 'main';
+    if ( $fts_config->{'Table'} ) {
+        $alias = $attachments->join(
+            type    => 'left',
+            column1 => 'id',
+            table2  => $fts_config->{'Table'},
+            column2 => 'id',
+        );
+    }
+    $attachments->limit( alias => $alias, column => $column, operator => 'IS NOT', value => 'NULL' );
+    $attachments->order_by( column => 'id', order => 'desc' );
+    my $res = $attachments->first;
+    return 0 unless $res;
+    return $res->id;
+}
+
+# Stores to_tsvector() of the extracted text ($text is a reference to a
+# string) for the attachment. With a separate index table the row is
+# updated when it exists and inserted otherwise; without one the column of
+# the Attachments table is updated. Dies when the statement fails.
+sub process_pg {
+    my ($type, $attachment, $text) = (@_);
+
+    my $dbh = Jifty->handle->dbh;
+    my $table = $fts_config->{'Table'};
+    # same fallback as the search code uses when 'Column' is not configured
+    my $column = $fts_config->{'Column'} || 'fts_index';
+
+    my $query;
+    if ( $table ) {
+        my @row = $dbh->selectrow_array("SELECT id FROM $table WHERE id = ?", undef, $attachment->id);
+        if ( @row ) {
+            $query = "UPDATE $table SET $column = to_tsvector(?) WHERE id = ?";
+        } else {
+            $query = "INSERT INTO $table($column, id) VALUES(to_tsvector(?), ?)";
+        }
+    } else {
+        $query = "UPDATE Attachments SET $column = to_tsvector(?) WHERE id = ?";
+    }
+
+    my $status = $dbh->do( $query, undef, $$text, $attachment->id );
+    unless ( $status ) {
+        die "error: ". $dbh->errstr;
+    }
+}
+
+# Narrow the given collection down to text/plain attachments and return it.
+sub attachments_text {
+    my ($collection) = @_;
+
+    $collection->limit(
+        column => 'content_type',
+        value  => 'text/plain',
+    );
+
+    return $collection;
+}
+
+# Returns a reference to the attachment's content, or undef when the
+# content is undefined or empty.
+sub extract_text {
+    my ($attachment) = @_;
+
+    my $body = $attachment->content;
+    if ( !defined $body || !length $body ) {
+        return undef;
+    }
+
+    return \$body;
+}
+
+# Narrow the given collection down to text/html attachments and return it.
+sub attachments_html {
+    my ($collection) = @_;
+
+    $collection->limit(
+        column => 'content_type',
+        value  => 'text/html',
+    );
+
+    return $collection;
+}
+
+# Returns 1 when the html attachment should be skipped, 0 otherwise.
+# An html part is skipped when its parent is a multipart/alternative
+# container.
+sub filter_html {
+    my $attachment = shift;
+    if ( my $parent = $attachment->parent ) {
+        # skip html parts that are alternatives; a missing content type
+        # is treated as "not an alternative"
+        return 1 if $parent->id
+            && lc( $parent->content_type || '' ) eq 'multipart/alternative';
+    }
+    return 0;
+}
+
+# Returns a reference to the attachment's content as is, or undef when the
+# content is undefined or empty.
+sub extract_html {
+    my ($attachment) = @_;
+
+    my $body = $attachment->content;
+    if ( !defined $body || !length $body ) {
+        return undef;
+    }
+
+    # TODO: html -> text
+    return \$body;
+}
+
+# Dispatcher behind the generic entry points. Takes named arguments:
+#   suffix    - lowercased and appended, after "_", to the name of the
+#               calling function to form the name of the target function
+#   arguments - array reference, becomes @_ of the target function
+#   error     - optional; message to croak with when the target function
+#               is not defined, without it undef is returned instead
+# Must be called directly from the function whose name is the prefix, as
+# that name is taken from the call stack.
+sub goto_specific {
+ my %args = (@_);
+
+# name of the sub that called us, with the package part stripped
+ my $func = (caller(1))[3];
+ $func =~ s/.*:://;
+ my $call = $func ."_". lc $args{'suffix'};
+ unless ( defined &$call ) {
+ return undef unless $args{'error'};
+ require Carp; Carp::croak( $args{'error'} );
+ }
+# hand control over to the target function with the requested arguments
+ @_ = @{ $args{'arguments'} };
+ goto &$call;
+}
+
+
+# helper functions: each one passes its arguments through _() and returns 1.
+# verbose/debug print to STDOUT when the matching option is set;
+# error/warning log via Jifty and then behave like verbose.
+sub verbose { print _(@_), "\n" if $opts{verbose}; 1 }
+sub debug { print _(@_), "\n" if $opts{debug}; 1 }
+sub error { Jifty->log->error(_(@_)); verbose(@_); 1 }
+sub warning { Jifty->log->warn(_(@_)); verbose(@_); 1 }
+
+=head1 NAME
+
+rt-fulltext-indexer - Indexer for full text search
+
+=head1 SYNOPSIS
+
+ /opt/rt3/local/sbin/rt-fulltext-indexer --help
+
+ /opt/rt3/local/sbin/rt-fulltext-indexer --limit 100
+
+=head1 DESCRIPTION
+
+=cut
+
Added: rt/3.999/trunk/sbin/rt-setup-fulltext-index
==============================================================================
--- (empty file)
+++ rt/3.999/trunk/sbin/rt-setup-fulltext-index Thu Apr 9 13:29:07 2009
@@ -0,0 +1,274 @@
+#!/usr/bin/env perl
+# BEGIN BPS TAGGED BLOCK {{{
+#
+# COPYRIGHT:
+#
+# This software is Copyright (c) 1996-2008 Best Practical Solutions, LLC
+# <jesse at bestpractical.com>
+#
+# (Except where explicitly superseded by other copyright notices)
+#
+#
+# LICENSE:
+#
+# This work is made available to you under the terms of Version 2 of
+# the GNU General Public License. A copy of that license should have
+# been provided with this software, but in any event can be snarfed
+# from www.gnu.org.
+#
+# This work is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 or visit their web page on the internet at
+# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
+#
+#
+# CONTRIBUTION SUBMISSION POLICY:
+#
+# (The following paragraph is not intended to limit the rights granted
+# to you to modify and distribute this software under the terms of
+# the GNU General Public License and is only of importance to you if
+# you choose to contribute your changes and enhancements to the
+# community by submitting them to Best Practical Solutions, LLC.)
+#
+# By intentionally submitting any modifications, corrections or
+# derivatives to this work, or any other work intended for use with
+# Request Tracker, to Best Practical Solutions, LLC, you confirm that
+# you are the copyright holder for those contributions and you grant
+# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
+# royalty-free, perpetual, license to use, copy, create derivative
+# works based on those contributions, and sublicense and distribute
+# those contributions and any derivatives thereof.
+#
+# END BPS TAGGED BLOCK }}}
+use strict;
+use warnings;
+
+use RT;
+BEGIN {RT->init_jifty};
+use RT::Interface::CLI qw{ clean_env };
+
+use Getopt::Long;
+
+clean_env();
+RT::load_config();
+RT::init();
+
+no warnings 'once';
+
+# Read in the options.
+#   --dryrun    only show the schema change, don't apply it
+#   --verbose   enables output of the verbose() helper
+#   --debug     enables output of the debug() helper
+my %opts;
+GetOptions( \%opts, "help", "dryrun", "verbose", "debug" );
+
+if ($opts{'help'}) {
+    require Pod::Usage;
+    Pod::Usage->import;
+    pod2usage(-message => "RT full text index setup\n", -verbose => 1);
+    exit 1;
+}
+
+# helper functions: each one passes its arguments through _() and returns 1.
+# verbose/debug print to STDOUT when the matching option is set;
+# error/warning log via Jifty and then behave like verbose.
+sub verbose { print _(@_), "\n" if $opts{verbose}; 1 }
+sub debug { print _(@_), "\n" if $opts{debug}; 1 }
+sub error { Jifty->log->error(_(@_)); verbose(@_); 1 }
+sub warning { Jifty->log->warn(_(@_)); verbose(@_); 1 }
+
+# Default names of the index table and of the indexed column used below.
+my %default = (
+ Table => 'AttachmentsIndex',
+ Column => 'fts_index',
+);
+
+
+# Database specific setup. For mysql a SphinxSE table is created and a
+# simple sphinx config is printed; for Pg a table with a tsvector column is
+# created and index creation hints are printed. Other DBs are not supported.
+my $db_type = RT->config->get('DatabaseType');
+
+# file scoped so insert_schema() can see the handle as well
+my $dbh = Jifty->handle->dbh;
+
+if ( $db_type eq 'mysql' ) {
+    my $sphinx = ($dbh->selectrow_array("show variables like 'have_sphinx'"))[1];
+    unless ( lc( defined $sphinx ? $sphinx : '' ) eq 'yes' ) {
+        print STDERR "Mysql server you have compiled without sphinx storage engine (sphinxse).\n";
+        print STDERR "Either use system packages with sphinxse, binaries from Sphinx site
+ or compile mysql according to instructions in Sphinx's docs.\n";
+        exit 1;
+    }
+
+    my $table = prompt(
+        message => 'Enter name of a DB table that will be used to connect to the sphinx server',
+        default => $default{'Table'},
+    );
+    my $url = prompt(
+        message => 'Enter URL of the sphinx search server, it should be sphinx://<server>:<port>/<index name>. Simple config for this sphinx instance will be generated for you.',
+        default => 'sphinx://localhost:3312/rt',
+    );
+
+    my $schema = <<END;
+CREATE TABLE $table (
+ id INTEGER NOT NULL,
+ weight INTEGER NOT NULL,
+ $default{'Column'} VARCHAR(3072) NOT NULL,
+ INDEX($default{'Column'})
+) ENGINE=SPHINX CONNECTION="$url"
+END
+
+    print_rt_config( Table => $table, Column => $default{'Column'} );
+    insert_schema( $schema );
+
+    # Pull port and index name out of sphinx://<server>:<port>/<index name>.
+    # Parsed by hand: 'sphinx' is not a scheme with dedicated URI support
+    # and the index name must not carry the leading slash of the path.
+    my ( $port, $index ) = $url =~ m{\A sphinx:// [^:/]+ (?: : (\d+) )? / ([^/]+) /? \z}x;
+    die "Couldn't parse sphinx URL '$url'\n" unless defined $index;
+    $port = 3312 unless defined $port;
+
+    my %sphinx_conf = ();
+    $sphinx_conf{'host'} = RT->config->get('DatabaseHost');
+    $sphinx_conf{'db'} = RT->config->get('DatabaseName');
+    $sphinx_conf{'user'} = RT->config->get('DatabaseUser');
+    $sphinx_conf{'pass'} = RT->config->get('DatabasePassword');
+
+    print "Here is simple sphinx config, you can use it to index text/plain attachments in your DB."
+        ." This config is not ideal. You should read Sphinx docs to get better ideas.";
+    # Line continuations and sphinx's $id macro are escaped so they end up
+    # in the generated config instead of being interpolated by perl.
+    print <<END;
+
+source rt {
+ type = mysql
+
+ sql_host = $sphinx_conf{'host'}
+ sql_db = $sphinx_conf{'db'}
+ sql_user = $sphinx_conf{'user'}
+ sql_pass = $sphinx_conf{'pass'}
+
+ sql_query = \\
+ SELECT a.id, a.content FROM Attachments a \\
+ JOIN Transactions txn ON a.transaction_id = txn.id AND txn.object_type = 'RT::Model::Ticket' \\
+ JOIN Tickets t ON txn.object_id = t.id \\
+ WHERE a.content_type = 'text/plain' AND t.Status != 'deleted'
+
+ sql_query_info = SELECT * FROM Attachments WHERE id=\$id
+}
+
+index $index {
+ source = rt
+ path = $RT::VarPath/sphinx/index
+ docinfo = extern
+ charset_type = utf-8
+}
+
+indexer {
+ mem_limit = 32M
+}
+
+searchd {
+ port = $port
+ log = $RT::VarPath/sphinx/searchd.log
+ query_log = $RT::VarPath/sphinx/query.log
+ read_timeout = 5
+ max_children = 30
+ pid_file = $RT::VarPath/sphinx/searchd.pid
+ max_matches = 1000
+ seamless_rotate = 1
+ preopen_indexes = 0
+ unlink_old = 1
+}
+
+END
+
+}
+elsif ( $db_type eq 'Pg' ) {
+    my $table = prompt(
+        message => 'Enter name of a DB table that will be used to store the full text index',
+        default => $default{'Table'},
+    );
+
+    my $schema = <<END;
+CREATE TABLE $table (
+ id INTEGER NOT NULL,
+ $default{'Column'} tsvector
+)
+END
+
+    print_rt_config( Table => $table, Column => $default{'Column'} );
+
+    insert_schema( $schema );
+
+    print <<END;
+Now you have to create an index on the column. You have choice
+between GiST or GIN, the first is times slower to search, but
+it takes less place and faster to update. Anyway, both are faster
+then searches without them.
+
+Either run:
+
+ CREATE INDEX $default{'Column'}_idx ON $table USING gin($default{'Column'});
+
+or
+
+ CREATE INDEX $default{'Column'}_idx ON $table USING gist($default{'Column'});
+
+END
+}
+else {
+    die "Not yet supported";
+}
+
+# Ask the user a question on STDOUT and read one line of answer from STDIN.
+# Named arguments:
+#   message - text of the question
+#   default - optional; shown in brackets and returned on an empty answer
+# Returns the answer without the trailing newline. End of input is treated
+# as an empty answer.
+sub prompt {
+    my %args = @_;
+
+    local $| = 1;
+    print $args{'message'};
+    if ( $args{'default'} ) {
+        print "\n[". $args{'default'} .']: ';
+    } else {
+        print ":\n";
+    }
+
+    my $res = <STDIN>;
+    $res = '' unless defined $res;    # EOF, nothing to chomp
+    chomp $res;
+    return $args{'default'} if !$res && $args{'default'};
+    return $res;
+}
+
+# Print the %FullTextSearch snippet for the RT site config, filled in with
+# the 'Table' and 'Column' named arguments.
+sub print_rt_config {
+    my %args = @_;
+    my ( $table, $column ) = @args{ 'Table', 'Column' };
+
+    print <<END;
+
+Configure your RT via site config:
+set( %FullTextSearch,
+ Enable => 1,
+ Indexed => 1,
+ Table => '$table',
+ Column => '$column',
+);
+END
+
+}
+
+# Show the schema change and, unless --dryrun is given, apply it to the DB.
+# Dies when the statement fails.
+sub insert_schema {
+    my $schema = shift;
+    print "Going to do the following change in the DB:\n";
+    print $schema;
+    return if $opts{'dryrun'};
+
+    # fetch the handle here, none is in scope at this point of the file
+    my $dbh = Jifty->handle->dbh;
+    my $res = $dbh->do( $schema );
+    unless ( $res ) {
+        die "Couldn't create the table: ". $dbh->errstr;
+    }
+}
+
+=head1 NAME
+
+rt-setup-fulltext-index - Helps create indexes for full text search
+
+=head1 SYNOPSIS
+
+ /opt/rt3/local/sbin/rt-setup-fulltext-index
+
+=head1 DESCRIPTION
+
+=cut
More information about the Rt-commit
mailing list