[Rt-commit] rt branch, 4.2/mysql-native-fts, created. rt-4.2.9-69-g93051de
Alex Vandiver
alexmv at bestpractical.com
Thu Jan 8 16:31:50 EST 2015
The branch, 4.2/mysql-native-fts has been created
at 93051de875cd60ea79fddc869b8e23c287d12870 (commit)
- Log -----------------------------------------------------------------
commit 09968885b1ce079a73644e554693346e7045bf87
Author: Alex Vandiver <alexmv at bestpractical.com>
Date: Thu Apr 3 19:15:10 2014 -0400
Support native FTS on MySQL 5.6 and above
diff --git a/docs/full_text_indexing.pod b/docs/full_text_indexing.pod
index 797f586..8f40fbe 100644
--- a/docs/full_text_indexing.pod
+++ b/docs/full_text_indexing.pod
@@ -61,15 +61,78 @@ C<cron>:
=head1 MYSQL
-MySQL does not support full-text indexing natively. However, it does
-integrate with the external Sphinx engine, available from
+MySQL does not support full-text indexing natively until version 5.6 and
+above. For prior versions, RT can integrate with the external Sphinx
+full-text search engine.
+
+=head2 MySQL 5.6 and above
+
+MySQL 5.6 includes full-text search of InnoDB tables. However, as RT
+marks attachment data as C<BINARY>, it cannot index this content without
+creating additional tables. To create the required table, run:
+
+ /opt/rt4/sbin/rt-setup-fulltext-index
+
+If you have a non-standard database administrator username or password,
+you may need to pass the C<--dba> or C<--dba-password> options:
+
+ /opt/rt4/sbin/rt-setup-fulltext-index --dba root --dba-password secret
+
+This will also output an appropriate C<%FullTextSearch> configuration to
+add to your F<RT_SiteConfig.pm>; you will need to restart your webserver
+after making these changes. However, the index will also need to be
+filled before it can be used. To update the index initially, run:
+
+ /opt/rt4/sbin/rt-fulltext-indexer --all
+
+This will tokenize and index all existing attachments in your database;
+it may take quite a while if your database already has a large number of
+tickets in it.
+
+=head3 Updating the index
+
+To keep the index up-to-date, you will need to run:
+
+ /opt/rt4/sbin/rt-fulltext-indexer
+
+...at regular intervals. By default, this will only tokenize up to 100
+tickets at a time; you can adjust this upwards by passing
+C<--limit 500>. Larger batch sizes will take longer and
+consume more memory.
+
+If there is already an instance of C<rt-fulltext-indexer> running, new
+ones will exit abnormally (with exit code 1) and the error message
+"rt-fulltext-indexer is already running." You can suppress this message
+and end those processes normally (with exit code 0) using the C<--quiet>
+option; this is particularly useful when running the command via
+C<cron>:
+
+ /opt/rt4/sbin/rt-fulltext-indexer --quiet
+
+=head3 Caveats
+
+Searching is done in "boolean mode." As such, the TicketSQL query
+C<Content LIKE 'winter 2014'> will return tickets with transactions that
+contain I<either> word. To find transactions which contain both (but
+not necessarily adjacent), use C<Content LIKE '+winter +2014'>. To find
+transactions containing the precise phrase, use C<Content LIKE '"winter
+2014"'>.
+
+See the MySQL documentation, at
+L<http://dev.mysql.com/doc/refman/5.6/en/fulltext-boolean.html>, for a
+list of the full capabilities.
+
+
+=head2 MySQL with Sphinx
+
+RT can also integrate with the external Sphinx engine, available from
L<http://sphinxsearch.com>. Unfortunately, Sphinx integration (using
SphinxSE) does require that you recompile MySQL from source. Most
distribution-provided packages for MySQL do not include SphinxSE
integration, merely the external Sphinx tools; these are not sufficient
for RT's needs.
-=head2 Compiling MySQL and SphinxSE
+=head3 Compiling MySQL and SphinxSE
MySQL 5.1 supports adding pluggable storage engines; after compiling
against the appropriate version of MySQL, the F<ha_sphinx.so> file is
@@ -83,7 +146,7 @@ versions may work as well. Complete compilation and installation
instructions for MySQL with SphinxSE can be found at
L<http://sphinxsearch.com/docs/current.html#sphinxse-mysql51>.
-=head2 Creating and configuring the index
+=head3 Creating and configuring the index
Once MySQL has been recompiled with SphinxSE, and Sphinx itself is
installed, you may create the required SphinxSE communication table via:
@@ -110,7 +173,7 @@ Finally, start the Sphinx search daemon:
searchd
-=head2 Updating the index
+=head3 Updating the index
To keep the index up-to-date, you will need to run:
@@ -119,7 +182,7 @@ To keep the index up-to-date, you will need to run:
...at regular intervals in order to pick up new and updated attachments
from RT's database. Failure to do so will result in stale data.
-=head2 Caveats
+=head3 Caveats
RT's integration with Sphinx relies on the use of a special index; there
exist queries where the MySQL optimizer elects to I<not> use that index,
diff --git a/lib/RT/Config.pm b/lib/RT/Config.pm
index b4ae6c8..555f81e 100644
--- a/lib/RT/Config.pm
+++ b/lib/RT/Config.pm
@@ -586,11 +586,25 @@ our %META;
$RT::Logger->error("No Table set for full-text index; disabling");
$v->{Enable} = $v->{Indexed} = 0;
} elsif ($v->{'Table'} eq "Attachments") {
- $RT::Logger->error("Table for full-text index is set to Attachments, not SphinxSE table; disabling");
+ $RT::Logger->error("Table for full-text index is set to Attachments, not FTS table; disabling");
$v->{Enable} = $v->{Indexed} = 0;
- } elsif (not $v->{'MaxMatches'}) {
- $RT::Logger->warn("No MaxMatches set for full-text index; defaulting to 10000");
- $v->{MaxMatches} = 10_000;
+ } else {
+ my (undef, $create) = eval { $RT::Handle->dbh->selectrow_array("SHOW CREATE TABLE " . $v->{Table}); };
+ my ($engine) = ($create||'') =~ /engine=(\S+)/i;
+ if (not $create) {
+ $RT::Logger->error("External table ".$v->{Table}." does not exist");
+ $v->{Enable} = $v->{Indexed} = 0;
+ } elsif (lc $engine eq "sphinx") {
+ # External Sphinx indexer
+ $v->{Sphinx} = 1;
+ unless ($v->{'MaxMatches'}) {
+ $RT::Logger->warn("No MaxMatches set for full-text index; defaulting to 10000");
+ $v->{MaxMatches} = 10_000;
+ }
+ } else {
+ # Internal, one-column table
+ $v->{Column} = 'Content';
+ }
}
} else {
$RT::Logger->error("Indexed full-text-search not supported for $dbtype");
diff --git a/lib/RT/SearchBuilder.pm b/lib/RT/SearchBuilder.pm
index 073b01d..202eb6f 100644
--- a/lib/RT/SearchBuilder.pm
+++ b/lib/RT/SearchBuilder.pm
@@ -897,7 +897,8 @@ sub Limit {
|(NOT\s*)?MATCHES
|IS(\s*NOT)?
|(NOT\s*)?IN
- |\@\@)$/ix) {
+ |\@\@
+ |AGAINST)$/ix) {
$RT::Logger->crit("Possible SQL injection attack: $ARGS{FIELD} $ARGS{OPERATOR}");
%ARGS = (
%ARGS,
diff --git a/lib/RT/Tickets.pm b/lib/RT/Tickets.pm
index a6b07e4..58900a2 100644
--- a/lib/RT/Tickets.pm
+++ b/lib/RT/Tickets.pm
@@ -927,6 +927,28 @@ sub _TransContentLimit {
QUOTEVALUE => 0,
);
}
+ elsif ( $db_type eq 'mysql' and not $config->{Sphinx}) {
+ my $dbh = $RT::Handle->dbh;
+ $self->Limit(
+ %rest,
+ FUNCTION => "MATCH($alias.Content)",
+ OPERATOR => 'AGAINST',
+ VALUE => "(". $dbh->quote($value) ." IN BOOLEAN MODE)",
+ QUOTEVALUE => 0,
+ );
+ # As with Oracle, above, this forces the LEFT JOINs into
+ # JOINS, which allows the FULLTEXT index to be used.
+ # Orthogonally, the IS NOT NULL clause also helps the
+ # optimizer decide to use the index.
+ $self->Limit(
+ ENTRYAGGREGATOR => 'AND',
+ ALIAS => $alias,
+ FIELD => "Content",
+ OPERATOR => 'IS NOT',
+ VALUE => 'NULL',
+ QUOTEVALUE => 0,
+ );
+ }
elsif ( $db_type eq 'mysql' ) {
# XXX: We could theoretically skip the join to Attachments,
# and have Sphinx simply index and group by the TicketId,
diff --git a/sbin/rt-fulltext-indexer.in b/sbin/rt-fulltext-indexer.in
index 98c9409..189c9ae 100644
--- a/sbin/rt-fulltext-indexer.in
+++ b/sbin/rt-fulltext-indexer.in
@@ -90,6 +90,14 @@ if ( $db_type eq 'Pg' ) {
);
push @OPT_LIST, 'limit=i', 'all!';
}
+elsif ( $db_type eq 'mysql' ) {
+ %OPT = (
+ %OPT,
+ limit => 0,
+ all => 0,
+ );
+ push @OPT_LIST, 'limit=i', 'all!';
+}
elsif ( $db_type eq 'Oracle' ) {
%OPT = (
%OPT,
@@ -147,7 +155,7 @@ if ( $db_type eq 'Oracle' ) {
$index, $OPT{'memory'}
);
exit;
-} elsif ( $db_type eq 'mysql' ) {
+} elsif ( $fts_config->{Sphinx} ) {
print STDERR <<EOT;
Updates to the external Sphinx index are done via running the sphinx
@@ -251,6 +259,23 @@ sub process {
);
}
+sub last_indexed_mysql { last_indexed_pg(@_); }
+sub process_mysql {
+ my ($type, $attachment, $text) = (@_);
+
+ my $dbh = $RT::Handle->dbh;
+ my $table = $fts_config->{'Table'};
+
+ my $query;
+ if ( my ($id) = $dbh->selectrow_array("SELECT id FROM $table WHERE id = ?", undef, $attachment->id) ) {
+ $query = "UPDATE $table SET Content = ? WHERE id = ?";
+ } else {
+ $query = "INSERT INTO $table(Content, id) VALUES(?, ?)";
+ }
+
+ $dbh->do( $query, undef, $$text, $attachment->id );
+}
+
sub last_indexed_pg {
my $type = shift;
my $attachments = attachments( $type );
diff --git a/sbin/rt-setup-fulltext-index.in b/sbin/rt-setup-fulltext-index.in
index 8870de9..2d6d6be 100644
--- a/sbin/rt-setup-fulltext-index.in
+++ b/sbin/rt-setup-fulltext-index.in
@@ -131,7 +131,48 @@ my $dbh = $RT::Handle->dbh;
$dbh->{'RaiseError'} = 1;
$dbh->{'PrintError'} = 1;
+# MySQL could either be native or Sphinx; find out which
+if ($DB{'type'} eq "mysql") {
+ my $index_type = lc($OPT{'index-type'} || '');
+
+ # Default to sphinx on < 5.6, and error if they provided mysql
+ if ($RT::Handle->dbh->{mysql_serverversion} < 50600) {
+ $index_type ||= 'sphinx';
+ die "Native MySQL indexing is only supported in MySQL 5.6 and above"
+ if $index_type ne 'sphinx';
+ }
+
+ while ( $index_type ne 'sphinx' and $index_type ne 'mysql' ) {
+ $index_type = lc prompt(
+ message => "MySQL 5.6 and above support native full-text indexing; for compatibility\n"
+ ."with earlier versions of RT, the external Sphinx indexer is still supported.\n"
+ ."Which indexing solution would you prefer?",
+ default => 'mysql',
+ silent => !$OPT{'ask'},
+ );
+ };
+ $DB{'type'} = $index_type;
+}
+
if ( $DB{'type'} eq 'mysql' ) {
+ # MySQL 5.6 has FTS on InnoDB "text" columns -- which the
+ # Attachments table doesn't have, but we can make it have.
+ my $table = $OPT{'table'} || prompt(
+ message => "Enter the name of a new MySQL table that will be used to store the\n"
+ . "full-text content and indexes:",
+ default => $DEFAULT{'table'},
+ silent => !$OPT{'ask'},
+ );
+ do_error_is_ok( dba_handle() => "DROP TABLE $table" )
+ unless $OPT{'dryrun'};
+
+ my $schema = "CREATE TABLE $table ( "
+ ."id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,"
+ ."Content LONGTEXT, FULLTEXT(Content) ) ENGINE=InnoDB CHARACTER SET utf8";
+ insert_schema( $schema );
+
+ print_rt_config( Table => $table );
+} elsif ($DB{'type'} eq 'sphinx') {
check_sphinx();
my $table = $OPT{'table'} || prompt(
message => "Enter name of a new MySQL table that will be used to connect to the\n"
diff --git a/t/fts/indexed_mysql.t b/t/fts/indexed_mysql.t
new file mode 100644
index 0000000..a0145a9
--- /dev/null
+++ b/t/fts/indexed_mysql.t
@@ -0,0 +1,83 @@
+
+use strict;
+use warnings;
+
+use RT::Test tests => undef;
+plan skip_all => 'Not mysql' unless RT->Config->Get('DatabaseType') eq 'mysql';
+plan skip_all => "Need mysql 5.6 or higher"
+ unless $RT::Handle->dbh->{mysql_serverversion} > 50600;
+
+RT->Config->Set( FullTextSearch => Enable => 1, Indexed => 1, Table => 'AttachmentsIndex' );
+
+setup_indexing();
+
+my $q = RT::Test->load_or_create_queue( Name => 'General' );
+ok $q && $q->id, 'loaded or created queue';
+my $queue = $q->Name;
+
+sub setup_indexing {
+ my %args = (
+ 'no-ask' => 1,
+ command => $RT::SbinPath .'/rt-setup-fulltext-index',
+ dba => $ENV{'RT_DBA_USER'},
+ 'dba-password' => $ENV{'RT_DBA_PASSWORD'},
+ );
+ my ($exit_code, $output) = RT::Test->run_and_capture( %args );
+ ok(!$exit_code, "setted up index") or diag "output: $output";
+}
+
+sub sync_index {
+ my %args = (
+ command => $RT::SbinPath .'/rt-fulltext-indexer',
+ );
+ my ($exit_code, $output) = RT::Test->run_and_capture( %args );
+ ok(!$exit_code, "setted up index") or diag "output: $output";
+}
+
+sub run_tests {
+ my @test = @_;
+ while ( my ($query, $checks) = splice @test, 0, 2 ) {
+ run_test( $query, %$checks );
+ }
+}
+
+my @tickets;
+sub run_test {
+ my ($query, %checks) = @_;
+ my $query_prefix = join ' OR ', map 'id = '. $_->id, @tickets;
+
+ my $tix = RT::Tickets->new(RT->SystemUser);
+ $tix->FromSQL( "( $query_prefix ) AND ( $query )" );
+
+ my $error = 0;
+
+ my $count = 0;
+ $count++ foreach grep $_, values %checks;
+ is($tix->Count, $count, "found correct number of ticket(s) by '$query'") or $error = 1;
+
+ my $good_tickets = ($tix->Count == $count);
+ while ( my $ticket = $tix->Next ) {
+ next if $checks{ $ticket->Subject };
+ diag $ticket->Subject ." ticket has been found when it's not expected";
+ $good_tickets = 0;
+ }
+ ok( $good_tickets, "all tickets are good with '$query'" ) or $error = 1;
+
+ diag "Wrong SQL query for '$query':". $tix->BuildSelectQuery if $error;
+}
+
+@tickets = RT::Test->create_tickets(
+ { Queue => $q->id },
+ { Subject => 'book', Content => 'book' },
+ { Subject => 'bar', Content => 'bar' },
+);
+sync_index();
+
+run_tests(
+ "Content LIKE 'book'" => { book => 1, bar => 0 },
+ "Content LIKE 'bar'" => { book => 0, bar => 1 },
+);
+
+@tickets = ();
+
+done_testing;
commit 93051de875cd60ea79fddc869b8e23c287d12870
Author: Alex Vandiver <alexmv at bestpractical.com>
Date: Mon Mar 17 21:32:05 2014 -0400
Using a separate MyISAM table, we can also support FTS on MySQL < 5.6
diff --git a/docs/full_text_indexing.pod b/docs/full_text_indexing.pod
index 8f40fbe..339625b 100644
--- a/docs/full_text_indexing.pod
+++ b/docs/full_text_indexing.pod
@@ -61,15 +61,15 @@ C<cron>:
=head1 MYSQL
-MySQL does not support full-text indexing natively until version 5.6 and
-above. For prior versions, RT can integrate with the external Sphinx
-full-text search engine.
+On MySQL, full-text search can either be done using native support
+(which may use MyISAM tables on pre-5.6 versions of MySQL), or RT can
+integrate with the external Sphinx full-text search engine.
-=head2 MySQL 5.6 and above
+=head2 Native MySQL
-MySQL 5.6 includes full-text search of InnoDB tables. However, as RT
-marks attachment data as C<BINARY>, it cannot index this content without
-creating additional tables. To create the required table, run:
+As RT marks attachment data as C<BINARY>, MySQL cannot index this
+content without creating an additional table. To create the required
+table (which is InnoDB on versions of MySQL which support it), run:
/opt/rt4/sbin/rt-setup-fulltext-index
diff --git a/sbin/rt-setup-fulltext-index.in b/sbin/rt-setup-fulltext-index.in
index 2d6d6be..b5659c7 100644
--- a/sbin/rt-setup-fulltext-index.in
+++ b/sbin/rt-setup-fulltext-index.in
@@ -136,17 +136,23 @@ if ($DB{'type'} eq "mysql") {
my $index_type = lc($OPT{'index-type'} || '');
# Default to sphinx on < 5.6, and error if they provided mysql
+ my $msg;
if ($RT::Handle->dbh->{mysql_serverversion} < 50600) {
- $index_type ||= 'sphinx';
- die "Native MySQL indexing is only supported in MySQL 5.6 and above"
- if $index_type ne 'sphinx';
+ $msg = "Complete support for full-text search requires MySQL 5.6 or higher. For prior\n"
+ ."versions such as yours, full-text indexing can either be provided using MyISAM\n"
+ ."tables, or the external Sphinx indexer. Using MyISAM tables requires that your\n"
+ ."database be tuned to support them, as RT uses InnoDB tables for all other content.\n"
+ ."Using Sphinx will require recompiling MySQL. Which indexing solution would you\n"
+ ."prefer?"
+ } else {
+ $msg = "MySQL 5.6 and above support native full-text indexing; for compatibility\n"
+ ."with earlier versions of RT, the external Sphinx indexer is still supported.\n"
+ ."Which indexing solution would you prefer?"
}
while ( $index_type ne 'sphinx' and $index_type ne 'mysql' ) {
$index_type = lc prompt(
- message => "MySQL 5.6 and above support native full-text indexing; for compatibility\n"
- ."with earlier versions of RT, the external Sphinx indexer is still supported.\n"
- ."Which indexing solution would you prefer?",
+ message => $msg,
default => 'mysql',
silent => !$OPT{'ask'},
);
@@ -166,9 +172,10 @@ if ( $DB{'type'} eq 'mysql' ) {
do_error_is_ok( dba_handle() => "DROP TABLE $table" )
unless $OPT{'dryrun'};
+ my $engine = $RT::Handle->dbh->{mysql_serverversion} < 50600 ? "MyISAM" : "InnoDB";
my $schema = "CREATE TABLE $table ( "
."id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,"
- ."Content LONGTEXT, FULLTEXT(Content) ) ENGINE=InnoDB CHARACTER SET utf8";
+ ."Content LONGTEXT, FULLTEXT(Content) ) ENGINE=$engine CHARACTER SET utf8";
insert_schema( $schema );
print_rt_config( Table => $table );
diff --git a/t/fts/indexed_mysql.t b/t/fts/indexed_mysql.t
index a0145a9..672b220 100644
--- a/t/fts/indexed_mysql.t
+++ b/t/fts/indexed_mysql.t
@@ -4,8 +4,6 @@ use warnings;
use RT::Test tests => undef;
plan skip_all => 'Not mysql' unless RT->Config->Get('DatabaseType') eq 'mysql';
-plan skip_all => "Need mysql 5.6 or higher"
- unless $RT::Handle->dbh->{mysql_serverversion} > 50600;
RT->Config->Set( FullTextSearch => Enable => 1, Indexed => 1, Table => 'AttachmentsIndex' );
@@ -68,14 +66,16 @@ sub run_test {
@tickets = RT::Test->create_tickets(
{ Queue => $q->id },
- { Subject => 'book', Content => 'book' },
- { Subject => 'bar', Content => 'bar' },
+ { Subject => 'first', Content => 'english' },
+ { Subject => 'second', Content => 'french' },
+ { Subject => 'third', Content => 'spanish' },
+ { Subject => 'fourth', Content => 'german' },
);
sync_index();
run_tests(
- "Content LIKE 'book'" => { book => 1, bar => 0 },
- "Content LIKE 'bar'" => { book => 0, bar => 1 },
+ "Content LIKE 'english'" => { first => 1, second => 0, third => 0, fourth => 0 },
+ "Content LIKE 'french'" => { first => 0, second => 1, third => 0, fourth => 0 },
);
@tickets = ();
diff --git a/t/fts/indexed_sphinx.t b/t/fts/indexed_sphinx.t
index 0a4f026..a09b0d2 100644
--- a/t/fts/indexed_sphinx.t
+++ b/t/fts/indexed_sphinx.t
@@ -15,8 +15,6 @@ plan skip_all => "No searchd and indexer under PATH"
plan tests => 15;
-RT->Config->Set( FullTextSearch => Enable => 1, Indexed => 1, Table => 'AttachmentsIndex', MaxMatches => 1000 );
-
setup_indexing();
my $q = RT::Test->load_or_create_queue( Name => 'General' );
@@ -33,6 +31,7 @@ sub setup_indexing {
dba => $ENV{'RT_DBA_USER'},
'dba-password' => $ENV{'RT_DBA_PASSWORD'},
url => "sphinx://127.0.0.1:$port/rt",
+ 'index-type' => 'sphinx',
);
ok(!$exit_code, "setted up index");
diag "output: $output" if $ENV{'TEST_VERBOSE'};
@@ -118,6 +117,8 @@ sub run_test {
);
sync_index();
+RT->Config->Set( FullTextSearch => Enable => 1, Indexed => 1, Table => 'AttachmentsIndex', MaxMatches => 1000, Sphinx => 1 );
+
run_tests(
"Content LIKE 'book'" => { book => 1, bar => 0 },
"Content LIKE 'bar'" => { book => 0, bar => 1 },
-----------------------------------------------------------------------
More information about the rt-commit
mailing list