[Bps-public-commit] rt-extension-tika branch, master, updated. e658fad7ec32e68d1d5dd21bd25bfe8a118c8e57
Dave Goehrig
dave at bestpractical.com
Tue Dec 6 12:04:07 EST 2016
The branch, master has been updated
via e658fad7ec32e68d1d5dd21bd25bfe8a118c8e57 (commit)
via 8766cb8750243cd82f9dddc12929b0e45fa0e17f (commit)
from f6e0e54dcd2efc6775015b673473bb6d2874a8e4 (commit)
Summary of changes:
Makefile.PL | 42 ++++++++++++++++++++++
lib/RT/Extension/Tika.pm | 17 ++++-----
sbin/rt-tika-fulltext-indexer | 19 +++++++---
...ulltext-indexer => rt-tika-fulltext-indexer.in} | 26 +++++++++-----
4 files changed, 79 insertions(+), 25 deletions(-)
copy sbin/{rt-tika-fulltext-indexer => rt-tika-fulltext-indexer.in} (95%)
- Log -----------------------------------------------------------------
commit 8766cb8750243cd82f9dddc12929b0e45fa0e17f
Author: Dave Goehrig <dave at bestpractical.com>
Date: Mon Dec 5 11:52:10 2016 -0500
Add substitution support
diff --git a/Makefile.PL b/Makefile.PL
index fe5c9f8..2f8498f 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -8,5 +8,47 @@ requires_rt '4.4.0';
rt_too_new '4.6.0';
install_share 'jar';
+my ($lib_path) = $INC{'RT.pm'} =~ /^(.*)[\\\/]/;
+my $local_lib_path = "$RT::LocalPath/lib";
+unshift @INC, $local_lib_path, $lib_path;
+
+use Config;
+my $secure_perl_path = $Config{perlpath};
+if ($^O ne 'VMS') {
+ $secure_perl_path .= $Config{_exe}
+ unless $secure_perl_path =~ m/$Config{_exe}$/i;
+}
+
+my @tools;
+use File::Find ();
+File::Find::find({
+ no_chdir => 1,
+ wanted => sub {
+ if ( -f $File::Find::name ) {
+ $File::Find::name =~ /([^\.]+)\.in$/;
+ push @tools, $1;
+ }
+ },
+ },
+ 'sbin'
+);
+
+use Data::Dumper;
+print STDERR Dumper \@tools;
+
+substitute(
+ {
+ PERL => $ENV{PERL} || $secure_perl_path,
+ RT_LIB_PATH => join(' ',$local_lib_path,$lib_path )
+ }, {
+ sufix => '.in'
+ },
+ @tools
+) if @tools;
+
+requires 'Apache::Tika';
+requires 'File::MimeInfo::Magic';
+requires 'IO::Scalar';
+
sign;
WriteAll;
diff --git a/lib/RT/Extension/Tika.pm b/lib/RT/Extension/Tika.pm
index 4fc5c2c..2d718c9 100644
--- a/lib/RT/Extension/Tika.pm
+++ b/lib/RT/Extension/Tika.pm
@@ -83,6 +83,10 @@ You can get a list of options (host, port, CORS) by running:
By default the server will listen on localhost:9998
+If you change the default host or port, you will need to set TikaUrl in your RT_SiteConfig.pm:
+
+ Set($TikaUrl, 'http://someotherhost:9998/');
+
=item Add the indexer to a cron job
In the directory /opt/rt4 you can run the indexer as:
@@ -128,7 +132,8 @@ sub extractFile {
sub extract {
my ($file) = @_;
- my $tika = Apache::Tika->new();
+ my $url = RT->Config->Get('TikaUrl') || 'http://localhost:9998/';
+ my $tika = Apache::Tika->new( url => $url );
my $io = new IO::Scalar \$file;
my $mime_type = mimetype($io);
diff --git a/sbin/rt-tika-fulltext-indexer b/sbin/rt-tika-fulltext-indexer
index 335e812..3f35d36 100755
--- a/sbin/rt-tika-fulltext-indexer
+++ b/sbin/rt-tika-fulltext-indexer
@@ -1,4 +1,6 @@
#!/usr/bin/perl
+### before: #!@PERL@
+#
# BEGIN BPS TAGGED BLOCK {{{
#
# COPYRIGHT:
@@ -50,6 +52,9 @@ use strict;
use warnings;
use 5.010;
+### after: use lib qw(@RT_LIB_PATH@);
+ use lib qw(/opt/rt4/local/lib /opt/rt4/lib);
+
# fix lib paths, some may be relative
BEGIN { # BEGIN RT CMD BOILERPLATE
require File::Spec;
@@ -65,7 +70,6 @@ BEGIN { # BEGIN RT CMD BOILERPLATE
}
unshift @INC, $lib;
}
-
}
use RT -init;
@@ -399,13 +403,18 @@ sub warning { $RT::Logger->warn(_(@_)); 1 }
=head1 NAME
-rt-fulltext-indexer - Indexer for full text search
+rt-tika-fulltext-indexer - Indexer for full text search using Apache Tika
=head1 DESCRIPTION
-This is a helper script to keep full text indexes in sync with data.
-Read F<docs/full_text_indexing.pod> for complete details on how and when
-to run it.
+This is a helper script to keep full text indexes in sync with data. It is
+largely a drop-in replacement for RT's rt-fulltext-indexer script.
+
+It makes use of Apache Tika L<http://tika.apache.org> to convert attachments
+to a plain text representation for searching.
+
+Read RT's F<docs/full_text_indexing.pod> for additional details on how and when
+to run it.
=cut
diff --git a/sbin/rt-tika-fulltext-indexer b/sbin/rt-tika-fulltext-indexer.in
similarity index 96%
copy from sbin/rt-tika-fulltext-indexer
copy to sbin/rt-tika-fulltext-indexer.in
index 335e812..93f53d6 100755
--- a/sbin/rt-tika-fulltext-indexer
+++ b/sbin/rt-tika-fulltext-indexer.in
@@ -1,4 +1,6 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
+### before: #!@PERL@
+#
# BEGIN BPS TAGGED BLOCK {{{
#
# COPYRIGHT:
@@ -50,6 +52,9 @@ use strict;
use warnings;
use 5.010;
+### after: use lib qw(@RT_LIB_PATH@);
+ use lib qw(/opt/rt4/local/lib /opt/rt4/lib);
+
# fix lib paths, some may be relative
BEGIN { # BEGIN RT CMD BOILERPLATE
require File::Spec;
@@ -65,7 +70,6 @@ BEGIN { # BEGIN RT CMD BOILERPLATE
}
unshift @INC, $lib;
}
-
}
use RT -init;
@@ -399,13 +403,18 @@ sub warning { $RT::Logger->warn(_(@_)); 1 }
=head1 NAME
-rt-fulltext-indexer - Indexer for full text search
+rt-tika-fulltext-indexer - Indexer for full text search using Apache Tika
=head1 DESCRIPTION
-This is a helper script to keep full text indexes in sync with data.
-Read F<docs/full_text_indexing.pod> for complete details on how and when
-to run it.
+This is a helper script to keep full text indexes in sync with data. It is
+largely a drop-in replacement for RT's rt-fulltext-indexer script.
+
+It makes use of Apache Tika L<http://tika.apache.org> to convert attachments
+to a plain text representation for searching.
+
+Read RT's F<docs/full_text_indexing.pod> for additional details on how and when
+to run it.
=cut
commit e658fad7ec32e68d1d5dd21bd25bfe8a118c8e57
Author: Dave Goehrig <dave at bestpractical.com>
Date: Tue Dec 6 12:03:42 2016 -0500
changing TikaMimeTypes to be full replacement
diff --git a/lib/RT/Extension/Tika.pm b/lib/RT/Extension/Tika.pm
index 2d718c9..baf3f3c 100644
--- a/lib/RT/Extension/Tika.pm
+++ b/lib/RT/Extension/Tika.pm
@@ -122,22 +122,12 @@ This is free software, licensed under:
=cut
-sub extractFile {
- my ($filename) = @_;
- open my $fh, "< $filename";
- my $file = do { local $/; <$fh> };
- close $fh;
- return extract($file);
-}
-
sub extract {
my ($file) = @_;
my $url = RT->Config->Get('TikaUrl') || 'http://localhost:9998/';
my $tika = Apache::Tika->new( url => $url );
-
my $io = new IO::Scalar \$file;
my $mime_type = mimetype($io);
-
return $tika->tika($file,$mime_type);
}
diff --git a/sbin/rt-tika-fulltext-indexer.in b/sbin/rt-tika-fulltext-indexer.in
index 93f53d6..79a0c5e 100755
--- a/sbin/rt-tika-fulltext-indexer.in
+++ b/sbin/rt-tika-fulltext-indexer.in
@@ -174,11 +174,10 @@ sub attachment_loop {
{
# Indexes all text/plain and text/html attachments
my $attachments = RT::Attachments->new( RT->SystemUser );
- my @extra = (RT->Config->Get('TikaMimeTypes'));
- my @MIMETYPES = ( 'text/plain', 'text/html','application/pdf',
+ my @MIMETYPES = (RT->Config->Get('TikaMimeTypes')) ||
+ ( 'text/plain', 'text/html','application/pdf',
'application/msword',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
- @extra
);
my $txn_alias = $attachments->Join(
ALIAS1 => 'main',
-----------------------------------------------------------------------
More information about the Bps-public-commit
mailing list