[Bps-public-commit] rt-extension-tika branch, master, updated. e658fad7ec32e68d1d5dd21bd25bfe8a118c8e57

Dave Goehrig dave at bestpractical.com
Tue Dec 6 12:04:07 EST 2016


The branch, master has been updated
       via  e658fad7ec32e68d1d5dd21bd25bfe8a118c8e57 (commit)
       via  8766cb8750243cd82f9dddc12929b0e45fa0e17f (commit)
      from  f6e0e54dcd2efc6775015b673473bb6d2874a8e4 (commit)

Summary of changes:
 Makefile.PL                                        | 42 ++++++++++++++++++++++
 lib/RT/Extension/Tika.pm                           | 17 ++++-----
 sbin/rt-tika-fulltext-indexer                      | 19 +++++++---
 ...ulltext-indexer => rt-tika-fulltext-indexer.in} | 26 +++++++++-----
 4 files changed, 79 insertions(+), 25 deletions(-)
 copy sbin/{rt-tika-fulltext-indexer => rt-tika-fulltext-indexer.in} (95%)

- Log -----------------------------------------------------------------
commit 8766cb8750243cd82f9dddc12929b0e45fa0e17f
Author: Dave Goehrig <dave at bestpractical.com>
Date:   Mon Dec 5 11:52:10 2016 -0500

    Add substitution support

diff --git a/Makefile.PL b/Makefile.PL
index fe5c9f8..2f8498f 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -8,5 +8,47 @@ requires_rt '4.4.0';
 rt_too_new '4.6.0';
 install_share 'jar';
 
+my ($lib_path) = $INC{'RT.pm'} =~ /^(.*)[\\\/]/;
+my $local_lib_path = "$RT::LocalPath/lib";
+unshift @INC, $local_lib_path, $lib_path;
+
+use Config;
+my $secure_perl_path = $Config{perlpath};
+if ($^O ne 'VMS') {
+    $secure_perl_path .= $Config{_exe}
+        unless $secure_perl_path =~ m/$Config{_exe}$/i;
+}
+
+my @tools;
+use File::Find ();
+File::Find::find({
+	no_chdir => 1,
+	wanted => sub {
+		if ( -f $File::Find::name ) {
+			$File::Find::name =~ /([^\.]+)\.in$/;
+			push @tools, $1;
+		}
+	},
+	},
+	'sbin'
+);
+
+use Data::Dumper;
+print STDERR Dumper  \@tools;
+
+substitute(
+	{ 
+		PERL => $ENV{PERL} || $secure_perl_path,
+		RT_LIB_PATH => join(' ',$local_lib_path,$lib_path ) 
+	}, {
+		sufix => '.in'
+	},
+	@tools
+) if @tools; 
+
+requires 'Apache::Tika';
+requires 'File::MimeInfo::Magic';
+requires 'IO::Scalar';
+
 sign;
 WriteAll;
diff --git a/lib/RT/Extension/Tika.pm b/lib/RT/Extension/Tika.pm
index 4fc5c2c..2d718c9 100644
--- a/lib/RT/Extension/Tika.pm
+++ b/lib/RT/Extension/Tika.pm
@@ -83,6 +83,10 @@ You can get a list of options (host, port, CORS) by running:
 
 By default the server will listen on localhost:9998
 
+If you change the default host or port you will need to set TikaUrl in your RT_SiteConfig.pm:
+
+    Set($TikaUrl, 'http://someotherhost:9998/');
+
 =item Add the indexer to a cron job
 
 In the directory /opt/rt4 you can run the indexer as:
@@ -128,7 +132,8 @@ sub extractFile {
 
 sub extract {
 	my ($file) = @_;
-	my $tika = Apache::Tika->new();
+	my $url = RT->Config->Get('TikaUrl') || 'http://localhost:9998/';
+	my $tika = Apache::Tika->new( url => $url );
 
 	my $io = new IO::Scalar \$file;
         my $mime_type = mimetype($io);
diff --git a/sbin/rt-tika-fulltext-indexer b/sbin/rt-tika-fulltext-indexer
index 335e812..3f35d36 100755
--- a/sbin/rt-tika-fulltext-indexer
+++ b/sbin/rt-tika-fulltext-indexer
@@ -1,4 +1,6 @@
 #!/usr/bin/perl
+### before: #!@PERL@
+#
 # BEGIN BPS TAGGED BLOCK {{{
 #
 # COPYRIGHT:
@@ -50,6 +52,9 @@ use strict;
 use warnings;
 use 5.010;
 
+### after:     use lib qw(@RT_LIB_PATH@);
+    use lib qw(/opt/rt4/local/lib /opt/rt4/lib);
+
 # fix lib paths, some may be relative
 BEGIN { # BEGIN RT CMD BOILERPLATE
     require File::Spec;
@@ -65,7 +70,6 @@ BEGIN { # BEGIN RT CMD BOILERPLATE
         }
         unshift @INC, $lib;
     }
-
 }
 
 use RT -init;
@@ -399,13 +403,18 @@ sub warning  { $RT::Logger->warn(_(@_)); 1 }
 
 =head1 NAME
 
-rt-fulltext-indexer - Indexer for full text search
+rt-tika-fulltext-indexer - Indexer for full text search using Apache Tika
 
 =head1 DESCRIPTION
 
-This is a helper script to keep full text indexes in sync with data.
-Read F<docs/full_text_indexing.pod> for complete details on how and when
-to run it.
+This is a helper script to keep full text indexes in sync with data.  It is
+largely a drop-in replacement for RT's rt-fulltext-indexer script.
+
+It makes use of Apache Tika L<http://tika.apache.org> to convert attachments 
+to a plain text representation for searching.
+
+Read RT's F<docs/full_text_indexing.pod> for additional details on how and when
+to run it. 
 
 =cut
 
diff --git a/sbin/rt-tika-fulltext-indexer b/sbin/rt-tika-fulltext-indexer.in
similarity index 96%
copy from sbin/rt-tika-fulltext-indexer
copy to sbin/rt-tika-fulltext-indexer.in
index 335e812..93f53d6 100755
--- a/sbin/rt-tika-fulltext-indexer
+++ b/sbin/rt-tika-fulltext-indexer.in
@@ -1,4 +1,6 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
+### before: #!@PERL@
+#
 # BEGIN BPS TAGGED BLOCK {{{
 #
 # COPYRIGHT:
@@ -50,6 +52,9 @@ use strict;
 use warnings;
 use 5.010;
 
+### after:     use lib qw(@RT_LIB_PATH@);
+    use lib qw(/opt/rt4/local/lib /opt/rt4/lib);
+
 # fix lib paths, some may be relative
 BEGIN { # BEGIN RT CMD BOILERPLATE
     require File::Spec;
@@ -65,7 +70,6 @@ BEGIN { # BEGIN RT CMD BOILERPLATE
         }
         unshift @INC, $lib;
     }
-
 }
 
 use RT -init;
@@ -399,13 +403,18 @@ sub warning  { $RT::Logger->warn(_(@_)); 1 }
 
 =head1 NAME
 
-rt-fulltext-indexer - Indexer for full text search
+rt-tika-fulltext-indexer - Indexer for full text search using Apache Tika
 
 =head1 DESCRIPTION
 
-This is a helper script to keep full text indexes in sync with data.
-Read F<docs/full_text_indexing.pod> for complete details on how and when
-to run it.
+This is a helper script to keep full text indexes in sync with data.  It is
+largely a drop-in replacement for RT's rt-fulltext-indexer script.
+
+It makes use of Apache Tika L<http://tika.apache.org> to convert attachments 
+to a plain text representation for searching.
+
+Read RT's F<docs/full_text_indexing.pod> for additional details on how and when
+to run it. 
 
 =cut
 

commit e658fad7ec32e68d1d5dd21bd25bfe8a118c8e57
Author: Dave Goehrig <dave at bestpractical.com>
Date:   Tue Dec 6 12:03:42 2016 -0500

    changing TikaMimeTypes to be full replacement

diff --git a/lib/RT/Extension/Tika.pm b/lib/RT/Extension/Tika.pm
index 2d718c9..baf3f3c 100644
--- a/lib/RT/Extension/Tika.pm
+++ b/lib/RT/Extension/Tika.pm
@@ -122,22 +122,12 @@ This is free software, licensed under:
 
 =cut
 
-sub extractFile {
-	my ($filename) = @_;
-	open my $fh, "< $filename";
-	my $file = do { local $/;  <$fh> };
-	close $fh;
-	return extract($file);
-}
-
 sub extract {
 	my ($file) = @_;
 	my $url = RT->Config->Get('TikaUrl') || 'http://localhost:9998/';
 	my $tika = Apache::Tika->new( url => $url );
-
 	my $io = new IO::Scalar \$file;
         my $mime_type = mimetype($io);
-
 	return $tika->tika($file,$mime_type);
 }
 
diff --git a/sbin/rt-tika-fulltext-indexer.in b/sbin/rt-tika-fulltext-indexer.in
index 93f53d6..79a0c5e 100755
--- a/sbin/rt-tika-fulltext-indexer.in
+++ b/sbin/rt-tika-fulltext-indexer.in
@@ -174,11 +174,10 @@ sub attachment_loop {
     {
         # Indexes all text/plain and text/html attachments
         my $attachments = RT::Attachments->new( RT->SystemUser );
-	my @extra = (RT->Config->Get('TikaMimeTypes'));
-	my @MIMETYPES = ( 'text/plain', 'text/html','application/pdf', 
+	my @MIMETYPES = (RT->Config->Get('TikaMimeTypes')) || 
+		( 'text/plain', 'text/html','application/pdf', 
 		'application/msword', 
 		'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
-		@extra
 	);
         my $txn_alias = $attachments->Join(
             ALIAS1 => 'main',

-----------------------------------------------------------------------


More information about the Bps-public-commit mailing list