[Bps-public-commit] rt-extension-tika branch, master, updated. 06db98e80a930ae143da86b44d71e20e02e5ee67

Dave Goehrig dave at bestpractical.com
Mon Feb 27 15:29:14 EST 2017


The branch, master has been updated
       via  06db98e80a930ae143da86b44d71e20e02e5ee67 (commit)
      from  c6bd5fedde171427ab8fdfcf5fa5dd4042281cd8 (commit)

Summary of changes:
 lib/RT/Extension/Tika.pm   |  12 +++++++++++
 t/docs/README              |   4 ++++
 t/docs/testOpenOffice2.odf | Bin 0 -> 10977 bytes
 t/docs/testOpenOffice2.odt | Bin 0 -> 26448 bytes
 t/docs/testPDF.pdf         | Bin 0 -> 34824 bytes
 t/docs/testWORD.doc        | Bin 0 -> 32768 bytes
 t/docs/testWORD.docx       | Bin 0 -> 13436 bytes
 t/tika.t                   |  51 +++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 67 insertions(+)
 create mode 100644 t/docs/README
 create mode 100644 t/docs/testOpenOffice2.odf
 create mode 100644 t/docs/testOpenOffice2.odt
 create mode 100644 t/docs/testPDF.pdf
 create mode 100644 t/docs/testWORD.doc
 create mode 100644 t/docs/testWORD.docx
 create mode 100644 t/tika.t

- Log -----------------------------------------------------------------
commit 06db98e80a930ae143da86b44d71e20e02e5ee67
Author: Dave Goehrig <dave at bestpractical.com>
Date:   Mon Feb 27 15:28:58 2017 -0500

    adding unit tests

diff --git a/lib/RT/Extension/Tika.pm b/lib/RT/Extension/Tika.pm
index 03e7d5c..76c217d 100644
--- a/lib/RT/Extension/Tika.pm
+++ b/lib/RT/Extension/Tika.pm
@@ -128,6 +128,18 @@ currently running that make sure that job first.
 
 =back
 
+=head1 TESTING
+
+In order to run the unit tests for this extension, you should:
+
+    java -jar jar/tika-server.jar &
+    prove -I/opt/rt4/lib t/tika.t
+
+These tests require that the server be running locally on the
+default port in order to work.  The sample test files are in
+the t/docs/ directory, and are loaded relative to the current
+working directory.
+
 =head1 AUTHOR
 
 Best Practical Solutions, LLC E<lt>modules at bestpractical.comE<gt>
diff --git a/t/docs/README b/t/docs/README
new file mode 100644
index 0000000..7caa036
--- /dev/null
+++ b/t/docs/README
@@ -0,0 +1,4 @@
+These files are from the Apache Tika test-documents
+and are licensed under the Apache License, Version 2.0.
+
+http://www.apache.org/licenses/
diff --git a/t/docs/testOpenOffice2.odf b/t/docs/testOpenOffice2.odf
new file mode 100644
index 0000000..a814972
Binary files /dev/null and b/t/docs/testOpenOffice2.odf differ
diff --git a/t/docs/testOpenOffice2.odt b/t/docs/testOpenOffice2.odt
new file mode 100644
index 0000000..bc31925
Binary files /dev/null and b/t/docs/testOpenOffice2.odt differ
diff --git a/t/docs/testPDF.pdf b/t/docs/testPDF.pdf
new file mode 100644
index 0000000..1f1bcff
Binary files /dev/null and b/t/docs/testPDF.pdf differ
diff --git a/t/docs/testWORD.doc b/t/docs/testWORD.doc
new file mode 100644
index 0000000..c1f4f3d
Binary files /dev/null and b/t/docs/testWORD.doc differ
diff --git a/t/docs/testWORD.docx b/t/docs/testWORD.docx
new file mode 100644
index 0000000..5ef040e
Binary files /dev/null and b/t/docs/testWORD.docx differ
diff --git a/t/tika.t b/t/tika.t
new file mode 100644
index 0000000..aacbaa6
--- /dev/null
+++ b/t/tika.t
@@ -0,0 +1,51 @@
+use strict;
+use warnings;
+
+use RT::Test tests => undef;
+
+use lib './lib';
+
+use RT::Extension::Tika;
+
+sub read_file {    # Slurp the file named by $_[0] and return its raw bytes; dies if it cannot be opened.
+    local $/ = undef;    # no record separator, so a single read returns the whole file
+    open my $fh, '<:raw', $_[0] or die "Cannot open $_[0]: $!";    # 3-arg open; :raw keeps binary fixtures untranslated
+    my $data = <$fh>;
+    close $fh;
+    return $data;
+}
+
+my %files;    # maps each fixture path (relative to the current directory) to that file's contents
+%files = map { $_ => read_file($_) } qw(
+    t/docs/testOpenOffice2.odf
+    t/docs/testOpenOffice2.odt
+    t/docs/testPDF.pdf
+    t/docs/testWORD.doc
+    t/docs/testWORD.docx
+);
+    
+# mime_file() is handed the file contents (not the path); compare its result per fixture.
+is RT::Extension::Tika::mime_file($files{'t/docs/testOpenOffice2.odf'}), 'application/zip', 'test odf mime loading';
+is RT::Extension::Tika::mime_file($files{'t/docs/testOpenOffice2.odt'}), 'application/vnd.oasis.opendocument.text', 'test odt mime loading';
+is RT::Extension::Tika::mime_file($files{'t/docs/testPDF.pdf'}), 'application/pdf', 'test pdf mime loading';
+is RT::Extension::Tika::mime_file($files{'t/docs/testWORD.doc'}), 'application/msword', 'test doc mime loading';
+is RT::Extension::Tika::mime_file($files{'t/docs/testWORD.docx'}), 'application/zip', 'test docx mime loading';
+
+# With no other configuration, config_url is expected to be the local server URL below.
+is RT::Extension::Tika::config_url, 'http://localhost:9998/', 'check default config url';
+# Call request() directly with URL, contents and MIME type; the returned object is checked like a response.
+my $request = RT::Extension::Tika::request( 
+    RT::Extension::Tika::config_url,
+    $files{'t/docs/testOpenOffice2.odf'},
+    'application/zip');
+is $request->is_error, '', 'not an error';
+is $request->code, 200, 'request works';
+like $request->content, qr/The quick brown fox jumps over the lazy dog/, 'odf content';    
+# extract() is handed file contents; its result is matched against a phrase expected in each document.
+like RT::Extension::Tika::extract($files{'t/docs/testOpenOffice2.odt'}), qr/This is a sample Open Office document/, 'odt extraction';
+like RT::Extension::Tika::extract($files{'t/docs/testPDF.pdf'}), qr/Tika - Content Analysis Toolkit/, 'pdf extraction';
+like RT::Extension::Tika::extract($files{'t/docs/testWORD.doc'}), qr/This is a sample Microsoft Word Document/, 'doc extraction';
+like RT::Extension::Tika::extract($files{'t/docs/testWORD.docx'}), qr/This is a sample Microsoft Word Document/, 'docx extraction';
+        
+
+done_testing;

-----------------------------------------------------------------------


More information about the Bps-public-commit mailing list