[Rt-commit] [svn] r1065 - in Text-Quoted: . t
jesse at pallas.eruditorum.org
jesse at pallas.eruditorum.org
Mon Jun 14 15:20:04 EDT 2004
Author: jesse
Date: Mon Jun 14 15:20:04 2004
New Revision: 1065
Added:
Text-Quoted/Changes
Text-Quoted/MANIFEST
Text-Quoted/Makefile.PL
Text-Quoted/Quoted.pm (contents, props changed)
Text-Quoted/README
Text-Quoted/t/
Text-Quoted/t/1.t
Text-Quoted/t/2.t
Text-Quoted/t/3.t
Text-Quoted/t/4.t
Log:
Imported version 1.5 from simon
Added: Text-Quoted/Changes
==============================================================================
--- (empty file)
+++ Text-Quoted/Changes Mon Jun 14 15:20:04 2004
@@ -0,0 +1,6 @@
+Revision history for Perl extension Text::Quoted.
+
+1.0 Tue Dec 3 15:01:07 2002
+ - original version; created by h2xs 1.22 with options
+ -AX -n Text::Quoted
+
Added: Text-Quoted/MANIFEST
==============================================================================
--- (empty file)
+++ Text-Quoted/MANIFEST Mon Jun 14 15:20:04 2004
@@ -0,0 +1,9 @@
+Changes
+Makefile.PL
+MANIFEST
+Quoted.pm
+README
+t/1.t
+t/2.t
+t/3.t
+t/4.t
Added: Text-Quoted/Makefile.PL
==============================================================================
--- (empty file)
+++ Text-Quoted/Makefile.PL Mon Jun 14 15:20:04 2004
@@ -0,0 +1,14 @@
+use 5.006;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+ 'NAME' => 'Text::Quoted',
+ 'VERSION_FROM' => 'Quoted.pm', # finds $VERSION
+ 'PREREQ_PM' => {
+ Text::Autoformat => 0
+ }, # e.g., Module::Name => 1.1
+ ($] >= 5.005 ? ## Add these new keywords supported since 5.005
+ (ABSTRACT_FROM => 'Quoted.pm', # retrieve abstract from module
+ AUTHOR => 'Simon Cozens <simon at kasei.com>') : ()),
+);
Added: Text-Quoted/Quoted.pm
==============================================================================
--- (empty file)
+++ Text-Quoted/Quoted.pm Mon Jun 14 15:20:04 2004
@@ -0,0 +1,271 @@
+package Text::Quoted;
+our $VERSION = "1.5";
+use 5.006;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+our @EXPORT = qw(extract);
+
+use Text::Autoformat (); # Provides the Hang package, heh, heh.
+
+=head1 NAME
+
+Text::Quoted - Extract the structure of a quoted mail message
+
+=head1 SYNOPSIS
+
+ use Text::Quoted;
+ my $structure = extract($text);
+
+=head1 DESCRIPTION
+
+C<Text::Quoted> examines the structure of some text which may contain
+multiple different levels of quoting, and turns the text into a nested
+data structure.
+
+The structure is an array reference containing hash references for each
+paragraph belonging to the same author. Each level of quoting recursively
+adds another list reference. So for instance, this:
+
+ > foo
+ > # Bar
+ > baz
+
+ quux
+
+turns into:
+
+ [
+ [
+ { text => 'foo', quoter => '>', raw => '> foo' },
+ [
+ { text => 'Bar', quoter => '> #', raw => '> # Bar' }
+ ],
+ { text => 'baz', quoter => '>', raw => '> baz' }
+ ],
+
+ { empty => 1 },
+ { text => 'quux', quoter => '', raw => 'quux' }
+ ];
+
+This also tells you about what's in the hash references: C<raw> is the
+paragraph of text as it appeared in the original input; C<text> is what
+it looked like when we stripped off the quotation characters, and C<quoter>
+is the quotation string.
+
+=cut
+
+sub extract {
+ my $text = shift;
+ my @paras = classify($text);
+ my @needed;
+ for my $p (@paras) {
+ push @needed, { map { $_ => $p->{$_} } qw(raw empty text quoter) };
+ }
+
+ return organize("", at needed);
+}
+
+=head1 CREDITS
+
+Most of the heavy lifting is done by a modified version of Damian Conway's
+C<Text::Autoformat>.
+
+=head1 COPYRIGHT
+
+Copyright (C) 2002 Kasei Limited
+
+This software is distributed WITHOUT ANY WARRANTY; without even the implied
+warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+It may be used and redistributed under the terms of the Artistic License.
+
+=cut
+
+sub organize {
+ my $top_level = shift;
+ my @todo = @_;
+ my @ret;
+ # Recursively form a data structure which reflects the quoting
+ # structure of the list.
+ while (@todo) {
+ my $line = shift @todo;
+ if (defn($line->{quoter}) eq defn($top_level)) {
+ # Just append lines at "my" level.
+ push @ret, $line
+ if exists $line->{quoter} or exists $line->{empty};
+ } elsif (defn($line->{quoter}) =~ /^\Q$top_level\E.+/) {
+ # Find all the lines at a quoting level "below" me.
+ my $newquoter = find_below($top_level, $line, @todo);
+ my @next = $line;
+ push @next, shift @todo
+ while defined $todo[0]->{quoter}
+ and $todo[0]->{quoter} =~ /^\Q$newquoter/;
+ # Find the
+ # And pass them on to organize()!
+ #print "Trying to organise the following lines over $newquoter:\n";
+ #print $_->{raw}."\n" for @next;
+ #print "!-!-!-\n";
+ push @ret, organize($newquoter, @next);
+ }# else { die "bugger! I had $top_level, but now I have $line->{raw}\n"; }
+ }
+ return \@ret;
+}
+
+# Given, say:
+# X
+# > > hello
+# > foo bar
+# Stuff
+#
+# After "X", we're moving to another level of quoting - but which one?
+# Naively, you'd pick out the prefix of the next line, "> >", but this
+# is incorrect - "> >" is actually a "sub-quote" of ">". This routine
+# works out which is the next level below us.
+
+sub find_below {
+ my ($top_level, @stuff) = @_;
+ #print "## Looking for the next level of quoting after $top_level\n";
+ #print "## We have:\n";
+ #print "## $_->{raw}\n" for @stuff;
+
+ my @prefices = sort { length $a <=> length $b }
+ map { $_->{quoter} } @stuff;
+ # Find the prefices, shortest first.
+
+ # return $prefices[0] if $prefices[0] eq $prefices[-1];
+
+ for (@prefices) {
+ # And return the first one which is "below" where we are right
+ # now but is a proper subset of the next line.
+ next unless $_;
+ if ($_ =~ /^\Q$top_level\E.+/ and $stuff[0]->{quoter} =~ /\Q$_\E/) {
+ #print "## We decided on $_\n";
+ return $_;
+ }
+ }
+ die "Can't happen";
+}
+
+# Everything below this point is essentially Text::Autoformat.
+
+# BITS OF A TEXT LINE
+
+my $quotechar = qq{[!#%=|:]};
+my $quotechunk = qq{(?:$quotechar(?!\\w)|\\w*>+)};
+my $quoter = qq{(?:(?i)(?:$quotechunk(?:[ \\t]*$quotechunk)*))};
+
+my $separator = q/(?:[-_]{2,}|[=#*]{3,}|[+~]{4,})/;
+
+sub defn($) { return $_[0] if defined $_[0]; return ""; }
+
+sub classify
+{
+ my $text = shift;
+ # DETABIFY
+ my @rawlines = split /\n/, $text;
+ use Text::Tabs;
+ @rawlines = expand(@rawlines);
+
+ # PARSE EACH LINE
+
+ my $pre = 0;
+ my @lines;
+ foreach (@rawlines)
+ {
+ push @lines, { raw => $_ };
+ s/\A([ \t]*)($quoter?)([ \t]*)//;
+ $lines[-1]{presig} = $lines[-1]{prespace} = defn $1;
+ $lines[-1]{presig} .= $lines[-1]{quoter} = defn $2;
+ $lines[-1]{presig} .= $lines[-1]{quotespace} = defn $3;
+ $lines[-1]{hang} = defn(Hang->new($_));
+
+ s/([ \t]*)(.*?)(\s*)$//;
+ $lines[-1]{hangspace} = defn $1;
+ $lines[-1]{text} = defn $2;
+ $lines[-1]{empty} = $lines[-1]{hang}->empty() && $2 !~ /\S/;
+ $lines[-1]{separator} = $lines[-1]{text} =~ /^$separator$/;
+ }
+
+ # SUBDIVIDE DOCUMENT INTO COHERENT SUBSECTIONS
+
+ my @chunks;
+ push @chunks, [shift @lines];
+ foreach my $line (@lines)
+ {
+ if ($line->{separator} ||
+ $line->{quoter} ne $chunks[-1][-1]->{quoter} ||
+ $line->{empty} ||
+ @chunks && $chunks[-1][-1]->{empty})
+ {
+ push @chunks, [$line];
+ }
+ else
+ {
+ push @{$chunks[-1]}, $line;
+ }
+ }
+
+ # REDIVIDE INTO PARAGRAPHS
+
+ my @paras;
+ foreach my $chunk ( @chunks )
+ {
+ my $first = 1;
+ my $firstfrom;
+ foreach my $line ( @{$chunk} )
+ {
+ if ($first ||
+ $line->{quoter} ne $paras[-1]->{quoter} ||
+ $paras[-1]->{separator}
+ )
+ {
+ push @paras, $line;
+ $first = 0;
+ $firstfrom = length($line->{raw}) - length($line->{text});
+ }
+ else
+ {
+ my $extraspace = length($line->{raw}) - length($line->{text}) - $firstfrom;
+ $paras[-1]->{text} .= "\n" . q{ }x$extraspace . $line->{text};
+ $paras[-1]->{raw} .= "\n" . $line->{raw};
+ }
+ }
+ }
+
+ my $remainder = "";
+
+ # ALIGN QUOTERS
+ # DETERMINE HANGING MARKER TYPE (BULLET, ALPHA, ROMAN, ETC.)
+
+ my %sigs;
+ my $lastquoted = 0;
+ my $lastprespace = 0;
+ for my $i ( 0..$#paras )
+ {
+ my $para = $paras[$i];
+ if ($para->{quoter})
+ {
+ if ($lastquoted) { $para->{prespace} = $lastprespace }
+ else { $lastquoted = 1; $lastprespace = $para->{prespace} }
+ }
+ else
+ {
+ $lastquoted = 0;
+ }
+ }
+
+ # Reapply hangs
+ for (@paras) {
+ next unless my $hang = $_->{hang};
+ next unless $hang->stringify;
+ $_->{text} = $hang->stringify . " ".$_->{text};
+ }
+ return @paras;
+}
+
+sub val { return "" }
+1;
Added: Text-Quoted/README
==============================================================================
--- (empty file)
+++ Text-Quoted/README Mon Jun 14 15:20:04 2004
@@ -0,0 +1,38 @@
+Text/Quoted version 0.01
+========================
+
+The README is used to introduce the module and provide instructions on
+how to install the module, any machine dependencies it may have (for
+example C compilers and installed libraries) and any other information
+that should be provided before the module is installed.
+
+A README file is required for CPAN modules since CPAN extracts the
+README file from a module distribution so that people browsing the
+archive can use it get an idea of the modules uses. It is usually a
+good idea to provide version information here so that people can
+decide whether fixes for the module are worth downloading.
+
+INSTALLATION
+
+To install this module type the following:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+ blah blah blah
+
+COPYRIGHT AND LICENCE
+
+Put the correct copyright and licence information here.
+
+Copyright (C) 2002 Simon Cozens
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
Added: Text-Quoted/t/1.t
==============================================================================
--- (empty file)
+++ Text-Quoted/t/1.t Mon Jun 14 15:20:04 2004
@@ -0,0 +1,60 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl 1.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 3;
+BEGIN { use_ok('Text::Quoted') };
+
+#########################
+
+# Insert your test code below, the Test::More module is use()ed here so read
+# its man page ( perldoc Test::More ) for help writing this test script.
+
+$a = <<EOF;
+> foo
+> # Bar
+> baz
+
+quux
+EOF
+
+is_deeply(extract($a),
+[[{text => 'foo',empty => '',quoter => '>',raw => '> foo'},
+ [{text => 'Bar',empty => '',quoter => '> #',raw => '> # Bar'}],
+ {text => 'baz',empty => '',quoter => '>',raw => '> baz'}
+ ],
+ {text => '',empty => '1',quoter => '',raw => ''},
+ {text => 'quux',empty => '',quoter => '',raw => 'quux'}],
+"Sample text is organized properly");
+
+$b = <<EOF;
+
+> foo
+> > > baz
+> > quux
+> quuux
+quuuux
+EOF
+
+$b_dump =
+[
+ { text => '', empty => '1', quoter => '', raw => '' },
+ [
+ { text => 'foo', empty => '', quoter => '>', raw => '> foo' },
+ [
+ [
+ { text => 'baz', empty => '', quoter => '> > >',
+ raw => '> > > baz' }
+ ],
+ { text => 'quux', empty => '', quoter => '> >', raw => '> > quux' }
+ ],
+ { text => 'quuux', empty => '', quoter => '>', raw => '> quuux' }
+ ],
+ { text => 'quuuux', empty => '', quoter => '', raw => 'quuuux' }
+ ];
+
+
+is_deeply(extract($b), $b_dump, "Skipping levels works OK");
Added: Text-Quoted/t/2.t
==============================================================================
--- (empty file)
+++ Text-Quoted/t/2.t Mon Jun 14 15:20:04 2004
@@ -0,0 +1,154 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl 1.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 1;
+use Text::Quoted;
+
+#########################
+
+# Insert your test code below, the Test::More module is use()ed here so read
+# its man page ( perldoc Test::More ) for help writing this test script.
+
+$a = <<'EOF';
+>>>>> "dc" == darren chamberlain writes:
+
+>> If I don't do "use Template;" in my startup script, each child will
+>> get the pleasure of loading and compiling it all when the first script
+>> that uses Template gets executed.
+
+dc> Unless one of the other modules that you use in your startup script
+dc> happens to use Template, in which case you'll be OK.
+
+Well, that's still "use Template;" as far as I'm concerned.
+
+I was really just being pedantic... but think of a hosting situation
+where the startup is pretty bare, and some Registry program uses the
+template.
+
+I personally don't think the preload should be called automagically,
+even if it does the right thing most of the time.
+
+_______________________________________________
+templates mailing list
+templates at template-toolkit.org
+http://www.template-toolkit.org/mailman/listinfo/templates
+EOF
+
+$expected = [
+ [
+ [
+ {
+ 'quoter' => '>>>>>',
+ 'text' => '"dc" == darren chamberlain writes:',
+ 'raw' => '>>>>> "dc" == darren chamberlain writes:',
+ 'empty' => ''
+ }
+ ]
+ ],
+ {
+ 'quoter' => '',
+ 'text' => '',
+ 'raw' => '',
+ 'empty' => '1'
+ },
+ [
+ {
+ 'quoter' => '>>',
+ 'text' => 'If I don\'t do "use Template;" in my startup script, each child will
+get the pleasure of loading and compiling it all when the first script
+that uses Template gets executed.',
+ 'raw' => '>> If I don\'t do "use Template;" in my startup script, each child will
+>> get the pleasure of loading and compiling it all when the first script
+>> that uses Template gets executed.',
+ 'empty' => ''
+ }
+ ],
+ {
+ 'quoter' => '',
+ 'text' => '',
+ 'raw' => '',
+ 'empty' => '1'
+ },
+ [
+ {
+ 'quoter' => 'dc>',
+ 'text' => 'Unless one of the other modules that you use in your startup script
+happens to use Template, in which case you\'ll be OK.',
+ 'raw' => 'dc> Unless one of the other modules that you use in your startup script
+dc> happens to use Template, in which case you\'ll be OK.',
+ 'empty' => ''
+ }
+ ],
+ {
+ 'quoter' => '',
+ 'text' => '',
+ 'raw' => '',
+ 'empty' => '1'
+ },
+ {
+ 'quoter' => '',
+ 'text' => 'Well, that\'s still "use Template;" as far as I\'m concerned.',
+ 'raw' => 'Well, that\'s still "use Template;" as far as I\'m concerned.',
+ 'empty' => ''
+ },
+ {
+ 'quoter' => '',
+ 'text' => '',
+ 'raw' => '',
+ 'empty' => '1'
+ },
+ {
+ 'quoter' => '',
+ 'text' => 'I was really just being pedantic... but think of a hosting situation
+where the startup is pretty bare, and some Registry program uses the
+template.',
+ 'raw' => 'I was really just being pedantic... but think of a hosting situation
+where the startup is pretty bare, and some Registry program uses the
+template.',
+ 'empty' => ''
+ },
+ {
+ 'quoter' => '',
+ 'text' => '',
+ 'raw' => '',
+ 'empty' => '1'
+ },
+ {
+ 'quoter' => '',
+ 'text' => 'I personally don\'t think the preload should be called automagically,
+even if it does the right thing most of the time.',
+ 'raw' => 'I personally don\'t think the preload should be called automagically,
+even if it does the right thing most of the time.',
+ 'empty' => ''
+ },
+ {
+ 'quoter' => '',
+ 'text' => '',
+ 'raw' => '',
+ 'empty' => '1'
+ },
+ {
+ 'quoter' => '',
+ 'text' => '_______________________________________________',
+ 'raw' => '_______________________________________________',
+ 'empty' => ''
+ },
+ {
+ 'quoter' => '',
+ 'text' => 'templates mailing list
+templates at template-toolkit.org
+http://www.template-toolkit.org/mailman/listinfo/templates',
+ 'raw' => 'templates mailing list
+templates at template-toolkit.org
+http://www.template-toolkit.org/mailman/listinfo/templates',
+ 'empty' => ''
+ }
+ ];
+
+
+is_deeply(extract($a), $expected,
+ "Supercite doesn't screw me up as badly as before");
Added: Text-Quoted/t/3.t
==============================================================================
--- (empty file)
+++ Text-Quoted/t/3.t Mon Jun 14 15:20:04 2004
@@ -0,0 +1,98 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl 1.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 1;
+use Text::Quoted;
+
+#########################
+
+# Insert your test code below, the Test::More module is use()ed here so read
+# its man page ( perldoc Test::More ) for help writing this test script.
+
+$a = <<'EOF';
+From: "Brian Christopher Robinson" <brian.c.robinson at trw.com>
+zxc
+> > An
+> > alternative solution is to not have those phone calls at work,
+> > faciliitated by worked very hard for a reasonably workday, then
+> > leaving... thus having time to deal with personal issues when not at
+> > work.
+iabc
+> Unfortunately, personal issues can't be conveniently shoved aside
+eight
+> hours a day. People with kids especially have to deal with issues
+> realted to picking them up and dropping them off at various times, as
+x
+EOF
+
+$expected = [
+ {
+ 'quoter' => '',
+ 'text' => 'From: "Brian Christopher Robinson" <brian.c.robinson at trw.com>
+zxc',
+ 'raw' => 'From: "Brian Christopher Robinson" <brian.c.robinson at trw.com>
+zxc',
+ 'empty' => ''
+ },
+ [
+ [
+ {
+ 'quoter' => '> >',
+ 'text' => 'An
+alternative solution is to not have those phone calls at work,
+faciliitated by worked very hard for a reasonably workday, then
+leaving... thus having time to deal with personal issues when not at
+work.',
+ 'raw' => '> > An
+> > alternative solution is to not have those phone calls at work,
+> > faciliitated by worked very hard for a reasonably workday, then
+> > leaving... thus having time to deal with personal issues when not at
+> > work.',
+ 'empty' => ''
+ }
+ ]
+ ],
+ {
+ 'quoter' => '',
+ 'text' => 'iabc',
+ 'raw' => 'iabc',
+ 'empty' => ''
+ },
+ [
+ {
+ 'quoter' => '>',
+ 'text' => 'Unfortunately, personal issues can\'t be conveniently shoved aside',
+ 'raw' => '> Unfortunately, personal issues can\'t be conveniently shoved aside',
+ 'empty' => ''
+ }
+ ],
+ {
+ 'quoter' => '',
+ 'text' => 'eight',
+ 'raw' => 'eight',
+ 'empty' => ''
+ },
+ [
+ {
+ 'quoter' => '>',
+ 'text' => 'hours a day. People with kids especially have to deal with issues
+realted to picking them up and dropping them off at various times, as',
+ 'raw' => '> hours a day. People with kids especially have to deal with issues
+> realted to picking them up and dropping them off at various times, as',
+ 'empty' => ''
+ }
+ ],
+ {
+ 'quoter' => '',
+ 'text' => 'x',
+ 'raw' => 'x',
+ 'empty' => ''
+ }
+ ];
+
+is_deeply(extract($a), $expected,
+ "Supercite doesn't screw me up as badly as before");
Added: Text-Quoted/t/4.t
==============================================================================
--- (empty file)
+++ Text-Quoted/t/4.t Mon Jun 14 15:20:04 2004
@@ -0,0 +1,18 @@
+#!/usr/bin/perl
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 1;
+use Text::Quoted;
+
+# I don't really care what the results are, so long as we don't
+# segfault.
+
+my $ntk = <<'NTK';
+ _ _ _____ _ __ <*the* weekly high-tech sarcastic update for the uk>
+| \ | |_ _| |/ / _ __ __2002-07-26_ o join! mail an empty message to
+| \| | | | | ' / | '_ \ / _ \ \ /\ / / o ntknow-subscribe at lists.ntk.net
+| |\ | | | | . \ | | | | (_) \ v v / o website (+ archive) lives at:
+|_| \_| |_| |_|\_\|_| |_|\___/ \_/\_/ o http://www.ntk.net/
+NTK
+
+ok(extract($ntk), "It's not pretty, but at least it works");
More information about the Rt-commit
mailing list