[Rt-commit] [svn] r1065 - in Text-Quoted: . t

jesse at pallas.eruditorum.org jesse at pallas.eruditorum.org
Mon Jun 14 15:20:04 EDT 2004


Author: jesse
Date: Mon Jun 14 15:20:04 2004
New Revision: 1065

Added:
   Text-Quoted/Changes
   Text-Quoted/MANIFEST
   Text-Quoted/Makefile.PL
   Text-Quoted/Quoted.pm   (contents, props changed)
   Text-Quoted/README
   Text-Quoted/t/
   Text-Quoted/t/1.t
   Text-Quoted/t/2.t
   Text-Quoted/t/3.t
   Text-Quoted/t/4.t
Log:
Imported version 1.5 from simon

Added: Text-Quoted/Changes
==============================================================================
--- (empty file)
+++ Text-Quoted/Changes	Mon Jun 14 15:20:04 2004
@@ -0,0 +1,6 @@
+Revision history for Perl extension Text::Quoted.
+
+1.0  Tue Dec  3 15:01:07 2002
+	- original version; created by h2xs 1.22 with options
+		-AX -n Text::Quoted
+

Added: Text-Quoted/MANIFEST
==============================================================================
--- (empty file)
+++ Text-Quoted/MANIFEST	Mon Jun 14 15:20:04 2004
@@ -0,0 +1,9 @@
+Changes
+Makefile.PL
+MANIFEST
+Quoted.pm
+README
+t/1.t
+t/2.t
+t/3.t
+t/4.t

Added: Text-Quoted/Makefile.PL
==============================================================================
--- (empty file)
+++ Text-Quoted/Makefile.PL	Mon Jun 14 15:20:04 2004
@@ -0,0 +1,14 @@
+use 5.006;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+    'NAME'		=> 'Text::Quoted',
+    'VERSION_FROM'	=> 'Quoted.pm', # finds $VERSION
+    'PREREQ_PM'		=> {
+        Text::Autoformat => 0
+    }, # e.g., Module::Name => 1.1
+    ($] >= 5.005 ?    ## Add these new keywords supported since 5.005
+      (ABSTRACT_FROM => 'Quoted.pm', # retrieve abstract from module
+       AUTHOR     => 'Simon Cozens <simon at kasei.com>') : ()),
+);

Added: Text-Quoted/Quoted.pm
==============================================================================
--- (empty file)
+++ Text-Quoted/Quoted.pm	Mon Jun 14 15:20:04 2004
@@ -0,0 +1,271 @@
+package Text::Quoted;
+our $VERSION = "1.5";
+use 5.006;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+our @EXPORT = qw(extract);
+
+use Text::Autoformat (); # Provides the Hang package, heh, heh.
+
+=head1 NAME
+
+Text::Quoted - Extract the structure of a quoted mail message
+
+=head1 SYNOPSIS
+
+    use Text::Quoted;
+    my $structure = extract($text);
+
+=head1 DESCRIPTION
+
+C<Text::Quoted> examines the structure of some text which may contain
+multiple different levels of quoting, and turns the text into a nested
+data structure. 
+
+The structure is an array reference containing hash references for each
+paragraph belonging to the same author. Each level of quoting recursively
+adds another list reference. So for instance, this:
+
+    > foo
+    > # Bar
+    > baz
+
+    quux
+
+turns into:
+
+    [
+      [
+        { text => 'foo', quoter => '>', raw => '> foo' },
+        [ 
+            { text => 'Bar', quoter => '> #', raw => '> # Bar' } 
+        ],
+        { text => 'baz', quoter => '>', raw => '> baz' }
+      ],
+
+      { empty => 1 },
+      { text => 'quux', quoter => '', raw => 'quux' }
+    ];
+
+This also tells you about what's in the hash references: C<raw> is the
+paragraph of text as it appeared in the original input; C<text> is what
+it looked like when we stripped off the quotation characters, and C<quoter>
+is the quotation string.
+
+=cut
+
+sub extract {
+    my $text = shift;
+    my @paras = classify($text);
+    my @needed;
+    for my $p (@paras) {
+        push @needed, { map { $_ => $p->{$_} } qw(raw empty text quoter) };
+    }
+    
+    return organize("", at needed);
+}
+
+=head1 CREDITS
+
+Most of the heavy lifting is done by a modified version of Damian Conway's
+C<Text::Autoformat>.
+
+=head1 COPYRIGHT
+
+Copyright (C) 2002 Kasei Limited
+
+This software is distributed WITHOUT ANY WARRANTY; without even the implied
+warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+It may be used and redistributed under the terms of the Artistic License.
+
+=cut
+
+sub organize {
+    my $top_level = shift;
+    my @todo = @_;
+    my @ret;
+    # Recursively form a data structure which reflects the quoting
+    # structure of the list.
+    while (@todo) {
+        my $line = shift @todo;
+        if (defn($line->{quoter}) eq defn($top_level)) {
+            # Just append lines at "my" level.
+            push @ret, $line 
+                if exists $line->{quoter} or exists $line->{empty};
+        } elsif (defn($line->{quoter}) =~ /^\Q$top_level\E.+/) {
+            # Find all the lines at a quoting level "below" me.
+            my $newquoter = find_below($top_level, $line, @todo);
+            my @next = $line;
+            push @next, shift @todo 
+                while defined $todo[0]->{quoter} 
+                      and $todo[0]->{quoter} =~ /^\Q$newquoter/;
+            # Find the 
+            # And pass them on to organize()!
+            #print "Trying to organise the following lines over $newquoter:\n";
+            #print $_->{raw}."\n" for @next;
+            #print "!-!-!-\n";
+            push @ret, organize($newquoter, @next);
+        }#  else { die "bugger! I had $top_level, but now I have $line->{raw}\n"; }
+    }
+    return \@ret;
+}
+
+# Given, say:
+#   X
+#   > > hello
+#   > foo bar
+#   Stuff
+#
+# After "X", we're moving to another level of quoting - but which one?
+# Naively, you'd pick out the prefix of the next line, "> >", but this
+# is incorrect - "> >" is actually a "sub-quote" of ">". This routine
+# works out which is the next level below us.
+
+sub find_below {
+    my ($top_level, @stuff) = @_;
+    #print "## Looking for the next level of quoting after $top_level\n";
+    #print "## We have:\n";
+    #print "## $_->{raw}\n" for @stuff;
+
+    my @prefices = sort { length $a <=> length $b } 
+                   map { $_->{quoter} } @stuff;
+    # Find the prefices, shortest first.
+
+    # return $prefices[0] if $prefices[0] eq $prefices[-1];
+ 
+    for (@prefices) { 
+        # And return the first one which is "below" where we are right
+        # now but is a proper subset of the next line. 
+        next unless $_;
+        if ($_ =~ /^\Q$top_level\E.+/ and $stuff[0]->{quoter} =~ /\Q$_\E/) {
+            #print "## We decided on $_\n";
+            return $_;
+        }
+    }
+    die "Can't happen";
+}
+
+# Everything below this point is essentially Text::Autoformat.
+
+# BITS OF A TEXT LINE
+
+my $quotechar = qq{[!#%=|:]};
+my $quotechunk = qq{(?:$quotechar(?!\\w)|\\w*>+)};
+my $quoter = qq{(?:(?i)(?:$quotechunk(?:[ \\t]*$quotechunk)*))};
+
+my $separator = q/(?:[-_]{2,}|[=#*]{3,}|[+~]{4,})/;
+
+sub defn($) { return $_[0] if defined $_[0]; return ""; }
+
+sub classify
+{
+    my $text = shift;
+	# DETABIFY
+	my @rawlines = split /\n/, $text;
+	use Text::Tabs;
+	@rawlines = expand(@rawlines);
+
+	# PARSE EACH LINE
+
+	my $pre = 0;
+	my @lines;
+	foreach (@rawlines)
+	{
+			push @lines, { raw	   => $_ };
+			s/\A([ \t]*)($quoter?)([ \t]*)//;
+			$lines[-1]{presig} =  $lines[-1]{prespace}   = defn $1;
+			$lines[-1]{presig} .= $lines[-1]{quoter}     = defn $2;
+			$lines[-1]{presig} .= $lines[-1]{quotespace} = defn $3;
+			$lines[-1]{hang}       = defn(Hang->new($_));
+
+			s/([ \t]*)(.*?)(\s*)$//;
+			$lines[-1]{hangspace} = defn $1;
+			$lines[-1]{text} = defn $2;
+			$lines[-1]{empty} = $lines[-1]{hang}->empty() && $2 !~ /\S/;
+			$lines[-1]{separator} = $lines[-1]{text} =~ /^$separator$/;
+	}
+
+	# SUBDIVIDE DOCUMENT INTO COHERENT SUBSECTIONS
+
+	my @chunks;
+	push @chunks, [shift @lines];
+	foreach my $line (@lines)
+	{
+		if ($line->{separator} ||
+		    $line->{quoter} ne $chunks[-1][-1]->{quoter} ||
+		    $line->{empty} ||
+		    @chunks && $chunks[-1][-1]->{empty})
+		{
+			push @chunks, [$line];
+		}
+		else
+		{
+			push @{$chunks[-1]}, $line;
+		}
+	}
+
+	# REDIVIDE INTO PARAGRAPHS
+
+	my @paras;
+	foreach my $chunk ( @chunks )
+	{
+		my $first = 1;
+		my $firstfrom;
+		foreach my $line ( @{$chunk} )
+		{
+			if ($first ||
+			    $line->{quoter} ne $paras[-1]->{quoter} ||
+			    $paras[-1]->{separator}
+			   )
+			{
+				push @paras, $line;
+				$first = 0;
+				$firstfrom = length($line->{raw}) - length($line->{text});
+			}
+			else
+			{
+    my $extraspace = length($line->{raw}) - length($line->{text}) - $firstfrom;
+				$paras[-1]->{text} .= "\n" . q{ }x$extraspace . $line->{text};
+				$paras[-1]->{raw} .= "\n" . $line->{raw};
+			}
+		}
+	}
+
+	my $remainder = "";
+
+	# ALIGN QUOTERS
+	# DETERMINE HANGING MARKER TYPE (BULLET, ALPHA, ROMAN, ETC.)
+
+	my %sigs;
+	my $lastquoted = 0;
+	my $lastprespace = 0;
+	for my $i ( 0..$#paras )
+	{
+		my $para = $paras[$i];
+	 if ($para->{quoter})
+		{
+			if ($lastquoted) { $para->{prespace} = $lastprespace }
+			else		 { $lastquoted = 1; $lastprespace = $para->{prespace} }
+		}
+		else
+		{
+			$lastquoted = 0;
+		}
+	}
+
+        # Reapply hangs
+    for (@paras) {
+        next unless my $hang = $_->{hang};
+        next unless $hang->stringify;
+        $_->{text} = $hang->stringify . " ".$_->{text};
+    }
+    return @paras;
+}
+
+sub val { return "" }
+1;

Added: Text-Quoted/README
==============================================================================
--- (empty file)
+++ Text-Quoted/README	Mon Jun 14 15:20:04 2004
@@ -0,0 +1,38 @@
+Text/Quoted version 0.01
+========================
+
+The README is used to introduce the module and provide instructions on
+how to install the module, any machine dependencies it may have (for
+example C compilers and installed libraries) and any other information
+that should be provided before the module is installed.
+
+A README file is required for CPAN modules since CPAN extracts the
+README file from a module distribution so that people browsing the
+archive can use it get an idea of the modules uses. It is usually a
+good idea to provide version information here so that people can
+decide whether fixes for the module are worth downloading.
+
+INSTALLATION
+
+To install this module type the following:
+
+   perl Makefile.PL
+   make
+   make test
+   make install
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+  blah blah blah
+
+COPYRIGHT AND LICENCE
+
+Put the correct copyright and licence information here.
+
+Copyright (C) 2002 Simon Cozens
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself. 
+

Added: Text-Quoted/t/1.t
==============================================================================
--- (empty file)
+++ Text-Quoted/t/1.t	Mon Jun 14 15:20:04 2004
@@ -0,0 +1,60 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl 1.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 3;
+BEGIN { use_ok('Text::Quoted') };
+
+#########################
+
+# Insert your test code below, the Test::More module is use()ed here so read
+# its man page ( perldoc Test::More ) for help writing this test script.
+
+$a = <<EOF;
+> foo
+> # Bar
+> baz
+
+quux
+EOF
+
+is_deeply(extract($a),
+[[{text => 'foo',empty => '',quoter => '>',raw => '> foo'},
+  [{text => 'Bar',empty => '',quoter => '> #',raw => '> # Bar'}],
+  {text => 'baz',empty => '',quoter => '>',raw => '> baz'}
+ ],
+ {text => '',empty => '1',quoter => '',raw => ''},
+ {text => 'quux',empty => '',quoter => '',raw => 'quux'}],
+"Sample text is organized properly");
+
+$b = <<EOF;
+
+> foo
+> > > baz
+> > quux
+> quuux
+quuuux
+EOF
+
+$b_dump = 
+[
+      { text => '', empty => '1', quoter => '', raw => '' },
+      [
+        { text => 'foo', empty => '', quoter => '>', raw => '> foo' },
+        [
+          [
+            { text => 'baz', empty => '', quoter => '> > >',
+              raw => '> > > baz' }
+          ],
+          { text => 'quux', empty => '', quoter => '> >', raw => '> > quux' }
+        ],
+        { text => 'quuux', empty => '', quoter => '>', raw => '> quuux' }
+      ],
+      { text => 'quuuux', empty => '', quoter => '', raw => 'quuuux' }
+    ];
+
+
+is_deeply(extract($b), $b_dump, "Skipping levels works OK");

Added: Text-Quoted/t/2.t
==============================================================================
--- (empty file)
+++ Text-Quoted/t/2.t	Mon Jun 14 15:20:04 2004
@@ -0,0 +1,154 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl 1.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 1;
+use Text::Quoted;
+
+#########################
+
+# Insert your test code below, the Test::More module is use()ed here so read
+# its man page ( perldoc Test::More ) for help writing this test script.
+
+$a = <<'EOF';
+>>>>> "dc" == darren chamberlain writes:
+
+>> If I don't do "use Template;" in my startup script, each child will
+>> get the pleasure of loading and compiling it all when the first script
+>> that uses Template gets executed.
+
+dc> Unless one of the other modules that you use in your startup script
+dc> happens to use Template, in which case you'll be OK.
+
+Well, that's still "use Template;" as far as I'm concerned.
+
+I was really just being pedantic...  but think of a hosting situation
+where the startup is pretty bare, and some Registry program uses the
+template.
+
+I personally don't think the preload should be called automagically,
+even if it does the right thing most of the time.
+
+_______________________________________________
+templates mailing list
+templates at template-toolkit.org
+http://www.template-toolkit.org/mailman/listinfo/templates
+EOF
+
+$expected = [
+          [
+            [
+              {
+                'quoter' => '>>>>>',
+                'text' => '"dc" == darren chamberlain writes:',
+                'raw' => '>>>>> "dc" == darren chamberlain writes:',
+                'empty' => ''
+              }
+            ]
+          ],
+          {
+            'quoter' => '',
+            'text' => '',
+            'raw' => '',
+            'empty' => '1'
+          },
+          [
+            {
+              'quoter' => '>>',
+              'text' => 'If I don\'t do "use Template;" in my startup script, each child will
+get the pleasure of loading and compiling it all when the first script
+that uses Template gets executed.',
+              'raw' => '>> If I don\'t do "use Template;" in my startup script, each child will
+>> get the pleasure of loading and compiling it all when the first script
+>> that uses Template gets executed.',
+              'empty' => ''
+            }
+          ],
+          {
+            'quoter' => '',
+            'text' => '',
+            'raw' => '',
+            'empty' => '1'
+          },
+          [
+            {
+              'quoter' => 'dc>',
+              'text' => 'Unless one of the other modules that you use in your startup script
+happens to use Template, in which case you\'ll be OK.',
+              'raw' => 'dc> Unless one of the other modules that you use in your startup script
+dc> happens to use Template, in which case you\'ll be OK.',
+              'empty' => ''
+            }
+          ],
+          {
+            'quoter' => '',
+            'text' => '',
+            'raw' => '',
+            'empty' => '1'
+          },
+          {
+            'quoter' => '',
+            'text' => 'Well, that\'s still "use Template;" as far as I\'m concerned.',
+            'raw' => 'Well, that\'s still "use Template;" as far as I\'m concerned.',
+            'empty' => ''
+          },
+          {
+            'quoter' => '',
+            'text' => '',
+            'raw' => '',
+            'empty' => '1'
+          },
+          {
+            'quoter' => '',
+            'text' => 'I was really just being pedantic...  but think of a hosting situation
+where the startup is pretty bare, and some Registry program uses the
+template.',
+            'raw' => 'I was really just being pedantic...  but think of a hosting situation
+where the startup is pretty bare, and some Registry program uses the
+template.',
+            'empty' => ''
+          },
+          {
+            'quoter' => '',
+            'text' => '',
+            'raw' => '',
+            'empty' => '1'
+          },
+          {
+            'quoter' => '',
+            'text' => 'I personally don\'t think the preload should be called automagically,
+even if it does the right thing most of the time.',
+            'raw' => 'I personally don\'t think the preload should be called automagically,
+even if it does the right thing most of the time.',
+            'empty' => ''
+          },
+          {
+            'quoter' => '',
+            'text' => '',
+            'raw' => '',
+            'empty' => '1'
+          },
+          {
+            'quoter' => '',
+            'text' => '_______________________________________________',
+            'raw' => '_______________________________________________',
+            'empty' => ''
+          },
+          {
+            'quoter' => '',
+            'text' => 'templates mailing list
+templates at template-toolkit.org
+http://www.template-toolkit.org/mailman/listinfo/templates',
+            'raw' => 'templates mailing list
+templates at template-toolkit.org
+http://www.template-toolkit.org/mailman/listinfo/templates',
+            'empty' => ''
+          }
+        ];
+
+
+is_deeply(extract($a), $expected, 
+          "Supercite doesn't screw me up as badly as before");

Added: Text-Quoted/t/3.t
==============================================================================
--- (empty file)
+++ Text-Quoted/t/3.t	Mon Jun 14 15:20:04 2004
@@ -0,0 +1,98 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl 1.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 1;
+use Text::Quoted;
+
+#########################
+
+# Insert your test code below, the Test::More module is use()ed here so read
+# its man page ( perldoc Test::More ) for help writing this test script.
+
+$a = <<'EOF';
+From: "Brian Christopher Robinson" <brian.c.robinson at trw.com>
+zxc
+> > An
+> > alternative solution is to not have those phone calls at work,
+> > faciliitated by worked very hard for a reasonably workday, then
+> > leaving... thus having time to deal with personal issues when not at
+> > work.
+iabc
+> Unfortunately, personal issues can't be conveniently shoved aside
+eight
+> hours a day.  People with kids especially have to deal with issues
+> realted to picking them up and dropping them off at various times, as
+x
+EOF
+
+$expected = [
+          {
+            'quoter' => '',
+            'text' => 'From: "Brian Christopher Robinson" <brian.c.robinson at trw.com>
+zxc',
+            'raw' => 'From: "Brian Christopher Robinson" <brian.c.robinson at trw.com>
+zxc',
+            'empty' => ''
+          },
+          [
+            [
+              {
+                'quoter' => '> >',
+                'text' => 'An
+alternative solution is to not have those phone calls at work,
+faciliitated by worked very hard for a reasonably workday, then
+leaving... thus having time to deal with personal issues when not at
+work.',
+                'raw' => '> > An
+> > alternative solution is to not have those phone calls at work,
+> > faciliitated by worked very hard for a reasonably workday, then
+> > leaving... thus having time to deal with personal issues when not at
+> > work.',
+                'empty' => ''
+              }
+            ]
+          ],
+          {
+            'quoter' => '',
+            'text' => 'iabc',
+            'raw' => 'iabc',
+            'empty' => ''
+          },
+          [
+            {
+              'quoter' => '>',
+              'text' => 'Unfortunately, personal issues can\'t be conveniently shoved aside',
+              'raw' => '> Unfortunately, personal issues can\'t be conveniently shoved aside',
+              'empty' => ''
+            }
+          ],
+          {
+            'quoter' => '',
+            'text' => 'eight',
+            'raw' => 'eight',
+            'empty' => ''
+          },
+          [
+            {
+              'quoter' => '>',
+              'text' => 'hours a day.  People with kids especially have to deal with issues
+realted to picking them up and dropping them off at various times, as',
+              'raw' => '> hours a day.  People with kids especially have to deal with issues
+> realted to picking them up and dropping them off at various times, as',
+              'empty' => ''
+            }
+          ],
+          {
+            'quoter' => '',
+            'text' => 'x',
+            'raw' => 'x',
+            'empty' => ''
+          }
+        ];
+
+is_deeply(extract($a), $expected, 
+          "Supercite doesn't screw me up as badly as before");

Added: Text-Quoted/t/4.t
==============================================================================
--- (empty file)
+++ Text-Quoted/t/4.t	Mon Jun 14 15:20:04 2004
@@ -0,0 +1,18 @@
+#!/usr/bin/perl
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 1;
+use Text::Quoted;
+
+# I don't really care what the results are, so long as we don't
+# segfault.
+
+my $ntk = <<'NTK';
+ _   _ _____ _  __ <*the* weekly high-tech sarcastic update for the uk>
+| \ | |_   _| |/ / _ __   __2002-07-26_ o join! mail an empty message to
+|  \| | | | | ' / | '_ \ / _ \ \ /\ / / o ntknow-subscribe at lists.ntk.net
+| |\  | | | | . \ | | | | (_) \ v  v /  o website (+ archive) lives at:
+|_| \_| |_| |_|\_\|_| |_|\___/ \_/\_/   o     http://www.ntk.net/ 
+NTK
+
+ok(extract($ntk), "It's not pretty, but at least it works");


More information about the Rt-commit mailing list