[Bps-public-commit] email-address-list branch, master, updated. 8687bacbb8924d6a74cbce2318bfaf6f011f4166

Alex Vandiver alexmv at bestpractical.com
Mon Jan 27 14:30:21 EST 2014


The branch, master has been updated
       via  8687bacbb8924d6a74cbce2318bfaf6f011f4166 (commit)
       via  59a0e6c3045d3f3569b3a53bba34dbca203978a4 (commit)
       via  207afa5750f5658b89275266b5609b60211d5afd (commit)
       via  0ed9339a4f611663a9cf5424c028887d943daf8a (commit)
      from  4341cba0fe0fb1207f914bf83f26e9bec1142bb4 (commit)

Summary of changes:
 Changes                             | 13 +++++++++
 META.yml                            | 12 ++++++--
 Makefile.PL                         |  7 +++++
 README                              | 46 ++++++++++++++++++++++++++++--
 inc/Module/Install/ReadmeFromPod.pm |  2 +-
 lib/Email/Address/List.pm           | 57 +++++++++++++++++++++++++++++++++----
 t/pathological.t                    | 15 ++++++++++
 7 files changed, 141 insertions(+), 11 deletions(-)
 create mode 100644 Changes
 create mode 100644 t/pathological.t

- Log -----------------------------------------------------------------
commit 0ed9339a4f611663a9cf5424c028887d943daf8a
Author: Alex Vandiver <alexmv at bestpractical.com>
Date:   Wed Jan 15 19:11:48 2014 -0500

    Prevent atext atoms from backtracking; atoms should be fully greedy
    
    This prevents a quadratic explosion in the 'obs-phrase' regular
    expression.  In reduced form, 'obs-phrase' is equivalent to 'atom*',
    which is '(atext+)*'.  In the context of 'obs-mailbox', this comes
    before a required '<' -- but due to the alternation with obs-addr-spec,
    the optimizer cannot short-circuit the match when the string contains
    no '<' characters.  As such, it tries every possible way of splitting
    the phrase into one-or-more character atoms.  This leads to quadratic
    growth in matching time with the length of the string.
    
    As the 'atext' class is intentionally constructed to never contain any
    "interesting" characters upon which alternation might hinge,
    backtracking through them is never fruitful -- force the match on atoms
    to be non-backtracking.

diff --git a/lib/Email/Address/List.pm b/lib/Email/Address/List.pm
index ae65c75..467fae2 100644
--- a/lib/Email/Address/List.pm
+++ b/lib/Email/Address/List.pm
@@ -177,7 +177,7 @@ $RE{'cfws'}           = qr/$RE{'comment'}|\s+/;
 $RE{'qcontent'}       = qr/$RE{'qtext'}|$RE{'quoted_pair'}/;
 $RE{'quoted-string'}  = qr/$RE{'cfws'}*"$RE{'qcontent'}+"$RE{'cfws'}*/;
 
-$RE{'atom'}           = qr/$RE{'cfws'}*$RE{'atext'}+$RE{'cfws'}*/;
+$RE{'atom'}           = qr/$RE{'cfws'}*$RE{'atext'}++$RE{'cfws'}*/;
 
 $RE{'word'}           = qr/$RE{'cfws'}* (?: $RE{'atom'} | "$RE{'qcontent'}+" ) $RE{'cfws'}*/x;
 $RE{'phrase'}         = qr/$RE{'word'}+/x;
diff --git a/t/pathological.t b/t/pathological.t
new file mode 100644
index 0000000..1cb6bd4
--- /dev/null
+++ b/t/pathological.t
@@ -0,0 +1,15 @@
+use strict; use warnings;
+use Test::More tests => 1;
+use Email::Address::List;
+use Time::HiRes;
+
+my $start = Time::HiRes::time();
+my @addresses = Email::Address::List->parse("a" x 25);
+
+# Realistic expected is ~0.0001s.  In the pathological case, however, it
+# will take ~80s.  0.5s is thus unlikely to trip either false-positive
+# of false-negative, being approximitely two orders of magnitude away
+# from both.  We use actual elapsed time, rather than alarm(), for
+# portability.
+ok(Time::HiRes::time() - $start < 0.5,
+   "Extracting from a long string should take finite time");

commit 207afa5750f5658b89275266b5609b60211d5afd
Author: Alex Vandiver <alexmv at bestpractical.com>
Date:   Wed Jan 15 20:11:41 2014 -0500

    Version bump to 0.02

diff --git a/Changes b/Changes
new file mode 100644
index 0000000..edaa249
--- /dev/null
+++ b/Changes
@@ -0,0 +1,8 @@
+0.02 2014-01-15
+
+ - Prevent a quadratic-time check when the provided string did not
+   contain a valid address
+
+0.01 2012-11-17
+
+ - Initial release
diff --git a/META.yml b/META.yml
index be19835..c77c3e8 100644
--- a/META.yml
+++ b/META.yml
@@ -17,4 +17,4 @@ no_index:
   directory:
     - inc
     - t
-version: 0.01
+version: 0.02
diff --git a/Makefile.PL b/Makefile.PL
index 1ad335e..2f121ad 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,4 +1,5 @@
 use inc::Module::Install;
 all_from 'lib/Email/Address/List.pm';
 readme_from 'lib/Email/Address/List.pm';
+sign;
 WriteAll;
diff --git a/inc/Module/Install/ReadmeFromPod.pm b/inc/Module/Install/ReadmeFromPod.pm
index fb7075f..b5e03c3 100644
--- a/inc/Module/Install/ReadmeFromPod.pm
+++ b/inc/Module/Install/ReadmeFromPod.pm
@@ -7,7 +7,7 @@ use warnings;
 use base qw(Module::Install::Base);
 use vars qw($VERSION);
 
-$VERSION = '0.18';
+$VERSION = '0.22';
 
 sub readme_from {
   my $self = shift;
diff --git a/lib/Email/Address/List.pm b/lib/Email/Address/List.pm
index 467fae2..0450453 100644
--- a/lib/Email/Address/List.pm
+++ b/lib/Email/Address/List.pm
@@ -2,7 +2,7 @@ use strict; use warnings; use 5.008;
 
 package Email::Address::List;
 
-our $VERSION = '0.01';
+our $VERSION = '0.02';
 use Email::Address;
 
 =head1 NAME

commit 59a0e6c3045d3f3569b3a53bba34dbca203978a4
Author: Alex Vandiver <alexmv at bestpractical.com>
Date:   Wed Jan 22 18:55:29 2014 -0500

    Fold in changes present in released 0.01, but not in repository

diff --git a/META.yml b/META.yml
index c77c3e8..9598b0e 100644
--- a/META.yml
+++ b/META.yml
@@ -1,13 +1,17 @@
 ---
 abstract: 'RFC close address list parsing'
+author:
+  - 'Ruslan Zakirov <ruz at bestpractical.com>'
 build_requires:
   ExtUtils::MakeMaker: 6.36
+  JSON: 0
+  Test::More: 0
 configure_requires:
   ExtUtils::MakeMaker: 6.36
 distribution_type: module
 dynamic_config: 1
 generated_by: 'Module::Install version 1.06'
-license: unknown
+license: perl
 meta-spec:
   url: http://module-build.sourceforge.net/META-spec-v1.4.html
   version: 1.4
@@ -17,4 +21,8 @@ no_index:
   directory:
     - inc
     - t
+requires:
+  Email::Address: 0
+resources:
+  license: http://dev.perl.org/licenses/
 version: 0.02
diff --git a/Makefile.PL b/Makefile.PL
index 2f121ad..eeec1b8 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,5 +1,11 @@
 use inc::Module::Install;
 all_from 'lib/Email/Address/List.pm';
 readme_from 'lib/Email/Address/List.pm';
+
+requires 'Email::Address';
+
+test_requires 'Test::More';
+test_requires 'JSON';
+
 sign;
 WriteAll;
diff --git a/README b/README
index a2742ce..8264f35 100644
--- a/README
+++ b/README
@@ -1,6 +1,38 @@
 NAME
     Email::Address::List - RFC close address list parsing
 
+SYNOPSIS
+        use Email::Address::List;
+
+        my $header = <<'END';
+        Foo Bar <simple at example.com>, (an obsolete comment),,,
+         a group:
+          a . wierd . address @
+          for-real .biz
+         ; invalid thingy, <
+         more at example.com
+         >
+        END
+
+        my @list = Email::Address::List->parse($header);
+        foreach my $e ( @list ) {
+            if ($e->{'type'} eq 'mailbox') {
+                print "an address: ", $e->{'value'}->format ,"\n";
+            }
+            else {
+                print $e->{'type'}, "\n"
+            }
+        }
+
+        # prints:
+        # an address: "Foo Bar" <simple at example.com>
+        # comment
+        # group start
+        # an address: a.wierd.address at forreal.biz
+        # group end
+        # unknown
+        # an address: more at example.com
+
 DESCRIPTION
     Parser for From, To, Cc, Bcc, Reply-To, Sender and previous prefixed
     with Resent- (eg Resent-From) headers.
@@ -10,13 +42,15 @@ REASONING
     mentioned headers and this module is derived work from Email::Address.
 
     However, mentioned headers are structured and contain lists of
-    addresses. Most of the time you want to parse it from start to end
-    keeping every bit even if it's a invalid input.
+    addresses. Most of the time you want to parse such field from start to
+    end keeping everything even if it's an invalid input.
 
 METHODS
   parse
     A class method that takes a header value (w/o name and :) and a set of
-    named options. See below.
+    named options, for example:
+
+        my @list = Email::Address::List->parse( $line, option => 1 );
 
     Returns list of hashes. Each hash at least has 'type' key that describes
     the entry. Types:
@@ -84,3 +118,9 @@ METHODS
         Skip anything that is not recognizable. It still tries to recover as
         described earlier.
 
+AUTHOR
+    Ruslan Zakirov <ruz at bestpractical.com>
+
+LICENSE
+    Under the same terms as Perl itself.
+
diff --git a/lib/Email/Address/List.pm b/lib/Email/Address/List.pm
index 0450453..26070a9 100644
--- a/lib/Email/Address/List.pm
+++ b/lib/Email/Address/List.pm
@@ -9,6 +9,39 @@ use Email::Address;
 
 Email::Address::List - RFC close address list parsing
 
+=head1 SYNOPSIS
+
+    use Email::Address::List;
+
+    my $header = <<'END';
+    Foo Bar <simple at example.com>, (an obsolete comment),,,
+     a group:
+      a . wierd . address @
+      for-real .biz
+     ; invalid thingy, <
+     more at example.com
+     >
+    END
+
+    my @list = Email::Address::List->parse($header);
+    foreach my $e ( @list ) {
+        if ($e->{'type'} eq 'mailbox') {
+            print "an address: ", $e->{'value'}->format ,"\n";
+        }
+        else {
+            print $e->{'type'}, "\n"
+        }
+    }
+
+    # prints:
+    # an address: "Foo Bar" <simple at example.com>
+    # comment
+    # group start
+    # an address: a.wierd.address at forreal.biz
+    # group end
+    # unknown
+    # an address: more at example.com
+
 =head1 DESCRIPTION
 
 Parser for From, To, Cc, Bcc, Reply-To, Sender and
@@ -21,15 +54,18 @@ even mentioned headers and this module is derived work
 from Email::Address.
 
 However, mentioned headers are structured and contain lists
-of addresses. Most of the time you want to parse it from start
-to end keeping every bit even if it's a invalid input.
+of addresses. Most of the time you want to parse such field
+from start to end keeping everything even if it's an invalid
+input.
 
 =head1 METHODS
 
 =head2 parse
 
 A class method that takes a header value (w/o name and :) and
-a set of named options. See below.
+a set of named options, for example:
+
+    my @list = Email::Address::List->parse( $line, option => 1 );
 
 Returns list of hashes. Each hash at least has 'type' key that
 describes the entry. Types:
@@ -352,3 +388,14 @@ sub _process_mailbox {
 }
 
 
+=head1 AUTHOR
+
+Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>
+
+=head1 LICENSE
+
+Under the same terms as Perl itself.
+
+=cut
+
+1;

commit 8687bacbb8924d6a74cbce2318bfaf6f011f4166
Author: Alex Vandiver <alexmv at bestpractical.com>
Date:   Wed Jan 22 18:57:12 2014 -0500

    Version bump to 0.03

diff --git a/Changes b/Changes
index edaa249..a3254ed 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,8 @@
+0.03 2013-01-22
+
+ - Include documentation updates present in the released version of
+   0.01, but never checked into the repository
+
 0.02 2014-01-15
 
  - Prevent a quadratic-time check when the provided string did not
diff --git a/META.yml b/META.yml
index 9598b0e..c4cd6e4 100644
--- a/META.yml
+++ b/META.yml
@@ -25,4 +25,4 @@ requires:
   Email::Address: 0
 resources:
   license: http://dev.perl.org/licenses/
-version: 0.02
+version: 0.03
diff --git a/lib/Email/Address/List.pm b/lib/Email/Address/List.pm
index 26070a9..5a25cdd 100644
--- a/lib/Email/Address/List.pm
+++ b/lib/Email/Address/List.pm
@@ -2,7 +2,7 @@ use strict; use warnings; use 5.008;
 
 package Email::Address::List;
 
-our $VERSION = '0.02';
+our $VERSION = '0.03';
 use Email::Address;
 
 =head1 NAME

-----------------------------------------------------------------------



More information about the Bps-public-commit mailing list