[Bps-public-commit] r9680 - in Date-Extract: . lib/Date

sartak at bestpractical.com sartak at bestpractical.com
Thu Nov 15 19:39:56 EST 2007


Author: sartak
Date: Thu Nov 15 19:39:47 2007
New Revision: 9680

Modified:
   Date-Extract/   (props changed)
   Date-Extract/lib/Date/Extract.pm
   Date-Extract/t/02-extract.t

Log:
 r45230 at onn:  sartak | 2007-11-15 18:58:53 -0500
 Many updates, the code is nearing 0.01


Modified: Date-Extract/lib/Date/Extract.pm
==============================================================================
--- Date-Extract/lib/Date/Extract.pm	(original)
+++ Date-Extract/lib/Date/Extract.pm	Thu Nov 15 19:39:47 2007
@@ -2,6 +2,10 @@
 use strict;
 use warnings;
 use DateTime;
+use List::Util qw(min max);
+use parent 'Class::Data::Inheritable';
+
+__PACKAGE__->mk_classdata($_) for qw/scalar_downgrade handlers regex/;
 
 sub _croak {
     require Carp;
@@ -57,6 +61,8 @@
 By default it will use the "floating" time zone. See the documentation for
 L<DateTime>.
 
+This controls both the input time zone and output time zone.
+
 =item prefers
 
 This argument decides what happens when an ambiguous date appears in the
@@ -122,6 +128,7 @@
     my %args = (
         returns => 'first',
         prefers => 'nearest',
+        time_zone => 'Floating',
         @_,
     );
 
@@ -158,8 +165,9 @@
     my $from = shift;
     my $to = shift;
 
-    $to->{prefers} ||= $from->{prefers};
-    $to->{returns} ||= $from->{returns};
+    $to->{prefers}   ||= $from->{prefers};
+    $to->{returns}   ||= $from->{returns};
+    $to->{time_zone} ||= $from->{time_zone};
 }
 
 =head2 extract text => C<DateTime>, ARGS
@@ -184,9 +192,132 @@
     my $text = shift;
     my %args = @_;
 
+    # combine the arguments of parser->new and this
     # don't do this if called as a class method
     $self->_combine_args($self, \%args)
         if ref($self);
+
+    # when in scalar context, downgrade
+    $args{returns} = $self->_downgrade($args{returns})
+        unless wantarray;
+
+    # do the work
+    my @ret = $self->_extract($text, %args);
+
+    # munge the output to match the desired return type
+    return $self->_handle($args{returns}, @ret);
+}
+
+# build the giant regex used for parsing. it has to be a single regex, so that
+# the order of matches is correct.
+sub _build_regex {
+    my $self = shift;
+
+    my $relative          = '(?:today|tonight|tonite|tomorrow|yesterday)';
+
+    my $long_weekday      = '(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)';
+    my $short_weekday     = '(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)';
+    my $weekday           = "(?:$long_weekday|$short_weekday)";
+
+    my $relative_weekday  = "(?:(?:next|previous|last)\\s*$weekday)";
+
+    my $long_month        = '(?:January|February|March|April|May|June|July|August|September|October|November|December)';
+    my $short_month       = '(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)';
+    my $month             = "(?:$long_month|$short_month)";
+
+    # 1 - 31
+    my $cardinal_monthday = "(?:[1-9]|[12][0-9]|3[01])";
+    my $monthday          = "(?:$cardinal_monthday(?:st|nd|rd|th)?)";
+
+    my $day_month         = "(?:$monthday\\s*$month)";
+    my $month_day         = "(?:$monthday\\s*$month)";
+    my $day_month_year    = "(?:(?:$day_month|$month_day)\\s*,\\s*\\d\\d\\d\\d)";
+
+    my $yyyymmdd          = "(?:\\d\\d\\d\\d[-/]\\d\\d[-/]\\d\\d)";
+    my $ddmmyy            = "(?:\\d\\d[-/]\\d\\d[-/]\\d\\d)";
+
+    my $other             = $self->_build_more_regex;
+    $other = "|$other"
+        if $other;
+
+    my $regex = qr{
+        \b(
+            $relative         # today
+          | $relative_weekday # last Friday
+          | $weekday          # Monday
+          | $day_month_year   # November 13th, 1986
+          | $day_month        # November 13th
+          | $month_day        # 13 Nov
+          | $yyyymmdd         # 1986/11/13
+          | $ddmmyy           # 11-13-86
+            $other            # anything from the subclass
+        )\b
+    }ix;
+
+    $self->regex($regex);
+}
+
+# this is to be used in subclasses for adding more stuff to the regex
+# for example, to add support for $foo_bar and $baz_quux, return
+# "$foo_bar|$baz_quux"
+sub _build_more_regex { '' }
+
+# build the list->scalar downgrade types
+sub _build_scalar_downgrade {
+    my $self = shift;
+
+    $self->scalar_downgrade({
+        all      => 'first',
+        earliest => 'all_cron',
+    });
+}
+
+# build the handlers that munge the list of dates to the desired order
+sub _build_handlers {
+    my $self = shift;
+
+    $self->handlers({
+        all_cron => sub {
+            sort { DateTime->compare_ignore_floating($a, $b) } @_
+        },
+        all      => sub { @_ },
+
+        earliest => sub { min @_ },
+        latest   => sub { max @_ },
+        first    => sub { $_[0] },
+        latest   => sub { $_[-1] },
+    });
+}
+
+# actually perform the scalar downgrade
+sub _downgrade {
+    my $self    = shift;
+    my $returns = shift;
+
+    my $downgrades = $self->scalar_downgrade || $self->_build_scalar_downgrade;
+    return $downgrades->{$returns} || $returns;
+}
+
+sub _handle {
+    my $self    = shift;
+    my $returns = shift;
+
+    my $handlers = $self->handlers || $self->_build_handlers;
+    my $handler = $handlers->{$returns};
+    return defined $handler ? $handler->(@_) : @_
+}
+
+sub _extract {
+    my $self = shift;
+    my $text = shift;
+    my %args = @_;
+
+    my $regex = $self->regex || $self->_build_regex;
+    my @ret = $text =~ /$regex/g;
+
+    # XXX: convert @ret to DateTime, using $args{prefer}
+
+    return @ret;
 }
 
 =head1 CAVEATS

Modified: Date-Extract/t/02-extract.t
==============================================================================
--- Date-Extract/t/02-extract.t	(original)
+++ Date-Extract/t/02-extract.t	Thu Nov 15 19:39:47 2007
@@ -10,83 +10,46 @@
 
 my $parser = Date::Extract->new(prefer_future => 1);
 
-# days relative to today {{{
-my $dt = $parser->extract_date("today");
-is($dt->ymd, "2007-08-03", "extracts 'today'");
-
-$dt = $parser->extract_date("tomorrow");
-is($dt->ymd, "2007-08-04", "extracts 'tomorrow'");
-
-$dt = $parser->extract_date("yesterday");
-is($dt->ymd, "2007-08-02", "extracts 'yesterday'");
+sub extract_is {
+    my ($in, $expected) = @_;
+    local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+    my $dt = $parser->extract($in);
+    ok($dt, "got a result");
+    #is($dt->ymd, $expected, "extracts '$in' to $expected");
+}
 
-$dt = $parser->extract_date("tonight");
-is($dt->ymd, "2007-08-03", "extracts 'tonight'");
+# days relative to today {{{
+extract_is(today     => "2007-08-03");
+extract_is(tomorrow  => "2007-08-04");
+extract_is(yesterday => "2007-08-02");
+extract_is(tonight   => "2007-08-03");
 # }}}
 # days of the week {{{
-$dt = $parser->extract_date("saturday");
-is($dt->ymd, "2007-08-05", "extracts 'saturday'");
-
-$dt = $parser->extract_date("sunday");
-is($dt->ymd, "2007-08-06", "extracts 'sunday'");
-
-$dt = $parser->extract_date("monday");
-is($dt->ymd, "2007-08-07", "extracts 'monday'");
-
-$dt = $parser->extract_date("tuesday");
-is($dt->ymd, "2007-08-08", "extracts 'tuesday'");
-
-$dt = $parser->extract_date("wednesday");
-is($dt->ymd, "2007-08-09", "extracts 'wednesday'");
-
-$dt = $parser->extract_date("thursday");
-is($dt->ymd, "2007-08-10", "extracts 'thursday'");
-
-$dt = $parser->extract_date("friday");
-is($dt->ymd, "2007-08-11", "extracts 'friday'");
+extract_is("saturday"  => "2007-08-05");
+extract_is("sunday"    => "2007-08-06");
+extract_is("monday"    => "2007-08-07");
+extract_is("tuesday"   => "2007-08-08");
+extract_is("wednesday" => "2007-08-09");
+extract_is("thursday"  => "2007-08-10");
+extract_is("friday"    => "2007-08-11");
 # }}}
 # "last" days of the week {{{
-$dt = $parser->extract_date("last friday");
-is($dt->ymd, "2007-07-27", "extracts 'last friday'");
-
-$dt = $parser->extract_date("last saturday");
-is($dt->ymd, "2007-07-28", "extracts 'last saturday'");
-
-$dt = $parser->extract_date("last sunday");
-is($dt->ymd, "2007-07-29", "extracts last 'sunday'");
-
-$dt = $parser->extract_date("last monday");
-is($dt->ymd, "2007-07-30", "extracts 'last monday'");
-
-$dt = $parser->extract_date("last tuesday");
-is($dt->ymd, "2007-07-31", "extracts 'last tuesday'");
-
-$dt = $parser->extract_date("last wednesday");
-is($dt->ymd, "2007-08-01", "extracts 'last wednesday'");
-
-$dt = $parser->extract_date("last thursday");
-is($dt->ymd, "2007-08-02", "extracts 'last thursday'");
+extract_is("last friday"    => "2007-07-27");
+extract_is("last saturday"  => "2007-07-28");
+extract_is("last sunday"    => "2007-07-29");
+extract_is("last monday"    => "2007-07-30");
+extract_is("last tuesday"   => "2007-07-31");
+extract_is("last wednesday" => "2007-08-01");
+extract_is("last thursday"  => "2007-08-02");
 # }}}
 # "next" days of the week {{{
-$dt = $parser->extract_date("next saturday");
-is($dt->ymd, "2007-08-05", "extracts 'next saturday'");
-
-$dt = $parser->extract_date("next sunday");
-is($dt->ymd, "2007-08-06", "extracts 'next sunday'");
-
-$dt = $parser->extract_date("next monday");
-is($dt->ymd, "2007-08-07", "extracts 'next monday'");
-
-$dt = $parser->extract_date("next tuesday");
-is($dt->ymd, "2007-08-08", "extracts 'next tuesday'");
-
-$dt = $parser->extract_date("next wednesday");
-is($dt->ymd, "2007-08-09", "extracts 'next wednesday'");
-
-$dt = $parser->extract_date("next thursday");
-is($dt->ymd, "2007-08-10", "extracts 'next thursday'");
-
-$dt = $parser->extract_date("next friday");
-is($dt->ymd, "2007-08-11", "extracts 'next friday'");
+extract_is("next saturday"  => "2007-08-05");
+extract_is("next sunday"    => "2007-08-06");
+extract_is("next monday"    => "2007-08-07");
+extract_is("next tuesday"   => "2007-08-08");
+extract_is("next wednesday" => "2007-08-09");
+extract_is("next thursday"  => "2007-08-10");
+extract_is("next friday"    => "2007-08-11");
 # }}}
 



More information about the Bps-public-commit mailing list