[Bps-public-commit] r9631 - in Parse-BooleanLogic/lib: Parse

ruz at bestpractical.com ruz at bestpractical.com
Fri Nov 9 18:32:23 EST 2007


Author: ruz
Date: Fri Nov  9 18:32:22 2007
New Revision: 9631

Added:
   Parse-BooleanLogic/lib/
   Parse-BooleanLogic/lib/Parse/
   Parse-BooleanLogic/lib/Parse/BooleanLogic.pm

Log:
* libs

Added: Parse-BooleanLogic/lib/Parse/BooleanLogic.pm
==============================================================================
--- (empty file)
+++ Parse-BooleanLogic/lib/Parse/BooleanLogic.pm	Fri Nov  9 18:32:22 2007
@@ -0,0 +1,260 @@
+
+=head1 NAME
+
+Parse::BooleanLogic - parser of boolean expressions
+
+=head1 SYNOPSIS
+    
+    my $parser = new Parse::BooleanLogic;
+    my $tree = $parser->as_array( string => 'x = 10' );
+    $tree = $parser->as_array( string => 'x = 10 OR (x > 20 AND x < 30)' );
+
+    $parser->parse(
+        string   => 'x = 10 OR (x > 20 AND x < 30)',
+        callback => {
+            open_paren   => sub { ... },
+            operator     => sub { ... },
+            operand      => sub { ... },
+            close_paren  => sub { ... },
+            error        => sub { ... },
+        },
+    );
+
+=head1 DESCRIPTION
+
+This module is quite fast parser for boolean expressions. Original it's been writen for
+Request Tracker for parsing SQL like expressions and it's still capable to, but
+it can be used to parse other boolean logic sentences with OPERANDs joined using
+binary OPERATORs and grouped and nested using parentheses (OPEN_PAREN and CLOSE_PAREN).
+
+=head1 METHODS
+
+=head2 as_array
+
+Takes a string and parses it into perl structure, where parentheses represented using
+array references, operands are hash references with one key/value pair: operand,
+when binary operators are simple scalars. So string C<x = 10 OR (x > 20 AND x < 30)>
+is parsed into the following structure:
+
+    [
+        { operand => 'x = 10' },
+        'OR',
+        [
+            { operand => 'x > 20' },
+            'AND',
+            { operand => 'x < 30' },
+        ]
+    ]
+
+=head2 parse
+
+Takes named arguments: string and callback. Where the first one is scalar with
+expression, the latter is a reference to hash with callbacks: open_paren, operator
+operand, close_paren and error. Callback for errors is optional and parser dies if
+it's omitted. Each callback is called when parser finds corresponding element in the
+string. In all cases the current match is passed as argument into the callback.
+
+Here is simple example based on L</as_array> method:
+
+    # result tree and the current group
+    my ($tree, $node);
+    $tree = $node = [];
+
+    # stack with nested groups, outer most in the bottom, inner on the top
+    my @pnodes = ();
+
+    my %callback;
+    # on open_paren put the current group on top of the stack,
+    # create new empty group and at the same time put it into
+    # the end of previous one
+    $callback{'open_paren'} = sub {
+        push @pnodes, $node;
+        push @{ $pnodes[-1] }, $node = []
+    };
+
+    # on close_paren just switch to previous group by taking it
+    # from the top of the stack
+    $callback{'close_paren'}     = sub { $node = pop @pnodes };
+
+    # push binary operators as is and operands as hash references
+    $callback{'operator'} = sub { push @$node, $_[0] };
+    $callback{'operand'}       = sub { push @$node, { operand => $_[0] } };
+
+    # run parser
+    $parser->parse( string => $string, callback => \%callback );
+
+    return $tree;
+
+Using this method you can build other representations of an expression.
+
+=cut
+
+use strict;
+use warnings;
+
+package Parse::BooleanLogic;
+
+our $VERSION = '0.02';
+
+use constant OPERAND     => 1;
+use constant OPERATOR    => 2;
+use constant OPEN_PAREN  => 4;
+use constant CLOSE_PAREN => 8;
+my @tokens = qw[OPERAND OPERATOR OPEN_PAREN CLOSE_PAREN];
+
+use Regexp::Common qw(delimited);
+my $re_operator    = qr[(?i:AND|OR)];
+my $re_open_paren  = qr[\(];
+my $re_close_paren = qr[\)];
+
+my $re_tokens      = qr{(?:$re_operator|$re_open_paren|$re_close_paren)};
+my $re_delim       = qr{$RE{delimited}{-delim=>qq{\'\"}}};
+my $re_operand     = qr{(?!\s)(?:$re_delim|(?!$re_tokens|["']).+?(?=$re_tokens|["']|\Z))+};
+
+sub new {
+    my $proto = shift;
+    return bless {@_}, ref($proto) || $proto;
+}
+
+{ # static variables
+
+my ($tree, $node, @pnodes);
+my %callback;
+$callback{'open_paren'} = sub {
+    push @pnodes, $node;
+    push @{ $pnodes[-1] }, $node = []
+};
+$callback{'close_paren'}     = sub { $node = pop @pnodes };
+$callback{'operator'} = sub { push @$node, $_[0] };
+$callback{'operand'} = sub { push @$node, { operand => $_[0] } };
+
+sub as_array {
+    my $self = shift;
+    my $string = shift;
+    my %arg = (@_);
+
+    $node = $tree = [];
+    @pnodes = ();
+
+    unless ( $arg{'operand_cb'} || $arg{'error_cb'} ) {
+        $self->parse(string => $string, callback => \%callback);
+        return $tree;
+    }
+
+    my %cb = %callback;
+    if ( $arg{'operand_cb'} ) {
+        $cb{'operand'} = sub { push @$node, $arg{'operand_cb'}->( $_[0] ) };
+    }
+    $cb{'error'} = $arg{'error_cb'} if $arg{'error_cb'};
+    $self->parse(string => $string, callback => \%cb);
+    return $tree;
+} }
+
+sub parse {
+    my $self = shift;
+    my %args = (
+        string => '',
+        callback => {},
+        @_
+    );
+    my ($string, $cb) = @args{qw(string callback)};
+    $string = '' unless defined $string;
+
+    my $want = OPERAND | OPEN_PAREN;
+    my $last = 0;
+
+    my $depth = 0;
+
+    while ( $string =~ /(
+                        $re_operator
+                        |$re_open_paren
+                        |$re_close_paren
+                        |$re_operand
+                       )/iogx )
+    {
+        my $match = $1;
+        next if $match =~ /^\s*$/;
+
+        # Highest priority is last
+        my $current = 0;
+        $current = OPERAND     if ($want & OPERAND)     && $match =~ /^$re_operand$/io;
+        $current = OPERATOR    if ($want & OPERATOR)    && $match =~ /^$re_operator$/io;
+        $current = OPEN_PAREN  if ($want & OPEN_PAREN)  && $match =~ /^$re_open_paren$/io;
+        $current = CLOSE_PAREN if ($want & CLOSE_PAREN) && $match =~ /^$re_close_paren$/io;
+
+        unless ($current && $want & $current) {
+            my $tmp = substr($string, 0, pos($string)- length($match));
+            $tmp .= '>'. $match .'<--here'. substr($string, pos($string));
+            my $msg = "Wrong expression, expecting a ". $self->bitmask_to_string($want) ." in '$tmp'";
+            $cb->{'error'}? $cb->{'error'}->($msg): die $msg;
+            return;
+        }
+
+        # State Machine:
+        if ( $current & OPEN_PAREN ) {
+            $cb->{'open_paren'}->( $match );
+            $depth++;
+            $want = OPERAND | OPEN_PAREN;
+        }
+        elsif ( $current & CLOSE_PAREN ) {
+            $cb->{'close_paren'}->( $match );
+            $depth--;
+            $want = OPERATOR;
+            $want |= CLOSE_PAREN if $depth;
+        }
+        elsif ( $current & OPERATOR ) {
+            $cb->{'operator'}->( $match );
+            $want = OPERAND | OPEN_PAREN;
+        }
+        elsif ( $current & OPERAND ) {
+            $match =~ s/\s+$//;
+            $cb->{'operand'}->( $match );
+            $want = OPERATOR;
+            $want |= CLOSE_PAREN if $depth;
+        }
+
+        $last = $current;
+    }
+
+    unless ( !$last || $last & (CLOSE_PAREN | OPERAND) ) {
+        my $msg = "Incomplete query, last element ("
+            . $self->bitmask_to_string($last)
+            . ") is not CLOSE_PAREN or OPERAND in '$string'";
+        $cb->{'error'}? $cb->{'error'}->($msg): die $msg;
+        return;
+    }
+
+    if ( $depth ) {
+        my $msg = "Incomplete query, $depth paren(s) isn't closed in '$string'";
+        $cb->{'error'}? $cb->{'error'}->($msg): die $msg;
+        return;
+    }
+}
+
+sub bitmask_to_string {
+    my $self = shift;
+    my $mask = shift;
+
+    my @res;
+    for( my $i = 0; $i < @tokens; $i++ ) {
+        next unless $mask & (1<<$i);
+        push @res, $tokens[$i];
+    }
+
+    my $tmp = join ', ', splice @res, 0, -1;
+    unshift @res, $tmp if $tmp;
+    return join ' or ', @res;
+}
+
+1;
+
+=head1 AUTHORS
+
+Ruslan Zakirov E<lt>ruz at cpan.orgE<gt>, Robert Spier E<lt>rspier at pobox.comE<gt>
+
+=head1 COPYRIGHT
+
+This program is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut



More information about the Bps-public-commit mailing list