[Rt-commit] r3698 - in Text-Tags: . lib/Text lib/Text/Tags t

glasser at bestpractical.com glasser at bestpractical.com
Thu Aug 18 22:03:21 EDT 2005


Author: glasser
Date: Thu Aug 18 22:03:20 2005
New Revision: 3698

Modified:
   Text-Tags/Changes
   Text-Tags/MANIFEST
   Text-Tags/lib/Text/Tags.pm
   Text-Tags/lib/Text/Tags/Parser.pm
   Text-Tags/t/01.parse.t
   Text-Tags/t/02.join.t
Log:
Allow tags to be separated by commas; ban tags with both kinds of quotes.

Modified: Text-Tags/Changes
==============================================================================
--- Text-Tags/Changes	(original)
+++ Text-Tags/Changes	Thu Aug 18 22:03:20 2005
@@ -1,5 +1,12 @@
 Revision history for Text-Tags
 
-0.0.1  Tue Jun 21 18:07:54 2005
+0.02   Thu Aug 18 21:55:34 EDT 2005
+
+* Allow commas as separators.
+
+* Disallow any tag containing both double and single quotes -- the old rule
+  was far too complex.
+
+0.01  Tue Jun 21 18:07:54 2005
        Initial release.
 

Modified: Text-Tags/MANIFEST
==============================================================================
--- Text-Tags/MANIFEST	(original)
+++ Text-Tags/MANIFEST	Thu Aug 18 22:03:20 2005
@@ -1,9 +1,20 @@
 Changes
-MANIFEST
-META.yml # Will be created by "make dist"
+inc/Module/Install.pm
+inc/Module/Install/Base.pm
+inc/Module/Install/Can.pm
+inc/Module/Install/Fetch.pm
+inc/Module/Install/Makefile.pm
+inc/Module/Install/Metadata.pm
+inc/Module/Install/Win32.pm
+inc/Module/Install/WriteAll.pm
+lib/Text/Tags.pm
+lib/Text/Tags/Parser.pm
 Makefile.PL
+MANIFEST
+META.yml			# Will be created by "make dist"
 README
-lib/Text/Tags.pm
 t/00.load.t
+t/01.parse.t
+t/02.join.t
 t/pod-coverage.t
 t/pod.t

Modified: Text-Tags/lib/Text/Tags.pm
==============================================================================
--- Text-Tags/lib/Text/Tags.pm	(original)
+++ Text-Tags/lib/Text/Tags.pm	Thu Aug 18 22:03:20 2005
@@ -1,6 +1,6 @@
 package Text::Tags;
 
-our $VERSION = '0.01';
+our $VERSION = '0.02';
 
 use warnings;
 use strict;

Modified: Text-Tags/lib/Text/Tags/Parser.pm
==============================================================================
--- Text-Tags/lib/Text/Tags/Parser.pm	(original)
+++ Text-Tags/lib/Text/Tags/Parser.pm	Thu Aug 18 22:03:20 2005
@@ -15,17 +15,19 @@
     my @tags;
     my %seen;
 
-    # In this regexp, exactly one paren-group matches.
+    # In this regexp, the actual content of the tag is in the last
+    # paren-group which matches in each alternative.
     # Thus it can be accessed as $+
     while (
-        $string =~ /\G \s* (?:
-                         " ([^"]*) (?: " | $) |      # double-quoted string
-                         ' ([^']*) (?: ' | $) |      # single-quoted string
-			 (\S+)                       # other 
+        $string =~ /\G [\s,]* (?:
+                        (") ([^"]*) (?: " | $) |      # double-quoted string
+                        (') ([^']*) (?: ' | $) |      # single-quoted string
+                        ([^\s,]+)                   # other 
 		     )/gx
         )
     {
         my $tag = $+;
+        my $is_quoted = $1 || $3;
 
         # shed explicitly quoted empty strings
         next unless length $tag;
@@ -58,48 +60,37 @@
         my $quote;
 
         if ( $tag =~ /"/ and $tag =~ /'/ ) {
-
-            # This *could* be an illegal tag.
-
-            if ( $tag =~ /^['"]/ or $tag =~ / / ) {
-
-                # Yup, it's illegal
-                $tag =~ tr/"/'/;
-                $quote = q(");
-            } else {
-
-                # It has quotes in the inside, but no spaces or at the
-                # front, so just leave it unquoted.
-                $quote = q();
-            }
+            # This is an illegal tag.  Normalize to just single-quotes.
+            # Quote it too, though technically the new form might not need it.
+            $tag =~ tr/"/'/;
+            $quote = q{"};
         } elsif ( $tag =~ /"/ ) {
-
-         # It contains a ", so either it needs to be unquoted or single-quoted
-            if ( $tag =~ / / or $tag =~ /^"/ ) {
-                $quote = q(');
+            # It contains a ", so either it needs to be unquoted or
+            # single-quoted
+            if ( $tag =~ / / or $tag =~ /,/ or $tag =~ /^"/ ) {
+                $quote = q{'};
             } else {
-                $quote = q();
+                $quote = q{};
             }
         } elsif ( $tag =~ /'/ ) {
-
-         # It contains a ', so either it needs to be unquoted or double-quoted
-            if ( $tag =~ / / or $tag =~ /^'/ ) {
-                $quote = q(");
+            # It contains a ', so either it needs to be unquoted or
+            # double-quoted
+            if ( $tag =~ / / or $tag =~ /,/ or $tag =~ /^'/ ) {
+                $quote = q{"};
             } else {
-                $quote = q();
+                $quote = q{};
             }
-        } elsif ( $tag =~ / / ) {
-
+        } elsif ( $tag =~ /[ ,]/ ) {
             # By this point we know that it contains no quotes.
-            $quote = q(");
+            # But it needs to be quoted.
+            $quote = q{"};
         } else {
-
             # No special characters at all!
-            $quote = q();
+            $quote = q{};
         }
 
-# $tag is now fully normalized (both by whitespace and by anti-illegalization).
-# Have we seen it?
+        # $tag is now fully normalized (both by whitespace and by
+        # anti-illegalization).  Have we seen it?
 
         next if $seen{$tag}++;
 
@@ -116,7 +107,6 @@
 
 Text::Tags::Parser - parses "folksonomy" space-separated tags
 
-
 =head1 SYNOPSIS
 
     use Text::Tags::Parser;
@@ -125,29 +115,29 @@
   
 =head1 DESCRIPTION
 
-Parses "folksonomies", which are simple space-separated-but-optionally-quoted tag lists.
+Parses "folksonomies", which are simple space-or-comma-separated-but-optionally-quoted tag lists.
 
-Specifically, tags can be any string, with the following exception: if it
-contains both a single quote and a double quote, then it cannot contain
-whitespace or start with a quote.  Fortunately, this is a pretty obscure
-restriction.  In addition, all whitespace inside tags is normalized to a single
-space (with no leading or trailing whitespace).  
-
-In a tag list string, tags can optionally be quoted with either single or
-double quotes.  B<There is no escaping of either kind of quote>, although you
-can include one type of quote inside a string quoted with the other.  Quotes
-can also just be included inside tags, as long as they aren't at the beginning;
-thus a tag like C<joe's> can just be entered without any extra quoting.  Tags
-are separated by whitespace, though quoted tags can run into each other without
-whitespace.  Empty tags (put in explicitly with C<""> or C<''>) are ignored.
+Specifically, tags can be any string, as long as they don't contain both a
+single and a double quote.  Hopefully, this is a pretty obscure restriction.  In
+addition, all whitespace inside tags is normalized to a single space (with no
+leading or trailing whitespace).  
+
+In a tag list string, tags can optionally be quoted with either single or double
+quotes.  B<There is no escaping of either kind of quote>, although you can
+include one type of quote inside a string quoted with the other.  Quotes can
+also just be included inside tags, as long as they aren't at the beginning; thus
+a tag like C<joe's> can just be entered without any extra quoting.  Tags are
+separated by whitespace and/or commas, though quoted tags can run into each
+other without whitespace.  Empty tags (put in explicitly with C<""> or C<''>)
+are ignored.  (Note that commas are not normalized with whitespace, and can be
+included in a tag if you quote them.)
 
 Why did the previous paragraph need to be so detailed?  Because L<Text::Tags::Parser> 
 B<always successfully parses> every line.  That is, every single tags line converts into
 a list of tags, without any error conditions.  For general use, you can just understand the
-rules as being B<separate tags with spaces, and put either kind of quotes around tags that
+rules as being B<separate tags with spaces or commas, and put either kind of quotes around tags that
 need to have spaces>.
 
-
 =head1 METHODS
 
 =over 4

Modified: Text-Tags/t/01.parse.t
==============================================================================
--- Text-Tags/t/01.parse.t	(original)
+++ Text-Tags/t/01.parse.t	Thu Aug 18 22:03:20 2005
@@ -1,55 +1,70 @@
-use Test::More tests => 50;
+use strict;
+use warnings;
+use Test::More tests => 56;
 
 BEGIN { use_ok 'Text::Tags::Parser' }
 
 my $parser = Text::Tags::Parser->new;
 isa_ok($parser, 'Text::Tags::Parser');
 
-is_deeply( [ $parser->parse_tags('')], []);
-is_deeply( [ $parser->parse_tags('foo')], ['foo']);
-is_deeply( [ $parser->parse_tags(' foo')], ['foo']);
-is_deeply( [ $parser->parse_tags('   foo')], ['foo']);
-is_deeply( [ $parser->parse_tags("\t foo")], ['foo']);
-is_deeply( [ $parser->parse_tags('foo   ')], ['foo']);
-is_deeply( [ $parser->parse_tags('  foo   ')], ['foo']);
-is_deeply( [ $parser->parse_tags('  foo   bar  ')], ['foo', 'bar']);
-is_deeply( [ $parser->parse_tags('  foo bar  ')], ['foo', 'bar']);
-is_deeply( [ $parser->parse_tags('  foo       bar     baz ')], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags('  "foo"       bar     baz ')], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags(q{  "foo"       bar     'baz' })], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags(q{  "foo"       bar     'baz})], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags(q{  "foo"       bar     "baz})], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags(q{  "f\\"oo"       bar     "baz})], [q(f\\), q(oo"), q(bar), q(baz)]);
-is_deeply( [ $parser->parse_tags(q{  "f'oo"       bar     "baz})], [q(f'oo), q(bar), q(baz)]);
-is_deeply( [ $parser->parse_tags(q{I've       bar     "baz})], [q(I've), q(bar), q(baz)]);
-is_deeply( [ $parser->parse_tags(q{"eep"bap})], [ qw/eep bap/ ]);
-is_deeply( [ $parser->parse_tags(q{"eep"'bap'})], [ qw/eep bap/ ]);
-is_deeply( [ $parser->parse_tags(q{"eep""bap"})], [ qw/eep bap/ ]);
-is_deeply( [ $parser->parse_tags(q{ a'b"c   })], [ q/a'b"c/ ]);
-is_deeply( [ $parser->parse_tags(q{ a' bla  })], [ q/a'/, q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ a" bla  })], [ q/a"/, q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ "'a" bla  })], [ q/'a/, q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ '"a' bla  })], [ q/"a/, q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ "a bla  })], [ q/a bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ "" bla  })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ '' bla  })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ ""'' bla  })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ """" bla  })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ bla """" })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ bla '' })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ bla '' '' baz "" })], [ q/bla/, q/baz/ ]);
-is_deeply( [ $parser->parse_tags(q{  "foo bar"  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{  "foo     bar"  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{  "foo bar  "  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{  "   foo bar  "  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{  'foo bar'  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{  'foo     bar'  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{  'foo bar  '  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{  '   foo bar  '  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{  ' \t  foo bar  '  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{  '   foo  \n bar  '  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{  '   foo bar \n  '  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{  '   foo  \t  \n\n \r  bar  '  })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{ foo bar foo   })], [qw[foo bar]]);
-is_deeply( [ $parser->parse_tags(qq{ foo foo foo    bar foo   })], [qw[foo bar]]);
-is_deeply( [ $parser->parse_tags(qq{ "foo bar" "   foo  bar    " 'foo  bar   ' baz   })], ["foo bar", "baz"]);
+sub p {
+    my $string = shift;
+    is_deeply( [ $parser->parse_tags($string) ], [ @_ ] );
+} 
+
+p('' => );
+p('foo' => 'foo');
+p(' foo' => 'foo');
+p('   foo' => 'foo');
+p("\t foo" => 'foo');
+p('foo   ' => 'foo');
+p('  foo   ' => 'foo');
+p('  foo   bar  ' => 'foo', 'bar');
+p('  foo bar  ' => 'foo', 'bar');
+p('  foo       bar     baz ' => 'foo', 'bar', 'baz');
+p('  "foo"       bar     baz ' => 'foo', 'bar', 'baz');
+p(q{  "foo"       bar     'baz' } => 'foo', 'bar', 'baz');
+p(q{  "foo"       bar     'baz} => 'foo', 'bar', 'baz');
+p(q{  "foo"       bar     "baz} => 'foo', 'bar', 'baz');
+p(q{  "f\\"oo"       bar     "baz} => q(f\\), q(oo"), q(bar), q(baz));
+p(q{  "f'oo"       bar     "baz} => q(f'oo), q(bar), q(baz));
+p(q{I've       bar     "baz} => q(I've), q(bar), q(baz));
+p(q{"eep"bap} =>  qw/eep bap/ );
+p(q{"eep"'bap'} =>  qw/eep bap/ );
+p(q{"eep""bap"} =>  qw/eep bap/ );
+p(q{ a'b"c   } =>  q/a'b"c/ );
+p(q{ a' bla  } =>  q/a'/, q/bla/ );
+p(q{ a" bla  } =>  q/a"/, q/bla/ );
+p(q{ "'a" bla  } =>  q/'a/, q/bla/ );
+p(q{ '"a' bla  } =>  q/"a/, q/bla/ );
+p(q{ "a bla  } =>  q/a bla/ );
+p(q{ "" bla  } =>  q/bla/ );
+p(q{ '' bla  } =>  q/bla/ );
+p(q{ ""'' bla  } =>  q/bla/ );
+p(q{ """" bla  } =>  q/bla/ );
+p(q{ bla """" } =>  q/bla/ );
+p(q{ bla '' } =>  q/bla/ );
+p(q{ bla '' '' baz "" } =>  q/bla/, q/baz/ );
+p(q{  "foo bar"  } => 'foo bar');
+p(q{  "foo     bar"  } => 'foo bar');
+p(q{  "foo bar  "  } => 'foo bar');
+p(q{  "   foo bar  "  } => 'foo bar');
+p(q{  'foo bar'  } => 'foo bar');
+p(q{  'foo     bar'  } => 'foo bar');
+p(q{  'foo bar  '  } => 'foo bar');
+p(q{  '   foo bar  '  } => 'foo bar');
+p(qq{  ' \t  foo bar  '  } => 'foo bar');
+p(qq{  '   foo  \n bar  '  } => 'foo bar');
+p(qq{  '   foo bar \n  '  } => 'foo bar');
+p(qq{  '   foo  \t  \n\n \r  bar  '  } => 'foo bar');
+p(qq{ foo bar foo   } => qw[foo bar]);
+p(qq{ foo foo foo    bar foo   } => qw[foo bar]);
+p(qq{ "foo bar" "   foo  bar    " 'foo  bar   ' baz   } => "foo bar", "baz");
+
+p(q{a,b} => qw/a b/);
+p(q{ a , b } => qw/a b/);
+p(q{ a, b } => qw/a b/);
+p(q{ a ,b } => qw/a b/);
+p(q{ " a, b"} => 'a, b');
+p(qq{ "   a   ,    \tb" c}, "a , b", "c");
+

Modified: Text-Tags/t/02.join.t
==============================================================================
--- Text-Tags/t/02.join.t	(original)
+++ Text-Tags/t/02.join.t	Thu Aug 18 22:03:20 2005
@@ -14,6 +14,6 @@
 is($parser->join_tags('beep', 'fo"r'), q{beep fo"r});
 is($parser->join_tags(q{"Foo's"}), q{"'Foo's'"});
 is($parser->join_tags(q{Bob "Foo's"}), q{"Bob 'Foo's'"});
-is($parser->join_tags(q{a'b"c}, 'bla'), q{a'b"c bla});
+is($parser->join_tags(q{a'b"c}, 'bla'), q{"a'b'c" bla});
 is($parser->join_tags(q{ab"c  bah}, 'bla'), q{'ab"c bah' bla});
 is($parser->join_tags(q{ab'c  bah}, 'bla'), q{"ab'c bah" bla});


More information about the Rt-commit mailing list