[Rt-commit] r3698 - in Text-Tags: . lib/Text lib/Text/Tags t
glasser at bestpractical.com
glasser at bestpractical.com
Thu Aug 18 22:03:21 EDT 2005
Author: glasser
Date: Thu Aug 18 22:03:20 2005
New Revision: 3698
Modified:
Text-Tags/Changes
Text-Tags/MANIFEST
Text-Tags/lib/Text/Tags.pm
Text-Tags/lib/Text/Tags/Parser.pm
Text-Tags/t/01.parse.t
Text-Tags/t/02.join.t
Log:
Allow tags to be separated by commas; ban tags with both kinds of quotes.
Modified: Text-Tags/Changes
==============================================================================
--- Text-Tags/Changes (original)
+++ Text-Tags/Changes Thu Aug 18 22:03:20 2005
@@ -1,5 +1,12 @@
Revision history for Text-Tags
-0.0.1 Tue Jun 21 18:07:54 2005
+0.02 Thu Aug 18 21:55:34 EDT 2005
+
+* Allow commas as separators.
+
+* Disallow any tag containing both double and single quotes -- the old rule
+ was far too complex.
+
+0.01 Tue Jun 21 18:07:54 2005
Initial release.
Modified: Text-Tags/MANIFEST
==============================================================================
--- Text-Tags/MANIFEST (original)
+++ Text-Tags/MANIFEST Thu Aug 18 22:03:20 2005
@@ -1,9 +1,20 @@
Changes
-MANIFEST
-META.yml # Will be created by "make dist"
+inc/Module/Install.pm
+inc/Module/Install/Base.pm
+inc/Module/Install/Can.pm
+inc/Module/Install/Fetch.pm
+inc/Module/Install/Makefile.pm
+inc/Module/Install/Metadata.pm
+inc/Module/Install/Win32.pm
+inc/Module/Install/WriteAll.pm
+lib/Text/Tags.pm
+lib/Text/Tags/Parser.pm
Makefile.PL
+MANIFEST
+META.yml # Will be created by "make dist"
README
-lib/Text/Tags.pm
t/00.load.t
+t/01.parse.t
+t/02.join.t
t/pod-coverage.t
t/pod.t
Modified: Text-Tags/lib/Text/Tags.pm
==============================================================================
--- Text-Tags/lib/Text/Tags.pm (original)
+++ Text-Tags/lib/Text/Tags.pm Thu Aug 18 22:03:20 2005
@@ -1,6 +1,6 @@
package Text::Tags;
-our $VERSION = '0.01';
+our $VERSION = '0.02';
use warnings;
use strict;
Modified: Text-Tags/lib/Text/Tags/Parser.pm
==============================================================================
--- Text-Tags/lib/Text/Tags/Parser.pm (original)
+++ Text-Tags/lib/Text/Tags/Parser.pm Thu Aug 18 22:03:20 2005
@@ -15,17 +15,19 @@
my @tags;
my %seen;
- # In this regexp, exactly one paren-group matches.
+ # In this regexp, the actual content of the tag is in the last
+ # paren-group which matches in each alternative.
# Thus it can be accessed as $+
while (
- $string =~ /\G \s* (?:
- " ([^"]*) (?: " | $) | # double-quoted string
- ' ([^']*) (?: ' | $) | # single-quoted string
- (\S+) # other
+ $string =~ /\G [\s,]* (?:
+ (") ([^"]*) (?: " | $) | # double-quoted string
+ (') ([^']*) (?: ' | $) | # single-quoted string
+ ([^\s,]+) # other
)/gx
)
{
my $tag = $+;
+ my $is_quoted = $1 || $3;
# shed explicitly quoted empty strings
next unless length $tag;
@@ -58,48 +60,37 @@
my $quote;
if ( $tag =~ /"/ and $tag =~ /'/ ) {
-
- # This *could* be an illegal tag.
-
- if ( $tag =~ /^['"]/ or $tag =~ / / ) {
-
- # Yup, it's illegal
- $tag =~ tr/"/'/;
- $quote = q(");
- } else {
-
- # It has quotes in the inside, but no spaces or at the
- # front, so just leave it unquoted.
- $quote = q();
- }
+ # This is an illegal tag. Normalize to just single-quotes.
+ # Quote it too, though technically the new form might not need it.
+ $tag =~ tr/"/'/;
+ $quote = q{"};
} elsif ( $tag =~ /"/ ) {
-
- # It contains a ", so either it needs to be unquoted or single-quoted
- if ( $tag =~ / / or $tag =~ /^"/ ) {
- $quote = q(');
+ # It contains a ", so either it needs to be unquoted or
+ # single-quoted
+ if ( $tag =~ / / or $tag =~ /,/ or $tag =~ /^"/ ) {
+ $quote = q{'};
} else {
- $quote = q();
+ $quote = q{};
}
} elsif ( $tag =~ /'/ ) {
-
- # It contains a ', so either it needs to be unquoted or double-quoted
- if ( $tag =~ / / or $tag =~ /^'/ ) {
- $quote = q(");
+ # It contains a ', so either it needs to be unquoted or
+ # double-quoted
+ if ( $tag =~ / / or $tag =~ /,/ or $tag =~ /^'/ ) {
+ $quote = q{"};
} else {
- $quote = q();
+ $quote = q{};
}
- } elsif ( $tag =~ / / ) {
-
+ } elsif ( $tag =~ /[ ,]/ ) {
# By this point we know that it contains no quotes.
- $quote = q(");
+ # But it needs to be quoted.
+ $quote = q{"};
} else {
-
# No special characters at all!
- $quote = q();
+ $quote = q{};
}
-# $tag is now fully normalized (both by whitespace and by anti-illegalization).
-# Have we seen it?
+ # $tag is now fully normalized (both by whitespace and by
+ # anti-illegalization). Have we seen it?
next if $seen{$tag}++;
@@ -116,7 +107,6 @@
Text::Tags::Parser - parses "folksonomy" space-separated tags
-
=head1 SYNOPSIS
use Text::Tags::Parser;
@@ -125,29 +115,29 @@
=head1 DESCRIPTION
-Parses "folksonomies", which are simple space-separated-but-optionally-quoted tag lists.
+Parses "folksonomies", which are simple space-or-comma-separated-but-optionally-quoted tag lists.
-Specifically, tags can be any string, with the following exception: if it
-contains both a single quote and a double quote, then it cannot contain
-whitespace or start with a quote. Fortunately, this is a pretty obscure
-restriction. In addition, all whitespace inside tags is normalized to a single
-space (with no leading or trailing whitespace).
-
-In a tag list string, tags can optionally be quoted with either single or
-double quotes. B<There is no escaping of either kind of quote>, although you
-can include one type of quote inside a string quoted with the other. Quotes
-can also just be included inside tags, as long as they aren't at the beginning;
-thus a tag like C<joe's> can just be entered without any extra quoting. Tags
-are separated by whitespace, though quoted tags can run into each other without
-whitespace. Empty tags (put in explicitly with C<""> or C<''>) are ignored.
+Specifically, tags can be any string, as long as they don't contain both a
+single and a double quote. Hopefully, this is a pretty obscure restriction. In
+addition, all whitespace inside tags is normalized to a single space (with no
+leading or trailing whitespace).
+
+In a tag list string, tags can optionally be quoted with either single or double
+quotes. B<There is no escaping of either kind of quote>, although you can
+include one type of quote inside a string quoted with the other. Quotes can
+also just be included inside tags, as long as they aren't at the beginning; thus
+a tag like C<joe's> can just be entered without any extra quoting. Tags are
+separated by whitespace and/or commas, though quoted tags can run into each
+other without whitespace. Empty tags (put in explicitly with C<""> or C<''>)
+are ignored. (Note that commas are not normalized with whitespace, and can be
+included in a tag if you quote them.)
Why did the previous paragraph need to be so detailed? Because L<Text::Tags::Parser>
B<always successfully parses> every line. That is, every single tags line converts into
a list of tags, without any error conditions. For general use, you can just understand the
-rules as being B<separate tags with spaces, and put either kind of quotes around tags that
+rules as being B<separate tags with spaces or commas, and put either kind of quotes around tags that
need to have spaces>.
-
=head1 METHODS
=over 4
Modified: Text-Tags/t/01.parse.t
==============================================================================
--- Text-Tags/t/01.parse.t (original)
+++ Text-Tags/t/01.parse.t Thu Aug 18 22:03:20 2005
@@ -1,55 +1,70 @@
-use Test::More tests => 50;
+use strict;
+use warnings;
+use Test::More tests => 56;
BEGIN { use_ok 'Text::Tags::Parser' }
my $parser = Text::Tags::Parser->new;
isa_ok($parser, 'Text::Tags::Parser');
-is_deeply( [ $parser->parse_tags('')], []);
-is_deeply( [ $parser->parse_tags('foo')], ['foo']);
-is_deeply( [ $parser->parse_tags(' foo')], ['foo']);
-is_deeply( [ $parser->parse_tags(' foo')], ['foo']);
-is_deeply( [ $parser->parse_tags("\t foo")], ['foo']);
-is_deeply( [ $parser->parse_tags('foo ')], ['foo']);
-is_deeply( [ $parser->parse_tags(' foo ')], ['foo']);
-is_deeply( [ $parser->parse_tags(' foo bar ')], ['foo', 'bar']);
-is_deeply( [ $parser->parse_tags(' foo bar ')], ['foo', 'bar']);
-is_deeply( [ $parser->parse_tags(' foo bar baz ')], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags(' "foo" bar baz ')], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags(q{ "foo" bar 'baz' })], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags(q{ "foo" bar 'baz})], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags(q{ "foo" bar "baz})], ['foo', 'bar', 'baz']);
-is_deeply( [ $parser->parse_tags(q{ "f\\"oo" bar "baz})], [q(f\\), q(oo"), q(bar), q(baz)]);
-is_deeply( [ $parser->parse_tags(q{ "f'oo" bar "baz})], [q(f'oo), q(bar), q(baz)]);
-is_deeply( [ $parser->parse_tags(q{I've bar "baz})], [q(I've), q(bar), q(baz)]);
-is_deeply( [ $parser->parse_tags(q{"eep"bap})], [ qw/eep bap/ ]);
-is_deeply( [ $parser->parse_tags(q{"eep"'bap'})], [ qw/eep bap/ ]);
-is_deeply( [ $parser->parse_tags(q{"eep""bap"})], [ qw/eep bap/ ]);
-is_deeply( [ $parser->parse_tags(q{ a'b"c })], [ q/a'b"c/ ]);
-is_deeply( [ $parser->parse_tags(q{ a' bla })], [ q/a'/, q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ a" bla })], [ q/a"/, q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ "'a" bla })], [ q/'a/, q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ '"a' bla })], [ q/"a/, q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ "a bla })], [ q/a bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ "" bla })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ '' bla })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ ""'' bla })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ """" bla })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ bla """" })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ bla '' })], [ q/bla/ ]);
-is_deeply( [ $parser->parse_tags(q{ bla '' '' baz "" })], [ q/bla/, q/baz/ ]);
-is_deeply( [ $parser->parse_tags(q{ "foo bar" })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{ "foo bar" })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{ "foo bar " })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{ " foo bar " })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{ 'foo bar' })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{ 'foo bar' })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{ 'foo bar ' })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(q{ ' foo bar ' })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{ ' \t foo bar ' })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{ ' foo \n bar ' })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{ ' foo bar \n ' })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{ ' foo \t \n\n \r bar ' })], ['foo bar']);
-is_deeply( [ $parser->parse_tags(qq{ foo bar foo })], [qw[foo bar]]);
-is_deeply( [ $parser->parse_tags(qq{ foo foo foo bar foo })], [qw[foo bar]]);
-is_deeply( [ $parser->parse_tags(qq{ "foo bar" " foo bar " 'foo bar ' baz })], ["foo bar", "baz"]);
+sub p {
+ my $string = shift;
+ is_deeply( [ $parser->parse_tags($string) ], [ @_ ] );
+}
+
+p('' => );
+p('foo' => 'foo');
+p(' foo' => 'foo');
+p(' foo' => 'foo');
+p("\t foo" => 'foo');
+p('foo ' => 'foo');
+p(' foo ' => 'foo');
+p(' foo bar ' => 'foo', 'bar');
+p(' foo bar ' => 'foo', 'bar');
+p(' foo bar baz ' => 'foo', 'bar', 'baz');
+p(' "foo" bar baz ' => 'foo', 'bar', 'baz');
+p(q{ "foo" bar 'baz' } => 'foo', 'bar', 'baz');
+p(q{ "foo" bar 'baz} => 'foo', 'bar', 'baz');
+p(q{ "foo" bar "baz} => 'foo', 'bar', 'baz');
+p(q{ "f\\"oo" bar "baz} => q(f\\), q(oo"), q(bar), q(baz));
+p(q{ "f'oo" bar "baz} => q(f'oo), q(bar), q(baz));
+p(q{I've bar "baz} => q(I've), q(bar), q(baz));
+p(q{"eep"bap} => qw/eep bap/ );
+p(q{"eep"'bap'} => qw/eep bap/ );
+p(q{"eep""bap"} => qw/eep bap/ );
+p(q{ a'b"c } => q/a'b"c/ );
+p(q{ a' bla } => q/a'/, q/bla/ );
+p(q{ a" bla } => q/a"/, q/bla/ );
+p(q{ "'a" bla } => q/'a/, q/bla/ );
+p(q{ '"a' bla } => q/"a/, q/bla/ );
+p(q{ "a bla } => q/a bla/ );
+p(q{ "" bla } => q/bla/ );
+p(q{ '' bla } => q/bla/ );
+p(q{ ""'' bla } => q/bla/ );
+p(q{ """" bla } => q/bla/ );
+p(q{ bla """" } => q/bla/ );
+p(q{ bla '' } => q/bla/ );
+p(q{ bla '' '' baz "" } => q/bla/, q/baz/ );
+p(q{ "foo bar" } => 'foo bar');
+p(q{ "foo bar" } => 'foo bar');
+p(q{ "foo bar " } => 'foo bar');
+p(q{ " foo bar " } => 'foo bar');
+p(q{ 'foo bar' } => 'foo bar');
+p(q{ 'foo bar' } => 'foo bar');
+p(q{ 'foo bar ' } => 'foo bar');
+p(q{ ' foo bar ' } => 'foo bar');
+p(qq{ ' \t foo bar ' } => 'foo bar');
+p(qq{ ' foo \n bar ' } => 'foo bar');
+p(qq{ ' foo bar \n ' } => 'foo bar');
+p(qq{ ' foo \t \n\n \r bar ' } => 'foo bar');
+p(qq{ foo bar foo } => qw[foo bar]);
+p(qq{ foo foo foo bar foo } => qw[foo bar]);
+p(qq{ "foo bar" " foo bar " 'foo bar ' baz } => "foo bar", "baz");
+
+p(q{a,b} => qw/a b/);
+p(q{ a , b } => qw/a b/);
+p(q{ a, b } => qw/a b/);
+p(q{ a ,b } => qw/a b/);
+p(q{ " a, b"} => 'a, b');
+p(qq{ " a , \tb" c}, "a , b", "c");
+
Modified: Text-Tags/t/02.join.t
==============================================================================
--- Text-Tags/t/02.join.t (original)
+++ Text-Tags/t/02.join.t Thu Aug 18 22:03:20 2005
@@ -14,6 +14,6 @@
is($parser->join_tags('beep', 'fo"r'), q{beep fo"r});
is($parser->join_tags(q{"Foo's"}), q{"'Foo's'"});
is($parser->join_tags(q{Bob "Foo's"}), q{"Bob 'Foo's'"});
-is($parser->join_tags(q{a'b"c}, 'bla'), q{a'b"c bla});
+is($parser->join_tags(q{a'b"c}, 'bla'), q{"a'b'c" bla});
is($parser->join_tags(q{ab"c bah}, 'bla'), q{'ab"c bah' bla});
is($parser->join_tags(q{ab'c bah}, 'bla'), q{"ab'c bah" bla});
More information about the Rt-commit mailing list