[Rt-commit] r3238 - in Text-Tags: . lib lib/Text lib/Text/Tags t

Wed Jun 22 15:32:30 EDT 2005

Author: glasser
Date: Wed Jun 22 15:32:29 2005
New Revision: 3238

Added:
   Text-Tags/Changes
   Text-Tags/MANIFEST
   Text-Tags/Makefile.PL
   Text-Tags/README
   Text-Tags/lib/
   Text-Tags/lib/Text/
   Text-Tags/lib/Text/Tags/
   Text-Tags/lib/Text/Tags.pm
   Text-Tags/lib/Text/Tags/Parser.pm
   Text-Tags/t/
   Text-Tags/t/00.load.t
   Text-Tags/t/01.parse.t
   Text-Tags/t/02.join.t
   Text-Tags/t/pod-coverage.t
   Text-Tags/t/pod.t
Log:
First stab at Text::Tags.  I'll CPAN this later today, probably.

Added: Text-Tags/Changes
==============================================================================

--- (empty file)
+++ Text-Tags/Changes	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,5 @@
+Revision history for Text-Tags
+
+0.0.1  Tue Jun 21 18:07:54 2005
+       Initial release.
+

Added: Text-Tags/MANIFEST
==============================================================================
--- (empty file)
+++ Text-Tags/MANIFEST	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,9 @@
+Changes
+MANIFEST
+META.yml # Will be created by "make dist"
+Makefile.PL
+README
+lib/Text/Tags.pm
+t/00.load.t
+t/pod-coverage.t
+t/pod.t

Added: Text-Tags/Makefile.PL
==============================================================================
--- (empty file)
+++ Text-Tags/Makefile.PL	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,10 @@
+use inc::Module::Install;
+
+name ('Text-Tags');
+author ('David Glasser <glasser at bestpractical.com>');
+version_from ('lib/Text/Tags.pm');
+abstract_from('lib/Text/Tags.pm');
+license('perl');
+requires('Test::More');
+
+&WriteAll;

Added: Text-Tags/README
==============================================================================
--- (empty file)
+++ Text-Tags/README	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,39 @@
+Text-Tags version 0.0.1
+
+Text::Tags is a folksonomy parser.  It takes a string of space-separated "tags"
+and splits them.  You can include spaces in tags by putting them in "double quotes"
+or 'single quotes'.  There is no backslash-escaping.  All whitespace is compressed to a single
+space.
+
+A major feature is that there are no "error cases" -- *every* possible string parses
+successfully.  (However, there are a few types of strings that can't appear in a tag,
+generally involving tags that contain both single and double quotes.)
+
+
+INSTALLATION
+
+To install this module, run the following commands:
+
+    perl Makefile.PL
+    make
+    make test
+    make install
+
+
+
+DEPENDENCIES
+
+None.
+
+AUTHOR
+
+David Glasser <glasser at bestpractical.com>
+
+
+COPYRIGHT AND LICENCE
+
+Copyright (C) 2005, Best Practical Solutions LLC.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+

Added: Text-Tags/lib/Text/Tags.pm
==============================================================================
--- (empty file)
+++ Text-Tags/lib/Text/Tags.pm	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,65 @@
+package Text::Tags;
+
+our $VERSION = '0.01';
+
+use warnings;
+use strict;
+
+1; # Magic true value required at end of module
+__END__
+
+=head1 NAME
+
+Text::Tags - parses "folksonomy" space-separated tags (stub module)
+
+
+=head1 SYNOPSIS
+
+    use Text::Tags::Parser;
+    my @tags = Text::Tags::Parser->new->parse_tags(q{ foo  bar  "baz bap" jenny's   'beep beep' });
+    my $line = Text::Tags::Parser->new->join_tags('foo', 'bar', 'baz bap', "jenny's", 'beep beep');
+
+  
+=head1 DESCRIPTION
+
+Parses "folksonomies", which are simple space-separated-but-optionally-quoted tag lists.
+See L<Text::Tags::Parser> for the actual module; L<Text::Tags> may be used in a future
+version of the distribution.
+
+
+
+=head1 AUTHOR
+
+David Glasser  C<< <glasser at bestpractical.com> >>
+
+
+=head1 LICENCE AND COPYRIGHT
+
+Copyright (c) 2005, Best Practical Solutions, LLC.  All rights reserved.
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself. See L<perlartistic>.
+
+
+=head1 DISCLAIMER OF WARRANTY
+
+BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
+YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+NECESSARY SERVICING, REPAIR, OR CORRECTION.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
+LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
+OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
+THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.

Added: Text-Tags/lib/Text/Tags/Parser.pm
==============================================================================
--- (empty file)
+++ Text-Tags/lib/Text/Tags/Parser.pm	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,218 @@
+package Text::Tags::Parser;
+
+use warnings;
+use strict;
+
+sub new {
+    my $class = shift;
+    bless {}, $class;
+} 
+
+sub parse_tags {
+    my $self = shift;
+    my $string = shift;
+
+    my @tags;
+    my %seen;
+   
+    # In this regexp, exactly one paren-group matches.
+    # Thus it can be accessed as $+
+    while ($string =~ /\G \s* (?:
+                         " ([^"]*) (?: " | $) |      # double-quoted string
+                         ' ([^']*) (?: ' | $) |      # single-quoted string
+			 (\S+)                       # other 
+		     )/gx) {
+	my $tag = $+;
+
+	# shed explictly quoted empty strings
+	next unless length $tag;
+
+	$tag =~ s/^\s+//; $tag =~ s/\s+$//;
+	$tag =~ s/\s+/ /g;
+	
+	# Tags should be unique, but in the right order
+	push @tags, $tag unless $seen{$tag}++;
+    } 
+
+    return @tags;
+} 
+
+sub join_tags {
+    my $self = shift;
+    my @tags = @_;
+
+    my %seen;
+    my @quoted_tags;
+
+    for my $tag (@tags) {
+	$tag =~ s/^\s+//; $tag =~ s/\s+$//;
+	$tag =~ s/\s+/ /g;
+
+	next unless length $tag;
+
+	my $quote;
+
+	if ($tag =~ /"/ and $tag =~ /'/) {
+	    # This *could* be an illegal tag.
+
+	    if ($tag =~ /^['"]/ or $tag =~/ /) {
+		# Yup, it's illegal
+		$tag =~ tr/"/'/;
+		$quote = q(");
+	    } else {
+		# It has quotes in the inside, but no spaces or at the
+		# front, so just leave it unquoted.
+		$quote = q();
+	    } 
+	} elsif ($tag =~ /"/) {
+	    # It contains a ", so either it needs to be unquoted or single-quoted
+	    if ($tag =~ / / or $tag =~ /^"/) {
+		$quote = q(');
+	    } else {
+		$quote = q();
+	    } 
+	} elsif ($tag =~ /'/) {
+	    # It contains a ', so either it needs to be unquoted or double-quoted
+	    if ($tag =~ / / or $tag =~ /^'/) {
+		$quote = q(");
+	    } else {
+		$quote = q();
+	    } 
+	} elsif ($tag =~ / /) {
+	    # By this point we know that it contains no quotes.
+	    $quote = q(");
+	} else {
+	    # No special characters at all!
+	    $quote = q();
+	} 
+
+	# $tag is now fully normalized (both by whitespace and by anti-illegalization).
+	# Have we seen it?
+
+	next if $seen{$tag}++;
+
+	push @quoted_tags, "$quote$tag$quote";
+    } 
+
+    return join ' ', @quoted_tags;
+} 
+
+
+1; # Magic true value required at end of module
+__END__
+
+=head1 NAME
+
+Text::Tags::Parser - parses "folksonomy" space-separated tags
+
+
+=head1 SYNOPSIS
+
+    use Text::Tags::Parser;
+    my @tags = Text::Tags::Parser->new->parse_tags(q{ foo  bar  "baz bap" jenny's   'beep beep' });
+    my $line = Text::Tags::Parser->new->join_tags('foo', 'bar', 'baz bap', "jenny's", 'beep beep');
+  
+=head1 DESCRIPTION
+
+Parses "folksonomies", which are simple space-separated-but-optionally-quoted tag lists.
+
+Specifically, tags can be any string, with the following exception: if it
+contains both a single quote and a double quote, then it cannot contain
+whitespace or start with a quote.  Fortunately, this is a pretty obscure
+restriction.  In addition, all whitespace inside tags is normalized to a single
+space (with no leading or trailing whitespace).  
+
+In a tag list string, tags can optionally be quoted with either single or
+double quotes.  B<There is no escaping of either kind of quote>, although you
+can include one type of quote inside a string quoted with the other.  Quotes
+can also just be included inside tags, as long as they aren't at the beginning;
+thus a tag like C<joe's> can just be entered without any extra quoting.  Tags
+are separated by whitespace, though quoted tags can run into each other without
+whitespace.  Empty tags (put in explicitly with C<""> or C<''>) are ignored.
+
+Why did the previous paragraph need to be so detailed?  Because L<Text::Tags::Parser> 
+B<always successfully parses> every line.  That is, every single tags line converts into
+a list of tags, without any error conditions.  For general use, you can just understand the
+rules as being B<separate tags with spaces, and put either kind of quotes around tags that
+need to have spaces>.
+
+
+=head1 METHODS
+
+=over 4
+
+=item B<new>
+
+Creates a new L<Text::Tags::Parser> object.  In this version of the module, the objects
+do not actually hold any state, but this could change in a future version.
+
+=item B<parse_tags>($string)
+
+Given a tag list string, returns a list of tags (unquoted) using the rules described 
+above.
+Any given tag will show up at most once in the output list.
+
+=item B<join_tags>(@tags)
+
+Given a list of tags, returns a tag list string containing them (appropriately quoted).
+Note that illegal tags will have all of their double quotes converted to single quotes.
+Any given tag will show up at most once in the output string.
+
+=back
+
+
+=head1 DEPENDENCIES
+
+None.
+
+=head1 BUGS AND LIMITATIONS
+
+The rules are kind of complicated, but at least they are well-defined.
+
+Please report any bugs or feature requests to
+C<bug-text-tags at rt.cpan.org>, or through the web interface at
+L<http://rt.cpan.org>.
+
+=head1 SEE ALSO
+
+L<Text::Folksonomies>, a module with similar functionality
+but has much more simplistic quote handling.  (Specifically, it doesn't
+allow you to put any type of quote into a tag.)  But if you don't care
+about that sort of support, it seems to work fine.
+
+
+=head1 AUTHOR
+
+David Glasser  C<< <glasser at bestpractical.com> >>
+
+
+=head1 LICENCE AND COPYRIGHT
+
+Copyright (c) 2005, Best Practical Solutions, LLC.  All rights reserved.
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself. See L<perlartistic>.
+
+
+=head1 DISCLAIMER OF WARRANTY
+
+BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
+YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+NECESSARY SERVICING, REPAIR, OR CORRECTION.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
+LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
+OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
+THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.

Added: Text-Tags/t/00.load.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/00.load.t	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,8 @@
+use Test::More tests => 2;
+
+BEGIN {
+use_ok( 'Text::Tags' );
+use_ok( 'Text::Tags::Parser' );
+}
+
+diag( "Testing Text::Tags $Text::Tags::VERSION" );

Added: Text-Tags/t/01.parse.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/01.parse.t	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,55 @@
+use Test::More tests => 50;
+
+BEGIN { use_ok 'Text::Tags::Parser' }
+
+my $parser = Text::Tags::Parser->new;
+isa_ok($parser, 'Text::Tags::Parser');
+
+is_deeply( [ $parser->parse_tags('')], []);
+is_deeply( [ $parser->parse_tags('foo')], ['foo']);
+is_deeply( [ $parser->parse_tags(' foo')], ['foo']);
+is_deeply( [ $parser->parse_tags('   foo')], ['foo']);
+is_deeply( [ $parser->parse_tags("\t foo")], ['foo']);
+is_deeply( [ $parser->parse_tags('foo   ')], ['foo']);
+is_deeply( [ $parser->parse_tags('  foo   ')], ['foo']);
+is_deeply( [ $parser->parse_tags('  foo   bar  ')], ['foo', 'bar']);
+is_deeply( [ $parser->parse_tags('  foo bar  ')], ['foo', 'bar']);
+is_deeply( [ $parser->parse_tags('  foo       bar     baz ')], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags('  "foo"       bar     baz ')], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags(q{  "foo"       bar     'baz' })], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags(q{  "foo"       bar     'baz})], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags(q{  "foo"       bar     "baz})], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags(q{  "f\\"oo"       bar     "baz})], [q(f\\), q(oo"), q(bar), q(baz)]);
+is_deeply( [ $parser->parse_tags(q{  "f'oo"       bar     "baz})], [q(f'oo), q(bar), q(baz)]);
+is_deeply( [ $parser->parse_tags(q{I've       bar     "baz})], [q(I've), q(bar), q(baz)]);
+is_deeply( [ $parser->parse_tags(q{"eep"bap})], [ qw/eep bap/ ]);
+is_deeply( [ $parser->parse_tags(q{"eep"'bap'})], [ qw/eep bap/ ]);
+is_deeply( [ $parser->parse_tags(q{"eep""bap"})], [ qw/eep bap/ ]);
+is_deeply( [ $parser->parse_tags(q{ a'b"c   })], [ q/a'b"c/ ]);
+is_deeply( [ $parser->parse_tags(q{ a' bla  })], [ q/a'/, q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ a" bla  })], [ q/a"/, q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ "'a" bla  })], [ q/'a/, q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ '"a' bla  })], [ q/"a/, q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ "a bla  })], [ q/a bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ "" bla  })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ '' bla  })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ ""'' bla  })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ """" bla  })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ bla """" })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ bla '' })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ bla '' '' baz "" })], [ q/bla/, q/baz/ ]);
+is_deeply( [ $parser->parse_tags(q{  "foo bar"  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{  "foo     bar"  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{  "foo bar  "  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{  "   foo bar  "  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{  'foo bar'  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{  'foo     bar'  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{  'foo bar  '  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{  '   foo bar  '  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{  ' \t  foo bar  '  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{  '   foo  \n bar  '  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{  '   foo bar \n  '  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{  '   foo  \t  \n\n \r  bar  '  })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{ foo bar foo   })], [qw[foo bar]]);
+is_deeply( [ $parser->parse_tags(qq{ foo foo foo    bar foo   })], [qw[foo bar]]);
+is_deeply( [ $parser->parse_tags(qq{ "foo bar" "   foo  bar    " 'foo  bar   ' baz   })], ["foo bar", "baz"]);

Added: Text-Tags/t/02.join.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/02.join.t	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,19 @@
+use Test::More tests => 14;
+
+BEGIN { use_ok 'Text::Tags::Parser' }
+
+my $parser = Text::Tags::Parser->new;
+isa_ok($parser, 'Text::Tags::Parser');
+
+is($parser->join_tags(), q{});
+is($parser->join_tags(qw/foo bar baz/), q{foo bar baz});
+is($parser->join_tags(qw/foo bar baz bar/), q{foo bar baz});
+is($parser->join_tags(qw/foo bar's baz /), q{foo bar's baz});
+is($parser->join_tags('foo', 'foo   bar'), q{foo "foo bar"});
+is($parser->join_tags('foo', 'fo"o   bar'), q{foo 'fo"o bar'});
+is($parser->join_tags('beep', 'fo"r'), q{beep fo"r});
+is($parser->join_tags(q{"Foo's"}), q{"'Foo's'"});
+is($parser->join_tags(q{Bob "Foo's"}), q{"Bob 'Foo's'"});
+is($parser->join_tags(q{a'b"c}, 'bla'), q{a'b"c bla});
+is($parser->join_tags(q{ab"c  bah}, 'bla'), q{'ab"c bah' bla});
+is($parser->join_tags(q{ab'c  bah}, 'bla'), q{"ab'c bah" bla});

Added: Text-Tags/t/pod-coverage.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/pod-coverage.t	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,6 @@
+#!perl -T
+
+use Test::More;
+eval "use Test::Pod::Coverage 1.04";
+plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@;
+all_pod_coverage_ok();

Added: Text-Tags/t/pod.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/pod.t	Wed Jun 22 15:32:29 2005
@@ -0,0 +1,6 @@
+#!perl -T
+
+use Test::More;
+eval "use Test::Pod 1.14";
+plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
+all_pod_files_ok();