[Rt-commit] r3238 - in Text-Tags: . lib lib/Text lib/Text/Tags t
glasser at bestpractical.com
glasser at bestpractical.com
Wed Jun 22 15:32:30 EDT 2005
Author: glasser
Date: Wed Jun 22 15:32:29 2005
New Revision: 3238
Added:
Text-Tags/Changes
Text-Tags/MANIFEST
Text-Tags/Makefile.PL
Text-Tags/README
Text-Tags/lib/
Text-Tags/lib/Text/
Text-Tags/lib/Text/Tags/
Text-Tags/lib/Text/Tags.pm
Text-Tags/lib/Text/Tags/Parser.pm
Text-Tags/t/
Text-Tags/t/00.load.t
Text-Tags/t/01.parse.t
Text-Tags/t/02.join.t
Text-Tags/t/pod-coverage.t
Text-Tags/t/pod.t
Log:
First stab at Text::Tags. I'll CPAN this later today, probably.
Added: Text-Tags/Changes
==============================================================================
--- (empty file)
+++ Text-Tags/Changes Wed Jun 22 15:32:29 2005
@@ -0,0 +1,5 @@
+Revision history for Text-Tags
+
+0.0.1 Tue Jun 21 18:07:54 2005
+ Initial release.
+
Added: Text-Tags/MANIFEST
==============================================================================
--- (empty file)
+++ Text-Tags/MANIFEST Wed Jun 22 15:32:29 2005
@@ -0,0 +1,9 @@
+Changes
+MANIFEST
+META.yml # Will be created by "make dist"
+Makefile.PL
+README
+lib/Text/Tags.pm
+t/00.load.t
+t/pod-coverage.t
+t/pod.t
Added: Text-Tags/Makefile.PL
==============================================================================
--- (empty file)
+++ Text-Tags/Makefile.PL Wed Jun 22 15:32:29 2005
@@ -0,0 +1,10 @@
+use inc::Module::Install;
+
+name ('Text-Tags');
+author ('David Glasser <glasser at bestpractical.com>');
+version_from ('lib/Text/Tags.pm');
+abstract_from('lib/Text/Tags.pm');
+license('perl');
+requires('Test::More');
+
+&WriteAll;
Added: Text-Tags/README
==============================================================================
--- (empty file)
+++ Text-Tags/README Wed Jun 22 15:32:29 2005
@@ -0,0 +1,39 @@
+Text-Tags version 0.0.1
+
+Text::Tags is a folksonomy parser. It takes a string of space-separated "tags"
+and splits them. You can include spaces in tags by putting them in "double quotes"
+or 'single quotes'. There is no backslash-escaping. All whitespace is compressed to a single
+space.
+
+A major feature is that there are no "error cases" -- *every* possible string parses
+successfully. (However, there are a few types of strings that can't appear in a tag,
+generally involving tags that contain both single and double quotes.)
+
+
+INSTALLATION
+
+To install this module, run the following commands:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+
+
+DEPENDENCIES
+
+None.
+
+AUTHOR
+
+David Glasser <glasser at bestpractical.com>
+
+
+COPYRIGHT AND LICENCE
+
+Copyright (C) 2005, Best Practical Solutions LLC.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
Added: Text-Tags/lib/Text/Tags.pm
==============================================================================
--- (empty file)
+++ Text-Tags/lib/Text/Tags.pm Wed Jun 22 15:32:29 2005
@@ -0,0 +1,65 @@
+package Text::Tags;
+
+our $VERSION = '0.01';
+
+use warnings;
+use strict;
+
+1; # Magic true value required at end of module
+__END__
+
+=head1 NAME
+
+Text::Tags - parses "folksonomy" space-separated tags (stub module)
+
+
+=head1 SYNOPSIS
+
+ use Text::Tags::Parser;
+ my @tags = Text::Tags::Parser->new->parse_tags(q{ foo bar "baz bap" jenny's 'beep beep' });
+ my $line = Text::Tags::Parser->new->join_tags('foo', 'bar', 'baz bap', "jenny's", 'beep beep');
+
+
+=head1 DESCRIPTION
+
+Parses "folksonomies", which are simple space-separated-but-optionally-quoted tag lists.
+See L<Text::Tags::Parser> for the actual module; L<Text::Tags> may be used in a future
+version of the distribution.
+
+
+
+=head1 AUTHOR
+
+David Glasser C<< <glasser at bestpractical.com> >>
+
+
+=head1 LICENCE AND COPYRIGHT
+
+Copyright (c) 2005, Best Practical Solutions, LLC. All rights reserved.
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself. See L<perlartistic>.
+
+
+=head1 DISCLAIMER OF WARRANTY
+
+BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
+YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+NECESSARY SERVICING, REPAIR, OR CORRECTION.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
+LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
+OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
+THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
Added: Text-Tags/lib/Text/Tags/Parser.pm
==============================================================================
--- (empty file)
+++ Text-Tags/lib/Text/Tags/Parser.pm Wed Jun 22 15:32:29 2005
@@ -0,0 +1,218 @@
+package Text::Tags::Parser;
+
+use warnings;
+use strict;
+
+sub new {
+ my $class = shift;
+ bless {}, $class;
+}
+
+sub parse_tags {
+ my $self = shift;
+ my $string = shift;
+
+ my @tags;
+ my %seen;
+
+ # In this regexp, exactly one paren-group matches.
+ # Thus it can be accessed as $+
+ while ($string =~ /\G \s* (?:
+ " ([^"]*) (?: " | $) | # double-quoted string
+ ' ([^']*) (?: ' | $) | # single-quoted string
+ (\S+) # other
+ )/gx) {
+ my $tag = $+;
+
+ # shed explictly quoted empty strings
+ next unless length $tag;
+
+ $tag =~ s/^\s+//; $tag =~ s/\s+$//;
+ $tag =~ s/\s+/ /g;
+
+ # Tags should be unique, but in the right order
+ push @tags, $tag unless $seen{$tag}++;
+ }
+
+ return @tags;
+}
+
+sub join_tags {
+ my $self = shift;
+ my @tags = @_;
+
+ my %seen;
+ my @quoted_tags;
+
+ for my $tag (@tags) {
+ $tag =~ s/^\s+//; $tag =~ s/\s+$//;
+ $tag =~ s/\s+/ /g;
+
+ next unless length $tag;
+
+ my $quote;
+
+ if ($tag =~ /"/ and $tag =~ /'/) {
+ # This *could* be an illegal tag.
+
+ if ($tag =~ /^['"]/ or $tag =~/ /) {
+ # Yup, it's illegal
+ $tag =~ tr/"/'/;
+ $quote = q(");
+ } else {
+ # It has quotes in the inside, but no spaces or at the
+ # front, so just leave it unquoted.
+ $quote = q();
+ }
+ } elsif ($tag =~ /"/) {
+ # It contains a ", so either it needs to be unquoted or single-quoted
+ if ($tag =~ / / or $tag =~ /^"/) {
+ $quote = q(');
+ } else {
+ $quote = q();
+ }
+ } elsif ($tag =~ /'/) {
+ # It contains a ', so either it needs to be unquoted or double-quoted
+ if ($tag =~ / / or $tag =~ /^'/) {
+ $quote = q(");
+ } else {
+ $quote = q();
+ }
+ } elsif ($tag =~ / /) {
+ # By this point we know that it contains no quotes.
+ $quote = q(");
+ } else {
+ # No special characters at all!
+ $quote = q();
+ }
+
+ # $tag is now fully normalized (both by whitespace and by anti-illegalization).
+ # Have we seen it?
+
+ next if $seen{$tag}++;
+
+ push @quoted_tags, "$quote$tag$quote";
+ }
+
+ return join ' ', @quoted_tags;
+}
+
+
+1; # Magic true value required at end of module
+__END__
+
+=head1 NAME
+
+Text::Tags::Parser - parses "folksonomy" space-separated tags
+
+
+=head1 SYNOPSIS
+
+ use Text::Tags::Parser;
+ my @tags = Text::Tags::Parser->new->parse_tags(q{ foo bar "baz bap" jenny's 'beep beep' });
+ my $line = Text::Tags::Parser->new->join_tags('foo', 'bar', 'baz bap', "jenny's", 'beep beep');
+
+=head1 DESCRIPTION
+
+Parses "folksonomies", which are simple space-separated-but-optionally-quoted tag lists.
+
+Specifically, tags can be any string, with the following exception: if it
+contains both a single quote and a double quote, then it cannot contain
+whitespace or start with a quote. Fortunately, this is a pretty obscure
+restriction. In addition, all whitespace inside tags is normalized to a single
+space (with no leading or trailing whitespace).
+
+In a tag list string, tags can optionally be quoted with either single or
+double quotes. B<There is no escaping of either kind of quote>, although you
+can include one type of quote inside a string quoted with the other. Quotes
+can also just be included inside tags, as long as they aren't at the beginning;
+thus a tag like C<joe's> can just be entered without any extra quoting. Tags
+are separated by whitespace, though quoted tags can run into each other without
+whitespace. Empty tags (put in explicitly with C<""> or C<''>) are ignored.
+
+Why did the previous paragraph need to be so detailed? Because L<Text::Tags::Parser>
+B<always successfully parses> every line. That is, every single tags line converts into
+a list of tags, without any error conditions. For general use, you can just understand the
+rules as being B<separate tags with spaces, and put either kind of quotes around tags that
+need to have spaces>.
+
+
+=head1 METHODS
+
+=over 4
+
+=item B<new>
+
+Creates a new L<Text::Tags::Parser> object. In this version of the module, the objects
+do not actually hold any state, but this could change in a future version.
+
+=item B<parse_tags>($string)
+
+Given a tag list string, returns a list of tags (unquoted) using the rules described
+above.
+Any given tag will show up at most once in the output list.
+
+=item B<join_tags>(@tags)
+
+Given a list of tags, returns a tag list string containing them (appropriately quoted).
+Note that illegal tags will have all of their double quotes converted to single quotes.
+Any given tag will show up at most once in the output string.
+
+=back
+
+
+=head1 DEPENDENCIES
+
+None.
+
+=head1 BUGS AND LIMITATIONS
+
+The rules are kind of complicated, but at least they are well-defined.
+
+Please report any bugs or feature requests to
+C<bug-text-tags at rt.cpan.org>, or through the web interface at
+L<http://rt.cpan.org>.
+
+=head1 SEE ALSO
+
+L<Text::Folksonomies>, a module with similar functionality
+but has much more simplistic quote handling. (Specifically, it doesn't
+allow you to put any type of quote into a tag.) But if you don't care
+about that sort of support, it seems to work fine.
+
+
+=head1 AUTHOR
+
+David Glasser C<< <glasser at bestpractical.com> >>
+
+
+=head1 LICENCE AND COPYRIGHT
+
+Copyright (c) 2005, Best Practical Solutions, LLC. All rights reserved.
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself. See L<perlartistic>.
+
+
+=head1 DISCLAIMER OF WARRANTY
+
+BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
+YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+NECESSARY SERVICING, REPAIR, OR CORRECTION.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
+LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
+OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
+THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
Added: Text-Tags/t/00.load.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/00.load.t Wed Jun 22 15:32:29 2005
@@ -0,0 +1,8 @@
+use Test::More tests => 2;
+
+BEGIN {
+use_ok( 'Text::Tags' );
+use_ok( 'Text::Tags::Parser' );
+}
+
+diag( "Testing Text::Tags $Text::Tags::VERSION" );
Added: Text-Tags/t/01.parse.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/01.parse.t Wed Jun 22 15:32:29 2005
@@ -0,0 +1,55 @@
+use Test::More tests => 50;
+
+BEGIN { use_ok 'Text::Tags::Parser' }
+
+my $parser = Text::Tags::Parser->new;
+isa_ok($parser, 'Text::Tags::Parser');
+
+is_deeply( [ $parser->parse_tags('')], []);
+is_deeply( [ $parser->parse_tags('foo')], ['foo']);
+is_deeply( [ $parser->parse_tags(' foo')], ['foo']);
+is_deeply( [ $parser->parse_tags(' foo')], ['foo']);
+is_deeply( [ $parser->parse_tags("\t foo")], ['foo']);
+is_deeply( [ $parser->parse_tags('foo ')], ['foo']);
+is_deeply( [ $parser->parse_tags(' foo ')], ['foo']);
+is_deeply( [ $parser->parse_tags(' foo bar ')], ['foo', 'bar']);
+is_deeply( [ $parser->parse_tags(' foo bar ')], ['foo', 'bar']);
+is_deeply( [ $parser->parse_tags(' foo bar baz ')], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags(' "foo" bar baz ')], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags(q{ "foo" bar 'baz' })], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags(q{ "foo" bar 'baz})], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags(q{ "foo" bar "baz})], ['foo', 'bar', 'baz']);
+is_deeply( [ $parser->parse_tags(q{ "f\\"oo" bar "baz})], [q(f\\), q(oo"), q(bar), q(baz)]);
+is_deeply( [ $parser->parse_tags(q{ "f'oo" bar "baz})], [q(f'oo), q(bar), q(baz)]);
+is_deeply( [ $parser->parse_tags(q{I've bar "baz})], [q(I've), q(bar), q(baz)]);
+is_deeply( [ $parser->parse_tags(q{"eep"bap})], [ qw/eep bap/ ]);
+is_deeply( [ $parser->parse_tags(q{"eep"'bap'})], [ qw/eep bap/ ]);
+is_deeply( [ $parser->parse_tags(q{"eep""bap"})], [ qw/eep bap/ ]);
+is_deeply( [ $parser->parse_tags(q{ a'b"c })], [ q/a'b"c/ ]);
+is_deeply( [ $parser->parse_tags(q{ a' bla })], [ q/a'/, q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ a" bla })], [ q/a"/, q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ "'a" bla })], [ q/'a/, q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ '"a' bla })], [ q/"a/, q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ "a bla })], [ q/a bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ "" bla })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ '' bla })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ ""'' bla })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ """" bla })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ bla """" })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ bla '' })], [ q/bla/ ]);
+is_deeply( [ $parser->parse_tags(q{ bla '' '' baz "" })], [ q/bla/, q/baz/ ]);
+is_deeply( [ $parser->parse_tags(q{ "foo bar" })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{ "foo bar" })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{ "foo bar " })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{ " foo bar " })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{ 'foo bar' })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{ 'foo bar' })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{ 'foo bar ' })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(q{ ' foo bar ' })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{ ' \t foo bar ' })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{ ' foo \n bar ' })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{ ' foo bar \n ' })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{ ' foo \t \n\n \r bar ' })], ['foo bar']);
+is_deeply( [ $parser->parse_tags(qq{ foo bar foo })], [qw[foo bar]]);
+is_deeply( [ $parser->parse_tags(qq{ foo foo foo bar foo })], [qw[foo bar]]);
+is_deeply( [ $parser->parse_tags(qq{ "foo bar" " foo bar " 'foo bar ' baz })], ["foo bar", "baz"]);
Added: Text-Tags/t/02.join.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/02.join.t Wed Jun 22 15:32:29 2005
@@ -0,0 +1,19 @@
+use Test::More tests => 14;
+
+BEGIN { use_ok 'Text::Tags::Parser' }
+
+my $parser = Text::Tags::Parser->new;
+isa_ok($parser, 'Text::Tags::Parser');
+
+is($parser->join_tags(), q{});
+is($parser->join_tags(qw/foo bar baz/), q{foo bar baz});
+is($parser->join_tags(qw/foo bar baz bar/), q{foo bar baz});
+is($parser->join_tags(qw/foo bar's baz /), q{foo bar's baz});
+is($parser->join_tags('foo', 'foo bar'), q{foo "foo bar"});
+is($parser->join_tags('foo', 'fo"o bar'), q{foo 'fo"o bar'});
+is($parser->join_tags('beep', 'fo"r'), q{beep fo"r});
+is($parser->join_tags(q{"Foo's"}), q{"'Foo's'"});
+is($parser->join_tags(q{Bob "Foo's"}), q{"Bob 'Foo's'"});
+is($parser->join_tags(q{a'b"c}, 'bla'), q{a'b"c bla});
+is($parser->join_tags(q{ab"c bah}, 'bla'), q{'ab"c bah' bla});
+is($parser->join_tags(q{ab'c bah}, 'bla'), q{"ab'c bah" bla});
Added: Text-Tags/t/pod-coverage.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/pod-coverage.t Wed Jun 22 15:32:29 2005
@@ -0,0 +1,6 @@
+#!perl -T
+
+use Test::More;
+eval "use Test::Pod::Coverage 1.04";
+plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@;
+all_pod_coverage_ok();
Added: Text-Tags/t/pod.t
==============================================================================
--- (empty file)
+++ Text-Tags/t/pod.t Wed Jun 22 15:32:29 2005
@@ -0,0 +1,6 @@
+#!perl -T
+
+use Test::More;
+eval "use Test::Pod 1.14";
+plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
+all_pod_files_ok();
More information about the Rt-commit
mailing list