[Rt-commit] r5028 - in Locale-Maketext-Lexicon: . lib/Locale/Maketext lib/Locale/Maketext/Extract t

autrijus at bestpractical.com autrijus at bestpractical.com
Wed Apr 12 23:43:20 EDT 2006


Author: autrijus
Date: Wed Apr 12 23:43:19 2006
New Revision: 5028

Modified:
   Locale-Maketext-Lexicon/Changes
   Locale-Maketext-Lexicon/lib/Locale/Maketext/Extract.pm
   Locale-Maketext-Lexicon/lib/Locale/Maketext/Extract/Run.pm
   Locale-Maketext-Lexicon/lib/Locale/Maketext/Lexicon.pm
   Locale-Maketext-Lexicon/script/xgettext.pl
   Locale-Maketext-Lexicon/t/5-extract.t

Log:
This be 0.60.

* Locale::Maketext::Extract: Bump version to 0.20.
  No functionality changes to Locale::Maketext::Lexicon; all changes
  in this release are to Locale::Maketext::Extract.

* The ->compile call now moves ->entries into ->compiled_entries,
  instead of mutating them in-place.  This allows repeated cycles
  of ->extract/->compile calls before the final ->write_po.

* Consequently, in the ->read_po/->write_po API, the "$verbatim" flag
  is now ignored.

* ->read_po no longer fails on empty PO files.

* The return values of ->msgids/->msgstr are now always in gettext
  style and never in maketext style.

* In xgettext.pl, the -u flag (specifies the style of the source
  programs as gettext instead of the default maketext) now has nothing
  to do with output escaping.  This closes another long-standing
  bug of "vanishing backslashes" when the same .po file is written
  once with -u and once without.

* The --unescaped flag to "xgettext.pl" is deprecated and renamed
  to the more appropriate "--use-gettext-style".

* Locale::Maketext::Extract::Run now accepts single-letter directory
  names instead of erroneously stripping them out.

Modified: Locale-Maketext-Lexicon/Changes
==============================================================================
--- Locale-Maketext-Lexicon/Changes	(original)
+++ Locale-Maketext-Lexicon/Changes	Wed Apr 12 23:43:19 2006
@@ -1,3 +1,33 @@
+[Changes for 0.60 - 2006-04-13]
+
+* Locale::Maketext::Extract: Bump version to 0.20.
+  No functionality changes to Locale::Maketext::Lexicon; all changes
+  in this release are to Locale::Maketext::Extract.
+
+* The ->compile call now moves ->entries into ->compiled_entries,
+  instead of mutating them in-place.  This allows repeated cycles
+  of ->extract/->compile calls before the final ->write_po.
+
+* Consequently, in the ->read_po/->write_po API, the "$verbatim" flag
+  is now ignored.
+
+* ->read_po no longer fails on empty PO files.
+
+* The return values of ->msgids/->msgstr are now always in gettext
+  style and never in maketext style.
+
+* In xgettext.pl, the -u flag (specifies the style of the source
+  programs as gettext instead of the default maketext) now has nothing
+  to do with output escaping.  This closes another long-standing
+  bug of "vanishing backslashes" when the same .po file is written
+  once with -u and once without.
+
+* The --unescaped flag to "xgettext.pl" is deprecated and renamed
+  to the more appropriate "--use-gettext-style".
+
+* Locale::Maketext::Extract::Run now accepts single-letter directory
+  names instead of erroneously stripping them out.
+
 [Changes for 0.58 - 2006-04-13]
 
 * Locale::Maketext::Extract: Direct calls to ->write_po now

Modified: Locale-Maketext-Lexicon/lib/Locale/Maketext/Extract.pm
==============================================================================
--- Locale-Maketext-Lexicon/lib/Locale/Maketext/Extract.pm	(original)
+++ Locale-Maketext-Lexicon/lib/Locale/Maketext/Extract.pm	Wed Apr 12 23:43:19 2006
@@ -1,5 +1,5 @@
 package Locale::Maketext::Extract;
-$Locale::Maketext::Extract::VERSION = '0.12';
+$Locale::Maketext::Extract::VERSION = '0.20';
 
 use strict;
 
@@ -12,7 +12,11 @@
     my $Ext = Locale::Maketext::Extract->new;
     $Ext->read_po('messages.po');
     $Ext->extract_file($_) for <*.pl>;
-    $Ext->compile;
+
+    # Set $entries_are_in_gettext_format if the .pl files above use
+    # loc('%1') instead of loc('[_1]')
+    $Ext->compile($entries_are_in_gettext_format);
+
     $Ext->write_po('messages.po');
 
 =head1 DESCRIPTION
@@ -63,7 +67,7 @@
 
 sub new {
     my $class = shift;
-    bless({ header => '', entries => {}, lexicon => {}, @_ }, $class);
+    bless({ header => '', entries => {}, compiled_entries => {}, lexicon => {}, @_ }, $class);
 }
 
 =head2 Accessors
@@ -71,6 +75,8 @@
     header, set_header
     lexicon, set_lexicon, msgstr, set_msgstr
     entries, set_entries, entry, add_entry, del_entry
+    compiled_entries, set_compiled_entries, compiled_entry,
+    add_compiled_entry, del_compiled_entry
     clear
 
 =cut
@@ -87,24 +93,32 @@
 sub entries { $_[0]{entries} }
 sub set_entries { $_[0]{entries} = $_[1] || {} }
 
+sub compiled_entries { $_[0]{compiled_entries} }
+sub set_compiled_entries { $_[0]{compiled_entries} = $_[1] || {} }
+
 sub entry { @{$_[0]->entries->{$_[1]} || [] } }
 sub add_entry { push @{$_[0]->entries->{$_[1]}}, $_[2] }
 sub del_entry { delete $_[0]->entries->{$_[1]} }
 
+sub compiled_entry { @{$_[0]->compiled_entries->{$_[1]} || [] } }
+sub add_compiled_entry { push @{$_[0]->compiled_entries->{$_[1]}}, $_[2] }
+sub del_compiled_entry { delete $_[0]->compiled_entries->{$_[1]} }
+
 sub clear {
     $_[0]->set_header;
     $_[0]->set_lexicon;
     $_[0]->set_entries;
+    $_[0]->set_compiled_entries;
 }
 
 =head2 PO File manipulation
 
-=head3 method read_po ($file, $verbatim?)
+=head3 method read_po ($file)
 
 =cut
 
 sub read_po {
-    my ($self, $file, $verbatim) = @_;
+    my ($self, $file) = @_;
     my $header = '';
 
     local *LEXICON;
@@ -118,20 +132,24 @@
     $self->set_header("$header\n");
 
     require Locale::Maketext::Lexicon::Gettext;
-    my $lexicon = Locale::Maketext::Lexicon::Gettext->parse($_, <LEXICON>);
-
-    $self->set_lexicon(
-        $verbatim ? { map _to_gettext($_), %$lexicon } : $lexicon
+    my $lexicon = (
+        defined($_)
+            ? Locale::Maketext::Lexicon::Gettext->parse($_, <LEXICON>)
+            : {}
     );
+
+    # Internally the lexicon is in gettext format already.
+    $self->set_lexicon( { map _maketext_to_gettext($_), %$lexicon } );
+
     close LEXICON;
 }
 
-=head3 method write_po ($file, $add_format?, $verbatim?)
+=head3 method write_po ($file, $add_format_marker?)
 
 =cut
 
 sub write_po {
-    my ($self, $file, $add_format, $verbatim) = @_;
+    my ($self, $file, $add_format_marker) = @_;
 
     local *LEXICON;
     open LEXICON, ">$file" or die "Can't write to $file$!\n";
@@ -143,8 +161,8 @@
         print LEXICON "\n";
         print LEXICON $self->msg_positions($msgid);
         print LEXICON $self->msg_variables($msgid);
-        print LEXICON $self->msg_format($msgid) if $add_format;
-        print LEXICON $self->msg_out($msgid, $verbatim);
+        print LEXICON $self->msg_format($msgid) if $add_format_marker;
+        print LEXICON $self->msg_out($msgid);
     }
 }
 
@@ -305,35 +323,45 @@
 
 =head2 Compilation
 
-    compile
-    normalize_space
+=head3 compile($entries_are_in_gettext_style?)
+
+Merges the C<entries> into C<compiled_entries>.
+
+If C<$entries_are_in_gettext_style> is true, the previously extracted entries
+are assumed to be in the B<Gettext> style (e.g. C<%1>).
+
+Otherwise they are assumed to be in B<Maketext> style (e.g. C<[_1]>) and are
+converted into B<Gettext> style before merging into C<compiled_entries>.
+
+The C<entries> are I<not> cleared after each compilation; use
+C<->set_entries()> to clear them if you need to extract from sources with
+varying styles.
 
 =cut
 
 sub compile {
-    my ($self, $verbatim) = @_;
+    my ($self, $entries_are_in_gettext_style) = @_;
     my $entries = $self->entries;
     my $lexicon = $self->lexicon;
+    my $comp    = $self->compiled_entries;
 
-    foreach my $str (sort keys %$entries) {
-        my $ostr    = $str;
-        my $entry   = $entries->{$str};
-        my $lexi    = $lexicon->{$ostr};
-
-        $str  = _to_gettext($str, $verbatim);
-        $lexi = _to_gettext($lexi, $verbatim);
-
-        $lexicon->{$str} ||= '';
-        next if $ostr eq $str;
-
-        $lexicon->{$str} ||= $lexi;
-        delete $entries->{$ostr}; delete $lexicon->{$ostr};
-        $entries->{$str} = $entry;
+    while (my ($k, $v) = each %$entries) {
+        my $compiled_key = (
+            ($entries_are_in_gettext_style)
+                ? $k
+                : _maketext_to_gettext($k)
+        );
+        $comp->{ $compiled_key } = $v;
+        $lexicon->{ $compiled_key } = '' unless exists $lexicon->{$compiled_key};
     }
 
     return %$lexicon;
 }
 
+=head3 normalize_space
+
+=cut
+
 my %Escapes = map {("\\$_" => eval("qq(\\$_)"))} qw(t r f b a e);
 sub normalize_space {
     my ($self, $msgid) = @_;
@@ -361,7 +389,7 @@
 
 sub msg_positions {
     my ($self, $msgid) = @_;
-    my %files = (map { ( " $_->[0]:$_->[1]" => 1 ) } $self->entry($msgid));
+    my %files = (map { ( " $_->[0]:$_->[1]" => 1 ) } $self->compiled_entry($msgid));
     return join('', '#:', sort(keys %files), "\n");
 }
 
@@ -370,7 +398,7 @@
     my $out = '';
 
     my %seen;
-    foreach my $entry ( grep { $_->[2] } $self->entry($msgid) ) {
+    foreach my $entry ( grep { $_->[2] } $self->compiled_entry($msgid) ) {
         my ($file, $line, $var) = @$entry;
         $var =~ s/^\s*,\s*//; $var =~ s/\s*$//;
         $out .= "#. ($var)\n" unless !length($var) or $seen{$var}++;
@@ -386,14 +414,9 @@
 }
 
 sub msg_out {
-    my ($self, $msgid, $verbatim) = @_;
+    my ($self, $msgid) = @_;
     my $msgstr = $self->msgstr($msgid);
 
-    if (!$verbatim) {
-        $msgid =~ s/(?=[\\"])/\\/g;
-        $msgstr =~ s/(?=[\\"])/\\/g;
-    }
-
     return "msgid "  . _format($msgid) .
            "msgstr " . _format($msgstr);
 }
@@ -401,7 +424,7 @@
 =head2 Internal utilities
 
     _default_header
-    _to_gettext
+    _maketext_to_gettext
     _escape
     _format
 
@@ -428,23 +451,14 @@
 .
 }
 
-sub _to_gettext {
-    my ($text, $verbatim) = @_;
+sub _maketext_to_gettext {
+    my $text = shift;
     return '' unless defined $text;
 
-    $text =~ s/\\/\\\\/g;
-    $text =~ s/\"/\\"/g;
-
-    while (my ($char, $esc) = each %Escapes) {
-        $text =~ s/$esc/$char/g;
-    }
-    return $text if $verbatim;
-
-    $text =~ s/((?<!~)(?:~~)*)\[_([1-9]\d*)\]/$1%$2/g;
-    $text =~ s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/$1%$2("""$3""")/g;
-    $text = join('', map {
-        /^""".*"""$/ ? _escape(substr($_, 3, -3)) : $_
-    } split(/(""".*?""")/, $text));
+    $text =~ s{((?<!~)(?:~~)*)\[_([1-9]\d*|\*)\]}
+              {$1%$2}g;
+    $text =~ s{((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]} 
+              {"$1%$2(" . _escape($3) . ')'}eg;
 
     $text =~ s/~([\~\[\]])/$1/g;
     return $text;
@@ -459,6 +473,12 @@
 sub _format {
     my $str = shift;
 
+    $str =~ s/(?=[\\"])/\\/g;
+
+    while (my ($char, $esc) = each %Escapes) {
+        $str =~ s/$esc/$char/g;
+    }
+
     return "\"$str\"\n" unless $str =~ /\n/;
     my $multi_line = ($str =~ /\n(?!\z)/);
     $str =~ s/\n/\\n"\n"/g;

Modified: Locale-Maketext-Lexicon/lib/Locale/Maketext/Extract/Run.pm
==============================================================================
--- Locale-Maketext-Lexicon/lib/Locale/Maketext/Extract/Run.pm	(original)
+++ Locale-Maketext-Lexicon/lib/Locale/Maketext/Extract/Run.pm	Wed Apr 12 23:43:19 2006
@@ -26,7 +26,7 @@
     Getopt::Long::GetOptions( \%opts,
         'f|files-from:s@',
         'D|directory:s@',
-        'u|unescaped',
+        'u|use-gettext-style|unescaped',
         'g|gnu-gettext',
         'o|output:s@',
         'd|default-domain:s',
@@ -59,21 +59,19 @@
     }
 
     @ARGV = ('-') unless @ARGV;
-    s!^.[/\\]!! for @ARGV;
+    s!^\.[/\\]!! for @ARGV;
 
     my $cwd = getcwd();
 
     foreach my $dir (@{$opts{p}||['.']}) {
         foreach my $po (@po) {
             my $Ext = Locale::Maketext::Extract->new;
-            $Ext->read_po($po, $opts{u}) if -r $po and -s _;
+            $Ext->read_po($po) if -r $po and -s _;
             $Ext->extract_file($_) for grep !/\.po$/i, @ARGV;
             $Ext->compile($opts{u}) or next;
 
             chdir $dir;
-
-            use constant ALWAYS_VERBATIM => 1;
-            $Ext->write_po($po, $opts{g}, ALWAYS_VERBATIM);
+            $Ext->write_po($po, $opts{g});
             chdir $cwd;
         }
     }

Modified: Locale-Maketext-Lexicon/lib/Locale/Maketext/Lexicon.pm
==============================================================================
--- Locale-Maketext-Lexicon/lib/Locale/Maketext/Lexicon.pm	(original)
+++ Locale-Maketext-Lexicon/lib/Locale/Maketext/Lexicon.pm	Wed Apr 12 23:43:19 2006
@@ -1,5 +1,5 @@
 package Locale::Maketext::Lexicon;
-$Locale::Maketext::Lexicon::VERSION = '0.58';
+$Locale::Maketext::Lexicon::VERSION = '0.60';
 
 use strict;
 
@@ -9,7 +9,7 @@
 
 =head1 VERSION
 
-This document describes version 0.58 of Locale::Maketext::Lexicon,
+This document describes version 0.60 of Locale::Maketext::Lexicon,
 released April 13, 2006.
 
 =head1 SYNOPSIS

Modified: Locale-Maketext-Lexicon/script/xgettext.pl
==============================================================================
--- Locale-Maketext-Lexicon/script/xgettext.pl	(original)
+++ Locale-Maketext-Lexicon/script/xgettext.pl	Wed Apr 12 23:43:19 2006
@@ -42,6 +42,14 @@
 
 =back
 
+=head2 Input file format:
+
+=item B<-u>, B<--use-gettext-style>
+
+Specifies that the source programs use the B<Gettext> style (e.g.
+C<%1>) instead of the B<Maketext> style (e.g. C<[_1]>) in their
+localization calls.
+
 =head2 Output file location:
 
 =over 4
@@ -65,12 +73,6 @@
 
 =over 4
 
-=item B<-u>, B<--unescaped>
-
-Disables conversion from B<Maketext> format to B<Gettext> format -- i.e.
-leave all brackets alone.  This is useful if you are also using the
-B<Gettext> syntax in your program.
-
 =item B<-g>, B<--gnu-gettext>
 
 Enables GNU gettext interoperability by printing C<#, perl-maketext-format>

Modified: Locale-Maketext-Lexicon/t/5-extract.t
==============================================================================
--- Locale-Maketext-Lexicon/t/5-extract.t	(original)
+++ Locale-Maketext-Lexicon/t/5-extract.t	Wed Apr 12 23:43:19 2006
@@ -10,34 +10,34 @@
 extract_ok('_("123")'		=> 123,		    'Simple extraction');
 
 extract_ok('_("[_1] is happy")'	=> '%1 is happy',   '[_1] to %1');
-extract_ok('_("[_1] is happy")' => '[_1] is happy', '[_1] verbatim', 1);
+extract_ok('_("%1 is happy")'   => '%1 is happy',   '%1 verbatim', 1);
 
 extract_ok('_("[*,_1] counts")'	=> '%*(%1) counts', '[*,_1] to %*(%1)');
-extract_ok('_("[*,_1] counts")'	=> '[*,_1] counts', '[*,_1] verbatim', 1);
+extract_ok('_("%*(%1) counts")'	=> '%*(%1) counts', '%*(%1) verbatim', 1);
 
 extract_ok('_("[*,_1,_2] counts")' => '%*(%1,%2) counts',
     '[*,_1,_2] to %*(%1,%2)');
 extract_ok('_("[*,_1,_2] counts")' => '[*,_1,_2] counts',
     '[*,_1,_2] verbatim', 1);
 
-extract_ok(q(_('foo\$bar'))	=> 'foo\\\\$bar',   'Escaped \$ in q');
+extract_ok(q(_('foo\$bar'))	=> 'foo\\$bar',   'Escaped \$ in q');
 extract_ok(q(_("foo\$bar"))	=> 'foo$bar',	    'Normalized \$ in qq');
 
-extract_ok(q(_('foo\x20bar'))	=> 'foo\\\\x20bar', 'Escaped \x in q');
+extract_ok(q(_('foo\x20bar'))	=> 'foo\\x20bar', 'Escaped \x in q');
 extract_ok(q(_("foo\x20bar"))	=> 'foo bar',	    'Normalized \x in qq');
 
-extract_ok(q(_('foo\nbar'))	=> 'foo\\\\nbar',   'Escaped \n in qq');
+extract_ok(q(_('foo\nbar'))	=> 'foo\\nbar',   'Escaped \n in qq');
 extract_ok(q(_("foo\nbar"))	=> "foo\nbar",	    'Normalized \n in qq');
 extract_ok(qq(_("foo\nbar"))	=> "foo\nbar",	    'Normalized literal \n in qq');
 
 extract_ok(q(_("foo\nbar"))	=> "foo\nbar",	    'Trailing \n in qq');
 extract_ok(qq(_("foobar\n"))	=> "foobar\n",	    'Trailing literal \n in qq');
 
-extract_ok(q(_('foo\bar'))	=> 'foo\\\\bar',    'Escaped \ in q');
-extract_ok(q(_('foo\\\\bar'))	=> 'foo\\\\bar',    'Normalized \\\\ in q');
-extract_ok(q(_("foo\bar"))	=> 'foo\bar',	    'Interpolated \t in qq');
+extract_ok(q(_('foo\bar'))	=> 'foo\\bar',    'Escaped \ in q');
+extract_ok(q(_('foo\\\\bar'))	=> 'foo\\bar',    'Normalized \\\\ in q');
+extract_ok(q(_("foo\bar"))	=> "foo\bar",	  'Interpolated \b in qq');
 
-extract_ok(q([% loc( 'foo "bar" baz' ) %]) => 'foo \\"bar\\" baz', 'Escaped double quote in text');
+extract_ok(q([% loc( 'foo "bar" baz' ) %]) => 'foo "bar" baz', 'Escaped double quote in text');
 
 sub extract_ok {
     my ($text, $result, $info, $verbatim) = @_;


More information about the Rt-commit mailing list