[Rt-commit] rt branch, master, updated. rt-4.1.5-207-g68bfa68

Sun Dec 30 00:21:44 EST 2012

The branch, master has been updated
       via  68bfa68172f36ed2fa4834fe737d7aa36dae0016 (commit)
       via  4f595c73a6c6c42aa7d75564694f53849d72fc41 (commit)
       via  a9c0032f5fe8fbaef2e1a018f162ab878a3df5ff (commit)
       via  fcb0448f310b4986906b02eefa311100ee7f35a4 (commit)
       via  77c964770516a14bbddbe5b11fe0bc9458108974 (commit)
      from  c63590c740189456df20efa0bb7d1effa89e3189 (commit)

Summary of changes:
 devel/tools/extract-message-catalog | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

- Log -----------------------------------------------------------------
commit 68bfa68172f36ed2fa4834fe737d7aa36dae0016
Merge: c63590c 4f595c7
Author: Alex Vandiver <alexmv at bestpractical.com>
Date:   Sat Dec 29 23:59:55 2012 -0500

    Merge branch '4.2/placeholder-mismatch'

diff --cc devel/tools/extract-message-catalog
index 7e6ec35,0000000..28efd11
mode 100755,000000..100755

--- a/devel/tools/extract-message-catalog
+++ b/devel/tools/extract-message-catalog
@@@ -1,385 -1,0 +1,418 @@@
 +#!/usr/bin/env perl
 +# BEGIN BPS TAGGED BLOCK {{{
 +#
 +# COPYRIGHT:
 +#
 +# This software is Copyright (c) 1996-2012 Best Practical Solutions, LLC
 +#                                          <sales at bestpractical.com>
 +#
 +# (Except where explicitly superseded by other copyright notices)
 +#
 +#
 +# LICENSE:
 +#
 +# This work is made available to you under the terms of Version 2 of
 +# the GNU General Public License. A copy of that license should have
 +# been provided with this software, but in any event can be snarfed
 +# from www.gnu.org.
 +#
 +# This work is distributed in the hope that it will be useful, but
 +# WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +# General Public License for more details.
 +#
 +# You should have received a copy of the GNU General Public License
 +# along with this program; if not, write to the Free Software
 +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 +# 02110-1301 or visit their web page on the internet at
 +# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
 +#
 +#
 +# CONTRIBUTION SUBMISSION POLICY:
 +#
 +# (The following paragraph is not intended to limit the rights granted
 +# to you to modify and distribute this software under the terms of
 +# the GNU General Public License and is only of importance to you if
 +# you choose to contribute your changes and enhancements to the
 +# community by submitting them to Best Practical Solutions, LLC.)
 +#
 +# By intentionally submitting any modifications, corrections or
 +# derivatives to this work, or any other work intended for use with
 +# Request Tracker, to Best Practical Solutions, LLC, you confirm that
 +# you are the copyright holder for those contributions and you grant
 +# Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
 +# royalty-free, perpetual, license to use, copy, create derivative
 +# works based on those contributions, and sublicense and distribute
 +# those contributions and any derivatives thereof.
 +#
 +# END BPS TAGGED BLOCK }}}
 +# Portions Copyright 2002 Autrijus Tang <autrijus at autrijus.org>
 +
 +use strict;
 +use warnings;
 +
 +use File::Find;
 +use File::Copy;
 +use Regexp::Common;
 +use Carp;
 +
 +# po dir is for extensions
 + at ARGV = (<share/po/*.po>, <share/po/*.pot>, <po/*.po>, <po/*.pot>) unless @ARGV;
 +
 +our %FILECAT;
 +
 +# extract all strings and stuff them into %FILECAT
 +# scan html dir for extensions
 +File::Find::find( { wanted => \&extract_strings_from_code, follow => 1 }, qw(bin sbin lib share html etc) );
 +
 +# ensure proper escaping and [_1] => %1 transformation
 +foreach my $str ( sort keys %FILECAT ) {
 +    my $entry = $FILECAT{$str};
 +    my $oldstr = $str;
 +
 +    $str =~ s/\\/\\\\/g;
 +    $str =~ s/\"/\\"/g;
 +    $str =~ s/((?<!~)(?:~~)*)\[_(\d+)\]/$1%$2/g;
 +    $str =~ s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/"$1%$2(".escape($3).")"/eg;
 +    $str =~ s/~([\[\]])/$1/g;
 +
 +    delete $FILECAT{$oldstr};
 +    $FILECAT{$str} = $entry;
 +}
 +
 +# update all language dictionaries
 +foreach my $dict (@ARGV) {
 +    $dict = "share/po/$dict.pot" if ( $dict eq 'rt' );
 +    $dict = "share/po/$dict.po" unless -f $dict or $dict =~ m!/!;
 +
 +    my $lang = $dict;
 +    $lang =~ s|.*/||;
 +    $lang =~ s|\.po$||;
 +    $lang =~ s|\.pot$||;
 +
 +    update($lang, $dict);
 +}
 +
 +# warn about various red flags in loc strings
 +foreach my $str ( sort keys %FILECAT ) {
 +    my $entry = $FILECAT{$str};
 +    my $entry_count = @$entry;
 +
 +    # doesn't exist in the current codebase, ignore for now
 +    next if $entry_count == 0;
 +
 +    my ($filename, $line) = @{ $entry->[0] };
 +
 +    my $location = "$filename line $line" . ($entry_count > 1 ? " (and ".($entry_count-1)." other places)" : "");
 +
 +    if ($str =~ /^\s/m || $str =~ /\s$/m || $str =~ /\\n$/m) {
 +        warn "Extraneous whitespace in '$str' at $location\n";
 +    }
 +
 +    if ($str =~ /([\$\@]\w+)/) {
 +        warn "Interpolated variable '$1' in '$str' at $location\n";
 +    }
 +}
 +
 +
 +sub extract_strings_from_code {
 +    my $file = $_;
 +
 +    local $/;
 +    return if ( -d $_ || !-e _ );
 +    return
 +      if ( $File::Find::dir =~
 +        qr!lib/blib|lib/t/autogen|var|m4|local|share/fonts! );
 +    return if ( /\.(?:pot|po|bak|gif|png|psd|jpe?g|svg|css|js)$/ );
 +    return if ( /~|,D|,B$|extract-message-catalog$|tweak-template-locstring$/ );
 +    return if ( /StyleGuide.pod/ );
 +    return if ( /^[\.#]/ );
 +    return if ( -f "$_.in" );
 +
 +    print "Looking at $File::Find::name\n";
 +    my $filename = $File::Find::name;
 +    $filename =~ s'^\./'';
 +    $filename =~ s'\.in$'';
 +
 +    unless (open _, '<', $file) {
 +        print "Cannot open $file for reading ($!), skipping.\n";
 +        return;
 +    }
 +
 +    my $re_space_wo_nl = qr{(?!\n)\s};
 +    my $re_loc_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc $re_space_wo_nl* $}mx;
 +    my $re_loc_qw_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_qw $re_space_wo_nl* $}mx;
 +    my $re_loc_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_pair $re_space_wo_nl* $}mx;
 +    my $re_loc_left_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_left_pair $re_space_wo_nl* $}mx;
 +    my $re_delim = $RE{delimited}{-delim=>q{'"}}{-keep};
 +
 +    $_ = <_>;
 +
 +    # Mason filter: <&|/l>...</&> and <&|/l_unsafe>...</&>
 +    my $line = 1;
 +    while (m!\G(.*?<&\|/l(?:_unsafe)?(.*?)&>(.*?)</&>)!sg) {
 +        my ( $all, $vars, $str ) = ( $1, $2, $3 );
 +        $vars =~ s/[\n\r]//g;
 +        $line += ( $all =~ tr/\n/\n/ );
 +        $str =~ s/\\'/\'/g;
 +        #print "STR IS $str\n";
 +        push @{ $FILECAT{$str} }, [ $filename, $line, $vars ];
 +    }
 +
 +    # Localization function: loc(...)
 +    $line = 1;
 +    pos($_) = 0;
 +    while (m/\G(.*?\bloc$RE{balanced}{-parens=>'()'}{-keep})/sg) {
 +        my ( $all, $match ) = ( $1, $2 );
 +        $line += ( $all =~ tr/\n/\n/ );
 +
 +        my ( $vars, $str );
 +        if ( $match =~
 +                /\(\s*($re_delim)(.*?)\s*\)$/so ) {
 +
 +            $str = substr( $1, 1, -1 );       # $str comes before $vars now
 +            $vars = $9;
 +        }
 +        else {
 +            next;
 +        }
 +
 +        $vars =~ s/[\n\r]//g;
 +        $str  =~ s/\\'/\'/g;
 +
 +        push @{ $FILECAT{$str} }, [ $filename, $line, $vars ];
 +    }
 +
 +    # Comment-based mark: "..." # loc
 +    $line = 1;
 +    pos($_) = 0;
 +    while (m/\G(.*?($re_delim)[\}\)\],;]*$re_loc_suffix)/smgo) {
 +        my ( $all, $str ) = ( $1, $2 );
 +        $line += ( $all =~ tr/\n/\n/ );
 +        unless ( defined $str ) {
 +            warn "Couldn't process loc at $filename:$line";
 +            next;
 +        }
 +        $str = substr($str, 1, -1);
 +	$str =~ s/\\'/\'/g;
 +	push @{ $FILECAT{$str} }, [ $filename, $line, '' ];
 +    }
 +
 +    # Comment-based qw mark: "qw(...)" # loc_qw
 +    $line = 1;
 +    pos($_) = 0;
 +    while (m/\G(.*?(?:qw\(([^)]+)\)[\}\)\],;]*)?$re_loc_qw_suffix)/smgo) {
 +        my ( $all, $str ) = ( $1, $2 );
 +        $line += ( $all =~ tr/\n/\n/ );
 +        unless ( defined $str ) {
 +            warn "Couldn't process loc_qw at $filename:$line";
 +            next;
 +        }
 +        foreach my $value (split ' ', $str) {
 +            push @{ $FILECAT{$value} }, [ $filename, $line, '' ];
 +        }
 +    }
 +
 +    # Comment-based left pair mark: "..." => ... # loc_left_pair
 +    $line = 1;
 +    pos($_) = 0;
 +    while (m/\G(.*?(?:(\w+|$re_delim)\s*=>[^#\n]+?)?$re_loc_left_pair_suffix)/smgo) {
 +        my ( $all, $key ) = ( $1, $2 );
 +        $line += ( $all =~ tr/\n/\n/ );
 +        unless ( defined $key ) {
 +            warn "Couldn't process loc_left_pair at $filename:$line";
 +            next;
 +        }
 +	$key  =~ s/\\'/\'/g;
 +	push @{ $FILECAT{$key} }, [ $filename, $line, '' ];
 +    }
 +
 +    # Comment-based pair mark: "..." => "..." # loc_pair
 +    $line = 1;
 +    pos($_) = 0;
 +    while (m/\G(.*?(?:(\w+)\s*=>\s*($re_delim)[\}\)\],;]*)?$re_loc_pair_suffix)/smgo) {
 +        my ( $all, $key, $val ) = ( $1, $2, $3 );
 +        $line += ( $all =~ tr/\n/\n/ );
 +        unless ( defined $key && defined $val ) {
 +            warn "Couldn't process loc_pair at $filename:$line";
 +            next;
 +        }
 +	$val = substr($val, 1, -1);
 +	$key  =~ s/\\'/\'/g;
 +	$val  =~ s/\\'/\'/g;
 +	push @{ $FILECAT{$key} }, [ $filename, $line, '' ];
 +	push @{ $FILECAT{$val} }, [ $filename, $line, '' ];
 +    }
 +
 +    close (_);
 +}
 +
++sub uniq {
++    my %seen;
++    return grep { !$seen{$_}++ } @_;
++}
++
 +sub update {
 +    my $lang = shift;
 +    my $file = shift;
 +    my ( %Lexicon, %Header);
 +    my $out = '';
 +
 +    unless (!-e $file or -w $file) {
 +	warn "Can't write to $lang, skipping...\n";
 +	return;
 +    }
 +
 +    print "Updating $lang...\n";
 +
 +    my @lines;
 +    @lines = (<LEXICON>) if open LEXICON, '<', $file;
 +    @lines = grep { !/^(#(:|\.)\s*|$)/ } @lines;
 +    while (@lines) {
 +        my $msghdr = "";
 +        $msghdr .= shift @lines while ( $lines[0] && $lines[0] !~ /^(#~ )?msgid/ );
 +        
 +        my $msgid  = "";
 +
 +# '#~ ' is the prefix of launchpad for msg that's not found the the source
 +# we'll remove the prefix later so we can still show them with our own mark
 +
 +        $msgid .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgid|")/ );
 +        my $msgstr = "";
 +        $msgstr .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgstr|")/ );
 +
 +        last unless $msgid;
 +
 +        chomp $msgid;
 +        chomp $msgstr;
 +
 +        $msgid  =~ s/^#~ //mg;
 +        $msgstr =~ s/^#~ //mg;
 +
 +        $msgid  =~ s/^msgid "(.*)"\s*?$/$1/m    or warn "$msgid in $file";
 +
 +        if ( $msgid eq '' ) {
 +            # null msgid, msgstr will have head info
 +            $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/ms or warn "$msgstr  in $file";
 +        }
 +        else {
 +            $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/m or warn "$msgstr  in $file";
 +        }
 +
 +        if ( $msgid ne ''  ) {
 +            for my $msg ( \$msgid, \$msgstr ) {
 +                if ( $$msg =~ /\n/ ) {
 +                    my @lines = split /\n/, $$msg;
 +                    $$msg =
 +                      shift @lines;   # first line don't need to handle any more
 +                    for (@lines) {
 +                        if (/^"(.*)"\s*$/) {
 +                            $$msg .= $1;
 +                        }
 +                    }
 +                }
 +
 +                # convert \\n back to \n
 +                $$msg =~ s/(?!\\)\\n/\n/g;
 +            }
 +        }
 +
 +        $Lexicon{$msgid} = $msgstr;
 +        $Header{$msgid}  = $msghdr;
++
++        validate_msgstr($lang, $msgid, $msgstr);
 +    }
 +
 +    my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ );
 +
 +    foreach my $str ( keys %FILECAT ) {
 +        $Lexicon{$str} ||= '';
 +    }
 +    foreach ( sort keys %Lexicon ) {
 +        my $f = join ( ' ', sort map $_->[0].":".$_->[1], @{ $FILECAT{$_} } );
 +        my $nospace = $_;
 +        $nospace =~ s/ +$//;
 +
 +        if ( !$Lexicon{$_} and $Lexicon{$nospace} ) {
 +            $Lexicon{$_} =
 +              $Lexicon{$nospace} . ( ' ' x ( length($_) - length($nospace) ) );
 +        }
 +
 +        next if !length( $Lexicon{$_} ) and $is_english;
 +
 +        my %seen;
 +        $out .= $Header{$_} if exists $Header{$_};
 +
 +
 +
 +        next if (!$f && $_ && !$Lexicon{$_});
 +        if ( $f && $f !~ /^\s+$/ ) {
 +
 +            $out .= "#: $f\n";
 +        }
 +        elsif ($_) {
 +            $out .= "#: NOT FOUND IN SOURCE\n";
 +        }
 +        foreach my $entry ( sort { $a->[2] cmp $b->[2] } grep { $_->[2] } @{ $FILECAT{$_} } ) {
 +            my ( $file, $line, $var ) = @{$entry};
 +            $var =~ s/^\s*,\s*//;
 +            $var =~ s/\s*$//;
 +            $out .= "#. ($var)\n" unless $seen{$var}++;
 +        }
 +        $out .= 'msgid ' . fmt($_) . "msgstr \"$Lexicon{$_}\"\n\n";
 +    }
 +
 +    open PO, '>', $file or die "Couldn't open '$file' for writing: $!";
 +    print PO $out;
 +    close PO;
 +
 +    return 1;
 +}
 +
++sub validate_msgstr {
++    my $lang   = shift;
++    my $msgid  = shift;
++    my $msgstr = shift;
++
++    return if $msgstr eq ''; # no translation for this string
++    return if $msgid eq '';  # special case: po file metadata
++
++    # we uniq because a string can use a placeholder more than once
++    # (eg %1 %quant(%1, ...) like in our czech localization
++    my @expected_variables = uniq($msgid =~ /%\d+/g);
++    my @got_variables = uniq($msgstr =~ /%\d+/g);
++
++    # this catches the case where expected uses %1,%2 and got uses %1,%3
++    # unlike a simple @expected_variables == @got_variables
++    my $expected = join ", ", sort @expected_variables;
++    my $got      = join ", ", sort @got_variables;
++    return 0 if $expected eq $got;
++
++    warn "$lang string does not have the same placeholders\n";
++    warn "expected (" . $expected . ") in msgid: $msgid\n";
++    warn "     got (" . $got      . ") in msgstr: $msgstr\n\n";
++
++    return 1;
++}
++
 +sub escape {
 +    my $text = shift;
 +    $text =~ s/\b_(\d+)/%$1/;
 +    return $text;
 +}
 +
 +sub fmt {
 +    my $str = shift;
 +    return "\"$str\"\n" unless $str =~ /\n/;
 +
 +    my $multi_line = ($str =~ /\n(?!\z)/);
 +    $str =~ s/\n/\\n"\n"/g;
 +
 +    if ($str =~ /\n"$/) {
 +        chop $str;
 +    }
 +    else {
 +        $str .= "\"\n";
 +    }
 +    return $multi_line ? qq(""\n"$str) : qq("$str);
 +}

-----------------------------------------------------------------------