Home | History | Annotate | Download | only in Scripts
      1 #!/usr/bin/perl -w
      2 
      3 # Copyright (C) 2006, 2007, 2009 Apple Inc. All rights reserved.
      4 #
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions
      7 # are met:
      8 #
      9 # 1.  Redistributions of source code must retain the above copyright
     10 #     notice, this list of conditions and the following disclaimer. 
     11 # 2.  Redistributions in binary form must reproduce the above copyright
     12 #     notice, this list of conditions and the following disclaimer in the
     13 #     documentation and/or other materials provided with the distribution. 
     14 # 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     15 #     its contributors may be used to endorse or promote products derived
     16 #     from this software without specific prior written permission. 
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     19 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     22 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     23 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     24 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     25 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 
     29 # This script is like the genstrings tool (minus most of the options) with these differences.
     30 #
     31 #    1) It uses the names UI_STRING and UI_STRING_KEY for the macros, rather than the macros
     32 #       from NSBundle.h, and doesn't support tables (although they would be easy to add).
     33 #    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
     34 #       @"" strings only reliably support ASCII since they are decoded based on the system encoding
     35 #       at runtime, so give different results on US and Japanese systems for example).
     36 #    3) It looks for strings that are not marked for localization, using both macro names that are
     37 #       known to be used for debugging in Intrigue source code and an exceptions file.
     38 #    4) It finds the source files to work on by searching directories rather than taking them as
     39 #       parameters; the exceptions file and the output file are given on the command line.
     40 #       It would have been nice to use the project to find the source files, but it's too hard to
     41 #       locate source files after parsing a .pbxproj file.
     42 
     43 # The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.
     44 
     45 use strict;
     46 
     # Forward declaration of the hex-unescaping helper (prototype: one scalar).
     sub UnescapeHexSequence($);

     # Macro and function names that start a "debug" context: string literals
     # found inside their argument lists are not reported as needing localization.
     my %isDebugMacro = map { $_ => 1 } qw(
         ASSERT_WITH_MESSAGE
         LOG_ERROR
         ERROR
         NSURL_ERROR
         FATAL
         LOG
         LOG_WARNING
         UI_STRING_LOCALIZE_LATER
         LPCTSTR_UI_STRING_LOCALIZE_LATER
         UNLOCALIZED_STRING
         UNLOCALIZED_LPCTSTR
         dprintf
         NSException
         NSLog
         printf
     );
     50 
     # Command line: <exceptions file> <file to update> [ directory... ]
     # Both leading arguments are shifted unconditionally below, so both are
     # required; checking for only one would let an undefined file name through.
     @ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n";

     my $exceptionsFile = shift @ARGV;
     -f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n";

     my $fileToUpdate = shift @ARGV;
     -f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n";

     # Remaining arguments are directories to search. An argument of the form
     # "-DIR" names a directory to skip instead. With no arguments at all, the
     # current directory is searched.
     my @directories = ();
     my @directoriesToSkip = ();
     if (@ARGV < 1) {
         push(@directories, ".");
     } else {
         for my $dir (@ARGV) {
             if ($dir =~ /^-(.*)$/) {
                 push @directoriesToSkip, $1;
             } else {
                 push @directories, $dir;
             }
         }
     }

     # Set to 1 by any ERROR diagnostic; prevents the output file from being written.
     my $sawError = 0;

     # Counters reported in the summary at the end of the run.
     my $localizedCount = 0;
     my $keyCollisionCount = 0;
     my $notLocalizedCount = 0;
     my $NSLocalizeCount = 0;

     # %exception maps each exceptions-file entry to the line it was read from;
     # %usedException records which entries actually suppressed a diagnostic.
     my %exception;
     my %usedException;
     82 
     # Read the exceptions file. Each line must be one of:
     #   "string"            a quoted string literal (C-style escapes allowed)
     #   path/file.ext       a source file name (.h, .m, .mm, .c or .cpp)
     #   path/file.ext:"s"   a file name / string pair
     # Valid entries are stored in %exception keyed by the whole line, with the
     # line number as value. Duplicates and malformed lines are reported.
     if (open my $exceptionsHandle, '<', $exceptionsFile) {
         my $quotedString = qr/"(?:[^\\"]|\\.)*"/;
         # The dot before the extension is escaped so that only real file
         # extensions are accepted (e.g. "foo.cpp", but not "foocpp").
         my $sourceFileName = qr/[-_\/\w.]+\.(?:h|m|mm|c|cpp)/;

         while (my $entry = <$exceptionsHandle>) {
             chomp $entry;
             if ($entry =~ /^$quotedString$/ or $entry =~ /^$sourceFileName$/ or $entry =~ /^$sourceFileName:$quotedString$/) {
                 if ($exception{$entry}) {
                     print "$exceptionsFile:$.:exception for $entry appears twice\n";
                     print "$exceptionsFile:$exception{$entry}:first appearance\n";
                 } else {
                     $exception{$entry} = $.;
                 }
             } else {
                 print "$exceptionsFile:$.:syntax error\n";
             }
         }
         close $exceptionsHandle;
     } else {
         # Still continue without exceptions, but say why none were loaded.
         warn "Couldn't open exceptions file $exceptionsFile: $!\n";
     }
     99 
    # Collect the source files to scan by running find(1) over the requested
    # directories, pruning the directories to skip. The command is run in list
    # form, without a shell, so directory names containing quotes, '$' or
    # backticks are passed to find verbatim instead of being shell-interpreted.
    my @findArguments = @directories;
    for my $dir (@directoriesToSkip) {
        push @findArguments, '-path', $dir, '-prune';
    }
    push @findArguments,
        '-name', '*.h', '-o', '-name', '*.m', '-o', '-name', '*.mm', '-o', '-name', '*.c', '-o', '-name', '*.cpp';

    # Failing to launch find must not look like "no source files found".
    open(my $findOutput, '-|', 'find', @findArguments) or die "Couldn't run find: $!\n";
    chomp(my @files = <$findOutput>);
    close $findOutput;
    106 
    # Scan every source file. Each file is tokenized line by line by a small
    # state machine that:
    #   * collects the string arguments of UI_STRING / UI_STRING_KEY uses and
    #     passes them to HandleUIString,
    #   * ignores string literals inside debug macros, after #include/#import,
    #     in extern "C", empty strings, and anything listed in the exceptions file,
    #   * reports every other string literal as not marked for localization.
    for my $file (sort @files) {
        next if $file =~ /\/WebLocalizableStrings\.h$/;
        next if $file =~ /\/icu\//;

        # Strip a leading "./". The dot is escaped so that only a literal "./"
        # is removed; an unescaped dot would also strip e.g. the "a/" of
        # "a/foo.c" and make the open below fail.
        $file =~ s-^\./--;

        open my $source, '<', $file or die "can't open $file\n";

        # True while inside a /* ... */ comment that spans lines.
        my $inComment = 0;

        # State of the UI_STRING parser: $expected is the token that must come
        # next ("" when not inside a UI_STRING use); the other variables hold the
        # macro name, the line it started on, and the arguments collected so far.
        my $expected = "";
        my $macroLine;
        my $macro;
        my $UIString;
        my $key;
        my $comment;

        # Pending string literal (adjacent literals are concatenated) and the
        # line on which it started.
        my $string;
        my $stringLine;

        # Defined while inside the argument list of a debug macro (or an
        # NSLocalized use); counts the currently open parentheses/brackets.
        my $nestingLevel;

        my $previousToken = "";

        # Consumes the pending string literal in $string: either stores it as
        # the next UI_STRING argument or decides whether it needs to be reported.
        # Called when the token following the literal is seen, and once more at
        # end of file if a literal is still pending.
        my $handlePendingString = sub {
            if ($expected) {
                if (!defined $UIString) {
                    # FIXME: Validate UTF-8 here?
                    $UIString = $string;
                    $expected = ",";
                } elsif (($macro =~ /UI_STRING_KEY$/) and !defined $key) {
                    # FIXME: Validate UTF-8 here?
                    $key = $string;
                    $expected = ",";
                } elsif (!defined $comment) {
                    # FIXME: Validate UTF-8 here?
                    $comment = $string;
                    $expected = ")";
                }
            } else {
                if (defined $nestingLevel) {
                    # In a debug macro, no need to localize.
                } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
                    # File name, no need to localize.
                } elsif ($previousToken eq "extern" and $string eq "C") {
                    # extern "C", no need to localize.
                } elsif ($string eq "") {
                    # Empty string can sometimes be localized, but we need not complain if not.
                } elsif ($exception{$file}) {
                    $usedException{$file} = 1;
                } elsif ($exception{"\"$string\""}) {
                    $usedException{"\"$string\""} = 1;
                } elsif ($exception{"$file:\"$string\""}) {
                    $usedException{"$file:\"$string\""} = 1;
                } else {
                    print "$file:$stringLine:\"$string\" is not marked for localization\n";
                    $notLocalizedCount++;
                }
            }
            $string = undef;
        };

        while (<$source>) {
            chomp;

            # Handle continued multi-line comment.
            if ($inComment) {
                next unless s-.*\*/--;
                $inComment = 0;
            }

            # Handle all the tokens in the line. A token is a word (possibly
            # containing '#'), a comment opener, a run of other punctuation, or
            # any single character.
            while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
                my $token = $1;

                if ($token eq "\"") {
                    if ($expected and $expected ne "a quoted string") {
                        print "$file:$.:ERROR:found a quoted string but expected $expected\n";
                        $sawError = 1;
                        $expected = "";
                    }
                    # Consume up to the closing quote, honouring backslash escapes.
                    if (s-^(([^\\$token]|\\.)*?)$token--) {
                        if (!defined $string) {
                            $stringLine = $.;
                            $string = $1;
                        } else {
                            $string .= $1;
                        }
                    } else {
                        print "$file:$.:ERROR:mismatched quotes\n";
                        $sawError = 1;
                        $_ = "";
                    }
                    next;
                }

                # Any non-string token ends the pending string literal.
                $handlePendingString->() if defined $string;

                $previousToken = $token;

                if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) {
                    print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n";
                    $nestingLevel = 0 if !defined $nestingLevel;
                    $sawError = 1;
                    $NSLocalizeCount++;
                } elsif ($token eq "/*") {
                    if (!s-^.*?\*/--) {
                        $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
                        $inComment = 1;
                    }
                } elsif ($token eq "//") {
                    $_ = ""; # Discard the rest of the line
                } elsif ($token eq "'") {
                    # Deliberately unanchored: this also swallows the tail of
                    # multi-character literals such as 'abcd'.
                    if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
                        print "$file:$.:ERROR:mismatched single quote\n";
                        $sawError = 1;
                        $_ = "";
                    }
                } else {
                    if ($expected and $expected ne $token) {
                        print "$file:$.:ERROR:found $token but expected $expected\n";
                        $sawError = 1;
                        $expected = "";
                    }
                    if ($token =~ /UI_STRING(_KEY)?$/) {
                        # Start of a localizable string: reset the collected arguments.
                        $expected = "(";
                        $macro = $token;
                        $UIString = undef;
                        $key = undef;
                        $comment = undef;
                        $macroLine = $.;
                    } elsif ($token eq "(" or $token eq "[") {
                        ++$nestingLevel if defined $nestingLevel;
                        $expected = "a quoted string" if $expected;
                    } elsif ($token eq ",") {
                        $expected = "a quoted string" if $expected;
                    } elsif ($token eq ")" or $token eq "]") {
                        # Leaving the outermost bracket ends the debug-macro context.
                        $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
                        if ($expected) {
                            # Without an explicit key the string is its own key.
                            $key = $UIString if !defined $key;
                            HandleUIString($UIString, $key, $comment, $file, $macroLine);
                            $macro = "";
                            $expected = "";
                            $localizedCount++;
                        }
                    } elsif ($isDebugMacro{$token}) {
                        $nestingLevel = 0 if !defined $nestingLevel;
                    }
                }
            }

        }

        # A string literal that was the very last token of the file has not
        # been handled yet. (This is done with a call rather than a goto into
        # the loop above, which is a deprecated construct.)
        $handlePendingString->() if defined $string;

        if ($expected) {
            print "$file:ERROR:reached end of file but expected $expected\n";
            $sawError = 1;
        }

        close $source;
    }
    268 
    # Unescapes C language hexadecimal escape sequences.
    #
    # Takes the text of a C string literal (without the surrounding quotes) and
    # returns a copy in which every \xHH... escape is replaced by the single
    # byte it denotes. All other text, including other backslash escapes, is
    # copied through unchanged.
    #
    # Returns undef, after printing a message, if an escape denotes a value
    # of 256 or more.
    sub UnescapeHexSequence($)
    {
        my ($originalStr) = @_;

        my $escapedStr = $originalStr;
        my $unescapedStr = "";

        for (;;) {
            if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) {
                my $value = hex($1);
                if (256 <= $value) {
                    print "Hexadecimal escape sequence out of range: \\x$1\n";
                    return undef;
                }
                # Convert by numeric value: packing the digit string itself
                # would mis-decode odd-length or zero-padded runs such as
                # \x4 or \x041.
                $unescapedStr .= pack("C", $value);
            } elsif ($escapedStr =~ s-^(\\.|.)--s) {
                # Copy any other backslash escape as one unit, so that an
                # escaped backslash followed by "x41" is not mistaken for a
                # hex escape. /s lets embedded newlines through as well.
                $unescapedStr .= $1;
            } else {
                return $unescapedStr;
            }
        }
    }
    291 
    # Tables of the localizable strings collected so far, all indexed by key:
    # the string to translate, its translator comment, and the file and line of
    # the occurrence that was recorded. They are declared at file scope because
    # the code that writes the output file reads them as well.
    my %stringByKey;
    my %commentByKey;
    my %fileByKey;
    my %lineByKey;

    # Records one localizable string found in the sources.
    #
    #   $string   text to translate, as written in the source (C escapes intact)
    #   $key      lookup key (the caller passes the string itself if there is none)
    #   $comment  comment for the translator
    #   $file     source file, used for diagnostics
    #   $line     line number, used for diagnostics
    #
    # Hex escapes are decoded in all three texts. An illegal escape or illegal
    # UTF-8 is reported and sets $sawError. A key seen before with a different
    # string or comment is reported and counted in $keyCollisionCount. In both
    # cases nothing is recorded.
    sub HandleUIString
    {
        my ($string, $key, $comment, $file, $line) = @_;

        my $bad = 0;
        $string = UnescapeHexSequence($string);
        if (!defined($string)) {
            print "$file:$line:ERROR:string has an illegal hexadecimal escape sequence\n";
            $bad = 1;
        }
        $key = UnescapeHexSequence($key);
        if (!defined($key)) {
            print "$file:$line:ERROR:key has an illegal hexadecimal escape sequence\n";
            $bad = 1;
        }
        $comment = UnescapeHexSequence($comment);
        if (!defined($comment)) {
            print "$file:$line:ERROR:comment has an illegal hexadecimal escape sequence\n";
            $bad = 1;
        }

        # The UTF-8 checks are skipped for any value that failed unescaping
        # above: it is undefined and has already been reported.
        if (defined($string) && grep { $_ == 0xFFFD } unpack "U*", $string) {
            print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
            $bad = 1;
        }
        # A key identical to the string was already checked with the string.
        if (defined($key) && !(defined($string) && $string eq $key) && grep { $_ == 0xFFFD } unpack "U*", $key) {
            print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
            $bad = 1;
        }
        if (defined($comment) && grep { $_ == 0xFFFD } unpack "U*", $comment) {
            print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
            $bad = 1;
        }
        if ($bad) {
            $sawError = 1;
            return;
        }

        # exists() rather than truthiness, so that an earlier entry whose string
        # or comment is "" or "0" still takes part in collision detection.
        if (exists $stringByKey{$key} && $stringByKey{$key} ne $string) {
            print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
            print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
            $keyCollisionCount++;
            return;
        }
        if (exists $commentByKey{$key} && $commentByKey{$key} ne $comment) {
            print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
            print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
            $keyCollisionCount++;
            return;
        }

        $fileByKey{$key} = $file;
        $lineByKey{$key} = $line;
        $stringByKey{$key} = $string;
        $commentByKey{$key} = $comment;
    }
    352 
    # A blank line separates any diagnostics printed so far from what follows.
    if ($sawError or $notLocalizedCount or $NSLocalizeCount) {
        print "\n";
    }

    # Exceptions-file entries that never suppressed a diagnostic are listed,
    # in sorted order, with the line number they were read from.
    my @unusedExceptions = grep { not $usedException{$_} } keys %exception;
    @unusedExceptions = sort @unusedExceptions;
    print "$exceptionsFile:$exception{$_}:exception $_ not used\n" for @unusedExceptions;
    print "\n" if @unusedExceptions;

    # Summary lines; each one is printed only when its count is non-zero.
    my @summaryLines = (
        [ $localizedCount,           "localizable strings" ],
        [ $keyCollisionCount,        "key collisions" ],
        [ $notLocalizedCount,        "strings not marked for localization" ],
        [ $NSLocalizeCount,          "uses of NSLocalize" ],
        [ scalar(@unusedExceptions), "unused exceptions" ],
    );
    for my $summaryLine (@summaryLines) {
        my ($count, $description) = @$summaryLine;
        next unless $count;
        print "$count $description\n";
    }

    # Any ERROR diagnostic aborts the run before the output file is touched.
    if ($sawError) {
        print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
        exit 1;
    }
    373 
    # Build the contents of the strings file: one entry per key, in sorted key
    # order, each preceded by its translator comment.
    my $localizedStrings = "";

    for my $key (sort keys %commentByKey) {
        $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
    }

    # Write out the strings file in UTF-16 with a BOM.
    utf8::decode($localizedStrings) if $^V ge v5.8;
    my @codeUnits = (0xFEFF);
    for my $codePoint (unpack "U*", $localizedStrings) {
        if ($codePoint > 0xFFFF) {
            # Characters outside the Basic Multilingual Plane do not fit into
            # one 16-bit unit and must be written as a surrogate pair; packing
            # them directly with "n" would silently truncate them.
            my $offset = $codePoint - 0x10000;
            push @codeUnits, 0xD800 | ($offset >> 10), 0xDC00 | ($offset & 0x3FF);
        } else {
            push @codeUnits, $codePoint;
        }
    }
    my $output = pack "n*", @codeUnits;

    if (-e "$fileToUpdate") {
        open(my $strings, ">", $fileToUpdate) or die "Couldn't open $fileToUpdate for writing: $!\n";
        # The data is binary: keep any I/O layer from translating line endings.
        binmode $strings;
        print {$strings} $output or die "Couldn't write to $fileToUpdate: $!\n";
        # Buffered write errors only surface when the handle is closed.
        close $strings or die "Couldn't write to $fileToUpdate: $!\n";
    } else {
        print "$fileToUpdate does not exist\n";
        exit 1;
    }
    392