Home | History | Annotate | Download | only in Scripts
      1 #!/usr/bin/perl -w
      2 
      3 # Copyright (C) 2006, 2007, 2009, 2010 Apple Inc. All rights reserved.
      4 #
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions
      7 # are met:
      8 #
      9 # 1.  Redistributions of source code must retain the above copyright
     10 #     notice, this list of conditions and the following disclaimer. 
     11 # 2.  Redistributions in binary form must reproduce the above copyright
     12 #     notice, this list of conditions and the following disclaimer in the
     13 #     documentation and/or other materials provided with the distribution. 
     14 # 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     15 #     its contributors may be used to endorse or promote products derived
     16 #     from this software without specific prior written permission. 
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     19 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     22 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     23 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     24 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     25 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 
     29 # This script is like the genstrings tool (minus most of the options) with these differences.
     30 #
#    1) It uses the names UI_STRING and UI_STRING_KEY for the macros (optionally with a WEB_ prefix
#       and/or an _INTERNAL suffix), rather than the macros from NSBundle.h, and doesn't support
#       tables (although they would be easy to add).
     33 #    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
     34 #       @"" strings only reliably support ASCII since they are decoded based on the system encoding
     35 #       at runtime, so give different results on US and Japanese systems for example).
     36 #    3) It looks for strings that are not marked for localization, using both macro names that are
     37 #       known to be used for debugging in Intrigue source code and an exceptions file.
#    4) It finds the source files to work on by searching the directories given on the command
#       line (the current directory by default) rather than taking the files themselves as
#       parameters; the exceptions file and the file to update are the first two arguments.
#       It would have been nice to use the project to find the source files, but it's too hard to
#       locate source files after parsing a .pbxproj file.
     42 
     43 # The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.
     44 
     45 use strict;
     46 
     47 sub UnescapeHexSequence($);
     48 
     49 my %isDebugMacro = ( ASSERT_WITH_MESSAGE => 1, LOG_ERROR => 1, ERROR => 1, NSURL_ERROR => 1, FATAL => 1, LOG => 1, LOG_WARNING => 1, UI_STRING_LOCALIZE_LATER => 1, LPCTSTR_UI_STRING_LOCALIZE_LATER => 1, UNLOCALIZED_STRING => 1, UNLOCALIZED_LPCTSTR => 1, dprintf => 1, NSException => 1, NSLog => 1, printf => 1 );
     50 
     51 @ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n";
     52 
     53 my $exceptionsFile = shift @ARGV;
     54 -f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n" unless $exceptionsFile eq "-";
     55 
     56 my $fileToUpdate = shift @ARGV;
     57 -f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n";
     58 
     59 my $warnAboutUnlocalizedStrings = $exceptionsFile ne "-";
     60 
     61 my @directories = ();
     62 my @directoriesToSkip = ();
     63 if (@ARGV < 1) {
     64     push(@directories, ".");
     65 } else {
     66     for my $dir (@ARGV) {
     67         if ($dir =~ /^-(.*)$/) {
     68             push @directoriesToSkip, $1;
     69         } else {
     70             push @directories, $dir;
     71         }
     72     }
     73 }
     74 
     75 my $sawError = 0;
     76 
     77 my $localizedCount = 0;
     78 my $keyCollisionCount = 0;
     79 my $notLocalizedCount = 0;
     80 my $NSLocalizeCount = 0;
     81 
     82 my %exception;
     83 my %usedException;
     84 
     85 if ($exceptionsFile ne "-" && open EXCEPTIONS, $exceptionsFile) {
     86     while (<EXCEPTIONS>) {
     87         chomp;
     88         if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+.(h|m|mm|c|cpp)$/ or /^[-_\/\w.]+.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) {
     89             if ($exception{$_}) {
     90                 print "$exceptionsFile:$.:exception for $_ appears twice\n";
     91                 print "$exceptionsFile:$exception{$_}:first appearance\n";
     92             } else {
     93                 $exception{$_} = $.;
     94             }
     95         } else {
     96             print "$exceptionsFile:$.:syntax error\n";
     97         }
     98     }
     99     close EXCEPTIONS;
    100 }
    101 
    102 my $quotedDirectoriesString = '"' . join('" "', @directories) . '"';
    103 for my $dir (@directoriesToSkip) {
    104     $quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o';
    105 }
    106 
    107 my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\)` );
    108 
    109 for my $file (sort @files) {
    110     next if $file =~ /\/\w+LocalizableStrings\w*\.h$/ || $file =~ /\/LocalizedStrings\.h$/;
    111 
    112     $file =~ s-^./--;
    113 
    114     open SOURCE, $file or die "can't open $file\n";
    115     
    116     my $inComment = 0;
    117     
    118     my $expected = "";
    119     my $macroLine;
    120     my $macro;
    121     my $UIString;
    122     my $key;
    123     my $comment;
    124     
    125     my $string;
    126     my $stringLine;
    127     my $nestingLevel;
    128     
    129     my $previousToken = "";
    130 
    131     while (<SOURCE>) {
    132         chomp;
    133         
    134         # Handle continued multi-line comment.
    135         if ($inComment) {
    136             next unless s-.*\*/--;
    137             $inComment = 0;
    138         }
    139     
    140         # Handle all the tokens in the line.
    141         while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
    142             my $token = $1;
    143             
    144             if ($token eq "\"") {
    145                 if ($expected and $expected ne "a quoted string") {
    146                     print "$file:$.:ERROR:found a quoted string but expected $expected\n";
    147                     $sawError = 1;
    148                     $expected = "";
    149                 }
    150                 if (s-^(([^\\$token]|\\.)*?)$token--) {
    151                     if (!defined $string) {
    152                         $stringLine = $.;
    153                         $string = $1;
    154                     } else {
    155                         $string .= $1;
    156                     }
    157                 } else {
    158                     print "$file:$.:ERROR:mismatched quotes\n";
    159                     $sawError = 1;
    160                     $_ = "";
    161                 }
    162                 next;
    163             }
    164             
    165             if (defined $string) {
    166 handleString:
    167                 if ($expected) {
    168                     if (!defined $UIString) {
    169                         # FIXME: Validate UTF-8 here?
    170                         $UIString = $string;
    171                         $expected = ",";
    172                     } elsif (($macro =~ /(WEB_)?UI_STRING_KEY(_INTERNAL)?$/) and !defined $key) {
    173                         # FIXME: Validate UTF-8 here?
    174                         $key = $string;
    175                         $expected = ",";
    176                     } elsif (!defined $comment) {
    177                         # FIXME: Validate UTF-8 here?
    178                         $comment = $string;
    179                         $expected = ")";
    180                     }
    181                 } else {
    182                     if (defined $nestingLevel) {
    183                         # In a debug macro, no need to localize.
    184                     } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
    185                         # File name, no need to localize.
    186                     } elsif ($previousToken eq "extern" and $string eq "C") {
    187                         # extern "C", no need to localize.
    188                     } elsif ($string eq "") {
    189                         # Empty string can sometimes be localized, but we need not complain if not.
    190                     } elsif ($exception{$file}) {
    191                         $usedException{$file} = 1;
    192                     } elsif ($exception{"\"$string\""}) {
    193                         $usedException{"\"$string\""} = 1;
    194                     } elsif ($exception{"$file:\"$string\""}) {
    195                         $usedException{"$file:\"$string\""} = 1;
    196                     } else {
    197                         print "$file:$stringLine:\"$string\" is not marked for localization\n" if $warnAboutUnlocalizedStrings;
    198                         $notLocalizedCount++;
    199                     }
    200                 }
    201                 $string = undef;
    202                 last if !defined $token;
    203             }
    204             
    205             $previousToken = $token;
    206 
    207             if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) {
    208                 print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n";
    209                 $nestingLevel = 0 if !defined $nestingLevel;
    210                 $sawError = 1;
    211                 $NSLocalizeCount++;
    212             } elsif ($token eq "/*") {
    213                 if (!s-^.*?\*/--) {
    214                     $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
    215                     $inComment = 1;
    216                 }
    217             } elsif ($token eq "//") {
    218                 $_ = ""; # Discard the rest of the line
    219             } elsif ($token eq "'") {
    220                 if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
    221                     print "$file:$.:ERROR:mismatched single quote\n";
    222                     $sawError = 1;
    223                     $_ = "";
    224                 }
    225             } else {
    226                 if ($expected and $expected ne $token) {
    227                     print "$file:$.:ERROR:found $token but expected $expected\n";
    228                     $sawError = 1;
    229                     $expected = "";
    230                 }
    231                 if ($token =~ /(WEB_)?UI_STRING(_KEY)?(_INTERNAL)?$/) {
    232                     $expected = "(";
    233                     $macro = $token;
    234                     $UIString = undef;
    235                     $key = undef;
    236                     $comment = undef;
    237                     $macroLine = $.;
    238                 } elsif ($token eq "(" or $token eq "[") {
    239                     ++$nestingLevel if defined $nestingLevel;
    240                     $expected = "a quoted string" if $expected;
    241                 } elsif ($token eq ",") {
    242                     $expected = "a quoted string" if $expected;
    243                 } elsif ($token eq ")" or $token eq "]") {
    244                     $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
    245                     if ($expected) {
    246                         $key = $UIString if !defined $key;
    247                         HandleUIString($UIString, $key, $comment, $file, $macroLine);
    248                         $macro = "";
    249                         $expected = "";
    250                         $localizedCount++;
    251                     }
    252                 } elsif ($isDebugMacro{$token}) {
    253                     $nestingLevel = 0 if !defined $nestingLevel;
    254                 }
    255             }
    256         }
    257             
    258     }
    259     
    260     goto handleString if defined $string;
    261     
    262     if ($expected) {
    263         print "$file:ERROR:reached end of file but expected $expected\n";
    264         $sawError = 1;
    265     }
    266     
    267     close SOURCE;
    268 }
    269 
    270 # Unescapes C language hexadecimal escape sequences.
    271 sub UnescapeHexSequence($)
    272 {
    273     my ($originalStr) = @_;
    274 
    275     my $escapedStr = $originalStr;
    276     my $unescapedStr = "";
    277 
    278     for (;;) {
    279         if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) {
    280             if (256 <= hex($1)) {
    281                 print "Hexadecimal escape sequence out of range: \\x$1\n";
    282                 return undef;
    283             }
    284             $unescapedStr .= pack("H*", $1);
    285         } elsif ($escapedStr =~ s-^(.)--) {
    286             $unescapedStr .= $1;
    287         } else {
    288             return $unescapedStr;
    289         }
    290     }
    291 }
    292 
    293 my %stringByKey;
    294 my %commentByKey;
    295 my %fileByKey;
    296 my %lineByKey;
    297 
    298 sub HandleUIString
    299 {
    300     my ($string, $key, $comment, $file, $line) = @_;
    301 
    302     my $bad = 0;
    303     $string = UnescapeHexSequence($string);
    304     if (!defined($string)) {
    305         print "$file:$line:ERROR:string has an illegal hexadecimal escape sequence\n";
    306         $bad = 1;
    307     }
    308     $key = UnescapeHexSequence($key);
    309     if (!defined($key)) {
    310         print "$file:$line:ERROR:key has an illegal hexadecimal escape sequence\n";
    311         $bad = 1;
    312     }
    313     $comment = UnescapeHexSequence($comment);
    314     if (!defined($comment)) {
    315         print "$file:$line:ERROR:comment has an illegal hexadecimal escape sequence\n";
    316         $bad = 1;
    317     }
    318     if (grep { $_ == 0xFFFD } unpack "U*", $string) {
    319         print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
    320         $bad = 1;
    321     }
    322     if ($string ne $key && grep { $_ == 0xFFFD } unpack "U*", $key) {
    323         print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
    324         $bad = 1;
    325     }
    326     if (grep { $_ == 0xFFFD } unpack "U*", $comment) {
    327         print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
    328         $bad = 1;
    329     }
    330     if ($bad) {
    331         $sawError = 1;
    332         return;
    333     }
    334     
    335     if ($stringByKey{$key} && $stringByKey{$key} ne $string) {
    336         print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
    337         print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
    338         $keyCollisionCount++;
    339         return;
    340     }
    341     if ($commentByKey{$key} && $commentByKey{$key} ne $comment) {
    342         print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
    343         print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
    344         $keyCollisionCount++;
    345         return;
    346     }
    347 
    348     $fileByKey{$key} = $file;
    349     $lineByKey{$key} = $line;
    350     $stringByKey{$key} = $string;
    351     $commentByKey{$key} = $comment;
    352 }
    353 
    354 print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;
    355 
    356 my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
    357 if (@unusedExceptions) {
    358     for my $unused (@unusedExceptions) {
    359         print "$exceptionsFile:$exception{$unused}:exception $unused not used\n";
    360     }
    361     print "\n";
    362 }
    363 
    364 print "$localizedCount localizable strings\n" if $localizedCount;
    365 print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
    366 print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
    367 print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
    368 print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;
    369 
    370 if ($sawError) {
    371     print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
    372     exit 1;
    373 }
    374 
    375 my $localizedStrings = "";
    376 
    377 for my $key (sort keys %commentByKey) {
    378     $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
    379 }
    380 
    381 # Write out the strings file in UTF-16 with a BOM.
    382 utf8::decode($localizedStrings) if $^V ge v5.8;
    383 my $output = pack "n*", (0xFEFF, unpack "U*", $localizedStrings);
    384 
    385 if (-e "$fileToUpdate") {
    386     open STRINGS, ">", "$fileToUpdate" or die;
    387     print STRINGS $output;
    388     close STRINGS;
    389 } else {
    390     print "$fileToUpdate does not exist\n";
    391     exit 1;
    392 }
    393