#!/usr/bin/perl -w

# Copyright (C) 2006, 2007, 2009, 2010 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
#    its contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This script is like the genstrings tool (minus most of the options) with these differences.
#
#    1) It uses the names UI_STRING and UI_STRING_WITH_KEY for the macros, rather than the macros
#       from NSBundle.h, and doesn't support tables (although they would be easy to add).
#    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
#       @"" strings only reliably support ASCII since they are decoded based on the system encoding
#       at runtime, so give different results on US and Japanese systems for example).
#    3) It looks for strings that are not marked for localization, using both macro names that are
#       known to be used for debugging in Intrigue source code and an exceptions file.
#    4) It finds the files to work on rather than taking them as parameters, and also uses a
#       hardcoded location for both the output file and the exceptions file.
#       It would have been nice to use the project to find the source files, but it's too hard to
#       locate source files after parsing a .pbxproj file.

# The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.

use strict;

sub UnescapeHexSequence($);

# Macros whose string arguments are diagnostics and therefore never need localization.
my %isDebugMacro = ( ASSERT_WITH_MESSAGE => 1, LOG_ERROR => 1, ERROR => 1, NSURL_ERROR => 1, FATAL => 1, LOG => 1, LOG_WARNING => 1, UI_STRING_LOCALIZE_LATER => 1, LPCTSTR_UI_STRING_LOCALIZE_LATER => 1, UNLOCALIZED_STRING => 1, UNLOCALIZED_LPCTSTR => 1, dprintf => 1, NSException => 1, NSLog => 1, printf => 1 );

@ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n";

# An exceptions file of "-" means "no exceptions file"; in that case unlocalized
# strings are counted but not reported individually.
my $exceptionsFile = shift @ARGV;
-f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n" unless $exceptionsFile eq "-";

my $fileToUpdate = shift @ARGV;
-f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n";

my $warnAboutUnlocalizedStrings = $exceptionsFile ne "-";

# Remaining arguments are directories to scan; a leading "-" marks a directory to skip.
my @directories = ();
my @directoriesToSkip = ();
if (@ARGV < 1) {
    push(@directories, ".");
} else {
    for my $dir (@ARGV) {
        if ($dir =~ /^-(.*)$/) {
            push @directoriesToSkip, $1;
        } else {
            push @directories, $dir;
        }
    }
}

my $sawError = 0;

my $localizedCount = 0;
my $keyCollisionCount = 0;
my $notLocalizedCount = 0;
my $NSLocalizeCount = 0;

# Exception text => line number in the exceptions file where it was declared.
my %exception;
my %usedException;

# Per-key bookkeeping filled in by HandleUIString. Declared ahead of the scanning
# loop so the variables exist before the first call that populates them.
my %stringByKey;
my %commentByKey;
my %fileByKey;
my %lineByKey;

if ($exceptionsFile ne "-" && open(my $exceptionsFH, "<", $exceptionsFile)) {
    while (<$exceptionsFH>) {
        chomp;
        # Accepted forms: "string", path/to/file.ext, or path/to/file.ext:"string".
        if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+\.(h|m|mm|c|cpp)$/ or /^[-_\/\w.]+\.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) {
            if ($exception{$_}) {
                print "$exceptionsFile:$.:exception for $_ appears twice\n";
                print "$exceptionsFile:$exception{$_}:first appearance\n";
            } else {
                $exception{$_} = $.;
            }
        } else {
            print "$exceptionsFile:$.:syntax error\n";
        }
    }
    close $exceptionsFH;
}

# NOTE(review): directory names are interpolated into a shell command line below;
# names containing double quotes or other shell metacharacters will not be handled correctly.
my $quotedDirectoriesString = '"' . join('" "', @directories) . '"';
for my $dir (@directoriesToSkip) {
    $quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o';
}

my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\)` );

for my $file (sort @files) {
    next if $file =~ /\/\w+LocalizableStrings\w*\.h$/ || $file =~ /\/LocalizedStrings\.h$/;

    # Strip only a literal leading "./" (the dot must be escaped, otherwise the first
    # path component of any single-character directory name would be removed).
    $file =~ s-^\./--;

    open my $sourceFH, "<", $file or die "can't open $file: $!\n";

    my $inComment = 0;

    # Parser state for the UI_STRING-style macro currently being read.
    # $expected is "" when not inside such a macro, otherwise the next token required.
    my $expected = "";
    my $macroLine;
    my $macro;
    my $UIString;
    my $key;
    my $comment;

    # The string literal being accumulated (adjacent literals are concatenated),
    # and the line on which it started.
    my $string;
    my $stringLine;

    # Defined while inside a debug macro's (or NSLocalized macro's) argument list.
    my $nestingLevel;

    my $previousToken = "";

    # Consumes the accumulated string literal: either stores it as the next macro
    # argument, or checks whether an unlocalized literal is acceptable. Called when
    # the first non-string token after a literal is seen, and once more at end of
    # file for a trailing literal.
    my $handleString = sub {
        if ($expected) {
            if (!defined $UIString) {
                # FIXME: Validate UTF-8 here?
                $UIString = $string;
                $expected = ",";
            } elsif (($macro =~ /(WEB_)?UI_STRING_KEY(_INTERNAL)?$/) and !defined $key) {
                # FIXME: Validate UTF-8 here?
                $key = $string;
                $expected = ",";
            } elsif (!defined $comment) {
                # FIXME: Validate UTF-8 here?
                $comment = $string;
                $expected = ")";
            }
        } else {
            if (defined $nestingLevel) {
                # In a debug macro, no need to localize.
            } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
                # File name, no need to localize.
            } elsif ($previousToken eq "extern" and $string eq "C") {
                # extern "C", no need to localize.
            } elsif ($string eq "") {
                # Empty string can sometimes be localized, but we need not complain if not.
            } elsif ($exception{$file}) {
                $usedException{$file} = 1;
            } elsif ($exception{"\"$string\""}) {
                $usedException{"\"$string\""} = 1;
            } elsif ($exception{"$file:\"$string\""}) {
                $usedException{"$file:\"$string\""} = 1;
            } else {
                print "$file:$stringLine:\"$string\" is not marked for localization\n" if $warnAboutUnlocalizedStrings;
                $notLocalizedCount++;
            }
        }
        $string = undef;
    };

    while (<$sourceFH>) {
        chomp;

        # Handle continued multi-line comment.
        if ($inComment) {
            next unless s-.*\*/--;
            $inComment = 0;
        }

        # Handle all the tokens in the line.
        while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
            my $token = $1;

            if ($token eq "\"") {
                if ($expected and $expected ne "a quoted string") {
                    print "$file:$.:ERROR:found a quoted string but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                if (s-^(([^\\$token]|\\.)*?)$token--) {
                    if (!defined $string) {
                        $stringLine = $.;
                        $string = $1;
                    } else {
                        $string .= $1;
                    }
                } else {
                    print "$file:$.:ERROR:mismatched quotes\n";
                    $sawError = 1;
                    $_ = "";
                }
                next;
            }

            # A non-string token ends any run of adjacent string literals.
            $handleString->() if defined $string;

            $previousToken = $token;

            if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) {
                print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n";
                $nestingLevel = 0 if !defined $nestingLevel;
                $sawError = 1;
                $NSLocalizeCount++;
            } elsif ($token eq "/*") {
                if (!s-^.*?\*/--) {
                    $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
                    $inComment = 1;
                }
            } elsif ($token eq "//") {
                $_ = ""; # Discard the rest of the line
            } elsif ($token eq "'") {
                if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
                    print "$file:$.:ERROR:mismatched single quote\n";
                    $sawError = 1;
                    $_ = "";
                }
            } else {
                if ($expected and $expected ne $token) {
                    print "$file:$.:ERROR:found $token but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                if ($token =~ /(WEB_)?UI_STRING(_KEY)?(_INTERNAL)?$/) {
                    $expected = "(";
                    $macro = $token;
                    $UIString = undef;
                    $key = undef;
                    $comment = undef;
                    $macroLine = $.;
                } elsif ($token eq "(" or $token eq "[") {
                    ++$nestingLevel if defined $nestingLevel;
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ",") {
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ")" or $token eq "]") {
                    $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
                    if ($expected) {
                        # Without an explicit key, the string itself is the key.
                        $key = $UIString if !defined $key;
                        HandleUIString($UIString, $key, $comment, $file, $macroLine);
                        $macro = "";
                        $expected = "";
                        $localizedCount++;
                    }
                } elsif ($isDebugMacro{$token}) {
                    $nestingLevel = 0 if !defined $nestingLevel;
                }
            }
        }

    }

    # A string literal that was the very last token of the file has not been handled yet.
    $handleString->() if defined $string;

    if ($expected) {
        print "$file:ERROR:reached end of file but expected $expected\n";
        $sawError = 1;
    }

    close $sourceFH;
}

# Unescapes C language hexadecimal escape sequences.
#
# Takes the raw text between the quotes of a C string literal and returns it with
# every \xHH... sequence replaced by the single byte it denotes. Other backslash
# escapes are passed through unchanged (and consumed as a pair, so an escaped
# backslash followed by "x41" is not mistaken for a hex escape).
# Returns undef, after printing a diagnostic, if a sequence denotes a value above 255.
sub UnescapeHexSequence($)
{
    my ($originalStr) = @_;

    my $escapedStr = $originalStr;
    my $unescapedStr = "";

    for (;;) {
        if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) {
            if (256 <= hex($1)) {
                print "Hexadecimal escape sequence out of range: \\x$1\n";
                return;
            }
            # Convert by value rather than by digit pairs so that sequences with an
            # odd or padded digit count (\xA, \x041) still yield exactly one byte.
            $unescapedStr .= pack("C", hex($1));
        } elsif ($escapedStr =~ s-^(\\.|.)--s) {
            $unescapedStr .= $1;
        } else {
            return $unescapedStr;
        }
    }
}

# Records one localizable string found in the sources.
#
#   $string  - the user-visible string (raw C literal text)
#   $key     - the lookup key (same as $string when the macro gave none)
#   $comment - the translator comment
#   $file    - source file in which the macro was found
#   $line    - line on which the macro started
#
# Validates hex escapes and UTF-8, reports a collision when the same key was
# already seen with a different string or comment, and otherwise stores the
# entry in %stringByKey / %commentByKey / %fileByKey / %lineByKey.
sub HandleUIString
{
    my ($string, $key, $comment, $file, $line) = @_;

    my $bad = 0;
    $string = UnescapeHexSequence($string);
    if (!defined($string)) {
        print "$file:$line:ERROR:string has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $key = UnescapeHexSequence($key);
    if (!defined($key)) {
        print "$file:$line:ERROR:key has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $comment = UnescapeHexSequence($comment);
    if (!defined($comment)) {
        print "$file:$line:ERROR:comment has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    if (grep { $_ == 0xFFFD } unpack "U*", $string) {
        print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($string ne $key && grep { $_ == 0xFFFD } unpack "U*", $key) {
        print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if (grep { $_ == 0xFFFD } unpack "U*", $comment) {
        print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($bad) {
        $sawError = 1;
        return;
    }

    if ($stringByKey{$key} && $stringByKey{$key} ne $string) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }
    if ($commentByKey{$key} && $commentByKey{$key} ne $comment) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }

    $fileByKey{$key} = $file;
    $lineByKey{$key} = $line;
    $stringByKey{$key} = $string;
    $commentByKey{$key} = $comment;
}

print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;

my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
if (@unusedExceptions) {
    for my $unused (@unusedExceptions) {
        print "$exceptionsFile:$exception{$unused}:exception $unused not used\n";
    }
    print "\n";
}

print "$localizedCount localizable strings\n" if $localizedCount;
print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;

if ($sawError) {
    print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
    exit 1;
}

my $localizedStrings = "";

for my $key (sort keys %commentByKey) {
    $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
}

# Write out the strings file in UTF-16 with a BOM.
utf8::decode($localizedStrings) if $^V ge v5.8;
my @utf16Units = (0xFEFF);
for my $codePoint (unpack "U*", $localizedStrings) {
    if ($codePoint > 0xFFFF) {
        # Characters outside the Basic Multilingual Plane need a surrogate pair;
        # packing the raw code point as a 16-bit unit would silently truncate it.
        my $offset = $codePoint - 0x10000;
        push @utf16Units, 0xD800 + ($offset >> 10), 0xDC00 + ($offset & 0x3FF);
    } else {
        push @utf16Units, $codePoint;
    }
}
my $output = pack "n*", @utf16Units;

if (-e "$fileToUpdate") {
    open my $stringsFH, ">", $fileToUpdate or die "Couldn't open $fileToUpdate for writing: $!\n";
    # The payload is already encoded; make sure no I/O layer rewrites its bytes.
    binmode $stringsFH;
    print $stringsFH $output;
    close $stringsFH or die "Couldn't finish writing $fileToUpdate: $!\n";
} else {
    print "$fileToUpdate does not exist\n";
    exit 1;
}