#!/usr/bin/perl -w

# Copyright (C) 2006, 2007, 2009 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
#    its contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This script is like the genstrings tool (minus most of the options) with these differences.
#
#    1) It uses the names UI_STRING and UI_STRING_KEY for the macros, rather than the macros
#       from NSBundle.h, and doesn't support tables (although they would be easy to add).
#    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
#       @"" strings only reliably support ASCII since they are decoded based on the system encoding
#       at runtime, so give different results on US and Japanese systems for example).
#    3) It looks for strings that are not marked for localization, using both macro names that are
#       known to be used for debugging in the source code and an exceptions file.
#    4) It finds the files to work on by searching directories rather than taking them as
#       parameters. The exceptions file and the file to update are given on the command line.
#       It would have been nice to use the project to find the source files, but it's too hard to
#       locate source files after parsing a .pbxproj file.
#
# Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]
#
# A directory argument starting with "-" names a path to skip (it is handed to find's -path/-prune).
# With no directory arguments, "." is searched.

# The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.

use strict;

sub UnescapeHexSequence($);

# Macros whose string arguments are diagnostics and therefore never need to be localized.
my %isDebugMacro = ( ASSERT_WITH_MESSAGE => 1, LOG_ERROR => 1, ERROR => 1, NSURL_ERROR => 1, FATAL => 1, LOG => 1, LOG_WARNING => 1, UI_STRING_LOCALIZE_LATER => 1, LPCTSTR_UI_STRING_LOCALIZE_LATER => 1, UNLOCALIZED_STRING => 1, UNLOCALIZED_LPCTSTR => 1, dprintf => 1, NSException => 1, NSLog => 1, printf => 1 );

# Both the exceptions file and the file to update are mandatory.
@ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n";

my $exceptionsFile = shift @ARGV;
-f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n";

my $fileToUpdate = shift @ARGV;
-f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n";

my @directories = ();
my @directoriesToSkip = ();
if (@ARGV < 1) {
    push(@directories, ".");
} else {
    for my $dir (@ARGV) {
        if ($dir =~ /^-(.*)$/) {
            push @directoriesToSkip, $1;
        } else {
            push @directories, $dir;
        }
    }
}

my $sawError = 0;

my $localizedCount = 0;
my $keyCollisionCount = 0;
my $notLocalizedCount = 0;
my $NSLocalizeCount = 0;

my %exception;
my %usedException;

# Per-key bookkeeping filled in by HandleUIString. Declared before the scanning loop so the
# hashes are initialized before the first call that populates them.
my %stringByKey;
my %commentByKey;
my %fileByKey;
my %lineByKey;

# Unescapes C language hexadecimal escape sequences (\xNN) in a string.
# Other escape sequences are copied through unchanged; in particular an escaped backslash is kept
# as a pair so that the text following it is not mistaken for the start of a hex escape.
# Returns the unescaped string, or undef (after printing a message) if an escape is >= 256.
sub UnescapeHexSequence($)
{
    my ($originalStr) = @_;

    my $escapedStr = $originalStr;
    my $unescapedStr = "";

    for (;;) {
        if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) {
            if (256 <= hex($1)) {
                print "Hexadecimal escape sequence out of range: \\x$1\n";
                return;
            }
            # chr(hex()) yields exactly one byte regardless of how many digits were written.
            $unescapedStr .= chr(hex($1));
        } elsif ($escapedStr =~ s-^(\\.|.)--s) {
            $unescapedStr .= $1;
        } else {
            return $unescapedStr;
        }
    }
}

# Records one localizable string found in the sources.
#   $string  - the UI string as written in the source (C escapes still present)
#   $key     - the lookup key (same as $string for plain UI_STRING)
#   $comment - the translator comment
#   $file    - source file the macro was found in
#   $line    - line number of the macro
# Prints a diagnostic and sets $sawError for bad escapes / bad UTF-8; prints a diagnostic and
# increments $keyCollisionCount when a key is reused with a different string or comment.
sub HandleUIString
{
    my ($string, $key, $comment, $file, $line) = @_;

    my $bad = 0;
    $string = UnescapeHexSequence($string);
    if (!defined($string)) {
        print "$file:$line:ERROR:string has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $key = UnescapeHexSequence($key);
    if (!defined($key)) {
        print "$file:$line:ERROR:key has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $comment = UnescapeHexSequence($comment);
    if (!defined($comment)) {
        print "$file:$line:ERROR:comment has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    # U+FFFD is what decoding produces for malformed input. Values that failed to unescape
    # above are undefined and have already been reported, so they are skipped here.
    if (defined($string) && grep { $_ == 0xFFFD } unpack "U*", $string) {
        print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if (defined($key) && (!defined($string) || $string ne $key) && grep { $_ == 0xFFFD } unpack "U*", $key) {
        print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if (defined($comment) && grep { $_ == 0xFFFD } unpack "U*", $comment) {
        print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($bad) {
        $sawError = 1;
        return;
    }

    # Use exists rather than truthiness so that strings/comments such as "0" or "" still
    # take part in collision detection.
    if (exists $stringByKey{$key} && $stringByKey{$key} ne $string) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }
    if (exists $commentByKey{$key} && $commentByKey{$key} ne $comment) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }

    $fileByKey{$key} = $file;
    $lineByKey{$key} = $line;
    $stringByKey{$key} = $string;
    $commentByKey{$key} = $comment;
}

# Load the exceptions file: one entry per line, remembering the line number of each entry.
open my $exceptions, "<", $exceptionsFile or die "Couldn't open exceptions file $exceptionsFile: $!\n";
while (<$exceptions>) {
    chomp;
    if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+\.(h|m|mm|c|cpp)$/ or /^[-_\/\w.]+\.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) {
        if ($exception{$_}) {
            print "$exceptionsFile:$.:exception for $_ appears twice\n";
            print "$exceptionsFile:$exception{$_}:first appearance\n";
        } else {
            $exception{$_} = $.;
        }
    } else {
        print "$exceptionsFile:$.:syntax error\n";
    }
}
close $exceptions;

# Build the find command as an argument list so that no shell is involved and directory
# names need no quoting. Each skipped directory contributes "-path DIR -prune -o"; the
# trailing -o is required so the prune clause is an alternative to, not a precondition of,
# the -name tests. The explicit -print keeps the pruned directories out of the output.
my @findCommand = ("find", @directories);
for my $dir (@directoriesToSkip) {
    push @findCommand, "-path", $dir, "-prune", "-o";
}
push @findCommand, "(", "-name", "*.h", "-o", "-name", "*.m", "-o", "-name", "*.mm", "-o", "-name", "*.c", "-o", "-name", "*.cpp", ")", "-print";

open my $findOutput, "-|", @findCommand or die "can't run find: $!\n";
my @files = <$findOutput>;
close $findOutput;
chomp @files;

for my $file (sort @files) {
    next if $file =~ /\/WebLocalizableStrings\.h$/;
    next if $file =~ /\/icu\//;

    # Drop the "./" that find prepends when searching ".".
    $file =~ s-^\./--;

    open my $source, "<", $file or die "can't open $file: $!\n";

    my $inComment = 0;

    # State of the UI_STRING / UI_STRING_KEY macro currently being parsed, if any.
    # $expected is "" outside a macro, otherwise a description of the next required token.
    my $expected = "";
    my $macroLine;
    my $macro;
    my $UIString;
    my $key;
    my $comment;

    # The string literal currently being accumulated (adjacent literals are concatenated).
    my $string;
    my $stringLine;

    # Defined while inside the argument list of a debug macro (or NSLocalized use);
    # counts open parentheses/brackets.
    my $nestingLevel;

    my $previousToken = "";

    # Consumes the accumulated string literal: either assigns it to the next slot of the
    # macro being parsed, or checks whether an unmarked string needs to be reported.
    my $handlePendingString = sub {
        if ($expected) {
            if (!defined $UIString) {
                # FIXME: Validate UTF-8 here?
                $UIString = $string;
                $expected = ",";
            } elsif (($macro =~ /UI_STRING_KEY$/) and !defined $key) {
                # FIXME: Validate UTF-8 here?
                $key = $string;
                $expected = ",";
            } elsif (!defined $comment) {
                # FIXME: Validate UTF-8 here?
                $comment = $string;
                $expected = ")";
            }
        } else {
            if (defined $nestingLevel) {
                # In a debug macro, no need to localize.
            } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
                # File name, no need to localize.
            } elsif ($previousToken eq "extern" and $string eq "C") {
                # extern "C", no need to localize.
            } elsif ($string eq "") {
                # Empty string can sometimes be localized, but we need not complain if not.
            } elsif ($exception{$file}) {
                $usedException{$file} = 1;
            } elsif ($exception{"\"$string\""}) {
                $usedException{"\"$string\""} = 1;
            } elsif ($exception{"$file:\"$string\""}) {
                $usedException{"$file:\"$string\""} = 1;
            } else {
                print "$file:$stringLine:\"$string\" is not marked for localization\n";
                $notLocalizedCount++;
            }
        }
        $string = undef;
    };

    while (<$source>) {
        chomp;

        # Handle continued multi-line comment.
        if ($inComment) {
            # Non-greedy so that only the first comment terminator on the line is consumed.
            next unless s-^.*?\*/--;
            $inComment = 0;
        }

        # Handle all the tokens in the line.
        while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
            my $token = $1;

            if ($token eq "\"") {
                if ($expected and $expected ne "a quoted string") {
                    print "$file:$.:ERROR:found a quoted string but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                if (s-^(([^\\"]|\\.)*?)"--) {
                    if (!defined $string) {
                        $stringLine = $.;
                        $string = $1;
                    } else {
                        $string .= $1;
                    }
                } else {
                    print "$file:$.:ERROR:mismatched quotes\n";
                    $sawError = 1;
                    $_ = "";
                }
                next;
            }

            # Any non-string token ends the run of adjacent string literals.
            $handlePendingString->() if defined $string;

            $previousToken = $token;

            if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) {
                print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n";
                $nestingLevel = 0 if !defined $nestingLevel;
                $sawError = 1;
                $NSLocalizeCount++;
            } elsif ($token eq "/*") {
                if (!s-^.*?\*/--) {
                    $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
                    $inComment = 1;
                }
            } elsif ($token eq "//") {
                $_ = ""; # Discard the rest of the line
            } elsif ($token eq "'") {
                if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
                    print "$file:$.:ERROR:mismatched single quote\n";
                    $sawError = 1;
                    $_ = "";
                }
            } else {
                if ($expected and $expected ne $token) {
                    print "$file:$.:ERROR:found $token but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                if ($token =~ /UI_STRING(_KEY)?$/) {
                    $expected = "(";
                    $macro = $token;
                    $UIString = undef;
                    $key = undef;
                    $comment = undef;
                    $macroLine = $.;
                } elsif ($token eq "(" or $token eq "[") {
                    ++$nestingLevel if defined $nestingLevel;
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ",") {
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ")" or $token eq "]") {
                    $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
                    if ($expected) {
                        $key = $UIString if !defined $key;
                        HandleUIString($UIString, $key, $comment, $file, $macroLine);
                        $macro = "";
                        $expected = "";
                        $localizedCount++;
                    }
                } elsif ($isDebugMacro{$token}) {
                    $nestingLevel = 0 if !defined $nestingLevel;
                }
            }
        }

    }

    # A string literal that was the very last token of the file is still pending.
    $handlePendingString->() if defined $string;

    if ($expected) {
        print "$file:ERROR:reached end of file but expected $expected\n";
        $sawError = 1;
    }

    close $source;
}

print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;

my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
if (@unusedExceptions) {
    for my $unused (@unusedExceptions) {
        print "$exceptionsFile:$exception{$unused}:exception $unused not used\n";
    }
    print "\n";
}

print "$localizedCount localizable strings\n" if $localizedCount;
print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;

if ($sawError) {
    print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
    exit 1;
}

my $localizedStrings = "";

for my $key (sort keys %commentByKey) {
    $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
}

# Write out the strings file in UTF-16 (big-endian) with a BOM.
utf8::decode($localizedStrings) if $^V ge v5.8;
my @utf16Units = (0xFEFF);
for my $codePoint (unpack "U*", $localizedStrings) {
    if ($codePoint > 0xFFFF) {
        # Code points beyond the BMP do not fit in one 16-bit unit; emit a surrogate pair.
        my $offset = $codePoint - 0x10000;
        push @utf16Units, 0xD800 | ($offset >> 10), 0xDC00 | ($offset & 0x3FF);
    } else {
        push @utf16Units, $codePoint;
    }
}
my $output = pack "n*", @utf16Units;

if (-e "$fileToUpdate") {
    open my $strings, ">", $fileToUpdate or die "can't open $fileToUpdate for writing: $!\n";
    # The payload is binary UTF-16; prevent any newline translation by the I/O layer.
    binmode $strings;
    print {$strings} $output;
    close $strings or die "can't write $fileToUpdate: $!\n";
} else {
    print "$fileToUpdate does not exist\n";
    exit 1;
}