1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "ui/base/l10n/l10n_util.h" 6 7 #include <algorithm> 8 #include <cstdlib> 9 #include <iterator> 10 #include <string> 11 12 #include "base/command_line.h" 13 #include "base/compiler_specific.h" 14 #include "base/file_util.h" 15 #include "base/i18n/file_util_icu.h" 16 #include "base/i18n/rtl.h" 17 #include "base/i18n/string_compare.h" 18 #include "base/lazy_instance.h" 19 #include "base/memory/scoped_ptr.h" 20 #include "base/path_service.h" 21 #include "base/strings/string_number_conversions.h" 22 #include "base/strings/string_split.h" 23 #include "base/strings/string_util.h" 24 #include "base/strings/stringprintf.h" 25 #include "base/strings/sys_string_conversions.h" 26 #include "base/strings/utf_string_conversions.h" 27 #include "build/build_config.h" 28 #include "third_party/icu/source/common/unicode/rbbi.h" 29 #include "third_party/icu/source/common/unicode/uloc.h" 30 #include "ui/base/l10n/l10n_util_collator.h" 31 #include "ui/base/resource/resource_bundle.h" 32 #include "ui/base/ui_base_paths.h" 33 34 #if defined(OS_ANDROID) 35 #include "ui/base/l10n/l10n_util_android.h" 36 #endif 37 38 #if defined(OS_LINUX) 39 #include <glib.h> 40 #endif 41 42 #if defined(OS_WIN) 43 #include "ui/base/l10n/l10n_util_win.h" 44 #endif // OS_WIN 45 46 namespace { 47 48 static const char* const kAcceptLanguageList[] = { 49 "af", // Afrikaans 50 "am", // Amharic 51 "ar", // Arabic 52 "az", // Azerbaijani 53 "be", // Belarusian 54 "bg", // Bulgarian 55 "bh", // Bihari 56 "bn", // Bengali 57 "br", // Breton 58 "bs", // Bosnian 59 "ca", // Catalan 60 "co", // Corsican 61 "cs", // Czech 62 "cy", // Welsh 63 "da", // Danish 64 "de", // German 65 "de-AT", // German (Austria) 66 "de-CH", // German (Switzerland) 67 "de-DE", // German (Germany) 68 "el", // Greek 69 "en", // English 70 "en-AU", // English (Australia) 71 "en-CA", // English (Canada) 72 "en-GB", // English (UK) 73 "en-NZ", // English (New Zealand) 74 "en-US", // English (US) 75 "en-ZA", // English (South Africa) 76 "eo", // Esperanto 77 // TODO(jungshik) : Do we want to list all es-Foo for Latin-American 78 // Spanish speaking countries? 79 "es", // Spanish 80 "es-419", // Spanish (Latin America) 81 "et", // Estonian 82 "eu", // Basque 83 "fa", // Persian 84 "fi", // Finnish 85 "fil", // Filipino 86 "fo", // Faroese 87 "fr", // French 88 "fr-CA", // French (Canada) 89 "fr-CH", // French (Switzerland) 90 "fr-FR", // French (France) 91 "fy", // Frisian 92 "ga", // Irish 93 "gd", // Scots Gaelic 94 "gl", // Galician 95 "gn", // Guarani 96 "gu", // Gujarati 97 "ha", // Hausa 98 "haw", // Hawaiian 99 "he", // Hebrew 100 "hi", // Hindi 101 "hr", // Croatian 102 "hu", // Hungarian 103 "hy", // Armenian 104 "ia", // Interlingua 105 "id", // Indonesian 106 "is", // Icelandic 107 "it", // Italian 108 "it-CH", // Italian (Switzerland) 109 "it-IT", // Italian (Italy) 110 "ja", // Japanese 111 "jw", // Javanese 112 "ka", // Georgian 113 "kk", // Kazakh 114 "km", // Cambodian 115 "kn", // Kannada 116 "ko", // Korean 117 "ku", // Kurdish 118 "ky", // Kyrgyz 119 "la", // Latin 120 "ln", // Lingala 121 "lo", // Laothian 122 "lt", // Lithuanian 123 "lv", // Latvian 124 "mk", // Macedonian 125 "ml", // Malayalam 126 "mn", // Mongolian 127 "mo", // Moldavian 128 "mr", // Marathi 129 "ms", // Malay 130 "mt", // Maltese 131 "nb", // Norwegian (Bokmal) 132 "ne", // Nepali 133 "nl", // Dutch 134 "nn", // Norwegian (Nynorsk) 135 "no", // Norwegian 136 "oc", // Occitan 137 "om", // Oromo 138 "or", // Oriya 139 "pa", // Punjabi 140 "pl", // Polish 141 "ps", // Pashto 142 "pt", // Portuguese 143 "pt-BR", // Portuguese (Brazil) 144 "pt-PT", // Portuguese (Portugal) 145 "qu", // Quechua 146 "rm", // Romansh 147 "ro", // Romanian 148 "ru", // Russian 149 "sd", // Sindhi 150 "sh", // Serbo-Croatian 151 "si", // Sinhalese 152 "sk", // Slovak 153 "sl", // Slovenian 154 "sn", // Shona 155 "so", // Somali 156 "sq", // Albanian 157 "sr", // Serbian 158 "st", // Sesotho 159 "su", // Sundanese 160 "sv", // Swedish 161 "sw", // Swahili 162 "ta", // Tamil 163 "te", // Telugu 164 "tg", // Tajik 165 "th", // Thai 166 "ti", // Tigrinya 167 "tk", // Turkmen 168 "to", // Tonga 169 "tr", // Turkish 170 "tt", // Tatar 171 "tw", // Twi 172 "ug", // Uighur 173 "uk", // Ukrainian 174 "ur", // Urdu 175 "uz", // Uzbek 176 "vi", // Vietnamese 177 "xh", // Xhosa 178 "yi", // Yiddish 179 "yo", // Yoruba 180 "zh", // Chinese 181 "zh-CN", // Chinese (Simplified) 182 "zh-TW", // Chinese (Traditional) 183 "zu", // Zulu 184 }; 185 186 // Returns true if |locale_name| has an alias in the ICU data file. 187 bool IsDuplicateName(const std::string& locale_name) { 188 static const char* const kDuplicateNames[] = { 189 "en", 190 "pt", 191 "zh", 192 "zh_hans_cn", 193 "zh_hant_hk", 194 "zh_hant_mo", 195 "zh_hans_sg", 196 "zh_hant_tw" 197 }; 198 199 // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain). 200 // 'es-419' (Spanish in Latin America) is not available in ICU so that it 201 // has to be added manually in GetAvailableLocales(). 202 if (LowerCaseEqualsASCII(locale_name.substr(0, 3), "es_")) 203 return true; 204 for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) { 205 if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0) 206 return true; 207 } 208 return false; 209 } 210 211 // We added 30+ minimally populated locales with only a few entries 212 // (exemplar character set, script, writing direction and its own 213 // lanaguage name). These locales have to be distinguished from the 214 // fully populated locales to which Chrome is localized. 215 bool IsLocalePartiallyPopulated(const std::string& locale_name) { 216 // For partially populated locales, even the translation for "English" 217 // is not available. A more robust/elegant way to check is to add a special 218 // field (say, 'isPartial' to our version of ICU locale files) and 219 // check its value, but this hack seems to work well. 220 return !l10n_util::IsLocaleNameTranslated("en", locale_name); 221 } 222 223 #if !defined(OS_MACOSX) 224 bool IsLocaleAvailable(const std::string& locale) { 225 // If locale has any illegal characters in it, we don't want to try to 226 // load it because it may be pointing outside the locale data file directory. 227 if (!file_util::IsFilenameLegal(ASCIIToUTF16(locale))) 228 return false; 229 230 // IsLocalePartiallyPopulated() can be called here for an early return w/o 231 // checking the resource availability below. It'd help when Chrome is run 232 // under a system locale Chrome is not localized to (e.g.Farsi on Linux), 233 // but it'd slow down the start up time a little bit for locales Chrome is 234 // localized to. So, we don't call it here. 235 if (!l10n_util::IsLocaleSupportedByOS(locale)) 236 return false; 237 238 // If the ResourceBundle is not yet initialized, return false to avoid the 239 // CHECK failure in ResourceBundle::GetSharedInstance(). 240 if (!ResourceBundle::HasSharedInstance()) 241 return false; 242 243 // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function 244 // so that this can be invoked without initializing the global instance. 245 // See crbug.com/230432: CHECK failure in GetUserDataDir(). 246 return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale); 247 } 248 249 bool CheckAndResolveLocale(const std::string& locale, 250 std::string* resolved_locale) { 251 if (IsLocaleAvailable(locale)) { 252 *resolved_locale = locale; 253 return true; 254 } 255 256 // If there's a variant, skip over it so we can try without the region 257 // code. For example, ca_ES@valencia should cause us to try ca@valencia 258 // before ca. 259 std::string::size_type variant_pos = locale.find('@'); 260 if (variant_pos != std::string::npos) 261 return false; 262 263 // If the locale matches language but not country, use that instead. 264 // TODO(jungshik) : Nothing is done about languages that Chrome 265 // does not support but available on Windows. We fall 266 // back to en-US in GetApplicationLocale so that it's a not critical, 267 // but we can do better. 268 std::string::size_type hyphen_pos = locale.find('-'); 269 std::string lang(locale, 0, hyphen_pos); 270 if (hyphen_pos != std::string::npos && hyphen_pos > 0) { 271 std::string region(locale, hyphen_pos + 1); 272 std::string tmp_locale(lang); 273 // Map es-RR other than es-ES to es-419 (Chrome's Latin American 274 // Spanish locale). 275 if (LowerCaseEqualsASCII(lang, "es") && 276 !LowerCaseEqualsASCII(region, "es")) { 277 tmp_locale.append("-419"); 278 } else if (LowerCaseEqualsASCII(lang, "zh")) { 279 // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN. 280 if (LowerCaseEqualsASCII(region, "hk") || 281 LowerCaseEqualsASCII(region, "mo")) { // Macao 282 tmp_locale.append("-TW"); 283 } else { 284 tmp_locale.append("-CN"); 285 } 286 } else if (LowerCaseEqualsASCII(lang, "en")) { 287 // Map Australian, Canadian, New Zealand and South African English 288 // to British English for now. 289 // TODO(jungshik): en-CA may have to change sides once 290 // we have OS locale separate from app locale (Chrome's UI language). 291 if (LowerCaseEqualsASCII(region, "au") || 292 LowerCaseEqualsASCII(region, "ca") || 293 LowerCaseEqualsASCII(region, "nz") || 294 LowerCaseEqualsASCII(region, "za")) { 295 tmp_locale.append("-GB"); 296 } else { 297 tmp_locale.append("-US"); 298 } 299 } 300 if (IsLocaleAvailable(tmp_locale)) { 301 resolved_locale->swap(tmp_locale); 302 return true; 303 } 304 } 305 306 // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US. 307 struct { 308 const char* source; 309 const char* dest; 310 } alias_map[] = { 311 {"no", "nb"}, 312 {"tl", "fil"}, 313 {"iw", "he"}, 314 {"en", "en-US"}, 315 }; 316 317 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) { 318 if (LowerCaseEqualsASCII(lang, alias_map[i].source)) { 319 std::string tmp_locale(alias_map[i].dest); 320 if (IsLocaleAvailable(tmp_locale)) { 321 resolved_locale->swap(tmp_locale); 322 return true; 323 } 324 } 325 } 326 327 return false; 328 } 329 #endif 330 331 // On Linux, the text layout engine Pango determines paragraph directionality 332 // by looking at the first strongly-directional character in the text. This 333 // means text such as "Google Chrome foo bar..." will be layed out LTR even 334 // if "foo bar" is RTL. So this function prepends the necessary RLM in such 335 // cases. 336 void AdjustParagraphDirectionality(string16* paragraph) { 337 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID) 338 if (base::i18n::IsRTL() && 339 base::i18n::StringContainsStrongRTLChars(*paragraph)) { 340 paragraph->insert(0, 1, static_cast<char16>(base::i18n::kRightToLeftMark)); 341 } 342 #endif 343 } 344 345 #if defined(OS_WIN) 346 std::string GetCanonicalLocale(const std::string& locale) { 347 return base::i18n::GetCanonicalLocale(locale.c_str()); 348 } 349 #endif 350 351 struct AvailableLocalesTraits : 352 base::DefaultLazyInstanceTraits<std::vector<std::string> > { 353 static std::vector<std::string>* New(void* instance) { 354 std::vector<std::string>* locales = 355 base::DefaultLazyInstanceTraits<std::vector<std::string> >::New( 356 instance); 357 int num_locales = uloc_countAvailable(); 358 for (int i = 0; i < num_locales; ++i) { 359 std::string locale_name = uloc_getAvailable(i); 360 // Filter out the names that have aliases. 361 if (IsDuplicateName(locale_name)) 362 continue; 363 // Filter out locales for which we have only partially populated data 364 // and to which Chrome is not localized. 365 if (IsLocalePartiallyPopulated(locale_name)) 366 continue; 367 if (!l10n_util::IsLocaleSupportedByOS(locale_name)) 368 continue; 369 // Normalize underscores to hyphens because that's what our locale files 370 // use. 371 std::replace(locale_name.begin(), locale_name.end(), '_', '-'); 372 373 // Map the Chinese locale names over to zh-CN and zh-TW. 374 if (LowerCaseEqualsASCII(locale_name, "zh-hans")) { 375 locale_name = "zh-CN"; 376 } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) { 377 locale_name = "zh-TW"; 378 } 379 locales->push_back(locale_name); 380 } 381 382 // Manually add 'es-419' to the list. See the comment in IsDuplicateName(). 383 locales->push_back("es-419"); 384 return locales; 385 } 386 }; 387 388 base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits > 389 g_available_locales = LAZY_INSTANCE_INITIALIZER; 390 391 } // namespace 392 393 namespace l10n_util { 394 395 std::string GetApplicationLocale(const std::string& pref_locale) { 396 #if defined(OS_MACOSX) 397 398 // Use any override (Cocoa for the browser), otherwise use the preference 399 // passed to the function. 400 std::string app_locale = l10n_util::GetLocaleOverride(); 401 if (app_locale.empty()) 402 app_locale = pref_locale; 403 404 // The above should handle all of the cases Chrome normally hits, but for some 405 // unit tests, we need something to fall back too. 406 if (app_locale.empty()) 407 app_locale = "en-US"; 408 409 // Windows/Linux call SetICUDefaultLocale after determining the actual locale 410 // with CheckAndResolveLocal to make ICU APIs work in that locale. 411 // Mac doesn't use a locale directory tree of resources (it uses Mac style 412 // resources), so mirror the Windows/Linux behavior of calling 413 // SetICUDefaultLocale. 414 base::i18n::SetICUDefaultLocale(app_locale); 415 return app_locale; 416 417 #else 418 419 std::string resolved_locale; 420 std::vector<std::string> candidates; 421 422 // We only use --lang and the app pref on Windows. On Linux, we only 423 // look at the LC_*/LANG environment variables. We do, however, pass --lang 424 // to renderer and plugin processes so they know what language the parent 425 // process decided to use. 426 427 #if defined(OS_WIN) 428 429 // First, try the preference value. 430 if (!pref_locale.empty()) 431 candidates.push_back(GetCanonicalLocale(pref_locale)); 432 433 // Next, try the overridden locale. 434 const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides(); 435 if (!languages.empty()) { 436 candidates.reserve(candidates.size() + languages.size()); 437 std::transform(languages.begin(), languages.end(), 438 std::back_inserter(candidates), &GetCanonicalLocale); 439 } else { 440 // If no override was set, defer to ICU 441 candidates.push_back(base::i18n::GetConfiguredLocale()); 442 } 443 444 #elif defined(OS_CHROMEOS) || (defined(USE_AURA) && !defined(OS_LINUX)) 445 446 // On ChromeOS, use the application locale preference. 447 if (!pref_locale.empty()) 448 candidates.push_back(pref_locale); 449 450 #elif defined(OS_ANDROID) 451 452 // On Android, query java.util.Locale for the default locale. 453 candidates.push_back(GetDefaultLocale()); 454 455 #elif defined(OS_LINUX) 456 // If we're on a different Linux system, we have glib. 457 458 // GLib implements correct environment variable parsing with 459 // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG. 460 // We used to use our custom parsing code along with ICU for this purpose. 461 // If we have a port that does not depend on GTK, we have to 462 // restore our custom code for that port. 463 const char* const* languages = g_get_language_names(); 464 DCHECK(languages); // A valid pointer is guaranteed. 465 DCHECK(*languages); // At least one entry, "C", is guaranteed. 466 467 for (; *languages != NULL; ++languages) { 468 candidates.push_back(base::i18n::GetCanonicalLocale(*languages)); 469 } 470 471 #else 472 #error Unsupported platform, see build/build_config.h 473 #endif 474 475 std::vector<std::string>::const_iterator i = candidates.begin(); 476 for (; i != candidates.end(); ++i) { 477 if (CheckAndResolveLocale(*i, &resolved_locale)) { 478 base::i18n::SetICUDefaultLocale(resolved_locale); 479 return resolved_locale; 480 } 481 } 482 483 // Fallback on en-US. 484 const std::string fallback_locale("en-US"); 485 if (IsLocaleAvailable(fallback_locale)) { 486 base::i18n::SetICUDefaultLocale(fallback_locale); 487 return fallback_locale; 488 } 489 490 return std::string(); 491 492 #endif 493 } 494 495 bool IsLocaleNameTranslated(const char* locale, 496 const std::string& display_locale) { 497 string16 display_name = 498 l10n_util::GetDisplayNameForLocale(locale, display_locale, false); 499 // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not 500 // uloc_getDisplayName returns the actual translation or the default 501 // value (locale code), we have to rely on this hack to tell whether 502 // the translation is available or not. If ICU doesn't have a translated 503 // name for this locale, GetDisplayNameForLocale will just return the 504 // locale code. 505 return !IsStringASCII(display_name) || UTF16ToASCII(display_name) != locale; 506 } 507 508 string16 GetDisplayNameForLocale(const std::string& locale, 509 const std::string& display_locale, 510 bool is_for_ui) { 511 std::string locale_code = locale; 512 // Internally, we use the language code of zh-CN and zh-TW, but we want the 513 // display names to be Chinese (Simplified) and Chinese (Traditional) instead 514 // of Chinese (China) and Chinese (Taiwan). To do that, we pass zh-Hans 515 // and zh-Hant to ICU. Even with this mapping, we'd get 516 // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and 517 // even longer results in other languages. Arguably, they're better than 518 // the current results : Chinese (China) / Chinese (Taiwan). 519 // TODO(jungshik): Do one of the following: 520 // 1. Special-case Chinese by getting the custom-translation for them 521 // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE. 522 // 3. Get translations for two directly from the ICU resouce bundle 523 // because they're not accessible with other any API. 524 // 4. Patch ICU to special-case zh-Hans/zh-Hant for us. 525 // #1 and #2 wouldn't work if display_locale != current UI locale although 526 // we can think of additional hack to work around the problem. 527 // #3 can be potentially expensive. 528 if (locale_code == "zh-CN") 529 locale_code = "zh-Hans"; 530 else if (locale_code == "zh-TW") 531 locale_code = "zh-Hant"; 532 533 string16 display_name; 534 #if defined(OS_ANDROID) 535 // Use Java API to get locale display name so that we can remove most of 536 // the lang data from icu data to reduce binary size, except for zh-Hans and 537 // zh-Hant because the current Android Java API doesn't support scripts. 538 // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once 539 // Android Java API supports scripts. 540 if (!StartsWithASCII(locale_code, "zh-Han", true)) { 541 display_name = GetDisplayNameForLocale(locale_code, display_locale); 542 } else 543 #endif 544 { 545 UErrorCode error = U_ZERO_ERROR; 546 const int kBufferSize = 1024; 547 548 int actual_size = uloc_getDisplayName( 549 locale_code.c_str(), display_locale.c_str(), 550 WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error); 551 DCHECK(U_SUCCESS(error)); 552 display_name.resize(actual_size); 553 } 554 555 // Add directional markup so parentheses are properly placed. 556 if (is_for_ui && base::i18n::IsRTL()) 557 base::i18n::AdjustStringForLocaleDirection(&display_name); 558 return display_name; 559 } 560 561 string16 GetDisplayNameForCountry(const std::string& country_code, 562 const std::string& display_locale) { 563 return GetDisplayNameForLocale("_" + country_code, display_locale, false); 564 } 565 566 std::string NormalizeLocale(const std::string& locale) { 567 std::string normalized_locale(locale); 568 std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_'); 569 570 return normalized_locale; 571 } 572 573 void GetParentLocales(const std::string& current_locale, 574 std::vector<std::string>* parent_locales) { 575 std::string locale(NormalizeLocale(current_locale)); 576 577 const int kNameCapacity = 256; 578 char parent[kNameCapacity]; 579 base::strlcpy(parent, locale.c_str(), kNameCapacity); 580 parent_locales->push_back(parent); 581 UErrorCode err = U_ZERO_ERROR; 582 while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) { 583 if (U_FAILURE(err)) 584 break; 585 parent_locales->push_back(parent); 586 } 587 } 588 589 bool IsValidLocaleSyntax(const std::string& locale) { 590 // Check that the length is plausible. 591 if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY) 592 return false; 593 594 // Strip off the part after an '@' sign, which might contain keywords, 595 // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil. 596 // We don't validate that part much, just check that there's at least one 597 // equals sign in a plausible place. Normalize the prefix so that hyphens 598 // are changed to underscores. 599 std::string prefix = NormalizeLocale(locale); 600 size_t split_point = locale.find("@"); 601 if (split_point != std::string::npos) { 602 std::string keywords = locale.substr(split_point + 1); 603 prefix = locale.substr(0, split_point); 604 605 size_t equals_loc = keywords.find("="); 606 if (equals_loc == std::string::npos || 607 equals_loc < 1 || equals_loc > keywords.size() - 2) 608 return false; 609 } 610 611 // Check that all characters before the at-sign are alphanumeric or 612 // underscore. 613 for (size_t i = 0; i < prefix.size(); i++) { 614 char ch = prefix[i]; 615 if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_') 616 return false; 617 } 618 619 // Check that the initial token (before the first hyphen/underscore) 620 // is 1 - 3 alphabetical characters (a language tag). 621 for (size_t i = 0; i < prefix.size(); i++) { 622 char ch = prefix[i]; 623 if (ch == '_') { 624 if (i < 1 || i > 3) 625 return false; 626 break; 627 } 628 if (!IsAsciiAlpha(ch)) 629 return false; 630 } 631 632 // Check that the all tokens after the initial token are 1 - 8 characters. 633 // (Tokenize/StringTokenizer don't work here, they collapse multiple 634 // delimiters into one.) 635 int token_len = 0; 636 int token_index = 0; 637 for (size_t i = 0; i < prefix.size(); i++) { 638 if (prefix[i] != '_') { 639 token_len++; 640 continue; 641 } 642 643 if (token_index > 0 && (token_len < 1 || token_len > 8)) { 644 return false; 645 } 646 token_index++; 647 token_len = 0; 648 } 649 if (token_index == 0 && (token_len < 1 || token_len > 3)) { 650 return false; 651 } else if (token_len < 1 || token_len > 8) { 652 return false; 653 } 654 655 return true; 656 } 657 658 std::string GetStringUTF8(int message_id) { 659 return UTF16ToUTF8(GetStringUTF16(message_id)); 660 } 661 662 string16 GetStringUTF16(int message_id) { 663 ResourceBundle& rb = ResourceBundle::GetSharedInstance(); 664 string16 str = rb.GetLocalizedString(message_id); 665 AdjustParagraphDirectionality(&str); 666 667 return str; 668 } 669 670 static string16 GetStringF(int message_id, 671 const std::vector<string16>& replacements, 672 std::vector<size_t>* offsets) { 673 // TODO(tc): We could save a string copy if we got the raw string as 674 // a StringPiece and were able to call ReplaceStringPlaceholders with 675 // a StringPiece format string and string16 substitution strings. In 676 // practice, the strings should be relatively short. 677 ResourceBundle& rb = ResourceBundle::GetSharedInstance(); 678 const string16& format_string = rb.GetLocalizedString(message_id); 679 680 #ifndef NDEBUG 681 // Make sure every replacement string is being used, so we don't just 682 // silently fail to insert one. If |offsets| is non-NULL, then don't do this 683 // check as the code may simply want to find the placeholders rather than 684 // actually replacing them. 685 if (!offsets) { 686 std::string utf8_string = UTF16ToUTF8(format_string); 687 688 // $9 is the highest allowed placeholder. 689 for (size_t i = 0; i < 9; ++i) { 690 bool placeholder_should_exist = replacements.size() > i; 691 692 std::string placeholder = 693 base::StringPrintf("$%d", static_cast<int>(i + 1)); 694 size_t pos = utf8_string.find(placeholder.c_str()); 695 if (placeholder_should_exist) { 696 DCHECK_NE(std::string::npos, pos) << 697 " Didn't find a " << placeholder << " placeholder in " << 698 utf8_string; 699 } else { 700 DCHECK_EQ(std::string::npos, pos) << 701 " Unexpectedly found a " << placeholder << " placeholder in " << 702 utf8_string; 703 } 704 } 705 } 706 #endif 707 708 string16 formatted = ReplaceStringPlaceholders(format_string, replacements, 709 offsets); 710 AdjustParagraphDirectionality(&formatted); 711 712 return formatted; 713 } 714 715 std::string GetStringFUTF8(int message_id, 716 const string16& a) { 717 return UTF16ToUTF8(GetStringFUTF16(message_id, a)); 718 } 719 720 std::string GetStringFUTF8(int message_id, 721 const string16& a, 722 const string16& b) { 723 return UTF16ToUTF8(GetStringFUTF16(message_id, a, b)); 724 } 725 726 std::string GetStringFUTF8(int message_id, 727 const string16& a, 728 const string16& b, 729 const string16& c) { 730 return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c)); 731 } 732 733 std::string GetStringFUTF8(int message_id, 734 const string16& a, 735 const string16& b, 736 const string16& c, 737 const string16& d) { 738 return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d)); 739 } 740 741 string16 GetStringFUTF16(int message_id, 742 const string16& a) { 743 std::vector<string16> replacements; 744 replacements.push_back(a); 745 return GetStringF(message_id, replacements, NULL); 746 } 747 748 string16 GetStringFUTF16(int message_id, 749 const string16& a, 750 const string16& b) { 751 return GetStringFUTF16(message_id, a, b, NULL); 752 } 753 754 string16 GetStringFUTF16(int message_id, 755 const string16& a, 756 const string16& b, 757 const string16& c) { 758 std::vector<string16> replacements; 759 replacements.push_back(a); 760 replacements.push_back(b); 761 replacements.push_back(c); 762 return GetStringF(message_id, replacements, NULL); 763 } 764 765 string16 GetStringFUTF16(int message_id, 766 const string16& a, 767 const string16& b, 768 const string16& c, 769 const string16& d) { 770 std::vector<string16> replacements; 771 replacements.push_back(a); 772 replacements.push_back(b); 773 replacements.push_back(c); 774 replacements.push_back(d); 775 return GetStringF(message_id, replacements, NULL); 776 } 777 778 string16 GetStringFUTF16(int message_id, 779 const string16& a, 780 const string16& b, 781 const string16& c, 782 const string16& d, 783 const string16& e) { 784 std::vector<string16> replacements; 785 replacements.push_back(a); 786 replacements.push_back(b); 787 replacements.push_back(c); 788 replacements.push_back(d); 789 replacements.push_back(e); 790 return GetStringF(message_id, replacements, NULL); 791 } 792 793 string16 GetStringFUTF16(int message_id, const string16& a, size_t* offset) { 794 DCHECK(offset); 795 std::vector<size_t> offsets; 796 std::vector<string16> replacements; 797 replacements.push_back(a); 798 string16 result = GetStringF(message_id, replacements, &offsets); 799 DCHECK(offsets.size() == 1); 800 *offset = offsets[0]; 801 return result; 802 } 803 804 string16 GetStringFUTF16(int message_id, 805 const string16& a, 806 const string16& b, 807 std::vector<size_t>* offsets) { 808 std::vector<string16> replacements; 809 replacements.push_back(a); 810 replacements.push_back(b); 811 return GetStringF(message_id, replacements, offsets); 812 } 813 814 string16 GetStringFUTF16Int(int message_id, int a) { 815 return GetStringFUTF16(message_id, UTF8ToUTF16(base::IntToString(a))); 816 } 817 818 string16 GetStringFUTF16Int(int message_id, int64 a) { 819 return GetStringFUTF16(message_id, UTF8ToUTF16(base::Int64ToString(a))); 820 } 821 822 // Specialization of operator() method for string16 version. 823 template <> 824 bool StringComparator<string16>::operator()(const string16& lhs, 825 const string16& rhs) { 826 // If we can not get collator instance for specified locale, just do simple 827 // string compare. 828 if (!collator_) 829 return lhs < rhs; 830 return base::i18n::CompareString16WithCollator(collator_, lhs, rhs) == 831 UCOL_LESS; 832 }; 833 834 void SortStrings16(const std::string& locale, 835 std::vector<string16>* strings) { 836 SortVectorWithStringKey(locale, strings, false); 837 } 838 839 const std::vector<std::string>& GetAvailableLocales() { 840 return g_available_locales.Get(); 841 } 842 843 void GetAcceptLanguagesForLocale(const std::string& display_locale, 844 std::vector<std::string>* locale_codes) { 845 for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) { 846 if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i], 847 display_locale)) 848 // TODO(jungshik) : Put them at the of the list with language codes 849 // enclosed by brackets instead of skipping. 850 continue; 851 locale_codes->push_back(kAcceptLanguageList[i]); 852 } 853 } 854 855 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) { 856 int width = 0; 857 base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width); 858 DCHECK_GT(width, 0); 859 return width; 860 } 861 862 } // namespace l10n_util 863