1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "ui/base/l10n/l10n_util.h" 6 7 #include <algorithm> 8 #include <cstdlib> 9 #include <iterator> 10 #include <string> 11 12 #include "base/command_line.h" 13 #include "base/compiler_specific.h" 14 #include "base/file_util.h" 15 #include "base/i18n/file_util_icu.h" 16 #include "base/i18n/rtl.h" 17 #include "base/i18n/string_compare.h" 18 #include "base/lazy_instance.h" 19 #include "base/memory/scoped_ptr.h" 20 #include "base/path_service.h" 21 #include "base/strings/string_number_conversions.h" 22 #include "base/strings/string_split.h" 23 #include "base/strings/string_util.h" 24 #include "base/strings/stringprintf.h" 25 #include "base/strings/sys_string_conversions.h" 26 #include "base/strings/utf_string_conversions.h" 27 #include "build/build_config.h" 28 #include "third_party/icu/source/common/unicode/rbbi.h" 29 #include "third_party/icu/source/common/unicode/uloc.h" 30 #include "ui/base/l10n/l10n_util_collator.h" 31 #include "ui/base/l10n/l10n_util_plurals.h" 32 #include "ui/base/resource/resource_bundle.h" 33 #include "ui/base/ui_base_paths.h" 34 35 #if defined(OS_ANDROID) 36 #include "ui/base/l10n/l10n_util_android.h" 37 #endif 38 39 #if defined(USE_GLIB) 40 #include <glib.h> 41 #endif 42 43 #if defined(OS_WIN) 44 #include "ui/base/l10n/l10n_util_win.h" 45 #endif // OS_WIN 46 47 namespace { 48 49 static const char* const kAcceptLanguageList[] = { 50 "af", // Afrikaans 51 "am", // Amharic 52 "ar", // Arabic 53 "az", // Azerbaijani 54 "be", // Belarusian 55 "bg", // Bulgarian 56 "bh", // Bihari 57 "bn", // Bengali 58 "br", // Breton 59 "bs", // Bosnian 60 "ca", // Catalan 61 "co", // Corsican 62 "cs", // Czech 63 "cy", // Welsh 64 "da", // Danish 65 "de", // German 66 "de-AT", // German (Austria) 67 "de-CH", // German (Switzerland) 68 "de-DE", // German (Germany) 69 "el", // Greek 70 "en", // English 71 "en-AU", // English (Australia) 72 "en-CA", // English (Canada) 73 "en-GB", // English (UK) 74 "en-NZ", // English (New Zealand) 75 "en-US", // English (US) 76 "en-ZA", // English (South Africa) 77 "eo", // Esperanto 78 // TODO(jungshik) : Do we want to list all es-Foo for Latin-American 79 // Spanish speaking countries? 80 "es", // Spanish 81 "es-419", // Spanish (Latin America) 82 "et", // Estonian 83 "eu", // Basque 84 "fa", // Persian 85 "fi", // Finnish 86 "fil", // Filipino 87 "fo", // Faroese 88 "fr", // French 89 "fr-CA", // French (Canada) 90 "fr-CH", // French (Switzerland) 91 "fr-FR", // French (France) 92 "fy", // Frisian 93 "ga", // Irish 94 "gd", // Scots Gaelic 95 "gl", // Galician 96 "gn", // Guarani 97 "gu", // Gujarati 98 "ha", // Hausa 99 "haw", // Hawaiian 100 "he", // Hebrew 101 "hi", // Hindi 102 "hr", // Croatian 103 "hu", // Hungarian 104 "hy", // Armenian 105 "ia", // Interlingua 106 "id", // Indonesian 107 "is", // Icelandic 108 "it", // Italian 109 "it-CH", // Italian (Switzerland) 110 "it-IT", // Italian (Italy) 111 "ja", // Japanese 112 "jw", // Javanese 113 "ka", // Georgian 114 "kk", // Kazakh 115 "km", // Cambodian 116 "kn", // Kannada 117 "ko", // Korean 118 "ku", // Kurdish 119 "ky", // Kyrgyz 120 "la", // Latin 121 "ln", // Lingala 122 "lo", // Laothian 123 "lt", // Lithuanian 124 "lv", // Latvian 125 "mk", // Macedonian 126 "ml", // Malayalam 127 "mn", // Mongolian 128 "mo", // Moldavian 129 "mr", // Marathi 130 "ms", // Malay 131 "mt", // Maltese 132 "nb", // Norwegian (Bokmal) 133 "ne", // Nepali 134 "nl", // Dutch 135 "nn", // Norwegian (Nynorsk) 136 "no", // Norwegian 137 "oc", // Occitan 138 "om", // Oromo 139 "or", // Oriya 140 "pa", // Punjabi 141 "pl", // Polish 142 "ps", // Pashto 143 "pt", // Portuguese 144 "pt-BR", // Portuguese (Brazil) 145 "pt-PT", // Portuguese (Portugal) 146 "qu", // Quechua 147 "rm", // Romansh 148 "ro", // Romanian 149 "ru", // Russian 150 "sd", // Sindhi 151 "sh", // Serbo-Croatian 152 "si", // Sinhalese 153 "sk", // Slovak 154 "sl", // Slovenian 155 "sn", // Shona 156 "so", // Somali 157 "sq", // Albanian 158 "sr", // Serbian 159 "st", // Sesotho 160 "su", // Sundanese 161 "sv", // Swedish 162 "sw", // Swahili 163 "ta", // Tamil 164 "te", // Telugu 165 "tg", // Tajik 166 "th", // Thai 167 "ti", // Tigrinya 168 "tk", // Turkmen 169 "to", // Tonga 170 "tr", // Turkish 171 "tt", // Tatar 172 "tw", // Twi 173 "ug", // Uighur 174 "uk", // Ukrainian 175 "ur", // Urdu 176 "uz", // Uzbek 177 "vi", // Vietnamese 178 "xh", // Xhosa 179 "yi", // Yiddish 180 "yo", // Yoruba 181 "zh", // Chinese 182 "zh-CN", // Chinese (Simplified) 183 "zh-TW", // Chinese (Traditional) 184 "zu", // Zulu 185 }; 186 187 // Returns true if |locale_name| has an alias in the ICU data file. 188 bool IsDuplicateName(const std::string& locale_name) { 189 static const char* const kDuplicateNames[] = { 190 "en", 191 "pt", 192 "zh", 193 "zh_hans_cn", 194 "zh_hant_hk", 195 "zh_hant_mo", 196 "zh_hans_sg", 197 "zh_hant_tw" 198 }; 199 200 // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain). 201 // 'es-419' (Spanish in Latin America) is not available in ICU so that it 202 // has to be added manually in GetAvailableLocales(). 203 if (LowerCaseEqualsASCII(locale_name.substr(0, 3), "es_")) 204 return true; 205 for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) { 206 if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0) 207 return true; 208 } 209 return false; 210 } 211 212 // We added 30+ minimally populated locales with only a few entries 213 // (exemplar character set, script, writing direction and its own 214 // lanaguage name). These locales have to be distinguished from the 215 // fully populated locales to which Chrome is localized. 216 bool IsLocalePartiallyPopulated(const std::string& locale_name) { 217 // For partially populated locales, even the translation for "English" 218 // is not available. A more robust/elegant way to check is to add a special 219 // field (say, 'isPartial' to our version of ICU locale files) and 220 // check its value, but this hack seems to work well. 221 return !l10n_util::IsLocaleNameTranslated("en", locale_name); 222 } 223 224 #if !defined(OS_MACOSX) 225 bool IsLocaleAvailable(const std::string& locale) { 226 // If locale has any illegal characters in it, we don't want to try to 227 // load it because it may be pointing outside the locale data file directory. 228 if (!file_util::IsFilenameLegal(ASCIIToUTF16(locale))) 229 return false; 230 231 // IsLocalePartiallyPopulated() can be called here for an early return w/o 232 // checking the resource availability below. It'd help when Chrome is run 233 // under a system locale Chrome is not localized to (e.g.Farsi on Linux), 234 // but it'd slow down the start up time a little bit for locales Chrome is 235 // localized to. So, we don't call it here. 236 if (!l10n_util::IsLocaleSupportedByOS(locale)) 237 return false; 238 239 // If the ResourceBundle is not yet initialized, return false to avoid the 240 // CHECK failure in ResourceBundle::GetSharedInstance(). 241 if (!ResourceBundle::HasSharedInstance()) 242 return false; 243 244 // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function 245 // so that this can be invoked without initializing the global instance. 246 // See crbug.com/230432: CHECK failure in GetUserDataDir(). 247 return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale); 248 } 249 #endif 250 251 // On Linux, the text layout engine Pango determines paragraph directionality 252 // by looking at the first strongly-directional character in the text. This 253 // means text such as "Google Chrome foo bar..." will be layed out LTR even 254 // if "foo bar" is RTL. So this function prepends the necessary RLM in such 255 // cases. 256 void AdjustParagraphDirectionality(base::string16* paragraph) { 257 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID) 258 if (base::i18n::IsRTL() && 259 base::i18n::StringContainsStrongRTLChars(*paragraph)) { 260 paragraph->insert(0, 1, 261 static_cast<base::char16>(base::i18n::kRightToLeftMark)); 262 } 263 #endif 264 } 265 266 #if defined(OS_WIN) 267 std::string GetCanonicalLocale(const std::string& locale) { 268 return base::i18n::GetCanonicalLocale(locale.c_str()); 269 } 270 #endif 271 272 struct AvailableLocalesTraits 273 : base::DefaultLazyInstanceTraits<std::vector<std::string> > { 274 static std::vector<std::string>* New(void* instance) { 275 std::vector<std::string>* locales = 276 base::DefaultLazyInstanceTraits<std::vector<std::string> >::New( 277 instance); 278 int num_locales = uloc_countAvailable(); 279 for (int i = 0; i < num_locales; ++i) { 280 std::string locale_name = uloc_getAvailable(i); 281 // Filter out the names that have aliases. 282 if (IsDuplicateName(locale_name)) 283 continue; 284 // Filter out locales for which we have only partially populated data 285 // and to which Chrome is not localized. 286 if (IsLocalePartiallyPopulated(locale_name)) 287 continue; 288 if (!l10n_util::IsLocaleSupportedByOS(locale_name)) 289 continue; 290 // Normalize underscores to hyphens because that's what our locale files 291 // use. 292 std::replace(locale_name.begin(), locale_name.end(), '_', '-'); 293 294 // Map the Chinese locale names over to zh-CN and zh-TW. 295 if (LowerCaseEqualsASCII(locale_name, "zh-hans")) { 296 locale_name = "zh-CN"; 297 } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) { 298 locale_name = "zh-TW"; 299 } 300 locales->push_back(locale_name); 301 } 302 303 // Manually add 'es-419' to the list. See the comment in IsDuplicateName(). 304 locales->push_back("es-419"); 305 return locales; 306 } 307 }; 308 309 base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits> 310 g_available_locales = LAZY_INSTANCE_INITIALIZER; 311 312 } // namespace 313 314 namespace l10n_util { 315 316 bool CheckAndResolveLocale(const std::string& locale, 317 std::string* resolved_locale) { 318 #if defined(OS_MACOSX) 319 NOTIMPLEMENTED(); 320 return false; 321 #else 322 if (IsLocaleAvailable(locale)) { 323 *resolved_locale = locale; 324 return true; 325 } 326 327 // If there's a variant, skip over it so we can try without the region 328 // code. For example, ca_ES@valencia should cause us to try ca@valencia 329 // before ca. 330 std::string::size_type variant_pos = locale.find('@'); 331 if (variant_pos != std::string::npos) 332 return false; 333 334 // If the locale matches language but not country, use that instead. 335 // TODO(jungshik) : Nothing is done about languages that Chrome 336 // does not support but available on Windows. We fall 337 // back to en-US in GetApplicationLocale so that it's a not critical, 338 // but we can do better. 339 std::string::size_type hyphen_pos = locale.find('-'); 340 std::string lang(locale, 0, hyphen_pos); 341 if (hyphen_pos != std::string::npos && hyphen_pos > 0) { 342 std::string region(locale, hyphen_pos + 1); 343 std::string tmp_locale(lang); 344 // Map es-RR other than es-ES to es-419 (Chrome's Latin American 345 // Spanish locale). 346 if (LowerCaseEqualsASCII(lang, "es") && 347 !LowerCaseEqualsASCII(region, "es")) { 348 tmp_locale.append("-419"); 349 } else if (LowerCaseEqualsASCII(lang, "zh")) { 350 // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN. 351 if (LowerCaseEqualsASCII(region, "hk") || 352 LowerCaseEqualsASCII(region, "mo")) { // Macao 353 tmp_locale.append("-TW"); 354 } else { 355 tmp_locale.append("-CN"); 356 } 357 } else if (LowerCaseEqualsASCII(lang, "en")) { 358 // Map Australian, Canadian, New Zealand and South African English 359 // to British English for now. 360 // TODO(jungshik): en-CA may have to change sides once 361 // we have OS locale separate from app locale (Chrome's UI language). 362 if (LowerCaseEqualsASCII(region, "au") || 363 LowerCaseEqualsASCII(region, "ca") || 364 LowerCaseEqualsASCII(region, "nz") || 365 LowerCaseEqualsASCII(region, "za")) { 366 tmp_locale.append("-GB"); 367 } else { 368 tmp_locale.append("-US"); 369 } 370 } 371 if (IsLocaleAvailable(tmp_locale)) { 372 resolved_locale->swap(tmp_locale); 373 return true; 374 } 375 } 376 377 // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US. 378 struct { 379 const char* source; 380 const char* dest; 381 } alias_map[] = { 382 {"no", "nb"}, 383 {"tl", "fil"}, 384 {"iw", "he"}, 385 {"en", "en-US"}, 386 }; 387 388 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) { 389 if (LowerCaseEqualsASCII(lang, alias_map[i].source)) { 390 std::string tmp_locale(alias_map[i].dest); 391 if (IsLocaleAvailable(tmp_locale)) { 392 resolved_locale->swap(tmp_locale); 393 return true; 394 } 395 } 396 } 397 398 return false; 399 #endif 400 } 401 402 std::string GetApplicationLocale(const std::string& pref_locale) { 403 #if defined(OS_MACOSX) 404 405 // Use any override (Cocoa for the browser), otherwise use the preference 406 // passed to the function. 407 std::string app_locale = l10n_util::GetLocaleOverride(); 408 if (app_locale.empty()) 409 app_locale = pref_locale; 410 411 // The above should handle all of the cases Chrome normally hits, but for some 412 // unit tests, we need something to fall back too. 413 if (app_locale.empty()) 414 app_locale = "en-US"; 415 416 // Windows/Linux call SetICUDefaultLocale after determining the actual locale 417 // with CheckAndResolveLocal to make ICU APIs work in that locale. 418 // Mac doesn't use a locale directory tree of resources (it uses Mac style 419 // resources), so mirror the Windows/Linux behavior of calling 420 // SetICUDefaultLocale. 421 base::i18n::SetICUDefaultLocale(app_locale); 422 return app_locale; 423 424 #else 425 426 std::string resolved_locale; 427 std::vector<std::string> candidates; 428 429 // We only use --lang and the app pref on Windows. On Linux, we only 430 // look at the LC_*/LANG environment variables. We do, however, pass --lang 431 // to renderer and plugin processes so they know what language the parent 432 // process decided to use. 433 434 #if defined(OS_WIN) 435 436 // First, try the preference value. 437 if (!pref_locale.empty()) 438 candidates.push_back(GetCanonicalLocale(pref_locale)); 439 440 // Next, try the overridden locale. 441 const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides(); 442 if (!languages.empty()) { 443 candidates.reserve(candidates.size() + languages.size()); 444 std::transform(languages.begin(), languages.end(), 445 std::back_inserter(candidates), &GetCanonicalLocale); 446 } else { 447 // If no override was set, defer to ICU 448 candidates.push_back(base::i18n::GetConfiguredLocale()); 449 } 450 451 #elif defined(OS_ANDROID) 452 453 // On Android, query java.util.Locale for the default locale. 454 candidates.push_back(GetDefaultLocale()); 455 456 #elif defined(USE_GLIB) && !defined(OS_CHROMEOS) 457 458 // GLib implements correct environment variable parsing with 459 // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG. 460 // We used to use our custom parsing code along with ICU for this purpose. 461 // If we have a port that does not depend on GTK, we have to 462 // restore our custom code for that port. 463 const char* const* languages = g_get_language_names(); 464 DCHECK(languages); // A valid pointer is guaranteed. 465 DCHECK(*languages); // At least one entry, "C", is guaranteed. 466 467 for (; *languages != NULL; ++languages) { 468 candidates.push_back(base::i18n::GetCanonicalLocale(*languages)); 469 } 470 471 #else 472 473 // By default, use the application locale preference. This applies to ChromeOS 474 // and linux systems without glib. 475 if (!pref_locale.empty()) 476 candidates.push_back(pref_locale); 477 478 #endif 479 480 std::vector<std::string>::const_iterator i = candidates.begin(); 481 for (; i != candidates.end(); ++i) { 482 if (CheckAndResolveLocale(*i, &resolved_locale)) { 483 base::i18n::SetICUDefaultLocale(resolved_locale); 484 return resolved_locale; 485 } 486 } 487 488 // Fallback on en-US. 489 const std::string fallback_locale("en-US"); 490 if (IsLocaleAvailable(fallback_locale)) { 491 base::i18n::SetICUDefaultLocale(fallback_locale); 492 return fallback_locale; 493 } 494 495 return std::string(); 496 497 #endif 498 } 499 500 bool IsLocaleNameTranslated(const char* locale, 501 const std::string& display_locale) { 502 base::string16 display_name = 503 l10n_util::GetDisplayNameForLocale(locale, display_locale, false); 504 // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not 505 // uloc_getDisplayName returns the actual translation or the default 506 // value (locale code), we have to rely on this hack to tell whether 507 // the translation is available or not. If ICU doesn't have a translated 508 // name for this locale, GetDisplayNameForLocale will just return the 509 // locale code. 510 return !IsStringASCII(display_name) || UTF16ToASCII(display_name) != locale; 511 } 512 513 base::string16 GetDisplayNameForLocale(const std::string& locale, 514 const std::string& display_locale, 515 bool is_for_ui) { 516 std::string locale_code = locale; 517 // Internally, we use the language code of zh-CN and zh-TW, but we want the 518 // display names to be Chinese (Simplified) and Chinese (Traditional) instead 519 // of Chinese (China) and Chinese (Taiwan). To do that, we pass zh-Hans 520 // and zh-Hant to ICU. Even with this mapping, we'd get 521 // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and 522 // even longer results in other languages. Arguably, they're better than 523 // the current results : Chinese (China) / Chinese (Taiwan). 524 // TODO(jungshik): Do one of the following: 525 // 1. Special-case Chinese by getting the custom-translation for them 526 // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE. 527 // 3. Get translations for two directly from the ICU resouce bundle 528 // because they're not accessible with other any API. 529 // 4. Patch ICU to special-case zh-Hans/zh-Hant for us. 530 // #1 and #2 wouldn't work if display_locale != current UI locale although 531 // we can think of additional hack to work around the problem. 532 // #3 can be potentially expensive. 533 if (locale_code == "zh-CN") 534 locale_code = "zh-Hans"; 535 else if (locale_code == "zh-TW") 536 locale_code = "zh-Hant"; 537 538 base::string16 display_name; 539 #if defined(OS_ANDROID) 540 // Use Java API to get locale display name so that we can remove most of 541 // the lang data from icu data to reduce binary size, except for zh-Hans and 542 // zh-Hant because the current Android Java API doesn't support scripts. 543 // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once 544 // Android Java API supports scripts. 545 if (!StartsWithASCII(locale_code, "zh-Han", true)) { 546 display_name = GetDisplayNameForLocale(locale_code, display_locale); 547 } else 548 #endif 549 { 550 UErrorCode error = U_ZERO_ERROR; 551 const int kBufferSize = 1024; 552 553 int actual_size = uloc_getDisplayName( 554 locale_code.c_str(), display_locale.c_str(), 555 WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error); 556 DCHECK(U_SUCCESS(error)); 557 display_name.resize(actual_size); 558 } 559 560 // Add directional markup so parentheses are properly placed. 561 if (is_for_ui && base::i18n::IsRTL()) 562 base::i18n::AdjustStringForLocaleDirection(&display_name); 563 return display_name; 564 } 565 566 base::string16 GetDisplayNameForCountry(const std::string& country_code, 567 const std::string& display_locale) { 568 return GetDisplayNameForLocale("_" + country_code, display_locale, false); 569 } 570 571 std::string NormalizeLocale(const std::string& locale) { 572 std::string normalized_locale(locale); 573 std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_'); 574 575 return normalized_locale; 576 } 577 578 void GetParentLocales(const std::string& current_locale, 579 std::vector<std::string>* parent_locales) { 580 std::string locale(NormalizeLocale(current_locale)); 581 582 const int kNameCapacity = 256; 583 char parent[kNameCapacity]; 584 base::strlcpy(parent, locale.c_str(), kNameCapacity); 585 parent_locales->push_back(parent); 586 UErrorCode err = U_ZERO_ERROR; 587 while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) { 588 if (U_FAILURE(err)) 589 break; 590 parent_locales->push_back(parent); 591 } 592 } 593 594 bool IsValidLocaleSyntax(const std::string& locale) { 595 // Check that the length is plausible. 596 if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY) 597 return false; 598 599 // Strip off the part after an '@' sign, which might contain keywords, 600 // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil. 601 // We don't validate that part much, just check that there's at least one 602 // equals sign in a plausible place. Normalize the prefix so that hyphens 603 // are changed to underscores. 604 std::string prefix = NormalizeLocale(locale); 605 size_t split_point = locale.find("@"); 606 if (split_point != std::string::npos) { 607 std::string keywords = locale.substr(split_point + 1); 608 prefix = locale.substr(0, split_point); 609 610 size_t equals_loc = keywords.find("="); 611 if (equals_loc == std::string::npos || 612 equals_loc < 1 || equals_loc > keywords.size() - 2) 613 return false; 614 } 615 616 // Check that all characters before the at-sign are alphanumeric or 617 // underscore. 618 for (size_t i = 0; i < prefix.size(); i++) { 619 char ch = prefix[i]; 620 if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_') 621 return false; 622 } 623 624 // Check that the initial token (before the first hyphen/underscore) 625 // is 1 - 3 alphabetical characters (a language tag). 626 for (size_t i = 0; i < prefix.size(); i++) { 627 char ch = prefix[i]; 628 if (ch == '_') { 629 if (i < 1 || i > 3) 630 return false; 631 break; 632 } 633 if (!IsAsciiAlpha(ch)) 634 return false; 635 } 636 637 // Check that the all tokens after the initial token are 1 - 8 characters. 638 // (Tokenize/StringTokenizer don't work here, they collapse multiple 639 // delimiters into one.) 640 int token_len = 0; 641 int token_index = 0; 642 for (size_t i = 0; i < prefix.size(); i++) { 643 if (prefix[i] != '_') { 644 token_len++; 645 continue; 646 } 647 648 if (token_index > 0 && (token_len < 1 || token_len > 8)) { 649 return false; 650 } 651 token_index++; 652 token_len = 0; 653 } 654 if (token_index == 0 && (token_len < 1 || token_len > 3)) { 655 return false; 656 } else if (token_len < 1 || token_len > 8) { 657 return false; 658 } 659 660 return true; 661 } 662 663 std::string GetStringUTF8(int message_id) { 664 return UTF16ToUTF8(GetStringUTF16(message_id)); 665 } 666 667 base::string16 GetStringUTF16(int message_id) { 668 ResourceBundle& rb = ResourceBundle::GetSharedInstance(); 669 base::string16 str = rb.GetLocalizedString(message_id); 670 AdjustParagraphDirectionality(&str); 671 672 return str; 673 } 674 675 base::string16 GetStringFUTF16(int message_id, 676 const std::vector<base::string16>& replacements, 677 std::vector<size_t>* offsets) { 678 // TODO(tc): We could save a string copy if we got the raw string as 679 // a StringPiece and were able to call ReplaceStringPlaceholders with 680 // a StringPiece format string and base::string16 substitution strings. In 681 // practice, the strings should be relatively short. 682 ResourceBundle& rb = ResourceBundle::GetSharedInstance(); 683 const base::string16& format_string = rb.GetLocalizedString(message_id); 684 685 #ifndef NDEBUG 686 // Make sure every replacement string is being used, so we don't just 687 // silently fail to insert one. If |offsets| is non-NULL, then don't do this 688 // check as the code may simply want to find the placeholders rather than 689 // actually replacing them. 690 if (!offsets) { 691 std::string utf8_string = UTF16ToUTF8(format_string); 692 693 // $9 is the highest allowed placeholder. 694 for (size_t i = 0; i < 9; ++i) { 695 bool placeholder_should_exist = replacements.size() > i; 696 697 std::string placeholder = 698 base::StringPrintf("$%d", static_cast<int>(i + 1)); 699 size_t pos = utf8_string.find(placeholder.c_str()); 700 if (placeholder_should_exist) { 701 DCHECK_NE(std::string::npos, pos) << 702 " Didn't find a " << placeholder << " placeholder in " << 703 utf8_string; 704 } else { 705 DCHECK_EQ(std::string::npos, pos) << 706 " Unexpectedly found a " << placeholder << " placeholder in " << 707 utf8_string; 708 } 709 } 710 } 711 #endif 712 713 base::string16 formatted = ReplaceStringPlaceholders( 714 format_string, replacements, offsets); 715 AdjustParagraphDirectionality(&formatted); 716 717 return formatted; 718 } 719 720 std::string GetStringFUTF8(int message_id, 721 const base::string16& a) { 722 return UTF16ToUTF8(GetStringFUTF16(message_id, a)); 723 } 724 725 std::string GetStringFUTF8(int message_id, 726 const base::string16& a, 727 const base::string16& b) { 728 return UTF16ToUTF8(GetStringFUTF16(message_id, a, b)); 729 } 730 731 std::string GetStringFUTF8(int message_id, 732 const base::string16& a, 733 const base::string16& b, 734 const base::string16& c) { 735 return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c)); 736 } 737 738 std::string GetStringFUTF8(int message_id, 739 const base::string16& a, 740 const base::string16& b, 741 const base::string16& c, 742 const base::string16& d) { 743 return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d)); 744 } 745 746 base::string16 GetStringFUTF16(int message_id, 747 const base::string16& a) { 748 std::vector<base::string16> replacements; 749 replacements.push_back(a); 750 return GetStringFUTF16(message_id, replacements, NULL); 751 } 752 753 base::string16 GetStringFUTF16(int message_id, 754 const base::string16& a, 755 const base::string16& b) { 756 return GetStringFUTF16(message_id, a, b, NULL); 757 } 758 759 base::string16 GetStringFUTF16(int message_id, 760 const base::string16& a, 761 const base::string16& b, 762 const base::string16& c) { 763 std::vector<base::string16> replacements; 764 replacements.push_back(a); 765 replacements.push_back(b); 766 replacements.push_back(c); 767 return GetStringFUTF16(message_id, replacements, NULL); 768 } 769 770 base::string16 GetStringFUTF16(int message_id, 771 const base::string16& a, 772 const base::string16& b, 773 const base::string16& c, 774 const base::string16& d) { 775 std::vector<base::string16> replacements; 776 replacements.push_back(a); 777 replacements.push_back(b); 778 replacements.push_back(c); 779 replacements.push_back(d); 780 return GetStringFUTF16(message_id, replacements, NULL); 781 } 782 783 base::string16 GetStringFUTF16(int message_id, 784 const base::string16& a, 785 const base::string16& b, 786 const base::string16& c, 787 const base::string16& d, 788 const base::string16& e) { 789 std::vector<base::string16> replacements; 790 replacements.push_back(a); 791 replacements.push_back(b); 792 replacements.push_back(c); 793 replacements.push_back(d); 794 replacements.push_back(e); 795 return GetStringFUTF16(message_id, replacements, NULL); 796 } 797 798 base::string16 GetStringFUTF16(int message_id, 799 const base::string16& a, 800 size_t* offset) { 801 DCHECK(offset); 802 std::vector<size_t> offsets; 803 std::vector<base::string16> replacements; 804 replacements.push_back(a); 805 base::string16 result = GetStringFUTF16(message_id, replacements, &offsets); 806 DCHECK(offsets.size() == 1); 807 *offset = offsets[0]; 808 return result; 809 } 810 811 base::string16 GetStringFUTF16(int message_id, 812 const base::string16& a, 813 const base::string16& b, 814 std::vector<size_t>* offsets) { 815 std::vector<base::string16> replacements; 816 replacements.push_back(a); 817 replacements.push_back(b); 818 return GetStringFUTF16(message_id, replacements, offsets); 819 } 820 821 base::string16 GetStringFUTF16Int(int message_id, int a) { 822 return GetStringFUTF16(message_id, UTF8ToUTF16(base::IntToString(a))); 823 } 824 825 base::string16 GetStringFUTF16Int(int message_id, int64 a) { 826 return GetStringFUTF16(message_id, UTF8ToUTF16(base::Int64ToString(a))); 827 } 828 829 // Specialization of operator() method for base::string16 version. 830 template <> 831 bool StringComparator<base::string16>::operator()(const base::string16& lhs, 832 const base::string16& rhs) { 833 // If we can not get collator instance for specified locale, just do simple 834 // string compare. 835 if (!collator_) 836 return lhs < rhs; 837 return base::i18n::CompareString16WithCollator(collator_, lhs, rhs) == 838 UCOL_LESS; 839 }; 840 841 base::string16 GetPluralStringFUTF16(const std::vector<int>& message_ids, 842 int number) { 843 scoped_ptr<icu::PluralFormat> format = BuildPluralFormat(message_ids); 844 DCHECK(format); 845 846 UErrorCode err = U_ZERO_ERROR; 847 icu::UnicodeString result_files_string = format->format(number, err); 848 int capacity = result_files_string.length() + 1; 849 DCHECK_GT(capacity, 1); 850 base::string16 result; 851 result_files_string.extract( 852 static_cast<UChar*>(WriteInto(&result, capacity)), capacity, err); 853 DCHECK(U_SUCCESS(err)); 854 return result; 855 } 856 857 std::string GetPluralStringFUTF8(const std::vector<int>& message_ids, 858 int number) { 859 return base::UTF16ToUTF8(GetPluralStringFUTF16(message_ids, number)); 860 } 861 862 void SortStrings16(const std::string& locale, 863 std::vector<base::string16>* strings) { 864 SortVectorWithStringKey(locale, strings, false); 865 } 866 867 const std::vector<std::string>& GetAvailableLocales() { 868 return g_available_locales.Get(); 869 } 870 871 void GetAcceptLanguagesForLocale(const std::string& display_locale, 872 std::vector<std::string>* locale_codes) { 873 for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) { 874 if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i], 875 display_locale)) 876 // TODO(jungshik) : Put them at the of the list with language codes 877 // enclosed by brackets instead of skipping. 878 continue; 879 locale_codes->push_back(kAcceptLanguageList[i]); 880 } 881 } 882 883 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) { 884 int width = 0; 885 base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width); 886 DCHECK_GT(width, 0); 887 return width; 888 } 889 890 } // namespace l10n_util 891