Home | History | Annotate | Download | only in l10n
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "ui/base/l10n/l10n_util.h"
      6 
      7 #include <algorithm>
      8 #include <cstdlib>
      9 #include <iterator>
     10 #include <string>
     11 
     12 #include "base/command_line.h"
     13 #include "base/compiler_specific.h"
     14 #include "base/file_util.h"
     15 #include "base/i18n/file_util_icu.h"
     16 #include "base/i18n/rtl.h"
     17 #include "base/i18n/string_compare.h"
     18 #include "base/lazy_instance.h"
     19 #include "base/memory/scoped_ptr.h"
     20 #include "base/path_service.h"
     21 #include "base/strings/string_number_conversions.h"
     22 #include "base/strings/string_split.h"
     23 #include "base/strings/string_util.h"
     24 #include "base/strings/stringprintf.h"
     25 #include "base/strings/sys_string_conversions.h"
     26 #include "base/strings/utf_string_conversions.h"
     27 #include "build/build_config.h"
     28 #include "third_party/icu/source/common/unicode/rbbi.h"
     29 #include "third_party/icu/source/common/unicode/uloc.h"
     30 #include "ui/base/l10n/l10n_util_collator.h"
     31 #include "ui/base/resource/resource_bundle.h"
     32 #include "ui/base/ui_base_paths.h"
     33 
     34 #if defined(OS_ANDROID)
     35 #include "ui/base/l10n/l10n_util_android.h"
     36 #endif
     37 
     38 #if defined(OS_LINUX)
     39 #include <glib.h>
     40 #endif
     41 
     42 #if defined(OS_WIN)
     43 #include "ui/base/l10n/l10n_util_win.h"
     44 #endif  // OS_WIN
     45 
     46 namespace {
     47 
// Table of language codes, some carrying a region/area subtag (e.g. "de-AT",
// "es-419"), each annotated with the English name of the language. The
// entries are listed in ascending order of the code string.
// NOTE(review): the code that consumes this table is not in this part of the
// file; presumably it is the set of languages offered for Accept-Language --
// confirm against the caller.
static const char* const kAcceptLanguageList[] = {
  "af",     // Afrikaans
  "am",     // Amharic
  "ar",     // Arabic
  "az",     // Azerbaijani
  "be",     // Belarusian
  "bg",     // Bulgarian
  "bh",     // Bihari
  "bn",     // Bengali
  "br",     // Breton
  "bs",     // Bosnian
  "ca",     // Catalan
  "co",     // Corsican
  "cs",     // Czech
  "cy",     // Welsh
  "da",     // Danish
  "de",     // German
  "de-AT",  // German (Austria)
  "de-CH",  // German (Switzerland)
  "de-DE",  // German (Germany)
  "el",     // Greek
  "en",     // English
  "en-AU",  // English (Australia)
  "en-CA",  // English (Canada)
  "en-GB",  // English (UK)
  "en-NZ",  // English (New Zealand)
  "en-US",  // English (US)
  "en-ZA",  // English (South Africa)
  "eo",     // Esperanto
  // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
  // Spanish speaking countries?
  "es",     // Spanish
  "es-419", // Spanish (Latin America)
  "et",     // Estonian
  "eu",     // Basque
  "fa",     // Persian
  "fi",     // Finnish
  "fil",    // Filipino
  "fo",     // Faroese
  "fr",     // French
  "fr-CA",  // French (Canada)
  "fr-CH",  // French (Switzerland)
  "fr-FR",  // French (France)
  "fy",     // Frisian
  "ga",     // Irish
  "gd",     // Scots Gaelic
  "gl",     // Galician
  "gn",     // Guarani
  "gu",     // Gujarati
  "ha",     // Hausa
  "haw",    // Hawaiian
  "he",     // Hebrew
  "hi",     // Hindi
  "hr",     // Croatian
  "hu",     // Hungarian
  "hy",     // Armenian
  "ia",     // Interlingua
  "id",     // Indonesian
  "is",     // Icelandic
  "it",     // Italian
  "it-CH",  // Italian (Switzerland)
  "it-IT",  // Italian (Italy)
  "ja",     // Japanese
  "jw",     // Javanese
  "ka",     // Georgian
  "kk",     // Kazakh
  "km",     // Cambodian
  "kn",     // Kannada
  "ko",     // Korean
  "ku",     // Kurdish
  "ky",     // Kyrgyz
  "la",     // Latin
  "ln",     // Lingala
  "lo",     // Laothian
  "lt",     // Lithuanian
  "lv",     // Latvian
  "mk",     // Macedonian
  "ml",     // Malayalam
  "mn",     // Mongolian
  "mo",     // Moldavian
  "mr",     // Marathi
  "ms",     // Malay
  "mt",     // Maltese
  "nb",     // Norwegian (Bokmal)
  "ne",     // Nepali
  "nl",     // Dutch
  "nn",     // Norwegian (Nynorsk)
  "no",     // Norwegian
  "oc",     // Occitan
  "om",     // Oromo
  "or",     // Oriya
  "pa",     // Punjabi
  "pl",     // Polish
  "ps",     // Pashto
  "pt",     // Portuguese
  "pt-BR",  // Portuguese (Brazil)
  "pt-PT",  // Portuguese (Portugal)
  "qu",     // Quechua
  "rm",     // Romansh
  "ro",     // Romanian
  "ru",     // Russian
  "sd",     // Sindhi
  "sh",     // Serbo-Croatian
  "si",     // Sinhalese
  "sk",     // Slovak
  "sl",     // Slovenian
  "sn",     // Shona
  "so",     // Somali
  "sq",     // Albanian
  "sr",     // Serbian
  "st",     // Sesotho
  "su",     // Sundanese
  "sv",     // Swedish
  "sw",     // Swahili
  "ta",     // Tamil
  "te",     // Telugu
  "tg",     // Tajik
  "th",     // Thai
  "ti",     // Tigrinya
  "tk",     // Turkmen
  "to",     // Tonga
  "tr",     // Turkish
  "tt",     // Tatar
  "tw",     // Twi
  "ug",     // Uighur
  "uk",     // Ukrainian
  "ur",     // Urdu
  "uz",     // Uzbek
  "vi",     // Vietnamese
  "xh",     // Xhosa
  "yi",     // Yiddish
  "yo",     // Yoruba
  "zh",     // Chinese
  "zh-CN",  // Chinese (Simplified)
  "zh-TW",  // Chinese (Traditional)
  "zu",     // Zulu
};
    185 
    186 // Returns true if |locale_name| has an alias in the ICU data file.
    187 bool IsDuplicateName(const std::string& locale_name) {
    188   static const char* const kDuplicateNames[] = {
    189     "en",
    190     "pt",
    191     "zh",
    192     "zh_hans_cn",
    193     "zh_hant_hk",
    194     "zh_hant_mo",
    195     "zh_hans_sg",
    196     "zh_hant_tw"
    197   };
    198 
    199   // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain).
    200   // 'es-419' (Spanish in Latin America) is not available in ICU so that it
    201   // has to be added manually in GetAvailableLocales().
    202   if (LowerCaseEqualsASCII(locale_name.substr(0, 3),  "es_"))
    203     return true;
    204   for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
    205     if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0)
    206       return true;
    207   }
    208   return false;
    209 }
    210 
    211 // We added 30+ minimally populated locales with only a few entries
    212 // (exemplar character set, script, writing direction and its own
    213 // lanaguage name). These locales have to be distinguished from the
    214 // fully populated locales to which Chrome is localized.
    215 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
    216   // For partially populated locales, even the translation for "English"
    217   // is not available. A more robust/elegant way to check is to add a special
    218   // field (say, 'isPartial' to our version of ICU locale files) and
    219   // check its value, but this hack seems to work well.
    220   return !l10n_util::IsLocaleNameTranslated("en", locale_name);
    221 }
    222 
    223 #if !defined(OS_MACOSX)
    224 bool IsLocaleAvailable(const std::string& locale) {
    225   // If locale has any illegal characters in it, we don't want to try to
    226   // load it because it may be pointing outside the locale data file directory.
    227   if (!file_util::IsFilenameLegal(ASCIIToUTF16(locale)))
    228     return false;
    229 
    230   // IsLocalePartiallyPopulated() can be called here for an early return w/o
    231   // checking the resource availability below. It'd help when Chrome is run
    232   // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
    233   // but it'd slow down the start up time a little bit for locales Chrome is
    234   // localized to. So, we don't call it here.
    235   if (!l10n_util::IsLocaleSupportedByOS(locale))
    236     return false;
    237 
    238   // If the ResourceBundle is not yet initialized, return false to avoid the
    239   // CHECK failure in ResourceBundle::GetSharedInstance().
    240   if (!ResourceBundle::HasSharedInstance())
    241     return false;
    242 
    243   // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function
    244   // so that this can be invoked without initializing the global instance.
    245   // See crbug.com/230432: CHECK failure in GetUserDataDir().
    246   return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale);
    247 }
    248 
// Tries to map |locale| onto a locale for which IsLocaleAvailable() is true.
// On success the chosen name is stored in |*resolved_locale| and true is
// returned; otherwise false is returned and |*resolved_locale| is left
// untouched. Candidates are tried in this order:
//   1. |locale| itself;
//   2. for "lang-REGION" input, a per-language fallback (see below);
//   3. the alias table at the end (no -> nb, tl -> fil, iw -> he, en -> en-US).
bool CheckAndResolveLocale(const std::string& locale,
                           std::string* resolved_locale) {
  if (IsLocaleAvailable(locale)) {
    *resolved_locale = locale;
    return true;
  }

  // A locale that carries an '@' variant (e.g. ca_ES@valencia) gets no
  // fallback at all: if the exact name was not available above, give up.
  // NOTE(review): the previous comment here described trying ca@valencia
  // before ca; the code has never done that, it simply returns false.
  std::string::size_type variant_pos = locale.find('@');
  if (variant_pos != std::string::npos)
    return false;

  // If the locale matches language but not country, use that instead.
  // TODO(jungshik) : Nothing is done about languages that Chrome
  // does not support but available on Windows. We fall
  // back to en-US in GetApplicationLocale so that it's a not critical,
  // but we can do better.
  std::string::size_type hyphen_pos = locale.find('-');
  // When there is no hyphen, hyphen_pos is npos and |lang| is all of |locale|.
  std::string lang(locale, 0, hyphen_pos);
  if (hyphen_pos != std::string::npos && hyphen_pos > 0) {
    std::string region(locale, hyphen_pos + 1);
    // Starts out as the bare language; the branches below may append a region.
    std::string tmp_locale(lang);
    // Map es-RR other than es-ES to es-419 (Chrome's Latin American
    // Spanish locale).
    if (LowerCaseEqualsASCII(lang, "es") &&
        !LowerCaseEqualsASCII(region, "es")) {
      tmp_locale.append("-419");
    } else if (LowerCaseEqualsASCII(lang, "zh")) {
      // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
      if (LowerCaseEqualsASCII(region, "hk") ||
          LowerCaseEqualsASCII(region, "mo")) { // Macao
        tmp_locale.append("-TW");
      } else {
        tmp_locale.append("-CN");
      }
    } else if (LowerCaseEqualsASCII(lang, "en")) {
      // Map Australian, Canadian, New Zealand and South African English
      // to British English for now.
      // TODO(jungshik): en-CA may have to change sides once
      // we have OS locale separate from app locale (Chrome's UI language).
      if (LowerCaseEqualsASCII(region, "au") ||
          LowerCaseEqualsASCII(region, "ca") ||
          LowerCaseEqualsASCII(region, "nz") ||
          LowerCaseEqualsASCII(region, "za")) {
        tmp_locale.append("-GB");
      } else {
        tmp_locale.append("-US");
      }
    }
    // For every other language |tmp_locale| is just the language code.
    if (IsLocaleAvailable(tmp_locale)) {
      resolved_locale->swap(tmp_locale);
      return true;
    }
  }

  // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
  struct {
    const char* source;
    const char* dest;
  } alias_map[] = {
      {"no", "nb"},
      {"tl", "fil"},
      {"iw", "he"},
      {"en", "en-US"},
  };

  // The aliases are matched against the language part only, ignoring ASCII
  // case; an alias counts only if its destination locale is available.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) {
    if (LowerCaseEqualsASCII(lang, alias_map[i].source)) {
      std::string tmp_locale(alias_map[i].dest);
      if (IsLocaleAvailable(tmp_locale)) {
        resolved_locale->swap(tmp_locale);
        return true;
      }
    }
  }

  return false;
}
    329 #endif
    330 
    331 // On Linux, the text layout engine Pango determines paragraph directionality
    332 // by looking at the first strongly-directional character in the text. This
    333 // means text such as "Google Chrome foo bar..." will be layed out LTR even
    334 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
    335 // cases.
    336 void AdjustParagraphDirectionality(string16* paragraph) {
    337 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
    338   if (base::i18n::IsRTL() &&
    339       base::i18n::StringContainsStrongRTLChars(*paragraph)) {
    340     paragraph->insert(0, 1, static_cast<char16>(base::i18n::kRightToLeftMark));
    341   }
    342 #endif
    343 }
    344 
    345 #if defined(OS_WIN)
    346 std::string GetCanonicalLocale(const std::string& locale) {
    347   return base::i18n::GetCanonicalLocale(locale.c_str());
    348 }
    349 #endif
    350 
    351 struct AvailableLocalesTraits :
    352     base::DefaultLazyInstanceTraits<std::vector<std::string> > {
    353   static std::vector<std::string>* New(void* instance) {
    354     std::vector<std::string>* locales =
    355         base::DefaultLazyInstanceTraits<std::vector<std::string> >::New(
    356             instance);
    357     int num_locales = uloc_countAvailable();
    358     for (int i = 0; i < num_locales; ++i) {
    359       std::string locale_name = uloc_getAvailable(i);
    360       // Filter out the names that have aliases.
    361       if (IsDuplicateName(locale_name))
    362         continue;
    363       // Filter out locales for which we have only partially populated data
    364       // and to which Chrome is not localized.
    365       if (IsLocalePartiallyPopulated(locale_name))
    366         continue;
    367       if (!l10n_util::IsLocaleSupportedByOS(locale_name))
    368         continue;
    369       // Normalize underscores to hyphens because that's what our locale files
    370       // use.
    371       std::replace(locale_name.begin(), locale_name.end(), '_', '-');
    372 
    373       // Map the Chinese locale names over to zh-CN and zh-TW.
    374       if (LowerCaseEqualsASCII(locale_name, "zh-hans")) {
    375         locale_name = "zh-CN";
    376       } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) {
    377         locale_name = "zh-TW";
    378       }
    379       locales->push_back(locale_name);
    380     }
    381 
    382     // Manually add 'es-419' to the list. See the comment in IsDuplicateName().
    383     locales->push_back("es-419");
    384     return locales;
    385   }
    386 };
    387 
// File-local list of locale names, held in a base::LazyInstance that uses
// AvailableLocalesTraits; the list contents are produced by
// AvailableLocalesTraits::New().
// NOTE(review): presumably New() runs once, on first access -- confirm
// against base/lazy_instance.h.
base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits >
    g_available_locales = LAZY_INSTANCE_INITIALIZER;
    390 
    391 }  // namespace
    392 
    393 namespace l10n_util {
    394 
// Determines the locale the application should run in, makes it the ICU
// default locale via base::i18n::SetICUDefaultLocale(), and returns it.
//
// |pref_locale| is the locale stored in the application preference; whether
// and how it is used depends on the platform (see the branches below).
//
// On Mac the result is never empty. On the other platforms an empty string is
// returned when neither a candidate nor the "en-US" fallback is available; in
// that case the ICU default locale is not changed.
std::string GetApplicationLocale(const std::string& pref_locale) {
#if defined(OS_MACOSX)

  // Use any override (Cocoa for the browser), otherwise use the preference
  // passed to the function.
  std::string app_locale = l10n_util::GetLocaleOverride();
  if (app_locale.empty())
    app_locale = pref_locale;

  // The above should handle all of the cases Chrome normally hits, but for some
  // unit tests, we need something to fall back to.
  if (app_locale.empty())
    app_locale = "en-US";

  // Windows/Linux call SetICUDefaultLocale after determining the actual locale
  // with CheckAndResolveLocale to make ICU APIs work in that locale.
  // Mac doesn't use a locale directory tree of resources (it uses Mac style
  // resources), so mirror the Windows/Linux behavior of calling
  // SetICUDefaultLocale.
  base::i18n::SetICUDefaultLocale(app_locale);
  return app_locale;

#else

  std::string resolved_locale;
  // Locale names to try, in priority order; filled in per platform below.
  std::vector<std::string> candidates;

  // We only use --lang and the app pref on Windows.  On Linux, we only
  // look at the LC_*/LANG environment variables.  We do, however, pass --lang
  // to renderer and plugin processes so they know what language the parent
  // process decided to use.

#if defined(OS_WIN)

  // First, try the preference value.
  if (!pref_locale.empty())
    candidates.push_back(GetCanonicalLocale(pref_locale));

  // Next, try the overridden locale.
  const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
  if (!languages.empty()) {
    candidates.reserve(candidates.size() + languages.size());
    // Append the canonical form of every override, keeping their order.
    std::transform(languages.begin(), languages.end(),
                   std::back_inserter(candidates), &GetCanonicalLocale);
  } else {
    // If no override was set, defer to ICU
    candidates.push_back(base::i18n::GetConfiguredLocale());
  }

#elif defined(OS_CHROMEOS) || (defined(USE_AURA) && !defined(OS_LINUX))

  // On ChromeOS, use the application locale preference.
  if (!pref_locale.empty())
    candidates.push_back(pref_locale);

#elif defined(OS_ANDROID)

  // On Android, query java.util.Locale for the default locale.
  candidates.push_back(GetDefaultLocale());

#elif defined(OS_LINUX)
  // If we're on a different Linux system, we have glib.

  // GLib implements correct environment variable parsing with
  // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
  // We used to use our custom parsing code along with ICU for this purpose.
  // If we have a port that does not depend on GTK, we have to
  // restore our custom code for that port.
  const char* const* languages = g_get_language_names();
  DCHECK(languages);  // A valid pointer is guaranteed.
  DCHECK(*languages);  // At least one entry, "C", is guaranteed.

  // The array is NULL-terminated; every entry becomes a candidate.
  for (; *languages != NULL; ++languages) {
    candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
  }

#else
#error Unsupported platform, see build/build_config.h
#endif

  // The first candidate that resolves to an available locale wins.
  std::vector<std::string>::const_iterator i = candidates.begin();
  for (; i != candidates.end(); ++i) {
    if (CheckAndResolveLocale(*i, &resolved_locale)) {
      base::i18n::SetICUDefaultLocale(resolved_locale);
      return resolved_locale;
    }
  }

  // Fallback on en-US.
  const std::string fallback_locale("en-US");
  if (IsLocaleAvailable(fallback_locale)) {
    base::i18n::SetICUDefaultLocale(fallback_locale);
    return fallback_locale;
  }

  // Nothing usable was found, not even en-US.
  return std::string();

#endif
}
    494 
    495 bool IsLocaleNameTranslated(const char* locale,
    496                             const std::string& display_locale) {
    497   string16 display_name =
    498       l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
    499   // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
    500   // uloc_getDisplayName returns the actual translation or the default
    501   // value (locale code), we have to rely on this hack to tell whether
    502   // the translation is available or not.  If ICU doesn't have a translated
    503   // name for this locale, GetDisplayNameForLocale will just return the
    504   // locale code.
    505   return !IsStringASCII(display_name) || UTF16ToASCII(display_name) != locale;
    506 }
    507 
// Returns the name of |locale| as it should be displayed to a user whose
// language is |display_locale|.
//
// |locale| values "zh-CN" and "zh-TW" are looked up as "zh-Hans" and
// "zh-Hant" respectively (see below). When |is_for_ui| is true and
// base::i18n::IsRTL() holds, the result is additionally passed through
// base::i18n::AdjustStringForLocaleDirection().
string16 GetDisplayNameForLocale(const std::string& locale,
                                 const std::string& display_locale,
                                 bool is_for_ui) {
  std::string locale_code = locale;
  // Internally, we use the language code of zh-CN and zh-TW, but we want the
  // display names to be Chinese (Simplified) and Chinese (Traditional) instead
  // of Chinese (China) and Chinese (Taiwan).  To do that, we pass zh-Hans
  // and zh-Hant to ICU. Even with this mapping, we'd get
  // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and
  // even longer results in other languages. Arguably, they're better than
  // the current results : Chinese (China) / Chinese (Taiwan).
  // TODO(jungshik): Do one of the following:
  // 1. Special-case Chinese by getting the custom-translation for them
  // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE.
  // 3. Get translations for two directly from the ICU resource bundle
  // because they're not accessible with any other API.
  // 4. Patch ICU to special-case zh-Hans/zh-Hant for us.
  // #1 and #2 wouldn't work if display_locale != current UI locale although
  // we can think of additional hack to work around the problem.
  // #3 can be potentially expensive.
  if (locale_code == "zh-CN")
    locale_code = "zh-Hans";
  else if (locale_code == "zh-TW")
    locale_code = "zh-Hant";

  string16 display_name;
#if defined(OS_ANDROID)
  // Use Java API to get locale display name so that we can remove most of
  // the lang data from icu data to reduce binary size, except for zh-Hans and
  // zh-Hant because the current Android Java API doesn't support scripts.
  // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
  // Android Java API supports scripts.
  // Note: the two-argument call below is a different overload from this
  // function; the trailing 'else' pairs with the brace block after #endif.
  if (!StartsWithASCII(locale_code, "zh-Han", true)) {
    display_name = GetDisplayNameForLocale(locale_code, display_locale);
  } else
#endif
  {
    UErrorCode error = U_ZERO_ERROR;
    const int kBufferSize = 1024;

    // ICU writes the name into |display_name|'s buffer and returns its
    // length; the string is then resized to that length.
    int actual_size = uloc_getDisplayName(
        locale_code.c_str(), display_locale.c_str(),
        WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
    DCHECK(U_SUCCESS(error));
    display_name.resize(actual_size);
  }

  // Add directional markup so parentheses are properly placed.
  if (is_for_ui && base::i18n::IsRTL())
    base::i18n::AdjustStringForLocaleDirection(&display_name);
  return display_name;
}
    560 
    561 string16 GetDisplayNameForCountry(const std::string& country_code,
    562                                   const std::string& display_locale) {
    563   return GetDisplayNameForLocale("_" + country_code, display_locale, false);
    564 }
    565 
    566 std::string NormalizeLocale(const std::string& locale) {
    567   std::string normalized_locale(locale);
    568   std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_');
    569 
    570   return normalized_locale;
    571 }
    572 
    573 void GetParentLocales(const std::string& current_locale,
    574                       std::vector<std::string>* parent_locales) {
    575   std::string locale(NormalizeLocale(current_locale));
    576 
    577   const int kNameCapacity = 256;
    578   char parent[kNameCapacity];
    579   base::strlcpy(parent, locale.c_str(), kNameCapacity);
    580   parent_locales->push_back(parent);
    581   UErrorCode err = U_ZERO_ERROR;
    582   while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
    583     if (U_FAILURE(err))
    584       break;
    585     parent_locales->push_back(parent);
    586   }
    587 }
    588 
    589 bool IsValidLocaleSyntax(const std::string& locale) {
    590   // Check that the length is plausible.
    591   if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
    592     return false;
    593 
    594   // Strip off the part after an '@' sign, which might contain keywords,
    595   // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
    596   // We don't validate that part much, just check that there's at least one
    597   // equals sign in a plausible place. Normalize the prefix so that hyphens
    598   // are changed to underscores.
    599   std::string prefix = NormalizeLocale(locale);
    600   size_t split_point = locale.find("@");
    601   if (split_point != std::string::npos) {
    602     std::string keywords = locale.substr(split_point + 1);
    603     prefix = locale.substr(0, split_point);
    604 
    605     size_t equals_loc = keywords.find("=");
    606     if (equals_loc == std::string::npos ||
    607         equals_loc < 1 || equals_loc > keywords.size() - 2)
    608       return false;
    609   }
    610 
    611   // Check that all characters before the at-sign are alphanumeric or
    612   // underscore.
    613   for (size_t i = 0; i < prefix.size(); i++) {
    614     char ch = prefix[i];
    615     if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_')
    616       return false;
    617   }
    618 
    619   // Check that the initial token (before the first hyphen/underscore)
    620   // is 1 - 3 alphabetical characters (a language tag).
    621   for (size_t i = 0; i < prefix.size(); i++) {
    622     char ch = prefix[i];
    623     if (ch == '_') {
    624       if (i < 1 || i > 3)
    625         return false;
    626       break;
    627     }
    628     if (!IsAsciiAlpha(ch))
    629       return false;
    630   }
    631 
    632   // Check that the all tokens after the initial token are 1 - 8 characters.
    633   // (Tokenize/StringTokenizer don't work here, they collapse multiple
    634   // delimiters into one.)
    635   int token_len = 0;
    636   int token_index = 0;
    637   for (size_t i = 0; i < prefix.size(); i++) {
    638     if (prefix[i] != '_') {
    639       token_len++;
    640       continue;
    641     }
    642 
    643     if (token_index > 0 && (token_len < 1 || token_len > 8)) {
    644       return false;
    645     }
    646     token_index++;
    647     token_len = 0;
    648   }
    649   if (token_index == 0 && (token_len < 1 || token_len > 3)) {
    650     return false;
    651   } else if (token_len < 1 || token_len > 8) {
    652     return false;
    653   }
    654 
    655   return true;
    656 }
    657 
    658 std::string GetStringUTF8(int message_id) {
    659   return UTF16ToUTF8(GetStringUTF16(message_id));
    660 }
    661 
    662 string16 GetStringUTF16(int message_id) {
    663   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
    664   string16 str = rb.GetLocalizedString(message_id);
    665   AdjustParagraphDirectionality(&str);
    666 
    667   return str;
    668 }
    669 
    670 static string16 GetStringF(int message_id,
    671                            const std::vector<string16>& replacements,
    672                            std::vector<size_t>* offsets) {
    673   // TODO(tc): We could save a string copy if we got the raw string as
    674   // a StringPiece and were able to call ReplaceStringPlaceholders with
    675   // a StringPiece format string and string16 substitution strings.  In
    676   // practice, the strings should be relatively short.
    677   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
    678   const string16& format_string = rb.GetLocalizedString(message_id);
    679 
    680 #ifndef NDEBUG
    681   // Make sure every replacement string is being used, so we don't just
    682   // silently fail to insert one. If |offsets| is non-NULL, then don't do this
    683   // check as the code may simply want to find the placeholders rather than
    684   // actually replacing them.
    685   if (!offsets) {
    686     std::string utf8_string = UTF16ToUTF8(format_string);
    687 
    688     // $9 is the highest allowed placeholder.
    689     for (size_t i = 0; i < 9; ++i) {
    690       bool placeholder_should_exist = replacements.size() > i;
    691 
    692       std::string placeholder =
    693           base::StringPrintf("$%d", static_cast<int>(i + 1));
    694       size_t pos = utf8_string.find(placeholder.c_str());
    695       if (placeholder_should_exist) {
    696         DCHECK_NE(std::string::npos, pos) <<
    697             " Didn't find a " << placeholder << " placeholder in " <<
    698             utf8_string;
    699       } else {
    700         DCHECK_EQ(std::string::npos, pos) <<
    701             " Unexpectedly found a " << placeholder << " placeholder in " <<
    702             utf8_string;
    703       }
    704     }
    705   }
    706 #endif
    707 
    708   string16 formatted = ReplaceStringPlaceholders(format_string, replacements,
    709                                                  offsets);
    710   AdjustParagraphDirectionality(&formatted);
    711 
    712   return formatted;
    713 }
    714 
    715 std::string GetStringFUTF8(int message_id,
    716                            const string16& a) {
    717   return UTF16ToUTF8(GetStringFUTF16(message_id, a));
    718 }
    719 
    720 std::string GetStringFUTF8(int message_id,
    721                            const string16& a,
    722                            const string16& b) {
    723   return UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
    724 }
    725 
    726 std::string GetStringFUTF8(int message_id,
    727                            const string16& a,
    728                            const string16& b,
    729                            const string16& c) {
    730   return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
    731 }
    732 
    733 std::string GetStringFUTF8(int message_id,
    734                            const string16& a,
    735                            const string16& b,
    736                            const string16& c,
    737                            const string16& d) {
    738   return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
    739 }
    740 
    741 string16 GetStringFUTF16(int message_id,
    742                          const string16& a) {
    743   std::vector<string16> replacements;
    744   replacements.push_back(a);
    745   return GetStringF(message_id, replacements, NULL);
    746 }
    747 
    748 string16 GetStringFUTF16(int message_id,
    749                          const string16& a,
    750                          const string16& b) {
    751   return GetStringFUTF16(message_id, a, b, NULL);
    752 }
    753 
    754 string16 GetStringFUTF16(int message_id,
    755                          const string16& a,
    756                          const string16& b,
    757                          const string16& c) {
    758   std::vector<string16> replacements;
    759   replacements.push_back(a);
    760   replacements.push_back(b);
    761   replacements.push_back(c);
    762   return GetStringF(message_id, replacements, NULL);
    763 }
    764 
    765 string16 GetStringFUTF16(int message_id,
    766                          const string16& a,
    767                          const string16& b,
    768                          const string16& c,
    769                          const string16& d) {
    770   std::vector<string16> replacements;
    771   replacements.push_back(a);
    772   replacements.push_back(b);
    773   replacements.push_back(c);
    774   replacements.push_back(d);
    775   return GetStringF(message_id, replacements, NULL);
    776 }
    777 
    778 string16 GetStringFUTF16(int message_id,
    779                          const string16& a,
    780                          const string16& b,
    781                          const string16& c,
    782                          const string16& d,
    783                          const string16& e) {
    784   std::vector<string16> replacements;
    785   replacements.push_back(a);
    786   replacements.push_back(b);
    787   replacements.push_back(c);
    788   replacements.push_back(d);
    789   replacements.push_back(e);
    790   return GetStringF(message_id, replacements, NULL);
    791 }
    792 
    793 string16 GetStringFUTF16(int message_id, const string16& a, size_t* offset) {
    794   DCHECK(offset);
    795   std::vector<size_t> offsets;
    796   std::vector<string16> replacements;
    797   replacements.push_back(a);
    798   string16 result = GetStringF(message_id, replacements, &offsets);
    799   DCHECK(offsets.size() == 1);
    800   *offset = offsets[0];
    801   return result;
    802 }
    803 
    804 string16 GetStringFUTF16(int message_id,
    805                          const string16& a,
    806                          const string16& b,
    807                          std::vector<size_t>* offsets) {
    808   std::vector<string16> replacements;
    809   replacements.push_back(a);
    810   replacements.push_back(b);
    811   return GetStringF(message_id, replacements, offsets);
    812 }
    813 
    814 string16 GetStringFUTF16Int(int message_id, int a) {
    815   return GetStringFUTF16(message_id, UTF8ToUTF16(base::IntToString(a)));
    816 }
    817 
    818 string16 GetStringFUTF16Int(int message_id, int64 a) {
    819   return GetStringFUTF16(message_id, UTF8ToUTF16(base::Int64ToString(a)));
    820 }
    821 
    822 // Specialization of operator() method for string16 version.
    823 template <>
    824 bool StringComparator<string16>::operator()(const string16& lhs,
    825                                             const string16& rhs) {
    826   // If we can not get collator instance for specified locale, just do simple
    827   // string compare.
    828   if (!collator_)
    829     return lhs < rhs;
    830   return base::i18n::CompareString16WithCollator(collator_, lhs, rhs) ==
    831       UCOL_LESS;
    832 };
    833 
    834 void SortStrings16(const std::string& locale,
    835                    std::vector<string16>* strings) {
    836   SortVectorWithStringKey(locale, strings, false);
    837 }
    838 
    839 const std::vector<std::string>& GetAvailableLocales() {
    840   return g_available_locales.Get();
    841 }
    842 
    843 void GetAcceptLanguagesForLocale(const std::string& display_locale,
    844                                  std::vector<std::string>* locale_codes) {
    845   for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
    846     if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i],
    847                                            display_locale))
    848       // TODO(jungshik) : Put them at the of the list with language codes
    849       // enclosed by brackets instead of skipping.
    850         continue;
    851     locale_codes->push_back(kAcceptLanguageList[i]);
    852   }
    853 }
    854 
    855 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
    856   int width = 0;
    857   base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
    858   DCHECK_GT(width, 0);
    859   return width;
    860 }
    861 
    862 }  // namespace l10n_util
    863