Home | History | Annotate | Download | only in l10n
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "ui/base/l10n/l10n_util.h"
      6 
      7 #include <algorithm>
      8 #include <cstdlib>
      9 #include <iterator>
     10 #include <string>
     11 
     12 #include "base/command_line.h"
     13 #include "base/compiler_specific.h"
     14 #include "base/file_util.h"
     15 #include "base/i18n/file_util_icu.h"
     16 #include "base/i18n/rtl.h"
     17 #include "base/i18n/string_compare.h"
     18 #include "base/lazy_instance.h"
     19 #include "base/memory/scoped_ptr.h"
     20 #include "base/path_service.h"
     21 #include "base/strings/string_number_conversions.h"
     22 #include "base/strings/string_split.h"
     23 #include "base/strings/string_util.h"
     24 #include "base/strings/stringprintf.h"
     25 #include "base/strings/sys_string_conversions.h"
     26 #include "base/strings/utf_string_conversions.h"
     27 #include "build/build_config.h"
     28 #include "third_party/icu/source/common/unicode/rbbi.h"
     29 #include "third_party/icu/source/common/unicode/uloc.h"
     30 #include "ui/base/l10n/l10n_util_collator.h"
     31 #include "ui/base/l10n/l10n_util_plurals.h"
     32 #include "ui/base/resource/resource_bundle.h"
     33 #include "ui/base/ui_base_paths.h"
     34 
     35 #if defined(OS_ANDROID)
     36 #include "ui/base/l10n/l10n_util_android.h"
     37 #endif
     38 
     39 #if defined(USE_GLIB)
     40 #include <glib.h>
     41 #endif
     42 
     43 #if defined(OS_WIN)
     44 #include "ui/base/l10n/l10n_util_win.h"
     45 #endif  // OS_WIN
     46 
     47 namespace {
     48 
     49 static const char* const kAcceptLanguageList[] = {
     50   "af",     // Afrikaans
     51   "am",     // Amharic
     52   "ar",     // Arabic
     53   "az",     // Azerbaijani
     54   "be",     // Belarusian
     55   "bg",     // Bulgarian
     56   "bh",     // Bihari
     57   "bn",     // Bengali
     58   "br",     // Breton
     59   "bs",     // Bosnian
     60   "ca",     // Catalan
     61   "co",     // Corsican
     62   "cs",     // Czech
     63   "cy",     // Welsh
     64   "da",     // Danish
     65   "de",     // German
     66   "de-AT",  // German (Austria)
     67   "de-CH",  // German (Switzerland)
     68   "de-DE",  // German (Germany)
     69   "el",     // Greek
     70   "en",     // English
     71   "en-AU",  // English (Australia)
     72   "en-CA",  // English (Canada)
     73   "en-GB",  // English (UK)
     74   "en-NZ",  // English (New Zealand)
     75   "en-US",  // English (US)
     76   "en-ZA",  // English (South Africa)
     77   "eo",     // Esperanto
     78   // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
     79   // Spanish speaking countries?
     80   "es",     // Spanish
     81   "es-419", // Spanish (Latin America)
     82   "et",     // Estonian
     83   "eu",     // Basque
     84   "fa",     // Persian
     85   "fi",     // Finnish
     86   "fil",    // Filipino
     87   "fo",     // Faroese
     88   "fr",     // French
     89   "fr-CA",  // French (Canada)
     90   "fr-CH",  // French (Switzerland)
     91   "fr-FR",  // French (France)
     92   "fy",     // Frisian
     93   "ga",     // Irish
     94   "gd",     // Scots Gaelic
     95   "gl",     // Galician
     96   "gn",     // Guarani
     97   "gu",     // Gujarati
     98   "ha",     // Hausa
     99   "haw",    // Hawaiian
    100   "he",     // Hebrew
    101   "hi",     // Hindi
    102   "hr",     // Croatian
    103   "hu",     // Hungarian
    104   "hy",     // Armenian
    105   "ia",     // Interlingua
    106   "id",     // Indonesian
    107   "is",     // Icelandic
    108   "it",     // Italian
    109   "it-CH",  // Italian (Switzerland)
    110   "it-IT",  // Italian (Italy)
    111   "ja",     // Japanese
    112   "jw",     // Javanese
    113   "ka",     // Georgian
    114   "kk",     // Kazakh
    115   "km",     // Cambodian
    116   "kn",     // Kannada
    117   "ko",     // Korean
    118   "ku",     // Kurdish
    119   "ky",     // Kyrgyz
    120   "la",     // Latin
    121   "ln",     // Lingala
    122   "lo",     // Laothian
    123   "lt",     // Lithuanian
    124   "lv",     // Latvian
    125   "mk",     // Macedonian
    126   "ml",     // Malayalam
    127   "mn",     // Mongolian
    128   "mo",     // Moldavian
    129   "mr",     // Marathi
    130   "ms",     // Malay
    131   "mt",     // Maltese
    132   "nb",     // Norwegian (Bokmal)
    133   "ne",     // Nepali
    134   "nl",     // Dutch
    135   "nn",     // Norwegian (Nynorsk)
    136   "no",     // Norwegian
    137   "oc",     // Occitan
    138   "om",     // Oromo
    139   "or",     // Oriya
    140   "pa",     // Punjabi
    141   "pl",     // Polish
    142   "ps",     // Pashto
    143   "pt",     // Portuguese
    144   "pt-BR",  // Portuguese (Brazil)
    145   "pt-PT",  // Portuguese (Portugal)
    146   "qu",     // Quechua
    147   "rm",     // Romansh
    148   "ro",     // Romanian
    149   "ru",     // Russian
    150   "sd",     // Sindhi
    151   "sh",     // Serbo-Croatian
    152   "si",     // Sinhalese
    153   "sk",     // Slovak
    154   "sl",     // Slovenian
    155   "sn",     // Shona
    156   "so",     // Somali
    157   "sq",     // Albanian
    158   "sr",     // Serbian
    159   "st",     // Sesotho
    160   "su",     // Sundanese
    161   "sv",     // Swedish
    162   "sw",     // Swahili
    163   "ta",     // Tamil
    164   "te",     // Telugu
    165   "tg",     // Tajik
    166   "th",     // Thai
    167   "ti",     // Tigrinya
    168   "tk",     // Turkmen
    169   "to",     // Tonga
    170   "tr",     // Turkish
    171   "tt",     // Tatar
    172   "tw",     // Twi
    173   "ug",     // Uighur
    174   "uk",     // Ukrainian
    175   "ur",     // Urdu
    176   "uz",     // Uzbek
    177   "vi",     // Vietnamese
    178   "xh",     // Xhosa
    179   "yi",     // Yiddish
    180   "yo",     // Yoruba
    181   "zh",     // Chinese
    182   "zh-CN",  // Chinese (Simplified)
    183   "zh-TW",  // Chinese (Traditional)
    184   "zu",     // Zulu
    185 };
    186 
    187 // Returns true if |locale_name| has an alias in the ICU data file.
    188 bool IsDuplicateName(const std::string& locale_name) {
    189   static const char* const kDuplicateNames[] = {
    190     "en",
    191     "pt",
    192     "zh",
    193     "zh_hans_cn",
    194     "zh_hant_hk",
    195     "zh_hant_mo",
    196     "zh_hans_sg",
    197     "zh_hant_tw"
    198   };
    199 
    200   // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain).
    201   // 'es-419' (Spanish in Latin America) is not available in ICU so that it
    202   // has to be added manually in GetAvailableLocales().
    203   if (LowerCaseEqualsASCII(locale_name.substr(0, 3),  "es_"))
    204     return true;
    205   for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
    206     if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0)
    207       return true;
    208   }
    209   return false;
    210 }
    211 
    212 // We added 30+ minimally populated locales with only a few entries
    213 // (exemplar character set, script, writing direction and its own
    214 // lanaguage name). These locales have to be distinguished from the
    215 // fully populated locales to which Chrome is localized.
    216 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
    217   // For partially populated locales, even the translation for "English"
    218   // is not available. A more robust/elegant way to check is to add a special
    219   // field (say, 'isPartial' to our version of ICU locale files) and
    220   // check its value, but this hack seems to work well.
    221   return !l10n_util::IsLocaleNameTranslated("en", locale_name);
    222 }
    223 
    224 #if !defined(OS_MACOSX)
    225 bool IsLocaleAvailable(const std::string& locale) {
    226   // If locale has any illegal characters in it, we don't want to try to
    227   // load it because it may be pointing outside the locale data file directory.
    228   if (!file_util::IsFilenameLegal(base::ASCIIToUTF16(locale)))
    229     return false;
    230 
    231   // IsLocalePartiallyPopulated() can be called here for an early return w/o
    232   // checking the resource availability below. It'd help when Chrome is run
    233   // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
    234   // but it'd slow down the start up time a little bit for locales Chrome is
    235   // localized to. So, we don't call it here.
    236   if (!l10n_util::IsLocaleSupportedByOS(locale))
    237     return false;
    238 
    239   // If the ResourceBundle is not yet initialized, return false to avoid the
    240   // CHECK failure in ResourceBundle::GetSharedInstance().
    241   if (!ResourceBundle::HasSharedInstance())
    242     return false;
    243 
    244   // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function
    245   // so that this can be invoked without initializing the global instance.
    246   // See crbug.com/230432: CHECK failure in GetUserDataDir().
    247   return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale);
    248 }
    249 #endif
    250 
    251 // On Linux, the text layout engine Pango determines paragraph directionality
    252 // by looking at the first strongly-directional character in the text. This
    253 // means text such as "Google Chrome foo bar..." will be layed out LTR even
    254 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
    255 // cases.
    256 void AdjustParagraphDirectionality(base::string16* paragraph) {
    257 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
    258   if (base::i18n::IsRTL() &&
    259       base::i18n::StringContainsStrongRTLChars(*paragraph)) {
    260     paragraph->insert(0, 1,
    261                       static_cast<base::char16>(base::i18n::kRightToLeftMark));
    262   }
    263 #endif
    264 }
    265 
    266 struct AvailableLocalesTraits
    267     : base::DefaultLazyInstanceTraits<std::vector<std::string> > {
    268   static std::vector<std::string>* New(void* instance) {
    269     std::vector<std::string>* locales =
    270         base::DefaultLazyInstanceTraits<std::vector<std::string> >::New(
    271             instance);
    272     int num_locales = uloc_countAvailable();
    273     for (int i = 0; i < num_locales; ++i) {
    274       std::string locale_name = uloc_getAvailable(i);
    275       // Filter out the names that have aliases.
    276       if (IsDuplicateName(locale_name))
    277         continue;
    278       // Filter out locales for which we have only partially populated data
    279       // and to which Chrome is not localized.
    280       if (IsLocalePartiallyPopulated(locale_name))
    281         continue;
    282       if (!l10n_util::IsLocaleSupportedByOS(locale_name))
    283         continue;
    284       // Normalize underscores to hyphens because that's what our locale files
    285       // use.
    286       std::replace(locale_name.begin(), locale_name.end(), '_', '-');
    287 
    288       // Map the Chinese locale names over to zh-CN and zh-TW.
    289       if (LowerCaseEqualsASCII(locale_name, "zh-hans")) {
    290         locale_name = "zh-CN";
    291       } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) {
    292         locale_name = "zh-TW";
    293       }
    294       locales->push_back(locale_name);
    295     }
    296 
    297     // Manually add 'es-419' to the list. See the comment in IsDuplicateName().
    298     locales->push_back("es-419");
    299     return locales;
    300   }
    301 };
    302 
    303 base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
    304     g_available_locales = LAZY_INSTANCE_INITIALIZER;
    305 
    306 }  // namespace
    307 
    308 namespace l10n_util {
    309 
    310 std::string GetCanonicalLocale(const std::string& locale) {
    311   return base::i18n::GetCanonicalLocale(locale.c_str());
    312 }
    313 
    314 bool CheckAndResolveLocale(const std::string& locale,
    315                            std::string* resolved_locale) {
    316 #if defined(OS_MACOSX)
    317   NOTIMPLEMENTED();
    318   return false;
    319 #else
    320   if (IsLocaleAvailable(locale)) {
    321     *resolved_locale = locale;
    322     return true;
    323   }
    324 
    325   // If there's a variant, skip over it so we can try without the region
    326   // code.  For example, ca_ES@valencia should cause us to try ca@valencia
    327   // before ca.
    328   std::string::size_type variant_pos = locale.find('@');
    329   if (variant_pos != std::string::npos)
    330     return false;
    331 
    332   // If the locale matches language but not country, use that instead.
    333   // TODO(jungshik) : Nothing is done about languages that Chrome
    334   // does not support but available on Windows. We fall
    335   // back to en-US in GetApplicationLocale so that it's a not critical,
    336   // but we can do better.
    337   std::string::size_type hyphen_pos = locale.find('-');
    338   std::string lang(locale, 0, hyphen_pos);
    339   if (hyphen_pos != std::string::npos && hyphen_pos > 0) {
    340     std::string region(locale, hyphen_pos + 1);
    341     std::string tmp_locale(lang);
    342     // Map es-RR other than es-ES to es-419 (Chrome's Latin American
    343     // Spanish locale).
    344     if (LowerCaseEqualsASCII(lang, "es") &&
    345         !LowerCaseEqualsASCII(region, "es")) {
    346       tmp_locale.append("-419");
    347     } else if (LowerCaseEqualsASCII(lang, "zh")) {
    348       // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
    349       if (LowerCaseEqualsASCII(region, "hk") ||
    350           LowerCaseEqualsASCII(region, "mo")) { // Macao
    351         tmp_locale.append("-TW");
    352       } else {
    353         tmp_locale.append("-CN");
    354       }
    355     } else if (LowerCaseEqualsASCII(lang, "en")) {
    356       // Map Australian, Canadian, New Zealand and South African English
    357       // to British English for now.
    358       // TODO(jungshik): en-CA may have to change sides once
    359       // we have OS locale separate from app locale (Chrome's UI language).
    360       if (LowerCaseEqualsASCII(region, "au") ||
    361           LowerCaseEqualsASCII(region, "ca") ||
    362           LowerCaseEqualsASCII(region, "nz") ||
    363           LowerCaseEqualsASCII(region, "za")) {
    364         tmp_locale.append("-GB");
    365       } else {
    366         tmp_locale.append("-US");
    367       }
    368     }
    369     if (IsLocaleAvailable(tmp_locale)) {
    370       resolved_locale->swap(tmp_locale);
    371       return true;
    372     }
    373   }
    374 
    375   // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
    376   struct {
    377     const char* source;
    378     const char* dest;
    379   } alias_map[] = {
    380       {"no", "nb"},
    381       {"tl", "fil"},
    382       {"iw", "he"},
    383       {"en", "en-US"},
    384   };
    385 
    386   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) {
    387     if (LowerCaseEqualsASCII(lang, alias_map[i].source)) {
    388       std::string tmp_locale(alias_map[i].dest);
    389       if (IsLocaleAvailable(tmp_locale)) {
    390         resolved_locale->swap(tmp_locale);
    391         return true;
    392       }
    393     }
    394   }
    395 
    396   return false;
    397 #endif
    398 }
    399 
    400 std::string GetApplicationLocale(const std::string& pref_locale) {
    401 #if defined(OS_MACOSX)
    402 
    403   // Use any override (Cocoa for the browser), otherwise use the preference
    404   // passed to the function.
    405   std::string app_locale = l10n_util::GetLocaleOverride();
    406   if (app_locale.empty())
    407     app_locale = pref_locale;
    408 
    409   // The above should handle all of the cases Chrome normally hits, but for some
    410   // unit tests, we need something to fall back too.
    411   if (app_locale.empty())
    412     app_locale = "en-US";
    413 
    414   // Windows/Linux call SetICUDefaultLocale after determining the actual locale
    415   // with CheckAndResolveLocal to make ICU APIs work in that locale.
    416   // Mac doesn't use a locale directory tree of resources (it uses Mac style
    417   // resources), so mirror the Windows/Linux behavior of calling
    418   // SetICUDefaultLocale.
    419   base::i18n::SetICUDefaultLocale(app_locale);
    420   return app_locale;
    421 
    422 #else
    423 
    424   std::string resolved_locale;
    425   std::vector<std::string> candidates;
    426 
    427   // We only use --lang and the app pref on Windows.  On Linux, we only
    428   // look at the LC_*/LANG environment variables.  We do, however, pass --lang
    429   // to renderer and plugin processes so they know what language the parent
    430   // process decided to use.
    431 
    432 #if defined(OS_WIN)
    433 
    434   // First, try the preference value.
    435   if (!pref_locale.empty())
    436     candidates.push_back(GetCanonicalLocale(pref_locale));
    437 
    438   // Next, try the overridden locale.
    439   const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
    440   if (!languages.empty()) {
    441     candidates.reserve(candidates.size() + languages.size());
    442     std::transform(languages.begin(), languages.end(),
    443                    std::back_inserter(candidates), &GetCanonicalLocale);
    444   } else {
    445     // If no override was set, defer to ICU
    446     candidates.push_back(base::i18n::GetConfiguredLocale());
    447   }
    448 
    449 #elif defined(OS_ANDROID)
    450 
    451   // On Android, query java.util.Locale for the default locale.
    452   candidates.push_back(GetDefaultLocale());
    453 
    454 #elif defined(USE_GLIB) && !defined(OS_CHROMEOS)
    455 
    456   // GLib implements correct environment variable parsing with
    457   // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
    458   // We used to use our custom parsing code along with ICU for this purpose.
    459   // If we have a port that does not depend on GTK, we have to
    460   // restore our custom code for that port.
    461   const char* const* languages = g_get_language_names();
    462   DCHECK(languages);  // A valid pointer is guaranteed.
    463   DCHECK(*languages);  // At least one entry, "C", is guaranteed.
    464 
    465   for (; *languages != NULL; ++languages) {
    466     candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
    467   }
    468 
    469 #else
    470 
    471   // By default, use the application locale preference. This applies to ChromeOS
    472   // and linux systems without glib.
    473   if (!pref_locale.empty())
    474     candidates.push_back(pref_locale);
    475 
    476 #endif
    477 
    478   std::vector<std::string>::const_iterator i = candidates.begin();
    479   for (; i != candidates.end(); ++i) {
    480     if (CheckAndResolveLocale(*i, &resolved_locale)) {
    481       base::i18n::SetICUDefaultLocale(resolved_locale);
    482       return resolved_locale;
    483     }
    484   }
    485 
    486   // Fallback on en-US.
    487   const std::string fallback_locale("en-US");
    488   if (IsLocaleAvailable(fallback_locale)) {
    489     base::i18n::SetICUDefaultLocale(fallback_locale);
    490     return fallback_locale;
    491   }
    492 
    493   return std::string();
    494 
    495 #endif
    496 }
    497 
    498 bool IsLocaleNameTranslated(const char* locale,
    499                             const std::string& display_locale) {
    500   base::string16 display_name =
    501       l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
    502   // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
    503   // uloc_getDisplayName returns the actual translation or the default
    504   // value (locale code), we have to rely on this hack to tell whether
    505   // the translation is available or not.  If ICU doesn't have a translated
    506   // name for this locale, GetDisplayNameForLocale will just return the
    507   // locale code.
    508   return !base::IsStringASCII(display_name) ||
    509       base::UTF16ToASCII(display_name) != locale;
    510 }
    511 
    512 base::string16 GetDisplayNameForLocale(const std::string& locale,
    513                                        const std::string& display_locale,
    514                                        bool is_for_ui) {
    515   std::string locale_code = locale;
    516   // Internally, we use the language code of zh-CN and zh-TW, but we want the
    517   // display names to be Chinese (Simplified) and Chinese (Traditional) instead
    518   // of Chinese (China) and Chinese (Taiwan).  To do that, we pass zh-Hans
    519   // and zh-Hant to ICU. Even with this mapping, we'd get
    520   // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and
    521   // even longer results in other languages. Arguably, they're better than
    522   // the current results : Chinese (China) / Chinese (Taiwan).
    523   // TODO(jungshik): Do one of the following:
    524   // 1. Special-case Chinese by getting the custom-translation for them
    525   // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE.
    526   // 3. Get translations for two directly from the ICU resouce bundle
    527   // because they're not accessible with other any API.
    528   // 4. Patch ICU to special-case zh-Hans/zh-Hant for us.
    529   // #1 and #2 wouldn't work if display_locale != current UI locale although
    530   // we can think of additional hack to work around the problem.
    531   // #3 can be potentially expensive.
    532   if (locale_code == "zh-CN")
    533     locale_code = "zh-Hans";
    534   else if (locale_code == "zh-TW")
    535     locale_code = "zh-Hant";
    536 
    537   base::string16 display_name;
    538 #if defined(OS_ANDROID)
    539   // Use Java API to get locale display name so that we can remove most of
    540   // the lang data from icu data to reduce binary size, except for zh-Hans and
    541   // zh-Hant because the current Android Java API doesn't support scripts.
    542   // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
    543   // Android Java API supports scripts.
    544   if (!StartsWithASCII(locale_code, "zh-Han", true)) {
    545     display_name = GetDisplayNameForLocale(locale_code, display_locale);
    546   } else
    547 #endif
    548   {
    549     UErrorCode error = U_ZERO_ERROR;
    550     const int kBufferSize = 1024;
    551 
    552     int actual_size = uloc_getDisplayName(
    553         locale_code.c_str(), display_locale.c_str(),
    554         WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
    555     DCHECK(U_SUCCESS(error));
    556     display_name.resize(actual_size);
    557   }
    558 
    559   // Add directional markup so parentheses are properly placed.
    560   if (is_for_ui && base::i18n::IsRTL())
    561     base::i18n::AdjustStringForLocaleDirection(&display_name);
    562   return display_name;
    563 }
    564 
    565 base::string16 GetDisplayNameForCountry(const std::string& country_code,
    566                                         const std::string& display_locale) {
    567   return GetDisplayNameForLocale("_" + country_code, display_locale, false);
    568 }
    569 
    570 std::string NormalizeLocale(const std::string& locale) {
    571   std::string normalized_locale(locale);
    572   std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_');
    573 
    574   return normalized_locale;
    575 }
    576 
    577 void GetParentLocales(const std::string& current_locale,
    578                       std::vector<std::string>* parent_locales) {
    579   std::string locale(NormalizeLocale(current_locale));
    580 
    581   const int kNameCapacity = 256;
    582   char parent[kNameCapacity];
    583   base::strlcpy(parent, locale.c_str(), kNameCapacity);
    584   parent_locales->push_back(parent);
    585   UErrorCode err = U_ZERO_ERROR;
    586   while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
    587     if (U_FAILURE(err))
    588       break;
    589     parent_locales->push_back(parent);
    590   }
    591 }
    592 
    593 bool IsValidLocaleSyntax(const std::string& locale) {
    594   // Check that the length is plausible.
    595   if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
    596     return false;
    597 
    598   // Strip off the part after an '@' sign, which might contain keywords,
    599   // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
    600   // We don't validate that part much, just check that there's at least one
    601   // equals sign in a plausible place. Normalize the prefix so that hyphens
    602   // are changed to underscores.
    603   std::string prefix = NormalizeLocale(locale);
    604   size_t split_point = locale.find("@");
    605   if (split_point != std::string::npos) {
    606     std::string keywords = locale.substr(split_point + 1);
    607     prefix = locale.substr(0, split_point);
    608 
    609     size_t equals_loc = keywords.find("=");
    610     if (equals_loc == std::string::npos ||
    611         equals_loc < 1 || equals_loc > keywords.size() - 2)
    612       return false;
    613   }
    614 
    615   // Check that all characters before the at-sign are alphanumeric or
    616   // underscore.
    617   for (size_t i = 0; i < prefix.size(); i++) {
    618     char ch = prefix[i];
    619     if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_')
    620       return false;
    621   }
    622 
    623   // Check that the initial token (before the first hyphen/underscore)
    624   // is 1 - 3 alphabetical characters (a language tag).
    625   for (size_t i = 0; i < prefix.size(); i++) {
    626     char ch = prefix[i];
    627     if (ch == '_') {
    628       if (i < 1 || i > 3)
    629         return false;
    630       break;
    631     }
    632     if (!IsAsciiAlpha(ch))
    633       return false;
    634   }
    635 
    636   // Check that the all tokens after the initial token are 1 - 8 characters.
    637   // (Tokenize/StringTokenizer don't work here, they collapse multiple
    638   // delimiters into one.)
    639   int token_len = 0;
    640   int token_index = 0;
    641   for (size_t i = 0; i < prefix.size(); i++) {
    642     if (prefix[i] != '_') {
    643       token_len++;
    644       continue;
    645     }
    646 
    647     if (token_index > 0 && (token_len < 1 || token_len > 8)) {
    648       return false;
    649     }
    650     token_index++;
    651     token_len = 0;
    652   }
    653   if (token_index == 0 && (token_len < 1 || token_len > 3)) {
    654     return false;
    655   } else if (token_len < 1 || token_len > 8) {
    656     return false;
    657   }
    658 
    659   return true;
    660 }
    661 
    662 std::string GetStringUTF8(int message_id) {
    663   return base::UTF16ToUTF8(GetStringUTF16(message_id));
    664 }
    665 
    666 base::string16 GetStringUTF16(int message_id) {
    667   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
    668   base::string16 str = rb.GetLocalizedString(message_id);
    669   AdjustParagraphDirectionality(&str);
    670 
    671   return str;
    672 }
    673 
    674 base::string16 GetStringFUTF16(int message_id,
    675                                const std::vector<base::string16>& replacements,
    676                                std::vector<size_t>* offsets) {
    677   // TODO(tc): We could save a string copy if we got the raw string as
    678   // a StringPiece and were able to call ReplaceStringPlaceholders with
    679   // a StringPiece format string and base::string16 substitution strings.  In
    680   // practice, the strings should be relatively short.
    681   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
    682   const base::string16& format_string = rb.GetLocalizedString(message_id);
    683 
    684 #ifndef NDEBUG
    685   // Make sure every replacement string is being used, so we don't just
    686   // silently fail to insert one. If |offsets| is non-NULL, then don't do this
    687   // check as the code may simply want to find the placeholders rather than
    688   // actually replacing them.
    689   if (!offsets) {
    690     std::string utf8_string = base::UTF16ToUTF8(format_string);
    691 
    692     // $9 is the highest allowed placeholder.
    693     for (size_t i = 0; i < 9; ++i) {
    694       bool placeholder_should_exist = replacements.size() > i;
    695 
    696       std::string placeholder =
    697           base::StringPrintf("$%d", static_cast<int>(i + 1));
    698       size_t pos = utf8_string.find(placeholder.c_str());
    699       if (placeholder_should_exist) {
    700         DCHECK_NE(std::string::npos, pos) <<
    701             " Didn't find a " << placeholder << " placeholder in " <<
    702             utf8_string;
    703       } else {
    704         DCHECK_EQ(std::string::npos, pos) <<
    705             " Unexpectedly found a " << placeholder << " placeholder in " <<
    706             utf8_string;
    707       }
    708     }
    709   }
    710 #endif
    711 
    712   base::string16 formatted = ReplaceStringPlaceholders(
    713       format_string, replacements, offsets);
    714   AdjustParagraphDirectionality(&formatted);
    715 
    716   return formatted;
    717 }
    718 
    719 std::string GetStringFUTF8(int message_id,
    720                            const base::string16& a) {
    721   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
    722 }
    723 
    724 std::string GetStringFUTF8(int message_id,
    725                            const base::string16& a,
    726                            const base::string16& b) {
    727   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
    728 }
    729 
    730 std::string GetStringFUTF8(int message_id,
    731                            const base::string16& a,
    732                            const base::string16& b,
    733                            const base::string16& c) {
    734   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
    735 }
    736 
    737 std::string GetStringFUTF8(int message_id,
    738                            const base::string16& a,
    739                            const base::string16& b,
    740                            const base::string16& c,
    741                            const base::string16& d) {
    742   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
    743 }
    744 
    745 base::string16 GetStringFUTF16(int message_id,
    746                                const base::string16& a) {
    747   std::vector<base::string16> replacements;
    748   replacements.push_back(a);
    749   return GetStringFUTF16(message_id, replacements, NULL);
    750 }
    751 
    752 base::string16 GetStringFUTF16(int message_id,
    753                                const base::string16& a,
    754                                const base::string16& b) {
    755   return GetStringFUTF16(message_id, a, b, NULL);
    756 }
    757 
    758 base::string16 GetStringFUTF16(int message_id,
    759                                const base::string16& a,
    760                                const base::string16& b,
    761                                const base::string16& c) {
    762   std::vector<base::string16> replacements;
    763   replacements.push_back(a);
    764   replacements.push_back(b);
    765   replacements.push_back(c);
    766   return GetStringFUTF16(message_id, replacements, NULL);
    767 }
    768 
    769 base::string16 GetStringFUTF16(int message_id,
    770                                const base::string16& a,
    771                                const base::string16& b,
    772                                const base::string16& c,
    773                                const base::string16& d) {
    774   std::vector<base::string16> replacements;
    775   replacements.push_back(a);
    776   replacements.push_back(b);
    777   replacements.push_back(c);
    778   replacements.push_back(d);
    779   return GetStringFUTF16(message_id, replacements, NULL);
    780 }
    781 
    782 base::string16 GetStringFUTF16(int message_id,
    783                                const base::string16& a,
    784                                const base::string16& b,
    785                                const base::string16& c,
    786                                const base::string16& d,
    787                                const base::string16& e) {
    788   std::vector<base::string16> replacements;
    789   replacements.push_back(a);
    790   replacements.push_back(b);
    791   replacements.push_back(c);
    792   replacements.push_back(d);
    793   replacements.push_back(e);
    794   return GetStringFUTF16(message_id, replacements, NULL);
    795 }
    796 
    797 base::string16 GetStringFUTF16(int message_id,
    798                                const base::string16& a,
    799                                size_t* offset) {
    800   DCHECK(offset);
    801   std::vector<size_t> offsets;
    802   std::vector<base::string16> replacements;
    803   replacements.push_back(a);
    804   base::string16 result = GetStringFUTF16(message_id, replacements, &offsets);
    805   DCHECK(offsets.size() == 1);
    806   *offset = offsets[0];
    807   return result;
    808 }
    809 
    810 base::string16 GetStringFUTF16(int message_id,
    811                                const base::string16& a,
    812                                const base::string16& b,
    813                                std::vector<size_t>* offsets) {
    814   std::vector<base::string16> replacements;
    815   replacements.push_back(a);
    816   replacements.push_back(b);
    817   return GetStringFUTF16(message_id, replacements, offsets);
    818 }
    819 
    820 base::string16 GetStringFUTF16Int(int message_id, int a) {
    821   return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::IntToString(a)));
    822 }
    823 
    824 base::string16 GetStringFUTF16Int(int message_id, int64 a) {
    825   return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::Int64ToString(a)));
    826 }
    827 
    828 // Specialization of operator() method for base::string16 version.
    829 template <>
    830 bool StringComparator<base::string16>::operator()(const base::string16& lhs,
    831                                                   const base::string16& rhs) {
    832   // If we can not get collator instance for specified locale, just do simple
    833   // string compare.
    834   if (!collator_)
    835     return lhs < rhs;
    836   return base::i18n::CompareString16WithCollator(collator_, lhs, rhs) ==
    837       UCOL_LESS;
    838 };
    839 
    840 base::string16 GetPluralStringFUTF16(const std::vector<int>& message_ids,
    841                                int number) {
    842   scoped_ptr<icu::PluralFormat> format = BuildPluralFormat(message_ids);
    843   DCHECK(format);
    844 
    845   UErrorCode err = U_ZERO_ERROR;
    846   icu::UnicodeString result_files_string = format->format(number, err);
    847   int capacity = result_files_string.length() + 1;
    848   DCHECK_GT(capacity, 1);
    849   base::string16 result;
    850   result_files_string.extract(
    851       static_cast<UChar*>(WriteInto(&result, capacity)), capacity, err);
    852   DCHECK(U_SUCCESS(err));
    853   return result;
    854 }
    855 
    856 std::string GetPluralStringFUTF8(const std::vector<int>& message_ids,
    857                                  int number) {
    858   return base::UTF16ToUTF8(GetPluralStringFUTF16(message_ids, number));
    859 }
    860 
    861 void SortStrings16(const std::string& locale,
    862                    std::vector<base::string16>* strings) {
    863   SortVectorWithStringKey(locale, strings, false);
    864 }
    865 
    866 const std::vector<std::string>& GetAvailableLocales() {
    867   return g_available_locales.Get();
    868 }
    869 
    870 void GetAcceptLanguagesForLocale(const std::string& display_locale,
    871                                  std::vector<std::string>* locale_codes) {
    872   for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
    873     if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i],
    874                                            display_locale))
    875       // TODO(jungshik) : Put them at the of the list with language codes
    876       // enclosed by brackets instead of skipping.
    877         continue;
    878     locale_codes->push_back(kAcceptLanguageList[i]);
    879   }
    880 }
    881 
    882 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
    883   int width = 0;
    884   base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
    885   DCHECK_GT(width, 0);
    886   return width;
    887 }
    888 
    889 }  // namespace l10n_util
    890