Home | History | Annotate | Download | only in util
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // This file defines a helper class for selecting a supported language from a
      6 // set of candidates.
      7 
      8 #include "chrome/installer/util/language_selector.h"
      9 
     10 #include <algorithm>
     11 #include <functional>
     12 
     13 #include "base/logging.h"
     14 #include "base/strings/string16.h"
     15 #include "base/strings/string_util.h"
     16 #include "base/win/i18n.h"
     17 #include "chrome/installer/util/google_update_settings.h"
     18 
     19 #include "installer_util_strings.h"
     20 
     21 namespace {
     22 
     23 struct LangToOffset {
     24   const wchar_t* language;
     25   int offset;
     26 };
     27 
     28 // The language we fall back upon when all else fails.
     29 const wchar_t kFallbackLanguage[] = L"en-us";
     30 const int kFallbackLanguageOffset = IDS_L10N_OFFSET_EN_US;
     31 
     32 // http://tools.ietf.org/html/rfc5646 Section 2.3.3
     33 const std::wstring::size_type kScriptSubtagLength = 4;
     34 
     35 // A sorted array of language identifiers (and their offsets) for which
     36 // translations are available. The contents of the array are generated by
     37 // create_string_rc.py.
     38 const LangToOffset kLanguageOffsetPairs[] = {
     39 #define HANDLE_LANGUAGE(l_, o_) { L ## #l_, o_ },
     40   DO_LANGUAGES
     41 #undef HANDLE_LANGUAGE
     42 };
     43 
     44 // A sorted array of language identifiers that are aliases to other languages
     45 // for which translations are available.
     46 const LangToOffset kLanguageToOffsetExceptions[] = {
     47   // Alias some English variants to British English (all others wildcard to US).
     48   { L"en-au", IDS_L10N_OFFSET_EN_GB },
     49   { L"en-ca", IDS_L10N_OFFSET_EN_GB },
     50   { L"en-nz", IDS_L10N_OFFSET_EN_GB },
     51   { L"en-za", IDS_L10N_OFFSET_EN_GB },
     52   // Alias es-es to es (all others wildcard to es-419).
     53   { L"es-es", IDS_L10N_OFFSET_ES },
     54   // Google web properties use iw for he. Handle both just to be safe.
     55   { L"he", IDS_L10N_OFFSET_IW },
     56   // Google web properties use no for nb. Handle both just to be safe.
     57   { L"nb", IDS_L10N_OFFSET_NO },
     58   // Some Google web properties use tl for fil. Handle both just to be safe.
     59   // They're not completely identical, but alias it here.
     60   { L"tl", IDS_L10N_OFFSET_FIL },
     61   // Pre-Vista aliases for Chinese w/ script subtag.
     62   { L"zh-chs", IDS_L10N_OFFSET_ZH_CN },
     63   { L"zh-cht", IDS_L10N_OFFSET_ZH_TW },
     64   // Vista+ aliases for Chinese w/ script subtag.
     65   { L"zh-hans", IDS_L10N_OFFSET_ZH_CN },
     66   { L"zh-hant", IDS_L10N_OFFSET_ZH_TW },
     67   // Alias Hong Kong and Macau to Taiwan.
     68   { L"zh-hk", IDS_L10N_OFFSET_ZH_TW },
     69   { L"zh-mo", IDS_L10N_OFFSET_ZH_TW },
     70   // Although the wildcard entry for zh would result in this, alias zh-sg so
     71   // that it will win if it precedes another valid tag in a list of candidates.
     72   { L"zh-sg", IDS_L10N_OFFSET_ZH_CN }
     73 };
     74 
     75 // A sorted array of neutral language identifiers that are wildcard aliases to
     76 // other languages for which translations are available.
     77 const LangToOffset kLanguageToOffsetWildcards[] = {
     78   // Use the U.S. region for anything English.
     79   { L"en", IDS_L10N_OFFSET_EN_US },
     80   // Use the Latin American region for anything Spanish.
     81   { L"es", IDS_L10N_OFFSET_ES_419 },
     82   // Use the Brazil region for anything Portugese.
     83   { L"pt", IDS_L10N_OFFSET_PT_BR },
     84   // Use the P.R.C. region for anything Chinese.
     85   { L"zh", IDS_L10N_OFFSET_ZH_CN }
     86 };
     87 
     88 #if !defined(NDEBUG)
     89 // Returns true if the items in the given range are sorted.  If
     90 // |byNameAndOffset| is true, the items must be sorted by both name and offset.
     91 bool IsArraySorted(const LangToOffset* first, const LangToOffset* last,
     92                    bool byNameAndOffset) {
     93   if (last - first > 1) {
     94     for (--last; first != last; ++first) {
     95        if (!(std::wstring(first->language) < (first + 1)->language) ||
     96            byNameAndOffset && !(first->offset < (first + 1)->offset)) {
     97          return false;
     98        }
     99     }
    100   }
    101   return true;
    102 }
    103 
    104 // Validates that the static read-only mappings are properly sorted.
    105 void ValidateMappings() {
    106   // Ensure that kLanguageOffsetPairs is sorted.
    107   DCHECK(IsArraySorted(&kLanguageOffsetPairs[0],
    108                        &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
    109                        true)) << "kOffsetToLanguageId is not sorted";
    110 
    111   // Ensure that kLanguageToOffsetExceptions is sorted.
    112   DCHECK(IsArraySorted(
    113            &kLanguageToOffsetExceptions[0],
    114            &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)],
    115            false)) << "kLanguageToOffsetExceptions is not sorted";
    116 
    117   // Ensure that kLanguageToOffsetWildcards is sorted.
    118   DCHECK(IsArraySorted(
    119             &kLanguageToOffsetWildcards[0],
    120             &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)],
    121             false)) << "kLanguageToOffsetWildcards is not sorted";
    122 }
    123 #endif  // !defined(NDEBUG)
    124 
    125 // A less-than overload to do slightly more efficient searches in the
    126 // sorted arrays.
    127 bool operator<(const LangToOffset& left, const std::wstring& right) {
    128   return left.language < right;
    129 }
    130 
    131 // A less-than overload to do slightly more efficient searches in the
    132 // sorted arrays.
    133 bool operator<(const std::wstring& left, const LangToOffset& right) {
    134   return left < right.language;
    135 }
    136 
    137 // A not-so-efficient less-than overload for the same uses as above.
    138 bool operator<(const LangToOffset& left, const LangToOffset& right) {
    139   return std::wstring(left.language) < right.language;
    140 }
    141 
    142 // A compare function for searching in a sorted array by offset.
    143 bool IsOffsetLessThan(const LangToOffset& left, const LangToOffset& right) {
    144   return left.offset < right.offset;
    145 }
    146 
    147 // Binary search in one of the sorted arrays to find the offset corresponding to
    148 // a given language |name|.
    149 bool TryFindOffset(const LangToOffset* first, const LangToOffset* last,
    150                    const std::wstring& name, int* offset) {
    151   const LangToOffset* search_result = std::lower_bound(first, last, name);
    152   if (last != search_result && search_result->language == name) {
    153     *offset = search_result->offset;
    154     return true;
    155   }
    156   return false;
    157 }
    158 
    159 // A predicate function for LanguageSelector::SelectIf that searches for the
    160 // offset of a translated language.  The search first tries to find an exact
    161 // match.  Failing that, an exact match with an alias is attempted.
    162 bool GetLanguageOffset(const std::wstring& language, int* offset) {
    163   // Note: always perform the exact match first so that an alias is never
    164   // selected in place of a future translation.
    165   return
    166       TryFindOffset(
    167           &kLanguageOffsetPairs[0],
    168           &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
    169           language, offset) ||
    170       TryFindOffset(
    171           &kLanguageToOffsetExceptions[0],
    172           &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)],
    173           language, offset);
    174 }
    175 
    176 // A predicate function for LanguageSelector::SelectIf that searches for a
    177 // wildcard match with |language|'s primary language subtag.
    178 bool MatchLanguageOffset(const std::wstring& language, int* offset) {
    179   std::wstring primary_language = language.substr(0, language.find(L'-'));
    180 
    181   // Now check for wildcards.
    182   return
    183       TryFindOffset(
    184           &kLanguageToOffsetWildcards[0],
    185           &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)],
    186           primary_language, offset);
    187 }
    188 
    189 // Adds to |candidates| the eligible languages on the system.  Any language
    190 // setting specified by Omaha takes precedence over the operating system's
    191 // configured languages.
    192 void GetCandidatesFromSystem(std::vector<std::wstring>* candidates) {
    193   DCHECK(candidates);
    194   base::string16 language;
    195 
    196   // Omaha gets first pick.
    197   GoogleUpdateSettings::GetLanguage(&language);
    198   if (!language.empty()) {
    199     candidates->push_back(language);
    200   }
    201 
    202   // Now try the Windows UI languages.  Use the thread preferred since that will
    203   // kindly return us a list of all kinds of fallbacks.
    204   base::win::i18n::GetThreadPreferredUILanguageList(candidates);
    205 }
    206 
    207 }  // namespace
    208 
    209 namespace installer {
    210 
    211 LanguageSelector::LanguageSelector()
    212     : offset_(arraysize(kLanguageOffsetPairs)) {
    213 #if !defined(NDEBUG)
    214   ValidateMappings();
    215 #endif  // !defined(NDEBUG)
    216   std::vector<std::wstring> candidates;
    217 
    218   GetCandidatesFromSystem(&candidates);
    219   DoSelect(candidates);
    220 }
    221 
    222 LanguageSelector::LanguageSelector(const std::vector<std::wstring>& candidates)
    223     : offset_(arraysize(kLanguageOffsetPairs)) {
    224 #if !defined(NDEBUG)
    225   ValidateMappings();
    226 #endif  // !defined(NDEBUG)
    227   DoSelect(candidates);
    228 }
    229 
    230 LanguageSelector::~LanguageSelector() {
    231 }
    232 
    233 // static
    234 std::wstring LanguageSelector::GetLanguageName(int offset) {
    235   DCHECK_GE(offset, 0);
    236   DCHECK_LT(static_cast<size_t>(offset), arraysize(kLanguageOffsetPairs));
    237 
    238   LangToOffset value = { NULL, offset };
    239   const LangToOffset* search_result =
    240     std::lower_bound(&kLanguageOffsetPairs[0],
    241                      &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
    242                      value, IsOffsetLessThan);
    243   if (&kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)] != search_result &&
    244       search_result->offset == offset) {
    245     return search_result->language;
    246   }
    247   NOTREACHED() << "Unknown language offset.";
    248   return std::wstring(&kFallbackLanguage[0], arraysize(kFallbackLanguage) - 1);
    249 }
    250 
    251 // Runs through the set of candidates, sending their downcased representation
    252 // through |select_predicate|.  Returns true if the predicate selects a
    253 // candidate, in which case |matched_name| is assigned the value of the
    254 // candidate and |matched_offset| is assigned the language offset of the
    255 // selected translation.
    256 // static
    257 bool LanguageSelector::SelectIf(const std::vector<std::wstring>& candidates,
    258                                 SelectPred_Fn select_predicate,
    259                                 std::wstring* matched_name,
    260                                 int* matched_offset) {
    261   std::wstring candidate;
    262   for (std::vector<std::wstring>::const_iterator scan = candidates.begin(),
    263           end = candidates.end(); scan != end; ++scan) {
    264     candidate.assign(*scan);
    265     base::StringToLowerASCII(&candidate);
    266     if (select_predicate(candidate, matched_offset)) {
    267       matched_name->assign(*scan);
    268       return true;
    269     }
    270   }
    271 
    272   return false;
    273 }
    274 
    275 // Select the best-fit translation from the ordered list |candidates|.
    276 // At the conclusion, this instance's |matched_candidate_| and |offset_| members
    277 // are set to the name of the selected candidate and the offset of the matched
    278 // translation.  If no translation is selected, the fallback's name and offset
    279 // are selected.
    280 void LanguageSelector::DoSelect(const std::vector<std::wstring>& candidates) {
    281   // Make a pass through the candidates looking for an exact or alias match.
    282   // Failing that, make another pass looking for a wildcard match.
    283   if (!SelectIf(candidates, &GetLanguageOffset, &matched_candidate_,
    284                 &offset_) &&
    285       !SelectIf(candidates, &MatchLanguageOffset, &matched_candidate_,
    286                 &offset_)) {
    287     VLOG(1) << "No suitable language found for any candidates.";
    288 
    289     // Our fallback is "en-us"
    290     matched_candidate_.assign(&kFallbackLanguage[0],
    291                               arraysize(kFallbackLanguage) - 1);
    292     offset_ = kFallbackLanguageOffset;
    293   }
    294 }
    295 
    296 }  // namespace installer
    297