// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file defines a helper class for selecting a supported language from a
// set of candidates.

#include "chrome/installer/util/language_selector.h"

#include <algorithm>
#include <functional>

#include "base/logging.h"
#include "base/strings/string16.h"
#include "base/strings/string_util.h"
#include "base/win/i18n.h"
#include "chrome/installer/util/google_update_settings.h"

#include "installer_util_strings.h"

namespace {

// Pairs a lower-case language identifier with the offset of the translation
// that should be used for it. All of the lookup tables below are arrays of
// this type.
struct LangToOffset {
  const wchar_t* language;
  int offset;
};

// The language we fall back upon when all else fails.
const wchar_t kFallbackLanguage[] = L"en-us";
const int kFallbackLanguageOffset = IDS_L10N_OFFSET_EN_US;

// http://tools.ietf.org/html/rfc5646 Section 2.3.3
// NOTE(review): not referenced by any code visible in this file.
const std::wstring::size_type kScriptSubtagLength = 4;

// A sorted array of language identifiers (and their offsets) for which
// translations are available. The contents of the array are generated by
// create_string_rc.py.
// The array is binary-searched both by name (TryFindOffset) and by offset
// (LanguageSelector::GetLanguageName), so it must be ordered by both keys;
// debug builds verify this in ValidateMappings.
// HANDLE_LANGUAGE stringizes its first argument and pastes an L prefix onto it
// to form a wide string literal. DO_LANGUAGES is not defined in this file
// (presumably it comes from installer_util_strings.h -- TODO confirm).
const LangToOffset kLanguageOffsetPairs[] = {
#define HANDLE_LANGUAGE(l_, o_) { L ## #l_, o_ },
  DO_LANGUAGES
#undef HANDLE_LANGUAGE
};

// A sorted array of language identifiers that are aliases to other languages
// for which translations are available.
// The array is binary-searched by name, so new entries must keep it ordered by
// language identifier; debug builds verify this in ValidateMappings.
const LangToOffset kLanguageToOffsetExceptions[] = {
  // Alias some English variants to British English (all others wildcard to US).
  { L"en-au", IDS_L10N_OFFSET_EN_GB },
  { L"en-ca", IDS_L10N_OFFSET_EN_GB },
  { L"en-nz", IDS_L10N_OFFSET_EN_GB },
  { L"en-za", IDS_L10N_OFFSET_EN_GB },
  // Alias es-es to es (all others wildcard to es-419).
  { L"es-es", IDS_L10N_OFFSET_ES },
  // Google web properties use iw for he. Handle both just to be safe.
  { L"he", IDS_L10N_OFFSET_IW },
  // Google web properties use no for nb. Handle both just to be safe.
  { L"nb", IDS_L10N_OFFSET_NO },
  // Some Google web properties use tl for fil. Handle both just to be safe.
  // They're not completely identical, but alias it here.
  { L"tl", IDS_L10N_OFFSET_FIL },
  // Pre-Vista aliases for Chinese w/ script subtag.
  { L"zh-chs", IDS_L10N_OFFSET_ZH_CN },
  { L"zh-cht", IDS_L10N_OFFSET_ZH_TW },
  // Vista+ aliases for Chinese w/ script subtag.
  { L"zh-hans", IDS_L10N_OFFSET_ZH_CN },
  { L"zh-hant", IDS_L10N_OFFSET_ZH_TW },
  // Alias Hong Kong and Macau to Taiwan.
  { L"zh-hk", IDS_L10N_OFFSET_ZH_TW },
  { L"zh-mo", IDS_L10N_OFFSET_ZH_TW },
  // Although the wildcard entry for zh would result in this, alias zh-sg so
  // that it will win if it precedes another valid tag in a list of candidates.
  { L"zh-sg", IDS_L10N_OFFSET_ZH_CN }
};

// A sorted array of neutral language identifiers that are wildcard aliases to
// other languages for which translations are available.
// Entries are matched against the primary language subtag of a candidate (the
// text before the first '-'); see MatchLanguageOffset. The array is
// binary-searched by name, so it must stay ordered by language identifier.
const LangToOffset kLanguageToOffsetWildcards[] = {
  // Use the U.S. region for anything English.
  { L"en", IDS_L10N_OFFSET_EN_US },
  // Use the Latin American region for anything Spanish.
  { L"es", IDS_L10N_OFFSET_ES_419 },
  // Use the Brazil region for anything Portuguese.
  { L"pt", IDS_L10N_OFFSET_PT_BR },
  // Use the P.R.C. region for anything Chinese.
  { L"zh", IDS_L10N_OFFSET_ZH_CN }
};

#if !defined(NDEBUG)
// Returns true if the items in the given range are sorted in strictly
// increasing order by name. If |byNameAndOffset| is true, the items must be
// sorted by both name and offset.
91 bool IsArraySorted(const LangToOffset* first, const LangToOffset* last, 92 bool byNameAndOffset) { 93 if (last - first > 1) { 94 for (--last; first != last; ++first) { 95 if (!(std::wstring(first->language) < (first + 1)->language) || 96 byNameAndOffset && !(first->offset < (first + 1)->offset)) { 97 return false; 98 } 99 } 100 } 101 return true; 102 } 103 104 // Validates that the static read-only mappings are properly sorted. 105 void ValidateMappings() { 106 // Ensure that kLanguageOffsetPairs is sorted. 107 DCHECK(IsArraySorted(&kLanguageOffsetPairs[0], 108 &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)], 109 true)) << "kOffsetToLanguageId is not sorted"; 110 111 // Ensure that kLanguageToOffsetExceptions is sorted. 112 DCHECK(IsArraySorted( 113 &kLanguageToOffsetExceptions[0], 114 &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)], 115 false)) << "kLanguageToOffsetExceptions is not sorted"; 116 117 // Ensure that kLanguageToOffsetWildcards is sorted. 118 DCHECK(IsArraySorted( 119 &kLanguageToOffsetWildcards[0], 120 &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)], 121 false)) << "kLanguageToOffsetWildcards is not sorted"; 122 } 123 #endif // !defined(NDEBUG) 124 125 // A less-than overload to do slightly more efficient searches in the 126 // sorted arrays. 127 bool operator<(const LangToOffset& left, const std::wstring& right) { 128 return left.language < right; 129 } 130 131 // A less-than overload to do slightly more efficient searches in the 132 // sorted arrays. 133 bool operator<(const std::wstring& left, const LangToOffset& right) { 134 return left < right.language; 135 } 136 137 // A not-so-efficient less-than overload for the same uses as above. 138 bool operator<(const LangToOffset& left, const LangToOffset& right) { 139 return std::wstring(left.language) < right.language; 140 } 141 142 // A compare function for searching in a sorted array by offset. 
143 bool IsOffsetLessThan(const LangToOffset& left, const LangToOffset& right) { 144 return left.offset < right.offset; 145 } 146 147 // Binary search in one of the sorted arrays to find the offset corresponding to 148 // a given language |name|. 149 bool TryFindOffset(const LangToOffset* first, const LangToOffset* last, 150 const std::wstring& name, int* offset) { 151 const LangToOffset* search_result = std::lower_bound(first, last, name); 152 if (last != search_result && search_result->language == name) { 153 *offset = search_result->offset; 154 return true; 155 } 156 return false; 157 } 158 159 // A predicate function for LanguageSelector::SelectIf that searches for the 160 // offset of a translated language. The search first tries to find an exact 161 // match. Failing that, an exact match with an alias is attempted. 162 bool GetLanguageOffset(const std::wstring& language, int* offset) { 163 // Note: always perform the exact match first so that an alias is never 164 // selected in place of a future translation. 165 return 166 TryFindOffset( 167 &kLanguageOffsetPairs[0], 168 &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)], 169 language, offset) || 170 TryFindOffset( 171 &kLanguageToOffsetExceptions[0], 172 &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)], 173 language, offset); 174 } 175 176 // A predicate function for LanguageSelector::SelectIf that searches for a 177 // wildcard match with |language|'s primary language subtag. 178 bool MatchLanguageOffset(const std::wstring& language, int* offset) { 179 std::wstring primary_language = language.substr(0, language.find(L'-')); 180 181 // Now check for wildcards. 182 return 183 TryFindOffset( 184 &kLanguageToOffsetWildcards[0], 185 &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)], 186 primary_language, offset); 187 } 188 189 // Adds to |candidates| the eligible languages on the system. 
Any language 190 // setting specified by Omaha takes precedence over the operating system's 191 // configured languages. 192 void GetCandidatesFromSystem(std::vector<std::wstring>* candidates) { 193 DCHECK(candidates); 194 base::string16 language; 195 196 // Omaha gets first pick. 197 GoogleUpdateSettings::GetLanguage(&language); 198 if (!language.empty()) { 199 candidates->push_back(language); 200 } 201 202 // Now try the Windows UI languages. Use the thread preferred since that will 203 // kindly return us a list of all kinds of fallbacks. 204 base::win::i18n::GetThreadPreferredUILanguageList(candidates); 205 } 206 207 } // namespace 208 209 namespace installer { 210 211 LanguageSelector::LanguageSelector() 212 : offset_(arraysize(kLanguageOffsetPairs)) { 213 #if !defined(NDEBUG) 214 ValidateMappings(); 215 #endif // !defined(NDEBUG) 216 std::vector<std::wstring> candidates; 217 218 GetCandidatesFromSystem(&candidates); 219 DoSelect(candidates); 220 } 221 222 LanguageSelector::LanguageSelector(const std::vector<std::wstring>& candidates) 223 : offset_(arraysize(kLanguageOffsetPairs)) { 224 #if !defined(NDEBUG) 225 ValidateMappings(); 226 #endif // !defined(NDEBUG) 227 DoSelect(candidates); 228 } 229 230 LanguageSelector::~LanguageSelector() { 231 } 232 233 // static 234 std::wstring LanguageSelector::GetLanguageName(int offset) { 235 DCHECK_GE(offset, 0); 236 DCHECK_LT(static_cast<size_t>(offset), arraysize(kLanguageOffsetPairs)); 237 238 LangToOffset value = { NULL, offset }; 239 const LangToOffset* search_result = 240 std::lower_bound(&kLanguageOffsetPairs[0], 241 &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)], 242 value, IsOffsetLessThan); 243 if (&kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)] != search_result && 244 search_result->offset == offset) { 245 return search_result->language; 246 } 247 NOTREACHED() << "Unknown language offset."; 248 return std::wstring(&kFallbackLanguage[0], arraysize(kFallbackLanguage) - 1); 249 } 250 251 // Runs 
through the set of candidates, sending their downcased representation 252 // through |select_predicate|. Returns true if the predicate selects a 253 // candidate, in which case |matched_name| is assigned the value of the 254 // candidate and |matched_offset| is assigned the language offset of the 255 // selected translation. 256 // static 257 bool LanguageSelector::SelectIf(const std::vector<std::wstring>& candidates, 258 SelectPred_Fn select_predicate, 259 std::wstring* matched_name, 260 int* matched_offset) { 261 std::wstring candidate; 262 for (std::vector<std::wstring>::const_iterator scan = candidates.begin(), 263 end = candidates.end(); scan != end; ++scan) { 264 candidate.assign(*scan); 265 base::StringToLowerASCII(&candidate); 266 if (select_predicate(candidate, matched_offset)) { 267 matched_name->assign(*scan); 268 return true; 269 } 270 } 271 272 return false; 273 } 274 275 // Select the best-fit translation from the ordered list |candidates|. 276 // At the conclusion, this instance's |matched_candidate_| and |offset_| members 277 // are set to the name of the selected candidate and the offset of the matched 278 // translation. If no translation is selected, the fallback's name and offset 279 // are selected. 280 void LanguageSelector::DoSelect(const std::vector<std::wstring>& candidates) { 281 // Make a pass through the candidates looking for an exact or alias match. 282 // Failing that, make another pass looking for a wildcard match. 283 if (!SelectIf(candidates, &GetLanguageOffset, &matched_candidate_, 284 &offset_) && 285 !SelectIf(candidates, &MatchLanguageOffset, &matched_candidate_, 286 &offset_)) { 287 VLOG(1) << "No suitable language found for any candidates."; 288 289 // Our fallback is "en-us" 290 matched_candidate_.assign(&kFallbackLanguage[0], 291 arraysize(kFallbackLanguage) - 1); 292 offset_ = kFallbackLanguageOffset; 293 } 294 } 295 296 } // namespace installer 297