// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file defines a helper class for selecting a supported language from a
// set of candidates.

#include "chrome/installer/util/language_selector.h"

#include <algorithm>
#include <functional>

#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/win/i18n.h"
#include "chrome/installer/util/google_update_settings.h"

#include "installer_util_strings.h"

namespace {

// Pairs a language identifier with the offset of the translation to use for
// it. The tables below are arrays of these, sorted by |language| so that they
// can be binary searched.
struct LangToOffset {
  // Language tag. The tables in this file spell it in lowercase; lookups
  // compare it against downcased candidates.
  const wchar_t* language;
  // Translation offset. The alias tables below use IDS_L10N_OFFSET_* constants
  // (presumably declared by installer_util_strings.h -- confirm).
  int offset;
};

// The language we fall back upon when all else fails.
const wchar_t kFallbackLanguage[] = L"en-us";
const int kFallbackLanguageOffset = IDS_L10N_OFFSET_EN_US;

// http://tools.ietf.org/html/rfc5646 Section 2.3.3
// NOTE(review): the script subtag appears to be described in Section 2.2.3 of
// RFC 5646 rather than 2.3.3 -- confirm the section number. This constant is
// also not referenced anywhere else in the visible portion of this file.
const std::wstring::size_type kScriptSubtagLength = 4;

// A sorted array of language identifiers (and their offsets) for which
// translations are available. The contents of the array are generated by
// create_string_rc.py.
// Each HANDLE_LANGUAGE(l_, o_) expansion yields one { L"<l_>", o_ } entry:
// #l_ stringizes the language token and L ## pastes the wide-literal prefix
// onto it. DO_LANGUAGES is not defined in this file (presumably it comes from
// installer_util_strings.h -- confirm).
const LangToOffset kLanguageOffsetPairs[] = {
#define HANDLE_LANGUAGE(l_, o_) { L ## #l_, o_ },
  DO_LANGUAGES
#undef HANDLE_LANGUAGE
};

// A sorted array of language identifiers that are aliases to other languages
// for which translations are available.
// Entries must stay sorted by name: the array is binary searched, and debug
// builds DCHECK the ordering.
const LangToOffset kLanguageToOffsetExceptions[] = {
  // Alias some English variants to British English (all others wildcard to US).
  { L"en-au", IDS_L10N_OFFSET_EN_GB },
  { L"en-ca", IDS_L10N_OFFSET_EN_GB },
  { L"en-nz", IDS_L10N_OFFSET_EN_GB },
  { L"en-za", IDS_L10N_OFFSET_EN_GB },
  // Alias es-es to es (all others wildcard to es-419).
  { L"es-es", IDS_L10N_OFFSET_ES },
  // Google web properties use iw for he. Handle both just to be safe.
  { L"he", IDS_L10N_OFFSET_IW },
  // Google web properties use no for nb. Handle both just to be safe.
  { L"nb", IDS_L10N_OFFSET_NO },
  // Some Google web properties use tl for fil. Handle both just to be safe.
  // They're not completely identical, but alias it here.
  { L"tl", IDS_L10N_OFFSET_FIL },
  // Pre-Vista aliases for Chinese w/ script subtag.
  { L"zh-chs", IDS_L10N_OFFSET_ZH_CN },
  { L"zh-cht", IDS_L10N_OFFSET_ZH_TW },
  // Vista+ aliases for Chinese w/ script subtag.
  { L"zh-hans", IDS_L10N_OFFSET_ZH_CN },
  { L"zh-hant", IDS_L10N_OFFSET_ZH_TW },
  // Alias Hong Kong and Macau to Taiwan.
  { L"zh-hk", IDS_L10N_OFFSET_ZH_TW },
  { L"zh-mo", IDS_L10N_OFFSET_ZH_TW },
  // Although the wildcard entry for zh would result in this, alias zh-sg so
  // that it will win if it precedes another valid tag in a list of candidates.
  { L"zh-sg", IDS_L10N_OFFSET_ZH_CN }
};

// A sorted array of neutral language identifiers that are wildcard aliases to
// other languages for which translations are available.
// These are matched against a candidate's primary language subtag only (the
// text before the first '-'); the array must stay sorted by name.
const LangToOffset kLanguageToOffsetWildcards[] = {
  // Use the U.S. region for anything English.
  { L"en", IDS_L10N_OFFSET_EN_US },
  // Use the Latin American region for anything Spanish.
  { L"es", IDS_L10N_OFFSET_ES_419 },
  // Use the Brazil region for anything Portuguese.
  { L"pt", IDS_L10N_OFFSET_PT_BR },
  // Use the P.R.C. region for anything Chinese.
  { L"zh", IDS_L10N_OFFSET_ZH_CN }
};

#if !defined(NDEBUG)
// Returns true if the items in the given range are sorted. If
// |byNameAndOffset| is true, the items must be sorted by both name and offset.
90 bool IsArraySorted(const LangToOffset* first, const LangToOffset* last, 91 bool byNameAndOffset) { 92 if (last - first > 1) { 93 for (--last; first != last; ++first) { 94 if (!(std::wstring(first->language) < (first + 1)->language) || 95 byNameAndOffset && !(first->offset < (first + 1)->offset)) { 96 return false; 97 } 98 } 99 } 100 return true; 101 } 102 103 // Validates that the static read-only mappings are properly sorted. 104 void ValidateMappings() { 105 // Ensure that kLanguageOffsetPairs is sorted. 106 DCHECK(IsArraySorted(&kLanguageOffsetPairs[0], 107 &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)], 108 true)) << "kOffsetToLanguageId is not sorted"; 109 110 // Ensure that kLanguageToOffsetExceptions is sorted. 111 DCHECK(IsArraySorted( 112 &kLanguageToOffsetExceptions[0], 113 &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)], 114 false)) << "kLanguageToOffsetExceptions is not sorted"; 115 116 // Ensure that kLanguageToOffsetWildcards is sorted. 117 DCHECK(IsArraySorted( 118 &kLanguageToOffsetWildcards[0], 119 &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)], 120 false)) << "kLanguageToOffsetWildcards is not sorted"; 121 } 122 #endif // !defined(NDEBUG) 123 124 // A less-than overload to do slightly more efficient searches in the 125 // sorted arrays. 126 bool operator<(const LangToOffset& left, const std::wstring& right) { 127 return left.language < right; 128 } 129 130 // A less-than overload to do slightly more efficient searches in the 131 // sorted arrays. 132 bool operator<(const std::wstring& left, const LangToOffset& right) { 133 return left < right.language; 134 } 135 136 // A not-so-efficient less-than overload for the same uses as above. 137 bool operator<(const LangToOffset& left, const LangToOffset& right) { 138 return std::wstring(left.language) < right.language; 139 } 140 141 // A compare function for searching in a sorted array by offset. 
142 bool IsOffsetLessThan(const LangToOffset& left, const LangToOffset& right) { 143 return left.offset < right.offset; 144 } 145 146 // Binary search in one of the sorted arrays to find the offset corresponding to 147 // a given language |name|. 148 bool TryFindOffset(const LangToOffset* first, const LangToOffset* last, 149 const std::wstring& name, int* offset) { 150 const LangToOffset* search_result = std::lower_bound(first, last, name); 151 if (last != search_result && search_result->language == name) { 152 *offset = search_result->offset; 153 return true; 154 } 155 return false; 156 } 157 158 // A predicate function for LanguageSelector::SelectIf that searches for the 159 // offset of a translated language. The search first tries to find an exact 160 // match. Failing that, an exact match with an alias is attempted. 161 bool GetLanguageOffset(const std::wstring& language, int* offset) { 162 // Note: always perform the exact match first so that an alias is never 163 // selected in place of a future translation. 164 return 165 TryFindOffset( 166 &kLanguageOffsetPairs[0], 167 &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)], 168 language, offset) || 169 TryFindOffset( 170 &kLanguageToOffsetExceptions[0], 171 &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)], 172 language, offset); 173 } 174 175 // A predicate function for LanguageSelector::SelectIf that searches for a 176 // wildcard match with |language|'s primary language subtag. 177 bool MatchLanguageOffset(const std::wstring& language, int* offset) { 178 std::wstring primary_language = language.substr(0, language.find(L'-')); 179 180 // Now check for wildcards. 181 return 182 TryFindOffset( 183 &kLanguageToOffsetWildcards[0], 184 &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)], 185 primary_language, offset); 186 } 187 188 // Adds to |candidates| the eligible languages on the system. 
Any language 189 // setting specified by Omaha takes precedence over the operating system's 190 // configured languages. 191 void GetCandidatesFromSystem(std::vector<std::wstring>* candidates) { 192 DCHECK(candidates); 193 std::wstring language; 194 195 // Omaha gets first pick. 196 GoogleUpdateSettings::GetLanguage(&language); 197 if (!language.empty()) { 198 candidates->push_back(language); 199 } 200 201 // Now try the Windows UI languages. Use the thread preferred since that will 202 // kindly return us a list of all kinds of fallbacks. 203 base::win::i18n::GetThreadPreferredUILanguageList(candidates); 204 } 205 206 } // namespace 207 208 namespace installer { 209 210 LanguageSelector::LanguageSelector() 211 : offset_(arraysize(kLanguageOffsetPairs)) { 212 #if !defined(NDEBUG) 213 ValidateMappings(); 214 #endif // !defined(NDEBUG) 215 std::vector<std::wstring> candidates; 216 217 GetCandidatesFromSystem(&candidates); 218 DoSelect(candidates); 219 } 220 221 LanguageSelector::LanguageSelector(const std::vector<std::wstring>& candidates) 222 : offset_(arraysize(kLanguageOffsetPairs)) { 223 #if !defined(NDEBUG) 224 ValidateMappings(); 225 #endif // !defined(NDEBUG) 226 DoSelect(candidates); 227 } 228 229 LanguageSelector::~LanguageSelector() { 230 } 231 232 // static 233 std::wstring LanguageSelector::GetLanguageName(int offset) { 234 DCHECK_GE(offset, 0); 235 DCHECK_LT(static_cast<size_t>(offset), arraysize(kLanguageOffsetPairs)); 236 237 LangToOffset value = { NULL, offset }; 238 const LangToOffset* search_result = 239 std::lower_bound(&kLanguageOffsetPairs[0], 240 &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)], 241 value, IsOffsetLessThan); 242 if (&kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)] != search_result && 243 search_result->offset == offset) { 244 return search_result->language; 245 } 246 NOTREACHED() << "Unknown language offset."; 247 return std::wstring(&kFallbackLanguage[0], arraysize(kFallbackLanguage) - 1); 248 } 249 250 // Runs 
through the set of candidates, sending their downcased representation 251 // through |select_predicate|. Returns true if the predicate selects a 252 // candidate, in which case |matched_name| is assigned the value of the 253 // candidate and |matched_offset| is assigned the language offset of the 254 // selected translation. 255 // static 256 bool LanguageSelector::SelectIf(const std::vector<std::wstring>& candidates, 257 SelectPred_Fn select_predicate, 258 std::wstring* matched_name, 259 int* matched_offset) { 260 std::wstring candidate; 261 for (std::vector<std::wstring>::const_iterator scan = candidates.begin(), 262 end = candidates.end(); scan != end; ++scan) { 263 candidate.assign(*scan); 264 StringToLowerASCII(&candidate); 265 if (select_predicate(candidate, matched_offset)) { 266 matched_name->assign(*scan); 267 return true; 268 } 269 } 270 271 return false; 272 } 273 274 // Select the best-fit translation from the ordered list |candidates|. 275 // At the conclusion, this instance's |matched_candidate_| and |offset_| members 276 // are set to the name of the selected candidate and the offset of the matched 277 // translation. If no translation is selected, the fallback's name and offset 278 // are selected. 279 void LanguageSelector::DoSelect(const std::vector<std::wstring>& candidates) { 280 // Make a pass through the candidates looking for an exact or alias match. 281 // Failing that, make another pass looking for a wildcard match. 282 if (!SelectIf(candidates, &GetLanguageOffset, &matched_candidate_, 283 &offset_) && 284 !SelectIf(candidates, &MatchLanguageOffset, &matched_candidate_, 285 &offset_)) { 286 VLOG(1) << "No suitable language found for any candidates."; 287 288 // Our fallback is "en-us" 289 matched_candidate_.assign(&kFallbackLanguage[0], 290 arraysize(kFallbackLanguage) - 1); 291 offset_ = kFallbackLanguageOffset; 292 } 293 } 294 295 } // namespace installer 296