1 // Copyright (C) 2014 Google Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "language.h" 16 17 #include <algorithm> 18 #include <cctype> 19 #include <string> 20 #include <vector> 21 22 #include "rule.h" 23 #include "util/string_split.h" 24 25 namespace i18n { 26 namespace addressinput { 27 28 Language::Language(const std::string& language_tag) : tag(language_tag), 29 base(), 30 has_latin_script(false) { 31 // Character '-' is the separator for subtags in the BCP 47. However, some 32 // legacy code generates tags with '_' instead of '-'. 33 static const char kSubtagsSeparator = '-'; 34 static const char kAlternativeSubtagsSeparator = '_'; 35 std::replace( 36 tag.begin(), tag.end(), kAlternativeSubtagsSeparator, kSubtagsSeparator); 37 38 // OK to use 'tolower' because BCP 47 tags are always in ASCII. 39 std::string lowercase = tag; 40 std::transform( 41 lowercase.begin(), lowercase.end(), lowercase.begin(), tolower); 42 43 base = lowercase.substr(0, lowercase.find(kSubtagsSeparator)); 44 45 // The lowercase BCP 47 subtag for Latin script. 46 static const char kLowercaseLatinScript[] = "latn"; 47 std::vector<std::string> subtags; 48 SplitString(lowercase, kSubtagsSeparator, &subtags); 49 50 // Support only the second and third position for the script. 51 has_latin_script = 52 (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) || 53 (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript); 54 } 55 56 Language::~Language() {} 57 58 Language ChooseBestAddressLanguage(const Rule& address_region_rule, 59 const Language& ui_language) { 60 if (address_region_rule.GetLanguages().empty()) { 61 return ui_language; 62 } 63 64 std::vector<Language> available_languages; 65 for (std::vector<std::string>::const_iterator 66 language_tag_it = address_region_rule.GetLanguages().begin(); 67 language_tag_it != address_region_rule.GetLanguages().end(); 68 ++language_tag_it) { 69 available_languages.push_back(Language(*language_tag_it)); 70 } 71 72 if (ui_language.tag.empty()) { 73 return available_languages.front(); 74 } 75 76 bool has_latin_format = !address_region_rule.GetLatinFormat().empty(); 77 78 // The conventionally formatted BCP 47 Latin script with a preceding subtag 79 // separator. 80 static const char kLatinScriptSuffix[] = "-Latn"; 81 Language latin_script_language( 82 available_languages.front().base + kLatinScriptSuffix); 83 if (has_latin_format && ui_language.has_latin_script) { 84 return latin_script_language; 85 } 86 87 for (std::vector<Language>::const_iterator 88 available_lang_it = available_languages.begin(); 89 available_lang_it != available_languages.end(); ++available_lang_it) { 90 // Base language comparison works because no region supports the same base 91 // language with different scripts, for now. For example, no region supports 92 // "zh-Hant" and "zh-Hans" at the same time. 93 if (ui_language.base == available_lang_it->base) { 94 return *available_lang_it; 95 } 96 } 97 98 return has_latin_format ? latin_script_language : available_languages.front(); 99 } 100 101 } // namespace addressinput 102 } // namespace i18n 103