Home | History | Annotate | Download | only in src
      1 // Copyright (C) 2014 Google Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 // http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "language.h"
     16 
     17 #include <algorithm>
     18 #include <cctype>
     19 #include <string>
     20 #include <vector>
     21 
     22 #include "rule.h"
     23 #include "util/string_split.h"
     24 
     25 namespace i18n {
     26 namespace addressinput {
     27 
     28 Language::Language(const std::string& language_tag) : tag(language_tag),
     29                                                       base(),
     30                                                       has_latin_script(false) {
     31   // Character '-' is the separator for subtags in the BCP 47. However, some
     32   // legacy code generates tags with '_' instead of '-'.
     33   static const char kSubtagsSeparator = '-';
     34   static const char kAlternativeSubtagsSeparator = '_';
     35   std::replace(
     36       tag.begin(), tag.end(), kAlternativeSubtagsSeparator, kSubtagsSeparator);
     37 
     38   // OK to use 'tolower' because BCP 47 tags are always in ASCII.
     39   std::string lowercase = tag;
     40   std::transform(
     41       lowercase.begin(), lowercase.end(), lowercase.begin(), tolower);
     42 
     43   base = lowercase.substr(0, lowercase.find(kSubtagsSeparator));
     44 
     45   // The lowercase BCP 47 subtag for Latin script.
     46   static const char kLowercaseLatinScript[] = "latn";
     47   std::vector<std::string> subtags;
     48   SplitString(lowercase, kSubtagsSeparator, &subtags);
     49 
     50   // Support only the second and third position for the script.
     51   has_latin_script =
     52       (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) ||
     53       (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript);
     54 }
     55 
     56 Language::~Language() {}
     57 
     58 Language ChooseBestAddressLanguage(const Rule& address_region_rule,
     59                                    const Language& ui_language) {
     60   if (address_region_rule.GetLanguages().empty()) {
     61     return ui_language;
     62   }
     63 
     64   std::vector<Language> available_languages;
     65   for (std::vector<std::string>::const_iterator
     66        language_tag_it = address_region_rule.GetLanguages().begin();
     67        language_tag_it != address_region_rule.GetLanguages().end();
     68        ++language_tag_it) {
     69     available_languages.push_back(Language(*language_tag_it));
     70   }
     71 
     72   if (ui_language.tag.empty()) {
     73     return available_languages.front();
     74   }
     75 
     76   bool has_latin_format = !address_region_rule.GetLatinFormat().empty();
     77 
     78   // The conventionally formatted BCP 47 Latin script with a preceding subtag
     79   // separator.
     80   static const char kLatinScriptSuffix[] = "-Latn";
     81   Language latin_script_language(
     82       available_languages.front().base + kLatinScriptSuffix);
     83   if (has_latin_format && ui_language.has_latin_script) {
     84     return latin_script_language;
     85   }
     86 
     87   for (std::vector<Language>::const_iterator
     88        available_lang_it = available_languages.begin();
     89        available_lang_it != available_languages.end(); ++available_lang_it) {
     90     // Base language comparison works because no region supports the same base
     91     // language with different scripts, for now. For example, no region supports
     92     // "zh-Hant" and "zh-Hans" at the same time.
     93     if (ui_language.base == available_lang_it->base) {
     94       return *available_lang_it;
     95     }
     96   }
     97 
     98   return has_latin_format ? latin_script_language : available_languages.front();
     99 }
    100 
    101 }  // namespace addressinput
    102 }  // namespace i18n
    103