Home | History | Annotate | Download | only in androidfw
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <cstdint>
     18 #include <cstdlib>
     19 #include <cstring>
     20 #include <string>
     21 #include <unordered_map>
     22 #include <unordered_set>
     23 
     24 #include <androidfw/LocaleData.h>
     25 
     26 namespace android {
     27 
     28 #include "LocaleDataTables.cpp"
     29 
     30 inline uint32_t packLocale(const char* language, const char* region) {
     31     return (((uint8_t) language[0]) << 24u) | (((uint8_t) language[1]) << 16u) |
     32            (((uint8_t) region[0]) << 8u) | ((uint8_t) region[1]);
     33 }
     34 
     35 inline uint32_t dropRegion(uint32_t packed_locale) {
     36     return packed_locale & 0xFFFF0000lu;
     37 }
     38 
     39 inline bool hasRegion(uint32_t packed_locale) {
     40     return (packed_locale & 0x0000FFFFlu) != 0;
     41 }
     42 
     43 const size_t SCRIPT_LENGTH = 4;
     44 const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]);
     45 const uint32_t PACKED_ROOT = 0; // to represent the root locale
     46 
     47 uint32_t findParent(uint32_t packed_locale, const char* script) {
     48     if (hasRegion(packed_locale)) {
     49         for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) {
     50             if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) {
     51                 auto map = SCRIPT_PARENTS[i].map;
     52                 auto lookup_result = map->find(packed_locale);
     53                 if (lookup_result != map->end()) {
     54                     return lookup_result->second;
     55                 }
     56                 break;
     57             }
     58         }
     59         return dropRegion(packed_locale);
     60     }
     61     return PACKED_ROOT;
     62 }
     63 
     64 // Find the ancestors of a locale, and fill 'out' with it (assumes out has enough
     65 // space). If any of the members of stop_list was seen, write it in the
     66 // output but stop afterwards.
     67 //
     68 // This also outputs the index of the last written ancestor in the stop_list
     69 // to stop_list_index, which will be -1 if it is not found in the stop_list.
     70 //
     71 // Returns the number of ancestors written in the output, which is always
     72 // at least one.
     73 //
     74 // (If 'out' is nullptr, we do everything the same way but we simply don't write
     75 // any results in 'out'.)
     76 size_t findAncestors(uint32_t* out, ssize_t* stop_list_index,
     77                      uint32_t packed_locale, const char* script,
     78                      const uint32_t* stop_list, size_t stop_set_length) {
     79     uint32_t ancestor = packed_locale;
     80     size_t count = 0;
     81     do {
     82         if (out != nullptr) out[count] = ancestor;
     83         count++;
     84         for (size_t i = 0; i < stop_set_length; i++) {
     85             if (stop_list[i] == ancestor) {
     86                 *stop_list_index = (ssize_t) i;
     87                 return count;
     88             }
     89         }
     90         ancestor = findParent(ancestor, script);
     91     } while (ancestor != PACKED_ROOT);
     92     *stop_list_index = (ssize_t) -1;
     93     return count;
     94 }
     95 
     96 size_t findDistance(uint32_t supported,
     97                     const char* script,
     98                     const uint32_t* request_ancestors,
     99                     size_t request_ancestors_count) {
    100     ssize_t request_ancestors_index;
    101     const size_t supported_ancestor_count = findAncestors(
    102             nullptr, &request_ancestors_index,
    103             supported, script,
    104             request_ancestors, request_ancestors_count);
    105     // Since both locales share the same root, there will always be a shared
    106     // ancestor, so the distance in the parent tree is the sum of the distance
    107     // of 'supported' to the lowest common ancestor (number of ancestors
    108     // written for 'supported' minus 1) plus the distance of 'request' to the
    109     // lowest common ancestor (the index of the ancestor in request_ancestors).
    110     return supported_ancestor_count + request_ancestors_index - 1;
    111 }
    112 
    113 inline bool isRepresentative(uint32_t language_and_region, const char* script) {
    114     const uint64_t packed_locale = (
    115             (((uint64_t) language_and_region) << 32u) |
    116             (((uint64_t) script[0]) << 24u) |
    117             (((uint64_t) script[1]) << 16u) |
    118             (((uint64_t) script[2]) <<  8u) |
    119             ((uint64_t) script[3]));
    120 
    121     return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0);
    122 }
    123 
    124 int localeDataCompareRegions(
    125         const char* left_region, const char* right_region,
    126         const char* requested_language, const char* requested_script,
    127         const char* requested_region) {
    128 
    129     if (left_region[0] == right_region[0] && left_region[1] == right_region[1]) {
    130         return 0;
    131     }
    132     const uint32_t left = packLocale(requested_language, left_region);
    133     const uint32_t right = packLocale(requested_language, right_region);
    134     const uint32_t request = packLocale(requested_language, requested_region);
    135 
    136     uint32_t request_ancestors[MAX_PARENT_DEPTH+1];
    137     ssize_t left_right_index;
    138     // Find the parents of the request, but stop as soon as we saw left or right
    139     const uint32_t left_and_right[] = {left, right};
    140     const size_t ancestor_count = findAncestors(
    141             request_ancestors, &left_right_index,
    142             request, requested_script,
    143             left_and_right, sizeof(left_and_right)/sizeof(left_and_right[0]));
    144     if (left_right_index == 0) { // We saw left earlier
    145         return 1;
    146     }
    147     if (left_right_index == 1) { // We saw right earlier
    148         return -1;
    149     }
    150 
    151     // If we are here, neither left nor right are an ancestor of the
    152     // request. This means that all the ancestors have been computed and
    153     // the last ancestor is just the language by itself. We will use the
    154     // distance in the parent tree for determining the better match.
    155     const size_t left_distance = findDistance(
    156             left, requested_script, request_ancestors, ancestor_count);
    157     const size_t right_distance = findDistance(
    158             right, requested_script, request_ancestors, ancestor_count);
    159     if (left_distance != right_distance) {
    160         return (int) right_distance - (int) left_distance; // smaller distance is better
    161     }
    162 
    163     // If we are here, left and right are equidistant from the request. We will
    164     // try and see if any of them is a representative locale.
    165     const bool left_is_representative = isRepresentative(left, requested_script);
    166     const bool right_is_representative = isRepresentative(right, requested_script);
    167     if (left_is_representative != right_is_representative) {
    168         return (int) left_is_representative - (int) right_is_representative;
    169     }
    170 
    171     // We have no way of figuring out which locale is a better match. For
    172     // the sake of stability, we consider the locale with the lower region
    173     // code (in dictionary order) better, with two-letter codes before
    174     // three-digit codes (since two-letter codes are more specific).
    175     return (int64_t) right - (int64_t) left;
    176 }
    177 
    178 void localeDataComputeScript(char out[4], const char* language, const char* region) {
    179     if (language[0] == '\0') {
    180         memset(out, '\0', SCRIPT_LENGTH);
    181         return;
    182     }
    183     uint32_t lookup_key = packLocale(language, region);
    184     auto lookup_result = LIKELY_SCRIPTS.find(lookup_key);
    185     if (lookup_result == LIKELY_SCRIPTS.end()) {
    186         // We couldn't find the locale. Let's try without the region
    187         if (region[0] != '\0') {
    188             lookup_key = dropRegion(lookup_key);
    189             lookup_result = LIKELY_SCRIPTS.find(lookup_key);
    190             if (lookup_result != LIKELY_SCRIPTS.end()) {
    191                 memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
    192                 return;
    193             }
    194         }
    195         // We don't know anything about the locale
    196         memset(out, '\0', SCRIPT_LENGTH);
    197         return;
    198     } else {
    199         // We found the locale.
    200         memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
    201     }
    202 }
    203 
    204 const uint32_t ENGLISH_STOP_LIST[2] = {
    205     0x656E0000lu, // en
    206     0x656E8400lu, // en-001
    207 };
    208 const char ENGLISH_CHARS[2] = {'e', 'n'};
    209 const char LATIN_CHARS[4] = {'L', 'a', 't', 'n'};
    210 
    211 bool localeDataIsCloseToUsEnglish(const char* region) {
    212     const uint32_t locale = packLocale(ENGLISH_CHARS, region);
    213     ssize_t stop_list_index;
    214     findAncestors(nullptr, &stop_list_index, locale, LATIN_CHARS, ENGLISH_STOP_LIST, 2);
    215     // A locale is like US English if we see "en" before "en-001" in its ancestor list.
    216     return stop_list_index == 0; // 'en' is first in ENGLISH_STOP_LIST
    217 }
    218 
    219 } // namespace android
    220