1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <cstdint> 18 #include <cstdlib> 19 #include <cstring> 20 #include <string> 21 #include <unordered_map> 22 #include <unordered_set> 23 24 #include <androidfw/LocaleData.h> 25 26 namespace android { 27 28 #include "LocaleDataTables.cpp" 29 30 inline uint32_t packLocale(const char* language, const char* region) { 31 return (((uint8_t) language[0]) << 24u) | (((uint8_t) language[1]) << 16u) | 32 (((uint8_t) region[0]) << 8u) | ((uint8_t) region[1]); 33 } 34 35 inline uint32_t dropRegion(uint32_t packed_locale) { 36 return packed_locale & 0xFFFF0000lu; 37 } 38 39 inline bool hasRegion(uint32_t packed_locale) { 40 return (packed_locale & 0x0000FFFFlu) != 0; 41 } 42 43 const size_t SCRIPT_LENGTH = 4; 44 const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]); 45 const uint32_t PACKED_ROOT = 0; // to represent the root locale 46 47 uint32_t findParent(uint32_t packed_locale, const char* script) { 48 if (hasRegion(packed_locale)) { 49 for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) { 50 if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) { 51 auto map = SCRIPT_PARENTS[i].map; 52 auto lookup_result = map->find(packed_locale); 53 if (lookup_result != map->end()) { 54 return lookup_result->second; 55 } 56 break; 57 } 58 } 59 return dropRegion(packed_locale); 60 } 61 return PACKED_ROOT; 62 } 63 64 // Find the ancestors of a locale, and fill 'out' with it (assumes out has enough 65 // space). If any of the members of stop_list was seen, write it in the 66 // output but stop afterwards. 67 // 68 // This also outputs the index of the last written ancestor in the stop_list 69 // to stop_list_index, which will be -1 if it is not found in the stop_list. 70 // 71 // Returns the number of ancestors written in the output, which is always 72 // at least one. 73 // 74 // (If 'out' is nullptr, we do everything the same way but we simply don't write 75 // any results in 'out'.) 76 size_t findAncestors(uint32_t* out, ssize_t* stop_list_index, 77 uint32_t packed_locale, const char* script, 78 const uint32_t* stop_list, size_t stop_set_length) { 79 uint32_t ancestor = packed_locale; 80 size_t count = 0; 81 do { 82 if (out != nullptr) out[count] = ancestor; 83 count++; 84 for (size_t i = 0; i < stop_set_length; i++) { 85 if (stop_list[i] == ancestor) { 86 *stop_list_index = (ssize_t) i; 87 return count; 88 } 89 } 90 ancestor = findParent(ancestor, script); 91 } while (ancestor != PACKED_ROOT); 92 *stop_list_index = (ssize_t) -1; 93 return count; 94 } 95 96 size_t findDistance(uint32_t supported, 97 const char* script, 98 const uint32_t* request_ancestors, 99 size_t request_ancestors_count) { 100 ssize_t request_ancestors_index; 101 const size_t supported_ancestor_count = findAncestors( 102 nullptr, &request_ancestors_index, 103 supported, script, 104 request_ancestors, request_ancestors_count); 105 // Since both locales share the same root, there will always be a shared 106 // ancestor, so the distance in the parent tree is the sum of the distance 107 // of 'supported' to the lowest common ancestor (number of ancestors 108 // written for 'supported' minus 1) plus the distance of 'request' to the 109 // lowest common ancestor (the index of the ancestor in request_ancestors). 110 return supported_ancestor_count + request_ancestors_index - 1; 111 } 112 113 inline bool isRepresentative(uint32_t language_and_region, const char* script) { 114 const uint64_t packed_locale = ( 115 (((uint64_t) language_and_region) << 32u) | 116 (((uint64_t) script[0]) << 24u) | 117 (((uint64_t) script[1]) << 16u) | 118 (((uint64_t) script[2]) << 8u) | 119 ((uint64_t) script[3])); 120 121 return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0); 122 } 123 124 int localeDataCompareRegions( 125 const char* left_region, const char* right_region, 126 const char* requested_language, const char* requested_script, 127 const char* requested_region) { 128 129 if (left_region[0] == right_region[0] && left_region[1] == right_region[1]) { 130 return 0; 131 } 132 const uint32_t left = packLocale(requested_language, left_region); 133 const uint32_t right = packLocale(requested_language, right_region); 134 const uint32_t request = packLocale(requested_language, requested_region); 135 136 uint32_t request_ancestors[MAX_PARENT_DEPTH+1]; 137 ssize_t left_right_index; 138 // Find the parents of the request, but stop as soon as we saw left or right 139 const uint32_t left_and_right[] = {left, right}; 140 const size_t ancestor_count = findAncestors( 141 request_ancestors, &left_right_index, 142 request, requested_script, 143 left_and_right, sizeof(left_and_right)/sizeof(left_and_right[0])); 144 if (left_right_index == 0) { // We saw left earlier 145 return 1; 146 } 147 if (left_right_index == 1) { // We saw right earlier 148 return -1; 149 } 150 151 // If we are here, neither left nor right are an ancestor of the 152 // request. This means that all the ancestors have been computed and 153 // the last ancestor is just the language by itself. We will use the 154 // distance in the parent tree for determining the better match. 155 const size_t left_distance = findDistance( 156 left, requested_script, request_ancestors, ancestor_count); 157 const size_t right_distance = findDistance( 158 right, requested_script, request_ancestors, ancestor_count); 159 if (left_distance != right_distance) { 160 return (int) right_distance - (int) left_distance; // smaller distance is better 161 } 162 163 // If we are here, left and right are equidistant from the request. We will 164 // try and see if any of them is a representative locale. 165 const bool left_is_representative = isRepresentative(left, requested_script); 166 const bool right_is_representative = isRepresentative(right, requested_script); 167 if (left_is_representative != right_is_representative) { 168 return (int) left_is_representative - (int) right_is_representative; 169 } 170 171 // We have no way of figuring out which locale is a better match. For 172 // the sake of stability, we consider the locale with the lower region 173 // code (in dictionary order) better, with two-letter codes before 174 // three-digit codes (since two-letter codes are more specific). 175 return (int64_t) right - (int64_t) left; 176 } 177 178 void localeDataComputeScript(char out[4], const char* language, const char* region) { 179 if (language[0] == '\0') { 180 memset(out, '\0', SCRIPT_LENGTH); 181 return; 182 } 183 uint32_t lookup_key = packLocale(language, region); 184 auto lookup_result = LIKELY_SCRIPTS.find(lookup_key); 185 if (lookup_result == LIKELY_SCRIPTS.end()) { 186 // We couldn't find the locale. Let's try without the region 187 if (region[0] != '\0') { 188 lookup_key = dropRegion(lookup_key); 189 lookup_result = LIKELY_SCRIPTS.find(lookup_key); 190 if (lookup_result != LIKELY_SCRIPTS.end()) { 191 memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH); 192 return; 193 } 194 } 195 // We don't know anything about the locale 196 memset(out, '\0', SCRIPT_LENGTH); 197 return; 198 } else { 199 // We found the locale. 200 memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH); 201 } 202 } 203 204 const uint32_t ENGLISH_STOP_LIST[2] = { 205 0x656E0000lu, // en 206 0x656E8400lu, // en-001 207 }; 208 const char ENGLISH_CHARS[2] = {'e', 'n'}; 209 const char LATIN_CHARS[4] = {'L', 'a', 't', 'n'}; 210 211 bool localeDataIsCloseToUsEnglish(const char* region) { 212 const uint32_t locale = packLocale(ENGLISH_CHARS, region); 213 ssize_t stop_list_index; 214 findAncestors(nullptr, &stop_list_index, locale, LATIN_CHARS, ENGLISH_STOP_LIST, 2); 215 // A locale is like US English if we see "en" before "en-001" in its ancestor list. 216 return stop_list_index == 0; // 'en' is first in ENGLISH_STOP_LIST 217 } 218 219 } // namespace android 220