1 /* 2 ********************************************************************** 3 * Copyright (C) 1997-2010, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * File ULOC.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 04/01/97 aliu Creation. 13 * 08/21/98 stephen JDK 1.2 sync 14 * 12/08/98 rtg New Locale implementation and C API 15 * 03/15/99 damiba overhaul. 16 * 04/06/99 stephen changed setDefault() to realloc and copy 17 * 06/14/99 stephen Changed calls to ures_open for new params 18 * 07/21/99 stephen Modified setDefault() to propagate to C++ 19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, 20 * brought canonicalization code into line with spec 21 *****************************************************************************/ 22 23 /* 24 POSIX's locale format, from putil.c: [no spaces] 25 26 ll [ _CC ] [ . MM ] [ @ VV] 27 28 l = lang, C = ctry, M = charmap, V = variant 29 */ 30 31 #include "unicode/utypes.h" 32 #include "unicode/ustring.h" 33 #include "unicode/uloc.h" 34 35 #include "putilimp.h" 36 #include "ustr_imp.h" 37 #include "ulocimp.h" 38 #include "umutex.h" 39 #include "cstring.h" 40 #include "cmemory.h" 41 #include "ucln_cmn.h" 42 #include "locmap.h" 43 #include "uarrsort.h" 44 #include "uenumimp.h" 45 #include "uassert.h" 46 47 #include <stdio.h> /* for sprintf */ 48 49 /* ### Declarations **************************************************/ 50 51 /* Locale stuff from locid.cpp */ 52 U_CFUNC void locale_set_default(const char *id); 53 U_CFUNC const char *locale_get_default(void); 54 U_CFUNC int32_t 55 locale_getKeywords(const char *localeID, 56 char prev, 57 char *keywords, int32_t keywordCapacity, 58 char *values, int32_t valuesCapacity, int32_t *valLen, 59 UBool valuesToo, 60 UErrorCode *status); 61 62 /* ### Data tables **************************************************/ 63 64 /** 65 * Table of language codes, both 2- and 3-letter, with preference 66 * given to 2-letter codes where possible. Includes 3-letter codes 67 * that lack a 2-letter equivalent. 68 * 69 * This list must be in sorted order. This list is returned directly 70 * to the user by some API. 71 * 72 * This list must be kept in sync with LANGUAGES_3, with corresponding 73 * entries matched. 74 * 75 * This table should be terminated with a NULL entry, followed by a 76 * second list, and another NULL entry. The first list is visible to 77 * user code when this array is returned by API. The second list 78 * contains codes we support, but do not expose through user API. 79 * 80 * Notes 81 * 82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to 83 * include the revisions up to 2001/7/27 *CWB* 84 * 85 * The 3 character codes are the terminology codes like RFC 3066. This 86 * is compatible with prior ICU codes 87 * 88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the 89 * table but now at the end of the table because 3 character codes are 90 * duplicates. This avoids bad searches going from 3 to 2 character 91 * codes. 92 * 93 * The range qaa-qtz is reserved for local use 94 */ 95 static const char * const LANGUAGES[] = { 96 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", 97 "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an", 98 "ang", "anp", "apa", 99 "ar", "arc", "arn", "arp", "art", "arw", "as", "ast", 100 "ath", "aus", "av", "awa", "ay", "az", "ba", "bad", 101 "bai", "bal", "ban", "bas", "bat", "be", "bej", 102 "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", 103 "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", 104 "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", 105 "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", 106 "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", 107 "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", 108 "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", 109 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", 110 "dz", "ee", "efi", "egy", "eka", "el", "elx", "en", 111 "enm", "eo", "es", "et", "eu", "ewo", "fa", 112 "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon", 113 "fr", "frm", "fro", "frr", "frs", "fur", "fy", 114 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil", 115 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb", 116 "grc", "gsw", "gu", "gv", "gwi", 117 "ha", "hai", "haw", "he", "hi", "hil", "him", 118 "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", 119 "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", 120 "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", 121 "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", 122 "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi", 123 "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", 124 "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks", 125 "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", 126 "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", 127 "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", 128 "lv", "mad", "mag", "mai", "mak", "man", "map", "mas", 129 "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min", 130 "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", 131 "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", 132 "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", 133 "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", 134 "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub", 135 "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", 136 "om", "or", "os", "osa", "ota", "oto", "pa", "paa", 137 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", 138 "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", 139 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", 140 "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", 141 "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem", 142 "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", 143 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", 144 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", 145 "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", 146 "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter", 147 "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", 148 "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv", 149 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", 150 "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", 151 "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", 152 "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", 153 "yi", "yo", "ypk", "za", "zap", "zbl", "zen", "zh", "znd", 154 "zu", "zun", "zxx", "zza", 155 NULL, 156 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ 157 NULL 158 }; 159 static const char* const DEPRECATED_LANGUAGES[]={ 160 "in", "iw", "ji", "jw", NULL, NULL 161 }; 162 static const char* const REPLACEMENT_LANGUAGES[]={ 163 "id", "he", "yi", "jv", NULL, NULL 164 }; 165 166 /** 167 * Table of 3-letter language codes. 168 * 169 * This is a lookup table used to convert 3-letter language codes to 170 * their 2-letter equivalent, where possible. It must be kept in sync 171 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the 172 * same language as LANGUAGES_3[i]. The commented-out lines are 173 * copied from LANGUAGES to make eyeballing this baby easier. 174 * 175 * Where a 3-letter language code has no 2-letter equivalent, the 176 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i]. 177 * 178 * This table should be terminated with a NULL entry, followed by a 179 * second list, and another NULL entry. The two lists correspond to 180 * the two lists in LANGUAGES. 181 */ 182 static const char * const LANGUAGES_3[] = { 183 /* "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */ 184 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa", 185 /* "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an", "ang", "anp", "apa", */ 186 "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa", 187 /* "ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */ 188 "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast", 189 /* "ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */ 190 "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad", 191 /* "bai", "bal", "ban", "bas", "bat", "be", "bej", */ 192 "bai", "bal", "ban", "bas", "bat", "bel", "bej", 193 /* "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */ 194 "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin", 195 /* "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */ 196 "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos", 197 /* "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */ 198 "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau", 199 /* "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */ 200 "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm", 201 /* "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */ 202 "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop", 203 /* "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */ 204 "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus", 205 /* "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */ 206 "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den", 207 /* "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */ 208 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu", 209 /* "dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */ 210 "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng", 211 /* "enm", "eo", "es", "et", "eu", "ewo", "fa", */ 212 "enm", "epo", "spa", "est", "eus", "ewo", "fas", 213 /* "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon", */ 214 "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon", 215 /* "fr", "frm", "fro", "frr", "frs", "fur", "fy", "ga", "gaa", "gay", */ 216 "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay", 217 /* "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */ 218 "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn", 219 /* "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu", "gv", */ 220 "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv", 221 /* "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */ 222 "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him", 223 /* "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */ 224 "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her", 225 /* "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */ 226 "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk", 227 /* "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */ 228 "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita", 229 /* "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */ 230 "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab", 231 /* "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi",*/ 232 "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi", 233 /* "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */ 234 "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan", 235 /* "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks", */ 236 "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas", 237 /* "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */ 238 "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad", 239 /* "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", */ 240 "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol", 241 /* "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */ 242 "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus", 243 /* "lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */ 244 "lav", "mad", "mag", "mai", "mak", "man", "map", "mas", 245 /* "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min", */ 246 "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min", 247 /* "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */ 248 "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno", 249 /* "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */ 250 "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun", 251 /* "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */ 252 "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap", 253 /* "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */ 254 "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic", 255 /* "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub", */ 256 "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub", 257 /* "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */ 258 "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji", 259 /* "om", "or", "os", "osa", "ota", "oto", "pa", "paa", */ 260 "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa", 261 /* "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */ 262 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", 263 /* "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */ 264 "pli", "pol", "pon", "pra", "pro", "pus", "por", "que", 265 /* "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */ 266 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom", 267 /* "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */ 268 "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam", 269 /* "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem", */ 270 "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem", 271 /* "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */ 272 "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit", 273 /* "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */ 274 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn", 275 /* "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */ 276 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp", 277 /* "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */ 278 "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux", 279 /* "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter", */ 280 "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter", 281 /* "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */ 282 "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl", 283 /* "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv", */ 284 "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv", 285 /* "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */ 286 "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi", 287 /* "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */ 288 "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd", 289 /* "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */ 290 "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak", 291 /* "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */ 292 "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap", 293 /* "yi", "yo", "ypk", "za", "zap", "zbl", "zen", "zh", "znd", */ 294 "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd", 295 /* "zu", "zun", "zxx", "zza", */ 296 "zul", "zun", "zxx", "zza", 297 NULL, 298 /* "in", "iw", "ji", "jw", "sh", */ 299 "ind", "heb", "yid", "jaw", "srp", 300 NULL 301 }; 302 303 /** 304 * Table of 2-letter country codes. 305 * 306 * This list must be in sorted order. This list is returned directly 307 * to the user by some API. 308 * 309 * This list must be kept in sync with COUNTRIES_3, with corresponding 310 * entries matched. 311 * 312 * This table should be terminated with a NULL entry, followed by a 313 * second list, and another NULL entry. The first list is visible to 314 * user code when this array is returned by API. The second list 315 * contains codes we support, but do not expose through user API. 316 * 317 * Notes: 318 * 319 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per 320 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added 321 * new codes keeping the old ones for compatibility updated to include 322 * 1999/12/03 revisions *CWB* 323 * 324 * RO(ROM) is now RO(ROU) according to 325 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html 326 */ 327 static const char * const COUNTRIES[] = { 328 "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", 329 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", 330 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", 331 "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", 332 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", 333 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", 334 "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", 335 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", 336 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", 337 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", 338 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", 339 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", 340 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", 341 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", 342 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", 343 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", 344 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", 345 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", 346 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", 347 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", 348 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", 349 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", 350 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", 351 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", 352 "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", 353 "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", 354 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", 355 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", 356 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", 357 "WS", "YE", "YT", "ZA", "ZM", "ZW", 358 NULL, 359 "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */ 360 NULL 361 }; 362 363 static const char* const DEPRECATED_COUNTRIES[] ={ 364 "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */ 365 }; 366 static const char* const REPLACEMENT_COUNTRIES[] = { 367 /* "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */ 368 "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL /* replacement country codes */ 369 }; 370 371 /** 372 * Table of 3-letter country codes. 373 * 374 * This is a lookup table used to convert 3-letter country codes to 375 * their 2-letter equivalent. It must be kept in sync with COUNTRIES. 376 * For all valid i, COUNTRIES[i] must refer to the same country as 377 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES 378 * to make eyeballing this baby easier. 379 * 380 * This table should be terminated with a NULL entry, followed by a 381 * second list, and another NULL entry. The two lists correspond to 382 * the two lists in COUNTRIES. 383 */ 384 static const char * const COUNTRIES_3[] = { 385 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */ 386 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT", 387 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ 388 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", 389 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ 390 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", 391 /* "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */ 392 "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT", 393 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ 394 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", 395 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ 396 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", 397 /* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */ 398 "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", 399 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ 400 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", 401 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ 402 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", 403 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ 404 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", 405 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ 406 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", 407 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ 408 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", 409 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ 410 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", 411 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ 412 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", 413 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ 414 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", 415 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ 416 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX", 417 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ 418 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", 419 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ 420 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", 421 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ 422 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", 423 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ 424 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", 425 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ 426 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", 427 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ 428 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", 429 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ 430 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", 431 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ 432 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", 433 /* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */ 434 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV", 435 /* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ 436 "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", 437 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ 438 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", 439 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ 440 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", 441 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ 442 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", 443 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ 444 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", 445 NULL, 446 /* "FX", "CS", "RO", "TP", "YU", "ZR", */ 447 "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR", 448 NULL 449 }; 450 451 typedef struct CanonicalizationMap { 452 const char *id; /* input ID */ 453 const char *canonicalID; /* canonicalized output ID */ 454 const char *keyword; /* keyword, or NULL if none */ 455 const char *value; /* keyword value, or NULL if kw==NULL */ 456 } CanonicalizationMap; 457 458 /** 459 * A map to canonicalize locale IDs. This handles a variety of 460 * different semantic kinds of transformations. 461 */ 462 static const CanonicalizationMap CANONICALIZE_MAP[] = { 463 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */ 464 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */ 465 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */ 466 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */ 467 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */ 468 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */ 469 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" }, 470 { "cel_GAULISH", "cel__GAULISH", NULL, NULL }, /* registered name */ 471 { "de_1901", "de__1901", NULL, NULL }, /* registered name */ 472 { "de_1906", "de__1906", NULL, NULL }, /* registered name */ 473 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */ 474 { "de_AT_PREEURO", "de_AT", "currency", "ATS" }, 475 { "de_DE_PREEURO", "de_DE", "currency", "DEM" }, 476 { "de_LU_PREEURO", "de_LU", "currency", "LUF" }, 477 { "el_GR_PREEURO", "el_GR", "currency", "GRD" }, 478 { "en_BOONT", "en__BOONT", NULL, NULL }, /* registered name */ 479 { "en_SCOUSE", "en__SCOUSE", NULL, NULL }, /* registered name */ 480 { "en_BE_PREEURO", "en_BE", "currency", "BEF" }, 481 { "en_IE_PREEURO", "en_IE", "currency", "IEP" }, 482 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */ 483 { "es_ES_PREEURO", "es_ES", "currency", "ESP" }, 484 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" }, 485 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" }, 486 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" }, 487 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" }, 488 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" }, 489 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" }, 490 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" }, 491 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */ 492 { "it_IT_PREEURO", "it_IT", "currency", "ITL" }, 493 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */ 494 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */ 495 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, 496 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, 497 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, 498 { "sl_ROZAJ", "sl__ROZAJ", NULL, NULL }, /* registered name */ 499 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ 500 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ 501 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ 502 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */ 503 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */ 504 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */ 505 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */ 506 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */ 507 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */ 508 { "zh_GAN", "zh__GAN", NULL, NULL }, /* registered name */ 509 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */ 510 { "zh_HAKKA", "zh__HAKKA", NULL, NULL }, /* registered name */ 511 { "zh_MIN", "zh__MIN", NULL, NULL }, /* registered name */ 512 { "zh_MIN_NAN", "zh__MINNAN", NULL, NULL }, /* registered name */ 513 { "zh_WUU", "zh__WUU", NULL, NULL }, /* registered name */ 514 { "zh_XIANG", "zh__XIANG", NULL, NULL }, /* registered name */ 515 { "zh_YUE", "zh__YUE", NULL, NULL }, /* registered name */ 516 }; 517 518 typedef struct VariantMap { 519 const char *variant; /* input ID */ 520 const char *keyword; /* keyword, or NULL if none */ 521 const char *value; /* keyword value, or NULL if kw==NULL */ 522 } VariantMap; 523 524 static const VariantMap VARIANT_MAP[] = { 525 { "EURO", "currency", "EUR" }, 526 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */ 527 { "STROKE", "collation", "stroke" } /* Solaris variant */ 528 }; 529 530 /* ### BCP47 Conversion *******************************************/ 531 /* Test if the locale id has BCP47 u extension and does not have '@' */ 532 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) 533 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ 534 #define _ConvertBCP47(finalID, id, buffer, length,err) \ 535 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \ 536 finalID=id; \ 537 } else { \ 538 finalID=buffer; \ 539 } 540 /* Gets the size of the shortest subtag in the given localeID. */ 541 static int32_t getShortestSubtagLength(const char *localeID) { 542 int32_t localeIDLength = uprv_strlen(localeID); 543 int32_t length = localeIDLength; 544 int32_t tmpLength = 0; 545 int32_t i; 546 UBool reset = TRUE; 547 548 for (i = 0; i < localeIDLength; i++) { 549 if (localeID[i] != '_' && localeID[i] != '-') { 550 if (reset) { 551 tmpLength = 0; 552 reset = FALSE; 553 } 554 tmpLength++; 555 } else { 556 if (tmpLength != 0 && tmpLength < length) { 557 length = tmpLength; 558 } 559 reset = TRUE; 560 } 561 } 562 563 return length; 564 } 565 566 /* ### Keywords **************************************************/ 567 568 #define ULOC_KEYWORD_BUFFER_LEN 25 569 #define ULOC_MAX_NO_KEYWORDS 25 570 571 U_CAPI const char * U_EXPORT2 572 locale_getKeywordsStart(const char *localeID) { 573 const char *result = NULL; 574 if((result = uprv_strchr(localeID, '@')) != NULL) { 575 return result; 576 } 577 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) 578 else { 579 /* We do this because the @ sign is variant, and the @ sign used on one 580 EBCDIC machine won't be compiled the same way on other EBCDIC based 581 machines. */ 582 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; 583 const uint8_t *charToFind = ebcdicSigns; 584 while(*charToFind) { 585 if((result = uprv_strchr(localeID, *charToFind)) != NULL) { 586 return result; 587 } 588 charToFind++; 589 } 590 } 591 #endif 592 return NULL; 593 } 594 595 /** 596 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] 597 * @param keywordName incoming name to be canonicalized 598 * @param status return status (keyword too long) 599 * @return length of the keyword name 600 */ 601 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) 602 { 603 int32_t i; 604 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName); 605 606 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) { 607 /* keyword name too long for internal buffer */ 608 *status = U_INTERNAL_PROGRAM_ERROR; 609 return 0; 610 } 611 612 /* normalize the keyword name */ 613 for(i = 0; i < keywordNameLen; i++) { 614 buf[i] = uprv_tolower(keywordName[i]); 615 } 616 buf[i] = 0; 617 618 return keywordNameLen; 619 } 620 621 typedef struct { 622 char keyword[ULOC_KEYWORD_BUFFER_LEN]; 623 int32_t keywordLen; 624 const char *valueStart; 625 int32_t valueLen; 626 } KeywordStruct; 627 628 static int32_t U_CALLCONV 629 compareKeywordStructs(const void *context, const void *left, const void *right) { 630 const char* leftString = ((const KeywordStruct *)left)->keyword; 631 const char* rightString = ((const KeywordStruct *)right)->keyword; 632 return uprv_strcmp(leftString, rightString); 633 } 634 635 /** 636 * Both addKeyword and addValue must already be in canonical form. 637 * Either both addKeyword and addValue are NULL, or neither is NULL. 638 * If they are not NULL they must be zero terminated. 639 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword. 640 */ 641 static int32_t 642 _getKeywords(const char *localeID, 643 char prev, 644 char *keywords, int32_t keywordCapacity, 645 char *values, int32_t valuesCapacity, int32_t *valLen, 646 UBool valuesToo, 647 const char* addKeyword, 648 const char* addValue, 649 UErrorCode *status) 650 { 651 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; 652 653 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; 654 int32_t numKeywords = 0; 655 const char* pos = localeID; 656 const char* equalSign = NULL; 657 const char* semicolon = NULL; 658 int32_t i = 0, j, n; 659 int32_t keywordsLen = 0; 660 int32_t valuesLen = 0; 661 662 if(prev == '@') { /* start of keyword definition */ 663 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ 664 do { 665 UBool duplicate = FALSE; 666 /* skip leading spaces */ 667 while(*pos == ' ') { 668 pos++; 669 } 670 if (!*pos) { /* handle trailing "; " */ 671 break; 672 } 673 if(numKeywords == maxKeywords) { 674 *status = U_INTERNAL_PROGRAM_ERROR; 675 return 0; 676 } 677 equalSign = uprv_strchr(pos, '='); 678 semicolon = uprv_strchr(pos, ';'); 679 /* lack of '=' [foo@currency] is illegal */ 680 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ 681 if(!equalSign || (semicolon && semicolon<equalSign)) { 682 *status = U_INVALID_FORMAT_ERROR; 683 return 0; 684 } 685 /* need to normalize both keyword and keyword name */ 686 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) { 687 /* keyword name too long for internal buffer */ 688 *status = U_INTERNAL_PROGRAM_ERROR; 689 return 0; 690 } 691 for(i = 0, n = 0; i < equalSign - pos; ++i) { 692 if (pos[i] != ' ') { 693 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); 694 } 695 } 696 697 /* zero-length keyword is an error. */ 698 if (n == 0) { 699 *status = U_INVALID_FORMAT_ERROR; 700 return 0; 701 } 702 703 keywordList[numKeywords].keyword[n] = 0; 704 keywordList[numKeywords].keywordLen = n; 705 /* now grab the value part. First we skip the '=' */ 706 equalSign++; 707 /* then we leading spaces */ 708 while(*equalSign == ' ') { 709 equalSign++; 710 } 711 712 /* Premature end or zero-length value */ 713 if (!equalSign || equalSign == semicolon) { 714 *status = U_INVALID_FORMAT_ERROR; 715 return 0; 716 } 717 718 keywordList[numKeywords].valueStart = equalSign; 719 720 pos = semicolon; 721 i = 0; 722 if(pos) { 723 while(*(pos - i - 1) == ' ') { 724 i++; 725 } 726 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); 727 pos++; 728 } else { 729 i = (int32_t)uprv_strlen(equalSign); 730 while(i && equalSign[i-1] == ' ') { 731 i--; 732 } 733 keywordList[numKeywords].valueLen = i; 734 } 735 /* If this is a duplicate keyword, then ignore it */ 736 for (j=0; j<numKeywords; ++j) { 737 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) { 738 duplicate = TRUE; 739 break; 740 } 741 } 742 if (!duplicate) { 743 ++numKeywords; 744 } 745 } while(pos); 746 747 /* Handle addKeyword/addValue. */ 748 if (addKeyword != NULL) { 749 UBool duplicate = FALSE; 750 U_ASSERT(addValue != NULL); 751 /* Search for duplicate; if found, do nothing. Explicit keyword 752 overrides addKeyword. */ 753 for (j=0; j<numKeywords; ++j) { 754 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) { 755 duplicate = TRUE; 756 break; 757 } 758 } 759 if (!duplicate) { 760 if (numKeywords == maxKeywords) { 761 *status = U_INTERNAL_PROGRAM_ERROR; 762 return 0; 763 } 764 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword); 765 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword); 766 keywordList[numKeywords].valueStart = addValue; 767 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue); 768 ++numKeywords; 769 } 770 } else { 771 U_ASSERT(addValue == NULL); 772 } 773 774 /* now we have a list of keywords */ 775 /* we need to sort it */ 776 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status); 777 778 /* Now construct the keyword part */ 779 for(i = 0; i < numKeywords; i++) { 780 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) { 781 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword); 782 if(valuesToo) { 783 keywords[keywordsLen + keywordList[i].keywordLen] = '='; 784 } else { 785 keywords[keywordsLen + keywordList[i].keywordLen] = 0; 786 } 787 } 788 keywordsLen += keywordList[i].keywordLen + 1; 789 if(valuesToo) { 790 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) { 791 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); 792 } 793 keywordsLen += keywordList[i].valueLen; 794 795 if(i < numKeywords - 1) { 796 if(keywordsLen < keywordCapacity) { 797 keywords[keywordsLen] = ';'; 798 } 799 keywordsLen++; 800 } 801 } 802 if(values) { 803 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) { 804 uprv_strcpy(values+valuesLen, keywordList[i].valueStart); 805 values[valuesLen + keywordList[i].valueLen] = 0; 806 } 807 valuesLen += keywordList[i].valueLen + 1; 808 } 809 } 810 if(values) { 811 values[valuesLen] = 0; 812 if(valLen) { 813 *valLen = valuesLen; 814 } 815 } 816 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status); 817 } else { 818 return 0; 819 } 820 } 821 822 U_CFUNC int32_t 823 locale_getKeywords(const char *localeID, 824 char prev, 825 char *keywords, int32_t keywordCapacity, 826 char *values, int32_t valuesCapacity, int32_t *valLen, 827 UBool valuesToo, 828 UErrorCode *status) { 829 return _getKeywords(localeID, prev, keywords, keywordCapacity, 830 values, valuesCapacity, valLen, valuesToo, 831 NULL, NULL, status); 832 } 833 834 U_CAPI int32_t U_EXPORT2 835 uloc_getKeywordValue(const char* localeID, 836 const char* keywordName, 837 char* buffer, int32_t bufferCapacity, 838 UErrorCode* status) 839 { 840 const char* startSearchHere = NULL; 841 const char* nextSeparator = NULL; 842 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 843 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 844 int32_t i = 0; 845 int32_t result = 0; 846 847 if(status && U_SUCCESS(*status) && localeID) { 848 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 849 const char* tmpLocaleID; 850 851 if (_hasBCP47Extension(localeID)) { 852 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 853 } else { 854 tmpLocaleID=localeID; 855 } 856 857 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */ 858 if(startSearchHere == NULL) { 859 /* no keywords, return at once */ 860 return 0; 861 } 862 863 locale_canonKeywordName(keywordNameBuffer, keywordName, status); 864 if(U_FAILURE(*status)) { 865 return 0; 866 } 867 868 /* find the first keyword */ 869 while(startSearchHere) { 870 startSearchHere++; 871 /* skip leading spaces (allowed?) */ 872 while(*startSearchHere == ' ') { 873 startSearchHere++; 874 } 875 nextSeparator = uprv_strchr(startSearchHere, '='); 876 /* need to normalize both keyword and keyword name */ 877 if(!nextSeparator) { 878 break; 879 } 880 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) { 881 /* keyword name too long for internal buffer */ 882 *status = U_INTERNAL_PROGRAM_ERROR; 883 return 0; 884 } 885 for(i = 0; i < nextSeparator - startSearchHere; i++) { 886 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]); 887 } 888 /* trim trailing spaces */ 889 while(startSearchHere[i-1] == ' ') { 890 i--; 891 } 892 localeKeywordNameBuffer[i] = 0; 893 894 startSearchHere = uprv_strchr(nextSeparator, ';'); 895 896 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { 897 nextSeparator++; 898 while(*nextSeparator == ' ') { 899 nextSeparator++; 900 } 901 /* we actually found the keyword. Copy the value */ 902 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) { 903 while(*(startSearchHere-1) == ' ') { 904 startSearchHere--; 905 } 906 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator); 907 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status); 908 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */ 909 i = (int32_t)uprv_strlen(nextSeparator); 910 while(nextSeparator[i - 1] == ' ') { 911 i--; 912 } 913 uprv_strncpy(buffer, nextSeparator, i); 914 result = u_terminateChars(buffer, bufferCapacity, i, status); 915 } else { 916 /* give a bigger buffer, please */ 917 *status = U_BUFFER_OVERFLOW_ERROR; 918 if(startSearchHere) { 919 result = (int32_t)(startSearchHere - nextSeparator); 920 } else { 921 result = (int32_t)uprv_strlen(nextSeparator); 922 } 923 } 924 return result; 925 } 926 } 927 } 928 return 0; 929 } 930 931 U_CAPI int32_t U_EXPORT2 932 uloc_setKeywordValue(const char* keywordName, 933 const char* keywordValue, 934 char* buffer, int32_t bufferCapacity, 935 UErrorCode* status) 936 { 937 /* TODO: sorting. removal. */ 938 int32_t keywordNameLen; 939 int32_t keywordValueLen; 940 int32_t bufLen; 941 int32_t needLen = 0; 942 int32_t foundValueLen; 943 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */ 944 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 945 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 946 int32_t i = 0; 947 int32_t rc; 948 char* nextSeparator = NULL; 949 char* nextEqualsign = NULL; 950 char* startSearchHere = NULL; 951 char* keywordStart = NULL; 952 char *insertHere = NULL; 953 if(U_FAILURE(*status)) { 954 return -1; 955 } 956 if(bufferCapacity>1) { 957 bufLen = (int32_t)uprv_strlen(buffer); 958 } else { 959 *status = U_ILLEGAL_ARGUMENT_ERROR; 960 return 0; 961 } 962 if(bufferCapacity<bufLen) { 963 /* The capacity is less than the length?! Is this NULL terminated? */ 964 *status = U_ILLEGAL_ARGUMENT_ERROR; 965 return 0; 966 } 967 if(keywordValue && !*keywordValue) { 968 keywordValue = NULL; 969 } 970 if(keywordValue) { 971 keywordValueLen = (int32_t)uprv_strlen(keywordValue); 972 } else { 973 keywordValueLen = 0; 974 } 975 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status); 976 if(U_FAILURE(*status)) { 977 return 0; 978 } 979 startSearchHere = (char*)locale_getKeywordsStart(buffer); 980 if(startSearchHere == NULL || (startSearchHere[1]==0)) { 981 if(!keywordValue) { /* no keywords = nothing to remove */ 982 return bufLen; 983 } 984 985 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 986 if(startSearchHere) { /* had a single @ */ 987 needLen--; /* already had the @ */ 988 /* startSearchHere points at the @ */ 989 } else { 990 startSearchHere=buffer+bufLen; 991 } 992 if(needLen >= bufferCapacity) { 993 *status = U_BUFFER_OVERFLOW_ERROR; 994 return needLen; /* no change */ 995 } 996 *startSearchHere = '@'; 997 startSearchHere++; 998 uprv_strcpy(startSearchHere, keywordNameBuffer); 999 startSearchHere += keywordNameLen; 1000 *startSearchHere = '='; 1001 startSearchHere++; 1002 uprv_strcpy(startSearchHere, keywordValue); 1003 startSearchHere+=keywordValueLen; 1004 return needLen; 1005 } /* end shortcut - no @ */ 1006 1007 keywordStart = startSearchHere; 1008 /* search for keyword */ 1009 while(keywordStart) { 1010 keywordStart++; 1011 /* skip leading spaces (allowed?) */ 1012 while(*keywordStart == ' ') { 1013 keywordStart++; 1014 } 1015 nextEqualsign = uprv_strchr(keywordStart, '='); 1016 /* need to normalize both keyword and keyword name */ 1017 if(!nextEqualsign) { 1018 break; 1019 } 1020 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) { 1021 /* keyword name too long for internal buffer */ 1022 *status = U_INTERNAL_PROGRAM_ERROR; 1023 return 0; 1024 } 1025 for(i = 0; i < nextEqualsign - keywordStart; i++) { 1026 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]); 1027 } 1028 /* trim trailing spaces */ 1029 while(keywordStart[i-1] == ' ') { 1030 i--; 1031 } 1032 localeKeywordNameBuffer[i] = 0; 1033 1034 nextSeparator = uprv_strchr(nextEqualsign, ';'); 1035 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); 1036 if(rc == 0) { 1037 nextEqualsign++; 1038 while(*nextEqualsign == ' ') { 1039 nextEqualsign++; 1040 } 1041 /* we actually found the keyword. Change the value */ 1042 if (nextSeparator) { 1043 keywordAtEnd = 0; 1044 foundValueLen = (int32_t)(nextSeparator - nextEqualsign); 1045 } else { 1046 keywordAtEnd = 1; 1047 foundValueLen = (int32_t)uprv_strlen(nextEqualsign); 1048 } 1049 if(keywordValue) { /* adding a value - not removing */ 1050 if(foundValueLen == keywordValueLen) { 1051 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1052 return bufLen; /* no change in size */ 1053 } else if(foundValueLen > keywordValueLen) { 1054 int32_t delta = foundValueLen - keywordValueLen; 1055 if(nextSeparator) { /* RH side */ 1056 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer)); 1057 } 1058 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1059 bufLen -= delta; 1060 buffer[bufLen]=0; 1061 return bufLen; 1062 } else { /* FVL < KVL */ 1063 int32_t delta = keywordValueLen - foundValueLen; 1064 if((bufLen+delta) >= bufferCapacity) { 1065 *status = U_BUFFER_OVERFLOW_ERROR; 1066 return bufLen+delta; 1067 } 1068 if(nextSeparator) { /* RH side */ 1069 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer)); 1070 } 1071 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1072 bufLen += delta; 1073 buffer[bufLen]=0; 1074 return bufLen; 1075 } 1076 } else { /* removing a keyword */ 1077 if(keywordAtEnd) { 1078 /* zero out the ';' or '@' just before startSearchhere */ 1079 keywordStart[-1] = 0; 1080 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */ 1081 } else { 1082 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer)); 1083 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0; 1084 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart)); 1085 } 1086 } 1087 } else if(rc<0){ /* end match keyword */ 1088 /* could insert at this location. */ 1089 insertHere = keywordStart; 1090 } 1091 keywordStart = nextSeparator; 1092 } /* end loop searching */ 1093 1094 if(!keywordValue) { 1095 return bufLen; /* removal of non-extant keyword - no change */ 1096 } 1097 1098 /* we know there is at least one keyword. */ 1099 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 1100 if(needLen >= bufferCapacity) { 1101 *status = U_BUFFER_OVERFLOW_ERROR; 1102 return needLen; /* no change */ 1103 } 1104 1105 if(insertHere) { 1106 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer)); 1107 keywordStart = insertHere; 1108 } else { 1109 keywordStart = buffer+bufLen; 1110 *keywordStart = ';'; 1111 keywordStart++; 1112 } 1113 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen); 1114 keywordStart += keywordNameLen; 1115 *keywordStart = '='; 1116 keywordStart++; 1117 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */ 1118 keywordStart+=keywordValueLen; 1119 if(insertHere) { 1120 *keywordStart = ';'; 1121 keywordStart++; 1122 } 1123 buffer[needLen]=0; 1124 return needLen; 1125 } 1126 1127 /* ### ID parsing implementation **************************************************/ 1128 1129 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) 1130 1131 /*returns TRUE if one of the special prefixes is here (s=string) 1132 'x-' or 'i-' */ 1133 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) 1134 1135 /* Dot terminates it because of POSIX form where dot precedes the codepage 1136 * except for variant 1137 */ 1138 #define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) 1139 1140 static char* _strnchr(const char* str, int32_t len, char c) { 1141 U_ASSERT(str != 0 && len >= 0); 1142 while (len-- != 0) { 1143 char d = *str; 1144 if (d == c) { 1145 return (char*) str; 1146 } else if (d == 0) { 1147 break; 1148 } 1149 ++str; 1150 } 1151 return NULL; 1152 } 1153 1154 /** 1155 * Lookup 'key' in the array 'list'. The array 'list' should contain 1156 * a NULL entry, followed by more entries, and a second NULL entry. 1157 * 1158 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or 1159 * COUNTRIES_3. 1160 */ 1161 static int16_t _findIndex(const char* const* list, const char* key) 1162 { 1163 const char* const* anchor = list; 1164 int32_t pass = 0; 1165 1166 /* Make two passes through two NULL-terminated arrays at 'list' */ 1167 while (pass++ < 2) { 1168 while (*list) { 1169 if (uprv_strcmp(key, *list) == 0) { 1170 return (int16_t)(list - anchor); 1171 } 1172 list++; 1173 } 1174 ++list; /* skip final NULL *CWB*/ 1175 } 1176 return -1; 1177 } 1178 1179 /* count the length of src while copying it to dest; return strlen(src) */ 1180 static U_INLINE int32_t 1181 _copyCount(char *dest, int32_t destCapacity, const char *src) { 1182 const char *anchor; 1183 char c; 1184 1185 anchor=src; 1186 for(;;) { 1187 if((c=*src)==0) { 1188 return (int32_t)(src-anchor); 1189 } 1190 if(destCapacity<=0) { 1191 return (int32_t)((src-anchor)+uprv_strlen(src)); 1192 } 1193 ++src; 1194 *dest++=c; 1195 --destCapacity; 1196 } 1197 } 1198 1199 U_CFUNC const char* 1200 uloc_getCurrentCountryID(const char* oldID){ 1201 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); 1202 if (offset >= 0) { 1203 return REPLACEMENT_COUNTRIES[offset]; 1204 } 1205 return oldID; 1206 } 1207 U_CFUNC const char* 1208 uloc_getCurrentLanguageID(const char* oldID){ 1209 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); 1210 if (offset >= 0) { 1211 return REPLACEMENT_LANGUAGES[offset]; 1212 } 1213 return oldID; 1214 } 1215 /* 1216 * the internal functions _getLanguage(), _getCountry(), _getVariant() 1217 * avoid duplicating code to handle the earlier locale ID pieces 1218 * in the functions for the later ones by 1219 * setting the *pEnd pointer to where they stopped parsing 1220 * 1221 * TODO try to use this in Locale 1222 */ 1223 U_CFUNC int32_t 1224 ulocimp_getLanguage(const char *localeID, 1225 char *language, int32_t languageCapacity, 1226 const char **pEnd) { 1227 int32_t i=0; 1228 int32_t offset; 1229 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ 1230 1231 /* if it starts with i- or x- then copy that prefix */ 1232 if(_isIDPrefix(localeID)) { 1233 if(i<languageCapacity) { 1234 language[i]=(char)uprv_tolower(*localeID); 1235 } 1236 if(i<languageCapacity) { 1237 language[i+1]='-'; 1238 } 1239 i+=2; 1240 localeID+=2; 1241 } 1242 1243 /* copy the language as far as possible and count its length */ 1244 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { 1245 if(i<languageCapacity) { 1246 language[i]=(char)uprv_tolower(*localeID); 1247 } 1248 if(i<3) { 1249 lang[i]=(char)uprv_tolower(*localeID); 1250 } 1251 i++; 1252 localeID++; 1253 } 1254 1255 if(i==3) { 1256 /* convert 3 character code to 2 character code if possible *CWB*/ 1257 offset=_findIndex(LANGUAGES_3, lang); 1258 if(offset>=0) { 1259 i=_copyCount(language, languageCapacity, LANGUAGES[offset]); 1260 } 1261 } 1262 1263 if(pEnd!=NULL) { 1264 *pEnd=localeID; 1265 } 1266 return i; 1267 } 1268 1269 U_CFUNC int32_t 1270 ulocimp_getScript(const char *localeID, 1271 char *script, int32_t scriptCapacity, 1272 const char **pEnd) 1273 { 1274 int32_t idLen = 0; 1275 1276 if (pEnd != NULL) { 1277 *pEnd = localeID; 1278 } 1279 1280 /* copy the second item as far as possible and count its length */ 1281 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { 1282 idLen++; 1283 } 1284 1285 /* If it's exactly 4 characters long, then it's a script and not a country. */ 1286 if (idLen == 4) { 1287 int32_t i; 1288 if (pEnd != NULL) { 1289 *pEnd = localeID+idLen; 1290 } 1291 if(idLen > scriptCapacity) { 1292 idLen = scriptCapacity; 1293 } 1294 if (idLen >= 1) { 1295 script[0]=(char)uprv_toupper(*(localeID++)); 1296 } 1297 for (i = 1; i < idLen; i++) { 1298 script[i]=(char)uprv_tolower(*(localeID++)); 1299 } 1300 } 1301 else { 1302 idLen = 0; 1303 } 1304 return idLen; 1305 } 1306 1307 U_CFUNC int32_t 1308 ulocimp_getCountry(const char *localeID, 1309 char *country, int32_t countryCapacity, 1310 const char **pEnd) 1311 { 1312 int32_t idLen=0; 1313 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; 1314 int32_t offset; 1315 1316 /* copy the country as far as possible and count its length */ 1317 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { 1318 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ 1319 cnty[idLen]=(char)uprv_toupper(localeID[idLen]); 1320 } 1321 idLen++; 1322 } 1323 1324 /* the country should be either length 2 or 3 */ 1325 if (idLen == 2 || idLen == 3) { 1326 UBool gotCountry = FALSE; 1327 /* convert 3 character code to 2 character code if possible *CWB*/ 1328 if(idLen==3) { 1329 offset=_findIndex(COUNTRIES_3, cnty); 1330 if(offset>=0) { 1331 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]); 1332 gotCountry = TRUE; 1333 } 1334 } 1335 if (!gotCountry) { 1336 int32_t i = 0; 1337 for (i = 0; i < idLen; i++) { 1338 if (i < countryCapacity) { 1339 country[i]=(char)uprv_toupper(localeID[i]); 1340 } 1341 } 1342 } 1343 localeID+=idLen; 1344 } else { 1345 idLen = 0; 1346 } 1347 1348 if(pEnd!=NULL) { 1349 *pEnd=localeID; 1350 } 1351 1352 return idLen; 1353 } 1354 1355 /** 1356 * @param needSeparator if true, then add leading '_' if any variants 1357 * are added to 'variant' 1358 */ 1359 static int32_t 1360 _getVariantEx(const char *localeID, 1361 char prev, 1362 char *variant, int32_t variantCapacity, 1363 UBool needSeparator) { 1364 int32_t i=0; 1365 1366 /* get one or more variant tags and separate them with '_' */ 1367 if(_isIDSeparator(prev)) { 1368 /* get a variant string after a '-' or '_' */ 1369 while(!_isTerminator(*localeID)) { 1370 if (needSeparator) { 1371 if (i<variantCapacity) { 1372 variant[i] = '_'; 1373 } 1374 ++i; 1375 needSeparator = FALSE; 1376 } 1377 if(i<variantCapacity) { 1378 variant[i]=(char)uprv_toupper(*localeID); 1379 if(variant[i]=='-') { 1380 variant[i]='_'; 1381 } 1382 } 1383 i++; 1384 localeID++; 1385 } 1386 } 1387 1388 /* if there is no variant tag after a '-' or '_' then look for '@' */ 1389 if(i==0) { 1390 if(prev=='@') { 1391 /* keep localeID */ 1392 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) { 1393 ++localeID; /* point after the '@' */ 1394 } else { 1395 return 0; 1396 } 1397 while(!_isTerminator(*localeID)) { 1398 if (needSeparator) { 1399 if (i<variantCapacity) { 1400 variant[i] = '_'; 1401 } 1402 ++i; 1403 needSeparator = FALSE; 1404 } 1405 if(i<variantCapacity) { 1406 variant[i]=(char)uprv_toupper(*localeID); 1407 if(variant[i]=='-' || variant[i]==',') { 1408 variant[i]='_'; 1409 } 1410 } 1411 i++; 1412 localeID++; 1413 } 1414 } 1415 1416 return i; 1417 } 1418 1419 static int32_t 1420 _getVariant(const char *localeID, 1421 char prev, 1422 char *variant, int32_t variantCapacity) { 1423 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE); 1424 } 1425 1426 /** 1427 * Delete ALL instances of a variant from the given list of one or 1428 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR". 1429 * @param variants the source string of one or more variants, 1430 * separated by '_'. This will be MODIFIED IN PLACE. Not zero 1431 * terminated; if it is, trailing zero will NOT be maintained. 1432 * @param variantsLen length of variants 1433 * @param toDelete variant to delete, without separators, e.g. "EURO" 1434 * or "PREEURO"; not zero terminated 1435 * @param toDeleteLen length of toDelete 1436 * @return number of characters deleted from variants 1437 */ 1438 static int32_t 1439 _deleteVariant(char* variants, int32_t variantsLen, 1440 const char* toDelete, int32_t toDeleteLen) 1441 { 1442 int32_t delta = 0; /* number of chars deleted */ 1443 for (;;) { 1444 UBool flag = FALSE; 1445 if (variantsLen < toDeleteLen) { 1446 return delta; 1447 } 1448 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && 1449 (variantsLen == toDeleteLen || 1450 (flag=(variants[toDeleteLen] == '_')))) 1451 { 1452 int32_t d = toDeleteLen + (flag?1:0); 1453 variantsLen -= d; 1454 delta += d; 1455 if (variantsLen > 0) { 1456 uprv_memmove(variants, variants+d, variantsLen); 1457 } 1458 } else { 1459 char* p = _strnchr(variants, variantsLen, '_'); 1460 if (p == NULL) { 1461 return delta; 1462 } 1463 ++p; 1464 variantsLen -= (int32_t)(p - variants); 1465 variants = p; 1466 } 1467 } 1468 } 1469 1470 /* Keyword enumeration */ 1471 1472 typedef struct UKeywordsContext { 1473 char* keywords; 1474 char* current; 1475 } UKeywordsContext; 1476 1477 static void U_CALLCONV 1478 uloc_kw_closeKeywords(UEnumeration *enumerator) { 1479 uprv_free(((UKeywordsContext *)enumerator->context)->keywords); 1480 uprv_free(enumerator->context); 1481 uprv_free(enumerator); 1482 } 1483 1484 static int32_t U_CALLCONV 1485 uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) { 1486 char *kw = ((UKeywordsContext *)en->context)->keywords; 1487 int32_t result = 0; 1488 while(*kw) { 1489 result++; 1490 kw += uprv_strlen(kw)+1; 1491 } 1492 return result; 1493 } 1494 1495 static const char* U_CALLCONV 1496 uloc_kw_nextKeyword(UEnumeration* en, 1497 int32_t* resultLength, 1498 UErrorCode* status) { 1499 const char* result = ((UKeywordsContext *)en->context)->current; 1500 int32_t len = 0; 1501 if(*result) { 1502 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current); 1503 ((UKeywordsContext *)en->context)->current += len+1; 1504 } else { 1505 result = NULL; 1506 } 1507 if (resultLength) { 1508 *resultLength = len; 1509 } 1510 return result; 1511 } 1512 1513 static void U_CALLCONV 1514 uloc_kw_resetKeywords(UEnumeration* en, 1515 UErrorCode* status) { 1516 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; 1517 } 1518 1519 static const UEnumeration gKeywordsEnum = { 1520 NULL, 1521 NULL, 1522 uloc_kw_closeKeywords, 1523 uloc_kw_countKeywords, 1524 uenum_unextDefault, 1525 uloc_kw_nextKeyword, 1526 uloc_kw_resetKeywords 1527 }; 1528 1529 U_CAPI UEnumeration* U_EXPORT2 1530 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) 1531 { 1532 UKeywordsContext *myContext = NULL; 1533 UEnumeration *result = NULL; 1534 1535 if(U_FAILURE(*status)) { 1536 return NULL; 1537 } 1538 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 1539 /* Null pointer test */ 1540 if (result == NULL) { 1541 *status = U_MEMORY_ALLOCATION_ERROR; 1542 return NULL; 1543 } 1544 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); 1545 myContext = uprv_malloc(sizeof(UKeywordsContext)); 1546 if (myContext == NULL) { 1547 *status = U_MEMORY_ALLOCATION_ERROR; 1548 uprv_free(result); 1549 return NULL; 1550 } 1551 myContext->keywords = (char *)uprv_malloc(keywordListSize+1); 1552 uprv_memcpy(myContext->keywords, keywordList, keywordListSize); 1553 myContext->keywords[keywordListSize] = 0; 1554 myContext->current = myContext->keywords; 1555 result->context = myContext; 1556 return result; 1557 } 1558 1559 U_CAPI UEnumeration* U_EXPORT2 1560 uloc_openKeywords(const char* localeID, 1561 UErrorCode* status) 1562 { 1563 int32_t i=0; 1564 char keywords[256]; 1565 int32_t keywordsCapacity = 256; 1566 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1567 const char* tmpLocaleID; 1568 1569 if(status==NULL || U_FAILURE(*status)) { 1570 return 0; 1571 } 1572 1573 if (_hasBCP47Extension(localeID)) { 1574 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 1575 } else { 1576 if (localeID==NULL) { 1577 localeID=uloc_getDefault(); 1578 } 1579 tmpLocaleID=localeID; 1580 } 1581 1582 /* Skip the language */ 1583 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 1584 if(_isIDSeparator(*tmpLocaleID)) { 1585 const char *scriptID; 1586 /* Skip the script if available */ 1587 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 1588 if(scriptID != tmpLocaleID+1) { 1589 /* Found optional script */ 1590 tmpLocaleID = scriptID; 1591 } 1592 /* Skip the Country */ 1593 if (_isIDSeparator(*tmpLocaleID)) { 1594 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID); 1595 if(_isIDSeparator(*tmpLocaleID)) { 1596 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0); 1597 } 1598 } 1599 } 1600 1601 /* keywords are located after '@' */ 1602 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { 1603 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status); 1604 } 1605 1606 if(i) { 1607 return uloc_openKeywordList(keywords, i, status); 1608 } else { 1609 return NULL; 1610 } 1611 } 1612 1613 1614 /* bit-flags for 'options' parameter of _canonicalize */ 1615 #define _ULOC_STRIP_KEYWORDS 0x2 1616 #define _ULOC_CANONICALIZE 0x1 1617 1618 #define OPTION_SET(options, mask) ((options & mask) != 0) 1619 1620 static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; 1621 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0]) 1622 1623 /** 1624 * Canonicalize the given localeID, to level 1 or to level 2, 1625 * depending on the options. To specify level 1, pass in options=0. 1626 * To specify level 2, pass in options=_ULOC_CANONICALIZE. 1627 * 1628 * This is the code underlying uloc_getName and uloc_canonicalize. 1629 */ 1630 static int32_t 1631 _canonicalize(const char* localeID, 1632 char* result, 1633 int32_t resultCapacity, 1634 uint32_t options, 1635 UErrorCode* err) { 1636 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity; 1637 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1638 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1639 const char* origLocaleID; 1640 const char* tmpLocaleID; 1641 const char* keywordAssign = NULL; 1642 const char* separatorIndicator = NULL; 1643 const char* addKeyword = NULL; 1644 const char* addValue = NULL; 1645 char* name; 1646 char* variant = NULL; /* pointer into name, or NULL */ 1647 1648 if (U_FAILURE(*err)) { 1649 return 0; 1650 } 1651 1652 if (_hasBCP47Extension(localeID)) { 1653 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 1654 } else { 1655 if (localeID==NULL) { 1656 localeID=uloc_getDefault(); 1657 } 1658 tmpLocaleID=localeID; 1659 } 1660 1661 origLocaleID=tmpLocaleID; 1662 1663 /* if we are doing a full canonicalization, then put results in 1664 localeBuffer, if necessary; otherwise send them to result. */ 1665 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/ 1666 (result == NULL || resultCapacity < sizeof(localeBuffer))) { 1667 name = localeBuffer; 1668 nameCapacity = sizeof(localeBuffer); 1669 } else { 1670 name = result; 1671 nameCapacity = resultCapacity; 1672 } 1673 1674 /* get all pieces, one after another, and separate with '_' */ 1675 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID); 1676 1677 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) { 1678 const char *d = uloc_getDefault(); 1679 1680 len = (int32_t)uprv_strlen(d); 1681 1682 if (name != NULL) { 1683 uprv_strncpy(name, d, len); 1684 } 1685 } else if(_isIDSeparator(*tmpLocaleID)) { 1686 const char *scriptID; 1687 1688 ++fieldCount; 1689 if(len<nameCapacity) { 1690 name[len]='_'; 1691 } 1692 ++len; 1693 1694 scriptSize=ulocimp_getScript(tmpLocaleID+1, name+len, nameCapacity-len, &scriptID); 1695 if(scriptSize > 0) { 1696 /* Found optional script */ 1697 tmpLocaleID = scriptID; 1698 ++fieldCount; 1699 len+=scriptSize; 1700 if (_isIDSeparator(*tmpLocaleID)) { 1701 /* If there is something else, then we add the _ */ 1702 if(len<nameCapacity) { 1703 name[len]='_'; 1704 } 1705 ++len; 1706 } 1707 } 1708 1709 if (_isIDSeparator(*tmpLocaleID)) { 1710 const char *cntryID; 1711 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, name+len, nameCapacity-len, &cntryID); 1712 if (cntrySize > 0) { 1713 /* Found optional country */ 1714 tmpLocaleID = cntryID; 1715 len+=cntrySize; 1716 } 1717 if(_isIDSeparator(*tmpLocaleID)) { 1718 /* If there is something else, then we add the _ if we found country before.*/ 1719 if (cntrySize > 0) { 1720 ++fieldCount; 1721 if(len<nameCapacity) { 1722 name[len]='_'; 1723 } 1724 ++len; 1725 } 1726 1727 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, name+len, nameCapacity-len); 1728 if (variantSize > 0) { 1729 variant = name+len; 1730 len += variantSize; 1731 tmpLocaleID += variantSize + 1; /* skip '_' and variant */ 1732 } 1733 } 1734 } 1735 } 1736 1737 /* Copy POSIX-style charset specifier, if any [mr.utf8] */ 1738 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') { 1739 UBool done = FALSE; 1740 do { 1741 char c = *tmpLocaleID; 1742 switch (c) { 1743 case 0: 1744 case '@': 1745 done = TRUE; 1746 break; 1747 default: 1748 if (len<nameCapacity) { 1749 name[len] = c; 1750 } 1751 ++len; 1752 ++tmpLocaleID; 1753 break; 1754 } 1755 } while (!done); 1756 } 1757 1758 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';' 1759 After this, tmpLocaleID either points to '@' or is NULL */ 1760 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) { 1761 keywordAssign = uprv_strchr(tmpLocaleID, '='); 1762 separatorIndicator = uprv_strchr(tmpLocaleID, ';'); 1763 } 1764 1765 /* Copy POSIX-style variant, if any [mr@FOO] */ 1766 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && 1767 tmpLocaleID != NULL && keywordAssign == NULL) { 1768 for (;;) { 1769 char c = *tmpLocaleID; 1770 if (c == 0) { 1771 break; 1772 } 1773 if (len<nameCapacity) { 1774 name[len] = c; 1775 } 1776 ++len; 1777 ++tmpLocaleID; 1778 } 1779 } 1780 1781 if (OPTION_SET(options, _ULOC_CANONICALIZE)) { 1782 /* Handle @FOO variant if @ is present and not followed by = */ 1783 if (tmpLocaleID!=NULL && keywordAssign==NULL) { 1784 int32_t posixVariantSize; 1785 /* Add missing '_' if needed */ 1786 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) { 1787 do { 1788 if(len<nameCapacity) { 1789 name[len]='_'; 1790 } 1791 ++len; 1792 ++fieldCount; 1793 } while(fieldCount<2); 1794 } 1795 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len, 1796 (UBool)(variantSize > 0)); 1797 if (posixVariantSize > 0) { 1798 if (variant == NULL) { 1799 variant = name+len; 1800 } 1801 len += posixVariantSize; 1802 variantSize += posixVariantSize; 1803 } 1804 } 1805 1806 /* Handle generic variants first */ 1807 if (variant) { 1808 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) { 1809 const char* variantToCompare = VARIANT_MAP[j].variant; 1810 int32_t n = (int32_t)uprv_strlen(variantToCompare); 1811 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n); 1812 len -= variantLen; 1813 if (variantLen > 0) { 1814 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */ 1815 --len; 1816 } 1817 addKeyword = VARIANT_MAP[j].keyword; 1818 addValue = VARIANT_MAP[j].value; 1819 break; 1820 } 1821 } 1822 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */ 1823 --len; 1824 } 1825 } 1826 1827 /* Look up the ID in the canonicalization map */ 1828 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) { 1829 const char* id = CANONICALIZE_MAP[j].id; 1830 int32_t n = (int32_t)uprv_strlen(id); 1831 if (len == n && uprv_strncmp(name, id, n) == 0) { 1832 if (n == 0 && tmpLocaleID != NULL) { 1833 break; /* Don't remap "" if keywords present */ 1834 } 1835 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID); 1836 if (CANONICALIZE_MAP[j].keyword) { 1837 addKeyword = CANONICALIZE_MAP[j].keyword; 1838 addValue = CANONICALIZE_MAP[j].value; 1839 } 1840 break; 1841 } 1842 } 1843 } 1844 1845 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) { 1846 if (tmpLocaleID!=NULL && keywordAssign!=NULL && 1847 (!separatorIndicator || separatorIndicator > keywordAssign)) { 1848 if(len<nameCapacity) { 1849 name[len]='@'; 1850 } 1851 ++len; 1852 ++fieldCount; 1853 len += _getKeywords(tmpLocaleID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE, 1854 addKeyword, addValue, err); 1855 } else if (addKeyword != NULL) { 1856 U_ASSERT(addValue != NULL); 1857 /* inelegant but works -- later make _getKeywords do this? */ 1858 len += _copyCount(name+len, nameCapacity-len, "@"); 1859 len += _copyCount(name+len, nameCapacity-len, addKeyword); 1860 len += _copyCount(name+len, nameCapacity-len, "="); 1861 len += _copyCount(name+len, nameCapacity-len, addValue); 1862 } 1863 } 1864 1865 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) { 1866 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len); 1867 } 1868 1869 return u_terminateChars(result, resultCapacity, len, err); 1870 } 1871 1872 /* ### ID parsing API **************************************************/ 1873 1874 U_CAPI int32_t U_EXPORT2 1875 uloc_getParent(const char* localeID, 1876 char* parent, 1877 int32_t parentCapacity, 1878 UErrorCode* err) 1879 { 1880 const char *lastUnderscore; 1881 int32_t i; 1882 1883 if (U_FAILURE(*err)) 1884 return 0; 1885 1886 if (localeID == NULL) 1887 localeID = uloc_getDefault(); 1888 1889 lastUnderscore=uprv_strrchr(localeID, '_'); 1890 if(lastUnderscore!=NULL) { 1891 i=(int32_t)(lastUnderscore-localeID); 1892 } else { 1893 i=0; 1894 } 1895 1896 if(i>0 && parent != localeID) { 1897 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity)); 1898 } 1899 return u_terminateChars(parent, parentCapacity, i, err); 1900 } 1901 1902 U_CAPI int32_t U_EXPORT2 1903 uloc_getLanguage(const char* localeID, 1904 char* language, 1905 int32_t languageCapacity, 1906 UErrorCode* err) 1907 { 1908 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/ 1909 int32_t i=0; 1910 1911 if (err==NULL || U_FAILURE(*err)) { 1912 return 0; 1913 } 1914 1915 if(localeID==NULL) { 1916 localeID=uloc_getDefault(); 1917 } 1918 1919 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL); 1920 return u_terminateChars(language, languageCapacity, i, err); 1921 } 1922 1923 U_CAPI int32_t U_EXPORT2 1924 uloc_getScript(const char* localeID, 1925 char* script, 1926 int32_t scriptCapacity, 1927 UErrorCode* err) 1928 { 1929 int32_t i=0; 1930 1931 if(err==NULL || U_FAILURE(*err)) { 1932 return 0; 1933 } 1934 1935 if(localeID==NULL) { 1936 localeID=uloc_getDefault(); 1937 } 1938 1939 /* skip the language */ 1940 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1941 if(_isIDSeparator(*localeID)) { 1942 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL); 1943 } 1944 return u_terminateChars(script, scriptCapacity, i, err); 1945 } 1946 1947 U_CAPI int32_t U_EXPORT2 1948 uloc_getCountry(const char* localeID, 1949 char* country, 1950 int32_t countryCapacity, 1951 UErrorCode* err) 1952 { 1953 int32_t i=0; 1954 1955 if(err==NULL || U_FAILURE(*err)) { 1956 return 0; 1957 } 1958 1959 if(localeID==NULL) { 1960 localeID=uloc_getDefault(); 1961 } 1962 1963 /* Skip the language */ 1964 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1965 if(_isIDSeparator(*localeID)) { 1966 const char *scriptID; 1967 /* Skip the script if available */ 1968 ulocimp_getScript(localeID+1, NULL, 0, &scriptID); 1969 if(scriptID != localeID+1) { 1970 /* Found optional script */ 1971 localeID = scriptID; 1972 } 1973 if(_isIDSeparator(*localeID)) { 1974 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL); 1975 } 1976 } 1977 return u_terminateChars(country, countryCapacity, i, err); 1978 } 1979 1980 U_CAPI int32_t U_EXPORT2 1981 uloc_getVariant(const char* localeID, 1982 char* variant, 1983 int32_t variantCapacity, 1984 UErrorCode* err) 1985 { 1986 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1987 const char* tmpLocaleID; 1988 int32_t i=0; 1989 1990 if(err==NULL || U_FAILURE(*err)) { 1991 return 0; 1992 } 1993 1994 if (_hasBCP47Extension(localeID)) { 1995 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 1996 } else { 1997 if (localeID==NULL) { 1998 localeID=uloc_getDefault(); 1999 } 2000 tmpLocaleID=localeID; 2001 } 2002 2003 /* Skip the language */ 2004 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 2005 if(_isIDSeparator(*tmpLocaleID)) { 2006 const char *scriptID; 2007 /* Skip the script if available */ 2008 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 2009 if(scriptID != tmpLocaleID+1) { 2010 /* Found optional script */ 2011 tmpLocaleID = scriptID; 2012 } 2013 /* Skip the Country */ 2014 if (_isIDSeparator(*tmpLocaleID)) { 2015 const char *cntryID; 2016 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID); 2017 if (cntryID != tmpLocaleID+1) { 2018 /* Found optional country */ 2019 tmpLocaleID = cntryID; 2020 } 2021 if(_isIDSeparator(*tmpLocaleID)) { 2022 /* If there was no country ID, skip a possible extra IDSeparator */ 2023 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) { 2024 tmpLocaleID++; 2025 } 2026 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity); 2027 } 2028 } 2029 } 2030 2031 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */ 2032 /* if we do not have a variant tag yet then try a POSIX variant after '@' */ 2033 /* 2034 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) { 2035 i=_getVariant(localeID+1, '@', variant, variantCapacity); 2036 } 2037 */ 2038 return u_terminateChars(variant, variantCapacity, i, err); 2039 } 2040 2041 U_CAPI int32_t U_EXPORT2 2042 uloc_getName(const char* localeID, 2043 char* name, 2044 int32_t nameCapacity, 2045 UErrorCode* err) 2046 { 2047 return _canonicalize(localeID, name, nameCapacity, 0, err); 2048 } 2049 2050 U_CAPI int32_t U_EXPORT2 2051 uloc_getBaseName(const char* localeID, 2052 char* name, 2053 int32_t nameCapacity, 2054 UErrorCode* err) 2055 { 2056 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err); 2057 } 2058 2059 U_CAPI int32_t U_EXPORT2 2060 uloc_canonicalize(const char* localeID, 2061 char* name, 2062 int32_t nameCapacity, 2063 UErrorCode* err) 2064 { 2065 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err); 2066 } 2067 2068 U_CAPI const char* U_EXPORT2 2069 uloc_getISO3Language(const char* localeID) 2070 { 2071 int16_t offset; 2072 char lang[ULOC_LANG_CAPACITY]; 2073 UErrorCode err = U_ZERO_ERROR; 2074 2075 if (localeID == NULL) 2076 { 2077 localeID = uloc_getDefault(); 2078 } 2079 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err); 2080 if (U_FAILURE(err)) 2081 return ""; 2082 offset = _findIndex(LANGUAGES, lang); 2083 if (offset < 0) 2084 return ""; 2085 return LANGUAGES_3[offset]; 2086 } 2087 2088 U_CAPI const char* U_EXPORT2 2089 uloc_getISO3Country(const char* localeID) 2090 { 2091 int16_t offset; 2092 char cntry[ULOC_LANG_CAPACITY]; 2093 UErrorCode err = U_ZERO_ERROR; 2094 2095 if (localeID == NULL) 2096 { 2097 localeID = uloc_getDefault(); 2098 } 2099 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err); 2100 if (U_FAILURE(err)) 2101 return ""; 2102 offset = _findIndex(COUNTRIES, cntry); 2103 if (offset < 0) 2104 return ""; 2105 2106 return COUNTRIES_3[offset]; 2107 } 2108 2109 U_CAPI uint32_t U_EXPORT2 2110 uloc_getLCID(const char* localeID) 2111 { 2112 UErrorCode status = U_ZERO_ERROR; 2113 char langID[ULOC_FULLNAME_CAPACITY]; 2114 2115 uloc_getLanguage(localeID, langID, sizeof(langID), &status); 2116 if (U_FAILURE(status)) { 2117 return 0; 2118 } 2119 2120 return uprv_convertToLCID(langID, localeID, &status); 2121 } 2122 2123 U_CAPI int32_t U_EXPORT2 2124 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, 2125 UErrorCode *status) 2126 { 2127 int32_t length; 2128 const char *posix = uprv_convertToPosix(hostid, status); 2129 if (U_FAILURE(*status) || posix == NULL) { 2130 return 0; 2131 } 2132 length = (int32_t)uprv_strlen(posix); 2133 if (length+1 > localeCapacity) { 2134 *status = U_BUFFER_OVERFLOW_ERROR; 2135 } 2136 else { 2137 uprv_strcpy(locale, posix); 2138 } 2139 return length; 2140 } 2141 2142 /* ### Default locale **************************************************/ 2143 2144 U_CAPI const char* U_EXPORT2 2145 uloc_getDefault() 2146 { 2147 return locale_get_default(); 2148 } 2149 2150 U_CAPI void U_EXPORT2 2151 uloc_setDefault(const char* newDefaultLocale, 2152 UErrorCode* err) 2153 { 2154 if (U_FAILURE(*err)) 2155 return; 2156 /* the error code isn't currently used for anything by this function*/ 2157 2158 /* propagate change to C++ */ 2159 locale_set_default(newDefaultLocale); 2160 } 2161 2162 /** 2163 * Returns a list of all language codes defined in ISO 639. This is a pointer 2164 * to an array of pointers to arrays of char. All of these pointers are owned 2165 * by ICU-- do not delete them, and do not write through them. The array is 2166 * terminated with a null pointer. 2167 */ 2168 U_CAPI const char* const* U_EXPORT2 2169 uloc_getISOLanguages() 2170 { 2171 return LANGUAGES; 2172 } 2173 2174 /** 2175 * Returns a list of all 2-letter country codes defined in ISO 639. This is a 2176 * pointer to an array of pointers to arrays of char. All of these pointers are 2177 * owned by ICU-- do not delete them, and do not write through them. The array is 2178 * terminated with a null pointer. 2179 */ 2180 U_CAPI const char* const* U_EXPORT2 2181 uloc_getISOCountries() 2182 { 2183 return COUNTRIES; 2184 } 2185 2186 2187 /* this function to be moved into cstring.c later */ 2188 static char gDecimal = 0; 2189 2190 static /* U_CAPI */ 2191 double 2192 /* U_EXPORT2 */ 2193 _uloc_strtod(const char *start, char **end) { 2194 char *decimal; 2195 char *myEnd; 2196 char buf[30]; 2197 double rv; 2198 if (!gDecimal) { 2199 char rep[5]; 2200 /* For machines that decide to change the decimal on you, 2201 and try to be too smart with localization. 2202 This normally should be just a '.'. */ 2203 sprintf(rep, "%+1.1f", 1.0); 2204 gDecimal = rep[2]; 2205 } 2206 2207 if(gDecimal == '.') { 2208 return uprv_strtod(start, end); /* fall through to OS */ 2209 } else { 2210 uprv_strncpy(buf, start, 29); 2211 buf[29]=0; 2212 decimal = uprv_strchr(buf, '.'); 2213 if(decimal) { 2214 *decimal = gDecimal; 2215 } else { 2216 return uprv_strtod(start, end); /* no decimal point */ 2217 } 2218 rv = uprv_strtod(buf, &myEnd); 2219 if(end) { 2220 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */ 2221 } 2222 return rv; 2223 } 2224 } 2225 2226 typedef struct { 2227 float q; 2228 int32_t dummy; /* to avoid uninitialized memory copy from qsort */ 2229 char *locale; 2230 } _acceptLangItem; 2231 2232 static int32_t U_CALLCONV 2233 uloc_acceptLanguageCompare(const void *context, const void *a, const void *b) 2234 { 2235 const _acceptLangItem *aa = (const _acceptLangItem*)a; 2236 const _acceptLangItem *bb = (const _acceptLangItem*)b; 2237 2238 int32_t rc = 0; 2239 if(bb->q < aa->q) { 2240 rc = -1; /* A > B */ 2241 } else if(bb->q > aa->q) { 2242 rc = 1; /* A < B */ 2243 } else { 2244 rc = 0; /* A = B */ 2245 } 2246 2247 if(rc==0) { 2248 rc = uprv_stricmp(aa->locale, bb->locale); 2249 } 2250 2251 #if defined(ULOC_DEBUG) 2252 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n", 2253 aa->locale, aa->q, 2254 bb->locale, bb->q, 2255 rc);*/ 2256 #endif 2257 2258 return rc; 2259 } 2260 2261 /* 2262 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53 2263 */ 2264 2265 U_CAPI int32_t U_EXPORT2 2266 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult, 2267 const char *httpAcceptLanguage, 2268 UEnumeration* availableLocales, 2269 UErrorCode *status) 2270 { 2271 _acceptLangItem *j; 2272 _acceptLangItem smallBuffer[30]; 2273 char **strs; 2274 char tmp[ULOC_FULLNAME_CAPACITY +1]; 2275 int32_t n = 0; 2276 const char *itemEnd; 2277 const char *paramEnd; 2278 const char *s; 2279 const char *t; 2280 int32_t res; 2281 int32_t i; 2282 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); 2283 int32_t jSize; 2284 char *tempstr; /* Use for null pointer check */ 2285 2286 j = smallBuffer; 2287 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]); 2288 if(U_FAILURE(*status)) { 2289 return -1; 2290 } 2291 2292 for(s=httpAcceptLanguage;s&&*s;) { 2293 while(isspace(*s)) /* eat space at the beginning */ 2294 s++; 2295 itemEnd=uprv_strchr(s,','); 2296 paramEnd=uprv_strchr(s,';'); 2297 if(!itemEnd) { 2298 itemEnd = httpAcceptLanguage+l; /* end of string */ 2299 } 2300 if(paramEnd && paramEnd<itemEnd) { 2301 /* semicolon (;) is closer than end (,) */ 2302 t = paramEnd+1; 2303 if(*t=='q') { 2304 t++; 2305 } 2306 while(isspace(*t)) { 2307 t++; 2308 } 2309 if(*t=='=') { 2310 t++; 2311 } 2312 while(isspace(*t)) { 2313 t++; 2314 } 2315 j[n].q = (float)_uloc_strtod(t,NULL); 2316 } else { 2317 /* no semicolon - it's 1.0 */ 2318 j[n].q = 1.0f; 2319 paramEnd = itemEnd; 2320 } 2321 j[n].dummy=0; 2322 /* eat spaces prior to semi */ 2323 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) 2324 ; 2325 /* Check for null pointer from uprv_strndup */ 2326 tempstr = uprv_strndup(s,(int32_t)((t+1)-s)); 2327 if (tempstr == NULL) { 2328 *status = U_MEMORY_ALLOCATION_ERROR; 2329 return -1; 2330 } 2331 j[n].locale = tempstr; 2332 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status); 2333 if(strcmp(j[n].locale,tmp)) { 2334 uprv_free(j[n].locale); 2335 j[n].locale=uprv_strdup(tmp); 2336 } 2337 #if defined(ULOC_DEBUG) 2338 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ 2339 #endif 2340 n++; 2341 s = itemEnd; 2342 while(*s==',') { /* eat duplicate commas */ 2343 s++; 2344 } 2345 if(n>=jSize) { 2346 if(j==smallBuffer) { /* overflowed the small buffer. */ 2347 j = uprv_malloc(sizeof(j[0])*(jSize*2)); 2348 if(j!=NULL) { 2349 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize); 2350 } 2351 #if defined(ULOC_DEBUG) 2352 fprintf(stderr,"malloced at size %d\n", jSize); 2353 #endif 2354 } else { 2355 j = uprv_realloc(j, sizeof(j[0])*jSize*2); 2356 #if defined(ULOC_DEBUG) 2357 fprintf(stderr,"re-alloced at size %d\n", jSize); 2358 #endif 2359 } 2360 jSize *= 2; 2361 if(j==NULL) { 2362 *status = U_MEMORY_ALLOCATION_ERROR; 2363 return -1; 2364 } 2365 } 2366 } 2367 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); 2368 if(U_FAILURE(*status)) { 2369 if(j != smallBuffer) { 2370 #if defined(ULOC_DEBUG) 2371 fprintf(stderr,"freeing j %p\n", j); 2372 #endif 2373 uprv_free(j); 2374 } 2375 return -1; 2376 } 2377 strs = uprv_malloc((size_t)(sizeof(strs[0])*n)); 2378 /* Check for null pointer */ 2379 if (strs == NULL) { 2380 uprv_free(j); /* Free to avoid memory leak */ 2381 *status = U_MEMORY_ALLOCATION_ERROR; 2382 return -1; 2383 } 2384 for(i=0;i<n;i++) { 2385 #if defined(ULOC_DEBUG) 2386 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/ 2387 #endif 2388 strs[i]=j[i].locale; 2389 } 2390 res = uloc_acceptLanguage(result, resultAvailable, outResult, 2391 (const char**)strs, n, availableLocales, status); 2392 for(i=0;i<n;i++) { 2393 uprv_free(strs[i]); 2394 } 2395 uprv_free(strs); 2396 if(j != smallBuffer) { 2397 #if defined(ULOC_DEBUG) 2398 fprintf(stderr,"freeing j %p\n", j); 2399 #endif 2400 uprv_free(j); 2401 } 2402 return res; 2403 } 2404 2405 2406 U_CAPI int32_t U_EXPORT2 2407 uloc_acceptLanguage(char *result, int32_t resultAvailable, 2408 UAcceptResult *outResult, const char **acceptList, 2409 int32_t acceptListCount, 2410 UEnumeration* availableLocales, 2411 UErrorCode *status) 2412 { 2413 int32_t i,j; 2414 int32_t len; 2415 int32_t maxLen=0; 2416 char tmp[ULOC_FULLNAME_CAPACITY+1]; 2417 const char *l; 2418 char **fallbackList; 2419 if(U_FAILURE(*status)) { 2420 return -1; 2421 } 2422 fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)); 2423 if(fallbackList==NULL) { 2424 *status = U_MEMORY_ALLOCATION_ERROR; 2425 return -1; 2426 } 2427 for(i=0;i<acceptListCount;i++) { 2428 #if defined(ULOC_DEBUG) 2429 fprintf(stderr,"%02d: %s\n", i, acceptList[i]); 2430 #endif 2431 while((l=uenum_next(availableLocales, NULL, status))) { 2432 #if defined(ULOC_DEBUG) 2433 fprintf(stderr," %s\n", l); 2434 #endif 2435 len = (int32_t)uprv_strlen(l); 2436 if(!uprv_strcmp(acceptList[i], l)) { 2437 if(outResult) { 2438 *outResult = ULOC_ACCEPT_VALID; 2439 } 2440 #if defined(ULOC_DEBUG) 2441 fprintf(stderr, "MATCH! %s\n", l); 2442 #endif 2443 if(len>0) { 2444 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2445 } 2446 for(j=0;j<i;j++) { 2447 uprv_free(fallbackList[j]); 2448 } 2449 uprv_free(fallbackList); 2450 return u_terminateChars(result, resultAvailable, len, status); 2451 } 2452 if(len>maxLen) { 2453 maxLen = len; 2454 } 2455 } 2456 uenum_reset(availableLocales, status); 2457 /* save off parent info */ 2458 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 2459 fallbackList[i] = uprv_strdup(tmp); 2460 } else { 2461 fallbackList[i]=0; 2462 } 2463 } 2464 2465 for(maxLen--;maxLen>0;maxLen--) { 2466 for(i=0;i<acceptListCount;i++) { 2467 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) { 2468 #if defined(ULOC_DEBUG) 2469 fprintf(stderr,"Try: [%s]", fallbackList[i]); 2470 #endif 2471 while((l=uenum_next(availableLocales, NULL, status))) { 2472 #if defined(ULOC_DEBUG) 2473 fprintf(stderr," %s\n", l); 2474 #endif 2475 len = (int32_t)uprv_strlen(l); 2476 if(!uprv_strcmp(fallbackList[i], l)) { 2477 if(outResult) { 2478 *outResult = ULOC_ACCEPT_FALLBACK; 2479 } 2480 #if defined(ULOC_DEBUG) 2481 fprintf(stderr, "fallback MATCH! %s\n", l); 2482 #endif 2483 if(len>0) { 2484 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2485 } 2486 for(j=0;j<acceptListCount;j++) { 2487 uprv_free(fallbackList[j]); 2488 } 2489 uprv_free(fallbackList); 2490 return u_terminateChars(result, resultAvailable, len, status); 2491 } 2492 } 2493 uenum_reset(availableLocales, status); 2494 2495 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 2496 uprv_free(fallbackList[i]); 2497 fallbackList[i] = uprv_strdup(tmp); 2498 } else { 2499 uprv_free(fallbackList[i]); 2500 fallbackList[i]=0; 2501 } 2502 } 2503 } 2504 if(outResult) { 2505 *outResult = ULOC_ACCEPT_FAILED; 2506 } 2507 } 2508 for(i=0;i<acceptListCount;i++) { 2509 uprv_free(fallbackList[i]); 2510 } 2511 uprv_free(fallbackList); 2512 return -1; 2513 } 2514 2515 /*eof*/ 2516