1 /* 2 ********************************************************************** 3 * Copyright (C) 1997-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * File ULOC.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 04/01/97 aliu Creation. 13 * 08/21/98 stephen JDK 1.2 sync 14 * 12/08/98 rtg New Locale implementation and C API 15 * 03/15/99 damiba overhaul. 16 * 04/06/99 stephen changed setDefault() to realloc and copy 17 * 06/14/99 stephen Changed calls to ures_open for new params 18 * 07/21/99 stephen Modified setDefault() to propagate to C++ 19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, 20 * brought canonicalization code into line with spec 21 *****************************************************************************/ 22 23 /* 24 POSIX's locale format, from putil.c: [no spaces] 25 26 ll [ _CC ] [ . MM ] [ @ VV] 27 28 l = lang, C = ctry, M = charmap, V = variant 29 */ 30 31 #include "unicode/utypes.h" 32 #include "unicode/ustring.h" 33 #include "unicode/uloc.h" 34 35 #include "putilimp.h" 36 #include "ustr_imp.h" 37 #include "ulocimp.h" 38 #include "umutex.h" 39 #include "cstring.h" 40 #include "cmemory.h" 41 #include "ucln_cmn.h" 42 #include "locmap.h" 43 #include "uarrsort.h" 44 #include "uenumimp.h" 45 #include "uassert.h" 46 47 #include <stdio.h> /* for sprintf */ 48 49 /* ### Declarations **************************************************/ 50 51 /* Locale stuff from locid.cpp */ 52 U_CFUNC void locale_set_default(const char *id); 53 U_CFUNC const char *locale_get_default(void); 54 U_CFUNC int32_t 55 locale_getKeywords(const char *localeID, 56 char prev, 57 char *keywords, int32_t keywordCapacity, 58 char *values, int32_t valuesCapacity, int32_t *valLen, 59 UBool valuesToo, 60 UErrorCode *status); 61 62 /* ### Data tables **************************************************/ 63 64 /** 65 * Table of 
language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3-character codes are the terminology codes, as in RFC 3066.
 * This is compatible with prior ICU codes.
 *
 * "in", "iw", "ji", "jw" and "sh" have been withdrawn.  They are still
 * in the table, but have been moved to the second list at the end,
 * because their 3-character codes duplicate those of other entries.
 * This avoids bad searches going from 3- to 2-character
 * codes.
92 * 93 * The range qaa-qtz is reserved for local use 94 */ 95 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ 96 /* ISO639 table version is 20130123 */ 97 static const char * const LANGUAGES[] = { 98 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", 99 "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg", 100 "alt", "am", "an", "ang", "anp", "apa", "ar", "arc", 101 "arn", "arp", "art", "arw", "as", "asa", "ast", "ath", 102 "aus", "av", "awa", "ay", "az", 103 "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax", 104 "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg", 105 "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm", 106 "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss", 107 "btk", "bua", "bug", "bum", "byn", "byv", 108 "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce", 109 "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm", 110 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co", 111 "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", 112 "csb", "cu", "cus", "cv", "cy", 113 "da", "dak", "dar", "dav", "day", "de", "del", "den", 114 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", 115 "dv", "dyo", "dyu", "dz", "dzg", 116 "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en", 117 "enm", "eo", "es", "et", "eu", "ewo", 118 "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj", 119 "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur", 120 "fy", 121 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil", 122 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb", 123 "grc", "gsw", "gu", "guz", "gv", "gwi", 124 "ha", "hai", "haw", "he", "hi", "hil", "him", "hit", 125 "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", 126 "hz", 127 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo", 128 "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro", 129 "is", "it", "iu", 130 "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv", 131 "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw", 132 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha", 
133 "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl", 134 "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe", 135 "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf", 136 "ksh", "ku", "kum", "kut", "kv", "kw", "ky", 137 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg", 138 "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu", 139 "lua", "lui", "lun", "luo", "lus", "luy", "lv", 140 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas", 141 "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga", 142 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk", 143 "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh", 144 "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus", 145 "mwl", "mwr", "my", "mye", "myn", "myv", 146 "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds", 147 "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg", 148 "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso", 149 "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", 150 "nzi", 151 "oc", "oj", "om", "or", "os", "osa", "ota", "oto", 152 "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo", 153 "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps", 154 "pt", 155 "qu", 156 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof", 157 "rom", "ru", "rup", "rw", "rwk", 158 "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas", 159 "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se", 160 "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn", 161 "shi", "shn", "shu", "si", "sid", "sio", "sit", 162 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", 163 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", 164 "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk", 165 "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr", 166 "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg", 167 "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh", 168 "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv", 169 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", 170 "twq", "ty", "tyv", "tzm", 171 "udm", "ug", "uga", "uk", "umb", 
"und", "ur", "uz", 172 "vai", "ve", "vi", "vo", "vot", "vun", 173 "wa", "wae", "wak", "wal", "war", "was", "wen", "wo", 174 "xal", "xh", "xog", 175 "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue", 176 "za", "zap", "zbl", "zen", "zh", "znd", "zu", "zun", 177 "zxx", "zza", 178 NULL, 179 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ 180 NULL 181 }; 182 183 static const char* const DEPRECATED_LANGUAGES[]={ 184 "in", "iw", "ji", "jw", NULL, NULL 185 }; 186 static const char* const REPLACEMENT_LANGUAGES[]={ 187 "id", "he", "yi", "jv", NULL, NULL 188 }; 189 190 /** 191 * Table of 3-letter language codes. 192 * 193 * This is a lookup table used to convert 3-letter language codes to 194 * their 2-letter equivalent, where possible. It must be kept in sync 195 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the 196 * same language as LANGUAGES_3[i]. The commented-out lines are 197 * copied from LANGUAGES to make eyeballing this baby easier. 198 * 199 * Where a 3-letter language code has no 2-letter equivalent, the 200 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i]. 201 * 202 * This table should be terminated with a NULL entry, followed by a 203 * second list, and another NULL entry. The two lists correspond to 204 * the two lists in LANGUAGES. 
205 */ 206 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ 207 /* ISO639 table version is 20130123 */ 208 static const char * const LANGUAGES_3[] = { 209 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", 210 "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg", 211 "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc", 212 "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath", 213 "aus", "ava", "awa", "aym", "aze", 214 "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax", 215 "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul", 216 "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam", 217 "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss", 218 "btk", "bua", "bug", "bum", "byn", "byv", 219 "cat", "cad", "cai", "car", "cau", "cay", "cch", "che", 220 "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm", 221 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos", 222 "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", 223 "csb", "chu", "cus", "chv", "cym", 224 "dan", "dak", "dar", "dav", "day", "deu", "del", "den", 225 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", 226 "div", "dyo", "dyu", "dzo", "dzg", 227 "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng", 228 "enm", "epo", "spa", "est", "eus", "ewo", 229 "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij", 230 "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur", 231 "fry", 232 "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil", 233 "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb", 234 "grc", "gsw", "guj", "guz", "glv", "gwi", 235 "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit", 236 "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", 237 "her", 238 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo", 239 "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro", 240 "isl", "ita", "iku", 241 "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav", 242 "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw", 243 "kbd", "kbl", "kcg", 
"kde", "kea", "kfo", "kon", "kha", 244 "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal", 245 "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe", 246 "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf", 247 "ksh", "kur", "kum", "kut", "kom", "cor", "kir", 248 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug", 249 "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub", 250 "lua", "lui", "lun", "luo", "lus", "luy", "lav", 251 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas", 252 "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga", 253 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd", 254 "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh", 255 "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus", 256 "mwl", "mwr", "mya", "mye", "myn", "myv", 257 "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds", 258 "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg", 259 "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso", 260 "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", 261 "nzi", 262 "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto", 263 "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo", 264 "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus", 265 "por", 266 "que", 267 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof", 268 "rom", "rus", "rup", "kin", "rwk", 269 "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas", 270 "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme", 271 "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn", 272 "shi", "shn", "shu", "sin", "sid", "sio", "sit", 273 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn", 274 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp", 275 "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk", 276 "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr", 277 "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk", 278 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh", 279 "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv", 280 
"tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi", 281 "twq", "tah", "tyv", "tzm", 282 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb", 283 "vai", "ven", "vie", "vol", "vot", "vun", 284 "wln", "wae", "wak", "wal", "war", "was", "wen", "wol", 285 "xal", "xho", "xog", 286 "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue", 287 "zha", "zap", "zbl", "zen", "zho", "znd", "zul", "zun", 288 "zxx", "zza", 289 NULL, 290 /* "in", "iw", "ji", "jw", "sh", */ 291 "ind", "heb", "yid", "jaw", "srp", 292 NULL 293 }; 294 295 /** 296 * Table of 2-letter country codes. 297 * 298 * This list must be in sorted order. This list is returned directly 299 * to the user by some API. 300 * 301 * This list must be kept in sync with COUNTRIES_3, with corresponding 302 * entries matched. 303 * 304 * This table should be terminated with a NULL entry, followed by a 305 * second list, and another NULL entry. The first list is visible to 306 * user code when this array is returned by API. The second list 307 * contains codes we support, but do not expose through user API. 
308 * 309 * Notes: 310 * 311 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per 312 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added 313 * new codes keeping the old ones for compatibility updated to include 314 * 1999/12/03 revisions *CWB* 315 * 316 * RO(ROM) is now RO(ROU) according to 317 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html 318 */ 319 static const char * const COUNTRIES[] = { 320 "AD", "AE", "AF", "AG", "AI", "AL", "AM", 321 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", 322 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", 323 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", 324 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", 325 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", 326 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", 327 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", 328 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", 329 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", 330 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", 331 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", 332 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", 333 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", 334 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", 335 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", 336 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", 337 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", 338 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", 339 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", 340 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", 341 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", 342 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", 343 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", 344 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", 345 "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", 346 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", 347 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", 348 "VA", "VC", 
"VE", "VG", "VI", "VN", "VU", "WF", 349 "WS", "YE", "YT", "ZA", "ZM", "ZW", 350 NULL, 351 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */ 352 NULL 353 }; 354 355 static const char* const DEPRECATED_COUNTRIES[] = { 356 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */ 357 }; 358 static const char* const REPLACEMENT_COUNTRIES[] = { 359 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */ 360 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */ 361 }; 362 363 /** 364 * Table of 3-letter country codes. 365 * 366 * This is a lookup table used to convert 3-letter country codes to 367 * their 2-letter equivalent. It must be kept in sync with COUNTRIES. 368 * For all valid i, COUNTRIES[i] must refer to the same country as 369 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES 370 * to make eyeballing this baby easier. 371 * 372 * This table should be terminated with a NULL entry, followed by a 373 * second list, and another NULL entry. The two lists correspond to 374 * the two lists in COUNTRIES. 
375 */ 376 static const char * const COUNTRIES_3[] = { 377 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */ 378 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", 379 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ 380 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", 381 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ 382 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", 383 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */ 384 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT", 385 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ 386 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", 387 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ 388 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", 389 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ 390 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", 391 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ 392 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", 393 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ 394 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", 395 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ 396 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", 397 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ 398 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", 399 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ 400 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", 401 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ 402 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", 403 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ 404 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", 405 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ 406 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", 407 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ 408 "LBN", "LCA", "LIE", "LKA", "LBR", 
"LSO", "LTU", "LUX", 409 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ 410 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", 411 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ 412 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", 413 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ 414 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", 415 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ 416 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", 417 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ 418 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", 419 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ 420 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", 421 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ 422 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", 423 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ 424 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", 425 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */ 426 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV", 427 /* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ 428 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", 429 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ 430 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", 431 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ 432 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", 433 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ 434 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", 435 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ 436 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", 437 NULL, 438 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ 439 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", 440 NULL 441 }; 442 443 typedef struct CanonicalizationMap { 444 const char *id; /* input ID */ 445 const char *canonicalID; /* 
canonicalized output ID */ 446 const char *keyword; /* keyword, or NULL if none */ 447 const char *value; /* keyword value, or NULL if kw==NULL */ 448 } CanonicalizationMap; 449 450 /** 451 * A map to canonicalize locale IDs. This handles a variety of 452 * different semantic kinds of transformations. 453 */ 454 static const CanonicalizationMap CANONICALIZE_MAP[] = { 455 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */ 456 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */ 457 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */ 458 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */ 459 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */ 460 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */ 461 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" }, 462 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */ 463 { "de_AT_PREEURO", "de_AT", "currency", "ATS" }, 464 { "de_DE_PREEURO", "de_DE", "currency", "DEM" }, 465 { "de_LU_PREEURO", "de_LU", "currency", "LUF" }, 466 { "el_GR_PREEURO", "el_GR", "currency", "GRD" }, 467 { "en_BE_PREEURO", "en_BE", "currency", "BEF" }, 468 { "en_IE_PREEURO", "en_IE", "currency", "IEP" }, 469 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */ 470 { "es_ES_PREEURO", "es_ES", "currency", "ESP" }, 471 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" }, 472 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" }, 473 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" }, 474 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" }, 475 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" }, 476 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" }, 477 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" }, 478 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */ 479 { "it_IT_PREEURO", "it_IT", "currency", "ITL" }, 480 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */ 481 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */ 482 { 
"nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, 483 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, 484 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, 485 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ 486 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ 487 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ 488 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */ 489 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */ 490 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */ 491 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */ 492 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */ 493 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */ 494 { "zh_GAN", "gan", NULL, NULL }, /* registered name */ 495 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */ 496 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */ 497 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */ 498 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */ 499 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */ 500 { "zh_YUE", "yue", NULL, NULL }, /* registered name */ 501 }; 502 503 typedef struct VariantMap { 504 const char *variant; /* input ID */ 505 const char *keyword; /* keyword, or NULL if none */ 506 const char *value; /* keyword value, or NULL if kw==NULL */ 507 } VariantMap; 508 509 static const VariantMap VARIANT_MAP[] = { 510 { "EURO", "currency", "EUR" }, 511 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */ 512 { "STROKE", "collation", "stroke" } /* Solaris variant */ 513 }; 514 515 /* ### BCP47 Conversion *******************************************/ 516 /* Test if the locale id has BCP47 u extension and does not have '@' */ 517 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) 518 /* Converts the BCP47 id to Unicode id. 
Does nothing to id if conversion fails */ 519 #define _ConvertBCP47(finalID, id, buffer, length,err) \ 520 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \ 521 finalID=id; \ 522 } else { \ 523 finalID=buffer; \ 524 } 525 /* Gets the size of the shortest subtag in the given localeID. */ 526 static int32_t getShortestSubtagLength(const char *localeID) { 527 int32_t localeIDLength = uprv_strlen(localeID); 528 int32_t length = localeIDLength; 529 int32_t tmpLength = 0; 530 int32_t i; 531 UBool reset = TRUE; 532 533 for (i = 0; i < localeIDLength; i++) { 534 if (localeID[i] != '_' && localeID[i] != '-') { 535 if (reset) { 536 tmpLength = 0; 537 reset = FALSE; 538 } 539 tmpLength++; 540 } else { 541 if (tmpLength != 0 && tmpLength < length) { 542 length = tmpLength; 543 } 544 reset = TRUE; 545 } 546 } 547 548 return length; 549 } 550 551 /* ### Keywords **************************************************/ 552 553 #define ULOC_KEYWORD_BUFFER_LEN 25 554 #define ULOC_MAX_NO_KEYWORDS 25 555 556 U_CAPI const char * U_EXPORT2 557 locale_getKeywordsStart(const char *localeID) { 558 const char *result = NULL; 559 if((result = uprv_strchr(localeID, '@')) != NULL) { 560 return result; 561 } 562 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) 563 else { 564 /* We do this because the @ sign is variant, and the @ sign used on one 565 EBCDIC machine won't be compiled the same way on other EBCDIC based 566 machines. 
*/ 567 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; 568 const uint8_t *charToFind = ebcdicSigns; 569 while(*charToFind) { 570 if((result = uprv_strchr(localeID, *charToFind)) != NULL) { 571 return result; 572 } 573 charToFind++; 574 } 575 } 576 #endif 577 return NULL; 578 } 579 580 /** 581 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] 582 * @param keywordName incoming name to be canonicalized 583 * @param status return status (keyword too long) 584 * @return length of the keyword name 585 */ 586 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) 587 { 588 int32_t i; 589 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName); 590 591 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) { 592 /* keyword name too long for internal buffer */ 593 *status = U_INTERNAL_PROGRAM_ERROR; 594 return 0; 595 } 596 597 /* normalize the keyword name */ 598 for(i = 0; i < keywordNameLen; i++) { 599 buf[i] = uprv_tolower(keywordName[i]); 600 } 601 buf[i] = 0; 602 603 return keywordNameLen; 604 } 605 606 typedef struct { 607 char keyword[ULOC_KEYWORD_BUFFER_LEN]; 608 int32_t keywordLen; 609 const char *valueStart; 610 int32_t valueLen; 611 } KeywordStruct; 612 613 static int32_t U_CALLCONV 614 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { 615 const char* leftString = ((const KeywordStruct *)left)->keyword; 616 const char* rightString = ((const KeywordStruct *)right)->keyword; 617 return uprv_strcmp(leftString, rightString); 618 } 619 620 /** 621 * Both addKeyword and addValue must already be in canonical form. 622 * Either both addKeyword and addValue are NULL, or neither is NULL. 623 * If they are not NULL they must be zero terminated. 624 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword. 
625 */ 626 static int32_t 627 _getKeywords(const char *localeID, 628 char prev, 629 char *keywords, int32_t keywordCapacity, 630 char *values, int32_t valuesCapacity, int32_t *valLen, 631 UBool valuesToo, 632 const char* addKeyword, 633 const char* addValue, 634 UErrorCode *status) 635 { 636 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; 637 638 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; 639 int32_t numKeywords = 0; 640 const char* pos = localeID; 641 const char* equalSign = NULL; 642 const char* semicolon = NULL; 643 int32_t i = 0, j, n; 644 int32_t keywordsLen = 0; 645 int32_t valuesLen = 0; 646 647 if(prev == '@') { /* start of keyword definition */ 648 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ 649 do { 650 UBool duplicate = FALSE; 651 /* skip leading spaces */ 652 while(*pos == ' ') { 653 pos++; 654 } 655 if (!*pos) { /* handle trailing "; " */ 656 break; 657 } 658 if(numKeywords == maxKeywords) { 659 *status = U_INTERNAL_PROGRAM_ERROR; 660 return 0; 661 } 662 equalSign = uprv_strchr(pos, '='); 663 semicolon = uprv_strchr(pos, ';'); 664 /* lack of '=' [foo@currency] is illegal */ 665 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ 666 if(!equalSign || (semicolon && semicolon<equalSign)) { 667 *status = U_INVALID_FORMAT_ERROR; 668 return 0; 669 } 670 /* need to normalize both keyword and keyword name */ 671 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) { 672 /* keyword name too long for internal buffer */ 673 *status = U_INTERNAL_PROGRAM_ERROR; 674 return 0; 675 } 676 for(i = 0, n = 0; i < equalSign - pos; ++i) { 677 if (pos[i] != ' ') { 678 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); 679 } 680 } 681 keywordList[numKeywords].keyword[n] = 0; 682 keywordList[numKeywords].keywordLen = n; 683 /* now grab the value part. 
First we skip the '=' */ 684 equalSign++; 685 /* then we leading spaces */ 686 while(*equalSign == ' ') { 687 equalSign++; 688 } 689 keywordList[numKeywords].valueStart = equalSign; 690 691 pos = semicolon; 692 i = 0; 693 if(pos) { 694 while(*(pos - i - 1) == ' ') { 695 i++; 696 } 697 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); 698 pos++; 699 } else { 700 i = (int32_t)uprv_strlen(equalSign); 701 while(i && equalSign[i-1] == ' ') { 702 i--; 703 } 704 keywordList[numKeywords].valueLen = i; 705 } 706 /* If this is a duplicate keyword, then ignore it */ 707 for (j=0; j<numKeywords; ++j) { 708 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) { 709 duplicate = TRUE; 710 break; 711 } 712 } 713 if (!duplicate) { 714 ++numKeywords; 715 } 716 } while(pos); 717 718 /* Handle addKeyword/addValue. */ 719 if (addKeyword != NULL) { 720 UBool duplicate = FALSE; 721 U_ASSERT(addValue != NULL); 722 /* Search for duplicate; if found, do nothing. Explicit keyword 723 overrides addKeyword. 
*/ 724 for (j=0; j<numKeywords; ++j) { 725 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) { 726 duplicate = TRUE; 727 break; 728 } 729 } 730 if (!duplicate) { 731 if (numKeywords == maxKeywords) { 732 *status = U_INTERNAL_PROGRAM_ERROR; 733 return 0; 734 } 735 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword); 736 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword); 737 keywordList[numKeywords].valueStart = addValue; 738 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue); 739 ++numKeywords; 740 } 741 } else { 742 U_ASSERT(addValue == NULL); 743 } 744 745 /* now we have a list of keywords */ 746 /* we need to sort it */ 747 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status); 748 749 /* Now construct the keyword part */ 750 for(i = 0; i < numKeywords; i++) { 751 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) { 752 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword); 753 if(valuesToo) { 754 keywords[keywordsLen + keywordList[i].keywordLen] = '='; 755 } else { 756 keywords[keywordsLen + keywordList[i].keywordLen] = 0; 757 } 758 } 759 keywordsLen += keywordList[i].keywordLen + 1; 760 if(valuesToo) { 761 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) { 762 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); 763 } 764 keywordsLen += keywordList[i].valueLen; 765 766 if(i < numKeywords - 1) { 767 if(keywordsLen < keywordCapacity) { 768 keywords[keywordsLen] = ';'; 769 } 770 keywordsLen++; 771 } 772 } 773 if(values) { 774 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) { 775 uprv_strcpy(values+valuesLen, keywordList[i].valueStart); 776 values[valuesLen + keywordList[i].valueLen] = 0; 777 } 778 valuesLen += keywordList[i].valueLen + 1; 779 } 780 } 781 if(values) { 782 values[valuesLen] = 0; 783 if(valLen) { 784 *valLen = valuesLen; 785 } 786 } 787 return u_terminateChars(keywords, 
keywordCapacity, keywordsLen, status); 788 } else { 789 return 0; 790 } 791 } 792 793 U_CFUNC int32_t 794 locale_getKeywords(const char *localeID, 795 char prev, 796 char *keywords, int32_t keywordCapacity, 797 char *values, int32_t valuesCapacity, int32_t *valLen, 798 UBool valuesToo, 799 UErrorCode *status) { 800 return _getKeywords(localeID, prev, keywords, keywordCapacity, 801 values, valuesCapacity, valLen, valuesToo, 802 NULL, NULL, status); 803 } 804 805 U_CAPI int32_t U_EXPORT2 806 uloc_getKeywordValue(const char* localeID, 807 const char* keywordName, 808 char* buffer, int32_t bufferCapacity, 809 UErrorCode* status) 810 { 811 const char* startSearchHere = NULL; 812 const char* nextSeparator = NULL; 813 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 814 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 815 int32_t i = 0; 816 int32_t result = 0; 817 818 if(status && U_SUCCESS(*status) && localeID) { 819 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 820 const char* tmpLocaleID; 821 822 if (_hasBCP47Extension(localeID)) { 823 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 824 } else { 825 tmpLocaleID=localeID; 826 } 827 828 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */ 829 if(startSearchHere == NULL) { 830 /* no keywords, return at once */ 831 return 0; 832 } 833 834 locale_canonKeywordName(keywordNameBuffer, keywordName, status); 835 if(U_FAILURE(*status)) { 836 return 0; 837 } 838 839 /* find the first keyword */ 840 while(startSearchHere) { 841 startSearchHere++; 842 /* skip leading spaces (allowed?) 
*/ 843 while(*startSearchHere == ' ') { 844 startSearchHere++; 845 } 846 nextSeparator = uprv_strchr(startSearchHere, '='); 847 /* need to normalize both keyword and keyword name */ 848 if(!nextSeparator) { 849 break; 850 } 851 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) { 852 /* keyword name too long for internal buffer */ 853 *status = U_INTERNAL_PROGRAM_ERROR; 854 return 0; 855 } 856 for(i = 0; i < nextSeparator - startSearchHere; i++) { 857 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]); 858 } 859 /* trim trailing spaces */ 860 while(startSearchHere[i-1] == ' ') { 861 i--; 862 U_ASSERT(i>=0); 863 } 864 localeKeywordNameBuffer[i] = 0; 865 866 startSearchHere = uprv_strchr(nextSeparator, ';'); 867 868 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { 869 nextSeparator++; 870 while(*nextSeparator == ' ') { 871 nextSeparator++; 872 } 873 /* we actually found the keyword. Copy the value */ 874 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) { 875 while(*(startSearchHere-1) == ' ') { 876 startSearchHere--; 877 } 878 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator); 879 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status); 880 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */ 881 i = (int32_t)uprv_strlen(nextSeparator); 882 while(nextSeparator[i - 1] == ' ') { 883 i--; 884 } 885 uprv_strncpy(buffer, nextSeparator, i); 886 result = u_terminateChars(buffer, bufferCapacity, i, status); 887 } else { 888 /* give a bigger buffer, please */ 889 *status = U_BUFFER_OVERFLOW_ERROR; 890 if(startSearchHere) { 891 result = (int32_t)(startSearchHere - nextSeparator); 892 } else { 893 result = (int32_t)uprv_strlen(nextSeparator); 894 } 895 } 896 return result; 897 } 898 } 899 } 900 return 0; 901 } 902 903 U_CAPI int32_t U_EXPORT2 904 uloc_setKeywordValue(const char* keywordName, 
905 const char* keywordValue, 906 char* buffer, int32_t bufferCapacity, 907 UErrorCode* status) 908 { 909 /* TODO: sorting. removal. */ 910 int32_t keywordNameLen; 911 int32_t keywordValueLen; 912 int32_t bufLen; 913 int32_t needLen = 0; 914 int32_t foundValueLen; 915 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */ 916 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 917 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 918 int32_t i = 0; 919 int32_t rc; 920 char* nextSeparator = NULL; 921 char* nextEqualsign = NULL; 922 char* startSearchHere = NULL; 923 char* keywordStart = NULL; 924 char *insertHere = NULL; 925 if(U_FAILURE(*status)) { 926 return -1; 927 } 928 if(bufferCapacity>1) { 929 bufLen = (int32_t)uprv_strlen(buffer); 930 } else { 931 *status = U_ILLEGAL_ARGUMENT_ERROR; 932 return 0; 933 } 934 if(bufferCapacity<bufLen) { 935 /* The capacity is less than the length?! Is this NULL terminated? */ 936 *status = U_ILLEGAL_ARGUMENT_ERROR; 937 return 0; 938 } 939 if(keywordValue && !*keywordValue) { 940 keywordValue = NULL; 941 } 942 if(keywordValue) { 943 keywordValueLen = (int32_t)uprv_strlen(keywordValue); 944 } else { 945 keywordValueLen = 0; 946 } 947 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status); 948 if(U_FAILURE(*status)) { 949 return 0; 950 } 951 startSearchHere = (char*)locale_getKeywordsStart(buffer); 952 if(startSearchHere == NULL || (startSearchHere[1]==0)) { 953 if(!keywordValue) { /* no keywords = nothing to remove */ 954 return bufLen; 955 } 956 957 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 958 if(startSearchHere) { /* had a single @ */ 959 needLen--; /* already had the @ */ 960 /* startSearchHere points at the @ */ 961 } else { 962 startSearchHere=buffer+bufLen; 963 } 964 if(needLen >= bufferCapacity) { 965 *status = U_BUFFER_OVERFLOW_ERROR; 966 return needLen; /* no change */ 967 } 968 *startSearchHere = '@'; 969 startSearchHere++; 970 uprv_strcpy(startSearchHere, 
keywordNameBuffer); 971 startSearchHere += keywordNameLen; 972 *startSearchHere = '='; 973 startSearchHere++; 974 uprv_strcpy(startSearchHere, keywordValue); 975 startSearchHere+=keywordValueLen; 976 return needLen; 977 } /* end shortcut - no @ */ 978 979 keywordStart = startSearchHere; 980 /* search for keyword */ 981 while(keywordStart) { 982 keywordStart++; 983 /* skip leading spaces (allowed?) */ 984 while(*keywordStart == ' ') { 985 keywordStart++; 986 } 987 nextEqualsign = uprv_strchr(keywordStart, '='); 988 /* need to normalize both keyword and keyword name */ 989 if(!nextEqualsign) { 990 break; 991 } 992 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) { 993 /* keyword name too long for internal buffer */ 994 *status = U_INTERNAL_PROGRAM_ERROR; 995 return 0; 996 } 997 for(i = 0; i < nextEqualsign - keywordStart; i++) { 998 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]); 999 } 1000 /* trim trailing spaces */ 1001 while(keywordStart[i-1] == ' ') { 1002 i--; 1003 } 1004 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN); 1005 localeKeywordNameBuffer[i] = 0; 1006 1007 nextSeparator = uprv_strchr(nextEqualsign, ';'); 1008 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); 1009 if(rc == 0) { 1010 nextEqualsign++; 1011 while(*nextEqualsign == ' ') { 1012 nextEqualsign++; 1013 } 1014 /* we actually found the keyword. 
Change the value */ 1015 if (nextSeparator) { 1016 keywordAtEnd = 0; 1017 foundValueLen = (int32_t)(nextSeparator - nextEqualsign); 1018 } else { 1019 keywordAtEnd = 1; 1020 foundValueLen = (int32_t)uprv_strlen(nextEqualsign); 1021 } 1022 if(keywordValue) { /* adding a value - not removing */ 1023 if(foundValueLen == keywordValueLen) { 1024 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1025 return bufLen; /* no change in size */ 1026 } else if(foundValueLen > keywordValueLen) { 1027 int32_t delta = foundValueLen - keywordValueLen; 1028 if(nextSeparator) { /* RH side */ 1029 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer)); 1030 } 1031 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1032 bufLen -= delta; 1033 buffer[bufLen]=0; 1034 return bufLen; 1035 } else { /* FVL < KVL */ 1036 int32_t delta = keywordValueLen - foundValueLen; 1037 if((bufLen+delta) >= bufferCapacity) { 1038 *status = U_BUFFER_OVERFLOW_ERROR; 1039 return bufLen+delta; 1040 } 1041 if(nextSeparator) { /* RH side */ 1042 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer)); 1043 } 1044 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1045 bufLen += delta; 1046 buffer[bufLen]=0; 1047 return bufLen; 1048 } 1049 } else { /* removing a keyword */ 1050 if(keywordAtEnd) { 1051 /* zero out the ';' or '@' just before startSearchhere */ 1052 keywordStart[-1] = 0; 1053 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */ 1054 } else { 1055 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer)); 1056 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0; 1057 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart)); 1058 } 1059 } 1060 } else if(rc<0){ /* end match keyword */ 1061 /* could insert at this location. 
*/ 1062 insertHere = keywordStart; 1063 } 1064 keywordStart = nextSeparator; 1065 } /* end loop searching */ 1066 1067 if(!keywordValue) { 1068 return bufLen; /* removal of non-extant keyword - no change */ 1069 } 1070 1071 /* we know there is at least one keyword. */ 1072 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 1073 if(needLen >= bufferCapacity) { 1074 *status = U_BUFFER_OVERFLOW_ERROR; 1075 return needLen; /* no change */ 1076 } 1077 1078 if(insertHere) { 1079 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer)); 1080 keywordStart = insertHere; 1081 } else { 1082 keywordStart = buffer+bufLen; 1083 *keywordStart = ';'; 1084 keywordStart++; 1085 } 1086 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen); 1087 keywordStart += keywordNameLen; 1088 *keywordStart = '='; 1089 keywordStart++; 1090 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */ 1091 keywordStart+=keywordValueLen; 1092 if(insertHere) { 1093 *keywordStart = ';'; 1094 keywordStart++; 1095 } 1096 buffer[needLen]=0; 1097 return needLen; 1098 } 1099 1100 /* ### ID parsing implementation **************************************************/ 1101 1102 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) 1103 1104 /*returns TRUE if one of the special prefixes is here (s=string) 1105 'x-' or 'i-' */ 1106 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) 1107 1108 /* Dot terminates it because of POSIX form where dot precedes the codepage 1109 * except for variant 1110 */ 1111 #define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) 1112 1113 static char* _strnchr(const char* str, int32_t len, char c) { 1114 U_ASSERT(str != 0 && len >= 0); 1115 while (len-- != 0) { 1116 char d = *str; 1117 if (d == c) { 1118 return (char*) str; 1119 } else if (d == 0) { 1120 break; 1121 } 1122 ++str; 1123 } 1124 return NULL; 1125 } 1126 1127 /** 1128 * Lookup 'key' in the array 'list'. 
The array 'list' should contain 1129 * a NULL entry, followed by more entries, and a second NULL entry. 1130 * 1131 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or 1132 * COUNTRIES_3. 1133 */ 1134 static int16_t _findIndex(const char* const* list, const char* key) 1135 { 1136 const char* const* anchor = list; 1137 int32_t pass = 0; 1138 1139 /* Make two passes through two NULL-terminated arrays at 'list' */ 1140 while (pass++ < 2) { 1141 while (*list) { 1142 if (uprv_strcmp(key, *list) == 0) { 1143 return (int16_t)(list - anchor); 1144 } 1145 list++; 1146 } 1147 ++list; /* skip final NULL *CWB*/ 1148 } 1149 return -1; 1150 } 1151 1152 /* count the length of src while copying it to dest; return strlen(src) */ 1153 static inline int32_t 1154 _copyCount(char *dest, int32_t destCapacity, const char *src) { 1155 const char *anchor; 1156 char c; 1157 1158 anchor=src; 1159 for(;;) { 1160 if((c=*src)==0) { 1161 return (int32_t)(src-anchor); 1162 } 1163 if(destCapacity<=0) { 1164 return (int32_t)((src-anchor)+uprv_strlen(src)); 1165 } 1166 ++src; 1167 *dest++=c; 1168 --destCapacity; 1169 } 1170 } 1171 1172 U_CFUNC const char* 1173 uloc_getCurrentCountryID(const char* oldID){ 1174 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); 1175 if (offset >= 0) { 1176 return REPLACEMENT_COUNTRIES[offset]; 1177 } 1178 return oldID; 1179 } 1180 U_CFUNC const char* 1181 uloc_getCurrentLanguageID(const char* oldID){ 1182 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); 1183 if (offset >= 0) { 1184 return REPLACEMENT_LANGUAGES[offset]; 1185 } 1186 return oldID; 1187 } 1188 /* 1189 * the internal functions _getLanguage(), _getCountry(), _getVariant() 1190 * avoid duplicating code to handle the earlier locale ID pieces 1191 * in the functions for the later ones by 1192 * setting the *pEnd pointer to where they stopped parsing 1193 * 1194 * TODO try to use this in Locale 1195 */ 1196 U_CFUNC int32_t 1197 ulocimp_getLanguage(const char *localeID, 1198 char 
*language, int32_t languageCapacity, 1199 const char **pEnd) { 1200 int32_t i=0; 1201 int32_t offset; 1202 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ 1203 1204 /* if it starts with i- or x- then copy that prefix */ 1205 if(_isIDPrefix(localeID)) { 1206 if(i<languageCapacity) { 1207 language[i]=(char)uprv_tolower(*localeID); 1208 } 1209 if(i<languageCapacity) { 1210 language[i+1]='-'; 1211 } 1212 i+=2; 1213 localeID+=2; 1214 } 1215 1216 /* copy the language as far as possible and count its length */ 1217 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { 1218 if(i<languageCapacity) { 1219 language[i]=(char)uprv_tolower(*localeID); 1220 } 1221 if(i<3) { 1222 U_ASSERT(i>=0); 1223 lang[i]=(char)uprv_tolower(*localeID); 1224 } 1225 i++; 1226 localeID++; 1227 } 1228 1229 if(i==3) { 1230 /* convert 3 character code to 2 character code if possible *CWB*/ 1231 offset=_findIndex(LANGUAGES_3, lang); 1232 if(offset>=0) { 1233 i=_copyCount(language, languageCapacity, LANGUAGES[offset]); 1234 } 1235 } 1236 1237 if(pEnd!=NULL) { 1238 *pEnd=localeID; 1239 } 1240 return i; 1241 } 1242 1243 U_CFUNC int32_t 1244 ulocimp_getScript(const char *localeID, 1245 char *script, int32_t scriptCapacity, 1246 const char **pEnd) 1247 { 1248 int32_t idLen = 0; 1249 1250 if (pEnd != NULL) { 1251 *pEnd = localeID; 1252 } 1253 1254 /* copy the second item as far as possible and count its length */ 1255 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]) 1256 && uprv_isASCIILetter(localeID[idLen])) { 1257 idLen++; 1258 } 1259 1260 /* If it's exactly 4 characters long, then it's a script and not a country. 
*/ 1261 if (idLen == 4) { 1262 int32_t i; 1263 if (pEnd != NULL) { 1264 *pEnd = localeID+idLen; 1265 } 1266 if(idLen > scriptCapacity) { 1267 idLen = scriptCapacity; 1268 } 1269 if (idLen >= 1) { 1270 script[0]=(char)uprv_toupper(*(localeID++)); 1271 } 1272 for (i = 1; i < idLen; i++) { 1273 script[i]=(char)uprv_tolower(*(localeID++)); 1274 } 1275 } 1276 else { 1277 idLen = 0; 1278 } 1279 return idLen; 1280 } 1281 1282 U_CFUNC int32_t 1283 ulocimp_getCountry(const char *localeID, 1284 char *country, int32_t countryCapacity, 1285 const char **pEnd) 1286 { 1287 int32_t idLen=0; 1288 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; 1289 int32_t offset; 1290 1291 /* copy the country as far as possible and count its length */ 1292 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { 1293 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ 1294 cnty[idLen]=(char)uprv_toupper(localeID[idLen]); 1295 } 1296 idLen++; 1297 } 1298 1299 /* the country should be either length 2 or 3 */ 1300 if (idLen == 2 || idLen == 3) { 1301 UBool gotCountry = FALSE; 1302 /* convert 3 character code to 2 character code if possible *CWB*/ 1303 if(idLen==3) { 1304 offset=_findIndex(COUNTRIES_3, cnty); 1305 if(offset>=0) { 1306 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]); 1307 gotCountry = TRUE; 1308 } 1309 } 1310 if (!gotCountry) { 1311 int32_t i = 0; 1312 for (i = 0; i < idLen; i++) { 1313 if (i < countryCapacity) { 1314 country[i]=(char)uprv_toupper(localeID[i]); 1315 } 1316 } 1317 } 1318 localeID+=idLen; 1319 } else { 1320 idLen = 0; 1321 } 1322 1323 if(pEnd!=NULL) { 1324 *pEnd=localeID; 1325 } 1326 1327 return idLen; 1328 } 1329 1330 /** 1331 * @param needSeparator if true, then add leading '_' if any variants 1332 * are added to 'variant' 1333 */ 1334 static int32_t 1335 _getVariantEx(const char *localeID, 1336 char prev, 1337 char *variant, int32_t variantCapacity, 1338 UBool needSeparator) { 1339 int32_t i=0; 1340 1341 /* get one or more variant tags 
and separate them with '_' */ 1342 if(_isIDSeparator(prev)) { 1343 /* get a variant string after a '-' or '_' */ 1344 while(!_isTerminator(*localeID)) { 1345 if (needSeparator) { 1346 if (i<variantCapacity) { 1347 variant[i] = '_'; 1348 } 1349 ++i; 1350 needSeparator = FALSE; 1351 } 1352 if(i<variantCapacity) { 1353 variant[i]=(char)uprv_toupper(*localeID); 1354 if(variant[i]=='-') { 1355 variant[i]='_'; 1356 } 1357 } 1358 i++; 1359 localeID++; 1360 } 1361 } 1362 1363 /* if there is no variant tag after a '-' or '_' then look for '@' */ 1364 if(i==0) { 1365 if(prev=='@') { 1366 /* keep localeID */ 1367 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) { 1368 ++localeID; /* point after the '@' */ 1369 } else { 1370 return 0; 1371 } 1372 while(!_isTerminator(*localeID)) { 1373 if (needSeparator) { 1374 if (i<variantCapacity) { 1375 variant[i] = '_'; 1376 } 1377 ++i; 1378 needSeparator = FALSE; 1379 } 1380 if(i<variantCapacity) { 1381 variant[i]=(char)uprv_toupper(*localeID); 1382 if(variant[i]=='-' || variant[i]==',') { 1383 variant[i]='_'; 1384 } 1385 } 1386 i++; 1387 localeID++; 1388 } 1389 } 1390 1391 return i; 1392 } 1393 1394 static int32_t 1395 _getVariant(const char *localeID, 1396 char prev, 1397 char *variant, int32_t variantCapacity) { 1398 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE); 1399 } 1400 1401 /** 1402 * Delete ALL instances of a variant from the given list of one or 1403 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR". 1404 * @param variants the source string of one or more variants, 1405 * separated by '_'. This will be MODIFIED IN PLACE. Not zero 1406 * terminated; if it is, trailing zero will NOT be maintained. 1407 * @param variantsLen length of variants 1408 * @param toDelete variant to delete, without separators, e.g. 
"EURO" 1409 * or "PREEURO"; not zero terminated 1410 * @param toDeleteLen length of toDelete 1411 * @return number of characters deleted from variants 1412 */ 1413 static int32_t 1414 _deleteVariant(char* variants, int32_t variantsLen, 1415 const char* toDelete, int32_t toDeleteLen) 1416 { 1417 int32_t delta = 0; /* number of chars deleted */ 1418 for (;;) { 1419 UBool flag = FALSE; 1420 if (variantsLen < toDeleteLen) { 1421 return delta; 1422 } 1423 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && 1424 (variantsLen == toDeleteLen || 1425 (flag=(variants[toDeleteLen] == '_')))) 1426 { 1427 int32_t d = toDeleteLen + (flag?1:0); 1428 variantsLen -= d; 1429 delta += d; 1430 if (variantsLen > 0) { 1431 uprv_memmove(variants, variants+d, variantsLen); 1432 } 1433 } else { 1434 char* p = _strnchr(variants, variantsLen, '_'); 1435 if (p == NULL) { 1436 return delta; 1437 } 1438 ++p; 1439 variantsLen -= (int32_t)(p - variants); 1440 variants = p; 1441 } 1442 } 1443 } 1444 1445 /* Keyword enumeration */ 1446 1447 typedef struct UKeywordsContext { 1448 char* keywords; 1449 char* current; 1450 } UKeywordsContext; 1451 1452 static void U_CALLCONV 1453 uloc_kw_closeKeywords(UEnumeration *enumerator) { 1454 uprv_free(((UKeywordsContext *)enumerator->context)->keywords); 1455 uprv_free(enumerator->context); 1456 uprv_free(enumerator); 1457 } 1458 1459 static int32_t U_CALLCONV 1460 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { 1461 char *kw = ((UKeywordsContext *)en->context)->keywords; 1462 int32_t result = 0; 1463 while(*kw) { 1464 result++; 1465 kw += uprv_strlen(kw)+1; 1466 } 1467 return result; 1468 } 1469 1470 static const char* U_CALLCONV 1471 uloc_kw_nextKeyword(UEnumeration* en, 1472 int32_t* resultLength, 1473 UErrorCode* /*status*/) { 1474 const char* result = ((UKeywordsContext *)en->context)->current; 1475 int32_t len = 0; 1476 if(*result) { 1477 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current); 1478 
((UKeywordsContext *)en->context)->current += len+1; 1479 } else { 1480 result = NULL; 1481 } 1482 if (resultLength) { 1483 *resultLength = len; 1484 } 1485 return result; 1486 } 1487 1488 static void U_CALLCONV 1489 uloc_kw_resetKeywords(UEnumeration* en, 1490 UErrorCode* /*status*/) { 1491 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; 1492 } 1493 1494 static const UEnumeration gKeywordsEnum = { 1495 NULL, 1496 NULL, 1497 uloc_kw_closeKeywords, 1498 uloc_kw_countKeywords, 1499 uenum_unextDefault, 1500 uloc_kw_nextKeyword, 1501 uloc_kw_resetKeywords 1502 }; 1503 1504 U_CAPI UEnumeration* U_EXPORT2 1505 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) 1506 { 1507 UKeywordsContext *myContext = NULL; 1508 UEnumeration *result = NULL; 1509 1510 if(U_FAILURE(*status)) { 1511 return NULL; 1512 } 1513 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 1514 /* Null pointer test */ 1515 if (result == NULL) { 1516 *status = U_MEMORY_ALLOCATION_ERROR; 1517 return NULL; 1518 } 1519 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); 1520 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))); 1521 if (myContext == NULL) { 1522 *status = U_MEMORY_ALLOCATION_ERROR; 1523 uprv_free(result); 1524 return NULL; 1525 } 1526 myContext->keywords = (char *)uprv_malloc(keywordListSize+1); 1527 uprv_memcpy(myContext->keywords, keywordList, keywordListSize); 1528 myContext->keywords[keywordListSize] = 0; 1529 myContext->current = myContext->keywords; 1530 result->context = myContext; 1531 return result; 1532 } 1533 1534 U_CAPI UEnumeration* U_EXPORT2 1535 uloc_openKeywords(const char* localeID, 1536 UErrorCode* status) 1537 { 1538 int32_t i=0; 1539 char keywords[256]; 1540 int32_t keywordsCapacity = 256; 1541 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1542 const char* tmpLocaleID; 1543 1544 if(status==NULL || U_FAILURE(*status)) { 1545 return 0; 1546 } 1547 1548 if 
(_hasBCP47Extension(localeID)) { 1549 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 1550 } else { 1551 if (localeID==NULL) { 1552 localeID=uloc_getDefault(); 1553 } 1554 tmpLocaleID=localeID; 1555 } 1556 1557 /* Skip the language */ 1558 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 1559 if(_isIDSeparator(*tmpLocaleID)) { 1560 const char *scriptID; 1561 /* Skip the script if available */ 1562 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 1563 if(scriptID != tmpLocaleID+1) { 1564 /* Found optional script */ 1565 tmpLocaleID = scriptID; 1566 } 1567 /* Skip the Country */ 1568 if (_isIDSeparator(*tmpLocaleID)) { 1569 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID); 1570 if(_isIDSeparator(*tmpLocaleID)) { 1571 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0); 1572 } 1573 } 1574 } 1575 1576 /* keywords are located after '@' */ 1577 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { 1578 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status); 1579 } 1580 1581 if(i) { 1582 return uloc_openKeywordList(keywords, i, status); 1583 } else { 1584 return NULL; 1585 } 1586 } 1587 1588 1589 /* bit-flags for 'options' parameter of _canonicalize */ 1590 #define _ULOC_STRIP_KEYWORDS 0x2 1591 #define _ULOC_CANONICALIZE 0x1 1592 1593 #define OPTION_SET(options, mask) ((options & mask) != 0) 1594 1595 static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; 1596 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0]) 1597 1598 /** 1599 * Canonicalize the given localeID, to level 1 or to level 2, 1600 * depending on the options. To specify level 1, pass in options=0. 1601 * To specify level 2, pass in options=_ULOC_CANONICALIZE. 1602 * 1603 * This is the code underlying uloc_getName and uloc_canonicalize. 
1604 */ 1605 static int32_t 1606 _canonicalize(const char* localeID, 1607 char* result, 1608 int32_t resultCapacity, 1609 uint32_t options, 1610 UErrorCode* err) { 1611 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity; 1612 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1613 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1614 const char* origLocaleID; 1615 const char* tmpLocaleID; 1616 const char* keywordAssign = NULL; 1617 const char* separatorIndicator = NULL; 1618 const char* addKeyword = NULL; 1619 const char* addValue = NULL; 1620 char* name; 1621 char* variant = NULL; /* pointer into name, or NULL */ 1622 1623 if (U_FAILURE(*err)) { 1624 return 0; 1625 } 1626 1627 if (_hasBCP47Extension(localeID)) { 1628 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 1629 } else { 1630 if (localeID==NULL) { 1631 localeID=uloc_getDefault(); 1632 } 1633 tmpLocaleID=localeID; 1634 } 1635 1636 origLocaleID=tmpLocaleID; 1637 1638 /* if we are doing a full canonicalization, then put results in 1639 localeBuffer, if necessary; otherwise send them to result. 
*/ 1640 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/ 1641 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) { 1642 name = localeBuffer; 1643 nameCapacity = (int32_t)sizeof(localeBuffer); 1644 } else { 1645 name = result; 1646 nameCapacity = resultCapacity; 1647 } 1648 1649 /* get all pieces, one after another, and separate with '_' */ 1650 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID); 1651 1652 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) { 1653 const char *d = uloc_getDefault(); 1654 1655 len = (int32_t)uprv_strlen(d); 1656 1657 if (name != NULL) { 1658 uprv_strncpy(name, d, len); 1659 } 1660 } else if(_isIDSeparator(*tmpLocaleID)) { 1661 const char *scriptID; 1662 1663 ++fieldCount; 1664 if(len<nameCapacity) { 1665 name[len]='_'; 1666 } 1667 ++len; 1668 1669 scriptSize=ulocimp_getScript(tmpLocaleID+1, 1670 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID); 1671 if(scriptSize > 0) { 1672 /* Found optional script */ 1673 tmpLocaleID = scriptID; 1674 ++fieldCount; 1675 len+=scriptSize; 1676 if (_isIDSeparator(*tmpLocaleID)) { 1677 /* If there is something else, then we add the _ */ 1678 if(len<nameCapacity) { 1679 name[len]='_'; 1680 } 1681 ++len; 1682 } 1683 } 1684 1685 if (_isIDSeparator(*tmpLocaleID)) { 1686 const char *cntryID; 1687 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, 1688 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID); 1689 if (cntrySize > 0) { 1690 /* Found optional country */ 1691 tmpLocaleID = cntryID; 1692 len+=cntrySize; 1693 } 1694 if(_isIDSeparator(*tmpLocaleID)) { 1695 /* If there is something else, then we add the _ if we found country before. */ 1696 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) { 1697 ++fieldCount; 1698 if(len<nameCapacity) { 1699 name[len]='_'; 1700 } 1701 ++len; 1702 } 1703 1704 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, 1705 (len<nameCapacity ? 
name+len : NULL), nameCapacity-len); 1706 if (variantSize > 0) { 1707 variant = len<nameCapacity ? name+len : NULL; 1708 len += variantSize; 1709 tmpLocaleID += variantSize + 1; /* skip '_' and variant */ 1710 } 1711 } 1712 } 1713 } 1714 1715 /* Copy POSIX-style charset specifier, if any [mr.utf8] */ 1716 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') { 1717 UBool done = FALSE; 1718 do { 1719 char c = *tmpLocaleID; 1720 switch (c) { 1721 case 0: 1722 case '@': 1723 done = TRUE; 1724 break; 1725 default: 1726 if (len<nameCapacity) { 1727 name[len] = c; 1728 } 1729 ++len; 1730 ++tmpLocaleID; 1731 break; 1732 } 1733 } while (!done); 1734 } 1735 1736 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';' 1737 After this, tmpLocaleID either points to '@' or is NULL */ 1738 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) { 1739 keywordAssign = uprv_strchr(tmpLocaleID, '='); 1740 separatorIndicator = uprv_strchr(tmpLocaleID, ';'); 1741 } 1742 1743 /* Copy POSIX-style variant, if any [mr@FOO] */ 1744 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && 1745 tmpLocaleID != NULL && keywordAssign == NULL) { 1746 for (;;) { 1747 char c = *tmpLocaleID; 1748 if (c == 0) { 1749 break; 1750 } 1751 if (len<nameCapacity) { 1752 name[len] = c; 1753 } 1754 ++len; 1755 ++tmpLocaleID; 1756 } 1757 } 1758 1759 if (OPTION_SET(options, _ULOC_CANONICALIZE)) { 1760 /* Handle @FOO variant if @ is present and not followed by = */ 1761 if (tmpLocaleID!=NULL && keywordAssign==NULL) { 1762 int32_t posixVariantSize; 1763 /* Add missing '_' if needed */ 1764 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) { 1765 do { 1766 if(len<nameCapacity) { 1767 name[len]='_'; 1768 } 1769 ++len; 1770 ++fieldCount; 1771 } while(fieldCount<2); 1772 } 1773 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len, 1774 (UBool)(variantSize > 0)); 1775 if (posixVariantSize > 0) { 1776 if (variant == NULL) { 1777 variant = name+len; 
1778 } 1779 len += posixVariantSize; 1780 variantSize += posixVariantSize; 1781 } 1782 } 1783 1784 /* Handle generic variants first */ 1785 if (variant) { 1786 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) { 1787 const char* variantToCompare = VARIANT_MAP[j].variant; 1788 int32_t n = (int32_t)uprv_strlen(variantToCompare); 1789 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n); 1790 len -= variantLen; 1791 if (variantLen > 0) { 1792 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */ 1793 --len; 1794 } 1795 addKeyword = VARIANT_MAP[j].keyword; 1796 addValue = VARIANT_MAP[j].value; 1797 break; 1798 } 1799 } 1800 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */ 1801 --len; 1802 } 1803 } 1804 1805 /* Look up the ID in the canonicalization map */ 1806 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) { 1807 const char* id = CANONICALIZE_MAP[j].id; 1808 int32_t n = (int32_t)uprv_strlen(id); 1809 if (len == n && uprv_strncmp(name, id, n) == 0) { 1810 if (n == 0 && tmpLocaleID != NULL) { 1811 break; /* Don't remap "" if keywords present */ 1812 } 1813 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID); 1814 if (CANONICALIZE_MAP[j].keyword) { 1815 addKeyword = CANONICALIZE_MAP[j].keyword; 1816 addValue = CANONICALIZE_MAP[j].value; 1817 } 1818 break; 1819 } 1820 } 1821 } 1822 1823 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) { 1824 if (tmpLocaleID!=NULL && keywordAssign!=NULL && 1825 (!separatorIndicator || separatorIndicator > keywordAssign)) { 1826 if(len<nameCapacity) { 1827 name[len]='@'; 1828 } 1829 ++len; 1830 ++fieldCount; 1831 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? 
name+len : NULL), nameCapacity-len, 1832 NULL, 0, NULL, TRUE, addKeyword, addValue, err); 1833 } else if (addKeyword != NULL) { 1834 U_ASSERT(addValue != NULL && len < nameCapacity); 1835 /* inelegant but works -- later make _getKeywords do this? */ 1836 len += _copyCount(name+len, nameCapacity-len, "@"); 1837 len += _copyCount(name+len, nameCapacity-len, addKeyword); 1838 len += _copyCount(name+len, nameCapacity-len, "="); 1839 len += _copyCount(name+len, nameCapacity-len, addValue); 1840 } 1841 } 1842 1843 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) { 1844 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len); 1845 } 1846 1847 return u_terminateChars(result, resultCapacity, len, err); 1848 } 1849 1850 /* ### ID parsing API **************************************************/ 1851 1852 U_CAPI int32_t U_EXPORT2 1853 uloc_getParent(const char* localeID, 1854 char* parent, 1855 int32_t parentCapacity, 1856 UErrorCode* err) 1857 { 1858 const char *lastUnderscore; 1859 int32_t i; 1860 1861 if (U_FAILURE(*err)) 1862 return 0; 1863 1864 if (localeID == NULL) 1865 localeID = uloc_getDefault(); 1866 1867 lastUnderscore=uprv_strrchr(localeID, '_'); 1868 if(lastUnderscore!=NULL) { 1869 i=(int32_t)(lastUnderscore-localeID); 1870 } else { 1871 i=0; 1872 } 1873 1874 if(i>0 && parent != localeID) { 1875 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity)); 1876 } 1877 return u_terminateChars(parent, parentCapacity, i, err); 1878 } 1879 1880 U_CAPI int32_t U_EXPORT2 1881 uloc_getLanguage(const char* localeID, 1882 char* language, 1883 int32_t languageCapacity, 1884 UErrorCode* err) 1885 { 1886 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. 
*CWB*/ 1887 int32_t i=0; 1888 1889 if (err==NULL || U_FAILURE(*err)) { 1890 return 0; 1891 } 1892 1893 if(localeID==NULL) { 1894 localeID=uloc_getDefault(); 1895 } 1896 1897 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL); 1898 return u_terminateChars(language, languageCapacity, i, err); 1899 } 1900 1901 U_CAPI int32_t U_EXPORT2 1902 uloc_getScript(const char* localeID, 1903 char* script, 1904 int32_t scriptCapacity, 1905 UErrorCode* err) 1906 { 1907 int32_t i=0; 1908 1909 if(err==NULL || U_FAILURE(*err)) { 1910 return 0; 1911 } 1912 1913 if(localeID==NULL) { 1914 localeID=uloc_getDefault(); 1915 } 1916 1917 /* skip the language */ 1918 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1919 if(_isIDSeparator(*localeID)) { 1920 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL); 1921 } 1922 return u_terminateChars(script, scriptCapacity, i, err); 1923 } 1924 1925 U_CAPI int32_t U_EXPORT2 1926 uloc_getCountry(const char* localeID, 1927 char* country, 1928 int32_t countryCapacity, 1929 UErrorCode* err) 1930 { 1931 int32_t i=0; 1932 1933 if(err==NULL || U_FAILURE(*err)) { 1934 return 0; 1935 } 1936 1937 if(localeID==NULL) { 1938 localeID=uloc_getDefault(); 1939 } 1940 1941 /* Skip the language */ 1942 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1943 if(_isIDSeparator(*localeID)) { 1944 const char *scriptID; 1945 /* Skip the script if available */ 1946 ulocimp_getScript(localeID+1, NULL, 0, &scriptID); 1947 if(scriptID != localeID+1) { 1948 /* Found optional script */ 1949 localeID = scriptID; 1950 } 1951 if(_isIDSeparator(*localeID)) { 1952 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL); 1953 } 1954 } 1955 return u_terminateChars(country, countryCapacity, i, err); 1956 } 1957 1958 U_CAPI int32_t U_EXPORT2 1959 uloc_getVariant(const char* localeID, 1960 char* variant, 1961 int32_t variantCapacity, 1962 UErrorCode* err) 1963 { 1964 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1965 const char* tmpLocaleID; 1966 int32_t 
i=0; 1967 1968 if(err==NULL || U_FAILURE(*err)) { 1969 return 0; 1970 } 1971 1972 if (_hasBCP47Extension(localeID)) { 1973 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 1974 } else { 1975 if (localeID==NULL) { 1976 localeID=uloc_getDefault(); 1977 } 1978 tmpLocaleID=localeID; 1979 } 1980 1981 /* Skip the language */ 1982 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 1983 if(_isIDSeparator(*tmpLocaleID)) { 1984 const char *scriptID; 1985 /* Skip the script if available */ 1986 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 1987 if(scriptID != tmpLocaleID+1) { 1988 /* Found optional script */ 1989 tmpLocaleID = scriptID; 1990 } 1991 /* Skip the Country */ 1992 if (_isIDSeparator(*tmpLocaleID)) { 1993 const char *cntryID; 1994 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID); 1995 if (cntryID != tmpLocaleID+1) { 1996 /* Found optional country */ 1997 tmpLocaleID = cntryID; 1998 } 1999 if(_isIDSeparator(*tmpLocaleID)) { 2000 /* If there was no country ID, skip a possible extra IDSeparator */ 2001 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) { 2002 tmpLocaleID++; 2003 } 2004 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity); 2005 } 2006 } 2007 } 2008 2009 /* removed by weiv. We don't want to handle POSIX variants anymore. 
Use canonicalization function */ 2010 /* if we do not have a variant tag yet then try a POSIX variant after '@' */ 2011 /* 2012 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) { 2013 i=_getVariant(localeID+1, '@', variant, variantCapacity); 2014 } 2015 */ 2016 return u_terminateChars(variant, variantCapacity, i, err); 2017 } 2018 2019 U_CAPI int32_t U_EXPORT2 2020 uloc_getName(const char* localeID, 2021 char* name, 2022 int32_t nameCapacity, 2023 UErrorCode* err) 2024 { 2025 return _canonicalize(localeID, name, nameCapacity, 0, err); 2026 } 2027 2028 U_CAPI int32_t U_EXPORT2 2029 uloc_getBaseName(const char* localeID, 2030 char* name, 2031 int32_t nameCapacity, 2032 UErrorCode* err) 2033 { 2034 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err); 2035 } 2036 2037 U_CAPI int32_t U_EXPORT2 2038 uloc_canonicalize(const char* localeID, 2039 char* name, 2040 int32_t nameCapacity, 2041 UErrorCode* err) 2042 { 2043 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err); 2044 } 2045 2046 U_CAPI const char* U_EXPORT2 2047 uloc_getISO3Language(const char* localeID) 2048 { 2049 int16_t offset; 2050 char lang[ULOC_LANG_CAPACITY]; 2051 UErrorCode err = U_ZERO_ERROR; 2052 2053 if (localeID == NULL) 2054 { 2055 localeID = uloc_getDefault(); 2056 } 2057 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err); 2058 if (U_FAILURE(err)) 2059 return ""; 2060 offset = _findIndex(LANGUAGES, lang); 2061 if (offset < 0) 2062 return ""; 2063 return LANGUAGES_3[offset]; 2064 } 2065 2066 U_CAPI const char* U_EXPORT2 2067 uloc_getISO3Country(const char* localeID) 2068 { 2069 int16_t offset; 2070 char cntry[ULOC_LANG_CAPACITY]; 2071 UErrorCode err = U_ZERO_ERROR; 2072 2073 if (localeID == NULL) 2074 { 2075 localeID = uloc_getDefault(); 2076 } 2077 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err); 2078 if (U_FAILURE(err)) 2079 return ""; 2080 offset = _findIndex(COUNTRIES, cntry); 2081 if (offset < 0) 2082 return ""; 
2083 2084 return COUNTRIES_3[offset]; 2085 } 2086 2087 U_CAPI uint32_t U_EXPORT2 2088 uloc_getLCID(const char* localeID) 2089 { 2090 UErrorCode status = U_ZERO_ERROR; 2091 char langID[ULOC_FULLNAME_CAPACITY]; 2092 2093 uloc_getLanguage(localeID, langID, sizeof(langID), &status); 2094 if (U_FAILURE(status)) { 2095 return 0; 2096 } 2097 2098 return uprv_convertToLCID(langID, localeID, &status); 2099 } 2100 2101 U_CAPI int32_t U_EXPORT2 2102 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, 2103 UErrorCode *status) 2104 { 2105 int32_t length; 2106 const char *posix = uprv_convertToPosix(hostid, status); 2107 if (U_FAILURE(*status) || posix == NULL) { 2108 return 0; 2109 } 2110 length = (int32_t)uprv_strlen(posix); 2111 if (length+1 > localeCapacity) { 2112 *status = U_BUFFER_OVERFLOW_ERROR; 2113 } 2114 else { 2115 uprv_strcpy(locale, posix); 2116 } 2117 return length; 2118 } 2119 2120 /* ### Default locale **************************************************/ 2121 2122 U_CAPI const char* U_EXPORT2 2123 uloc_getDefault() 2124 { 2125 return locale_get_default(); 2126 } 2127 2128 U_CAPI void U_EXPORT2 2129 uloc_setDefault(const char* newDefaultLocale, 2130 UErrorCode* err) 2131 { 2132 if (U_FAILURE(*err)) 2133 return; 2134 /* the error code isn't currently used for anything by this function*/ 2135 2136 /* propagate change to C++ */ 2137 locale_set_default(newDefaultLocale); 2138 } 2139 2140 /** 2141 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer 2142 * to an array of pointers to arrays of char. All of these pointers are owned 2143 * by ICU-- do not delete them, and do not write through them. The array is 2144 * terminated with a null pointer. 2145 */ 2146 U_CAPI const char* const* U_EXPORT2 2147 uloc_getISOLanguages() 2148 { 2149 return LANGUAGES; 2150 } 2151 2152 /** 2153 * Returns a list of all 2-letter country codes defined in ISO 639. This is a 2154 * pointer to an array of pointers to arrays of char. 
All of these pointers are 2155 * owned by ICU-- do not delete them, and do not write through them. The array is 2156 * terminated with a null pointer. 2157 */ 2158 U_CAPI const char* const* U_EXPORT2 2159 uloc_getISOCountries() 2160 { 2161 return COUNTRIES; 2162 } 2163 2164 2165 /* this function to be moved into cstring.c later */ 2166 static char gDecimal = 0; 2167 2168 static /* U_CAPI */ 2169 double 2170 /* U_EXPORT2 */ 2171 _uloc_strtod(const char *start, char **end) { 2172 char *decimal; 2173 char *myEnd; 2174 char buf[30]; 2175 double rv; 2176 if (!gDecimal) { 2177 char rep[5]; 2178 /* For machines that decide to change the decimal on you, 2179 and try to be too smart with localization. 2180 This normally should be just a '.'. */ 2181 sprintf(rep, "%+1.1f", 1.0); 2182 gDecimal = rep[2]; 2183 } 2184 2185 if(gDecimal == '.') { 2186 return uprv_strtod(start, end); /* fall through to OS */ 2187 } else { 2188 uprv_strncpy(buf, start, 29); 2189 buf[29]=0; 2190 decimal = uprv_strchr(buf, '.'); 2191 if(decimal) { 2192 *decimal = gDecimal; 2193 } else { 2194 return uprv_strtod(start, end); /* no decimal point */ 2195 } 2196 rv = uprv_strtod(buf, &myEnd); 2197 if(end) { 2198 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) 
*/ 2199 } 2200 return rv; 2201 } 2202 } 2203 2204 typedef struct { 2205 float q; 2206 int32_t dummy; /* to avoid uninitialized memory copy from qsort */ 2207 char *locale; 2208 } _acceptLangItem; 2209 2210 static int32_t U_CALLCONV 2211 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b) 2212 { 2213 const _acceptLangItem *aa = (const _acceptLangItem*)a; 2214 const _acceptLangItem *bb = (const _acceptLangItem*)b; 2215 2216 int32_t rc = 0; 2217 if(bb->q < aa->q) { 2218 rc = -1; /* A > B */ 2219 } else if(bb->q > aa->q) { 2220 rc = 1; /* A < B */ 2221 } else { 2222 rc = 0; /* A = B */ 2223 } 2224 2225 if(rc==0) { 2226 rc = uprv_stricmp(aa->locale, bb->locale); 2227 } 2228 2229 #if defined(ULOC_DEBUG) 2230 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n", 2231 aa->locale, aa->q, 2232 bb->locale, bb->q, 2233 rc);*/ 2234 #endif 2235 2236 return rc; 2237 } 2238 2239 /* 2240 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53 2241 */ 2242 2243 U_CAPI int32_t U_EXPORT2 2244 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult, 2245 const char *httpAcceptLanguage, 2246 UEnumeration* availableLocales, 2247 UErrorCode *status) 2248 { 2249 _acceptLangItem *j; 2250 _acceptLangItem smallBuffer[30]; 2251 char **strs; 2252 char tmp[ULOC_FULLNAME_CAPACITY +1]; 2253 int32_t n = 0; 2254 const char *itemEnd; 2255 const char *paramEnd; 2256 const char *s; 2257 const char *t; 2258 int32_t res; 2259 int32_t i; 2260 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); 2261 int32_t jSize; 2262 char *tempstr; /* Use for null pointer check */ 2263 2264 j = smallBuffer; 2265 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]); 2266 if(U_FAILURE(*status)) { 2267 return -1; 2268 } 2269 2270 for(s=httpAcceptLanguage;s&&*s;) { 2271 while(isspace(*s)) /* 
eat space at the beginning */ 2272 s++; 2273 itemEnd=uprv_strchr(s,','); 2274 paramEnd=uprv_strchr(s,';'); 2275 if(!itemEnd) { 2276 itemEnd = httpAcceptLanguage+l; /* end of string */ 2277 } 2278 if(paramEnd && paramEnd<itemEnd) { 2279 /* semicolon (;) is closer than end (,) */ 2280 t = paramEnd+1; 2281 if(*t=='q') { 2282 t++; 2283 } 2284 while(isspace(*t)) { 2285 t++; 2286 } 2287 if(*t=='=') { 2288 t++; 2289 } 2290 while(isspace(*t)) { 2291 t++; 2292 } 2293 j[n].q = (float)_uloc_strtod(t,NULL); 2294 } else { 2295 /* no semicolon - it's 1.0 */ 2296 j[n].q = 1.0f; 2297 paramEnd = itemEnd; 2298 } 2299 j[n].dummy=0; 2300 /* eat spaces prior to semi */ 2301 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) 2302 ; 2303 /* Check for null pointer from uprv_strndup */ 2304 tempstr = uprv_strndup(s,(int32_t)((t+1)-s)); 2305 if (tempstr == NULL) { 2306 *status = U_MEMORY_ALLOCATION_ERROR; 2307 return -1; 2308 } 2309 j[n].locale = tempstr; 2310 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status); 2311 if(strcmp(j[n].locale,tmp)) { 2312 uprv_free(j[n].locale); 2313 j[n].locale=uprv_strdup(tmp); 2314 } 2315 #if defined(ULOC_DEBUG) 2316 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ 2317 #endif 2318 n++; 2319 s = itemEnd; 2320 while(*s==',') { /* eat duplicate commas */ 2321 s++; 2322 } 2323 if(n>=jSize) { 2324 if(j==smallBuffer) { /* overflowed the small buffer. 
*/ 2325 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2))); 2326 if(j!=NULL) { 2327 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize); 2328 } 2329 #if defined(ULOC_DEBUG) 2330 fprintf(stderr,"malloced at size %d\n", jSize); 2331 #endif 2332 } else { 2333 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2)); 2334 #if defined(ULOC_DEBUG) 2335 fprintf(stderr,"re-alloced at size %d\n", jSize); 2336 #endif 2337 } 2338 jSize *= 2; 2339 if(j==NULL) { 2340 *status = U_MEMORY_ALLOCATION_ERROR; 2341 return -1; 2342 } 2343 } 2344 } 2345 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); 2346 if(U_FAILURE(*status)) { 2347 if(j != smallBuffer) { 2348 #if defined(ULOC_DEBUG) 2349 fprintf(stderr,"freeing j %p\n", j); 2350 #endif 2351 uprv_free(j); 2352 } 2353 return -1; 2354 } 2355 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n))); 2356 /* Check for null pointer */ 2357 if (strs == NULL) { 2358 uprv_free(j); /* Free to avoid memory leak */ 2359 *status = U_MEMORY_ALLOCATION_ERROR; 2360 return -1; 2361 } 2362 for(i=0;i<n;i++) { 2363 #if defined(ULOC_DEBUG) 2364 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/ 2365 #endif 2366 strs[i]=j[i].locale; 2367 } 2368 res = uloc_acceptLanguage(result, resultAvailable, outResult, 2369 (const char**)strs, n, availableLocales, status); 2370 for(i=0;i<n;i++) { 2371 uprv_free(strs[i]); 2372 } 2373 uprv_free(strs); 2374 if(j != smallBuffer) { 2375 #if defined(ULOC_DEBUG) 2376 fprintf(stderr,"freeing j %p\n", j); 2377 #endif 2378 uprv_free(j); 2379 } 2380 return res; 2381 } 2382 2383 2384 U_CAPI int32_t U_EXPORT2 2385 uloc_acceptLanguage(char *result, int32_t resultAvailable, 2386 UAcceptResult *outResult, const char **acceptList, 2387 int32_t acceptListCount, 2388 UEnumeration* availableLocales, 2389 UErrorCode *status) 2390 { 2391 int32_t i,j; 2392 int32_t len; 2393 int32_t maxLen=0; 2394 char tmp[ULOC_FULLNAME_CAPACITY+1]; 2395 const 
char *l; 2396 char **fallbackList; 2397 if(U_FAILURE(*status)) { 2398 return -1; 2399 } 2400 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount))); 2401 if(fallbackList==NULL) { 2402 *status = U_MEMORY_ALLOCATION_ERROR; 2403 return -1; 2404 } 2405 for(i=0;i<acceptListCount;i++) { 2406 #if defined(ULOC_DEBUG) 2407 fprintf(stderr,"%02d: %s\n", i, acceptList[i]); 2408 #endif 2409 while((l=uenum_next(availableLocales, NULL, status))) { 2410 #if defined(ULOC_DEBUG) 2411 fprintf(stderr," %s\n", l); 2412 #endif 2413 len = (int32_t)uprv_strlen(l); 2414 if(!uprv_strcmp(acceptList[i], l)) { 2415 if(outResult) { 2416 *outResult = ULOC_ACCEPT_VALID; 2417 } 2418 #if defined(ULOC_DEBUG) 2419 fprintf(stderr, "MATCH! %s\n", l); 2420 #endif 2421 if(len>0) { 2422 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2423 } 2424 for(j=0;j<i;j++) { 2425 uprv_free(fallbackList[j]); 2426 } 2427 uprv_free(fallbackList); 2428 return u_terminateChars(result, resultAvailable, len, status); 2429 } 2430 if(len>maxLen) { 2431 maxLen = len; 2432 } 2433 } 2434 uenum_reset(availableLocales, status); 2435 /* save off parent info */ 2436 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 2437 fallbackList[i] = uprv_strdup(tmp); 2438 } else { 2439 fallbackList[i]=0; 2440 } 2441 } 2442 2443 for(maxLen--;maxLen>0;maxLen--) { 2444 for(i=0;i<acceptListCount;i++) { 2445 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) { 2446 #if defined(ULOC_DEBUG) 2447 fprintf(stderr,"Try: [%s]", fallbackList[i]); 2448 #endif 2449 while((l=uenum_next(availableLocales, NULL, status))) { 2450 #if defined(ULOC_DEBUG) 2451 fprintf(stderr," %s\n", l); 2452 #endif 2453 len = (int32_t)uprv_strlen(l); 2454 if(!uprv_strcmp(fallbackList[i], l)) { 2455 if(outResult) { 2456 *outResult = ULOC_ACCEPT_FALLBACK; 2457 } 2458 #if defined(ULOC_DEBUG) 2459 fprintf(stderr, "fallback MATCH! 
%s\n", l); 2460 #endif 2461 if(len>0) { 2462 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2463 } 2464 for(j=0;j<acceptListCount;j++) { 2465 uprv_free(fallbackList[j]); 2466 } 2467 uprv_free(fallbackList); 2468 return u_terminateChars(result, resultAvailable, len, status); 2469 } 2470 } 2471 uenum_reset(availableLocales, status); 2472 2473 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 2474 uprv_free(fallbackList[i]); 2475 fallbackList[i] = uprv_strdup(tmp); 2476 } else { 2477 uprv_free(fallbackList[i]); 2478 fallbackList[i]=0; 2479 } 2480 } 2481 } 2482 if(outResult) { 2483 *outResult = ULOC_ACCEPT_FAILED; 2484 } 2485 } 2486 for(i=0;i<acceptListCount;i++) { 2487 uprv_free(fallbackList[i]); 2488 } 2489 uprv_free(fallbackList); 2490 return -1; 2491 } 2492 2493 /*eof*/ 2494