1 /* 2 ********************************************************************** 3 * Copyright (C) 1997-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * File ULOC.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 04/01/97 aliu Creation. 13 * 08/21/98 stephen JDK 1.2 sync 14 * 12/08/98 rtg New Locale implementation and C API 15 * 03/15/99 damiba overhaul. 16 * 04/06/99 stephen changed setDefault() to realloc and copy 17 * 06/14/99 stephen Changed calls to ures_open for new params 18 * 07/21/99 stephen Modified setDefault() to propagate to C++ 19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, 20 * brought canonicalization code into line with spec 21 *****************************************************************************/ 22 23 /* 24 POSIX's locale format, from putil.c: [no spaces] 25 26 ll [ _CC ] [ . MM ] [ @ VV] 27 28 l = lang, C = ctry, M = charmap, V = variant 29 */ 30 31 #include "unicode/utypes.h" 32 #include "unicode/ustring.h" 33 #include "unicode/uloc.h" 34 35 #include "putilimp.h" 36 #include "ustr_imp.h" 37 #include "ulocimp.h" 38 #include "umutex.h" 39 #include "cstring.h" 40 #include "cmemory.h" 41 #include "ucln_cmn.h" 42 #include "locmap.h" 43 #include "uarrsort.h" 44 #include "uenumimp.h" 45 #include "uassert.h" 46 47 #include <stdio.h> /* for sprintf */ 48 49 /* ### Declarations **************************************************/ 50 51 /* Locale stuff from locid.cpp */ 52 U_CFUNC void locale_set_default(const char *id); 53 U_CFUNC const char *locale_get_default(void); 54 U_CFUNC int32_t 55 locale_getKeywords(const char *localeID, 56 char prev, 57 char *keywords, int32_t keywordCapacity, 58 char *values, int32_t valuesCapacity, int32_t *valLen, 59 UBool valuesToo, 60 UErrorCode *status); 61 62 /* ### Data tables **************************************************/ 63 64 /** 65 * Table of 
language codes, both 2- and 3-letter, with preference 66 * given to 2-letter codes where possible. Includes 3-letter codes 67 * that lack a 2-letter equivalent. 68 * 69 * This list must be in sorted order. This list is returned directly 70 * to the user by some API. 71 * 72 * This list must be kept in sync with LANGUAGES_3, with corresponding 73 * entries matched. 74 * 75 * This table should be terminated with a NULL entry, followed by a 76 * second list, and another NULL entry. The first list is visible to 77 * user code when this array is returned by API. The second list 78 * contains codes we support, but do not expose through user API. 79 * 80 * Notes 81 * 82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to 83 * include the revisions up to 2001/7/27 *CWB* 84 * 85 * The 3 character codes are the terminology codes like RFC 3066. This 86 * is compatible with prior ICU codes 87 * 88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the 89 * table but now at the end of the table because 3 character codes are 90 * duplicates. This avoids bad searches going from 3 to 2 character 91 * codes. 
*
 * The range qaa-qtz is reserved for local use
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20130531 */
/*
 * Layout: first (public) list, NULL, second (obsolete) list, NULL.
 * Entry i here corresponds to entry i of LANGUAGES_3, so any insertion or
 * removal must be mirrored there at the same position.
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",
    "afa", "afh", "agq", "ain", "ak",  "akk", "ale", "alg",
    "alt", "am",  "an",  "ang", "anp", "apa", "ar",  "arc",
    "arn", "arp", "art", "arw", "as",  "asa", "ast", "ath",
    "aus", "av",  "awa", "ay",  "az",
    "ba",  "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "be",  "bej", "bem", "ber", "bez", "bfd", "bg",
    "bh",  "bho", "bi",  "bik", "bin", "bkm", "bla", "bm",
    "bn",  "bnt", "bo",  "br",  "bra", "brx", "bs",  "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "cai", "car", "cau", "cay", "cch", "ce",
    "ceb", "cel", "cgg", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
    "cop", "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",
    "csb", "cu",  "cus", "cv",  "cy",
    "da",  "dak", "dar", "dav", "day", "de",  "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "dv",  "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",
    "fo",  "fon", "fr",  "frm", "fro", "frr", "frs", "fur",
    "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "guz", "gv",  "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him", "hit",
    "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",
    "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ijo",
    "ik",  "ilo", "inc", "ine", "inh", "io",  "ira", "iro",
    "is",  "it",  "iu",
    "ja",  "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg",  "kha",
    "khi", "kho", "khq", "ki",  "kj",  "kk",  "kkj", "kl",
    "kln", "km",  "kmb", "kn",  "ko",  "kok", "kos", "kpe",
    "kr",  "krc", "krl", "kro", "kru", "ks",  "ksb", "ksf",
    "ksh", "ku",  "kum", "kut", "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lg",
    "li",  "lkt", "ln",  "lo",  "lol", "loz", "lt",  "lu",
    "lua", "lui", "lun", "luo", "lus", "luy", "lv",
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "mkh", "ml",  "mn",  "mnc", "mni", "mno", "mo",  "moh",
    "mos", "mr",  "ms",  "mt",  "mua", "mul", "mun", "mus",
    "mwl", "mwr", "my",  "mye", "myn", "myv",
    "na",  "nah", "nai", "nap", "naq", "nb",  "nd",  "nds",
    "ne",  "new", "ng",  "nia", "nic", "niu", "nl",  "nmg",
    "nn",  "nnh", "no",  "nog", "non", "nqo", "nr",  "nso",
    "nub", "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo",
    "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota", "oto",
    "pa",  "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pi",  "pl",  "pon", "pra", "pro", "ps",
    "pt",
    "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rof",
    "rom", "ru",  "rup", "rw",  "rwk",
    "sa",  "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "sc",  "scn", "sco", "sd",  "se",
    "see", "seh", "sel", "sem", "ses", "sg",  "sga", "sgn",
    "shi", "shn", "shu", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "ssy", "st",  "su",  "suk",
    "sus", "sux", "sv",  "sw",  "swb", "swc", "syc", "syr",
    "ta",  "tai", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tl",  "tlh",
    "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vi",  "vo",  "vot", "vun",
    "wa",  "wae", "wak", "wal", "war", "was", "wen", "wo",
    "xal", "xh",  "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "ypk", "yue",
    "za",  "zap", "zbl", "zen", "zgh", "zh",  "znd", "zu",
    "zun", "zxx", "zza",
NULL,
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
NULL
};

/*
 * Deprecated 2-letter language codes and their replacements.
 * The two arrays are position-matched: DEPRECATED_LANGUAGES[i] pairs with
 * REPLACEMENT_LANGUAGES[i] ("in"/"id", "iw"/"he", "ji"/"yi", "jw"/"jv").
 * Note that "sh", although in the obsolete section of LANGUAGES, has no
 * entry here.
 * NOTE(review): both arrays end in two NULLs - presumably so they can be
 * scanned by the same two-list search helper used for LANGUAGES; the code
 * that consumes them is outside this view, confirm before changing.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};

/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The only commented-out line kept
 * in this table is the copy of the obsolete LANGUAGES entries that
 * precedes the second list.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
*/
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20130531 */
/*
 * Position-matched with LANGUAGES: each row below has the same number of
 * entries, in the same order, as the corresponding row of LANGUAGES.
 */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
    "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
    "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
    "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
    "aus", "ava", "awa", "aym", "aze",
    "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
    "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
    "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
    "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
    "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
    "csb", "chu", "cus", "chv", "cym",
    "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "div", "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    "enm", "epo", "spa", "est", "eus", "ewo",
    "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
    "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
    "fry",
    "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
    "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "guj", "guz", "glv", "gwi",
    "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
    "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
    "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
    "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
    "isl", "ita", "iku",
    "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
    "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
    "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
    "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
    "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
    "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
    "lua", "lui", "lun", "luo", "lus", "luy", "lav",
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
    "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
    "mwl", "mwr", "mya", "mye", "myn", "myv",
    "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
    "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
    "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
    "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
    "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
    "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
    "por",
    "que",
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
    "rom", "rus", "rup", "kin", "rwk",
    "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
    "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
    "shi", "shn", "shu", "sin", "sid", "sio", "sit",
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
    "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
    "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
    "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
    "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vie", "vol", "vot", "vun",
    "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
    "xal", "xho", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
    "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
    "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    /* NOTE(review): "jaw" below differs from the "jav" listed for "jv" in
       the first list - presumably the older 3-letter form kept for
       compatibility; confirm before "correcting" it. */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};

/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
*
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
/*
 * Layout: first (public) list, NULL, second (obsolete) list, NULL.
 * Entry i here corresponds to entry i of COUNTRIES_3.  "RO" appears in both
 * lists on purpose: the first maps to "ROU", the second to the old "ROM".
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
NULL
};

/*
 * Deprecated 2-letter country codes and their replacements.
 * The two arrays are position-matched: DEPRECATED_COUNTRIES[i] pairs with
 * REPLACEMENT_COUNTRIES[i]; the commented row inside REPLACEMENT_COUNTRIES
 * repeats the deprecated codes for visual alignment.
 * NOTE(review): both arrays end in two NULLs - presumably so they can be
 * scanned by the same two-list search helper used for COUNTRIES; the code
 * that consumes them is outside this view, confirm before changing.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};

/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make eyeballing this baby easier.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in COUNTRIES.
375 */ 376 static const char * const COUNTRIES_3[] = { 377 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */ 378 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", 379 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ 380 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", 381 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ 382 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", 383 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */ 384 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT", 385 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ 386 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", 387 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ 388 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", 389 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ 390 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", 391 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ 392 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", 393 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ 394 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", 395 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ 396 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", 397 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ 398 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", 399 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ 400 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", 401 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ 402 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", 403 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ 404 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", 405 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ 406 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", 407 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ 408 "LBN", "LCA", "LIE", "LKA", "LBR", 
"LSO", "LTU", "LUX", 409 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ 410 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", 411 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ 412 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", 413 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ 414 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", 415 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ 416 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", 417 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ 418 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", 419 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ 420 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", 421 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ 422 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", 423 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ 424 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", 425 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */ 426 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV", 427 /* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ 428 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", 429 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ 430 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", 431 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ 432 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", 433 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ 434 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", 435 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ 436 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", 437 NULL, 438 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ 439 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", 440 NULL 441 }; 442 443 typedef struct CanonicalizationMap { 444 const char *id; /* input ID */ 445 const char *canonicalID; /* 
canonicalized output ID */ 446 const char *keyword; /* keyword, or NULL if none */ 447 const char *value; /* keyword value, or NULL if kw==NULL */ 448 } CanonicalizationMap; 449 450 /** 451 * A map to canonicalize locale IDs. This handles a variety of 452 * different semantic kinds of transformations. 453 */ 454 static const CanonicalizationMap CANONICALIZE_MAP[] = { 455 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */ 456 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */ 457 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */ 458 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */ 459 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */ 460 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */ 461 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" }, 462 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */ 463 { "de_AT_PREEURO", "de_AT", "currency", "ATS" }, 464 { "de_DE_PREEURO", "de_DE", "currency", "DEM" }, 465 { "de_LU_PREEURO", "de_LU", "currency", "LUF" }, 466 { "el_GR_PREEURO", "el_GR", "currency", "GRD" }, 467 { "en_BE_PREEURO", "en_BE", "currency", "BEF" }, 468 { "en_IE_PREEURO", "en_IE", "currency", "IEP" }, 469 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */ 470 { "es_ES_PREEURO", "es_ES", "currency", "ESP" }, 471 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" }, 472 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" }, 473 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" }, 474 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" }, 475 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" }, 476 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" }, 477 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" }, 478 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */ 479 { "it_IT_PREEURO", "it_IT", "currency", "ITL" }, 480 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */ 481 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */ 482 { 
"nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, 483 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, 484 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, 485 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ 486 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ 487 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ 488 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */ 489 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */ 490 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */ 491 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */ 492 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */ 493 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */ 494 { "zh_GAN", "gan", NULL, NULL }, /* registered name */ 495 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */ 496 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */ 497 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */ 498 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */ 499 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */ 500 { "zh_YUE", "yue", NULL, NULL }, /* registered name */ 501 }; 502 503 typedef struct VariantMap { 504 const char *variant; /* input ID */ 505 const char *keyword; /* keyword, or NULL if none */ 506 const char *value; /* keyword value, or NULL if kw==NULL */ 507 } VariantMap; 508 509 static const VariantMap VARIANT_MAP[] = { 510 { "EURO", "currency", "EUR" }, 511 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */ 512 { "STROKE", "collation", "stroke" } /* Solaris variant */ 513 }; 514 515 /* ### BCP47 Conversion *******************************************/ 516 /* Test if the locale id has BCP47 u extension and does not have '@' */ 517 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) 518 /* Converts the BCP47 id to Unicode id. 
Does nothing to id if conversion fails */ 519 #define _ConvertBCP47(finalID, id, buffer, length,err) \ 520 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \ 521 finalID=id; \ 522 } else { \ 523 finalID=buffer; \ 524 } 525 /* Gets the size of the shortest subtag in the given localeID. */ 526 static int32_t getShortestSubtagLength(const char *localeID) { 527 int32_t localeIDLength = uprv_strlen(localeID); 528 int32_t length = localeIDLength; 529 int32_t tmpLength = 0; 530 int32_t i; 531 UBool reset = TRUE; 532 533 for (i = 0; i < localeIDLength; i++) { 534 if (localeID[i] != '_' && localeID[i] != '-') { 535 if (reset) { 536 tmpLength = 0; 537 reset = FALSE; 538 } 539 tmpLength++; 540 } else { 541 if (tmpLength != 0 && tmpLength < length) { 542 length = tmpLength; 543 } 544 reset = TRUE; 545 } 546 } 547 548 return length; 549 } 550 551 /* ### Keywords **************************************************/ 552 553 #define ULOC_KEYWORD_BUFFER_LEN 25 554 #define ULOC_MAX_NO_KEYWORDS 25 555 556 U_CAPI const char * U_EXPORT2 557 locale_getKeywordsStart(const char *localeID) { 558 const char *result = NULL; 559 if((result = uprv_strchr(localeID, '@')) != NULL) { 560 return result; 561 } 562 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) 563 else { 564 /* We do this because the @ sign is variant, and the @ sign used on one 565 EBCDIC machine won't be compiled the same way on other EBCDIC based 566 machines. 
*/ 567 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; 568 const uint8_t *charToFind = ebcdicSigns; 569 while(*charToFind) { 570 if((result = uprv_strchr(localeID, *charToFind)) != NULL) { 571 return result; 572 } 573 charToFind++; 574 } 575 } 576 #endif 577 return NULL; 578 } 579 580 /** 581 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] 582 * @param keywordName incoming name to be canonicalized 583 * @param status return status (keyword too long) 584 * @return length of the keyword name 585 */ 586 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) 587 { 588 int32_t i; 589 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName); 590 591 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) { 592 /* keyword name too long for internal buffer */ 593 *status = U_INTERNAL_PROGRAM_ERROR; 594 return 0; 595 } 596 597 /* normalize the keyword name */ 598 for(i = 0; i < keywordNameLen; i++) { 599 buf[i] = uprv_tolower(keywordName[i]); 600 } 601 buf[i] = 0; 602 603 return keywordNameLen; 604 } 605 606 typedef struct { 607 char keyword[ULOC_KEYWORD_BUFFER_LEN]; 608 int32_t keywordLen; 609 const char *valueStart; 610 int32_t valueLen; 611 } KeywordStruct; 612 613 static int32_t U_CALLCONV 614 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { 615 const char* leftString = ((const KeywordStruct *)left)->keyword; 616 const char* rightString = ((const KeywordStruct *)right)->keyword; 617 return uprv_strcmp(leftString, rightString); 618 } 619 620 /** 621 * Both addKeyword and addValue must already be in canonical form. 622 * Either both addKeyword and addValue are NULL, or neither is NULL. 623 * If they are not NULL they must be zero terminated. 624 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword. 
625 */ 626 static int32_t 627 _getKeywords(const char *localeID, 628 char prev, 629 char *keywords, int32_t keywordCapacity, 630 char *values, int32_t valuesCapacity, int32_t *valLen, 631 UBool valuesToo, 632 const char* addKeyword, 633 const char* addValue, 634 UErrorCode *status) 635 { 636 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; 637 638 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; 639 int32_t numKeywords = 0; 640 const char* pos = localeID; 641 const char* equalSign = NULL; 642 const char* semicolon = NULL; 643 int32_t i = 0, j, n; 644 int32_t keywordsLen = 0; 645 int32_t valuesLen = 0; 646 647 if(prev == '@') { /* start of keyword definition */ 648 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ 649 do { 650 UBool duplicate = FALSE; 651 /* skip leading spaces */ 652 while(*pos == ' ') { 653 pos++; 654 } 655 if (!*pos) { /* handle trailing "; " */ 656 break; 657 } 658 if(numKeywords == maxKeywords) { 659 *status = U_INTERNAL_PROGRAM_ERROR; 660 return 0; 661 } 662 equalSign = uprv_strchr(pos, '='); 663 semicolon = uprv_strchr(pos, ';'); 664 /* lack of '=' [foo@currency] is illegal */ 665 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ 666 if(!equalSign || (semicolon && semicolon<equalSign)) { 667 *status = U_INVALID_FORMAT_ERROR; 668 return 0; 669 } 670 /* need to normalize both keyword and keyword name */ 671 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) { 672 /* keyword name too long for internal buffer */ 673 *status = U_INTERNAL_PROGRAM_ERROR; 674 return 0; 675 } 676 for(i = 0, n = 0; i < equalSign - pos; ++i) { 677 if (pos[i] != ' ') { 678 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); 679 } 680 } 681 682 /* zero-length keyword is an error. */ 683 if (n == 0) { 684 *status = U_INVALID_FORMAT_ERROR; 685 return 0; 686 } 687 688 keywordList[numKeywords].keyword[n] = 0; 689 keywordList[numKeywords].keywordLen = n; 690 /* now grab the value part. 
First we skip the '=' */ 691 equalSign++; 692 /* then we leading spaces */ 693 while(*equalSign == ' ') { 694 equalSign++; 695 } 696 697 /* Premature end or zero-length value */ 698 if (!equalSign || equalSign == semicolon) { 699 *status = U_INVALID_FORMAT_ERROR; 700 return 0; 701 } 702 703 keywordList[numKeywords].valueStart = equalSign; 704 705 pos = semicolon; 706 i = 0; 707 if(pos) { 708 while(*(pos - i - 1) == ' ') { 709 i++; 710 } 711 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); 712 pos++; 713 } else { 714 i = (int32_t)uprv_strlen(equalSign); 715 while(i && equalSign[i-1] == ' ') { 716 i--; 717 } 718 keywordList[numKeywords].valueLen = i; 719 } 720 /* If this is a duplicate keyword, then ignore it */ 721 for (j=0; j<numKeywords; ++j) { 722 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) { 723 duplicate = TRUE; 724 break; 725 } 726 } 727 if (!duplicate) { 728 ++numKeywords; 729 } 730 } while(pos); 731 732 /* Handle addKeyword/addValue. */ 733 if (addKeyword != NULL) { 734 UBool duplicate = FALSE; 735 U_ASSERT(addValue != NULL); 736 /* Search for duplicate; if found, do nothing. Explicit keyword 737 overrides addKeyword. 
*/ 738 for (j=0; j<numKeywords; ++j) { 739 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) { 740 duplicate = TRUE; 741 break; 742 } 743 } 744 if (!duplicate) { 745 if (numKeywords == maxKeywords) { 746 *status = U_INTERNAL_PROGRAM_ERROR; 747 return 0; 748 } 749 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword); 750 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword); 751 keywordList[numKeywords].valueStart = addValue; 752 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue); 753 ++numKeywords; 754 } 755 } else { 756 U_ASSERT(addValue == NULL); 757 } 758 759 /* now we have a list of keywords */ 760 /* we need to sort it */ 761 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status); 762 763 /* Now construct the keyword part */ 764 for(i = 0; i < numKeywords; i++) { 765 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) { 766 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword); 767 if(valuesToo) { 768 keywords[keywordsLen + keywordList[i].keywordLen] = '='; 769 } else { 770 keywords[keywordsLen + keywordList[i].keywordLen] = 0; 771 } 772 } 773 keywordsLen += keywordList[i].keywordLen + 1; 774 if(valuesToo) { 775 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) { 776 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); 777 } 778 keywordsLen += keywordList[i].valueLen; 779 780 if(i < numKeywords - 1) { 781 if(keywordsLen < keywordCapacity) { 782 keywords[keywordsLen] = ';'; 783 } 784 keywordsLen++; 785 } 786 } 787 if(values) { 788 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) { 789 uprv_strcpy(values+valuesLen, keywordList[i].valueStart); 790 values[valuesLen + keywordList[i].valueLen] = 0; 791 } 792 valuesLen += keywordList[i].valueLen + 1; 793 } 794 } 795 if(values) { 796 values[valuesLen] = 0; 797 if(valLen) { 798 *valLen = valuesLen; 799 } 800 } 801 return u_terminateChars(keywords, 
keywordCapacity, keywordsLen, status); 802 } else { 803 return 0; 804 } 805 } 806 807 U_CFUNC int32_t 808 locale_getKeywords(const char *localeID, 809 char prev, 810 char *keywords, int32_t keywordCapacity, 811 char *values, int32_t valuesCapacity, int32_t *valLen, 812 UBool valuesToo, 813 UErrorCode *status) { 814 return _getKeywords(localeID, prev, keywords, keywordCapacity, 815 values, valuesCapacity, valLen, valuesToo, 816 NULL, NULL, status); 817 } 818 819 U_CAPI int32_t U_EXPORT2 820 uloc_getKeywordValue(const char* localeID, 821 const char* keywordName, 822 char* buffer, int32_t bufferCapacity, 823 UErrorCode* status) 824 { 825 const char* startSearchHere = NULL; 826 const char* nextSeparator = NULL; 827 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 828 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 829 int32_t i = 0; 830 int32_t result = 0; 831 832 if(status && U_SUCCESS(*status) && localeID) { 833 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 834 const char* tmpLocaleID; 835 836 if (_hasBCP47Extension(localeID)) { 837 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 838 } else { 839 tmpLocaleID=localeID; 840 } 841 842 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */ 843 if(startSearchHere == NULL) { 844 /* no keywords, return at once */ 845 return 0; 846 } 847 848 locale_canonKeywordName(keywordNameBuffer, keywordName, status); 849 if(U_FAILURE(*status)) { 850 return 0; 851 } 852 853 /* find the first keyword */ 854 while(startSearchHere) { 855 startSearchHere++; 856 /* skip leading spaces (allowed?) 
*/ 857 while(*startSearchHere == ' ') { 858 startSearchHere++; 859 } 860 nextSeparator = uprv_strchr(startSearchHere, '='); 861 /* need to normalize both keyword and keyword name */ 862 if(!nextSeparator) { 863 break; 864 } 865 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) { 866 /* keyword name too long for internal buffer */ 867 *status = U_INTERNAL_PROGRAM_ERROR; 868 return 0; 869 } 870 for(i = 0; i < nextSeparator - startSearchHere; i++) { 871 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]); 872 } 873 /* trim trailing spaces */ 874 while(startSearchHere[i-1] == ' ') { 875 i--; 876 U_ASSERT(i>=0); 877 } 878 localeKeywordNameBuffer[i] = 0; 879 880 startSearchHere = uprv_strchr(nextSeparator, ';'); 881 882 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { 883 nextSeparator++; 884 while(*nextSeparator == ' ') { 885 nextSeparator++; 886 } 887 /* we actually found the keyword. Copy the value */ 888 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) { 889 while(*(startSearchHere-1) == ' ') { 890 startSearchHere--; 891 } 892 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator); 893 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status); 894 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */ 895 i = (int32_t)uprv_strlen(nextSeparator); 896 while(nextSeparator[i - 1] == ' ') { 897 i--; 898 } 899 uprv_strncpy(buffer, nextSeparator, i); 900 result = u_terminateChars(buffer, bufferCapacity, i, status); 901 } else { 902 /* give a bigger buffer, please */ 903 *status = U_BUFFER_OVERFLOW_ERROR; 904 if(startSearchHere) { 905 result = (int32_t)(startSearchHere - nextSeparator); 906 } else { 907 result = (int32_t)uprv_strlen(nextSeparator); 908 } 909 } 910 return result; 911 } 912 } 913 } 914 return 0; 915 } 916 917 U_CAPI int32_t U_EXPORT2 918 uloc_setKeywordValue(const char* keywordName, 
919 const char* keywordValue, 920 char* buffer, int32_t bufferCapacity, 921 UErrorCode* status) 922 { 923 /* TODO: sorting. removal. */ 924 int32_t keywordNameLen; 925 int32_t keywordValueLen; 926 int32_t bufLen; 927 int32_t needLen = 0; 928 int32_t foundValueLen; 929 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */ 930 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 931 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 932 int32_t i = 0; 933 int32_t rc; 934 char* nextSeparator = NULL; 935 char* nextEqualsign = NULL; 936 char* startSearchHere = NULL; 937 char* keywordStart = NULL; 938 char *insertHere = NULL; 939 if(U_FAILURE(*status)) { 940 return -1; 941 } 942 if(bufferCapacity>1) { 943 bufLen = (int32_t)uprv_strlen(buffer); 944 } else { 945 *status = U_ILLEGAL_ARGUMENT_ERROR; 946 return 0; 947 } 948 if(bufferCapacity<bufLen) { 949 /* The capacity is less than the length?! Is this NULL terminated? */ 950 *status = U_ILLEGAL_ARGUMENT_ERROR; 951 return 0; 952 } 953 if(keywordValue && !*keywordValue) { 954 keywordValue = NULL; 955 } 956 if(keywordValue) { 957 keywordValueLen = (int32_t)uprv_strlen(keywordValue); 958 } else { 959 keywordValueLen = 0; 960 } 961 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status); 962 if(U_FAILURE(*status)) { 963 return 0; 964 } 965 startSearchHere = (char*)locale_getKeywordsStart(buffer); 966 if(startSearchHere == NULL || (startSearchHere[1]==0)) { 967 if(!keywordValue) { /* no keywords = nothing to remove */ 968 return bufLen; 969 } 970 971 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 972 if(startSearchHere) { /* had a single @ */ 973 needLen--; /* already had the @ */ 974 /* startSearchHere points at the @ */ 975 } else { 976 startSearchHere=buffer+bufLen; 977 } 978 if(needLen >= bufferCapacity) { 979 *status = U_BUFFER_OVERFLOW_ERROR; 980 return needLen; /* no change */ 981 } 982 *startSearchHere = '@'; 983 startSearchHere++; 984 uprv_strcpy(startSearchHere, 
keywordNameBuffer); 985 startSearchHere += keywordNameLen; 986 *startSearchHere = '='; 987 startSearchHere++; 988 uprv_strcpy(startSearchHere, keywordValue); 989 startSearchHere+=keywordValueLen; 990 return needLen; 991 } /* end shortcut - no @ */ 992 993 keywordStart = startSearchHere; 994 /* search for keyword */ 995 while(keywordStart) { 996 keywordStart++; 997 /* skip leading spaces (allowed?) */ 998 while(*keywordStart == ' ') { 999 keywordStart++; 1000 } 1001 nextEqualsign = uprv_strchr(keywordStart, '='); 1002 /* need to normalize both keyword and keyword name */ 1003 if(!nextEqualsign) { 1004 break; 1005 } 1006 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) { 1007 /* keyword name too long for internal buffer */ 1008 *status = U_INTERNAL_PROGRAM_ERROR; 1009 return 0; 1010 } 1011 for(i = 0; i < nextEqualsign - keywordStart; i++) { 1012 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]); 1013 } 1014 /* trim trailing spaces */ 1015 while(keywordStart[i-1] == ' ') { 1016 i--; 1017 } 1018 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN); 1019 localeKeywordNameBuffer[i] = 0; 1020 1021 nextSeparator = uprv_strchr(nextEqualsign, ';'); 1022 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); 1023 if(rc == 0) { 1024 nextEqualsign++; 1025 while(*nextEqualsign == ' ') { 1026 nextEqualsign++; 1027 } 1028 /* we actually found the keyword. 
Change the value */ 1029 if (nextSeparator) { 1030 keywordAtEnd = 0; 1031 foundValueLen = (int32_t)(nextSeparator - nextEqualsign); 1032 } else { 1033 keywordAtEnd = 1; 1034 foundValueLen = (int32_t)uprv_strlen(nextEqualsign); 1035 } 1036 if(keywordValue) { /* adding a value - not removing */ 1037 if(foundValueLen == keywordValueLen) { 1038 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1039 return bufLen; /* no change in size */ 1040 } else if(foundValueLen > keywordValueLen) { 1041 int32_t delta = foundValueLen - keywordValueLen; 1042 if(nextSeparator) { /* RH side */ 1043 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer)); 1044 } 1045 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1046 bufLen -= delta; 1047 buffer[bufLen]=0; 1048 return bufLen; 1049 } else { /* FVL < KVL */ 1050 int32_t delta = keywordValueLen - foundValueLen; 1051 if((bufLen+delta) >= bufferCapacity) { 1052 *status = U_BUFFER_OVERFLOW_ERROR; 1053 return bufLen+delta; 1054 } 1055 if(nextSeparator) { /* RH side */ 1056 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer)); 1057 } 1058 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1059 bufLen += delta; 1060 buffer[bufLen]=0; 1061 return bufLen; 1062 } 1063 } else { /* removing a keyword */ 1064 if(keywordAtEnd) { 1065 /* zero out the ';' or '@' just before startSearchhere */ 1066 keywordStart[-1] = 0; 1067 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */ 1068 } else { 1069 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer)); 1070 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0; 1071 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart)); 1072 } 1073 } 1074 } else if(rc<0){ /* end match keyword */ 1075 /* could insert at this location. 
*/ 1076 insertHere = keywordStart; 1077 } 1078 keywordStart = nextSeparator; 1079 } /* end loop searching */ 1080 1081 if(!keywordValue) { 1082 return bufLen; /* removal of non-extant keyword - no change */ 1083 } 1084 1085 /* we know there is at least one keyword. */ 1086 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 1087 if(needLen >= bufferCapacity) { 1088 *status = U_BUFFER_OVERFLOW_ERROR; 1089 return needLen; /* no change */ 1090 } 1091 1092 if(insertHere) { 1093 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer)); 1094 keywordStart = insertHere; 1095 } else { 1096 keywordStart = buffer+bufLen; 1097 *keywordStart = ';'; 1098 keywordStart++; 1099 } 1100 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen); 1101 keywordStart += keywordNameLen; 1102 *keywordStart = '='; 1103 keywordStart++; 1104 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */ 1105 keywordStart+=keywordValueLen; 1106 if(insertHere) { 1107 *keywordStart = ';'; 1108 keywordStart++; 1109 } 1110 buffer[needLen]=0; 1111 return needLen; 1112 } 1113 1114 /* ### ID parsing implementation **************************************************/ 1115 1116 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) 1117 1118 /*returns TRUE if one of the special prefixes is here (s=string) 1119 'x-' or 'i-' */ 1120 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) 1121 1122 /* Dot terminates it because of POSIX form where dot precedes the codepage 1123 * except for variant 1124 */ 1125 #define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) 1126 1127 static char* _strnchr(const char* str, int32_t len, char c) { 1128 U_ASSERT(str != 0 && len >= 0); 1129 while (len-- != 0) { 1130 char d = *str; 1131 if (d == c) { 1132 return (char*) str; 1133 } else if (d == 0) { 1134 break; 1135 } 1136 ++str; 1137 } 1138 return NULL; 1139 } 1140 1141 /** 1142 * Lookup 'key' in the array 'list'. 
The array 'list' should contain 1143 * a NULL entry, followed by more entries, and a second NULL entry. 1144 * 1145 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or 1146 * COUNTRIES_3. 1147 */ 1148 static int16_t _findIndex(const char* const* list, const char* key) 1149 { 1150 const char* const* anchor = list; 1151 int32_t pass = 0; 1152 1153 /* Make two passes through two NULL-terminated arrays at 'list' */ 1154 while (pass++ < 2) { 1155 while (*list) { 1156 if (uprv_strcmp(key, *list) == 0) { 1157 return (int16_t)(list - anchor); 1158 } 1159 list++; 1160 } 1161 ++list; /* skip final NULL *CWB*/ 1162 } 1163 return -1; 1164 } 1165 1166 /* count the length of src while copying it to dest; return strlen(src) */ 1167 static inline int32_t 1168 _copyCount(char *dest, int32_t destCapacity, const char *src) { 1169 const char *anchor; 1170 char c; 1171 1172 anchor=src; 1173 for(;;) { 1174 if((c=*src)==0) { 1175 return (int32_t)(src-anchor); 1176 } 1177 if(destCapacity<=0) { 1178 return (int32_t)((src-anchor)+uprv_strlen(src)); 1179 } 1180 ++src; 1181 *dest++=c; 1182 --destCapacity; 1183 } 1184 } 1185 1186 U_CFUNC const char* 1187 uloc_getCurrentCountryID(const char* oldID){ 1188 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); 1189 if (offset >= 0) { 1190 return REPLACEMENT_COUNTRIES[offset]; 1191 } 1192 return oldID; 1193 } 1194 U_CFUNC const char* 1195 uloc_getCurrentLanguageID(const char* oldID){ 1196 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); 1197 if (offset >= 0) { 1198 return REPLACEMENT_LANGUAGES[offset]; 1199 } 1200 return oldID; 1201 } 1202 /* 1203 * the internal functions _getLanguage(), _getCountry(), _getVariant() 1204 * avoid duplicating code to handle the earlier locale ID pieces 1205 * in the functions for the later ones by 1206 * setting the *pEnd pointer to where they stopped parsing 1207 * 1208 * TODO try to use this in Locale 1209 */ 1210 U_CFUNC int32_t 1211 ulocimp_getLanguage(const char *localeID, 1212 char 
*language, int32_t languageCapacity, 1213 const char **pEnd) { 1214 int32_t i=0; 1215 int32_t offset; 1216 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ 1217 1218 /* if it starts with i- or x- then copy that prefix */ 1219 if(_isIDPrefix(localeID)) { 1220 if(i<languageCapacity) { 1221 language[i]=(char)uprv_tolower(*localeID); 1222 } 1223 if(i<languageCapacity) { 1224 language[i+1]='-'; 1225 } 1226 i+=2; 1227 localeID+=2; 1228 } 1229 1230 /* copy the language as far as possible and count its length */ 1231 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { 1232 if(i<languageCapacity) { 1233 language[i]=(char)uprv_tolower(*localeID); 1234 } 1235 if(i<3) { 1236 U_ASSERT(i>=0); 1237 lang[i]=(char)uprv_tolower(*localeID); 1238 } 1239 i++; 1240 localeID++; 1241 } 1242 1243 if(i==3) { 1244 /* convert 3 character code to 2 character code if possible *CWB*/ 1245 offset=_findIndex(LANGUAGES_3, lang); 1246 if(offset>=0) { 1247 i=_copyCount(language, languageCapacity, LANGUAGES[offset]); 1248 } 1249 } 1250 1251 if(pEnd!=NULL) { 1252 *pEnd=localeID; 1253 } 1254 return i; 1255 } 1256 1257 U_CFUNC int32_t 1258 ulocimp_getScript(const char *localeID, 1259 char *script, int32_t scriptCapacity, 1260 const char **pEnd) 1261 { 1262 int32_t idLen = 0; 1263 1264 if (pEnd != NULL) { 1265 *pEnd = localeID; 1266 } 1267 1268 /* copy the second item as far as possible and count its length */ 1269 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]) 1270 && uprv_isASCIILetter(localeID[idLen])) { 1271 idLen++; 1272 } 1273 1274 /* If it's exactly 4 characters long, then it's a script and not a country. 
*/ 1275 if (idLen == 4) { 1276 int32_t i; 1277 if (pEnd != NULL) { 1278 *pEnd = localeID+idLen; 1279 } 1280 if(idLen > scriptCapacity) { 1281 idLen = scriptCapacity; 1282 } 1283 if (idLen >= 1) { 1284 script[0]=(char)uprv_toupper(*(localeID++)); 1285 } 1286 for (i = 1; i < idLen; i++) { 1287 script[i]=(char)uprv_tolower(*(localeID++)); 1288 } 1289 } 1290 else { 1291 idLen = 0; 1292 } 1293 return idLen; 1294 } 1295 1296 U_CFUNC int32_t 1297 ulocimp_getCountry(const char *localeID, 1298 char *country, int32_t countryCapacity, 1299 const char **pEnd) 1300 { 1301 int32_t idLen=0; 1302 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; 1303 int32_t offset; 1304 1305 /* copy the country as far as possible and count its length */ 1306 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { 1307 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ 1308 cnty[idLen]=(char)uprv_toupper(localeID[idLen]); 1309 } 1310 idLen++; 1311 } 1312 1313 /* the country should be either length 2 or 3 */ 1314 if (idLen == 2 || idLen == 3) { 1315 UBool gotCountry = FALSE; 1316 /* convert 3 character code to 2 character code if possible *CWB*/ 1317 if(idLen==3) { 1318 offset=_findIndex(COUNTRIES_3, cnty); 1319 if(offset>=0) { 1320 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]); 1321 gotCountry = TRUE; 1322 } 1323 } 1324 if (!gotCountry) { 1325 int32_t i = 0; 1326 for (i = 0; i < idLen; i++) { 1327 if (i < countryCapacity) { 1328 country[i]=(char)uprv_toupper(localeID[i]); 1329 } 1330 } 1331 } 1332 localeID+=idLen; 1333 } else { 1334 idLen = 0; 1335 } 1336 1337 if(pEnd!=NULL) { 1338 *pEnd=localeID; 1339 } 1340 1341 return idLen; 1342 } 1343 1344 /** 1345 * @param needSeparator if true, then add leading '_' if any variants 1346 * are added to 'variant' 1347 */ 1348 static int32_t 1349 _getVariantEx(const char *localeID, 1350 char prev, 1351 char *variant, int32_t variantCapacity, 1352 UBool needSeparator) { 1353 int32_t i=0; 1354 1355 /* get one or more variant tags 
and separate them with '_' */ 1356 if(_isIDSeparator(prev)) { 1357 /* get a variant string after a '-' or '_' */ 1358 while(!_isTerminator(*localeID)) { 1359 if (needSeparator) { 1360 if (i<variantCapacity) { 1361 variant[i] = '_'; 1362 } 1363 ++i; 1364 needSeparator = FALSE; 1365 } 1366 if(i<variantCapacity) { 1367 variant[i]=(char)uprv_toupper(*localeID); 1368 if(variant[i]=='-') { 1369 variant[i]='_'; 1370 } 1371 } 1372 i++; 1373 localeID++; 1374 } 1375 } 1376 1377 /* if there is no variant tag after a '-' or '_' then look for '@' */ 1378 if(i==0) { 1379 if(prev=='@') { 1380 /* keep localeID */ 1381 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) { 1382 ++localeID; /* point after the '@' */ 1383 } else { 1384 return 0; 1385 } 1386 while(!_isTerminator(*localeID)) { 1387 if (needSeparator) { 1388 if (i<variantCapacity) { 1389 variant[i] = '_'; 1390 } 1391 ++i; 1392 needSeparator = FALSE; 1393 } 1394 if(i<variantCapacity) { 1395 variant[i]=(char)uprv_toupper(*localeID); 1396 if(variant[i]=='-' || variant[i]==',') { 1397 variant[i]='_'; 1398 } 1399 } 1400 i++; 1401 localeID++; 1402 } 1403 } 1404 1405 return i; 1406 } 1407 1408 static int32_t 1409 _getVariant(const char *localeID, 1410 char prev, 1411 char *variant, int32_t variantCapacity) { 1412 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE); 1413 } 1414 1415 /** 1416 * Delete ALL instances of a variant from the given list of one or 1417 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR". 1418 * @param variants the source string of one or more variants, 1419 * separated by '_'. This will be MODIFIED IN PLACE. Not zero 1420 * terminated; if it is, trailing zero will NOT be maintained. 1421 * @param variantsLen length of variants 1422 * @param toDelete variant to delete, without separators, e.g. 
"EURO" 1423 * or "PREEURO"; not zero terminated 1424 * @param toDeleteLen length of toDelete 1425 * @return number of characters deleted from variants 1426 */ 1427 static int32_t 1428 _deleteVariant(char* variants, int32_t variantsLen, 1429 const char* toDelete, int32_t toDeleteLen) 1430 { 1431 int32_t delta = 0; /* number of chars deleted */ 1432 for (;;) { 1433 UBool flag = FALSE; 1434 if (variantsLen < toDeleteLen) { 1435 return delta; 1436 } 1437 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && 1438 (variantsLen == toDeleteLen || 1439 (flag=(variants[toDeleteLen] == '_')))) 1440 { 1441 int32_t d = toDeleteLen + (flag?1:0); 1442 variantsLen -= d; 1443 delta += d; 1444 if (variantsLen > 0) { 1445 uprv_memmove(variants, variants+d, variantsLen); 1446 } 1447 } else { 1448 char* p = _strnchr(variants, variantsLen, '_'); 1449 if (p == NULL) { 1450 return delta; 1451 } 1452 ++p; 1453 variantsLen -= (int32_t)(p - variants); 1454 variants = p; 1455 } 1456 } 1457 } 1458 1459 /* Keyword enumeration */ 1460 1461 typedef struct UKeywordsContext { 1462 char* keywords; 1463 char* current; 1464 } UKeywordsContext; 1465 1466 static void U_CALLCONV 1467 uloc_kw_closeKeywords(UEnumeration *enumerator) { 1468 uprv_free(((UKeywordsContext *)enumerator->context)->keywords); 1469 uprv_free(enumerator->context); 1470 uprv_free(enumerator); 1471 } 1472 1473 static int32_t U_CALLCONV 1474 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { 1475 char *kw = ((UKeywordsContext *)en->context)->keywords; 1476 int32_t result = 0; 1477 while(*kw) { 1478 result++; 1479 kw += uprv_strlen(kw)+1; 1480 } 1481 return result; 1482 } 1483 1484 static const char* U_CALLCONV 1485 uloc_kw_nextKeyword(UEnumeration* en, 1486 int32_t* resultLength, 1487 UErrorCode* /*status*/) { 1488 const char* result = ((UKeywordsContext *)en->context)->current; 1489 int32_t len = 0; 1490 if(*result) { 1491 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current); 1492 
((UKeywordsContext *)en->context)->current += len+1; 1493 } else { 1494 result = NULL; 1495 } 1496 if (resultLength) { 1497 *resultLength = len; 1498 } 1499 return result; 1500 } 1501 1502 static void U_CALLCONV 1503 uloc_kw_resetKeywords(UEnumeration* en, 1504 UErrorCode* /*status*/) { 1505 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; 1506 } 1507 1508 static const UEnumeration gKeywordsEnum = { 1509 NULL, 1510 NULL, 1511 uloc_kw_closeKeywords, 1512 uloc_kw_countKeywords, 1513 uenum_unextDefault, 1514 uloc_kw_nextKeyword, 1515 uloc_kw_resetKeywords 1516 }; 1517 1518 U_CAPI UEnumeration* U_EXPORT2 1519 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) 1520 { 1521 UKeywordsContext *myContext = NULL; 1522 UEnumeration *result = NULL; 1523 1524 if(U_FAILURE(*status)) { 1525 return NULL; 1526 } 1527 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 1528 /* Null pointer test */ 1529 if (result == NULL) { 1530 *status = U_MEMORY_ALLOCATION_ERROR; 1531 return NULL; 1532 } 1533 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); 1534 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))); 1535 if (myContext == NULL) { 1536 *status = U_MEMORY_ALLOCATION_ERROR; 1537 uprv_free(result); 1538 return NULL; 1539 } 1540 myContext->keywords = (char *)uprv_malloc(keywordListSize+1); 1541 uprv_memcpy(myContext->keywords, keywordList, keywordListSize); 1542 myContext->keywords[keywordListSize] = 0; 1543 myContext->current = myContext->keywords; 1544 result->context = myContext; 1545 return result; 1546 } 1547 1548 U_CAPI UEnumeration* U_EXPORT2 1549 uloc_openKeywords(const char* localeID, 1550 UErrorCode* status) 1551 { 1552 int32_t i=0; 1553 char keywords[256]; 1554 int32_t keywordsCapacity = 256; 1555 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1556 const char* tmpLocaleID; 1557 1558 if(status==NULL || U_FAILURE(*status)) { 1559 return 0; 1560 } 1561 1562 if 
(_hasBCP47Extension(localeID)) { 1563 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 1564 } else { 1565 if (localeID==NULL) { 1566 localeID=uloc_getDefault(); 1567 } 1568 tmpLocaleID=localeID; 1569 } 1570 1571 /* Skip the language */ 1572 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 1573 if(_isIDSeparator(*tmpLocaleID)) { 1574 const char *scriptID; 1575 /* Skip the script if available */ 1576 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 1577 if(scriptID != tmpLocaleID+1) { 1578 /* Found optional script */ 1579 tmpLocaleID = scriptID; 1580 } 1581 /* Skip the Country */ 1582 if (_isIDSeparator(*tmpLocaleID)) { 1583 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID); 1584 if(_isIDSeparator(*tmpLocaleID)) { 1585 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0); 1586 } 1587 } 1588 } 1589 1590 /* keywords are located after '@' */ 1591 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { 1592 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status); 1593 } 1594 1595 if(i) { 1596 return uloc_openKeywordList(keywords, i, status); 1597 } else { 1598 return NULL; 1599 } 1600 } 1601 1602 1603 /* bit-flags for 'options' parameter of _canonicalize */ 1604 #define _ULOC_STRIP_KEYWORDS 0x2 1605 #define _ULOC_CANONICALIZE 0x1 1606 1607 #define OPTION_SET(options, mask) ((options & mask) != 0) 1608 1609 static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; 1610 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0]) 1611 1612 /** 1613 * Canonicalize the given localeID, to level 1 or to level 2, 1614 * depending on the options. To specify level 1, pass in options=0. 1615 * To specify level 2, pass in options=_ULOC_CANONICALIZE. 1616 * 1617 * This is the code underlying uloc_getName and uloc_canonicalize. 
1618 */ 1619 static int32_t 1620 _canonicalize(const char* localeID, 1621 char* result, 1622 int32_t resultCapacity, 1623 uint32_t options, 1624 UErrorCode* err) { 1625 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity; 1626 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1627 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1628 const char* origLocaleID; 1629 const char* tmpLocaleID; 1630 const char* keywordAssign = NULL; 1631 const char* separatorIndicator = NULL; 1632 const char* addKeyword = NULL; 1633 const char* addValue = NULL; 1634 char* name; 1635 char* variant = NULL; /* pointer into name, or NULL */ 1636 1637 if (U_FAILURE(*err)) { 1638 return 0; 1639 } 1640 1641 if (_hasBCP47Extension(localeID)) { 1642 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 1643 } else { 1644 if (localeID==NULL) { 1645 localeID=uloc_getDefault(); 1646 } 1647 tmpLocaleID=localeID; 1648 } 1649 1650 origLocaleID=tmpLocaleID; 1651 1652 /* if we are doing a full canonicalization, then put results in 1653 localeBuffer, if necessary; otherwise send them to result. 
*/ 1654 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/ 1655 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) { 1656 name = localeBuffer; 1657 nameCapacity = (int32_t)sizeof(localeBuffer); 1658 } else { 1659 name = result; 1660 nameCapacity = resultCapacity; 1661 } 1662 1663 /* get all pieces, one after another, and separate with '_' */ 1664 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID); 1665 1666 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) { 1667 const char *d = uloc_getDefault(); 1668 1669 len = (int32_t)uprv_strlen(d); 1670 1671 if (name != NULL) { 1672 uprv_strncpy(name, d, len); 1673 } 1674 } else if(_isIDSeparator(*tmpLocaleID)) { 1675 const char *scriptID; 1676 1677 ++fieldCount; 1678 if(len<nameCapacity) { 1679 name[len]='_'; 1680 } 1681 ++len; 1682 1683 scriptSize=ulocimp_getScript(tmpLocaleID+1, 1684 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID); 1685 if(scriptSize > 0) { 1686 /* Found optional script */ 1687 tmpLocaleID = scriptID; 1688 ++fieldCount; 1689 len+=scriptSize; 1690 if (_isIDSeparator(*tmpLocaleID)) { 1691 /* If there is something else, then we add the _ */ 1692 if(len<nameCapacity) { 1693 name[len]='_'; 1694 } 1695 ++len; 1696 } 1697 } 1698 1699 if (_isIDSeparator(*tmpLocaleID)) { 1700 const char *cntryID; 1701 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, 1702 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID); 1703 if (cntrySize > 0) { 1704 /* Found optional country */ 1705 tmpLocaleID = cntryID; 1706 len+=cntrySize; 1707 } 1708 if(_isIDSeparator(*tmpLocaleID)) { 1709 /* If there is something else, then we add the _ if we found country before. */ 1710 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) { 1711 ++fieldCount; 1712 if(len<nameCapacity) { 1713 name[len]='_'; 1714 } 1715 ++len; 1716 } 1717 1718 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, 1719 (len<nameCapacity ? 
name+len : NULL), nameCapacity-len); 1720 if (variantSize > 0) { 1721 variant = len<nameCapacity ? name+len : NULL; 1722 len += variantSize; 1723 tmpLocaleID += variantSize + 1; /* skip '_' and variant */ 1724 } 1725 } 1726 } 1727 } 1728 1729 /* Copy POSIX-style charset specifier, if any [mr.utf8] */ 1730 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') { 1731 UBool done = FALSE; 1732 do { 1733 char c = *tmpLocaleID; 1734 switch (c) { 1735 case 0: 1736 case '@': 1737 done = TRUE; 1738 break; 1739 default: 1740 if (len<nameCapacity) { 1741 name[len] = c; 1742 } 1743 ++len; 1744 ++tmpLocaleID; 1745 break; 1746 } 1747 } while (!done); 1748 } 1749 1750 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';' 1751 After this, tmpLocaleID either points to '@' or is NULL */ 1752 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) { 1753 keywordAssign = uprv_strchr(tmpLocaleID, '='); 1754 separatorIndicator = uprv_strchr(tmpLocaleID, ';'); 1755 } 1756 1757 /* Copy POSIX-style variant, if any [mr@FOO] */ 1758 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && 1759 tmpLocaleID != NULL && keywordAssign == NULL) { 1760 for (;;) { 1761 char c = *tmpLocaleID; 1762 if (c == 0) { 1763 break; 1764 } 1765 if (len<nameCapacity) { 1766 name[len] = c; 1767 } 1768 ++len; 1769 ++tmpLocaleID; 1770 } 1771 } 1772 1773 if (OPTION_SET(options, _ULOC_CANONICALIZE)) { 1774 /* Handle @FOO variant if @ is present and not followed by = */ 1775 if (tmpLocaleID!=NULL && keywordAssign==NULL) { 1776 int32_t posixVariantSize; 1777 /* Add missing '_' if needed */ 1778 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) { 1779 do { 1780 if(len<nameCapacity) { 1781 name[len]='_'; 1782 } 1783 ++len; 1784 ++fieldCount; 1785 } while(fieldCount<2); 1786 } 1787 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len, 1788 (UBool)(variantSize > 0)); 1789 if (posixVariantSize > 0) { 1790 if (variant == NULL) { 1791 variant = name+len; 
1792 } 1793 len += posixVariantSize; 1794 variantSize += posixVariantSize; 1795 } 1796 } 1797 1798 /* Handle generic variants first */ 1799 if (variant) { 1800 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) { 1801 const char* variantToCompare = VARIANT_MAP[j].variant; 1802 int32_t n = (int32_t)uprv_strlen(variantToCompare); 1803 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n); 1804 len -= variantLen; 1805 if (variantLen > 0) { 1806 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */ 1807 --len; 1808 } 1809 addKeyword = VARIANT_MAP[j].keyword; 1810 addValue = VARIANT_MAP[j].value; 1811 break; 1812 } 1813 } 1814 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */ 1815 --len; 1816 } 1817 } 1818 1819 /* Look up the ID in the canonicalization map */ 1820 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) { 1821 const char* id = CANONICALIZE_MAP[j].id; 1822 int32_t n = (int32_t)uprv_strlen(id); 1823 if (len == n && uprv_strncmp(name, id, n) == 0) { 1824 if (n == 0 && tmpLocaleID != NULL) { 1825 break; /* Don't remap "" if keywords present */ 1826 } 1827 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID); 1828 if (CANONICALIZE_MAP[j].keyword) { 1829 addKeyword = CANONICALIZE_MAP[j].keyword; 1830 addValue = CANONICALIZE_MAP[j].value; 1831 } 1832 break; 1833 } 1834 } 1835 } 1836 1837 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) { 1838 if (tmpLocaleID!=NULL && keywordAssign!=NULL && 1839 (!separatorIndicator || separatorIndicator > keywordAssign)) { 1840 if(len<nameCapacity) { 1841 name[len]='@'; 1842 } 1843 ++len; 1844 ++fieldCount; 1845 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? 
name+len : NULL), nameCapacity-len, 1846 NULL, 0, NULL, TRUE, addKeyword, addValue, err); 1847 } else if (addKeyword != NULL) { 1848 U_ASSERT(addValue != NULL && len < nameCapacity); 1849 /* inelegant but works -- later make _getKeywords do this? */ 1850 len += _copyCount(name+len, nameCapacity-len, "@"); 1851 len += _copyCount(name+len, nameCapacity-len, addKeyword); 1852 len += _copyCount(name+len, nameCapacity-len, "="); 1853 len += _copyCount(name+len, nameCapacity-len, addValue); 1854 } 1855 } 1856 1857 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) { 1858 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len); 1859 } 1860 1861 return u_terminateChars(result, resultCapacity, len, err); 1862 } 1863 1864 /* ### ID parsing API **************************************************/ 1865 1866 U_CAPI int32_t U_EXPORT2 1867 uloc_getParent(const char* localeID, 1868 char* parent, 1869 int32_t parentCapacity, 1870 UErrorCode* err) 1871 { 1872 const char *lastUnderscore; 1873 int32_t i; 1874 1875 if (U_FAILURE(*err)) 1876 return 0; 1877 1878 if (localeID == NULL) 1879 localeID = uloc_getDefault(); 1880 1881 lastUnderscore=uprv_strrchr(localeID, '_'); 1882 if(lastUnderscore!=NULL) { 1883 i=(int32_t)(lastUnderscore-localeID); 1884 } else { 1885 i=0; 1886 } 1887 1888 if(i>0 && parent != localeID) { 1889 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity)); 1890 } 1891 return u_terminateChars(parent, parentCapacity, i, err); 1892 } 1893 1894 U_CAPI int32_t U_EXPORT2 1895 uloc_getLanguage(const char* localeID, 1896 char* language, 1897 int32_t languageCapacity, 1898 UErrorCode* err) 1899 { 1900 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. 
*CWB*/ 1901 int32_t i=0; 1902 1903 if (err==NULL || U_FAILURE(*err)) { 1904 return 0; 1905 } 1906 1907 if(localeID==NULL) { 1908 localeID=uloc_getDefault(); 1909 } 1910 1911 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL); 1912 return u_terminateChars(language, languageCapacity, i, err); 1913 } 1914 1915 U_CAPI int32_t U_EXPORT2 1916 uloc_getScript(const char* localeID, 1917 char* script, 1918 int32_t scriptCapacity, 1919 UErrorCode* err) 1920 { 1921 int32_t i=0; 1922 1923 if(err==NULL || U_FAILURE(*err)) { 1924 return 0; 1925 } 1926 1927 if(localeID==NULL) { 1928 localeID=uloc_getDefault(); 1929 } 1930 1931 /* skip the language */ 1932 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1933 if(_isIDSeparator(*localeID)) { 1934 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL); 1935 } 1936 return u_terminateChars(script, scriptCapacity, i, err); 1937 } 1938 1939 U_CAPI int32_t U_EXPORT2 1940 uloc_getCountry(const char* localeID, 1941 char* country, 1942 int32_t countryCapacity, 1943 UErrorCode* err) 1944 { 1945 int32_t i=0; 1946 1947 if(err==NULL || U_FAILURE(*err)) { 1948 return 0; 1949 } 1950 1951 if(localeID==NULL) { 1952 localeID=uloc_getDefault(); 1953 } 1954 1955 /* Skip the language */ 1956 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1957 if(_isIDSeparator(*localeID)) { 1958 const char *scriptID; 1959 /* Skip the script if available */ 1960 ulocimp_getScript(localeID+1, NULL, 0, &scriptID); 1961 if(scriptID != localeID+1) { 1962 /* Found optional script */ 1963 localeID = scriptID; 1964 } 1965 if(_isIDSeparator(*localeID)) { 1966 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL); 1967 } 1968 } 1969 return u_terminateChars(country, countryCapacity, i, err); 1970 } 1971 1972 U_CAPI int32_t U_EXPORT2 1973 uloc_getVariant(const char* localeID, 1974 char* variant, 1975 int32_t variantCapacity, 1976 UErrorCode* err) 1977 { 1978 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1979 const char* tmpLocaleID; 1980 int32_t 
i=0; 1981 1982 if(err==NULL || U_FAILURE(*err)) { 1983 return 0; 1984 } 1985 1986 if (_hasBCP47Extension(localeID)) { 1987 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 1988 } else { 1989 if (localeID==NULL) { 1990 localeID=uloc_getDefault(); 1991 } 1992 tmpLocaleID=localeID; 1993 } 1994 1995 /* Skip the language */ 1996 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 1997 if(_isIDSeparator(*tmpLocaleID)) { 1998 const char *scriptID; 1999 /* Skip the script if available */ 2000 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 2001 if(scriptID != tmpLocaleID+1) { 2002 /* Found optional script */ 2003 tmpLocaleID = scriptID; 2004 } 2005 /* Skip the Country */ 2006 if (_isIDSeparator(*tmpLocaleID)) { 2007 const char *cntryID; 2008 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID); 2009 if (cntryID != tmpLocaleID+1) { 2010 /* Found optional country */ 2011 tmpLocaleID = cntryID; 2012 } 2013 if(_isIDSeparator(*tmpLocaleID)) { 2014 /* If there was no country ID, skip a possible extra IDSeparator */ 2015 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) { 2016 tmpLocaleID++; 2017 } 2018 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity); 2019 } 2020 } 2021 } 2022 2023 /* removed by weiv. We don't want to handle POSIX variants anymore. 
Use canonicalization function */ 2024 /* if we do not have a variant tag yet then try a POSIX variant after '@' */ 2025 /* 2026 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) { 2027 i=_getVariant(localeID+1, '@', variant, variantCapacity); 2028 } 2029 */ 2030 return u_terminateChars(variant, variantCapacity, i, err); 2031 } 2032 2033 U_CAPI int32_t U_EXPORT2 2034 uloc_getName(const char* localeID, 2035 char* name, 2036 int32_t nameCapacity, 2037 UErrorCode* err) 2038 { 2039 return _canonicalize(localeID, name, nameCapacity, 0, err); 2040 } 2041 2042 U_CAPI int32_t U_EXPORT2 2043 uloc_getBaseName(const char* localeID, 2044 char* name, 2045 int32_t nameCapacity, 2046 UErrorCode* err) 2047 { 2048 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err); 2049 } 2050 2051 U_CAPI int32_t U_EXPORT2 2052 uloc_canonicalize(const char* localeID, 2053 char* name, 2054 int32_t nameCapacity, 2055 UErrorCode* err) 2056 { 2057 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err); 2058 } 2059 2060 U_CAPI const char* U_EXPORT2 2061 uloc_getISO3Language(const char* localeID) 2062 { 2063 int16_t offset; 2064 char lang[ULOC_LANG_CAPACITY]; 2065 UErrorCode err = U_ZERO_ERROR; 2066 2067 if (localeID == NULL) 2068 { 2069 localeID = uloc_getDefault(); 2070 } 2071 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err); 2072 if (U_FAILURE(err)) 2073 return ""; 2074 offset = _findIndex(LANGUAGES, lang); 2075 if (offset < 0) 2076 return ""; 2077 return LANGUAGES_3[offset]; 2078 } 2079 2080 U_CAPI const char* U_EXPORT2 2081 uloc_getISO3Country(const char* localeID) 2082 { 2083 int16_t offset; 2084 char cntry[ULOC_LANG_CAPACITY]; 2085 UErrorCode err = U_ZERO_ERROR; 2086 2087 if (localeID == NULL) 2088 { 2089 localeID = uloc_getDefault(); 2090 } 2091 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err); 2092 if (U_FAILURE(err)) 2093 return ""; 2094 offset = _findIndex(COUNTRIES, cntry); 2095 if (offset < 0) 2096 return ""; 
2097 2098 return COUNTRIES_3[offset]; 2099 } 2100 2101 U_CAPI uint32_t U_EXPORT2 2102 uloc_getLCID(const char* localeID) 2103 { 2104 UErrorCode status = U_ZERO_ERROR; 2105 char langID[ULOC_FULLNAME_CAPACITY]; 2106 2107 uloc_getLanguage(localeID, langID, sizeof(langID), &status); 2108 if (U_FAILURE(status)) { 2109 return 0; 2110 } 2111 2112 if (uprv_strchr(localeID, '@')) { 2113 // uprv_convertToLCID does not support keywords other than collation. 2114 // Remove all keywords except collation. 2115 int32_t len; 2116 char collVal[ULOC_KEYWORDS_CAPACITY]; 2117 char tmpLocaleID[ULOC_FULLNAME_CAPACITY]; 2118 2119 len = uloc_getKeywordValue(localeID, "collation", collVal, 2120 sizeof(collVal)/sizeof(collVal[0]) - 1, &status); 2121 2122 if (U_SUCCESS(status) && len > 0) { 2123 collVal[len] = 0; 2124 2125 len = uloc_getBaseName(localeID, tmpLocaleID, 2126 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status); 2127 2128 if (U_SUCCESS(status)) { 2129 tmpLocaleID[len] = 0; 2130 2131 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID, 2132 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status); 2133 2134 if (U_SUCCESS(status)) { 2135 tmpLocaleID[len] = 0; 2136 return uprv_convertToLCID(langID, tmpLocaleID, &status); 2137 } 2138 } 2139 } 2140 2141 // fall through - all keywords are simply ignored 2142 status = U_ZERO_ERROR; 2143 } 2144 2145 return uprv_convertToLCID(langID, localeID, &status); 2146 } 2147 2148 U_CAPI int32_t U_EXPORT2 2149 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, 2150 UErrorCode *status) 2151 { 2152 return uprv_convertToPosix(hostid, locale, localeCapacity, status); 2153 } 2154 2155 /* ### Default locale **************************************************/ 2156 2157 U_CAPI const char* U_EXPORT2 2158 uloc_getDefault() 2159 { 2160 return locale_get_default(); 2161 } 2162 2163 U_CAPI void U_EXPORT2 2164 uloc_setDefault(const char* newDefaultLocale, 2165 UErrorCode* err) 2166 { 2167 if (U_FAILURE(*err)) 2168 
return; 2169 /* the error code isn't currently used for anything by this function*/ 2170 2171 /* propagate change to C++ */ 2172 locale_set_default(newDefaultLocale); 2173 } 2174 2175 /** 2176 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer 2177 * to an array of pointers to arrays of char. All of these pointers are owned 2178 * by ICU-- do not delete them, and do not write through them. The array is 2179 * terminated with a null pointer. 2180 */ 2181 U_CAPI const char* const* U_EXPORT2 2182 uloc_getISOLanguages() 2183 { 2184 return LANGUAGES; 2185 } 2186 2187 /** 2188 * Returns a list of all 2-letter country codes defined in ISO 639. This is a 2189 * pointer to an array of pointers to arrays of char. All of these pointers are 2190 * owned by ICU-- do not delete them, and do not write through them. The array is 2191 * terminated with a null pointer. 2192 */ 2193 U_CAPI const char* const* U_EXPORT2 2194 uloc_getISOCountries() 2195 { 2196 return COUNTRIES; 2197 } 2198 2199 2200 /* this function to be moved into cstring.c later */ 2201 static char gDecimal = 0; 2202 2203 static /* U_CAPI */ 2204 double 2205 /* U_EXPORT2 */ 2206 _uloc_strtod(const char *start, char **end) { 2207 char *decimal; 2208 char *myEnd; 2209 char buf[30]; 2210 double rv; 2211 if (!gDecimal) { 2212 char rep[5]; 2213 /* For machines that decide to change the decimal on you, 2214 and try to be too smart with localization. 2215 This normally should be just a '.'. 
*/ 2216 sprintf(rep, "%+1.1f", 1.0); 2217 gDecimal = rep[2]; 2218 } 2219 2220 if(gDecimal == '.') { 2221 return uprv_strtod(start, end); /* fall through to OS */ 2222 } else { 2223 uprv_strncpy(buf, start, 29); 2224 buf[29]=0; 2225 decimal = uprv_strchr(buf, '.'); 2226 if(decimal) { 2227 *decimal = gDecimal; 2228 } else { 2229 return uprv_strtod(start, end); /* no decimal point */ 2230 } 2231 rv = uprv_strtod(buf, &myEnd); 2232 if(end) { 2233 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */ 2234 } 2235 return rv; 2236 } 2237 } 2238 2239 typedef struct { 2240 float q; 2241 int32_t dummy; /* to avoid uninitialized memory copy from qsort */ 2242 char *locale; 2243 } _acceptLangItem; 2244 2245 static int32_t U_CALLCONV 2246 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b) 2247 { 2248 const _acceptLangItem *aa = (const _acceptLangItem*)a; 2249 const _acceptLangItem *bb = (const _acceptLangItem*)b; 2250 2251 int32_t rc = 0; 2252 if(bb->q < aa->q) { 2253 rc = -1; /* A > B */ 2254 } else if(bb->q > aa->q) { 2255 rc = 1; /* A < B */ 2256 } else { 2257 rc = 0; /* A = B */ 2258 } 2259 2260 if(rc==0) { 2261 rc = uprv_stricmp(aa->locale, bb->locale); 2262 } 2263 2264 #if defined(ULOC_DEBUG) 2265 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n", 2266 aa->locale, aa->q, 2267 bb->locale, bb->q, 2268 rc);*/ 2269 #endif 2270 2271 return rc; 2272 } 2273 2274 /* 2275 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53 2276 */ 2277 2278 U_CAPI int32_t U_EXPORT2 2279 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult, 2280 const char *httpAcceptLanguage, 2281 UEnumeration* availableLocales, 2282 UErrorCode *status) 2283 { 2284 _acceptLangItem *j; 2285 _acceptLangItem smallBuffer[30]; 2286 char **strs; 
2287 char tmp[ULOC_FULLNAME_CAPACITY +1]; 2288 int32_t n = 0; 2289 const char *itemEnd; 2290 const char *paramEnd; 2291 const char *s; 2292 const char *t; 2293 int32_t res; 2294 int32_t i; 2295 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); 2296 int32_t jSize; 2297 char *tempstr; /* Use for null pointer check */ 2298 2299 j = smallBuffer; 2300 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]); 2301 if(U_FAILURE(*status)) { 2302 return -1; 2303 } 2304 2305 for(s=httpAcceptLanguage;s&&*s;) { 2306 while(isspace(*s)) /* eat space at the beginning */ 2307 s++; 2308 itemEnd=uprv_strchr(s,','); 2309 paramEnd=uprv_strchr(s,';'); 2310 if(!itemEnd) { 2311 itemEnd = httpAcceptLanguage+l; /* end of string */ 2312 } 2313 if(paramEnd && paramEnd<itemEnd) { 2314 /* semicolon (;) is closer than end (,) */ 2315 t = paramEnd+1; 2316 if(*t=='q') { 2317 t++; 2318 } 2319 while(isspace(*t)) { 2320 t++; 2321 } 2322 if(*t=='=') { 2323 t++; 2324 } 2325 while(isspace(*t)) { 2326 t++; 2327 } 2328 j[n].q = (float)_uloc_strtod(t,NULL); 2329 } else { 2330 /* no semicolon - it's 1.0 */ 2331 j[n].q = 1.0f; 2332 paramEnd = itemEnd; 2333 } 2334 j[n].dummy=0; 2335 /* eat spaces prior to semi */ 2336 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) 2337 ; 2338 /* Check for null pointer from uprv_strndup */ 2339 tempstr = uprv_strndup(s,(int32_t)((t+1)-s)); 2340 if (tempstr == NULL) { 2341 *status = U_MEMORY_ALLOCATION_ERROR; 2342 return -1; 2343 } 2344 j[n].locale = tempstr; 2345 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status); 2346 if(strcmp(j[n].locale,tmp)) { 2347 uprv_free(j[n].locale); 2348 j[n].locale=uprv_strdup(tmp); 2349 } 2350 #if defined(ULOC_DEBUG) 2351 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ 2352 #endif 2353 n++; 2354 s = itemEnd; 2355 while(*s==',') { /* eat duplicate commas */ 2356 s++; 2357 } 2358 if(n>=jSize) { 2359 if(j==smallBuffer) { /* overflowed the small buffer. 
*/ 2360 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2))); 2361 if(j!=NULL) { 2362 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize); 2363 } 2364 #if defined(ULOC_DEBUG) 2365 fprintf(stderr,"malloced at size %d\n", jSize); 2366 #endif 2367 } else { 2368 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2)); 2369 #if defined(ULOC_DEBUG) 2370 fprintf(stderr,"re-alloced at size %d\n", jSize); 2371 #endif 2372 } 2373 jSize *= 2; 2374 if(j==NULL) { 2375 *status = U_MEMORY_ALLOCATION_ERROR; 2376 return -1; 2377 } 2378 } 2379 } 2380 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); 2381 if(U_FAILURE(*status)) { 2382 if(j != smallBuffer) { 2383 #if defined(ULOC_DEBUG) 2384 fprintf(stderr,"freeing j %p\n", j); 2385 #endif 2386 uprv_free(j); 2387 } 2388 return -1; 2389 } 2390 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n))); 2391 /* Check for null pointer */ 2392 if (strs == NULL) { 2393 uprv_free(j); /* Free to avoid memory leak */ 2394 *status = U_MEMORY_ALLOCATION_ERROR; 2395 return -1; 2396 } 2397 for(i=0;i<n;i++) { 2398 #if defined(ULOC_DEBUG) 2399 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/ 2400 #endif 2401 strs[i]=j[i].locale; 2402 } 2403 res = uloc_acceptLanguage(result, resultAvailable, outResult, 2404 (const char**)strs, n, availableLocales, status); 2405 for(i=0;i<n;i++) { 2406 uprv_free(strs[i]); 2407 } 2408 uprv_free(strs); 2409 if(j != smallBuffer) { 2410 #if defined(ULOC_DEBUG) 2411 fprintf(stderr,"freeing j %p\n", j); 2412 #endif 2413 uprv_free(j); 2414 } 2415 return res; 2416 } 2417 2418 2419 U_CAPI int32_t U_EXPORT2 2420 uloc_acceptLanguage(char *result, int32_t resultAvailable, 2421 UAcceptResult *outResult, const char **acceptList, 2422 int32_t acceptListCount, 2423 UEnumeration* availableLocales, 2424 UErrorCode *status) 2425 { 2426 int32_t i,j; 2427 int32_t len; 2428 int32_t maxLen=0; 2429 char tmp[ULOC_FULLNAME_CAPACITY+1]; 2430 const 
char *l; 2431 char **fallbackList; 2432 if(U_FAILURE(*status)) { 2433 return -1; 2434 } 2435 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount))); 2436 if(fallbackList==NULL) { 2437 *status = U_MEMORY_ALLOCATION_ERROR; 2438 return -1; 2439 } 2440 for(i=0;i<acceptListCount;i++) { 2441 #if defined(ULOC_DEBUG) 2442 fprintf(stderr,"%02d: %s\n", i, acceptList[i]); 2443 #endif 2444 while((l=uenum_next(availableLocales, NULL, status))) { 2445 #if defined(ULOC_DEBUG) 2446 fprintf(stderr," %s\n", l); 2447 #endif 2448 len = (int32_t)uprv_strlen(l); 2449 if(!uprv_strcmp(acceptList[i], l)) { 2450 if(outResult) { 2451 *outResult = ULOC_ACCEPT_VALID; 2452 } 2453 #if defined(ULOC_DEBUG) 2454 fprintf(stderr, "MATCH! %s\n", l); 2455 #endif 2456 if(len>0) { 2457 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2458 } 2459 for(j=0;j<i;j++) { 2460 uprv_free(fallbackList[j]); 2461 } 2462 uprv_free(fallbackList); 2463 return u_terminateChars(result, resultAvailable, len, status); 2464 } 2465 if(len>maxLen) { 2466 maxLen = len; 2467 } 2468 } 2469 uenum_reset(availableLocales, status); 2470 /* save off parent info */ 2471 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 2472 fallbackList[i] = uprv_strdup(tmp); 2473 } else { 2474 fallbackList[i]=0; 2475 } 2476 } 2477 2478 for(maxLen--;maxLen>0;maxLen--) { 2479 for(i=0;i<acceptListCount;i++) { 2480 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) { 2481 #if defined(ULOC_DEBUG) 2482 fprintf(stderr,"Try: [%s]", fallbackList[i]); 2483 #endif 2484 while((l=uenum_next(availableLocales, NULL, status))) { 2485 #if defined(ULOC_DEBUG) 2486 fprintf(stderr," %s\n", l); 2487 #endif 2488 len = (int32_t)uprv_strlen(l); 2489 if(!uprv_strcmp(fallbackList[i], l)) { 2490 if(outResult) { 2491 *outResult = ULOC_ACCEPT_FALLBACK; 2492 } 2493 #if defined(ULOC_DEBUG) 2494 fprintf(stderr, "fallback MATCH! 
%s\n", l); 2495 #endif 2496 if(len>0) { 2497 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2498 } 2499 for(j=0;j<acceptListCount;j++) { 2500 uprv_free(fallbackList[j]); 2501 } 2502 uprv_free(fallbackList); 2503 return u_terminateChars(result, resultAvailable, len, status); 2504 } 2505 } 2506 uenum_reset(availableLocales, status); 2507 2508 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 2509 uprv_free(fallbackList[i]); 2510 fallbackList[i] = uprv_strdup(tmp); 2511 } else { 2512 uprv_free(fallbackList[i]); 2513 fallbackList[i]=0; 2514 } 2515 } 2516 } 2517 if(outResult) { 2518 *outResult = ULOC_ACCEPT_FAILED; 2519 } 2520 } 2521 for(i=0;i<acceptListCount;i++) { 2522 uprv_free(fallbackList[i]); 2523 } 2524 uprv_free(fallbackList); 2525 return -1; 2526 } 2527 2528 /*eof*/ 2529