1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 1997-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * 9 * File ULOC.CPP 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 04/01/97 aliu Creation. 15 * 08/21/98 stephen JDK 1.2 sync 16 * 12/08/98 rtg New Locale implementation and C API 17 * 03/15/99 damiba overhaul. 18 * 04/06/99 stephen changed setDefault() to realloc and copy 19 * 06/14/99 stephen Changed calls to ures_open for new params 20 * 07/21/99 stephen Modified setDefault() to propagate to C++ 21 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, 22 * brought canonicalization code into line with spec 23 *****************************************************************************/ 24 25 /* 26 POSIX's locale format, from putil.c: [no spaces] 27 28 ll [ _CC ] [ . 
MM ] [ @ VV] 29 30 l = lang, C = ctry, M = charmap, V = variant 31 */ 32 33 #include "unicode/utypes.h" 34 #include "unicode/ustring.h" 35 #include "unicode/uloc.h" 36 37 #include "putilimp.h" 38 #include "ustr_imp.h" 39 #include "ulocimp.h" 40 #include "umutex.h" 41 #include "cstring.h" 42 #include "cmemory.h" 43 #include "locmap.h" 44 #include "uarrsort.h" 45 #include "uenumimp.h" 46 #include "uassert.h" 47 #include "charstr.h" 48 49 #include <stdio.h> /* for sprintf */ 50 51 U_NAMESPACE_USE 52 53 /* ### Declarations **************************************************/ 54 55 /* Locale stuff from locid.cpp */ 56 U_CFUNC void locale_set_default(const char *id); 57 U_CFUNC const char *locale_get_default(void); 58 U_CFUNC int32_t 59 locale_getKeywords(const char *localeID, 60 char prev, 61 char *keywords, int32_t keywordCapacity, 62 char *values, int32_t valuesCapacity, int32_t *valLen, 63 UBool valuesToo, 64 UErrorCode *status); 65 66 /* ### Data tables **************************************************/ 67 68 /** 69 * Table of language codes, both 2- and 3-letter, with preference 70 * given to 2-letter codes where possible. Includes 3-letter codes 71 * that lack a 2-letter equivalent. 72 * 73 * This list must be in sorted order. This list is returned directly 74 * to the user by some API. 75 * 76 * This list must be kept in sync with LANGUAGES_3, with corresponding 77 * entries matched. 78 * 79 * This table should be terminated with a NULL entry, followed by a 80 * second list, and another NULL entry. The first list is visible to 81 * user code when this array is returned by API. The second list 82 * contains codes we support, but do not expose through user API. 83 * 84 * Notes 85 * 86 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to 87 * include the revisions up to 2001/7/27 *CWB* 88 * 89 * The 3 character codes are the terminology codes like RFC 3066. 
This
 * is compatible with prior ICU codes
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 * table but now at the end of the table because 3 character codes are
 * duplicates.  This avoids bad searches going from 3 to 2 character
 * codes.
 *
 * The range qaa-qtz is reserved for local use
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
    /* First list: codes visible to user code. */
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni", "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
NULL,
    /* Second list: supported but not exposed through user API. */
    "in",  "iw",  "ji",  "jw",  "sh",                          /* obsolete language codes */
NULL
};

/*
 * Deprecated 2-letter language codes and their replacements.
 * The two arrays have the same length and end with two NULL entries, matching
 * the double-NULL layout of LANGUAGES.
 * NOTE(review): presumably DEPRECATED_LANGUAGES[i] is replaced by
 * REPLACEMENT_LANGUAGES[i]; the lookup code is outside this view -- confirm.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};

/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make eyeballing this baby easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
*/
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
    /* First list: entry i corresponds to entry i of the first list in LANGUAGES. */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL,
    /* Second list: 3-letter forms of the obsolete 2-letter codes shown in the comment. */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};

/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
*
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    /* First list: codes visible to user code. */
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,
    /* Second list: supported but not exposed through user API. */
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
NULL
};

/*
 * Deprecated 2-letter country codes and their replacements.  The commented-out
 * row inside REPLACEMENT_COUNTRIES repeats DEPRECATED_COUNTRIES so that each
 * replacement can be read directly below the code it replaces.  Both arrays
 * end with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};

/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make eyeballing this baby easier.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in COUNTRIES.
389 */ 390 static const char * const COUNTRIES_3[] = { 391 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */ 392 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", 393 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ 394 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", 395 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ 396 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", 397 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */ 398 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT", 399 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ 400 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", 401 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ 402 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", 403 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ 404 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", 405 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ 406 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", 407 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ 408 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", 409 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ 410 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", 411 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ 412 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", 413 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ 414 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", 415 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ 416 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", 417 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ 418 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", 419 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ 420 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", 421 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ 422 "LBN", "LCA", "LIE", "LKA", "LBR", 
"LSO", "LTU", "LUX", 423 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ 424 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", 425 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ 426 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", 427 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ 428 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", 429 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ 430 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", 431 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ 432 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", 433 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ 434 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", 435 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ 436 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", 437 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ 438 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", 439 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */ 440 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV", 441 /* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ 442 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", 443 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ 444 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", 445 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ 446 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", 447 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ 448 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", 449 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ 450 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", 451 NULL, 452 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ 453 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", 454 NULL 455 }; 456 457 typedef struct CanonicalizationMap { 458 const char *id; /* input ID */ 459 const char *canonicalID; /* 
canonicalized output ID */ 460 const char *keyword; /* keyword, or NULL if none */ 461 const char *value; /* keyword value, or NULL if kw==NULL */ 462 } CanonicalizationMap; 463 464 /** 465 * A map to canonicalize locale IDs. This handles a variety of 466 * different semantic kinds of transformations. 467 */ 468 static const CanonicalizationMap CANONICALIZE_MAP[] = { 469 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */ 470 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */ 471 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */ 472 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */ 473 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */ 474 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */ 475 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" }, 476 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */ 477 { "de_AT_PREEURO", "de_AT", "currency", "ATS" }, 478 { "de_DE_PREEURO", "de_DE", "currency", "DEM" }, 479 { "de_LU_PREEURO", "de_LU", "currency", "LUF" }, 480 { "el_GR_PREEURO", "el_GR", "currency", "GRD" }, 481 { "en_BE_PREEURO", "en_BE", "currency", "BEF" }, 482 { "en_IE_PREEURO", "en_IE", "currency", "IEP" }, 483 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */ 484 { "es_ES_PREEURO", "es_ES", "currency", "ESP" }, 485 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" }, 486 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" }, 487 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" }, 488 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" }, 489 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" }, 490 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" }, 491 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" }, 492 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */ 493 { "it_IT_PREEURO", "it_IT", "currency", "ITL" }, 494 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */ 495 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */ 496 { 
"nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, 497 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, 498 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, 499 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ 500 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ 501 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ 502 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */ 503 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */ 504 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */ 505 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */ 506 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */ 507 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */ 508 { "zh_GAN", "gan", NULL, NULL }, /* registered name */ 509 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */ 510 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */ 511 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */ 512 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */ 513 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */ 514 { "zh_YUE", "yue", NULL, NULL }, /* registered name */ 515 }; 516 517 typedef struct VariantMap { 518 const char *variant; /* input ID */ 519 const char *keyword; /* keyword, or NULL if none */ 520 const char *value; /* keyword value, or NULL if kw==NULL */ 521 } VariantMap; 522 523 static const VariantMap VARIANT_MAP[] = { 524 { "EURO", "currency", "EUR" }, 525 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */ 526 { "STROKE", "collation", "stroke" } /* Solaris variant */ 527 }; 528 529 /* ### BCP47 Conversion *******************************************/ 530 /* Test if the locale id has BCP47 u extension and does not have '@' */ 531 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) 532 /* Converts the BCP47 id to Unicode id. 
Does nothing to id if conversion fails */ 533 #define _ConvertBCP47(finalID, id, buffer, length,err) \ 534 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \ 535 U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \ 536 finalID=id; \ 537 if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \ 538 } else { \ 539 finalID=buffer; \ 540 } 541 /* Gets the size of the shortest subtag in the given localeID. */ 542 static int32_t getShortestSubtagLength(const char *localeID) { 543 int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID)); 544 int32_t length = localeIDLength; 545 int32_t tmpLength = 0; 546 int32_t i; 547 UBool reset = TRUE; 548 549 for (i = 0; i < localeIDLength; i++) { 550 if (localeID[i] != '_' && localeID[i] != '-') { 551 if (reset) { 552 tmpLength = 0; 553 reset = FALSE; 554 } 555 tmpLength++; 556 } else { 557 if (tmpLength != 0 && tmpLength < length) { 558 length = tmpLength; 559 } 560 reset = TRUE; 561 } 562 } 563 564 return length; 565 } 566 567 /* ### Keywords **************************************************/ 568 #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) 569 #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) 570 /* Punctuation/symbols allowed in legacy key values */ 571 #define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/') 572 573 #define ULOC_KEYWORD_BUFFER_LEN 25 574 #define ULOC_MAX_NO_KEYWORDS 25 575 576 U_CAPI const char * U_EXPORT2 577 locale_getKeywordsStart(const char *localeID) { 578 const char *result = NULL; 579 if((result = uprv_strchr(localeID, '@')) != NULL) { 580 return result; 581 } 582 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) 583 else { 584 /* We do this because the @ sign is variant, and the @ sign used on one 585 EBCDIC machine won't be compiled the same way on other EBCDIC based 586 machines. 
*/ 587 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; 588 const uint8_t *charToFind = ebcdicSigns; 589 while(*charToFind) { 590 if((result = uprv_strchr(localeID, *charToFind)) != NULL) { 591 return result; 592 } 593 charToFind++; 594 } 595 } 596 #endif 597 return NULL; 598 } 599 600 /** 601 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] 602 * @param keywordName incoming name to be canonicalized 603 * @param status return status (keyword too long) 604 * @return length of the keyword name 605 */ 606 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) 607 { 608 int32_t keywordNameLen = 0; 609 610 for (; *keywordName != 0; keywordName++) { 611 if (!UPRV_ISALPHANUM(*keywordName)) { 612 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ 613 return 0; 614 } 615 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) { 616 buf[keywordNameLen++] = uprv_tolower(*keywordName); 617 } else { 618 /* keyword name too long for internal buffer */ 619 *status = U_INTERNAL_PROGRAM_ERROR; 620 return 0; 621 } 622 } 623 if (keywordNameLen == 0) { 624 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */ 625 return 0; 626 } 627 buf[keywordNameLen] = 0; /* terminate */ 628 629 return keywordNameLen; 630 } 631 632 typedef struct { 633 char keyword[ULOC_KEYWORD_BUFFER_LEN]; 634 int32_t keywordLen; 635 const char *valueStart; 636 int32_t valueLen; 637 } KeywordStruct; 638 639 static int32_t U_CALLCONV 640 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { 641 const char* leftString = ((const KeywordStruct *)left)->keyword; 642 const char* rightString = ((const KeywordStruct *)right)->keyword; 643 return uprv_strcmp(leftString, rightString); 644 } 645 646 /** 647 * Both addKeyword and addValue must already be in canonical form. 648 * Either both addKeyword and addValue are NULL, or neither is NULL. 
649 * If they are not NULL they must be zero terminated. 650 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword. 651 */ 652 static int32_t 653 _getKeywords(const char *localeID, 654 char prev, 655 char *keywords, int32_t keywordCapacity, 656 char *values, int32_t valuesCapacity, int32_t *valLen, 657 UBool valuesToo, 658 const char* addKeyword, 659 const char* addValue, 660 UErrorCode *status) 661 { 662 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; 663 664 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; 665 int32_t numKeywords = 0; 666 const char* pos = localeID; 667 const char* equalSign = NULL; 668 const char* semicolon = NULL; 669 int32_t i = 0, j, n; 670 int32_t keywordsLen = 0; 671 int32_t valuesLen = 0; 672 673 if(prev == '@') { /* start of keyword definition */ 674 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ 675 do { 676 UBool duplicate = FALSE; 677 /* skip leading spaces */ 678 while(*pos == ' ') { 679 pos++; 680 } 681 if (!*pos) { /* handle trailing "; " */ 682 break; 683 } 684 if(numKeywords == maxKeywords) { 685 *status = U_INTERNAL_PROGRAM_ERROR; 686 return 0; 687 } 688 equalSign = uprv_strchr(pos, '='); 689 semicolon = uprv_strchr(pos, ';'); 690 /* lack of '=' [foo@currency] is illegal */ 691 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ 692 if(!equalSign || (semicolon && semicolon<equalSign)) { 693 *status = U_INVALID_FORMAT_ERROR; 694 return 0; 695 } 696 /* need to normalize both keyword and keyword name */ 697 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) { 698 /* keyword name too long for internal buffer */ 699 *status = U_INTERNAL_PROGRAM_ERROR; 700 return 0; 701 } 702 for(i = 0, n = 0; i < equalSign - pos; ++i) { 703 if (pos[i] != ' ') { 704 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); 705 } 706 } 707 708 /* zero-length keyword is an error. 
*/ 709 if (n == 0) { 710 *status = U_INVALID_FORMAT_ERROR; 711 return 0; 712 } 713 714 keywordList[numKeywords].keyword[n] = 0; 715 keywordList[numKeywords].keywordLen = n; 716 /* now grab the value part. First we skip the '=' */ 717 equalSign++; 718 /* then we leading spaces */ 719 while(*equalSign == ' ') { 720 equalSign++; 721 } 722 723 /* Premature end or zero-length value */ 724 if (!*equalSign || equalSign == semicolon) { 725 *status = U_INVALID_FORMAT_ERROR; 726 return 0; 727 } 728 729 keywordList[numKeywords].valueStart = equalSign; 730 731 pos = semicolon; 732 i = 0; 733 if(pos) { 734 while(*(pos - i - 1) == ' ') { 735 i++; 736 } 737 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); 738 pos++; 739 } else { 740 i = (int32_t)uprv_strlen(equalSign); 741 while(i && equalSign[i-1] == ' ') { 742 i--; 743 } 744 keywordList[numKeywords].valueLen = i; 745 } 746 /* If this is a duplicate keyword, then ignore it */ 747 for (j=0; j<numKeywords; ++j) { 748 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) { 749 duplicate = TRUE; 750 break; 751 } 752 } 753 if (!duplicate) { 754 ++numKeywords; 755 } 756 } while(pos); 757 758 /* Handle addKeyword/addValue. */ 759 if (addKeyword != NULL) { 760 UBool duplicate = FALSE; 761 U_ASSERT(addValue != NULL); 762 /* Search for duplicate; if found, do nothing. Explicit keyword 763 overrides addKeyword. 
*/ 764 for (j=0; j<numKeywords; ++j) { 765 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) { 766 duplicate = TRUE; 767 break; 768 } 769 } 770 if (!duplicate) { 771 if (numKeywords == maxKeywords) { 772 *status = U_INTERNAL_PROGRAM_ERROR; 773 return 0; 774 } 775 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword); 776 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword); 777 keywordList[numKeywords].valueStart = addValue; 778 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue); 779 ++numKeywords; 780 } 781 } else { 782 U_ASSERT(addValue == NULL); 783 } 784 785 /* now we have a list of keywords */ 786 /* we need to sort it */ 787 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status); 788 789 /* Now construct the keyword part */ 790 for(i = 0; i < numKeywords; i++) { 791 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) { 792 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword); 793 if(valuesToo) { 794 keywords[keywordsLen + keywordList[i].keywordLen] = '='; 795 } else { 796 keywords[keywordsLen + keywordList[i].keywordLen] = 0; 797 } 798 } 799 keywordsLen += keywordList[i].keywordLen + 1; 800 if(valuesToo) { 801 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) { 802 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); 803 } 804 keywordsLen += keywordList[i].valueLen; 805 806 if(i < numKeywords - 1) { 807 if(keywordsLen < keywordCapacity) { 808 keywords[keywordsLen] = ';'; 809 } 810 keywordsLen++; 811 } 812 } 813 if(values) { 814 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) { 815 uprv_strcpy(values+valuesLen, keywordList[i].valueStart); 816 values[valuesLen + keywordList[i].valueLen] = 0; 817 } 818 valuesLen += keywordList[i].valueLen + 1; 819 } 820 } 821 if(values) { 822 values[valuesLen] = 0; 823 if(valLen) { 824 *valLen = valuesLen; 825 } 826 } 827 return u_terminateChars(keywords, 
keywordCapacity, keywordsLen, status); 828 } else { 829 return 0; 830 } 831 } 832 833 U_CFUNC int32_t 834 locale_getKeywords(const char *localeID, 835 char prev, 836 char *keywords, int32_t keywordCapacity, 837 char *values, int32_t valuesCapacity, int32_t *valLen, 838 UBool valuesToo, 839 UErrorCode *status) { 840 return _getKeywords(localeID, prev, keywords, keywordCapacity, 841 values, valuesCapacity, valLen, valuesToo, 842 NULL, NULL, status); 843 } 844 845 U_CAPI int32_t U_EXPORT2 846 uloc_getKeywordValue(const char* localeID, 847 const char* keywordName, 848 char* buffer, int32_t bufferCapacity, 849 UErrorCode* status) 850 { 851 const char* startSearchHere = NULL; 852 const char* nextSeparator = NULL; 853 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 854 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 855 int32_t result = 0; 856 857 if(status && U_SUCCESS(*status) && localeID) { 858 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 859 const char* tmpLocaleID; 860 861 if (keywordName == NULL || keywordName[0] == 0) { 862 *status = U_ILLEGAL_ARGUMENT_ERROR; 863 return 0; 864 } 865 866 locale_canonKeywordName(keywordNameBuffer, keywordName, status); 867 if(U_FAILURE(*status)) { 868 return 0; 869 } 870 871 if (_hasBCP47Extension(localeID)) { 872 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 873 } else { 874 tmpLocaleID=localeID; 875 } 876 877 startSearchHere = locale_getKeywordsStart(tmpLocaleID); 878 if(startSearchHere == NULL) { 879 /* no keywords, return at once */ 880 return 0; 881 } 882 883 /* find the first keyword */ 884 while(startSearchHere) { 885 const char* keyValueTail; 886 int32_t keyValueLen; 887 888 startSearchHere++; /* skip @ or ; */ 889 nextSeparator = uprv_strchr(startSearchHere, '='); 890 if(!nextSeparator) { 891 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ 892 return 0; 893 } 894 /* strip leading & trailing spaces (TC decided to tolerate these) */ 895 while(*startSearchHere == ' ') { 896 
startSearchHere++; 897 } 898 keyValueTail = nextSeparator; 899 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') { 900 keyValueTail--; 901 } 902 /* now keyValueTail points to first char after the keyName */ 903 /* copy & normalize keyName from locale */ 904 if (startSearchHere == keyValueTail) { 905 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ 906 return 0; 907 } 908 keyValueLen = 0; 909 while (startSearchHere < keyValueTail) { 910 if (!UPRV_ISALPHANUM(*startSearchHere)) { 911 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ 912 return 0; 913 } 914 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { 915 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++); 916 } else { 917 /* keyword name too long for internal buffer */ 918 *status = U_INTERNAL_PROGRAM_ERROR; 919 return 0; 920 } 921 } 922 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */ 923 924 startSearchHere = uprv_strchr(nextSeparator, ';'); 925 926 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { 927 /* current entry matches the keyword. */ 928 nextSeparator++; /* skip '=' */ 929 /* First strip leading & trailing spaces (TC decided to tolerate these) */ 930 while(*nextSeparator == ' ') { 931 nextSeparator++; 932 } 933 keyValueTail = (startSearchHere)? 
startSearchHere: nextSeparator + uprv_strlen(nextSeparator); 934 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') { 935 keyValueTail--; 936 } 937 /* Now copy the value, but check well-formedness */ 938 if (nextSeparator == keyValueTail) { 939 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */ 940 return 0; 941 } 942 keyValueLen = 0; 943 while (nextSeparator < keyValueTail) { 944 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) { 945 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ 946 return 0; 947 } 948 if (keyValueLen < bufferCapacity) { 949 /* Should we lowercase value to return here? Tests expect as-is. */ 950 buffer[keyValueLen++] = *nextSeparator++; 951 } else { /* keep advancing so we return correct length in case of overflow */ 952 keyValueLen++; 953 nextSeparator++; 954 } 955 } 956 result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status); 957 return result; 958 } 959 } 960 } 961 return 0; 962 } 963 964 U_CAPI int32_t U_EXPORT2 965 uloc_setKeywordValue(const char* keywordName, 966 const char* keywordValue, 967 char* buffer, int32_t bufferCapacity, 968 UErrorCode* status) 969 { 970 /* TODO: sorting. removal. 
*/ 971 int32_t keywordNameLen; 972 int32_t keywordValueLen; 973 int32_t bufLen; 974 int32_t needLen = 0; 975 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 976 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1]; 977 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 978 int32_t rc; 979 char* nextSeparator = NULL; 980 char* nextEqualsign = NULL; 981 char* startSearchHere = NULL; 982 char* keywordStart = NULL; 983 CharString updatedKeysAndValues; 984 int32_t updatedKeysAndValuesLen; 985 UBool handledInputKeyAndValue = FALSE; 986 char keyValuePrefix = '@'; 987 988 if(U_FAILURE(*status)) { 989 return -1; 990 } 991 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) { 992 *status = U_ILLEGAL_ARGUMENT_ERROR; 993 return 0; 994 } 995 bufLen = (int32_t)uprv_strlen(buffer); 996 if(bufferCapacity<bufLen) { 997 /* The capacity is less than the length?! Is this NULL terminated? */ 998 *status = U_ILLEGAL_ARGUMENT_ERROR; 999 return 0; 1000 } 1001 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status); 1002 if(U_FAILURE(*status)) { 1003 return 0; 1004 } 1005 1006 keywordValueLen = 0; 1007 if(keywordValue) { 1008 while (*keywordValue != 0) { 1009 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) { 1010 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ 1011 return 0; 1012 } 1013 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) { 1014 /* Should we force lowercase in value to set? 
*/ 1015 keywordValueBuffer[keywordValueLen++] = *keywordValue++; 1016 } else { 1017 /* keywordValue too long for internal buffer */ 1018 *status = U_INTERNAL_PROGRAM_ERROR; 1019 return 0; 1020 } 1021 } 1022 } 1023 keywordValueBuffer[keywordValueLen] = 0; /* terminate */ 1024 1025 startSearchHere = (char*)locale_getKeywordsStart(buffer); 1026 if(startSearchHere == NULL || (startSearchHere[1]==0)) { 1027 if(keywordValueLen == 0) { /* no keywords = nothing to remove */ 1028 return bufLen; 1029 } 1030 1031 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 1032 if(startSearchHere) { /* had a single @ */ 1033 needLen--; /* already had the @ */ 1034 /* startSearchHere points at the @ */ 1035 } else { 1036 startSearchHere=buffer+bufLen; 1037 } 1038 if(needLen >= bufferCapacity) { 1039 *status = U_BUFFER_OVERFLOW_ERROR; 1040 return needLen; /* no change */ 1041 } 1042 *startSearchHere++ = '@'; 1043 uprv_strcpy(startSearchHere, keywordNameBuffer); 1044 startSearchHere += keywordNameLen; 1045 *startSearchHere++ = '='; 1046 uprv_strcpy(startSearchHere, keywordValueBuffer); 1047 return needLen; 1048 } /* end shortcut - no @ */ 1049 1050 keywordStart = startSearchHere; 1051 /* search for keyword */ 1052 while(keywordStart) { 1053 const char* keyValueTail; 1054 int32_t keyValueLen; 1055 1056 keywordStart++; /* skip @ or ; */ 1057 nextEqualsign = uprv_strchr(keywordStart, '='); 1058 if (!nextEqualsign) { 1059 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ 1060 return 0; 1061 } 1062 /* strip leading & trailing spaces (TC decided to tolerate these) */ 1063 while(*keywordStart == ' ') { 1064 keywordStart++; 1065 } 1066 keyValueTail = nextEqualsign; 1067 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') { 1068 keyValueTail--; 1069 } 1070 /* now keyValueTail points to first char after the keyName */ 1071 /* copy & normalize keyName from locale */ 1072 if (keywordStart == keyValueTail) { 1073 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in 
passed-in locale */ 1074 return 0; 1075 } 1076 keyValueLen = 0; 1077 while (keywordStart < keyValueTail) { 1078 if (!UPRV_ISALPHANUM(*keywordStart)) { 1079 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ 1080 return 0; 1081 } 1082 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { 1083 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++); 1084 } else { 1085 /* keyword name too long for internal buffer */ 1086 *status = U_INTERNAL_PROGRAM_ERROR; 1087 return 0; 1088 } 1089 } 1090 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */ 1091 1092 nextSeparator = uprv_strchr(nextEqualsign, ';'); 1093 1094 /* start processing the value part */ 1095 nextEqualsign++; /* skip '=' */ 1096 /* First strip leading & trailing spaces (TC decided to tolerate these) */ 1097 while(*nextEqualsign == ' ') { 1098 nextEqualsign++; 1099 } 1100 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign); 1101 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') { 1102 keyValueTail--; 1103 } 1104 if (nextEqualsign == keyValueTail) { 1105 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */ 1106 return 0; 1107 } 1108 1109 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); 1110 if(rc == 0) { 1111 /* Current entry matches the input keyword. 
Update the entry */ 1112 if(keywordValueLen > 0) { /* updating a value */ 1113 updatedKeysAndValues.append(keyValuePrefix, *status); 1114 keyValuePrefix = ';'; /* for any subsequent key-value pair */ 1115 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); 1116 updatedKeysAndValues.append('=', *status); 1117 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); 1118 } /* else removing this entry, don't emit anything */ 1119 handledInputKeyAndValue = TRUE; 1120 } else { 1121 /* input keyword sorts earlier than current entry, add before current entry */ 1122 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) { 1123 /* insert new entry at this location */ 1124 updatedKeysAndValues.append(keyValuePrefix, *status); 1125 keyValuePrefix = ';'; /* for any subsequent key-value pair */ 1126 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); 1127 updatedKeysAndValues.append('=', *status); 1128 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); 1129 handledInputKeyAndValue = TRUE; 1130 } 1131 /* copy the current entry */ 1132 updatedKeysAndValues.append(keyValuePrefix, *status); 1133 keyValuePrefix = ';'; /* for any subsequent key-value pair */ 1134 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status); 1135 updatedKeysAndValues.append('=', *status); 1136 updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status); 1137 } 1138 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) { 1139 /* append new entry at the end, it sorts later than existing entries */ 1140 updatedKeysAndValues.append(keyValuePrefix, *status); 1141 /* skip keyValuePrefix update, no subsequent key-value pair */ 1142 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); 1143 updatedKeysAndValues.append('=', *status); 1144 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); 1145 handledInputKeyAndValue = TRUE; 
1146 } 1147 keywordStart = nextSeparator; 1148 } /* end loop searching */ 1149 1150 /* Any error from updatedKeysAndValues.append above would be internal and not due to 1151 * problems with the passed-in locale. So if we did encounter problems with the 1152 * passed-in locale above, those errors took precedence and overrode any error 1153 * status from updatedKeysAndValues.append, and also caused a return of 0. If there 1154 * are errors here they are from updatedKeysAndValues.append; they do cause an 1155 * error return but the passed-in locale is unmodified and the original bufLen is 1156 * returned. 1157 */ 1158 if (!handledInputKeyAndValue || U_FAILURE(*status)) { 1159 /* if input key/value specified removal of a keyword not present in locale, or 1160 * there was an error in CharString.append, leave original locale alone. */ 1161 return bufLen; 1162 } 1163 1164 updatedKeysAndValuesLen = updatedKeysAndValues.length(); 1165 /* needLen = length of the part before '@' + length of updated key-value part including '@' */ 1166 needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen; 1167 if(needLen >= bufferCapacity) { 1168 *status = U_BUFFER_OVERFLOW_ERROR; 1169 return needLen; /* no change */ 1170 } 1171 if (updatedKeysAndValuesLen > 0) { 1172 uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen); 1173 } 1174 buffer[needLen]=0; 1175 return needLen; 1176 } 1177 1178 /* ### ID parsing implementation **************************************************/ 1179 1180 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) 1181 1182 /*returns TRUE if one of the special prefixes is here (s=string) 1183 'x-' or 'i-' */ 1184 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) 1185 1186 /* Dot terminates it because of POSIX form where dot precedes the codepage 1187 * except for variant 1188 */ 1189 #define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) 1190 1191 static char* _strnchr(const char* str, 
int32_t len, char c) { 1192 U_ASSERT(str != 0 && len >= 0); 1193 while (len-- != 0) { 1194 char d = *str; 1195 if (d == c) { 1196 return (char*) str; 1197 } else if (d == 0) { 1198 break; 1199 } 1200 ++str; 1201 } 1202 return NULL; 1203 } 1204 1205 /** 1206 * Lookup 'key' in the array 'list'. The array 'list' should contain 1207 * a NULL entry, followed by more entries, and a second NULL entry. 1208 * 1209 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or 1210 * COUNTRIES_3. 1211 */ 1212 static int16_t _findIndex(const char* const* list, const char* key) 1213 { 1214 const char* const* anchor = list; 1215 int32_t pass = 0; 1216 1217 /* Make two passes through two NULL-terminated arrays at 'list' */ 1218 while (pass++ < 2) { 1219 while (*list) { 1220 if (uprv_strcmp(key, *list) == 0) { 1221 return (int16_t)(list - anchor); 1222 } 1223 list++; 1224 } 1225 ++list; /* skip final NULL *CWB*/ 1226 } 1227 return -1; 1228 } 1229 1230 /* count the length of src while copying it to dest; return strlen(src) */ 1231 static inline int32_t 1232 _copyCount(char *dest, int32_t destCapacity, const char *src) { 1233 const char *anchor; 1234 char c; 1235 1236 anchor=src; 1237 for(;;) { 1238 if((c=*src)==0) { 1239 return (int32_t)(src-anchor); 1240 } 1241 if(destCapacity<=0) { 1242 return (int32_t)((src-anchor)+uprv_strlen(src)); 1243 } 1244 ++src; 1245 *dest++=c; 1246 --destCapacity; 1247 } 1248 } 1249 1250 U_CFUNC const char* 1251 uloc_getCurrentCountryID(const char* oldID){ 1252 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); 1253 if (offset >= 0) { 1254 return REPLACEMENT_COUNTRIES[offset]; 1255 } 1256 return oldID; 1257 } 1258 U_CFUNC const char* 1259 uloc_getCurrentLanguageID(const char* oldID){ 1260 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); 1261 if (offset >= 0) { 1262 return REPLACEMENT_LANGUAGES[offset]; 1263 } 1264 return oldID; 1265 } 1266 /* 1267 * the internal functions _getLanguage(), _getCountry(), _getVariant() 1268 * avoid 
duplicating code to handle the earlier locale ID pieces 1269 * in the functions for the later ones by 1270 * setting the *pEnd pointer to where they stopped parsing 1271 * 1272 * TODO try to use this in Locale 1273 */ 1274 U_CFUNC int32_t 1275 ulocimp_getLanguage(const char *localeID, 1276 char *language, int32_t languageCapacity, 1277 const char **pEnd) { 1278 int32_t i=0; 1279 int32_t offset; 1280 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ 1281 1282 /* if it starts with i- or x- then copy that prefix */ 1283 if(_isIDPrefix(localeID)) { 1284 if(i<languageCapacity) { 1285 language[i]=(char)uprv_tolower(*localeID); 1286 } 1287 if(i<languageCapacity) { 1288 language[i+1]='-'; 1289 } 1290 i+=2; 1291 localeID+=2; 1292 } 1293 1294 /* copy the language as far as possible and count its length */ 1295 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { 1296 if(i<languageCapacity) { 1297 language[i]=(char)uprv_tolower(*localeID); 1298 } 1299 if(i<3) { 1300 U_ASSERT(i>=0); 1301 lang[i]=(char)uprv_tolower(*localeID); 1302 } 1303 i++; 1304 localeID++; 1305 } 1306 1307 if(i==3) { 1308 /* convert 3 character code to 2 character code if possible *CWB*/ 1309 offset=_findIndex(LANGUAGES_3, lang); 1310 if(offset>=0) { 1311 i=_copyCount(language, languageCapacity, LANGUAGES[offset]); 1312 } 1313 } 1314 1315 if(pEnd!=NULL) { 1316 *pEnd=localeID; 1317 } 1318 return i; 1319 } 1320 1321 U_CFUNC int32_t 1322 ulocimp_getScript(const char *localeID, 1323 char *script, int32_t scriptCapacity, 1324 const char **pEnd) 1325 { 1326 int32_t idLen = 0; 1327 1328 if (pEnd != NULL) { 1329 *pEnd = localeID; 1330 } 1331 1332 /* copy the second item as far as possible and count its length */ 1333 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]) 1334 && uprv_isASCIILetter(localeID[idLen])) { 1335 idLen++; 1336 } 1337 1338 /* If it's exactly 4 characters long, then it's a script and not a country. 
*/ 1339 if (idLen == 4) { 1340 int32_t i; 1341 if (pEnd != NULL) { 1342 *pEnd = localeID+idLen; 1343 } 1344 if(idLen > scriptCapacity) { 1345 idLen = scriptCapacity; 1346 } 1347 if (idLen >= 1) { 1348 script[0]=(char)uprv_toupper(*(localeID++)); 1349 } 1350 for (i = 1; i < idLen; i++) { 1351 script[i]=(char)uprv_tolower(*(localeID++)); 1352 } 1353 } 1354 else { 1355 idLen = 0; 1356 } 1357 return idLen; 1358 } 1359 1360 U_CFUNC int32_t 1361 ulocimp_getCountry(const char *localeID, 1362 char *country, int32_t countryCapacity, 1363 const char **pEnd) 1364 { 1365 int32_t idLen=0; 1366 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; 1367 int32_t offset; 1368 1369 /* copy the country as far as possible and count its length */ 1370 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { 1371 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ 1372 cnty[idLen]=(char)uprv_toupper(localeID[idLen]); 1373 } 1374 idLen++; 1375 } 1376 1377 /* the country should be either length 2 or 3 */ 1378 if (idLen == 2 || idLen == 3) { 1379 UBool gotCountry = FALSE; 1380 /* convert 3 character code to 2 character code if possible *CWB*/ 1381 if(idLen==3) { 1382 offset=_findIndex(COUNTRIES_3, cnty); 1383 if(offset>=0) { 1384 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]); 1385 gotCountry = TRUE; 1386 } 1387 } 1388 if (!gotCountry) { 1389 int32_t i = 0; 1390 for (i = 0; i < idLen; i++) { 1391 if (i < countryCapacity) { 1392 country[i]=(char)uprv_toupper(localeID[i]); 1393 } 1394 } 1395 } 1396 localeID+=idLen; 1397 } else { 1398 idLen = 0; 1399 } 1400 1401 if(pEnd!=NULL) { 1402 *pEnd=localeID; 1403 } 1404 1405 return idLen; 1406 } 1407 1408 /** 1409 * @param needSeparator if true, then add leading '_' if any variants 1410 * are added to 'variant' 1411 */ 1412 static int32_t 1413 _getVariantEx(const char *localeID, 1414 char prev, 1415 char *variant, int32_t variantCapacity, 1416 UBool needSeparator) { 1417 int32_t i=0; 1418 1419 /* get one or more variant tags 
and separate them with '_' */ 1420 if(_isIDSeparator(prev)) { 1421 /* get a variant string after a '-' or '_' */ 1422 while(!_isTerminator(*localeID)) { 1423 if (needSeparator) { 1424 if (i<variantCapacity) { 1425 variant[i] = '_'; 1426 } 1427 ++i; 1428 needSeparator = FALSE; 1429 } 1430 if(i<variantCapacity) { 1431 variant[i]=(char)uprv_toupper(*localeID); 1432 if(variant[i]=='-') { 1433 variant[i]='_'; 1434 } 1435 } 1436 i++; 1437 localeID++; 1438 } 1439 } 1440 1441 /* if there is no variant tag after a '-' or '_' then look for '@' */ 1442 if(i==0) { 1443 if(prev=='@') { 1444 /* keep localeID */ 1445 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) { 1446 ++localeID; /* point after the '@' */ 1447 } else { 1448 return 0; 1449 } 1450 while(!_isTerminator(*localeID)) { 1451 if (needSeparator) { 1452 if (i<variantCapacity) { 1453 variant[i] = '_'; 1454 } 1455 ++i; 1456 needSeparator = FALSE; 1457 } 1458 if(i<variantCapacity) { 1459 variant[i]=(char)uprv_toupper(*localeID); 1460 if(variant[i]=='-' || variant[i]==',') { 1461 variant[i]='_'; 1462 } 1463 } 1464 i++; 1465 localeID++; 1466 } 1467 } 1468 1469 return i; 1470 } 1471 1472 static int32_t 1473 _getVariant(const char *localeID, 1474 char prev, 1475 char *variant, int32_t variantCapacity) { 1476 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE); 1477 } 1478 1479 /** 1480 * Delete ALL instances of a variant from the given list of one or 1481 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR". 1482 * @param variants the source string of one or more variants, 1483 * separated by '_'. This will be MODIFIED IN PLACE. Not zero 1484 * terminated; if it is, trailing zero will NOT be maintained. 1485 * @param variantsLen length of variants 1486 * @param toDelete variant to delete, without separators, e.g. 
"EURO" 1487 * or "PREEURO"; not zero terminated 1488 * @param toDeleteLen length of toDelete 1489 * @return number of characters deleted from variants 1490 */ 1491 static int32_t 1492 _deleteVariant(char* variants, int32_t variantsLen, 1493 const char* toDelete, int32_t toDeleteLen) 1494 { 1495 int32_t delta = 0; /* number of chars deleted */ 1496 for (;;) { 1497 UBool flag = FALSE; 1498 if (variantsLen < toDeleteLen) { 1499 return delta; 1500 } 1501 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && 1502 (variantsLen == toDeleteLen || 1503 (flag=(variants[toDeleteLen] == '_')))) 1504 { 1505 int32_t d = toDeleteLen + (flag?1:0); 1506 variantsLen -= d; 1507 delta += d; 1508 if (variantsLen > 0) { 1509 uprv_memmove(variants, variants+d, variantsLen); 1510 } 1511 } else { 1512 char* p = _strnchr(variants, variantsLen, '_'); 1513 if (p == NULL) { 1514 return delta; 1515 } 1516 ++p; 1517 variantsLen -= (int32_t)(p - variants); 1518 variants = p; 1519 } 1520 } 1521 } 1522 1523 /* Keyword enumeration */ 1524 1525 typedef struct UKeywordsContext { 1526 char* keywords; 1527 char* current; 1528 } UKeywordsContext; 1529 1530 U_CDECL_BEGIN 1531 1532 static void U_CALLCONV 1533 uloc_kw_closeKeywords(UEnumeration *enumerator) { 1534 uprv_free(((UKeywordsContext *)enumerator->context)->keywords); 1535 uprv_free(enumerator->context); 1536 uprv_free(enumerator); 1537 } 1538 1539 static int32_t U_CALLCONV 1540 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { 1541 char *kw = ((UKeywordsContext *)en->context)->keywords; 1542 int32_t result = 0; 1543 while(*kw) { 1544 result++; 1545 kw += uprv_strlen(kw)+1; 1546 } 1547 return result; 1548 } 1549 1550 static const char * U_CALLCONV 1551 uloc_kw_nextKeyword(UEnumeration* en, 1552 int32_t* resultLength, 1553 UErrorCode* /*status*/) { 1554 const char* result = ((UKeywordsContext *)en->context)->current; 1555 int32_t len = 0; 1556 if(*result) { 1557 len = (int32_t)uprv_strlen(((UKeywordsContext 
*)en->context)->current); 1558 ((UKeywordsContext *)en->context)->current += len+1; 1559 } else { 1560 result = NULL; 1561 } 1562 if (resultLength) { 1563 *resultLength = len; 1564 } 1565 return result; 1566 } 1567 1568 static void U_CALLCONV 1569 uloc_kw_resetKeywords(UEnumeration* en, 1570 UErrorCode* /*status*/) { 1571 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; 1572 } 1573 1574 U_CDECL_END 1575 1576 1577 static const UEnumeration gKeywordsEnum = { 1578 NULL, 1579 NULL, 1580 uloc_kw_closeKeywords, 1581 uloc_kw_countKeywords, 1582 uenum_unextDefault, 1583 uloc_kw_nextKeyword, 1584 uloc_kw_resetKeywords 1585 }; 1586 1587 U_CAPI UEnumeration* U_EXPORT2 1588 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) 1589 { 1590 UKeywordsContext *myContext = NULL; 1591 UEnumeration *result = NULL; 1592 1593 if(U_FAILURE(*status)) { 1594 return NULL; 1595 } 1596 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 1597 /* Null pointer test */ 1598 if (result == NULL) { 1599 *status = U_MEMORY_ALLOCATION_ERROR; 1600 return NULL; 1601 } 1602 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); 1603 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))); 1604 if (myContext == NULL) { 1605 *status = U_MEMORY_ALLOCATION_ERROR; 1606 uprv_free(result); 1607 return NULL; 1608 } 1609 myContext->keywords = (char *)uprv_malloc(keywordListSize+1); 1610 uprv_memcpy(myContext->keywords, keywordList, keywordListSize); 1611 myContext->keywords[keywordListSize] = 0; 1612 myContext->current = myContext->keywords; 1613 result->context = myContext; 1614 return result; 1615 } 1616 1617 U_CAPI UEnumeration* U_EXPORT2 1618 uloc_openKeywords(const char* localeID, 1619 UErrorCode* status) 1620 { 1621 int32_t i=0; 1622 char keywords[256]; 1623 int32_t keywordsCapacity = 256; 1624 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1625 const char* tmpLocaleID; 1626 1627 if(status==NULL 
|| U_FAILURE(*status)) { 1628 return 0; 1629 } 1630 1631 if (_hasBCP47Extension(localeID)) { 1632 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 1633 } else { 1634 if (localeID==NULL) { 1635 localeID=uloc_getDefault(); 1636 } 1637 tmpLocaleID=localeID; 1638 } 1639 1640 /* Skip the language */ 1641 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 1642 if(_isIDSeparator(*tmpLocaleID)) { 1643 const char *scriptID; 1644 /* Skip the script if available */ 1645 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 1646 if(scriptID != tmpLocaleID+1) { 1647 /* Found optional script */ 1648 tmpLocaleID = scriptID; 1649 } 1650 /* Skip the Country */ 1651 if (_isIDSeparator(*tmpLocaleID)) { 1652 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID); 1653 if(_isIDSeparator(*tmpLocaleID)) { 1654 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0); 1655 } 1656 } 1657 } 1658 1659 /* keywords are located after '@' */ 1660 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { 1661 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status); 1662 } 1663 1664 if(i) { 1665 return uloc_openKeywordList(keywords, i, status); 1666 } else { 1667 return NULL; 1668 } 1669 } 1670 1671 1672 /* bit-flags for 'options' parameter of _canonicalize */ 1673 #define _ULOC_STRIP_KEYWORDS 0x2 1674 #define _ULOC_CANONICALIZE 0x1 1675 1676 #define OPTION_SET(options, mask) ((options & mask) != 0) 1677 1678 static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; 1679 #define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default) 1680 1681 /** 1682 * Canonicalize the given localeID, to level 1 or to level 2, 1683 * depending on the options. To specify level 1, pass in options=0. 1684 * To specify level 2, pass in options=_ULOC_CANONICALIZE. 1685 * 1686 * This is the code underlying uloc_getName and uloc_canonicalize. 
1687 */ 1688 static int32_t 1689 _canonicalize(const char* localeID, 1690 char* result, 1691 int32_t resultCapacity, 1692 uint32_t options, 1693 UErrorCode* err) { 1694 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity; 1695 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1696 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1697 const char* origLocaleID; 1698 const char* tmpLocaleID; 1699 const char* keywordAssign = NULL; 1700 const char* separatorIndicator = NULL; 1701 const char* addKeyword = NULL; 1702 const char* addValue = NULL; 1703 char* name; 1704 char* variant = NULL; /* pointer into name, or NULL */ 1705 1706 if (U_FAILURE(*err)) { 1707 return 0; 1708 } 1709 1710 if (_hasBCP47Extension(localeID)) { 1711 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 1712 } else { 1713 if (localeID==NULL) { 1714 localeID=uloc_getDefault(); 1715 } 1716 tmpLocaleID=localeID; 1717 } 1718 1719 origLocaleID=tmpLocaleID; 1720 1721 /* if we are doing a full canonicalization, then put results in 1722 localeBuffer, if necessary; otherwise send them to result. 
*/ 1723 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/ 1724 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) { 1725 name = localeBuffer; 1726 nameCapacity = (int32_t)sizeof(localeBuffer); 1727 } else { 1728 name = result; 1729 nameCapacity = resultCapacity; 1730 } 1731 1732 /* get all pieces, one after another, and separate with '_' */ 1733 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID); 1734 1735 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) { 1736 const char *d = uloc_getDefault(); 1737 1738 len = (int32_t)uprv_strlen(d); 1739 1740 if (name != NULL) { 1741 uprv_strncpy(name, d, len); 1742 } 1743 } else if(_isIDSeparator(*tmpLocaleID)) { 1744 const char *scriptID; 1745 1746 ++fieldCount; 1747 if(len<nameCapacity) { 1748 name[len]='_'; 1749 } 1750 ++len; 1751 1752 scriptSize=ulocimp_getScript(tmpLocaleID+1, 1753 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID); 1754 if(scriptSize > 0) { 1755 /* Found optional script */ 1756 tmpLocaleID = scriptID; 1757 ++fieldCount; 1758 len+=scriptSize; 1759 if (_isIDSeparator(*tmpLocaleID)) { 1760 /* If there is something else, then we add the _ */ 1761 if(len<nameCapacity) { 1762 name[len]='_'; 1763 } 1764 ++len; 1765 } 1766 } 1767 1768 if (_isIDSeparator(*tmpLocaleID)) { 1769 const char *cntryID; 1770 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, 1771 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID); 1772 if (cntrySize > 0) { 1773 /* Found optional country */ 1774 tmpLocaleID = cntryID; 1775 len+=cntrySize; 1776 } 1777 if(_isIDSeparator(*tmpLocaleID)) { 1778 /* If there is something else, then we add the _ if we found country before. */ 1779 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) { 1780 ++fieldCount; 1781 if(len<nameCapacity) { 1782 name[len]='_'; 1783 } 1784 ++len; 1785 } 1786 1787 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, 1788 (len<nameCapacity ? 
name+len : NULL), nameCapacity-len); 1789 if (variantSize > 0) { 1790 variant = len<nameCapacity ? name+len : NULL; 1791 len += variantSize; 1792 tmpLocaleID += variantSize + 1; /* skip '_' and variant */ 1793 } 1794 } 1795 } 1796 } 1797 1798 /* Copy POSIX-style charset specifier, if any [mr.utf8] */ 1799 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') { 1800 UBool done = FALSE; 1801 do { 1802 char c = *tmpLocaleID; 1803 switch (c) { 1804 case 0: 1805 case '@': 1806 done = TRUE; 1807 break; 1808 default: 1809 if (len<nameCapacity) { 1810 name[len] = c; 1811 } 1812 ++len; 1813 ++tmpLocaleID; 1814 break; 1815 } 1816 } while (!done); 1817 } 1818 1819 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';' 1820 After this, tmpLocaleID either points to '@' or is NULL */ 1821 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) { 1822 keywordAssign = uprv_strchr(tmpLocaleID, '='); 1823 separatorIndicator = uprv_strchr(tmpLocaleID, ';'); 1824 } 1825 1826 /* Copy POSIX-style variant, if any [mr@FOO] */ 1827 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && 1828 tmpLocaleID != NULL && keywordAssign == NULL) { 1829 for (;;) { 1830 char c = *tmpLocaleID; 1831 if (c == 0) { 1832 break; 1833 } 1834 if (len<nameCapacity) { 1835 name[len] = c; 1836 } 1837 ++len; 1838 ++tmpLocaleID; 1839 } 1840 } 1841 1842 if (OPTION_SET(options, _ULOC_CANONICALIZE)) { 1843 /* Handle @FOO variant if @ is present and not followed by = */ 1844 if (tmpLocaleID!=NULL && keywordAssign==NULL) { 1845 int32_t posixVariantSize; 1846 /* Add missing '_' if needed */ 1847 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) { 1848 do { 1849 if(len<nameCapacity) { 1850 name[len]='_'; 1851 } 1852 ++len; 1853 ++fieldCount; 1854 } while(fieldCount<2); 1855 } 1856 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len, 1857 (UBool)(variantSize > 0)); 1858 if (posixVariantSize > 0) { 1859 if (variant == NULL) { 1860 variant = name+len; 
1861 } 1862 len += posixVariantSize; 1863 variantSize += posixVariantSize; 1864 } 1865 } 1866 1867 /* Handle generic variants first */ 1868 if (variant) { 1869 for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) { 1870 const char* variantToCompare = VARIANT_MAP[j].variant; 1871 int32_t n = (int32_t)uprv_strlen(variantToCompare); 1872 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n); 1873 len -= variantLen; 1874 if (variantLen > 0) { 1875 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */ 1876 --len; 1877 } 1878 addKeyword = VARIANT_MAP[j].keyword; 1879 addValue = VARIANT_MAP[j].value; 1880 break; 1881 } 1882 } 1883 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */ 1884 --len; 1885 } 1886 } 1887 1888 /* Look up the ID in the canonicalization map */ 1889 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) { 1890 const char* id = CANONICALIZE_MAP[j].id; 1891 int32_t n = (int32_t)uprv_strlen(id); 1892 if (len == n && uprv_strncmp(name, id, n) == 0) { 1893 if (n == 0 && tmpLocaleID != NULL) { 1894 break; /* Don't remap "" if keywords present */ 1895 } 1896 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID); 1897 if (CANONICALIZE_MAP[j].keyword) { 1898 addKeyword = CANONICALIZE_MAP[j].keyword; 1899 addValue = CANONICALIZE_MAP[j].value; 1900 } 1901 break; 1902 } 1903 } 1904 } 1905 1906 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) { 1907 if (tmpLocaleID!=NULL && keywordAssign!=NULL && 1908 (!separatorIndicator || separatorIndicator > keywordAssign)) { 1909 if(len<nameCapacity) { 1910 name[len]='@'; 1911 } 1912 ++len; 1913 ++fieldCount; 1914 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len, 1915 NULL, 0, NULL, TRUE, addKeyword, addValue, err); 1916 } else if (addKeyword != NULL) { 1917 U_ASSERT(addValue != NULL && len < nameCapacity); 1918 /* inelegant but works -- later make _getKeywords do this? 
*/ 1919 len += _copyCount(name+len, nameCapacity-len, "@"); 1920 len += _copyCount(name+len, nameCapacity-len, addKeyword); 1921 len += _copyCount(name+len, nameCapacity-len, "="); 1922 len += _copyCount(name+len, nameCapacity-len, addValue); 1923 } 1924 } 1925 1926 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) { 1927 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len); 1928 } 1929 1930 return u_terminateChars(result, resultCapacity, len, err); 1931 } 1932 1933 /* ### ID parsing API **************************************************/ 1934 1935 U_CAPI int32_t U_EXPORT2 1936 uloc_getParent(const char* localeID, 1937 char* parent, 1938 int32_t parentCapacity, 1939 UErrorCode* err) 1940 { 1941 const char *lastUnderscore; 1942 int32_t i; 1943 1944 if (U_FAILURE(*err)) 1945 return 0; 1946 1947 if (localeID == NULL) 1948 localeID = uloc_getDefault(); 1949 1950 lastUnderscore=uprv_strrchr(localeID, '_'); 1951 if(lastUnderscore!=NULL) { 1952 i=(int32_t)(lastUnderscore-localeID); 1953 } else { 1954 i=0; 1955 } 1956 1957 if(i>0 && parent != localeID) { 1958 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity)); 1959 } 1960 return u_terminateChars(parent, parentCapacity, i, err); 1961 } 1962 1963 U_CAPI int32_t U_EXPORT2 1964 uloc_getLanguage(const char* localeID, 1965 char* language, 1966 int32_t languageCapacity, 1967 UErrorCode* err) 1968 { 1969 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. 
*CWB*/ 1970 int32_t i=0; 1971 1972 if (err==NULL || U_FAILURE(*err)) { 1973 return 0; 1974 } 1975 1976 if(localeID==NULL) { 1977 localeID=uloc_getDefault(); 1978 } 1979 1980 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL); 1981 return u_terminateChars(language, languageCapacity, i, err); 1982 } 1983 1984 U_CAPI int32_t U_EXPORT2 1985 uloc_getScript(const char* localeID, 1986 char* script, 1987 int32_t scriptCapacity, 1988 UErrorCode* err) 1989 { 1990 int32_t i=0; 1991 1992 if(err==NULL || U_FAILURE(*err)) { 1993 return 0; 1994 } 1995 1996 if(localeID==NULL) { 1997 localeID=uloc_getDefault(); 1998 } 1999 2000 /* skip the language */ 2001 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 2002 if(_isIDSeparator(*localeID)) { 2003 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL); 2004 } 2005 return u_terminateChars(script, scriptCapacity, i, err); 2006 } 2007 2008 U_CAPI int32_t U_EXPORT2 2009 uloc_getCountry(const char* localeID, 2010 char* country, 2011 int32_t countryCapacity, 2012 UErrorCode* err) 2013 { 2014 int32_t i=0; 2015 2016 if(err==NULL || U_FAILURE(*err)) { 2017 return 0; 2018 } 2019 2020 if(localeID==NULL) { 2021 localeID=uloc_getDefault(); 2022 } 2023 2024 /* Skip the language */ 2025 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 2026 if(_isIDSeparator(*localeID)) { 2027 const char *scriptID; 2028 /* Skip the script if available */ 2029 ulocimp_getScript(localeID+1, NULL, 0, &scriptID); 2030 if(scriptID != localeID+1) { 2031 /* Found optional script */ 2032 localeID = scriptID; 2033 } 2034 if(_isIDSeparator(*localeID)) { 2035 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL); 2036 } 2037 } 2038 return u_terminateChars(country, countryCapacity, i, err); 2039 } 2040 2041 U_CAPI int32_t U_EXPORT2 2042 uloc_getVariant(const char* localeID, 2043 char* variant, 2044 int32_t variantCapacity, 2045 UErrorCode* err) 2046 { 2047 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 2048 const char* tmpLocaleID; 2049 int32_t 
i=0; 2050 2051 if(err==NULL || U_FAILURE(*err)) { 2052 return 0; 2053 } 2054 2055 if (_hasBCP47Extension(localeID)) { 2056 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 2057 } else { 2058 if (localeID==NULL) { 2059 localeID=uloc_getDefault(); 2060 } 2061 tmpLocaleID=localeID; 2062 } 2063 2064 /* Skip the language */ 2065 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 2066 if(_isIDSeparator(*tmpLocaleID)) { 2067 const char *scriptID; 2068 /* Skip the script if available */ 2069 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 2070 if(scriptID != tmpLocaleID+1) { 2071 /* Found optional script */ 2072 tmpLocaleID = scriptID; 2073 } 2074 /* Skip the Country */ 2075 if (_isIDSeparator(*tmpLocaleID)) { 2076 const char *cntryID; 2077 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID); 2078 if (cntryID != tmpLocaleID+1) { 2079 /* Found optional country */ 2080 tmpLocaleID = cntryID; 2081 } 2082 if(_isIDSeparator(*tmpLocaleID)) { 2083 /* If there was no country ID, skip a possible extra IDSeparator */ 2084 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) { 2085 tmpLocaleID++; 2086 } 2087 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity); 2088 } 2089 } 2090 } 2091 2092 /* removed by weiv. We don't want to handle POSIX variants anymore. 
Use canonicalization function */ 2093 /* if we do not have a variant tag yet then try a POSIX variant after '@' */ 2094 /* 2095 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) { 2096 i=_getVariant(localeID+1, '@', variant, variantCapacity); 2097 } 2098 */ 2099 return u_terminateChars(variant, variantCapacity, i, err); 2100 } 2101 2102 U_CAPI int32_t U_EXPORT2 2103 uloc_getName(const char* localeID, 2104 char* name, 2105 int32_t nameCapacity, 2106 UErrorCode* err) 2107 { 2108 return _canonicalize(localeID, name, nameCapacity, 0, err); 2109 } 2110 2111 U_CAPI int32_t U_EXPORT2 2112 uloc_getBaseName(const char* localeID, 2113 char* name, 2114 int32_t nameCapacity, 2115 UErrorCode* err) 2116 { 2117 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err); 2118 } 2119 2120 U_CAPI int32_t U_EXPORT2 2121 uloc_canonicalize(const char* localeID, 2122 char* name, 2123 int32_t nameCapacity, 2124 UErrorCode* err) 2125 { 2126 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err); 2127 } 2128 2129 U_CAPI const char* U_EXPORT2 2130 uloc_getISO3Language(const char* localeID) 2131 { 2132 int16_t offset; 2133 char lang[ULOC_LANG_CAPACITY]; 2134 UErrorCode err = U_ZERO_ERROR; 2135 2136 if (localeID == NULL) 2137 { 2138 localeID = uloc_getDefault(); 2139 } 2140 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err); 2141 if (U_FAILURE(err)) 2142 return ""; 2143 offset = _findIndex(LANGUAGES, lang); 2144 if (offset < 0) 2145 return ""; 2146 return LANGUAGES_3[offset]; 2147 } 2148 2149 U_CAPI const char* U_EXPORT2 2150 uloc_getISO3Country(const char* localeID) 2151 { 2152 int16_t offset; 2153 char cntry[ULOC_LANG_CAPACITY]; 2154 UErrorCode err = U_ZERO_ERROR; 2155 2156 if (localeID == NULL) 2157 { 2158 localeID = uloc_getDefault(); 2159 } 2160 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err); 2161 if (U_FAILURE(err)) 2162 return ""; 2163 offset = _findIndex(COUNTRIES, cntry); 2164 if (offset < 0) 2165 return ""; 
2166 2167 return COUNTRIES_3[offset]; 2168 } 2169 2170 U_CAPI uint32_t U_EXPORT2 2171 uloc_getLCID(const char* localeID) 2172 { 2173 UErrorCode status = U_ZERO_ERROR; 2174 char langID[ULOC_FULLNAME_CAPACITY]; 2175 uint32_t lcid = 0; 2176 2177 /* Check for incomplete id. */ 2178 if (!localeID || uprv_strlen(localeID) < 2) { 2179 return 0; 2180 } 2181 2182 // Attempt platform lookup if available 2183 lcid = uprv_convertToLCIDPlatform(localeID); 2184 if (lcid > 0) 2185 { 2186 // Windows found an LCID, return that 2187 return lcid; 2188 } 2189 2190 uloc_getLanguage(localeID, langID, sizeof(langID), &status); 2191 if (U_FAILURE(status)) { 2192 return 0; 2193 } 2194 2195 if (uprv_strchr(localeID, '@')) { 2196 // uprv_convertToLCID does not support keywords other than collation. 2197 // Remove all keywords except collation. 2198 int32_t len; 2199 char collVal[ULOC_KEYWORDS_CAPACITY]; 2200 char tmpLocaleID[ULOC_FULLNAME_CAPACITY]; 2201 2202 len = uloc_getKeywordValue(localeID, "collation", collVal, 2203 UPRV_LENGTHOF(collVal) - 1, &status); 2204 2205 if (U_SUCCESS(status) && len > 0) { 2206 collVal[len] = 0; 2207 2208 len = uloc_getBaseName(localeID, tmpLocaleID, 2209 UPRV_LENGTHOF(tmpLocaleID) - 1, &status); 2210 2211 if (U_SUCCESS(status) && len > 0) { 2212 tmpLocaleID[len] = 0; 2213 2214 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID, 2215 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status); 2216 2217 if (U_SUCCESS(status) && len > 0) { 2218 tmpLocaleID[len] = 0; 2219 return uprv_convertToLCID(langID, tmpLocaleID, &status); 2220 } 2221 } 2222 } 2223 2224 // fall through - all keywords are simply ignored 2225 status = U_ZERO_ERROR; 2226 } 2227 2228 return uprv_convertToLCID(langID, localeID, &status); 2229 } 2230 2231 U_CAPI int32_t U_EXPORT2 2232 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, 2233 UErrorCode *status) 2234 { 2235 return uprv_convertToPosix(hostid, locale, localeCapacity, status); 2236 } 2237 2238 /* ### Default 
locale **************************************************/ 2239 2240 U_CAPI const char* U_EXPORT2 2241 uloc_getDefault() 2242 { 2243 return locale_get_default(); 2244 } 2245 2246 U_CAPI void U_EXPORT2 2247 uloc_setDefault(const char* newDefaultLocale, 2248 UErrorCode* err) 2249 { 2250 if (U_FAILURE(*err)) 2251 return; 2252 /* the error code isn't currently used for anything by this function*/ 2253 2254 /* propagate change to C++ */ 2255 locale_set_default(newDefaultLocale); 2256 } 2257 2258 /** 2259 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer 2260 * to an array of pointers to arrays of char. All of these pointers are owned 2261 * by ICU-- do not delete them, and do not write through them. The array is 2262 * terminated with a null pointer. 2263 */ 2264 U_CAPI const char* const* U_EXPORT2 2265 uloc_getISOLanguages() 2266 { 2267 return LANGUAGES; 2268 } 2269 2270 /** 2271 * Returns a list of all 2-letter country codes defined in ISO 639. This is a 2272 * pointer to an array of pointers to arrays of char. All of these pointers are 2273 * owned by ICU-- do not delete them, and do not write through them. The array is 2274 * terminated with a null pointer. 2275 */ 2276 U_CAPI const char* const* U_EXPORT2 2277 uloc_getISOCountries() 2278 { 2279 return COUNTRIES; 2280 } 2281 2282 2283 /* this function to be moved into cstring.c later */ 2284 static char gDecimal = 0; 2285 2286 static /* U_CAPI */ 2287 double 2288 /* U_EXPORT2 */ 2289 _uloc_strtod(const char *start, char **end) { 2290 char *decimal; 2291 char *myEnd; 2292 char buf[30]; 2293 double rv; 2294 if (!gDecimal) { 2295 char rep[5]; 2296 /* For machines that decide to change the decimal on you, 2297 and try to be too smart with localization. 2298 This normally should be just a '.'. 
*/ 2299 sprintf(rep, "%+1.1f", 1.0); 2300 gDecimal = rep[2]; 2301 } 2302 2303 if(gDecimal == '.') { 2304 return uprv_strtod(start, end); /* fall through to OS */ 2305 } else { 2306 uprv_strncpy(buf, start, 29); 2307 buf[29]=0; 2308 decimal = uprv_strchr(buf, '.'); 2309 if(decimal) { 2310 *decimal = gDecimal; 2311 } else { 2312 return uprv_strtod(start, end); /* no decimal point */ 2313 } 2314 rv = uprv_strtod(buf, &myEnd); 2315 if(end) { 2316 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */ 2317 } 2318 return rv; 2319 } 2320 } 2321 2322 typedef struct { 2323 float q; 2324 int32_t dummy; /* to avoid uninitialized memory copy from qsort */ 2325 char locale[ULOC_FULLNAME_CAPACITY+1]; 2326 } _acceptLangItem; 2327 2328 static int32_t U_CALLCONV 2329 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b) 2330 { 2331 const _acceptLangItem *aa = (const _acceptLangItem*)a; 2332 const _acceptLangItem *bb = (const _acceptLangItem*)b; 2333 2334 int32_t rc = 0; 2335 if(bb->q < aa->q) { 2336 rc = -1; /* A > B */ 2337 } else if(bb->q > aa->q) { 2338 rc = 1; /* A < B */ 2339 } else { 2340 rc = 0; /* A = B */ 2341 } 2342 2343 if(rc==0) { 2344 rc = uprv_stricmp(aa->locale, bb->locale); 2345 } 2346 2347 #if defined(ULOC_DEBUG) 2348 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n", 2349 aa->locale, aa->q, 2350 bb->locale, bb->q, 2351 rc);*/ 2352 #endif 2353 2354 return rc; 2355 } 2356 2357 /* 2358 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53 2359 */ 2360 2361 U_CAPI int32_t U_EXPORT2 2362 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult, 2363 const char *httpAcceptLanguage, 2364 UEnumeration* availableLocales, 2365 UErrorCode *status) 2366 { 2367 MaybeStackArray<_acceptLangItem, 4> items; // Struct 
for collecting items. 2368 char tmp[ULOC_FULLNAME_CAPACITY +1]; 2369 int32_t n = 0; 2370 const char *itemEnd; 2371 const char *paramEnd; 2372 const char *s; 2373 const char *t; 2374 int32_t res; 2375 int32_t i; 2376 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); 2377 2378 if(U_FAILURE(*status)) { 2379 return -1; 2380 } 2381 2382 for(s=httpAcceptLanguage;s&&*s;) { 2383 while(isspace(*s)) /* eat space at the beginning */ 2384 s++; 2385 itemEnd=uprv_strchr(s,','); 2386 paramEnd=uprv_strchr(s,';'); 2387 if(!itemEnd) { 2388 itemEnd = httpAcceptLanguage+l; /* end of string */ 2389 } 2390 if(paramEnd && paramEnd<itemEnd) { 2391 /* semicolon (;) is closer than end (,) */ 2392 t = paramEnd+1; 2393 if(*t=='q') { 2394 t++; 2395 } 2396 while(isspace(*t)) { 2397 t++; 2398 } 2399 if(*t=='=') { 2400 t++; 2401 } 2402 while(isspace(*t)) { 2403 t++; 2404 } 2405 items[n].q = (float)_uloc_strtod(t,NULL); 2406 } else { 2407 /* no semicolon - it's 1.0 */ 2408 items[n].q = 1.0f; 2409 paramEnd = itemEnd; 2410 } 2411 items[n].dummy=0; 2412 /* eat spaces prior to semi */ 2413 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) 2414 ; 2415 int32_t slen = ((t+1)-s); 2416 if(slen > ULOC_FULLNAME_CAPACITY) { 2417 *status = U_BUFFER_OVERFLOW_ERROR; 2418 return -1; // too big 2419 } 2420 uprv_strncpy(items[n].locale, s, slen); 2421 items[n].locale[slen]=0; // terminate 2422 int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status); 2423 if(U_FAILURE(*status)) return -1; 2424 if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) { 2425 // canonicalization had an effect- copy back 2426 uprv_strncpy(items[n].locale, tmp, clen); 2427 items[n].locale[clen] = 0; // terminate 2428 } 2429 #if defined(ULOC_DEBUG) 2430 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ 2431 #endif 2432 n++; 2433 s = itemEnd; 2434 while(*s==',') { /* eat duplicate commas */ 2435 s++; 2436 } 2437 if(n>=items.getCapacity()) { // If we need more items 2438 if(NULL == 
items.resize(items.getCapacity()*2, items.getCapacity())) { 2439 *status = U_MEMORY_ALLOCATION_ERROR; 2440 return -1; 2441 } 2442 #if defined(ULOC_DEBUG) 2443 fprintf(stderr,"malloced at size %d\n", items.getCapacity()); 2444 #endif 2445 } 2446 } 2447 uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); 2448 if (U_FAILURE(*status)) { 2449 return -1; 2450 } 2451 LocalMemory<const char*> strs(NULL); 2452 if (strs.allocateInsteadAndReset(n) == NULL) { 2453 *status = U_MEMORY_ALLOCATION_ERROR; 2454 return -1; 2455 } 2456 for(i=0;i<n;i++) { 2457 #if defined(ULOC_DEBUG) 2458 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/ 2459 #endif 2460 strs[i]=items[i].locale; 2461 } 2462 res = uloc_acceptLanguage(result, resultAvailable, outResult, 2463 strs.getAlias(), n, availableLocales, status); 2464 return res; 2465 } 2466 2467 2468 U_CAPI int32_t U_EXPORT2 2469 uloc_acceptLanguage(char *result, int32_t resultAvailable, 2470 UAcceptResult *outResult, const char **acceptList, 2471 int32_t acceptListCount, 2472 UEnumeration* availableLocales, 2473 UErrorCode *status) 2474 { 2475 int32_t i,j; 2476 int32_t len; 2477 int32_t maxLen=0; 2478 char tmp[ULOC_FULLNAME_CAPACITY+1]; 2479 const char *l; 2480 char **fallbackList; 2481 if(U_FAILURE(*status)) { 2482 return -1; 2483 } 2484 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount))); 2485 if(fallbackList==NULL) { 2486 *status = U_MEMORY_ALLOCATION_ERROR; 2487 return -1; 2488 } 2489 for(i=0;i<acceptListCount;i++) { 2490 #if defined(ULOC_DEBUG) 2491 fprintf(stderr,"%02d: %s\n", i, acceptList[i]); 2492 #endif 2493 while((l=uenum_next(availableLocales, NULL, status)) != NULL) { 2494 #if defined(ULOC_DEBUG) 2495 fprintf(stderr," %s\n", l); 2496 #endif 2497 len = (int32_t)uprv_strlen(l); 2498 if(!uprv_strcmp(acceptList[i], l)) { 2499 if(outResult) { 2500 *outResult = ULOC_ACCEPT_VALID; 2501 } 2502 #if defined(ULOC_DEBUG) 2503 
fprintf(stderr, "MATCH! %s\n", l); 2504 #endif 2505 if(len>0) { 2506 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2507 } 2508 for(j=0;j<i;j++) { 2509 uprv_free(fallbackList[j]); 2510 } 2511 uprv_free(fallbackList); 2512 return u_terminateChars(result, resultAvailable, len, status); 2513 } 2514 if(len>maxLen) { 2515 maxLen = len; 2516 } 2517 } 2518 uenum_reset(availableLocales, status); 2519 /* save off parent info */ 2520 if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) { 2521 fallbackList[i] = uprv_strdup(tmp); 2522 } else { 2523 fallbackList[i]=0; 2524 } 2525 } 2526 2527 for(maxLen--;maxLen>0;maxLen--) { 2528 for(i=0;i<acceptListCount;i++) { 2529 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) { 2530 #if defined(ULOC_DEBUG) 2531 fprintf(stderr,"Try: [%s]", fallbackList[i]); 2532 #endif 2533 while((l=uenum_next(availableLocales, NULL, status)) != NULL) { 2534 #if defined(ULOC_DEBUG) 2535 fprintf(stderr," %s\n", l); 2536 #endif 2537 len = (int32_t)uprv_strlen(l); 2538 if(!uprv_strcmp(fallbackList[i], l)) { 2539 if(outResult) { 2540 *outResult = ULOC_ACCEPT_FALLBACK; 2541 } 2542 #if defined(ULOC_DEBUG) 2543 fprintf(stderr, "fallback MATCH! 
%s\n", l); 2544 #endif 2545 if(len>0) { 2546 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2547 } 2548 for(j=0;j<acceptListCount;j++) { 2549 uprv_free(fallbackList[j]); 2550 } 2551 uprv_free(fallbackList); 2552 return u_terminateChars(result, resultAvailable, len, status); 2553 } 2554 } 2555 uenum_reset(availableLocales, status); 2556 2557 if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) { 2558 uprv_free(fallbackList[i]); 2559 fallbackList[i] = uprv_strdup(tmp); 2560 } else { 2561 uprv_free(fallbackList[i]); 2562 fallbackList[i]=0; 2563 } 2564 } 2565 } 2566 if(outResult) { 2567 *outResult = ULOC_ACCEPT_FAILED; 2568 } 2569 } 2570 for(i=0;i<acceptListCount;i++) { 2571 uprv_free(fallbackList[i]); 2572 } 2573 uprv_free(fallbackList); 2574 return -1; 2575 } 2576 2577 U_CAPI const char* U_EXPORT2 2578 uloc_toUnicodeLocaleKey(const char* keyword) 2579 { 2580 const char* bcpKey = ulocimp_toBcpKey(keyword); 2581 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) { 2582 // unknown keyword, but syntax is fine.. 2583 return keyword; 2584 } 2585 return bcpKey; 2586 } 2587 2588 U_CAPI const char* U_EXPORT2 2589 uloc_toUnicodeLocaleType(const char* keyword, const char* value) 2590 { 2591 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL); 2592 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) { 2593 // unknown keyword, but syntax is fine.. 
2594 return value; 2595 } 2596 return bcpType; 2597 } 2598 2599 static UBool 2600 isWellFormedLegacyKey(const char* legacyKey) 2601 { 2602 const char* p = legacyKey; 2603 while (*p) { 2604 if (!UPRV_ISALPHANUM(*p)) { 2605 return FALSE; 2606 } 2607 p++; 2608 } 2609 return TRUE; 2610 } 2611 2612 static UBool 2613 isWellFormedLegacyType(const char* legacyType) 2614 { 2615 const char* p = legacyType; 2616 int32_t alphaNumLen = 0; 2617 while (*p) { 2618 if (*p == '_' || *p == '/' || *p == '-') { 2619 if (alphaNumLen == 0) { 2620 return FALSE; 2621 } 2622 alphaNumLen = 0; 2623 } else if (UPRV_ISALPHANUM(*p)) { 2624 alphaNumLen++; 2625 } else { 2626 return FALSE; 2627 } 2628 p++; 2629 } 2630 return (alphaNumLen != 0); 2631 } 2632 2633 U_CAPI const char* U_EXPORT2 2634 uloc_toLegacyKey(const char* keyword) 2635 { 2636 const char* legacyKey = ulocimp_toLegacyKey(keyword); 2637 if (legacyKey == NULL) { 2638 // Checks if the specified locale key is well-formed with the legacy locale syntax. 2639 // 2640 // Note: 2641 // LDML/CLDR provides some definition of keyword syntax in 2642 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and 2643 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax 2644 // Keys can only consist of [0-9a-zA-Z]. 2645 if (isWellFormedLegacyKey(keyword)) { 2646 return keyword; 2647 } 2648 } 2649 return legacyKey; 2650 } 2651 2652 U_CAPI const char* U_EXPORT2 2653 uloc_toLegacyType(const char* keyword, const char* value) 2654 { 2655 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL); 2656 if (legacyType == NULL) { 2657 // Checks if the specified locale type is well-formed with the legacy locale syntax. 
2658 // 2659 // Note: 2660 // LDML/CLDR provides some definition of keyword syntax in 2661 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and 2662 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax 2663 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values 2664 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv") 2665 if (isWellFormedLegacyType(value)) { 2666 return value; 2667 } 2668 } 2669 return legacyType; 2670 } 2671 2672 /*eof*/ 2673