1 /* 2 ********************************************************************** 3 * Copyright (C) 1997-2015, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * File ULOC.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 04/01/97 aliu Creation. 13 * 08/21/98 stephen JDK 1.2 sync 14 * 12/08/98 rtg New Locale implementation and C API 15 * 03/15/99 damiba overhaul. 16 * 04/06/99 stephen changed setDefault() to realloc and copy 17 * 06/14/99 stephen Changed calls to ures_open for new params 18 * 07/21/99 stephen Modified setDefault() to propagate to C++ 19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, 20 * brought canonicalization code into line with spec 21 *****************************************************************************/ 22 23 /* 24 POSIX's locale format, from putil.c: [no spaces] 25 26 ll [ _CC ] [ . MM ] [ @ VV] 27 28 l = lang, C = ctry, M = charmap, V = variant 29 */ 30 31 #include "unicode/utypes.h" 32 #include "unicode/ustring.h" 33 #include "unicode/uloc.h" 34 35 #include "putilimp.h" 36 #include "ustr_imp.h" 37 #include "ulocimp.h" 38 #include "umutex.h" 39 #include "cstring.h" 40 #include "cmemory.h" 41 #include "locmap.h" 42 #include "uarrsort.h" 43 #include "uenumimp.h" 44 #include "uassert.h" 45 46 #include <stdio.h> /* for sprintf */ 47 48 /* ### Declarations **************************************************/ 49 50 /* Locale stuff from locid.cpp */ 51 U_CFUNC void locale_set_default(const char *id); 52 U_CFUNC const char *locale_get_default(void); 53 U_CFUNC int32_t 54 locale_getKeywords(const char *localeID, 55 char prev, 56 char *keywords, int32_t keywordCapacity, 57 char *values, int32_t valuesCapacity, int32_t *valLen, 58 UBool valuesToo, 59 UErrorCode *status); 60 61 /* ### Data tables **************************************************/ 62 63 /** 64 * Table of language codes, both 2- and 
3-letter, with preference 65 * given to 2-letter codes where possible. Includes 3-letter codes 66 * that lack a 2-letter equivalent. 67 * 68 * This list must be in sorted order. This list is returned directly 69 * to the user by some API. 70 * 71 * This list must be kept in sync with LANGUAGES_3, with corresponding 72 * entries matched. 73 * 74 * This table should be terminated with a NULL entry, followed by a 75 * second list, and another NULL entry. The first list is visible to 76 * user code when this array is returned by API. The second list 77 * contains codes we support, but do not expose through user API. 78 * 79 * Notes 80 * 81 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to 82 * include the revisions up to 2001/7/27 *CWB* 83 * 84 * The 3 character codes are the terminology codes like RFC 3066. This 85 * is compatible with prior ICU codes 86 * 87 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the 88 * table but now at the end of the table because 3 character codes are 89 * duplicates. This avoids bad searches going from 3 to 2 character 90 * codes. 
91 * 92 * The range qaa-qtz is reserved for local use 93 */ 94 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ 95 /* ISO639 table version is 20150505 */ 96 static const char * const LANGUAGES[] = { 97 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb", 98 "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale", 99 "aln", "alt", "am", "an", "ang", "anp", "ar", "arc", 100 "arn", "aro", "arp", "arq", "arw", "ary", "arz", "as", 101 "asa", "ase", "ast", "av", "avk", "awa", "ay", "az", 102 "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", 103 "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg", 104 "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla", 105 "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh", 106 "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv", 107 "ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg", 108 "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp", 109 "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh", 110 "cs", "csb", "cu", "cv", "cy", 111 "da", "dak", "dar", "dav", "de", "del", "den", "dgr", 112 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv", 113 "dyo", "dyu", "dz", "dzg", 114 "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx", 115 "en", "enm", "eo", "es", "esu", "et", "eu", "ewo", 116 "ext", 117 "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj", 118 "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr", 119 "frs", "fur", "fy", 120 "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd", 121 "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom", 122 "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc", 123 "gur", "guz", "gv", "gwi", 124 "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil", 125 "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu", 126 "hup", "hy", "hz", 127 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik", 128 "ilo", "inh", "io", "is", "it", "iu", "izh", 129 "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut", 130 "jv", 131 "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd", 132 
"kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp", 133 "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk", 134 "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi", 135 "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl", 136 "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut", 137 "kv", "kw", "ky", 138 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn", 139 "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo", 140 "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui", 141 "lun", "luo", "lus", "luy", "lv", "lzh", "lzz", 142 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", 143 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga", 144 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk", 145 "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj", 146 "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv", 147 "my", "mye", "myv", "mzn", 148 "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne", 149 "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn", 150 "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso", 151 "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", 152 "oc", "oj", "om", "or", "os", "osa", "ota", 153 "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc", 154 "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt", 155 "pon", "prg", "pro", "ps", "pt", 156 "qu", "quc", "qug", 157 "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro", 158 "rof", "rom", "rtm", "ru", "rue", "rug", "rup", 159 "rw", "rwk", 160 "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz", 161 "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh", 162 "se", "see", "seh", "sei", "sel", "ses", "sg", "sga", 163 "sgs", "shi", "shn", "shu", "si", "sid", "sk", 164 "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms", 165 "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr", 166 "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux", 167 "sv", "sw", "swb", "swc", "syc", "syr", "szl", 168 "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg", 169 "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl", 170 
"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi", 171 "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt", 172 "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm", 173 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz", 174 "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo", 175 "vot", "vro", "vun", 176 "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu", 177 "xal", "xh", "xmf", "xog", 178 "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue", 179 "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu", 180 "zun", "zxx", "zza", 181 NULL, 182 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ 183 NULL 184 }; 185 186 static const char* const DEPRECATED_LANGUAGES[]={ 187 "in", "iw", "ji", "jw", NULL, NULL 188 }; 189 static const char* const REPLACEMENT_LANGUAGES[]={ 190 "id", "he", "yi", "jv", NULL, NULL 191 }; 192 193 /** 194 * Table of 3-letter language codes. 195 * 196 * This is a lookup table used to convert 3-letter language codes to 197 * their 2-letter equivalent, where possible. It must be kept in sync 198 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the 199 * same language as LANGUAGES_3[i]. The commented-out lines are 200 * copied from LANGUAGES to make eyeballing this baby easier. 201 * 202 * Where a 3-letter language code has no 2-letter equivalent, the 203 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i]. 204 * 205 * This table should be terminated with a NULL entry, followed by a 206 * second list, and another NULL entry. The two lists correspond to 207 * the two lists in LANGUAGES. 
208 */ 209 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ 210 /* ISO639 table version is 20150505 */ 211 static const char * const LANGUAGES_3[] = { 212 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb", 213 "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale", 214 "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc", 215 "arn", "aro", "arp", "arq", "arw", "ary", "arz", "asm", 216 "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze", 217 "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", 218 "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul", 219 "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla", 220 "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh", 221 "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv", 222 "cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg", 223 "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp", 224 "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh", 225 "ces", "csb", "chu", "chv", "cym", 226 "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr", 227 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div", 228 "dyo", "dyu", "dzo", "dzg", 229 "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx", 230 "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo", 231 "ext", 232 "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij", 233 "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr", 234 "frs", "fur", "fry", 235 "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla", 236 "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom", 237 "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc", 238 "gur", "guz", "glv", "gwi", 239 "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil", 240 "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun", 241 "hup", "hye", "her", 242 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk", 243 "ilo", "inh", "ido", "isl", "ita", "iku", "izh", 244 "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut", 245 "jav", 246 "kat", "kaa", "kab", "kac", 
"kaj", "kam", "kaw", "kbd", 247 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp", 248 "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz", 249 "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi", 250 "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl", 251 "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut", 252 "kom", "cor", "kir", 253 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn", 254 "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao", 255 "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui", 256 "lun", "luo", "lus", "luy", "lav", "lzh", "lzz", 257 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", 258 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga", 259 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd", 260 "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj", 261 "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv", 262 "mya", "mye", "myv", "mzn", 263 "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep", 264 "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno", 265 "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso", 266 "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", 267 "oci", "oji", "orm", "ori", "oss", "osa", "ota", 268 "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc", 269 "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt", 270 "pon", "prg", "pro", "pus", "por", 271 "que", "quc", "qug", 272 "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron", 273 "rof", "rom", "rtm", "rus", "rue", "rug", "rup", 274 "kin", "rwk", 275 "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz", 276 "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh", 277 "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga", 278 "sgs", "shi", "shn", "shu", "sin", "sid", "slk", 279 "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms", 280 "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr", 281 "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux", 282 "swe", "swa", "swb", "swc", "syc", "syr", "szl", 283 
"tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk", 284 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl", 285 "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi", 286 "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt", 287 "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm", 288 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb", 289 "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol", 290 "vot", "vro", "vun", 291 "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu", 292 "xal", "xho", "xmf", "xog", 293 "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue", 294 "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul", 295 "zun", "zxx", "zza", 296 NULL, 297 /* "in", "iw", "ji", "jw", "sh", */ 298 "ind", "heb", "yid", "jaw", "srp", 299 NULL 300 }; 301 302 /** 303 * Table of 2-letter country codes. 304 * 305 * This list must be in sorted order. This list is returned directly 306 * to the user by some API. 307 * 308 * This list must be kept in sync with COUNTRIES_3, with corresponding 309 * entries matched. 310 * 311 * This table should be terminated with a NULL entry, followed by a 312 * second list, and another NULL entry. The first list is visible to 313 * user code when this array is returned by API. The second list 314 * contains codes we support, but do not expose through user API. 
315 * 316 * Notes: 317 * 318 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per 319 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added 320 * new codes keeping the old ones for compatibility updated to include 321 * 1999/12/03 revisions *CWB* 322 * 323 * RO(ROM) is now RO(ROU) according to 324 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html 325 */ 326 static const char * const COUNTRIES[] = { 327 "AD", "AE", "AF", "AG", "AI", "AL", "AM", 328 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", 329 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", 330 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", 331 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", 332 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", 333 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", 334 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", 335 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", 336 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", 337 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", 338 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", 339 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", 340 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", 341 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", 342 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", 343 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", 344 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", 345 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", 346 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", 347 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", 348 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", 349 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", 350 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", 351 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", 352 "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", 353 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", 354 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", 355 "VA", "VC", 
"VE", "VG", "VI", "VN", "VU", "WF", 356 "WS", "YE", "YT", "ZA", "ZM", "ZW", 357 NULL, 358 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */ 359 NULL 360 }; 361 362 static const char* const DEPRECATED_COUNTRIES[] = { 363 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */ 364 }; 365 static const char* const REPLACEMENT_COUNTRIES[] = { 366 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */ 367 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */ 368 }; 369 370 /** 371 * Table of 3-letter country codes. 372 * 373 * This is a lookup table used to convert 3-letter country codes to 374 * their 2-letter equivalent. It must be kept in sync with COUNTRIES. 375 * For all valid i, COUNTRIES[i] must refer to the same country as 376 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES 377 * to make eyeballing this baby easier. 378 * 379 * This table should be terminated with a NULL entry, followed by a 380 * second list, and another NULL entry. The two lists correspond to 381 * the two lists in COUNTRIES. 
382 */ 383 static const char * const COUNTRIES_3[] = { 384 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */ 385 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", 386 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ 387 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", 388 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ 389 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", 390 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */ 391 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT", 392 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ 393 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", 394 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ 395 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", 396 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ 397 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", 398 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ 399 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", 400 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ 401 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", 402 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ 403 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", 404 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ 405 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", 406 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ 407 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", 408 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ 409 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", 410 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ 411 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", 412 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ 413 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", 414 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ 415 "LBN", "LCA", "LIE", "LKA", "LBR", 
"LSO", "LTU", "LUX", 416 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ 417 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", 418 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ 419 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", 420 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ 421 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", 422 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ 423 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", 424 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ 425 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", 426 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ 427 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", 428 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ 429 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", 430 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ 431 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", 432 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */ 433 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV", 434 /* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ 435 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", 436 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ 437 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", 438 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ 439 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", 440 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ 441 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", 442 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ 443 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", 444 NULL, 445 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ 446 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", 447 NULL 448 }; 449 450 typedef struct CanonicalizationMap { 451 const char *id; /* input ID */ 452 const char *canonicalID; /* 
canonicalized output ID */ 453 const char *keyword; /* keyword, or NULL if none */ 454 const char *value; /* keyword value, or NULL if kw==NULL */ 455 } CanonicalizationMap; 456 457 /** 458 * A map to canonicalize locale IDs. This handles a variety of 459 * different semantic kinds of transformations. 460 */ 461 static const CanonicalizationMap CANONICALIZE_MAP[] = { 462 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */ 463 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */ 464 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */ 465 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */ 466 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */ 467 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */ 468 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" }, 469 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */ 470 { "de_AT_PREEURO", "de_AT", "currency", "ATS" }, 471 { "de_DE_PREEURO", "de_DE", "currency", "DEM" }, 472 { "de_LU_PREEURO", "de_LU", "currency", "LUF" }, 473 { "el_GR_PREEURO", "el_GR", "currency", "GRD" }, 474 { "en_BE_PREEURO", "en_BE", "currency", "BEF" }, 475 { "en_IE_PREEURO", "en_IE", "currency", "IEP" }, 476 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */ 477 { "es_ES_PREEURO", "es_ES", "currency", "ESP" }, 478 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" }, 479 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" }, 480 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" }, 481 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" }, 482 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" }, 483 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" }, 484 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" }, 485 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */ 486 { "it_IT_PREEURO", "it_IT", "currency", "ITL" }, 487 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */ 488 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */ 489 { 
"nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, 490 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, 491 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, 492 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ 493 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ 494 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ 495 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */ 496 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */ 497 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */ 498 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */ 499 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */ 500 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */ 501 { "zh_GAN", "gan", NULL, NULL }, /* registered name */ 502 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */ 503 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */ 504 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */ 505 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */ 506 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */ 507 { "zh_YUE", "yue", NULL, NULL }, /* registered name */ 508 }; 509 510 typedef struct VariantMap { 511 const char *variant; /* input ID */ 512 const char *keyword; /* keyword, or NULL if none */ 513 const char *value; /* keyword value, or NULL if kw==NULL */ 514 } VariantMap; 515 516 static const VariantMap VARIANT_MAP[] = { 517 { "EURO", "currency", "EUR" }, 518 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */ 519 { "STROKE", "collation", "stroke" } /* Solaris variant */ 520 }; 521 522 /* ### BCP47 Conversion *******************************************/ 523 /* Test if the locale id has BCP47 u extension and does not have '@' */ 524 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) 525 /* Converts the BCP47 id to Unicode id. 
Does nothing to id if conversion fails */ 526 #define _ConvertBCP47(finalID, id, buffer, length,err) \ 527 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \ 528 finalID=id; \ 529 } else { \ 530 finalID=buffer; \ 531 } 532 /* Gets the size of the shortest subtag in the given localeID. */ 533 static int32_t getShortestSubtagLength(const char *localeID) { 534 int32_t localeIDLength = uprv_strlen(localeID); 535 int32_t length = localeIDLength; 536 int32_t tmpLength = 0; 537 int32_t i; 538 UBool reset = TRUE; 539 540 for (i = 0; i < localeIDLength; i++) { 541 if (localeID[i] != '_' && localeID[i] != '-') { 542 if (reset) { 543 tmpLength = 0; 544 reset = FALSE; 545 } 546 tmpLength++; 547 } else { 548 if (tmpLength != 0 && tmpLength < length) { 549 length = tmpLength; 550 } 551 reset = TRUE; 552 } 553 } 554 555 return length; 556 } 557 558 /* ### Keywords **************************************************/ 559 560 #define ULOC_KEYWORD_BUFFER_LEN 25 561 #define ULOC_MAX_NO_KEYWORDS 25 562 563 U_CAPI const char * U_EXPORT2 564 locale_getKeywordsStart(const char *localeID) { 565 const char *result = NULL; 566 if((result = uprv_strchr(localeID, '@')) != NULL) { 567 return result; 568 } 569 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) 570 else { 571 /* We do this because the @ sign is variant, and the @ sign used on one 572 EBCDIC machine won't be compiled the same way on other EBCDIC based 573 machines. 
*/ 574 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; 575 const uint8_t *charToFind = ebcdicSigns; 576 while(*charToFind) { 577 if((result = uprv_strchr(localeID, *charToFind)) != NULL) { 578 return result; 579 } 580 charToFind++; 581 } 582 } 583 #endif 584 return NULL; 585 } 586 587 /** 588 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] 589 * @param keywordName incoming name to be canonicalized 590 * @param status return status (keyword too long) 591 * @return length of the keyword name 592 */ 593 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) 594 { 595 int32_t i; 596 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName); 597 598 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) { 599 /* keyword name too long for internal buffer */ 600 *status = U_INTERNAL_PROGRAM_ERROR; 601 return 0; 602 } 603 604 /* normalize the keyword name */ 605 for(i = 0; i < keywordNameLen; i++) { 606 buf[i] = uprv_tolower(keywordName[i]); 607 } 608 buf[i] = 0; 609 610 return keywordNameLen; 611 } 612 613 typedef struct { 614 char keyword[ULOC_KEYWORD_BUFFER_LEN]; 615 int32_t keywordLen; 616 const char *valueStart; 617 int32_t valueLen; 618 } KeywordStruct; 619 620 static int32_t U_CALLCONV 621 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { 622 const char* leftString = ((const KeywordStruct *)left)->keyword; 623 const char* rightString = ((const KeywordStruct *)right)->keyword; 624 return uprv_strcmp(leftString, rightString); 625 } 626 627 /** 628 * Both addKeyword and addValue must already be in canonical form. 629 * Either both addKeyword and addValue are NULL, or neither is NULL. 630 * If they are not NULL they must be zero terminated. 631 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword. 
632 */ 633 static int32_t 634 _getKeywords(const char *localeID, 635 char prev, 636 char *keywords, int32_t keywordCapacity, 637 char *values, int32_t valuesCapacity, int32_t *valLen, 638 UBool valuesToo, 639 const char* addKeyword, 640 const char* addValue, 641 UErrorCode *status) 642 { 643 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; 644 645 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; 646 int32_t numKeywords = 0; 647 const char* pos = localeID; 648 const char* equalSign = NULL; 649 const char* semicolon = NULL; 650 int32_t i = 0, j, n; 651 int32_t keywordsLen = 0; 652 int32_t valuesLen = 0; 653 654 if(prev == '@') { /* start of keyword definition */ 655 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ 656 do { 657 UBool duplicate = FALSE; 658 /* skip leading spaces */ 659 while(*pos == ' ') { 660 pos++; 661 } 662 if (!*pos) { /* handle trailing "; " */ 663 break; 664 } 665 if(numKeywords == maxKeywords) { 666 *status = U_INTERNAL_PROGRAM_ERROR; 667 return 0; 668 } 669 equalSign = uprv_strchr(pos, '='); 670 semicolon = uprv_strchr(pos, ';'); 671 /* lack of '=' [foo@currency] is illegal */ 672 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ 673 if(!equalSign || (semicolon && semicolon<equalSign)) { 674 *status = U_INVALID_FORMAT_ERROR; 675 return 0; 676 } 677 /* need to normalize both keyword and keyword name */ 678 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) { 679 /* keyword name too long for internal buffer */ 680 *status = U_INTERNAL_PROGRAM_ERROR; 681 return 0; 682 } 683 for(i = 0, n = 0; i < equalSign - pos; ++i) { 684 if (pos[i] != ' ') { 685 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); 686 } 687 } 688 689 /* zero-length keyword is an error. */ 690 if (n == 0) { 691 *status = U_INVALID_FORMAT_ERROR; 692 return 0; 693 } 694 695 keywordList[numKeywords].keyword[n] = 0; 696 keywordList[numKeywords].keywordLen = n; 697 /* now grab the value part. 
First we skip the '=' */ 698 equalSign++; 699 /* then we leading spaces */ 700 while(*equalSign == ' ') { 701 equalSign++; 702 } 703 704 /* Premature end or zero-length value */ 705 if (!*equalSign || equalSign == semicolon) { 706 *status = U_INVALID_FORMAT_ERROR; 707 return 0; 708 } 709 710 keywordList[numKeywords].valueStart = equalSign; 711 712 pos = semicolon; 713 i = 0; 714 if(pos) { 715 while(*(pos - i - 1) == ' ') { 716 i++; 717 } 718 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); 719 pos++; 720 } else { 721 i = (int32_t)uprv_strlen(equalSign); 722 while(i && equalSign[i-1] == ' ') { 723 i--; 724 } 725 keywordList[numKeywords].valueLen = i; 726 } 727 /* If this is a duplicate keyword, then ignore it */ 728 for (j=0; j<numKeywords; ++j) { 729 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) { 730 duplicate = TRUE; 731 break; 732 } 733 } 734 if (!duplicate) { 735 ++numKeywords; 736 } 737 } while(pos); 738 739 /* Handle addKeyword/addValue. */ 740 if (addKeyword != NULL) { 741 UBool duplicate = FALSE; 742 U_ASSERT(addValue != NULL); 743 /* Search for duplicate; if found, do nothing. Explicit keyword 744 overrides addKeyword. 
*/ 745 for (j=0; j<numKeywords; ++j) { 746 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) { 747 duplicate = TRUE; 748 break; 749 } 750 } 751 if (!duplicate) { 752 if (numKeywords == maxKeywords) { 753 *status = U_INTERNAL_PROGRAM_ERROR; 754 return 0; 755 } 756 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword); 757 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword); 758 keywordList[numKeywords].valueStart = addValue; 759 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue); 760 ++numKeywords; 761 } 762 } else { 763 U_ASSERT(addValue == NULL); 764 } 765 766 /* now we have a list of keywords */ 767 /* we need to sort it */ 768 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status); 769 770 /* Now construct the keyword part */ 771 for(i = 0; i < numKeywords; i++) { 772 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) { 773 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword); 774 if(valuesToo) { 775 keywords[keywordsLen + keywordList[i].keywordLen] = '='; 776 } else { 777 keywords[keywordsLen + keywordList[i].keywordLen] = 0; 778 } 779 } 780 keywordsLen += keywordList[i].keywordLen + 1; 781 if(valuesToo) { 782 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) { 783 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); 784 } 785 keywordsLen += keywordList[i].valueLen; 786 787 if(i < numKeywords - 1) { 788 if(keywordsLen < keywordCapacity) { 789 keywords[keywordsLen] = ';'; 790 } 791 keywordsLen++; 792 } 793 } 794 if(values) { 795 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) { 796 uprv_strcpy(values+valuesLen, keywordList[i].valueStart); 797 values[valuesLen + keywordList[i].valueLen] = 0; 798 } 799 valuesLen += keywordList[i].valueLen + 1; 800 } 801 } 802 if(values) { 803 values[valuesLen] = 0; 804 if(valLen) { 805 *valLen = valuesLen; 806 } 807 } 808 return u_terminateChars(keywords, 
keywordCapacity, keywordsLen, status); 809 } else { 810 return 0; 811 } 812 } 813 814 U_CFUNC int32_t 815 locale_getKeywords(const char *localeID, 816 char prev, 817 char *keywords, int32_t keywordCapacity, 818 char *values, int32_t valuesCapacity, int32_t *valLen, 819 UBool valuesToo, 820 UErrorCode *status) { 821 return _getKeywords(localeID, prev, keywords, keywordCapacity, 822 values, valuesCapacity, valLen, valuesToo, 823 NULL, NULL, status); 824 } 825 826 U_CAPI int32_t U_EXPORT2 827 uloc_getKeywordValue(const char* localeID, 828 const char* keywordName, 829 char* buffer, int32_t bufferCapacity, 830 UErrorCode* status) 831 { 832 const char* startSearchHere = NULL; 833 const char* nextSeparator = NULL; 834 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 835 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 836 int32_t i = 0; 837 int32_t result = 0; 838 839 if(status && U_SUCCESS(*status) && localeID) { 840 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 841 const char* tmpLocaleID; 842 843 if (_hasBCP47Extension(localeID)) { 844 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 845 } else { 846 tmpLocaleID=localeID; 847 } 848 849 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */ 850 if(startSearchHere == NULL) { 851 /* no keywords, return at once */ 852 return 0; 853 } 854 855 locale_canonKeywordName(keywordNameBuffer, keywordName, status); 856 if(U_FAILURE(*status)) { 857 return 0; 858 } 859 860 /* find the first keyword */ 861 while(startSearchHere) { 862 startSearchHere++; 863 /* skip leading spaces (allowed?) 
*/ 864 while(*startSearchHere == ' ') { 865 startSearchHere++; 866 } 867 nextSeparator = uprv_strchr(startSearchHere, '='); 868 /* need to normalize both keyword and keyword name */ 869 if(!nextSeparator) { 870 break; 871 } 872 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) { 873 /* keyword name too long for internal buffer */ 874 *status = U_INTERNAL_PROGRAM_ERROR; 875 return 0; 876 } 877 for(i = 0; i < nextSeparator - startSearchHere; i++) { 878 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]); 879 } 880 /* trim trailing spaces */ 881 while(startSearchHere[i-1] == ' ') { 882 i--; 883 U_ASSERT(i>=0); 884 } 885 localeKeywordNameBuffer[i] = 0; 886 887 startSearchHere = uprv_strchr(nextSeparator, ';'); 888 889 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { 890 nextSeparator++; 891 while(*nextSeparator == ' ') { 892 nextSeparator++; 893 } 894 /* we actually found the keyword. Copy the value */ 895 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) { 896 while(*(startSearchHere-1) == ' ') { 897 startSearchHere--; 898 } 899 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator); 900 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status); 901 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */ 902 i = (int32_t)uprv_strlen(nextSeparator); 903 while(nextSeparator[i - 1] == ' ') { 904 i--; 905 } 906 uprv_strncpy(buffer, nextSeparator, i); 907 result = u_terminateChars(buffer, bufferCapacity, i, status); 908 } else { 909 /* give a bigger buffer, please */ 910 *status = U_BUFFER_OVERFLOW_ERROR; 911 if(startSearchHere) { 912 result = (int32_t)(startSearchHere - nextSeparator); 913 } else { 914 result = (int32_t)uprv_strlen(nextSeparator); 915 } 916 } 917 return result; 918 } 919 } 920 } 921 return 0; 922 } 923 924 U_CAPI int32_t U_EXPORT2 925 uloc_setKeywordValue(const char* keywordName, 
926 const char* keywordValue, 927 char* buffer, int32_t bufferCapacity, 928 UErrorCode* status) 929 { 930 /* TODO: sorting. removal. */ 931 int32_t keywordNameLen; 932 int32_t keywordValueLen; 933 int32_t bufLen; 934 int32_t needLen = 0; 935 int32_t foundValueLen; 936 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */ 937 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 938 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 939 int32_t i = 0; 940 int32_t rc; 941 char* nextSeparator = NULL; 942 char* nextEqualsign = NULL; 943 char* startSearchHere = NULL; 944 char* keywordStart = NULL; 945 char *insertHere = NULL; 946 if(U_FAILURE(*status)) { 947 return -1; 948 } 949 if(bufferCapacity>1) { 950 bufLen = (int32_t)uprv_strlen(buffer); 951 } else { 952 *status = U_ILLEGAL_ARGUMENT_ERROR; 953 return 0; 954 } 955 if(bufferCapacity<bufLen) { 956 /* The capacity is less than the length?! Is this NULL terminated? */ 957 *status = U_ILLEGAL_ARGUMENT_ERROR; 958 return 0; 959 } 960 if(keywordValue && !*keywordValue) { 961 keywordValue = NULL; 962 } 963 if(keywordValue) { 964 keywordValueLen = (int32_t)uprv_strlen(keywordValue); 965 } else { 966 keywordValueLen = 0; 967 } 968 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status); 969 if(U_FAILURE(*status)) { 970 return 0; 971 } 972 startSearchHere = (char*)locale_getKeywordsStart(buffer); 973 if(startSearchHere == NULL || (startSearchHere[1]==0)) { 974 if(!keywordValue) { /* no keywords = nothing to remove */ 975 return bufLen; 976 } 977 978 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 979 if(startSearchHere) { /* had a single @ */ 980 needLen--; /* already had the @ */ 981 /* startSearchHere points at the @ */ 982 } else { 983 startSearchHere=buffer+bufLen; 984 } 985 if(needLen >= bufferCapacity) { 986 *status = U_BUFFER_OVERFLOW_ERROR; 987 return needLen; /* no change */ 988 } 989 *startSearchHere = '@'; 990 startSearchHere++; 991 uprv_strcpy(startSearchHere, 
keywordNameBuffer); 992 startSearchHere += keywordNameLen; 993 *startSearchHere = '='; 994 startSearchHere++; 995 uprv_strcpy(startSearchHere, keywordValue); 996 startSearchHere+=keywordValueLen; 997 return needLen; 998 } /* end shortcut - no @ */ 999 1000 keywordStart = startSearchHere; 1001 /* search for keyword */ 1002 while(keywordStart) { 1003 keywordStart++; 1004 /* skip leading spaces (allowed?) */ 1005 while(*keywordStart == ' ') { 1006 keywordStart++; 1007 } 1008 nextEqualsign = uprv_strchr(keywordStart, '='); 1009 /* need to normalize both keyword and keyword name */ 1010 if(!nextEqualsign) { 1011 break; 1012 } 1013 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) { 1014 /* keyword name too long for internal buffer */ 1015 *status = U_INTERNAL_PROGRAM_ERROR; 1016 return 0; 1017 } 1018 for(i = 0; i < nextEqualsign - keywordStart; i++) { 1019 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]); 1020 } 1021 /* trim trailing spaces */ 1022 while(keywordStart[i-1] == ' ') { 1023 i--; 1024 } 1025 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN); 1026 localeKeywordNameBuffer[i] = 0; 1027 1028 nextSeparator = uprv_strchr(nextEqualsign, ';'); 1029 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); 1030 if(rc == 0) { 1031 nextEqualsign++; 1032 while(*nextEqualsign == ' ') { 1033 nextEqualsign++; 1034 } 1035 /* we actually found the keyword. 
Change the value */ 1036 if (nextSeparator) { 1037 keywordAtEnd = 0; 1038 foundValueLen = (int32_t)(nextSeparator - nextEqualsign); 1039 } else { 1040 keywordAtEnd = 1; 1041 foundValueLen = (int32_t)uprv_strlen(nextEqualsign); 1042 } 1043 if(keywordValue) { /* adding a value - not removing */ 1044 if(foundValueLen == keywordValueLen) { 1045 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1046 return bufLen; /* no change in size */ 1047 } else if(foundValueLen > keywordValueLen) { 1048 int32_t delta = foundValueLen - keywordValueLen; 1049 if(nextSeparator) { /* RH side */ 1050 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer)); 1051 } 1052 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1053 bufLen -= delta; 1054 buffer[bufLen]=0; 1055 return bufLen; 1056 } else { /* FVL < KVL */ 1057 int32_t delta = keywordValueLen - foundValueLen; 1058 if((bufLen+delta) >= bufferCapacity) { 1059 *status = U_BUFFER_OVERFLOW_ERROR; 1060 return bufLen+delta; 1061 } 1062 if(nextSeparator) { /* RH side */ 1063 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer)); 1064 } 1065 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1066 bufLen += delta; 1067 buffer[bufLen]=0; 1068 return bufLen; 1069 } 1070 } else { /* removing a keyword */ 1071 if(keywordAtEnd) { 1072 /* zero out the ';' or '@' just before startSearchhere */ 1073 keywordStart[-1] = 0; 1074 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */ 1075 } else { 1076 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer)); 1077 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0; 1078 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart)); 1079 } 1080 } 1081 } else if(rc<0){ /* end match keyword */ 1082 /* could insert at this location. 
*/ 1083 insertHere = keywordStart; 1084 } 1085 keywordStart = nextSeparator; 1086 } /* end loop searching */ 1087 1088 if(!keywordValue) { 1089 return bufLen; /* removal of non-extant keyword - no change */ 1090 } 1091 1092 /* we know there is at least one keyword. */ 1093 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 1094 if(needLen >= bufferCapacity) { 1095 *status = U_BUFFER_OVERFLOW_ERROR; 1096 return needLen; /* no change */ 1097 } 1098 1099 if(insertHere) { 1100 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer)); 1101 keywordStart = insertHere; 1102 } else { 1103 keywordStart = buffer+bufLen; 1104 *keywordStart = ';'; 1105 keywordStart++; 1106 } 1107 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen); 1108 keywordStart += keywordNameLen; 1109 *keywordStart = '='; 1110 keywordStart++; 1111 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */ 1112 keywordStart+=keywordValueLen; 1113 if(insertHere) { 1114 *keywordStart = ';'; 1115 keywordStart++; 1116 } 1117 buffer[needLen]=0; 1118 return needLen; 1119 } 1120 1121 /* ### ID parsing implementation **************************************************/ 1122 1123 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) 1124 1125 /*returns TRUE if one of the special prefixes is here (s=string) 1126 'x-' or 'i-' */ 1127 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) 1128 1129 /* Dot terminates it because of POSIX form where dot precedes the codepage 1130 * except for variant 1131 */ 1132 #define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) 1133 1134 static char* _strnchr(const char* str, int32_t len, char c) { 1135 U_ASSERT(str != 0 && len >= 0); 1136 while (len-- != 0) { 1137 char d = *str; 1138 if (d == c) { 1139 return (char*) str; 1140 } else if (d == 0) { 1141 break; 1142 } 1143 ++str; 1144 } 1145 return NULL; 1146 } 1147 1148 /** 1149 * Lookup 'key' in the array 'list'. 
The array 'list' should contain 1150 * a NULL entry, followed by more entries, and a second NULL entry. 1151 * 1152 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or 1153 * COUNTRIES_3. 1154 */ 1155 static int16_t _findIndex(const char* const* list, const char* key) 1156 { 1157 const char* const* anchor = list; 1158 int32_t pass = 0; 1159 1160 /* Make two passes through two NULL-terminated arrays at 'list' */ 1161 while (pass++ < 2) { 1162 while (*list) { 1163 if (uprv_strcmp(key, *list) == 0) { 1164 return (int16_t)(list - anchor); 1165 } 1166 list++; 1167 } 1168 ++list; /* skip final NULL *CWB*/ 1169 } 1170 return -1; 1171 } 1172 1173 /* count the length of src while copying it to dest; return strlen(src) */ 1174 static inline int32_t 1175 _copyCount(char *dest, int32_t destCapacity, const char *src) { 1176 const char *anchor; 1177 char c; 1178 1179 anchor=src; 1180 for(;;) { 1181 if((c=*src)==0) { 1182 return (int32_t)(src-anchor); 1183 } 1184 if(destCapacity<=0) { 1185 return (int32_t)((src-anchor)+uprv_strlen(src)); 1186 } 1187 ++src; 1188 *dest++=c; 1189 --destCapacity; 1190 } 1191 } 1192 1193 U_CFUNC const char* 1194 uloc_getCurrentCountryID(const char* oldID){ 1195 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); 1196 if (offset >= 0) { 1197 return REPLACEMENT_COUNTRIES[offset]; 1198 } 1199 return oldID; 1200 } 1201 U_CFUNC const char* 1202 uloc_getCurrentLanguageID(const char* oldID){ 1203 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); 1204 if (offset >= 0) { 1205 return REPLACEMENT_LANGUAGES[offset]; 1206 } 1207 return oldID; 1208 } 1209 /* 1210 * the internal functions _getLanguage(), _getCountry(), _getVariant() 1211 * avoid duplicating code to handle the earlier locale ID pieces 1212 * in the functions for the later ones by 1213 * setting the *pEnd pointer to where they stopped parsing 1214 * 1215 * TODO try to use this in Locale 1216 */ 1217 U_CFUNC int32_t 1218 ulocimp_getLanguage(const char *localeID, 1219 char 
*language, int32_t languageCapacity, 1220 const char **pEnd) { 1221 int32_t i=0; 1222 int32_t offset; 1223 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ 1224 1225 /* if it starts with i- or x- then copy that prefix */ 1226 if(_isIDPrefix(localeID)) { 1227 if(i<languageCapacity) { 1228 language[i]=(char)uprv_tolower(*localeID); 1229 } 1230 if(i<languageCapacity) { 1231 language[i+1]='-'; 1232 } 1233 i+=2; 1234 localeID+=2; 1235 } 1236 1237 /* copy the language as far as possible and count its length */ 1238 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { 1239 if(i<languageCapacity) { 1240 language[i]=(char)uprv_tolower(*localeID); 1241 } 1242 if(i<3) { 1243 U_ASSERT(i>=0); 1244 lang[i]=(char)uprv_tolower(*localeID); 1245 } 1246 i++; 1247 localeID++; 1248 } 1249 1250 if(i==3) { 1251 /* convert 3 character code to 2 character code if possible *CWB*/ 1252 offset=_findIndex(LANGUAGES_3, lang); 1253 if(offset>=0) { 1254 i=_copyCount(language, languageCapacity, LANGUAGES[offset]); 1255 } 1256 } 1257 1258 if(pEnd!=NULL) { 1259 *pEnd=localeID; 1260 } 1261 return i; 1262 } 1263 1264 U_CFUNC int32_t 1265 ulocimp_getScript(const char *localeID, 1266 char *script, int32_t scriptCapacity, 1267 const char **pEnd) 1268 { 1269 int32_t idLen = 0; 1270 1271 if (pEnd != NULL) { 1272 *pEnd = localeID; 1273 } 1274 1275 /* copy the second item as far as possible and count its length */ 1276 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]) 1277 && uprv_isASCIILetter(localeID[idLen])) { 1278 idLen++; 1279 } 1280 1281 /* If it's exactly 4 characters long, then it's a script and not a country. 
*/ 1282 if (idLen == 4) { 1283 int32_t i; 1284 if (pEnd != NULL) { 1285 *pEnd = localeID+idLen; 1286 } 1287 if(idLen > scriptCapacity) { 1288 idLen = scriptCapacity; 1289 } 1290 if (idLen >= 1) { 1291 script[0]=(char)uprv_toupper(*(localeID++)); 1292 } 1293 for (i = 1; i < idLen; i++) { 1294 script[i]=(char)uprv_tolower(*(localeID++)); 1295 } 1296 } 1297 else { 1298 idLen = 0; 1299 } 1300 return idLen; 1301 } 1302 1303 U_CFUNC int32_t 1304 ulocimp_getCountry(const char *localeID, 1305 char *country, int32_t countryCapacity, 1306 const char **pEnd) 1307 { 1308 int32_t idLen=0; 1309 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; 1310 int32_t offset; 1311 1312 /* copy the country as far as possible and count its length */ 1313 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { 1314 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ 1315 cnty[idLen]=(char)uprv_toupper(localeID[idLen]); 1316 } 1317 idLen++; 1318 } 1319 1320 /* the country should be either length 2 or 3 */ 1321 if (idLen == 2 || idLen == 3) { 1322 UBool gotCountry = FALSE; 1323 /* convert 3 character code to 2 character code if possible *CWB*/ 1324 if(idLen==3) { 1325 offset=_findIndex(COUNTRIES_3, cnty); 1326 if(offset>=0) { 1327 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]); 1328 gotCountry = TRUE; 1329 } 1330 } 1331 if (!gotCountry) { 1332 int32_t i = 0; 1333 for (i = 0; i < idLen; i++) { 1334 if (i < countryCapacity) { 1335 country[i]=(char)uprv_toupper(localeID[i]); 1336 } 1337 } 1338 } 1339 localeID+=idLen; 1340 } else { 1341 idLen = 0; 1342 } 1343 1344 if(pEnd!=NULL) { 1345 *pEnd=localeID; 1346 } 1347 1348 return idLen; 1349 } 1350 1351 /** 1352 * @param needSeparator if true, then add leading '_' if any variants 1353 * are added to 'variant' 1354 */ 1355 static int32_t 1356 _getVariantEx(const char *localeID, 1357 char prev, 1358 char *variant, int32_t variantCapacity, 1359 UBool needSeparator) { 1360 int32_t i=0; 1361 1362 /* get one or more variant tags 
and separate them with '_' */ 1363 if(_isIDSeparator(prev)) { 1364 /* get a variant string after a '-' or '_' */ 1365 while(!_isTerminator(*localeID)) { 1366 if (needSeparator) { 1367 if (i<variantCapacity) { 1368 variant[i] = '_'; 1369 } 1370 ++i; 1371 needSeparator = FALSE; 1372 } 1373 if(i<variantCapacity) { 1374 variant[i]=(char)uprv_toupper(*localeID); 1375 if(variant[i]=='-') { 1376 variant[i]='_'; 1377 } 1378 } 1379 i++; 1380 localeID++; 1381 } 1382 } 1383 1384 /* if there is no variant tag after a '-' or '_' then look for '@' */ 1385 if(i==0) { 1386 if(prev=='@') { 1387 /* keep localeID */ 1388 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) { 1389 ++localeID; /* point after the '@' */ 1390 } else { 1391 return 0; 1392 } 1393 while(!_isTerminator(*localeID)) { 1394 if (needSeparator) { 1395 if (i<variantCapacity) { 1396 variant[i] = '_'; 1397 } 1398 ++i; 1399 needSeparator = FALSE; 1400 } 1401 if(i<variantCapacity) { 1402 variant[i]=(char)uprv_toupper(*localeID); 1403 if(variant[i]=='-' || variant[i]==',') { 1404 variant[i]='_'; 1405 } 1406 } 1407 i++; 1408 localeID++; 1409 } 1410 } 1411 1412 return i; 1413 } 1414 1415 static int32_t 1416 _getVariant(const char *localeID, 1417 char prev, 1418 char *variant, int32_t variantCapacity) { 1419 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE); 1420 } 1421 1422 /** 1423 * Delete ALL instances of a variant from the given list of one or 1424 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR". 1425 * @param variants the source string of one or more variants, 1426 * separated by '_'. This will be MODIFIED IN PLACE. Not zero 1427 * terminated; if it is, trailing zero will NOT be maintained. 1428 * @param variantsLen length of variants 1429 * @param toDelete variant to delete, without separators, e.g. 
"EURO" 1430 * or "PREEURO"; not zero terminated 1431 * @param toDeleteLen length of toDelete 1432 * @return number of characters deleted from variants 1433 */ 1434 static int32_t 1435 _deleteVariant(char* variants, int32_t variantsLen, 1436 const char* toDelete, int32_t toDeleteLen) 1437 { 1438 int32_t delta = 0; /* number of chars deleted */ 1439 for (;;) { 1440 UBool flag = FALSE; 1441 if (variantsLen < toDeleteLen) { 1442 return delta; 1443 } 1444 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && 1445 (variantsLen == toDeleteLen || 1446 (flag=(variants[toDeleteLen] == '_')))) 1447 { 1448 int32_t d = toDeleteLen + (flag?1:0); 1449 variantsLen -= d; 1450 delta += d; 1451 if (variantsLen > 0) { 1452 uprv_memmove(variants, variants+d, variantsLen); 1453 } 1454 } else { 1455 char* p = _strnchr(variants, variantsLen, '_'); 1456 if (p == NULL) { 1457 return delta; 1458 } 1459 ++p; 1460 variantsLen -= (int32_t)(p - variants); 1461 variants = p; 1462 } 1463 } 1464 } 1465 1466 /* Keyword enumeration */ 1467 1468 typedef struct UKeywordsContext { 1469 char* keywords; 1470 char* current; 1471 } UKeywordsContext; 1472 1473 static void U_CALLCONV 1474 uloc_kw_closeKeywords(UEnumeration *enumerator) { 1475 uprv_free(((UKeywordsContext *)enumerator->context)->keywords); 1476 uprv_free(enumerator->context); 1477 uprv_free(enumerator); 1478 } 1479 1480 static int32_t U_CALLCONV 1481 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { 1482 char *kw = ((UKeywordsContext *)en->context)->keywords; 1483 int32_t result = 0; 1484 while(*kw) { 1485 result++; 1486 kw += uprv_strlen(kw)+1; 1487 } 1488 return result; 1489 } 1490 1491 static const char* U_CALLCONV 1492 uloc_kw_nextKeyword(UEnumeration* en, 1493 int32_t* resultLength, 1494 UErrorCode* /*status*/) { 1495 const char* result = ((UKeywordsContext *)en->context)->current; 1496 int32_t len = 0; 1497 if(*result) { 1498 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current); 1499 
((UKeywordsContext *)en->context)->current += len+1; 1500 } else { 1501 result = NULL; 1502 } 1503 if (resultLength) { 1504 *resultLength = len; 1505 } 1506 return result; 1507 } 1508 1509 static void U_CALLCONV 1510 uloc_kw_resetKeywords(UEnumeration* en, 1511 UErrorCode* /*status*/) { 1512 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; 1513 } 1514 1515 static const UEnumeration gKeywordsEnum = { 1516 NULL, 1517 NULL, 1518 uloc_kw_closeKeywords, 1519 uloc_kw_countKeywords, 1520 uenum_unextDefault, 1521 uloc_kw_nextKeyword, 1522 uloc_kw_resetKeywords 1523 }; 1524 1525 U_CAPI UEnumeration* U_EXPORT2 1526 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) 1527 { 1528 UKeywordsContext *myContext = NULL; 1529 UEnumeration *result = NULL; 1530 1531 if(U_FAILURE(*status)) { 1532 return NULL; 1533 } 1534 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 1535 /* Null pointer test */ 1536 if (result == NULL) { 1537 *status = U_MEMORY_ALLOCATION_ERROR; 1538 return NULL; 1539 } 1540 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); 1541 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))); 1542 if (myContext == NULL) { 1543 *status = U_MEMORY_ALLOCATION_ERROR; 1544 uprv_free(result); 1545 return NULL; 1546 } 1547 myContext->keywords = (char *)uprv_malloc(keywordListSize+1); 1548 uprv_memcpy(myContext->keywords, keywordList, keywordListSize); 1549 myContext->keywords[keywordListSize] = 0; 1550 myContext->current = myContext->keywords; 1551 result->context = myContext; 1552 return result; 1553 } 1554 1555 U_CAPI UEnumeration* U_EXPORT2 1556 uloc_openKeywords(const char* localeID, 1557 UErrorCode* status) 1558 { 1559 int32_t i=0; 1560 char keywords[256]; 1561 int32_t keywordsCapacity = 256; 1562 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1563 const char* tmpLocaleID; 1564 1565 if(status==NULL || U_FAILURE(*status)) { 1566 return 0; 1567 } 1568 1569 if 
(_hasBCP47Extension(localeID)) { 1570 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 1571 } else { 1572 if (localeID==NULL) { 1573 localeID=uloc_getDefault(); 1574 } 1575 tmpLocaleID=localeID; 1576 } 1577 1578 /* Skip the language */ 1579 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 1580 if(_isIDSeparator(*tmpLocaleID)) { 1581 const char *scriptID; 1582 /* Skip the script if available */ 1583 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 1584 if(scriptID != tmpLocaleID+1) { 1585 /* Found optional script */ 1586 tmpLocaleID = scriptID; 1587 } 1588 /* Skip the Country */ 1589 if (_isIDSeparator(*tmpLocaleID)) { 1590 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID); 1591 if(_isIDSeparator(*tmpLocaleID)) { 1592 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0); 1593 } 1594 } 1595 } 1596 1597 /* keywords are located after '@' */ 1598 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { 1599 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status); 1600 } 1601 1602 if(i) { 1603 return uloc_openKeywordList(keywords, i, status); 1604 } else { 1605 return NULL; 1606 } 1607 } 1608 1609 1610 /* bit-flags for 'options' parameter of _canonicalize */ 1611 #define _ULOC_STRIP_KEYWORDS 0x2 1612 #define _ULOC_CANONICALIZE 0x1 1613 1614 #define OPTION_SET(options, mask) ((options & mask) != 0) 1615 1616 static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; 1617 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0]) 1618 1619 /** 1620 * Canonicalize the given localeID, to level 1 or to level 2, 1621 * depending on the options. To specify level 1, pass in options=0. 1622 * To specify level 2, pass in options=_ULOC_CANONICALIZE. 1623 * 1624 * This is the code underlying uloc_getName and uloc_canonicalize. 
/**
 * Canonicalize the given localeID, to level 1 or to level 2,
 * depending on the options.  To specify level 1, pass in options=0.
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
 *
 * This is the code underlying uloc_getName and uloc_canonicalize.
 *
 * The ID is rebuilt piece by piece (language, script, country, variant,
 * then keywords) into 'name', with 'len' counting every char the result
 * needs whether or not it fit. 'name' is either the caller's buffer or the
 * local localeBuffer; in the latter case the text is copied to 'result' at
 * the end.
 *
 * @param localeID        ID to canonicalize; NULL means uloc_getDefault().
 * @param result          output buffer; may be NULL.
 * @param resultCapacity  capacity of 'result'.
 * @param options         combination of _ULOC_CANONICALIZE and
 *                        _ULOC_STRIP_KEYWORDS (the latter suppresses the
 *                        whole "@keywords" part of the output).
 * @param err             in/out error code; must not be NULL (it is
 *                        dereferenced without a check).
 * @return the value of u_terminateChars(result, resultCapacity, len, err);
 *         0 if *err is already a failure on entry.
 */
static int32_t
_canonicalize(const char* localeID,
              char* result,
              int32_t resultCapacity,
              uint32_t options,
              UErrorCode* err) {
    int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
    char localeBuffer[ULOC_FULLNAME_CAPACITY];
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* origLocaleID;
    const char* tmpLocaleID;
    const char* keywordAssign = NULL;
    const char* separatorIndicator = NULL;
    const char* addKeyword = NULL;
    const char* addValue = NULL;
    char* name;
    char* variant = NULL; /* pointer into name, or NULL */

    if (U_FAILURE(*err)) {
        return 0;
    }

    if (_hasBCP47Extension(localeID)) {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
    } else {
        if (localeID==NULL) {
           localeID=uloc_getDefault();
        }
        tmpLocaleID=localeID;
    }

    origLocaleID=tmpLocaleID;

    /* Work in localeBuffer whenever the caller's buffer is missing or
       smaller than localeBuffer; otherwise build directly in result.
       (The restriction to full canonicalization is commented out, so this
       applies to both levels.) */
    if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
        (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
        name = localeBuffer;
        nameCapacity = (int32_t)sizeof(localeBuffer);
    } else {
        name = result;
        nameCapacity = resultCapacity;
    }

    /* get all pieces, one after another, and separate with '_' */
    len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);

    if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
        /* "i-default": substitute the default locale's ID */
        const char *d = uloc_getDefault();

        len = (int32_t)uprv_strlen(d);

        /* NOTE(review): this copy is not bounded by nameCapacity; it relies
           on the default locale ID fitting into 'name' -- confirm. */
        if (name != NULL) {
            uprv_strncpy(name, d, len);
        }
    } else if(_isIDSeparator(*tmpLocaleID)) {
        const char *scriptID;

        ++fieldCount;
        if(len<nameCapacity) {
            name[len]='_';
        }
        ++len;

        scriptSize=ulocimp_getScript(tmpLocaleID+1,
            (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
        if(scriptSize > 0) {
            /* Found optional script */
            tmpLocaleID = scriptID;
            ++fieldCount;
            len+=scriptSize;
            if (_isIDSeparator(*tmpLocaleID)) {
                /* If there is something else, then we add the _ */
                if(len<nameCapacity) {
                    name[len]='_';
                }
                ++len;
            }
        }

        if (_isIDSeparator(*tmpLocaleID)) {
            const char *cntryID;
            int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
                (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
            if (cntrySize > 0) {
                /* Found optional country */
                tmpLocaleID = cntryID;
                len+=cntrySize;
            }
            if(_isIDSeparator(*tmpLocaleID)) {
                /* If there is something else, then we add the _  if we found country before. */
                /* NOTE(review): cntrySize >= 0 is true for every value
                   ulocimp_getCountry returns in this file, so only the
                   "next char is not a separator" test is effective here. */
                if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
                    ++fieldCount;
                    if(len<nameCapacity) {
                        name[len]='_';
                    }
                    ++len;
                }

                variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
                    (len<nameCapacity ? name+len : NULL), nameCapacity-len);
                if (variantSize > 0) {
                    variant = len<nameCapacity ? name+len : NULL;
                    len += variantSize;
                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
                }
            }
        }
    }

    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
    /* (level 1 only: everything from '.' up to '@' or the end is kept) */
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
        UBool done = FALSE;
        do {
            char c = *tmpLocaleID;
            switch (c) {
            case 0:
            case '@':
                done = TRUE;
                break;
            default:
                if (len<nameCapacity) {
                    name[len] = c;
                }
                ++len;
                ++tmpLocaleID;
                break;
            }
        } while (!done);
    }

    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
       After this, tmpLocaleID either points to '@' or is NULL */
    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
        keywordAssign = uprv_strchr(tmpLocaleID, '=');
        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
    }

    /* Copy POSIX-style variant, if any [mr@FOO] */
    /* (level 1 only: an '@' part without '=' is copied verbatim, '@' included) */
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
        tmpLocaleID != NULL && keywordAssign == NULL) {
        for (;;) {
            char c = *tmpLocaleID;
            if (c == 0) {
                break;
            }
            if (len<nameCapacity) {
                name[len] = c;
            }
            ++len;
            ++tmpLocaleID;
        }
    }

    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
        /* Handle @FOO variant if @ is present and not followed by = */
        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
            int32_t posixVariantSize;
            /* Add missing '_' if needed */
            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
                do {
                    if(len<nameCapacity) {
                        name[len]='_';
                    }
                    ++len;
                    ++fieldCount;
                } while(fieldCount<2);
            }
            /* needSeparator is set when a variant was already collected, so
               the POSIX variant is appended after a '_' */
            posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
                                             (UBool)(variantSize > 0));
            if (posixVariantSize > 0) {
                if (variant == NULL) {
                    variant = name+len;
                }
                len += posixVariantSize;
                variantSize += posixVariantSize;
            }
        }

        /* Handle generic variants first */
        /* The first VARIANT_MAP entry whose variant occurs in the collected
           variants is deleted from them and remembered as a keyword/value
           pair to add to the output. */
        if (variant) {
            for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
                const char* variantToCompare = VARIANT_MAP[j].variant;
                int32_t n = (int32_t)uprv_strlen(variantToCompare);
                int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
                len -= variantLen;
                if (variantLen > 0) {
                    if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
                        --len;
                    }
                    addKeyword = VARIANT_MAP[j].keyword;
                    addValue = VARIANT_MAP[j].value;
                    break;
                }
            }
            if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
                --len;
            }
        }

        /* Look up the ID in the canonicalization map */
        /* An exact match replaces the whole name built so far; a keyword
           attached to the map entry overrides one taken from VARIANT_MAP. */
        for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
            const char* id = CANONICALIZE_MAP[j].id;
            int32_t n = (int32_t)uprv_strlen(id);
            if (len == n && uprv_strncmp(name, id, n) == 0) {
                if (n == 0 && tmpLocaleID != NULL) {
                    break; /* Don't remap "" if keywords present */
                }
                len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
                if (CANONICALIZE_MAP[j].keyword) {
                    addKeyword = CANONICALIZE_MAP[j].keyword;
                    addValue = CANONICALIZE_MAP[j].value;
                }
                break;
            }
        }
    }

    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
        /* Real keywords: there is an '=' after the '@', and no ';' in front
           of that '='. */
        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
            (!separatorIndicator || separatorIndicator > keywordAssign)) {
            if(len<nameCapacity) {
                name[len]='@';
            }
            ++len;
            ++fieldCount;
            len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
                                NULL, 0, NULL, TRUE, addKeyword, addValue, err);
        } else if (addKeyword != NULL) {
            /* No keywords in the input, but canonicalization produced one. */
            U_ASSERT(addValue != NULL && len < nameCapacity);
            /* inelegant but works -- later make _getKeywords do this? */
            len += _copyCount(name+len, nameCapacity-len, "@");
            len += _copyCount(name+len, nameCapacity-len, addKeyword);
            len += _copyCount(name+len, nameCapacity-len, "=");
            len += _copyCount(name+len, nameCapacity-len, addValue);
        }
    }

    /* If the text was built in localeBuffer, hand as much as fits to the caller. */
    if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
        uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
    }

    return u_terminateChars(result, resultCapacity, len, err);
}
name+len : NULL), nameCapacity-len, 1853 NULL, 0, NULL, TRUE, addKeyword, addValue, err); 1854 } else if (addKeyword != NULL) { 1855 U_ASSERT(addValue != NULL && len < nameCapacity); 1856 /* inelegant but works -- later make _getKeywords do this? */ 1857 len += _copyCount(name+len, nameCapacity-len, "@"); 1858 len += _copyCount(name+len, nameCapacity-len, addKeyword); 1859 len += _copyCount(name+len, nameCapacity-len, "="); 1860 len += _copyCount(name+len, nameCapacity-len, addValue); 1861 } 1862 } 1863 1864 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) { 1865 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len); 1866 } 1867 1868 return u_terminateChars(result, resultCapacity, len, err); 1869 } 1870 1871 /* ### ID parsing API **************************************************/ 1872 1873 U_CAPI int32_t U_EXPORT2 1874 uloc_getParent(const char* localeID, 1875 char* parent, 1876 int32_t parentCapacity, 1877 UErrorCode* err) 1878 { 1879 const char *lastUnderscore; 1880 int32_t i; 1881 1882 if (U_FAILURE(*err)) 1883 return 0; 1884 1885 if (localeID == NULL) 1886 localeID = uloc_getDefault(); 1887 1888 lastUnderscore=uprv_strrchr(localeID, '_'); 1889 if(lastUnderscore!=NULL) { 1890 i=(int32_t)(lastUnderscore-localeID); 1891 } else { 1892 i=0; 1893 } 1894 1895 if(i>0 && parent != localeID) { 1896 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity)); 1897 } 1898 return u_terminateChars(parent, parentCapacity, i, err); 1899 } 1900 1901 U_CAPI int32_t U_EXPORT2 1902 uloc_getLanguage(const char* localeID, 1903 char* language, 1904 int32_t languageCapacity, 1905 UErrorCode* err) 1906 { 1907 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. 
*CWB*/ 1908 int32_t i=0; 1909 1910 if (err==NULL || U_FAILURE(*err)) { 1911 return 0; 1912 } 1913 1914 if(localeID==NULL) { 1915 localeID=uloc_getDefault(); 1916 } 1917 1918 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL); 1919 return u_terminateChars(language, languageCapacity, i, err); 1920 } 1921 1922 U_CAPI int32_t U_EXPORT2 1923 uloc_getScript(const char* localeID, 1924 char* script, 1925 int32_t scriptCapacity, 1926 UErrorCode* err) 1927 { 1928 int32_t i=0; 1929 1930 if(err==NULL || U_FAILURE(*err)) { 1931 return 0; 1932 } 1933 1934 if(localeID==NULL) { 1935 localeID=uloc_getDefault(); 1936 } 1937 1938 /* skip the language */ 1939 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1940 if(_isIDSeparator(*localeID)) { 1941 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL); 1942 } 1943 return u_terminateChars(script, scriptCapacity, i, err); 1944 } 1945 1946 U_CAPI int32_t U_EXPORT2 1947 uloc_getCountry(const char* localeID, 1948 char* country, 1949 int32_t countryCapacity, 1950 UErrorCode* err) 1951 { 1952 int32_t i=0; 1953 1954 if(err==NULL || U_FAILURE(*err)) { 1955 return 0; 1956 } 1957 1958 if(localeID==NULL) { 1959 localeID=uloc_getDefault(); 1960 } 1961 1962 /* Skip the language */ 1963 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1964 if(_isIDSeparator(*localeID)) { 1965 const char *scriptID; 1966 /* Skip the script if available */ 1967 ulocimp_getScript(localeID+1, NULL, 0, &scriptID); 1968 if(scriptID != localeID+1) { 1969 /* Found optional script */ 1970 localeID = scriptID; 1971 } 1972 if(_isIDSeparator(*localeID)) { 1973 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL); 1974 } 1975 } 1976 return u_terminateChars(country, countryCapacity, i, err); 1977 } 1978 1979 U_CAPI int32_t U_EXPORT2 1980 uloc_getVariant(const char* localeID, 1981 char* variant, 1982 int32_t variantCapacity, 1983 UErrorCode* err) 1984 { 1985 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1986 const char* tmpLocaleID; 1987 int32_t 
i=0; 1988 1989 if(err==NULL || U_FAILURE(*err)) { 1990 return 0; 1991 } 1992 1993 if (_hasBCP47Extension(localeID)) { 1994 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 1995 } else { 1996 if (localeID==NULL) { 1997 localeID=uloc_getDefault(); 1998 } 1999 tmpLocaleID=localeID; 2000 } 2001 2002 /* Skip the language */ 2003 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 2004 if(_isIDSeparator(*tmpLocaleID)) { 2005 const char *scriptID; 2006 /* Skip the script if available */ 2007 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 2008 if(scriptID != tmpLocaleID+1) { 2009 /* Found optional script */ 2010 tmpLocaleID = scriptID; 2011 } 2012 /* Skip the Country */ 2013 if (_isIDSeparator(*tmpLocaleID)) { 2014 const char *cntryID; 2015 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID); 2016 if (cntryID != tmpLocaleID+1) { 2017 /* Found optional country */ 2018 tmpLocaleID = cntryID; 2019 } 2020 if(_isIDSeparator(*tmpLocaleID)) { 2021 /* If there was no country ID, skip a possible extra IDSeparator */ 2022 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) { 2023 tmpLocaleID++; 2024 } 2025 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity); 2026 } 2027 } 2028 } 2029 2030 /* removed by weiv. We don't want to handle POSIX variants anymore. 
Use canonicalization function */ 2031 /* if we do not have a variant tag yet then try a POSIX variant after '@' */ 2032 /* 2033 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) { 2034 i=_getVariant(localeID+1, '@', variant, variantCapacity); 2035 } 2036 */ 2037 return u_terminateChars(variant, variantCapacity, i, err); 2038 } 2039 2040 U_CAPI int32_t U_EXPORT2 2041 uloc_getName(const char* localeID, 2042 char* name, 2043 int32_t nameCapacity, 2044 UErrorCode* err) 2045 { 2046 return _canonicalize(localeID, name, nameCapacity, 0, err); 2047 } 2048 2049 U_CAPI int32_t U_EXPORT2 2050 uloc_getBaseName(const char* localeID, 2051 char* name, 2052 int32_t nameCapacity, 2053 UErrorCode* err) 2054 { 2055 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err); 2056 } 2057 2058 U_CAPI int32_t U_EXPORT2 2059 uloc_canonicalize(const char* localeID, 2060 char* name, 2061 int32_t nameCapacity, 2062 UErrorCode* err) 2063 { 2064 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err); 2065 } 2066 2067 U_CAPI const char* U_EXPORT2 2068 uloc_getISO3Language(const char* localeID) 2069 { 2070 int16_t offset; 2071 char lang[ULOC_LANG_CAPACITY]; 2072 UErrorCode err = U_ZERO_ERROR; 2073 2074 if (localeID == NULL) 2075 { 2076 localeID = uloc_getDefault(); 2077 } 2078 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err); 2079 if (U_FAILURE(err)) 2080 return ""; 2081 offset = _findIndex(LANGUAGES, lang); 2082 if (offset < 0) 2083 return ""; 2084 return LANGUAGES_3[offset]; 2085 } 2086 2087 U_CAPI const char* U_EXPORT2 2088 uloc_getISO3Country(const char* localeID) 2089 { 2090 int16_t offset; 2091 char cntry[ULOC_LANG_CAPACITY]; 2092 UErrorCode err = U_ZERO_ERROR; 2093 2094 if (localeID == NULL) 2095 { 2096 localeID = uloc_getDefault(); 2097 } 2098 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err); 2099 if (U_FAILURE(err)) 2100 return ""; 2101 offset = _findIndex(COUNTRIES, cntry); 2102 if (offset < 0) 2103 return ""; 
2104 2105 return COUNTRIES_3[offset]; 2106 } 2107 2108 U_CAPI uint32_t U_EXPORT2 2109 uloc_getLCID(const char* localeID) 2110 { 2111 UErrorCode status = U_ZERO_ERROR; 2112 char langID[ULOC_FULLNAME_CAPACITY]; 2113 2114 uloc_getLanguage(localeID, langID, sizeof(langID), &status); 2115 if (U_FAILURE(status)) { 2116 return 0; 2117 } 2118 2119 if (uprv_strchr(localeID, '@')) { 2120 // uprv_convertToLCID does not support keywords other than collation. 2121 // Remove all keywords except collation. 2122 int32_t len; 2123 char collVal[ULOC_KEYWORDS_CAPACITY]; 2124 char tmpLocaleID[ULOC_FULLNAME_CAPACITY]; 2125 2126 len = uloc_getKeywordValue(localeID, "collation", collVal, 2127 sizeof(collVal)/sizeof(collVal[0]) - 1, &status); 2128 2129 if (U_SUCCESS(status) && len > 0) { 2130 collVal[len] = 0; 2131 2132 len = uloc_getBaseName(localeID, tmpLocaleID, 2133 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status); 2134 2135 if (U_SUCCESS(status)) { 2136 tmpLocaleID[len] = 0; 2137 2138 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID, 2139 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status); 2140 2141 if (U_SUCCESS(status)) { 2142 tmpLocaleID[len] = 0; 2143 return uprv_convertToLCID(langID, tmpLocaleID, &status); 2144 } 2145 } 2146 } 2147 2148 // fall through - all keywords are simply ignored 2149 status = U_ZERO_ERROR; 2150 } 2151 2152 return uprv_convertToLCID(langID, localeID, &status); 2153 } 2154 2155 U_CAPI int32_t U_EXPORT2 2156 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, 2157 UErrorCode *status) 2158 { 2159 return uprv_convertToPosix(hostid, locale, localeCapacity, status); 2160 } 2161 2162 /* ### Default locale **************************************************/ 2163 2164 U_CAPI const char* U_EXPORT2 2165 uloc_getDefault() 2166 { 2167 return locale_get_default(); 2168 } 2169 2170 U_CAPI void U_EXPORT2 2171 uloc_setDefault(const char* newDefaultLocale, 2172 UErrorCode* err) 2173 { 2174 if (U_FAILURE(*err)) 2175 
return; 2176 /* the error code isn't currently used for anything by this function*/ 2177 2178 /* propagate change to C++ */ 2179 locale_set_default(newDefaultLocale); 2180 } 2181 2182 /** 2183 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer 2184 * to an array of pointers to arrays of char. All of these pointers are owned 2185 * by ICU-- do not delete them, and do not write through them. The array is 2186 * terminated with a null pointer. 2187 */ 2188 U_CAPI const char* const* U_EXPORT2 2189 uloc_getISOLanguages() 2190 { 2191 return LANGUAGES; 2192 } 2193 2194 /** 2195 * Returns a list of all 2-letter country codes defined in ISO 639. This is a 2196 * pointer to an array of pointers to arrays of char. All of these pointers are 2197 * owned by ICU-- do not delete them, and do not write through them. The array is 2198 * terminated with a null pointer. 2199 */ 2200 U_CAPI const char* const* U_EXPORT2 2201 uloc_getISOCountries() 2202 { 2203 return COUNTRIES; 2204 } 2205 2206 2207 /* this function to be moved into cstring.c later */ 2208 static char gDecimal = 0; 2209 2210 static /* U_CAPI */ 2211 double 2212 /* U_EXPORT2 */ 2213 _uloc_strtod(const char *start, char **end) { 2214 char *decimal; 2215 char *myEnd; 2216 char buf[30]; 2217 double rv; 2218 if (!gDecimal) { 2219 char rep[5]; 2220 /* For machines that decide to change the decimal on you, 2221 and try to be too smart with localization. 2222 This normally should be just a '.'. 
*/ 2223 sprintf(rep, "%+1.1f", 1.0); 2224 gDecimal = rep[2]; 2225 } 2226 2227 if(gDecimal == '.') { 2228 return uprv_strtod(start, end); /* fall through to OS */ 2229 } else { 2230 uprv_strncpy(buf, start, 29); 2231 buf[29]=0; 2232 decimal = uprv_strchr(buf, '.'); 2233 if(decimal) { 2234 *decimal = gDecimal; 2235 } else { 2236 return uprv_strtod(start, end); /* no decimal point */ 2237 } 2238 rv = uprv_strtod(buf, &myEnd); 2239 if(end) { 2240 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */ 2241 } 2242 return rv; 2243 } 2244 } 2245 2246 typedef struct { 2247 float q; 2248 int32_t dummy; /* to avoid uninitialized memory copy from qsort */ 2249 char *locale; 2250 } _acceptLangItem; 2251 2252 static int32_t U_CALLCONV 2253 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b) 2254 { 2255 const _acceptLangItem *aa = (const _acceptLangItem*)a; 2256 const _acceptLangItem *bb = (const _acceptLangItem*)b; 2257 2258 int32_t rc = 0; 2259 if(bb->q < aa->q) { 2260 rc = -1; /* A > B */ 2261 } else if(bb->q > aa->q) { 2262 rc = 1; /* A < B */ 2263 } else { 2264 rc = 0; /* A = B */ 2265 } 2266 2267 if(rc==0) { 2268 rc = uprv_stricmp(aa->locale, bb->locale); 2269 } 2270 2271 #if defined(ULOC_DEBUG) 2272 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n", 2273 aa->locale, aa->q, 2274 bb->locale, bb->q, 2275 rc);*/ 2276 #endif 2277 2278 return rc; 2279 } 2280 2281 /* 2282 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53 2283 */ 2284 2285 U_CAPI int32_t U_EXPORT2 2286 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult, 2287 const char *httpAcceptLanguage, 2288 UEnumeration* availableLocales, 2289 UErrorCode *status) 2290 { 2291 _acceptLangItem *j; 2292 _acceptLangItem smallBuffer[30]; 2293 char **strs; 
2294 char tmp[ULOC_FULLNAME_CAPACITY +1]; 2295 int32_t n = 0; 2296 const char *itemEnd; 2297 const char *paramEnd; 2298 const char *s; 2299 const char *t; 2300 int32_t res; 2301 int32_t i; 2302 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); 2303 int32_t jSize; 2304 char *tempstr; /* Use for null pointer check */ 2305 2306 j = smallBuffer; 2307 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]); 2308 if(U_FAILURE(*status)) { 2309 return -1; 2310 } 2311 2312 for(s=httpAcceptLanguage;s&&*s;) { 2313 while(isspace(*s)) /* eat space at the beginning */ 2314 s++; 2315 itemEnd=uprv_strchr(s,','); 2316 paramEnd=uprv_strchr(s,';'); 2317 if(!itemEnd) { 2318 itemEnd = httpAcceptLanguage+l; /* end of string */ 2319 } 2320 if(paramEnd && paramEnd<itemEnd) { 2321 /* semicolon (;) is closer than end (,) */ 2322 t = paramEnd+1; 2323 if(*t=='q') { 2324 t++; 2325 } 2326 while(isspace(*t)) { 2327 t++; 2328 } 2329 if(*t=='=') { 2330 t++; 2331 } 2332 while(isspace(*t)) { 2333 t++; 2334 } 2335 j[n].q = (float)_uloc_strtod(t,NULL); 2336 } else { 2337 /* no semicolon - it's 1.0 */ 2338 j[n].q = 1.0f; 2339 paramEnd = itemEnd; 2340 } 2341 j[n].dummy=0; 2342 /* eat spaces prior to semi */ 2343 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) 2344 ; 2345 /* Check for null pointer from uprv_strndup */ 2346 tempstr = uprv_strndup(s,(int32_t)((t+1)-s)); 2347 if (tempstr == NULL) { 2348 *status = U_MEMORY_ALLOCATION_ERROR; 2349 return -1; 2350 } 2351 j[n].locale = tempstr; 2352 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status); 2353 if(strcmp(j[n].locale,tmp)) { 2354 uprv_free(j[n].locale); 2355 j[n].locale=uprv_strdup(tmp); 2356 } 2357 #if defined(ULOC_DEBUG) 2358 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ 2359 #endif 2360 n++; 2361 s = itemEnd; 2362 while(*s==',') { /* eat duplicate commas */ 2363 s++; 2364 } 2365 if(n>=jSize) { 2366 if(j==smallBuffer) { /* overflowed the small buffer. 
*/ 2367 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2))); 2368 if(j!=NULL) { 2369 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize); 2370 } 2371 #if defined(ULOC_DEBUG) 2372 fprintf(stderr,"malloced at size %d\n", jSize); 2373 #endif 2374 } else { 2375 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2)); 2376 #if defined(ULOC_DEBUG) 2377 fprintf(stderr,"re-alloced at size %d\n", jSize); 2378 #endif 2379 } 2380 jSize *= 2; 2381 if(j==NULL) { 2382 *status = U_MEMORY_ALLOCATION_ERROR; 2383 return -1; 2384 } 2385 } 2386 } 2387 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); 2388 if(U_FAILURE(*status)) { 2389 if(j != smallBuffer) { 2390 #if defined(ULOC_DEBUG) 2391 fprintf(stderr,"freeing j %p\n", j); 2392 #endif 2393 uprv_free(j); 2394 } 2395 return -1; 2396 } 2397 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n))); 2398 /* Check for null pointer */ 2399 if (strs == NULL) { 2400 uprv_free(j); /* Free to avoid memory leak */ 2401 *status = U_MEMORY_ALLOCATION_ERROR; 2402 return -1; 2403 } 2404 for(i=0;i<n;i++) { 2405 #if defined(ULOC_DEBUG) 2406 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/ 2407 #endif 2408 strs[i]=j[i].locale; 2409 } 2410 res = uloc_acceptLanguage(result, resultAvailable, outResult, 2411 (const char**)strs, n, availableLocales, status); 2412 for(i=0;i<n;i++) { 2413 uprv_free(strs[i]); 2414 } 2415 uprv_free(strs); 2416 if(j != smallBuffer) { 2417 #if defined(ULOC_DEBUG) 2418 fprintf(stderr,"freeing j %p\n", j); 2419 #endif 2420 uprv_free(j); 2421 } 2422 return res; 2423 } 2424 2425 2426 U_CAPI int32_t U_EXPORT2 2427 uloc_acceptLanguage(char *result, int32_t resultAvailable, 2428 UAcceptResult *outResult, const char **acceptList, 2429 int32_t acceptListCount, 2430 UEnumeration* availableLocales, 2431 UErrorCode *status) 2432 { 2433 int32_t i,j; 2434 int32_t len; 2435 int32_t maxLen=0; 2436 char tmp[ULOC_FULLNAME_CAPACITY+1]; 2437 const 
char *l; 2438 char **fallbackList; 2439 if(U_FAILURE(*status)) { 2440 return -1; 2441 } 2442 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount))); 2443 if(fallbackList==NULL) { 2444 *status = U_MEMORY_ALLOCATION_ERROR; 2445 return -1; 2446 } 2447 for(i=0;i<acceptListCount;i++) { 2448 #if defined(ULOC_DEBUG) 2449 fprintf(stderr,"%02d: %s\n", i, acceptList[i]); 2450 #endif 2451 while((l=uenum_next(availableLocales, NULL, status))) { 2452 #if defined(ULOC_DEBUG) 2453 fprintf(stderr," %s\n", l); 2454 #endif 2455 len = (int32_t)uprv_strlen(l); 2456 if(!uprv_strcmp(acceptList[i], l)) { 2457 if(outResult) { 2458 *outResult = ULOC_ACCEPT_VALID; 2459 } 2460 #if defined(ULOC_DEBUG) 2461 fprintf(stderr, "MATCH! %s\n", l); 2462 #endif 2463 if(len>0) { 2464 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2465 } 2466 for(j=0;j<i;j++) { 2467 uprv_free(fallbackList[j]); 2468 } 2469 uprv_free(fallbackList); 2470 return u_terminateChars(result, resultAvailable, len, status); 2471 } 2472 if(len>maxLen) { 2473 maxLen = len; 2474 } 2475 } 2476 uenum_reset(availableLocales, status); 2477 /* save off parent info */ 2478 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 2479 fallbackList[i] = uprv_strdup(tmp); 2480 } else { 2481 fallbackList[i]=0; 2482 } 2483 } 2484 2485 for(maxLen--;maxLen>0;maxLen--) { 2486 for(i=0;i<acceptListCount;i++) { 2487 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) { 2488 #if defined(ULOC_DEBUG) 2489 fprintf(stderr,"Try: [%s]", fallbackList[i]); 2490 #endif 2491 while((l=uenum_next(availableLocales, NULL, status))) { 2492 #if defined(ULOC_DEBUG) 2493 fprintf(stderr," %s\n", l); 2494 #endif 2495 len = (int32_t)uprv_strlen(l); 2496 if(!uprv_strcmp(fallbackList[i], l)) { 2497 if(outResult) { 2498 *outResult = ULOC_ACCEPT_FALLBACK; 2499 } 2500 #if defined(ULOC_DEBUG) 2501 fprintf(stderr, "fallback MATCH! 
%s\n", l); 2502 #endif 2503 if(len>0) { 2504 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2505 } 2506 for(j=0;j<acceptListCount;j++) { 2507 uprv_free(fallbackList[j]); 2508 } 2509 uprv_free(fallbackList); 2510 return u_terminateChars(result, resultAvailable, len, status); 2511 } 2512 } 2513 uenum_reset(availableLocales, status); 2514 2515 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 2516 uprv_free(fallbackList[i]); 2517 fallbackList[i] = uprv_strdup(tmp); 2518 } else { 2519 uprv_free(fallbackList[i]); 2520 fallbackList[i]=0; 2521 } 2522 } 2523 } 2524 if(outResult) { 2525 *outResult = ULOC_ACCEPT_FAILED; 2526 } 2527 } 2528 for(i=0;i<acceptListCount;i++) { 2529 uprv_free(fallbackList[i]); 2530 } 2531 uprv_free(fallbackList); 2532 return -1; 2533 } 2534 2535 U_CAPI const char* U_EXPORT2 2536 uloc_toUnicodeLocaleKey(const char* keyword) 2537 { 2538 const char* bcpKey = ulocimp_toBcpKey(keyword); 2539 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) { 2540 // unknown keyword, but syntax is fine.. 2541 return keyword; 2542 } 2543 return bcpKey; 2544 } 2545 2546 U_CAPI const char* U_EXPORT2 2547 uloc_toUnicodeLocaleType(const char* keyword, const char* value) 2548 { 2549 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL); 2550 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) { 2551 // unknown keyword, but syntax is fine.. 
2552 return value; 2553 } 2554 return bcpType; 2555 } 2556 2557 #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) 2558 #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) 2559 2560 static UBool 2561 isWellFormedLegacyKey(const char* legacyKey) 2562 { 2563 const char* p = legacyKey; 2564 while (*p) { 2565 if (!UPRV_ISALPHANUM(*p)) { 2566 return FALSE; 2567 } 2568 p++; 2569 } 2570 return TRUE; 2571 } 2572 2573 static UBool 2574 isWellFormedLegacyType(const char* legacyType) 2575 { 2576 const char* p = legacyType; 2577 int32_t alphaNumLen = 0; 2578 while (*p) { 2579 if (*p == '_' || *p == '/' || *p == '-') { 2580 if (alphaNumLen == 0) { 2581 return FALSE; 2582 } 2583 alphaNumLen = 0; 2584 } else if (UPRV_ISALPHANUM(*p)) { 2585 alphaNumLen++; 2586 } else { 2587 return FALSE; 2588 } 2589 p++; 2590 } 2591 return (alphaNumLen != 0); 2592 } 2593 2594 U_CAPI const char* U_EXPORT2 2595 uloc_toLegacyKey(const char* keyword) 2596 { 2597 const char* legacyKey = ulocimp_toLegacyKey(keyword); 2598 if (legacyKey == NULL) { 2599 // Checks if the specified locale key is well-formed with the legacy locale syntax. 2600 // 2601 // Note: 2602 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax. 2603 // However, a key should not contain '=' obviously. For now, all existing 2604 // keys are using ASCII alphabetic letters only. We won't add any new key 2605 // that is not compatible with the BCP 47 syntax. Therefore, we assume 2606 // a valid key consist from [0-9a-zA-Z], no symbols. 2607 if (isWellFormedLegacyKey(keyword)) { 2608 return keyword; 2609 } 2610 } 2611 return legacyKey; 2612 } 2613 2614 U_CAPI const char* U_EXPORT2 2615 uloc_toLegacyType(const char* keyword, const char* value) 2616 { 2617 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL); 2618 if (legacyType == NULL) { 2619 // Checks if the specified locale type is well-formed with the legacy locale syntax. 
2620 // 2621 // Note: 2622 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax. 2623 // However, a type should not contain '=' obviously. For now, all existing 2624 // types are using ASCII alphabetic letters with a few symbol letters. We won't 2625 // add any new type that is not compatible with the BCP 47 syntax except timezone 2626 // IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain 2627 // '-' '_' '/' in the middle. 2628 if (isWellFormedLegacyType(value)) { 2629 return value; 2630 } 2631 } 2632 return legacyType; 2633 } 2634 2635 /*eof*/ 2636